Update Parse::CPAN::Meta to 0.04_01
[p5sagit/p5-mst-13.2.git] / lib / Parse / CPAN / Meta.pm
CommitLineData
be96f5c3 1package Parse::CPAN::Meta;
2
3use strict;
4use Carp 'croak';
5BEGIN {
6 require 5.004;
7 require Exporter;
c59d1bfa 8 $Parse::CPAN::Meta::VERSION = '0.04_01';
be96f5c3 9 @Parse::CPAN::Meta::ISA = qw{ Exporter };
10 @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile };
11}
12
13# Prototypes
14sub LoadFile ($);
15sub Load ($);
16sub _scalar ($$$);
17sub _array ($$$);
18sub _hash ($$$);
19
20# Printable characters for escapes
21my %UNESCAPES = (
22 z => "\x00", a => "\x07", t => "\x09",
23 n => "\x0a", v => "\x0b", f => "\x0c",
24 r => "\x0d", e => "\x1b", '\\' => '\\',
25);
26
27
c59d1bfa 28my %BOM = (
29 "\357\273\277" => 'UTF-8',
30 "\376\377" => 'UTF-16BE',
31 "\377\376" => 'UTF-16LE',
32 "\0\0\376\377" => 'UTF-32BE',
33 "\377\376\0\0" => 'UTF-32LE'
34);
35
36sub BOM_MIN_LENGTH () { 2 }
37sub BOM_MAX_LENGTH () { 4 }
38sub HAVE_UTF8 () { $] >= 5.007003 }
39
40BEGIN { require utf8 if HAVE_UTF8 }
be96f5c3 41
42
43#####################################################################
44# Implementation
45
46# Create an object from a file
47sub LoadFile ($) {
48 # Check the file
49 my $file = shift;
50 croak('You did not specify a file name') unless $file;
51 croak( "File '$file' does not exist" ) unless -e $file;
52 croak( "'$file' is a directory, not a file" ) unless -f _;
53 croak( "Insufficient permissions to read '$file'" ) unless -r _;
54
55 # Slurp in the file
56 local $/ = undef;
57 open( CFG, $file ) or croak("Failed to open file '$file': $!");
58 my $yaml = <CFG>;
59 close CFG or croak("Failed to close file '$file': $!");
60
61 # Hand off to the actual parser
62 Load( $yaml );
63}
64
65# Parse a document from a string.
66# Doing checks on $_[0] prevents us having to do a string copy.
67sub Load ($) {
c59d1bfa 68
69 my $str = $_[0];
70
71 # Handle special cases
72 foreach my $length ( BOM_MIN_LENGTH .. BOM_MAX_LENGTH ) {
73 if ( my $enc = $BOM{substr($str, 0, $length)} ) {
74 croak("Stream has a non UTF-8 BOM") unless $enc eq 'UTF-8';
75 substr($str, 0, $length) = ''; # strip UTF-8 bom if found, we'll just ignore it
76 }
77 }
78
79 if ( HAVE_UTF8 ) {
80 utf8::decode($str); # try to decode as utf8
81 }
82
83 unless ( defined $str ) {
be96f5c3 84 croak("Did not provide a string to Load");
85 }
c59d1bfa 86 return() unless length $str;
87 unless ( $str =~ /[\012\015]+$/ ) {
be96f5c3 88 croak("Stream does not end with newline character");
89 }
90
91 # Split the file into lines
92 my @lines = grep { ! /^\s*(?:\#.*)?$/ }
c59d1bfa 93 split /(?:\015{1,2}\012|\015|\012)/, $str;
be96f5c3 94
95 # A nibbling parser
96 my @documents = ();
97 while ( @lines ) {
98 # Do we have a document header?
99 if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?$/ ) {
100 # Handle scalar documents
101 shift @lines;
102 if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML:[\d\.]+)$/ ) {
103 push @documents, _scalar( "$1", [ undef ], \@lines );
104 next;
105 }
106 }
107
108 if ( ! @lines or $lines[0] =~ /^---\s*(?:(.+)\s*)?$/ ) {
109 # A naked document
110 push @documents, undef;
111
112 } elsif ( $lines[0] =~ /^\s*\-/ ) {
113 # An array at the root
114 my $document = [ ];
115 push @documents, $document;
116 _array( $document, [ 0 ], \@lines );
117
118 } elsif ( $lines[0] =~ /^(\s*)\w/ ) {
119 # A hash at the root
120 my $document = { };
121 push @documents, $document;
122 _hash( $document, [ length($1) ], \@lines );
123
124 } else {
125 croak("Parse::CPAN::Meta does not support the line '$lines[0]'");
126 }
127 }
128
129 if ( wantarray ) {
130 return @documents;
131 } else {
132 return $documents[-1];
133 }
134}
135
136# Deparse a scalar string to the actual scalar
137sub _scalar ($$$) {
138 my $string = shift;
139 my $indent = shift;
140 my $lines = shift;
141
142 # Trim trailing whitespace
143 $string =~ s/\s*$//;
144
145 # Explitic null/undef
146 return undef if $string eq '~';
147
148 # Quotes
149 if ( $string =~ /^\'(.*?)\'$/ ) {
150 return '' unless defined $1;
151 my $rv = $1;
152 $rv =~ s/\'\'/\'/g;
153 return $rv;
154 }
155 if ( $string =~ /^\"((?:\\.|[^\"])*)\"$/ ) {
156 my $str = $1;
157 $str =~ s/\\"/"/g;
158 $str =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex;
159 return $str;
160 }
161 if ( $string =~ /^[\'\"]/ ) {
162 # A quote with folding... we don't support that
163 croak("Parse::CPAN::Meta does not support multi-line quoted scalars");
164 }
165
166 # Null hash and array
167 if ( $string eq '{}' ) {
168 # Null hash
169 return {};
170 }
171 if ( $string eq '[]' ) {
172 # Null array
173 return [];
174 }
175
176 # Regular unquoted string
177 return $string unless $string =~ /^[>|]/;
178
179 # Error
180 croak("Multi-line scalar content missing") unless @$lines;
181
182 # Check the indent depth
183 $lines->[0] =~ /^(\s*)/;
184 $indent->[-1] = length("$1");
185 if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) {
186 croak("Illegal line indenting");
187 }
188
189 # Pull the lines
190 my @multiline = ();
191 while ( @$lines ) {
192 $lines->[0] =~ /^(\s*)/;
193 last unless length($1) >= $indent->[-1];
194 push @multiline, substr(shift(@$lines), length($1));
195 }
196
197 my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n";
198 my $t = (substr($string, 1, 1) eq '-') ? '' : "\n";
199 return join( $j, @multiline ) . $t;
200}
201
202# Parse an array
203sub _array ($$$) {
204 my $array = shift;
205 my $indent = shift;
206 my $lines = shift;
207
208 while ( @$lines ) {
209 # Check for a new document
210 return 1 if $lines->[0] =~ /^---\s*(?:(.+)\s*)?$/;
211
212 # Check the indent level
213 $lines->[0] =~ /^(\s*)/;
214 if ( length($1) < $indent->[-1] ) {
215 return 1;
216 } elsif ( length($1) > $indent->[-1] ) {
217 croak("Hash line over-indented");
218 }
219
220 if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) {
221 # Inline nested hash
222 my $indent2 = length("$1");
223 $lines->[0] =~ s/-/ /;
224 push @$array, { };
225 _hash( $array->[-1], [ @$indent, $indent2 ], $lines );
226
227 } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*$/ ) {
228 # Array entry with a value
229 shift @$lines;
230 push @$array, _scalar( "$2", [ @$indent, undef ], $lines );
231
232 } elsif ( $lines->[0] =~ /^\s*\-\s*$/ ) {
233 shift @$lines;
234 unless ( @$lines ) {
235 push @$array, undef;
236 return 1;
237 }
238 if ( $lines->[0] =~ /^(\s*)\-/ ) {
239 my $indent2 = length("$1");
240 if ( $indent->[-1] == $indent2 ) {
241 # Null array entry
242 push @$array, undef;
243 } else {
244 # Naked indenter
245 push @$array, [ ];
246 _array( $array->[-1], [ @$indent, $indent2 ], $lines );
247 }
248
249 } elsif ( $lines->[0] =~ /^(\s*)\w/ ) {
250 push @$array, { };
251 _hash( $array->[-1], [ @$indent, length("$1") ], $lines );
252
253 } else {
254 croak("Parse::CPAN::Meta does not support the line '$lines->[0]'");
255 }
256
257 } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) {
258 # This is probably a structure like the following...
259 # ---
260 # foo:
261 # - list
262 # bar: value
263 #
264 # ... so lets return and let the hash parser handle it
265 return 1;
266
267 } else {
268 croak("Parse::CPAN::Meta does not support the line '$lines->[0]'");
269 }
270 }
271
272 return 1;
273}
274
275# Parse an array
276sub _hash ($$$) {
277 my $hash = shift;
278 my $indent = shift;
279 my $lines = shift;
280
281 while ( @$lines ) {
282 # Check for a new document
283 return 1 if $lines->[0] =~ /^---\s*(?:(.+)\s*)?$/;
284
285 # Check the indent level
286 $lines->[0] =~/^(\s*)/;
287 if ( length($1) < $indent->[-1] ) {
288 return 1;
289 } elsif ( length($1) > $indent->[-1] ) {
290 croak("Hash line over-indented");
291 }
292
293 # Get the key
294 unless ( $lines->[0] =~ s/^\s*([^\'\"][^\n]*?)\s*:(\s+|$)// ) {
295 croak("Bad hash line");
296 }
297 my $key = $1;
298
299 # Do we have a value?
300 if ( length $lines->[0] ) {
301 # Yes
302 $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines );
303 next;
304 }
305
306 # An indent
307 shift @$lines;
308 unless ( @$lines ) {
309 $hash->{$key} = undef;
310 return 1;
311 }
312 if ( $lines->[0] =~ /^(\s*)-/ ) {
313 $hash->{$key} = [];
314 _array( $hash->{$key}, [ @$indent, length($1) ], $lines );
315 } elsif ( $lines->[0] =~ /^(\s*)./ ) {
316 my $indent2 = length("$1");
317 if ( $indent->[-1] >= $indent2 ) {
318 # Null hash entry
319 $hash->{$key} = undef;
320 } else {
321 $hash->{$key} = {};
322 _hash( $hash->{$key}, [ @$indent, length($1) ], $lines );
323 }
324 }
325 }
326
327 return 1;
328}
329
3301;
331
332__END__
333
334=pod
335
336=head1 NAME
337
338Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files
339
340=head1 SYNOPSIS
341
342 #############################################
343 # In your file
344
345 ---
346 rootproperty: blah
347 section:
348 one: two
349 three: four
350 Foo: Bar
351 empty: ~
352
353
354
355 #############################################
356 # In your program
357
358 use Parse::CPAN::Meta;
359
360 # Create a YAML file
361 my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' );
362
363 # Reading properties
364 my $root = $yaml[0]->{rootproperty};
365 my $one = $yaml[0]->{section}->{one};
366 my $Foo = $yaml[0]->{section}->{Foo};
367
368=head1 DESCRIPTION
369
370B<Parse::CPAN::Meta> is a parser for META.yml files, based on the
371parser half of L<YAML::Tiny>.
372
373It supports a basic subset of the full YAML specification, enough to
374implement parsing of typical META.yml files, and other similarly simple
375YAML files.
376
377If you need something with more power, move up to a full YAML parser such
378as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>.
379
380Parse::CPAN::Meta provides a very simply API of only two functions, based
381on the YAML functions of the same name. Wherever possible, identical
382calling semantics are used.
383
384All error reporting is done with exceptions (dieing).
385
386=head1 FUNCTIONS
387
388For maintenance clarity, no functions are exported.
389
390=head2 Load( $string )
391
392 my @documents = Load( $string );
393
394Parses a string containing a valid YAML stream into a list of Perl data
395structures.
396
397=head2 LoadFile( $file_name )
398
399Reads the YAML stream from a file instead of a string.
400
401=head1 SUPPORT
402
403Bugs should be reported via the CPAN bug tracker at
404
405L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta>
406
407=head1 AUTHOR
408
409Adam Kennedy E<lt>adamk@cpan.orgE<gt>
410
411=head1 SEE ALSO
412
413L<YAML::Tiny>, L<YAML>, L<YAML::Syck>
414
415=head1 COPYRIGHT
416
417Copyright 2006 - 2009 Adam Kennedy.
418
419This program is free software; you can redistribute
420it and/or modify it under the same terms as Perl itself.
421
422The full text of the license can be found in the
423LICENSE file included with this module.
424
425=cut