Upgrade to Parse::CPAN::Meta 1.38
[p5sagit/p5-mst-13.2.git] / lib / Parse / CPAN / Meta.pm
CommitLineData
de044c36 1package Parse::CPAN::Meta;\r
2\r
3use strict;\r
4use Carp 'croak';\r
5\r
6# UTF Support?\r
7sub HAVE_UTF8 () { $] >= 5.007003 }\r
8BEGIN {\r
9 if ( HAVE_UTF8 ) {\r
10 # The string eval helps hide this from Test::MinimumVersion\r
11 eval "require utf8;";\r
12 die "Failed to load UTF-8 support" if $@;\r
13 }\r
14\r
15 # Class structure\r
16 require 5.004;\r
17 require Exporter;\r
18 $Parse::CPAN::Meta::VERSION = '1.38';\r
19 @Parse::CPAN::Meta::ISA = qw{ Exporter };\r
20 @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile };\r
21}\r
22\r
23# Prototypes\r
24sub LoadFile ($);\r
25sub Load ($);\r
26sub _scalar ($$$);\r
27sub _array ($$$);\r
28sub _hash ($$$);\r
29\r
30# Printable characters for escapes\r
31my %UNESCAPES = (\r
32 z => "\x00", a => "\x07", t => "\x09",\r
33 n => "\x0a", v => "\x0b", f => "\x0c",\r
34 r => "\x0d", e => "\x1b", '\\' => '\\',\r
35);\r
36\r
37\r
38\r
39\r
40\r
41#####################################################################\r
42# Implementation\r
43\r
44# Create an object from a file\r
45sub LoadFile ($) {\r
46 # Check the file\r
47 my $file = shift;\r
48 croak('You did not specify a file name') unless $file;\r
49 croak( "File '$file' does not exist" ) unless -e $file;\r
50 croak( "'$file' is a directory, not a file" ) unless -f _;\r
51 croak( "Insufficient permissions to read '$file'" ) unless -r _;\r
52\r
53 # Slurp in the file\r
54 local $/ = undef;\r
55 local *CFG;\r
56 unless ( open( CFG, $file ) ) {\r
57 croak("Failed to open file '$file': $!");\r
58 }\r
59 my $yaml = <CFG>;\r
60 unless ( close(CFG) ) {\r
61 croak("Failed to close file '$file': $!");\r
62 }\r
63\r
64 # Hand off to the actual parser\r
65 Load( $yaml );\r
66}\r
67\r
68# Parse a document from a string.\r
69# Doing checks on $_[0] prevents us having to do a string copy.\r
70sub Load ($) {\r
71 my $string = $_[0];\r
72 unless ( defined $string ) {\r
73 croak("Did not provide a string to load");\r
74 }\r
75\r
76 # Byte order marks\r
77 if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {\r
78 croak("Stream has a non UTF-8 Unicode Byte Order Mark");\r
79 } else {\r
80 # Strip UTF-8 bom if found, we'll just ignore it\r
81 $string =~ s/^\357\273\277//;\r
82 }\r
83\r
84 # Try to decode as utf8\r
85 utf8::decode($string) if HAVE_UTF8;\r
86\r
87 # Check for some special cases\r
88 return () unless length $string;\r
89 unless ( $string =~ /[\012\015]+\z/ ) {\r
90 croak("Stream does not end with newline character");\r
91 }\r
92\r
93 # Split the file into lines\r
94 my @lines = grep { ! /^\s*(?:\#.*)?\z/ }\r
95 split /(?:\015{1,2}\012|\015|\012)/, $string;\r
96\r
97 # Strip the initial YAML header\r
98 @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines;\r
99\r
100 # A nibbling parser\r
101 my @documents = ();\r
102 while ( @lines ) {\r
103 # Do we have a document header?\r
104 if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) {\r
105 # Handle scalar documents\r
106 shift @lines;\r
107 if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) {\r
108 push @documents, _scalar( "$1", [ undef ], \@lines );\r
109 next;\r
110 }\r
111 }\r
112\r
113 if ( ! @lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) {\r
114 # A naked document\r
115 push @documents, undef;\r
116 while ( @lines and $lines[0] !~ /^---/ ) {\r
117 shift @lines;\r
118 }\r
119\r
120 } elsif ( $lines[0] =~ /^\s*\-/ ) {\r
121 # An array at the root\r
122 my $document = [ ];\r
123 push @documents, $document;\r
124 _array( $document, [ 0 ], \@lines );\r
125\r
126 } elsif ( $lines[0] =~ /^(\s*)\S/ ) {\r
127 # A hash at the root\r
128 my $document = { };\r
129 push @documents, $document;\r
130 _hash( $document, [ length($1) ], \@lines );\r
131\r
132 } else {\r
133 croak("Parse::CPAN::Meta failed to classify line '$lines[0]'");\r
134 }\r
135 }\r
136\r
137 if ( wantarray ) {\r
138 return @documents;\r
139 } else {\r
140 return $documents[-1];\r
141 }\r
142}\r
143\r
144# Deparse a scalar string to the actual scalar\r
145sub _scalar ($$$) {\r
146 my ($string, $indent, $lines) = @_;\r
147\r
148 # Trim trailing whitespace\r
149 $string =~ s/\s*\z//;\r
150\r
151 # Explitic null/undef\r
152 return undef if $string eq '~';\r
153\r
154 # Quotes\r
155 if ( $string =~ /^\'(.*?)\'\z/ ) {\r
156 return '' unless defined $1;\r
157 $string = $1;\r
158 $string =~ s/\'\'/\'/g;\r
159 return $string;\r
160 }\r
161 if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) {\r
162 # Reusing the variable is a little ugly,\r
163 # but avoids a new variable and a string copy.\r
164 $string = $1;\r
165 $string =~ s/\\"/"/g;\r
166 $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex;\r
167 return $string;\r
168 }\r
169\r
170 # Special cases\r
171 if ( $string =~ /^[\'\"!&]/ ) {\r
172 croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");\r
173 }\r
174 return {} if $string eq '{}';\r
175 return [] if $string eq '[]';\r
176\r
177 # Regular unquoted string\r
178 return $string unless $string =~ /^[>|]/;\r
179\r
180 # Error\r
181 croak("Parse::CPAN::Meta failed to find multi-line scalar content") unless @$lines;\r
182\r
183 # Check the indent depth\r
184 $lines->[0] =~ /^(\s*)/;\r
185 $indent->[-1] = length("$1");\r
186 if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) {\r
187 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r
188 }\r
189\r
190 # Pull the lines\r
191 my @multiline = ();\r
192 while ( @$lines ) {\r
193 $lines->[0] =~ /^(\s*)/;\r
194 last unless length($1) >= $indent->[-1];\r
195 push @multiline, substr(shift(@$lines), length($1));\r
196 }\r
197\r
198 my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n";\r
199 my $t = (substr($string, 1, 1) eq '-') ? '' : "\n";\r
200 return join( $j, @multiline ) . $t;\r
201}\r
202\r
203# Parse an array\r
204sub _array ($$$) {\r
205 my ($array, $indent, $lines) = @_;\r
206\r
207 while ( @$lines ) {\r
208 # Check for a new document\r
209 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {\r
210 while ( @$lines and $lines->[0] !~ /^---/ ) {\r
211 shift @$lines;\r
212 }\r
213 return 1;\r
214 }\r
215\r
216 # Check the indent level\r
217 $lines->[0] =~ /^(\s*)/;\r
218 if ( length($1) < $indent->[-1] ) {\r
219 return 1;\r
220 } elsif ( length($1) > $indent->[-1] ) {\r
221 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r
222 }\r
223\r
224 if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) {\r
225 # Inline nested hash\r
226 my $indent2 = length("$1");\r
227 $lines->[0] =~ s/-/ /;\r
228 push @$array, { };\r
229 _hash( $array->[-1], [ @$indent, $indent2 ], $lines );\r
230\r
231 } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) {\r
232 # Array entry with a value\r
233 shift @$lines;\r
234 push @$array, _scalar( "$2", [ @$indent, undef ], $lines );\r
235\r
236 } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) {\r
237 shift @$lines;\r
238 unless ( @$lines ) {\r
239 push @$array, undef;\r
240 return 1;\r
241 }\r
242 if ( $lines->[0] =~ /^(\s*)\-/ ) {\r
243 my $indent2 = length("$1");\r
244 if ( $indent->[-1] == $indent2 ) {\r
245 # Null array entry\r
246 push @$array, undef;\r
247 } else {\r
248 # Naked indenter\r
249 push @$array, [ ];\r
250 _array( $array->[-1], [ @$indent, $indent2 ], $lines );\r
251 }\r
252\r
253 } elsif ( $lines->[0] =~ /^(\s*)\S/ ) {\r
254 push @$array, { };\r
255 _hash( $array->[-1], [ @$indent, length("$1") ], $lines );\r
256\r
257 } else {\r
258 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r
259 }\r
260\r
261 } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) {\r
262 # This is probably a structure like the following...\r
263 # ---\r
264 # foo:\r
265 # - list\r
266 # bar: value\r
267 #\r
268 # ... so lets return and let the hash parser handle it\r
269 return 1;\r
270\r
271 } else {\r
272 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r
273 }\r
274 }\r
275\r
276 return 1;\r
277}\r
278\r
279# Parse an array\r
280sub _hash ($$$) {\r
281 my ($hash, $indent, $lines) = @_;\r
282\r
283 while ( @$lines ) {\r
284 # Check for a new document\r
285 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {\r
286 while ( @$lines and $lines->[0] !~ /^---/ ) {\r
287 shift @$lines;\r
288 }\r
289 return 1;\r
290 }\r
291\r
292 # Check the indent level\r
293 $lines->[0] =~ /^(\s*)/;\r
294 if ( length($1) < $indent->[-1] ) {\r
295 return 1;\r
296 } elsif ( length($1) > $indent->[-1] ) {\r
297 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r
298 }\r
299\r
300 # Get the key\r
301 unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) {\r
302 if ( $lines->[0] =~ /^\s*[?\'\"]/ ) {\r
303 croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");\r
304 }\r
305 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r
306 }\r
307 my $key = $1;\r
308\r
309 # Do we have a value?\r
310 if ( length $lines->[0] ) {\r
311 # Yes\r
312 $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines );\r
313 } else {\r
314 # An indent\r
315 shift @$lines;\r
316 unless ( @$lines ) {\r
317 $hash->{$key} = undef;\r
318 return 1;\r
319 }\r
320 if ( $lines->[0] =~ /^(\s*)-/ ) {\r
321 $hash->{$key} = [];\r
322 _array( $hash->{$key}, [ @$indent, length($1) ], $lines );\r
323 } elsif ( $lines->[0] =~ /^(\s*)./ ) {\r
324 my $indent2 = length("$1");\r
325 if ( $indent->[-1] >= $indent2 ) {\r
326 # Null hash entry\r
327 $hash->{$key} = undef;\r
328 } else {\r
329 $hash->{$key} = {};\r
330 _hash( $hash->{$key}, [ @$indent, length($1) ], $lines );\r
331 }\r
332 }\r
333 }\r
334 }\r
335\r
336 return 1;\r
337}\r
338\r
3391;\r
340\r
341__END__\r
342\r
343=pod\r
344\r
345=head1 NAME\r
346\r
347Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files\r
348\r
349=head1 SYNOPSIS\r
350\r
351 #############################################\r
352 # In your file\r
353 \r
354 ---\r
355 rootproperty: blah\r
356 section:\r
357 one: two\r
358 three: four\r
359 Foo: Bar\r
360 empty: ~\r
361 \r
362 \r
363 \r
364 #############################################\r
365 # In your program\r
366 \r
367 use Parse::CPAN::Meta;\r
368 \r
369 # Create a YAML file\r
370 my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' );\r
371 \r
372 # Reading properties\r
373 my $root = $yaml[0]->{rootproperty};\r
374 my $one = $yaml[0]->{section}->{one};\r
375 my $Foo = $yaml[0]->{section}->{Foo};\r
376\r
377=head1 DESCRIPTION\r
378\r
379B<Parse::CPAN::Meta> is a parser for F<META.yml> files, based on the\r
380parser half of L<YAML::Tiny>.\r
381\r
382It supports a basic subset of the full YAML specification, enough to\r
383implement parsing of typical F<META.yml> files, and other similarly simple\r
384YAML files.\r
385\r
386If you need something with more power, move up to a full YAML parser such\r
387as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>.\r
388\r
389B<Parse::CPAN::Meta> provides a very simply API of only two functions,\r
390based on the YAML functions of the same name. Wherever possible,\r
391identical calling semantics are used.\r
392\r
393All error reporting is done with exceptions (die'ing).\r
394\r
395=head1 FUNCTIONS\r
396\r
397For maintenance clarity, no functions are exported.\r
398\r
399=head2 Load\r
400\r
401 my @yaml = Load( $string );\r
402\r
403Parses a string containing a valid YAML stream into a list of Perl data\r
404structures.\r
405\r
406=head2 LoadFile\r
407\r
408 my @yaml = LoadFile( 'META.yml' );\r
409\r
410Reads the YAML stream from a file instead of a string.\r
411\r
412=head1 SUPPORT\r
413\r
414Bugs should be reported via the CPAN bug tracker at\r
415\r
416L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta>\r
417\r
418=head1 AUTHOR\r
419\r
420Adam Kennedy E<lt>adamk@cpan.orgE<gt>\r
421\r
422=head1 SEE ALSO\r
423\r
424L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,\r
425L<http://use.perl.org/~Alias/journal/29427>, L<http://ali.as/>\r
426\r
427=head1 COPYRIGHT\r
428\r
429Copyright 2006 - 2009 Adam Kennedy.\r
430\r
431This program is free software; you can redistribute\r
432it and/or modify it under the same terms as Perl itself.\r
433\r
434The full text of the license can be found in the\r
435LICENSE file included with this module.\r
436\r
437=cut\r