Commit | Line | Data |
de044c36 |
1 | package Parse::CPAN::Meta;\r |
2 | \r |
3 | use strict;\r |
4 | use Carp 'croak';\r |
5 | \r |
6 | # UTF Support?\r |
7 | sub HAVE_UTF8 () { $] >= 5.007003 }\r |
8 | BEGIN {\r |
9 | if ( HAVE_UTF8 ) {\r |
10 | # The string eval helps hide this from Test::MinimumVersion\r |
11 | eval "require utf8;";\r |
12 | die "Failed to load UTF-8 support" if $@;\r |
13 | }\r |
14 | \r |
15 | # Class structure\r |
16 | require 5.004;\r |
17 | require Exporter;\r |
18 | $Parse::CPAN::Meta::VERSION = '1.38';\r |
19 | @Parse::CPAN::Meta::ISA = qw{ Exporter };\r |
20 | @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile };\r |
21 | }\r |
22 | \r |
23 | # Prototypes\r |
24 | sub LoadFile ($);\r |
25 | sub Load ($);\r |
26 | sub _scalar ($$$);\r |
27 | sub _array ($$$);\r |
28 | sub _hash ($$$);\r |
29 | \r |
30 | # Printable characters for escapes\r |
31 | my %UNESCAPES = (\r |
32 | z => "\x00", a => "\x07", t => "\x09",\r |
33 | n => "\x0a", v => "\x0b", f => "\x0c",\r |
34 | r => "\x0d", e => "\x1b", '\\' => '\\',\r |
35 | );\r |
36 | \r |
37 | \r |
38 | \r |
39 | \r |
40 | \r |
41 | #####################################################################\r |
42 | # Implementation\r |
43 | \r |
44 | # Create an object from a file\r |
45 | sub LoadFile ($) {\r |
46 | # Check the file\r |
47 | my $file = shift;\r |
48 | croak('You did not specify a file name') unless $file;\r |
49 | croak( "File '$file' does not exist" ) unless -e $file;\r |
50 | croak( "'$file' is a directory, not a file" ) unless -f _;\r |
51 | croak( "Insufficient permissions to read '$file'" ) unless -r _;\r |
52 | \r |
53 | # Slurp in the file\r |
54 | local $/ = undef;\r |
55 | local *CFG;\r |
56 | unless ( open( CFG, $file ) ) {\r |
57 | croak("Failed to open file '$file': $!");\r |
58 | }\r |
59 | my $yaml = <CFG>;\r |
60 | unless ( close(CFG) ) {\r |
61 | croak("Failed to close file '$file': $!");\r |
62 | }\r |
63 | \r |
64 | # Hand off to the actual parser\r |
65 | Load( $yaml );\r |
66 | }\r |
67 | \r |
68 | # Parse a document from a string.\r |
69 | # Doing checks on $_[0] prevents us having to do a string copy.\r |
70 | sub Load ($) {\r |
71 | my $string = $_[0];\r |
72 | unless ( defined $string ) {\r |
73 | croak("Did not provide a string to load");\r |
74 | }\r |
75 | \r |
76 | # Byte order marks\r |
77 | if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {\r |
78 | croak("Stream has a non UTF-8 Unicode Byte Order Mark");\r |
79 | } else {\r |
80 | # Strip UTF-8 bom if found, we'll just ignore it\r |
81 | $string =~ s/^\357\273\277//;\r |
82 | }\r |
83 | \r |
84 | # Try to decode as utf8\r |
85 | utf8::decode($string) if HAVE_UTF8;\r |
86 | \r |
87 | # Check for some special cases\r |
88 | return () unless length $string;\r |
89 | unless ( $string =~ /[\012\015]+\z/ ) {\r |
90 | croak("Stream does not end with newline character");\r |
91 | }\r |
92 | \r |
93 | # Split the file into lines\r |
94 | my @lines = grep { ! /^\s*(?:\#.*)?\z/ }\r |
95 | split /(?:\015{1,2}\012|\015|\012)/, $string;\r |
96 | \r |
97 | # Strip the initial YAML header\r |
98 | @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines;\r |
99 | \r |
100 | # A nibbling parser\r |
101 | my @documents = ();\r |
102 | while ( @lines ) {\r |
103 | # Do we have a document header?\r |
104 | if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) {\r |
105 | # Handle scalar documents\r |
106 | shift @lines;\r |
107 | if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) {\r |
108 | push @documents, _scalar( "$1", [ undef ], \@lines );\r |
109 | next;\r |
110 | }\r |
111 | }\r |
112 | \r |
113 | if ( ! @lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) {\r |
114 | # A naked document\r |
115 | push @documents, undef;\r |
116 | while ( @lines and $lines[0] !~ /^---/ ) {\r |
117 | shift @lines;\r |
118 | }\r |
119 | \r |
120 | } elsif ( $lines[0] =~ /^\s*\-/ ) {\r |
121 | # An array at the root\r |
122 | my $document = [ ];\r |
123 | push @documents, $document;\r |
124 | _array( $document, [ 0 ], \@lines );\r |
125 | \r |
126 | } elsif ( $lines[0] =~ /^(\s*)\S/ ) {\r |
127 | # A hash at the root\r |
128 | my $document = { };\r |
129 | push @documents, $document;\r |
130 | _hash( $document, [ length($1) ], \@lines );\r |
131 | \r |
132 | } else {\r |
133 | croak("Parse::CPAN::Meta failed to classify line '$lines[0]'");\r |
134 | }\r |
135 | }\r |
136 | \r |
137 | if ( wantarray ) {\r |
138 | return @documents;\r |
139 | } else {\r |
140 | return $documents[-1];\r |
141 | }\r |
142 | }\r |
143 | \r |
144 | # Deparse a scalar string to the actual scalar\r |
145 | sub _scalar ($$$) {\r |
146 | my ($string, $indent, $lines) = @_;\r |
147 | \r |
148 | # Trim trailing whitespace\r |
149 | $string =~ s/\s*\z//;\r |
150 | \r |
151 | # Explitic null/undef\r |
152 | return undef if $string eq '~';\r |
153 | \r |
154 | # Quotes\r |
155 | if ( $string =~ /^\'(.*?)\'\z/ ) {\r |
156 | return '' unless defined $1;\r |
157 | $string = $1;\r |
158 | $string =~ s/\'\'/\'/g;\r |
159 | return $string;\r |
160 | }\r |
161 | if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) {\r |
162 | # Reusing the variable is a little ugly,\r |
163 | # but avoids a new variable and a string copy.\r |
164 | $string = $1;\r |
165 | $string =~ s/\\"/"/g;\r |
166 | $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex;\r |
167 | return $string;\r |
168 | }\r |
169 | \r |
170 | # Special cases\r |
171 | if ( $string =~ /^[\'\"!&]/ ) {\r |
172 | croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");\r |
173 | }\r |
174 | return {} if $string eq '{}';\r |
175 | return [] if $string eq '[]';\r |
176 | \r |
177 | # Regular unquoted string\r |
178 | return $string unless $string =~ /^[>|]/;\r |
179 | \r |
180 | # Error\r |
181 | croak("Parse::CPAN::Meta failed to find multi-line scalar content") unless @$lines;\r |
182 | \r |
183 | # Check the indent depth\r |
184 | $lines->[0] =~ /^(\s*)/;\r |
185 | $indent->[-1] = length("$1");\r |
186 | if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) {\r |
187 | croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r |
188 | }\r |
189 | \r |
190 | # Pull the lines\r |
191 | my @multiline = ();\r |
192 | while ( @$lines ) {\r |
193 | $lines->[0] =~ /^(\s*)/;\r |
194 | last unless length($1) >= $indent->[-1];\r |
195 | push @multiline, substr(shift(@$lines), length($1));\r |
196 | }\r |
197 | \r |
198 | my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n";\r |
199 | my $t = (substr($string, 1, 1) eq '-') ? '' : "\n";\r |
200 | return join( $j, @multiline ) . $t;\r |
201 | }\r |
202 | \r |
203 | # Parse an array\r |
204 | sub _array ($$$) {\r |
205 | my ($array, $indent, $lines) = @_;\r |
206 | \r |
207 | while ( @$lines ) {\r |
208 | # Check for a new document\r |
209 | if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {\r |
210 | while ( @$lines and $lines->[0] !~ /^---/ ) {\r |
211 | shift @$lines;\r |
212 | }\r |
213 | return 1;\r |
214 | }\r |
215 | \r |
216 | # Check the indent level\r |
217 | $lines->[0] =~ /^(\s*)/;\r |
218 | if ( length($1) < $indent->[-1] ) {\r |
219 | return 1;\r |
220 | } elsif ( length($1) > $indent->[-1] ) {\r |
221 | croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r |
222 | }\r |
223 | \r |
224 | if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) {\r |
225 | # Inline nested hash\r |
226 | my $indent2 = length("$1");\r |
227 | $lines->[0] =~ s/-/ /;\r |
228 | push @$array, { };\r |
229 | _hash( $array->[-1], [ @$indent, $indent2 ], $lines );\r |
230 | \r |
231 | } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) {\r |
232 | # Array entry with a value\r |
233 | shift @$lines;\r |
234 | push @$array, _scalar( "$2", [ @$indent, undef ], $lines );\r |
235 | \r |
236 | } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) {\r |
237 | shift @$lines;\r |
238 | unless ( @$lines ) {\r |
239 | push @$array, undef;\r |
240 | return 1;\r |
241 | }\r |
242 | if ( $lines->[0] =~ /^(\s*)\-/ ) {\r |
243 | my $indent2 = length("$1");\r |
244 | if ( $indent->[-1] == $indent2 ) {\r |
245 | # Null array entry\r |
246 | push @$array, undef;\r |
247 | } else {\r |
248 | # Naked indenter\r |
249 | push @$array, [ ];\r |
250 | _array( $array->[-1], [ @$indent, $indent2 ], $lines );\r |
251 | }\r |
252 | \r |
253 | } elsif ( $lines->[0] =~ /^(\s*)\S/ ) {\r |
254 | push @$array, { };\r |
255 | _hash( $array->[-1], [ @$indent, length("$1") ], $lines );\r |
256 | \r |
257 | } else {\r |
258 | croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r |
259 | }\r |
260 | \r |
261 | } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) {\r |
262 | # This is probably a structure like the following...\r |
263 | # ---\r |
264 | # foo:\r |
265 | # - list\r |
266 | # bar: value\r |
267 | #\r |
268 | # ... so lets return and let the hash parser handle it\r |
269 | return 1;\r |
270 | \r |
271 | } else {\r |
272 | croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r |
273 | }\r |
274 | }\r |
275 | \r |
276 | return 1;\r |
277 | }\r |
278 | \r |
279 | # Parse an array\r |
280 | sub _hash ($$$) {\r |
281 | my ($hash, $indent, $lines) = @_;\r |
282 | \r |
283 | while ( @$lines ) {\r |
284 | # Check for a new document\r |
285 | if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {\r |
286 | while ( @$lines and $lines->[0] !~ /^---/ ) {\r |
287 | shift @$lines;\r |
288 | }\r |
289 | return 1;\r |
290 | }\r |
291 | \r |
292 | # Check the indent level\r |
293 | $lines->[0] =~ /^(\s*)/;\r |
294 | if ( length($1) < $indent->[-1] ) {\r |
295 | return 1;\r |
296 | } elsif ( length($1) > $indent->[-1] ) {\r |
297 | croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r |
298 | }\r |
299 | \r |
300 | # Get the key\r |
301 | unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) {\r |
302 | if ( $lines->[0] =~ /^\s*[?\'\"]/ ) {\r |
303 | croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");\r |
304 | }\r |
305 | croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r |
306 | }\r |
307 | my $key = $1;\r |
308 | \r |
309 | # Do we have a value?\r |
310 | if ( length $lines->[0] ) {\r |
311 | # Yes\r |
312 | $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines );\r |
313 | } else {\r |
314 | # An indent\r |
315 | shift @$lines;\r |
316 | unless ( @$lines ) {\r |
317 | $hash->{$key} = undef;\r |
318 | return 1;\r |
319 | }\r |
320 | if ( $lines->[0] =~ /^(\s*)-/ ) {\r |
321 | $hash->{$key} = [];\r |
322 | _array( $hash->{$key}, [ @$indent, length($1) ], $lines );\r |
323 | } elsif ( $lines->[0] =~ /^(\s*)./ ) {\r |
324 | my $indent2 = length("$1");\r |
325 | if ( $indent->[-1] >= $indent2 ) {\r |
326 | # Null hash entry\r |
327 | $hash->{$key} = undef;\r |
328 | } else {\r |
329 | $hash->{$key} = {};\r |
330 | _hash( $hash->{$key}, [ @$indent, length($1) ], $lines );\r |
331 | }\r |
332 | }\r |
333 | }\r |
334 | }\r |
335 | \r |
336 | return 1;\r |
337 | }\r |
338 | \r |
339 | 1;\r |
340 | \r |
341 | __END__\r |
342 | \r |
343 | =pod\r |
344 | \r |
345 | =head1 NAME\r |
346 | \r |
347 | Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files\r |
348 | \r |
349 | =head1 SYNOPSIS\r |
350 | \r |
351 | #############################################\r |
352 | # In your file\r |
353 | \r |
354 | ---\r |
355 | rootproperty: blah\r |
356 | section:\r |
357 | one: two\r |
358 | three: four\r |
359 | Foo: Bar\r |
360 | empty: ~\r |
361 | \r |
362 | \r |
363 | \r |
364 | #############################################\r |
365 | # In your program\r |
366 | \r |
367 | use Parse::CPAN::Meta;\r |
368 | \r |
369 | # Create a YAML file\r |
370 | my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' );\r |
371 | \r |
372 | # Reading properties\r |
373 | my $root = $yaml[0]->{rootproperty};\r |
374 | my $one = $yaml[0]->{section}->{one};\r |
375 | my $Foo = $yaml[0]->{section}->{Foo};\r |
376 | \r |
377 | =head1 DESCRIPTION\r |
378 | \r |
379 | B<Parse::CPAN::Meta> is a parser for F<META.yml> files, based on the\r |
380 | parser half of L<YAML::Tiny>.\r |
381 | \r |
382 | It supports a basic subset of the full YAML specification, enough to\r |
383 | implement parsing of typical F<META.yml> files, and other similarly simple\r |
384 | YAML files.\r |
385 | \r |
386 | If you need something with more power, move up to a full YAML parser such\r |
387 | as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>.\r |
388 | \r |
389 | B<Parse::CPAN::Meta> provides a very simply API of only two functions,\r |
390 | based on the YAML functions of the same name. Wherever possible,\r |
391 | identical calling semantics are used.\r |
392 | \r |
393 | All error reporting is done with exceptions (die'ing).\r |
394 | \r |
395 | =head1 FUNCTIONS\r |
396 | \r |
397 | For maintenance clarity, no functions are exported.\r |
398 | \r |
399 | =head2 Load\r |
400 | \r |
401 | my @yaml = Load( $string );\r |
402 | \r |
403 | Parses a string containing a valid YAML stream into a list of Perl data\r |
404 | structures.\r |
405 | \r |
406 | =head2 LoadFile\r |
407 | \r |
408 | my @yaml = LoadFile( 'META.yml' );\r |
409 | \r |
410 | Reads the YAML stream from a file instead of a string.\r |
411 | \r |
412 | =head1 SUPPORT\r |
413 | \r |
414 | Bugs should be reported via the CPAN bug tracker at\r |
415 | \r |
416 | L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta>\r |
417 | \r |
418 | =head1 AUTHOR\r |
419 | \r |
420 | Adam Kennedy E<lt>adamk@cpan.orgE<gt>\r |
421 | \r |
422 | =head1 SEE ALSO\r |
423 | \r |
424 | L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,\r |
425 | L<http://use.perl.org/~Alias/journal/29427>, L<http://ali.as/>\r |
426 | \r |
427 | =head1 COPYRIGHT\r |
428 | \r |
429 | Copyright 2006 - 2009 Adam Kennedy.\r |
430 | \r |
431 | This program is free software; you can redistribute\r |
432 | it and/or modify it under the same terms as Perl itself.\r |
433 | \r |
434 | The full text of the license can be found in the\r |
435 | LICENSE file included with this module.\r |
436 | \r |
437 | =cut\r |