Commit | Line | Data |
35df902d |
1 | package Parse::CPAN::Meta; |
2 | |
3 | use strict; |
4 | use Carp 'croak'; |
5 | |
6 | # UTF Support? |
7 | sub HAVE_UTF8 () { $] >= 5.007003 } |
8 | BEGIN { |
9 | if ( HAVE_UTF8 ) { |
10 | # The string eval helps hide this from Test::MinimumVersion |
11 | eval "require utf8;"; |
12 | die "Failed to load UTF-8 support" if $@; |
13 | } |
14 | |
15 | # Class structure |
16 | require 5.004; |
17 | require Exporter; |
c82fc722 |
18 | $Parse::CPAN::Meta::VERSION = '1.40'; |
35df902d |
19 | @Parse::CPAN::Meta::ISA = qw{ Exporter }; |
20 | @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile }; |
21 | } |
22 | |
23 | # Prototypes |
24 | sub LoadFile ($); |
25 | sub Load ($); |
26 | sub _scalar ($$$); |
27 | sub _array ($$$); |
28 | sub _hash ($$$); |
29 | |
30 | # Printable characters for escapes |
31 | my %UNESCAPES = ( |
32 | z => "\x00", a => "\x07", t => "\x09", |
33 | n => "\x0a", v => "\x0b", f => "\x0c", |
34 | r => "\x0d", e => "\x1b", '\\' => '\\', |
35 | ); |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | ##################################################################### |
42 | # Implementation |
43 | |
44 | # Create an object from a file |
45 | sub LoadFile ($) { |
46 | # Check the file |
47 | my $file = shift; |
48 | croak('You did not specify a file name') unless $file; |
49 | croak( "File '$file' does not exist" ) unless -e $file; |
50 | croak( "'$file' is a directory, not a file" ) unless -f _; |
51 | croak( "Insufficient permissions to read '$file'" ) unless -r _; |
52 | |
53 | # Slurp in the file |
54 | local $/ = undef; |
55 | local *CFG; |
56 | unless ( open( CFG, $file ) ) { |
57 | croak("Failed to open file '$file': $!"); |
58 | } |
59 | my $yaml = <CFG>; |
60 | unless ( close(CFG) ) { |
61 | croak("Failed to close file '$file': $!"); |
62 | } |
63 | |
64 | # Hand off to the actual parser |
65 | Load( $yaml ); |
66 | } |
67 | |
68 | # Parse a document from a string. |
69 | # Doing checks on $_[0] prevents us having to do a string copy. |
70 | sub Load ($) { |
71 | my $string = $_[0]; |
72 | unless ( defined $string ) { |
73 | croak("Did not provide a string to load"); |
74 | } |
75 | |
76 | # Byte order marks |
77 | if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) { |
78 | croak("Stream has a non UTF-8 Unicode Byte Order Mark"); |
79 | } else { |
80 | # Strip UTF-8 bom if found, we'll just ignore it |
81 | $string =~ s/^\357\273\277//; |
82 | } |
83 | |
84 | # Try to decode as utf8 |
85 | utf8::decode($string) if HAVE_UTF8; |
86 | |
87 | # Check for some special cases |
88 | return () unless length $string; |
89 | unless ( $string =~ /[\012\015]+\z/ ) { |
90 | croak("Stream does not end with newline character"); |
91 | } |
92 | |
93 | # Split the file into lines |
94 | my @lines = grep { ! /^\s*(?:\#.*)?\z/ } |
95 | split /(?:\015{1,2}\012|\015|\012)/, $string; |
96 | |
97 | # Strip the initial YAML header |
98 | @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines; |
99 | |
100 | # A nibbling parser |
101 | my @documents = (); |
102 | while ( @lines ) { |
103 | # Do we have a document header? |
104 | if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) { |
105 | # Handle scalar documents |
106 | shift @lines; |
107 | if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) { |
108 | push @documents, _scalar( "$1", [ undef ], \@lines ); |
109 | next; |
110 | } |
111 | } |
112 | |
113 | if ( ! @lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) { |
114 | # A naked document |
115 | push @documents, undef; |
116 | while ( @lines and $lines[0] !~ /^---/ ) { |
117 | shift @lines; |
118 | } |
119 | |
120 | } elsif ( $lines[0] =~ /^\s*\-/ ) { |
121 | # An array at the root |
122 | my $document = [ ]; |
123 | push @documents, $document; |
124 | _array( $document, [ 0 ], \@lines ); |
125 | |
126 | } elsif ( $lines[0] =~ /^(\s*)\S/ ) { |
127 | # A hash at the root |
128 | my $document = { }; |
129 | push @documents, $document; |
130 | _hash( $document, [ length($1) ], \@lines ); |
131 | |
132 | } else { |
133 | croak("Parse::CPAN::Meta failed to classify line '$lines[0]'"); |
134 | } |
135 | } |
136 | |
137 | if ( wantarray ) { |
138 | return @documents; |
139 | } else { |
140 | return $documents[-1]; |
141 | } |
142 | } |
143 | |
144 | # Deparse a scalar string to the actual scalar |
145 | sub _scalar ($$$) { |
146 | my ($string, $indent, $lines) = @_; |
147 | |
148 | # Trim trailing whitespace |
149 | $string =~ s/\s*\z//; |
150 | |
151 | # Explitic null/undef |
152 | return undef if $string eq '~'; |
153 | |
154 | # Quotes |
155 | if ( $string =~ /^\'(.*?)\'\z/ ) { |
156 | return '' unless defined $1; |
157 | $string = $1; |
158 | $string =~ s/\'\'/\'/g; |
159 | return $string; |
160 | } |
161 | if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) { |
162 | # Reusing the variable is a little ugly, |
163 | # but avoids a new variable and a string copy. |
164 | $string = $1; |
165 | $string =~ s/\\"/"/g; |
166 | $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex; |
167 | return $string; |
168 | } |
169 | |
170 | # Special cases |
171 | if ( $string =~ /^[\'\"!&]/ ) { |
172 | croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'"); |
173 | } |
174 | return {} if $string eq '{}'; |
175 | return [] if $string eq '[]'; |
176 | |
177 | # Regular unquoted string |
178 | return $string unless $string =~ /^[>|]/; |
179 | |
180 | # Error |
181 | croak("Parse::CPAN::Meta failed to find multi-line scalar content") unless @$lines; |
182 | |
183 | # Check the indent depth |
184 | $lines->[0] =~ /^(\s*)/; |
185 | $indent->[-1] = length("$1"); |
186 | if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) { |
187 | croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'"); |
188 | } |
189 | |
190 | # Pull the lines |
191 | my @multiline = (); |
192 | while ( @$lines ) { |
193 | $lines->[0] =~ /^(\s*)/; |
194 | last unless length($1) >= $indent->[-1]; |
195 | push @multiline, substr(shift(@$lines), length($1)); |
196 | } |
197 | |
198 | my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n"; |
199 | my $t = (substr($string, 1, 1) eq '-') ? '' : "\n"; |
200 | return join( $j, @multiline ) . $t; |
201 | } |
202 | |
203 | # Parse an array |
204 | sub _array ($$$) { |
205 | my ($array, $indent, $lines) = @_; |
206 | |
207 | while ( @$lines ) { |
208 | # Check for a new document |
209 | if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) { |
210 | while ( @$lines and $lines->[0] !~ /^---/ ) { |
211 | shift @$lines; |
212 | } |
213 | return 1; |
214 | } |
215 | |
216 | # Check the indent level |
217 | $lines->[0] =~ /^(\s*)/; |
218 | if ( length($1) < $indent->[-1] ) { |
219 | return 1; |
220 | } elsif ( length($1) > $indent->[-1] ) { |
221 | croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'"); |
222 | } |
223 | |
224 | if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) { |
225 | # Inline nested hash |
226 | my $indent2 = length("$1"); |
227 | $lines->[0] =~ s/-/ /; |
228 | push @$array, { }; |
229 | _hash( $array->[-1], [ @$indent, $indent2 ], $lines ); |
230 | |
231 | } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) { |
232 | # Array entry with a value |
233 | shift @$lines; |
234 | push @$array, _scalar( "$2", [ @$indent, undef ], $lines ); |
235 | |
236 | } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) { |
237 | shift @$lines; |
238 | unless ( @$lines ) { |
239 | push @$array, undef; |
240 | return 1; |
241 | } |
242 | if ( $lines->[0] =~ /^(\s*)\-/ ) { |
243 | my $indent2 = length("$1"); |
244 | if ( $indent->[-1] == $indent2 ) { |
245 | # Null array entry |
246 | push @$array, undef; |
247 | } else { |
248 | # Naked indenter |
249 | push @$array, [ ]; |
250 | _array( $array->[-1], [ @$indent, $indent2 ], $lines ); |
251 | } |
252 | |
253 | } elsif ( $lines->[0] =~ /^(\s*)\S/ ) { |
254 | push @$array, { }; |
255 | _hash( $array->[-1], [ @$indent, length("$1") ], $lines ); |
256 | |
257 | } else { |
258 | croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'"); |
259 | } |
260 | |
261 | } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) { |
262 | # This is probably a structure like the following... |
263 | # --- |
264 | # foo: |
265 | # - list |
266 | # bar: value |
267 | # |
268 | # ... so lets return and let the hash parser handle it |
269 | return 1; |
270 | |
271 | } else { |
272 | croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'"); |
273 | } |
274 | } |
275 | |
276 | return 1; |
277 | } |
278 | |
279 | # Parse an array |
280 | sub _hash ($$$) { |
281 | my ($hash, $indent, $lines) = @_; |
282 | |
283 | while ( @$lines ) { |
284 | # Check for a new document |
285 | if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) { |
286 | while ( @$lines and $lines->[0] !~ /^---/ ) { |
287 | shift @$lines; |
288 | } |
289 | return 1; |
290 | } |
291 | |
292 | # Check the indent level |
293 | $lines->[0] =~ /^(\s*)/; |
294 | if ( length($1) < $indent->[-1] ) { |
295 | return 1; |
296 | } elsif ( length($1) > $indent->[-1] ) { |
297 | croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'"); |
298 | } |
299 | |
300 | # Get the key |
301 | unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) { |
302 | if ( $lines->[0] =~ /^\s*[?\'\"]/ ) { |
303 | croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'"); |
304 | } |
305 | croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'"); |
306 | } |
307 | my $key = $1; |
308 | |
309 | # Do we have a value? |
310 | if ( length $lines->[0] ) { |
311 | # Yes |
312 | $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines ); |
313 | } else { |
314 | # An indent |
315 | shift @$lines; |
316 | unless ( @$lines ) { |
317 | $hash->{$key} = undef; |
318 | return 1; |
319 | } |
320 | if ( $lines->[0] =~ /^(\s*)-/ ) { |
321 | $hash->{$key} = []; |
322 | _array( $hash->{$key}, [ @$indent, length($1) ], $lines ); |
323 | } elsif ( $lines->[0] =~ /^(\s*)./ ) { |
324 | my $indent2 = length("$1"); |
325 | if ( $indent->[-1] >= $indent2 ) { |
326 | # Null hash entry |
327 | $hash->{$key} = undef; |
328 | } else { |
329 | $hash->{$key} = {}; |
330 | _hash( $hash->{$key}, [ @$indent, length($1) ], $lines ); |
331 | } |
332 | } |
333 | } |
334 | } |
335 | |
336 | return 1; |
337 | } |
338 | |
339 | 1; |
340 | |
341 | __END__ |
342 | |
343 | =pod |
344 | |
345 | =head1 NAME |
346 | |
347 | Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files |
348 | |
349 | =head1 SYNOPSIS |
350 | |
351 | ############################################# |
352 | # In your file |
353 | |
354 | --- |
355 | rootproperty: blah |
356 | section: |
357 | one: two |
358 | three: four |
359 | Foo: Bar |
360 | empty: ~ |
361 | |
362 | |
363 | |
364 | ############################################# |
365 | # In your program |
366 | |
367 | use Parse::CPAN::Meta; |
368 | |
369 | # Create a YAML file |
370 | my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' ); |
371 | |
372 | # Reading properties |
373 | my $root = $yaml[0]->{rootproperty}; |
374 | my $one = $yaml[0]->{section}->{one}; |
375 | my $Foo = $yaml[0]->{section}->{Foo}; |
376 | |
377 | =head1 DESCRIPTION |
378 | |
379 | B<Parse::CPAN::Meta> is a parser for F<META.yml> files, based on the |
380 | parser half of L<YAML::Tiny>. |
381 | |
382 | It supports a basic subset of the full YAML specification, enough to |
383 | implement parsing of typical F<META.yml> files, and other similarly simple |
384 | YAML files. |
385 | |
386 | If you need something with more power, move up to a full YAML parser such |
387 | as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>. |
388 | |
389 | B<Parse::CPAN::Meta> provides a very simply API of only two functions, |
390 | based on the YAML functions of the same name. Wherever possible, |
391 | identical calling semantics are used. |
392 | |
393 | All error reporting is done with exceptions (die'ing). |
394 | |
395 | =head1 FUNCTIONS |
396 | |
397 | For maintenance clarity, no functions are exported. |
398 | |
399 | =head2 Load |
400 | |
401 | my @yaml = Load( $string ); |
402 | |
403 | Parses a string containing a valid YAML stream into a list of Perl data |
404 | structures. |
405 | |
406 | =head2 LoadFile |
407 | |
408 | my @yaml = LoadFile( 'META.yml' ); |
409 | |
410 | Reads the YAML stream from a file instead of a string. |
411 | |
412 | =head1 SUPPORT |
413 | |
414 | Bugs should be reported via the CPAN bug tracker at |
415 | |
416 | L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta> |
417 | |
418 | =head1 AUTHOR |
419 | |
420 | Adam Kennedy E<lt>adamk@cpan.orgE<gt> |
421 | |
422 | =head1 SEE ALSO |
423 | |
424 | L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>, |
425 | L<http://use.perl.org/~Alias/journal/29427>, L<http://ali.as/> |
426 | |
427 | =head1 COPYRIGHT |
428 | |
429 | Copyright 2006 - 2009 Adam Kennedy. |
430 | |
431 | This program is free software; you can redistribute |
432 | it and/or modify it under the same terms as Perl itself. |
433 | |
434 | The full text of the license can be found in the |
435 | LICENSE file included with this module. |
436 | |
437 | =cut |