Commit | Line | Data |
be96f5c3 |
1 | package Parse::CPAN::Meta; |
2 | |
3 | use strict; |
4 | use Carp 'croak'; |
5 | BEGIN { |
6 | require 5.004; |
7 | require Exporter; |
c59d1bfa |
8 | $Parse::CPAN::Meta::VERSION = '0.04_01'; |
be96f5c3 |
9 | @Parse::CPAN::Meta::ISA = qw{ Exporter }; |
10 | @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile }; |
11 | } |
12 | |
13 | # Prototypes |
14 | sub LoadFile ($); |
15 | sub Load ($); |
16 | sub _scalar ($$$); |
17 | sub _array ($$$); |
18 | sub _hash ($$$); |
19 | |
20 | # Printable characters for escapes |
21 | my %UNESCAPES = ( |
22 | z => "\x00", a => "\x07", t => "\x09", |
23 | n => "\x0a", v => "\x0b", f => "\x0c", |
24 | r => "\x0d", e => "\x1b", '\\' => '\\', |
25 | ); |
26 | |
27 | |
c59d1bfa |
28 | my %BOM = ( |
29 | "\357\273\277" => 'UTF-8', |
30 | "\376\377" => 'UTF-16BE', |
31 | "\377\376" => 'UTF-16LE', |
32 | "\0\0\376\377" => 'UTF-32BE', |
33 | "\377\376\0\0" => 'UTF-32LE' |
34 | ); |
35 | |
36 | sub BOM_MIN_LENGTH () { 2 } |
37 | sub BOM_MAX_LENGTH () { 4 } |
38 | sub HAVE_UTF8 () { $] >= 5.007003 } |
39 | |
40 | BEGIN { require utf8 if HAVE_UTF8 } |
be96f5c3 |
41 | |
42 | |
43 | ##################################################################### |
44 | # Implementation |
45 | |
46 | # Create an object from a file |
47 | sub LoadFile ($) { |
48 | # Check the file |
49 | my $file = shift; |
50 | croak('You did not specify a file name') unless $file; |
51 | croak( "File '$file' does not exist" ) unless -e $file; |
52 | croak( "'$file' is a directory, not a file" ) unless -f _; |
53 | croak( "Insufficient permissions to read '$file'" ) unless -r _; |
54 | |
55 | # Slurp in the file |
56 | local $/ = undef; |
57 | open( CFG, $file ) or croak("Failed to open file '$file': $!"); |
58 | my $yaml = <CFG>; |
59 | close CFG or croak("Failed to close file '$file': $!"); |
60 | |
61 | # Hand off to the actual parser |
62 | Load( $yaml ); |
63 | } |
64 | |
65 | # Parse a document from a string. |
66 | # Doing checks on $_[0] prevents us having to do a string copy. |
67 | sub Load ($) { |
c59d1bfa |
68 | |
69 | my $str = $_[0]; |
70 | |
71 | # Handle special cases |
72 | foreach my $length ( BOM_MIN_LENGTH .. BOM_MAX_LENGTH ) { |
73 | if ( my $enc = $BOM{substr($str, 0, $length)} ) { |
74 | croak("Stream has a non UTF-8 BOM") unless $enc eq 'UTF-8'; |
75 | substr($str, 0, $length) = ''; # strip UTF-8 bom if found, we'll just ignore it |
76 | } |
77 | } |
78 | |
79 | if ( HAVE_UTF8 ) { |
80 | utf8::decode($str); # try to decode as utf8 |
81 | } |
82 | |
83 | unless ( defined $str ) { |
be96f5c3 |
84 | croak("Did not provide a string to Load"); |
85 | } |
c59d1bfa |
86 | return() unless length $str; |
87 | unless ( $str =~ /[\012\015]+$/ ) { |
be96f5c3 |
88 | croak("Stream does not end with newline character"); |
89 | } |
90 | |
91 | # Split the file into lines |
92 | my @lines = grep { ! /^\s*(?:\#.*)?$/ } |
c59d1bfa |
93 | split /(?:\015{1,2}\012|\015|\012)/, $str; |
be96f5c3 |
94 | |
95 | # A nibbling parser |
96 | my @documents = (); |
97 | while ( @lines ) { |
98 | # Do we have a document header? |
99 | if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?$/ ) { |
100 | # Handle scalar documents |
101 | shift @lines; |
102 | if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML:[\d\.]+)$/ ) { |
103 | push @documents, _scalar( "$1", [ undef ], \@lines ); |
104 | next; |
105 | } |
106 | } |
107 | |
108 | if ( ! @lines or $lines[0] =~ /^---\s*(?:(.+)\s*)?$/ ) { |
109 | # A naked document |
110 | push @documents, undef; |
111 | |
112 | } elsif ( $lines[0] =~ /^\s*\-/ ) { |
113 | # An array at the root |
114 | my $document = [ ]; |
115 | push @documents, $document; |
116 | _array( $document, [ 0 ], \@lines ); |
117 | |
118 | } elsif ( $lines[0] =~ /^(\s*)\w/ ) { |
119 | # A hash at the root |
120 | my $document = { }; |
121 | push @documents, $document; |
122 | _hash( $document, [ length($1) ], \@lines ); |
123 | |
124 | } else { |
125 | croak("Parse::CPAN::Meta does not support the line '$lines[0]'"); |
126 | } |
127 | } |
128 | |
129 | if ( wantarray ) { |
130 | return @documents; |
131 | } else { |
132 | return $documents[-1]; |
133 | } |
134 | } |
135 | |
136 | # Deparse a scalar string to the actual scalar |
137 | sub _scalar ($$$) { |
138 | my $string = shift; |
139 | my $indent = shift; |
140 | my $lines = shift; |
141 | |
142 | # Trim trailing whitespace |
143 | $string =~ s/\s*$//; |
144 | |
145 | # Explitic null/undef |
146 | return undef if $string eq '~'; |
147 | |
148 | # Quotes |
149 | if ( $string =~ /^\'(.*?)\'$/ ) { |
150 | return '' unless defined $1; |
151 | my $rv = $1; |
152 | $rv =~ s/\'\'/\'/g; |
153 | return $rv; |
154 | } |
155 | if ( $string =~ /^\"((?:\\.|[^\"])*)\"$/ ) { |
156 | my $str = $1; |
157 | $str =~ s/\\"/"/g; |
158 | $str =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex; |
159 | return $str; |
160 | } |
161 | if ( $string =~ /^[\'\"]/ ) { |
162 | # A quote with folding... we don't support that |
163 | croak("Parse::CPAN::Meta does not support multi-line quoted scalars"); |
164 | } |
165 | |
166 | # Null hash and array |
167 | if ( $string eq '{}' ) { |
168 | # Null hash |
169 | return {}; |
170 | } |
171 | if ( $string eq '[]' ) { |
172 | # Null array |
173 | return []; |
174 | } |
175 | |
176 | # Regular unquoted string |
177 | return $string unless $string =~ /^[>|]/; |
178 | |
179 | # Error |
180 | croak("Multi-line scalar content missing") unless @$lines; |
181 | |
182 | # Check the indent depth |
183 | $lines->[0] =~ /^(\s*)/; |
184 | $indent->[-1] = length("$1"); |
185 | if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) { |
186 | croak("Illegal line indenting"); |
187 | } |
188 | |
189 | # Pull the lines |
190 | my @multiline = (); |
191 | while ( @$lines ) { |
192 | $lines->[0] =~ /^(\s*)/; |
193 | last unless length($1) >= $indent->[-1]; |
194 | push @multiline, substr(shift(@$lines), length($1)); |
195 | } |
196 | |
197 | my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n"; |
198 | my $t = (substr($string, 1, 1) eq '-') ? '' : "\n"; |
199 | return join( $j, @multiline ) . $t; |
200 | } |
201 | |
202 | # Parse an array |
203 | sub _array ($$$) { |
204 | my $array = shift; |
205 | my $indent = shift; |
206 | my $lines = shift; |
207 | |
208 | while ( @$lines ) { |
209 | # Check for a new document |
210 | return 1 if $lines->[0] =~ /^---\s*(?:(.+)\s*)?$/; |
211 | |
212 | # Check the indent level |
213 | $lines->[0] =~ /^(\s*)/; |
214 | if ( length($1) < $indent->[-1] ) { |
215 | return 1; |
216 | } elsif ( length($1) > $indent->[-1] ) { |
217 | croak("Hash line over-indented"); |
218 | } |
219 | |
220 | if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) { |
221 | # Inline nested hash |
222 | my $indent2 = length("$1"); |
223 | $lines->[0] =~ s/-/ /; |
224 | push @$array, { }; |
225 | _hash( $array->[-1], [ @$indent, $indent2 ], $lines ); |
226 | |
227 | } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*$/ ) { |
228 | # Array entry with a value |
229 | shift @$lines; |
230 | push @$array, _scalar( "$2", [ @$indent, undef ], $lines ); |
231 | |
232 | } elsif ( $lines->[0] =~ /^\s*\-\s*$/ ) { |
233 | shift @$lines; |
234 | unless ( @$lines ) { |
235 | push @$array, undef; |
236 | return 1; |
237 | } |
238 | if ( $lines->[0] =~ /^(\s*)\-/ ) { |
239 | my $indent2 = length("$1"); |
240 | if ( $indent->[-1] == $indent2 ) { |
241 | # Null array entry |
242 | push @$array, undef; |
243 | } else { |
244 | # Naked indenter |
245 | push @$array, [ ]; |
246 | _array( $array->[-1], [ @$indent, $indent2 ], $lines ); |
247 | } |
248 | |
249 | } elsif ( $lines->[0] =~ /^(\s*)\w/ ) { |
250 | push @$array, { }; |
251 | _hash( $array->[-1], [ @$indent, length("$1") ], $lines ); |
252 | |
253 | } else { |
254 | croak("Parse::CPAN::Meta does not support the line '$lines->[0]'"); |
255 | } |
256 | |
257 | } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) { |
258 | # This is probably a structure like the following... |
259 | # --- |
260 | # foo: |
261 | # - list |
262 | # bar: value |
263 | # |
264 | # ... so lets return and let the hash parser handle it |
265 | return 1; |
266 | |
267 | } else { |
268 | croak("Parse::CPAN::Meta does not support the line '$lines->[0]'"); |
269 | } |
270 | } |
271 | |
272 | return 1; |
273 | } |
274 | |
275 | # Parse an array |
276 | sub _hash ($$$) { |
277 | my $hash = shift; |
278 | my $indent = shift; |
279 | my $lines = shift; |
280 | |
281 | while ( @$lines ) { |
282 | # Check for a new document |
283 | return 1 if $lines->[0] =~ /^---\s*(?:(.+)\s*)?$/; |
284 | |
285 | # Check the indent level |
286 | $lines->[0] =~/^(\s*)/; |
287 | if ( length($1) < $indent->[-1] ) { |
288 | return 1; |
289 | } elsif ( length($1) > $indent->[-1] ) { |
290 | croak("Hash line over-indented"); |
291 | } |
292 | |
293 | # Get the key |
294 | unless ( $lines->[0] =~ s/^\s*([^\'\"][^\n]*?)\s*:(\s+|$)// ) { |
295 | croak("Bad hash line"); |
296 | } |
297 | my $key = $1; |
298 | |
299 | # Do we have a value? |
300 | if ( length $lines->[0] ) { |
301 | # Yes |
302 | $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines ); |
303 | next; |
304 | } |
305 | |
306 | # An indent |
307 | shift @$lines; |
308 | unless ( @$lines ) { |
309 | $hash->{$key} = undef; |
310 | return 1; |
311 | } |
312 | if ( $lines->[0] =~ /^(\s*)-/ ) { |
313 | $hash->{$key} = []; |
314 | _array( $hash->{$key}, [ @$indent, length($1) ], $lines ); |
315 | } elsif ( $lines->[0] =~ /^(\s*)./ ) { |
316 | my $indent2 = length("$1"); |
317 | if ( $indent->[-1] >= $indent2 ) { |
318 | # Null hash entry |
319 | $hash->{$key} = undef; |
320 | } else { |
321 | $hash->{$key} = {}; |
322 | _hash( $hash->{$key}, [ @$indent, length($1) ], $lines ); |
323 | } |
324 | } |
325 | } |
326 | |
327 | return 1; |
328 | } |
329 | |
330 | 1; |
331 | |
332 | __END__ |
333 | |
334 | =pod |
335 | |
336 | =head1 NAME |
337 | |
338 | Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files |
339 | |
340 | =head1 SYNOPSIS |
341 | |
342 | ############################################# |
343 | # In your file |
344 | |
345 | --- |
346 | rootproperty: blah |
347 | section: |
348 | one: two |
349 | three: four |
350 | Foo: Bar |
351 | empty: ~ |
352 | |
353 | |
354 | |
355 | ############################################# |
356 | # In your program |
357 | |
358 | use Parse::CPAN::Meta; |
359 | |
360 | # Create a YAML file |
361 | my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' ); |
362 | |
363 | # Reading properties |
364 | my $root = $yaml[0]->{rootproperty}; |
365 | my $one = $yaml[0]->{section}->{one}; |
366 | my $Foo = $yaml[0]->{section}->{Foo}; |
367 | |
368 | =head1 DESCRIPTION |
369 | |
370 | B<Parse::CPAN::Meta> is a parser for META.yml files, based on the |
371 | parser half of L<YAML::Tiny>. |
372 | |
373 | It supports a basic subset of the full YAML specification, enough to |
374 | implement parsing of typical META.yml files, and other similarly simple |
375 | YAML files. |
376 | |
377 | If you need something with more power, move up to a full YAML parser such |
378 | as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>. |
379 | |
380 | Parse::CPAN::Meta provides a very simply API of only two functions, based |
381 | on the YAML functions of the same name. Wherever possible, identical |
382 | calling semantics are used. |
383 | |
384 | All error reporting is done with exceptions (dieing). |
385 | |
386 | =head1 FUNCTIONS |
387 | |
388 | For maintenance clarity, no functions are exported. |
389 | |
390 | =head2 Load( $string ) |
391 | |
392 | my @documents = Load( $string ); |
393 | |
394 | Parses a string containing a valid YAML stream into a list of Perl data |
395 | structures. |
396 | |
397 | =head2 LoadFile( $file_name ) |
398 | |
399 | Reads the YAML stream from a file instead of a string. |
400 | |
401 | =head1 SUPPORT |
402 | |
403 | Bugs should be reported via the CPAN bug tracker at |
404 | |
405 | L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta> |
406 | |
407 | =head1 AUTHOR |
408 | |
409 | Adam Kennedy E<lt>adamk@cpan.orgE<gt> |
410 | |
411 | =head1 SEE ALSO |
412 | |
413 | L<YAML::Tiny>, L<YAML>, L<YAML::Syck> |
414 | |
415 | =head1 COPYRIGHT |
416 | |
417 | Copyright 2006 - 2009 Adam Kennedy. |
418 | |
419 | This program is free software; you can redistribute |
420 | it and/or modify it under the same terms as Perl itself. |
421 | |
422 | The full text of the license can be found in the |
423 | LICENSE file included with this module. |
424 | |
425 | =cut |