Upgrade to Parse-CPAN-Meta-1.40
[p5sagit/p5-mst-13.2.git] / cpan / Parse-CPAN-Meta / lib / Parse / CPAN / Meta.pm
CommitLineData
35df902d 1package Parse::CPAN::Meta;
2
3use strict;
4use Carp 'croak';
5
6# UTF Support?
7sub HAVE_UTF8 () { $] >= 5.007003 }
8BEGIN {
9 if ( HAVE_UTF8 ) {
10 # The string eval helps hide this from Test::MinimumVersion
11 eval "require utf8;";
12 die "Failed to load UTF-8 support" if $@;
13 }
14
15 # Class structure
16 require 5.004;
17 require Exporter;
c82fc722 18 $Parse::CPAN::Meta::VERSION = '1.40';
35df902d 19 @Parse::CPAN::Meta::ISA = qw{ Exporter };
20 @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile };
21}
22
23# Prototypes
24sub LoadFile ($);
25sub Load ($);
26sub _scalar ($$$);
27sub _array ($$$);
28sub _hash ($$$);
29
30# Printable characters for escapes
31my %UNESCAPES = (
32 z => "\x00", a => "\x07", t => "\x09",
33 n => "\x0a", v => "\x0b", f => "\x0c",
34 r => "\x0d", e => "\x1b", '\\' => '\\',
35);
36
37
38
39
40
41#####################################################################
42# Implementation
43
44# Create an object from a file
45sub LoadFile ($) {
46 # Check the file
47 my $file = shift;
48 croak('You did not specify a file name') unless $file;
49 croak( "File '$file' does not exist" ) unless -e $file;
50 croak( "'$file' is a directory, not a file" ) unless -f _;
51 croak( "Insufficient permissions to read '$file'" ) unless -r _;
52
53 # Slurp in the file
54 local $/ = undef;
55 local *CFG;
56 unless ( open( CFG, $file ) ) {
57 croak("Failed to open file '$file': $!");
58 }
59 my $yaml = <CFG>;
60 unless ( close(CFG) ) {
61 croak("Failed to close file '$file': $!");
62 }
63
64 # Hand off to the actual parser
65 Load( $yaml );
66}
67
68# Parse a document from a string.
69# Doing checks on $_[0] prevents us having to do a string copy.
70sub Load ($) {
71 my $string = $_[0];
72 unless ( defined $string ) {
73 croak("Did not provide a string to load");
74 }
75
76 # Byte order marks
77 if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {
78 croak("Stream has a non UTF-8 Unicode Byte Order Mark");
79 } else {
80 # Strip UTF-8 bom if found, we'll just ignore it
81 $string =~ s/^\357\273\277//;
82 }
83
84 # Try to decode as utf8
85 utf8::decode($string) if HAVE_UTF8;
86
87 # Check for some special cases
88 return () unless length $string;
89 unless ( $string =~ /[\012\015]+\z/ ) {
90 croak("Stream does not end with newline character");
91 }
92
93 # Split the file into lines
94 my @lines = grep { ! /^\s*(?:\#.*)?\z/ }
95 split /(?:\015{1,2}\012|\015|\012)/, $string;
96
97 # Strip the initial YAML header
98 @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines;
99
100 # A nibbling parser
101 my @documents = ();
102 while ( @lines ) {
103 # Do we have a document header?
104 if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) {
105 # Handle scalar documents
106 shift @lines;
107 if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) {
108 push @documents, _scalar( "$1", [ undef ], \@lines );
109 next;
110 }
111 }
112
113 if ( ! @lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) {
114 # A naked document
115 push @documents, undef;
116 while ( @lines and $lines[0] !~ /^---/ ) {
117 shift @lines;
118 }
119
120 } elsif ( $lines[0] =~ /^\s*\-/ ) {
121 # An array at the root
122 my $document = [ ];
123 push @documents, $document;
124 _array( $document, [ 0 ], \@lines );
125
126 } elsif ( $lines[0] =~ /^(\s*)\S/ ) {
127 # A hash at the root
128 my $document = { };
129 push @documents, $document;
130 _hash( $document, [ length($1) ], \@lines );
131
132 } else {
133 croak("Parse::CPAN::Meta failed to classify line '$lines[0]'");
134 }
135 }
136
137 if ( wantarray ) {
138 return @documents;
139 } else {
140 return $documents[-1];
141 }
142}
143
144# Deparse a scalar string to the actual scalar
145sub _scalar ($$$) {
146 my ($string, $indent, $lines) = @_;
147
148 # Trim trailing whitespace
149 $string =~ s/\s*\z//;
150
151 # Explitic null/undef
152 return undef if $string eq '~';
153
154 # Quotes
155 if ( $string =~ /^\'(.*?)\'\z/ ) {
156 return '' unless defined $1;
157 $string = $1;
158 $string =~ s/\'\'/\'/g;
159 return $string;
160 }
161 if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) {
162 # Reusing the variable is a little ugly,
163 # but avoids a new variable and a string copy.
164 $string = $1;
165 $string =~ s/\\"/"/g;
166 $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex;
167 return $string;
168 }
169
170 # Special cases
171 if ( $string =~ /^[\'\"!&]/ ) {
172 croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");
173 }
174 return {} if $string eq '{}';
175 return [] if $string eq '[]';
176
177 # Regular unquoted string
178 return $string unless $string =~ /^[>|]/;
179
180 # Error
181 croak("Parse::CPAN::Meta failed to find multi-line scalar content") unless @$lines;
182
183 # Check the indent depth
184 $lines->[0] =~ /^(\s*)/;
185 $indent->[-1] = length("$1");
186 if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) {
187 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");
188 }
189
190 # Pull the lines
191 my @multiline = ();
192 while ( @$lines ) {
193 $lines->[0] =~ /^(\s*)/;
194 last unless length($1) >= $indent->[-1];
195 push @multiline, substr(shift(@$lines), length($1));
196 }
197
198 my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n";
199 my $t = (substr($string, 1, 1) eq '-') ? '' : "\n";
200 return join( $j, @multiline ) . $t;
201}
202
203# Parse an array
204sub _array ($$$) {
205 my ($array, $indent, $lines) = @_;
206
207 while ( @$lines ) {
208 # Check for a new document
209 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {
210 while ( @$lines and $lines->[0] !~ /^---/ ) {
211 shift @$lines;
212 }
213 return 1;
214 }
215
216 # Check the indent level
217 $lines->[0] =~ /^(\s*)/;
218 if ( length($1) < $indent->[-1] ) {
219 return 1;
220 } elsif ( length($1) > $indent->[-1] ) {
221 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");
222 }
223
224 if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) {
225 # Inline nested hash
226 my $indent2 = length("$1");
227 $lines->[0] =~ s/-/ /;
228 push @$array, { };
229 _hash( $array->[-1], [ @$indent, $indent2 ], $lines );
230
231 } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) {
232 # Array entry with a value
233 shift @$lines;
234 push @$array, _scalar( "$2", [ @$indent, undef ], $lines );
235
236 } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) {
237 shift @$lines;
238 unless ( @$lines ) {
239 push @$array, undef;
240 return 1;
241 }
242 if ( $lines->[0] =~ /^(\s*)\-/ ) {
243 my $indent2 = length("$1");
244 if ( $indent->[-1] == $indent2 ) {
245 # Null array entry
246 push @$array, undef;
247 } else {
248 # Naked indenter
249 push @$array, [ ];
250 _array( $array->[-1], [ @$indent, $indent2 ], $lines );
251 }
252
253 } elsif ( $lines->[0] =~ /^(\s*)\S/ ) {
254 push @$array, { };
255 _hash( $array->[-1], [ @$indent, length("$1") ], $lines );
256
257 } else {
258 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");
259 }
260
261 } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) {
262 # This is probably a structure like the following...
263 # ---
264 # foo:
265 # - list
266 # bar: value
267 #
268 # ... so lets return and let the hash parser handle it
269 return 1;
270
271 } else {
272 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");
273 }
274 }
275
276 return 1;
277}
278
279# Parse an array
280sub _hash ($$$) {
281 my ($hash, $indent, $lines) = @_;
282
283 while ( @$lines ) {
284 # Check for a new document
285 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {
286 while ( @$lines and $lines->[0] !~ /^---/ ) {
287 shift @$lines;
288 }
289 return 1;
290 }
291
292 # Check the indent level
293 $lines->[0] =~ /^(\s*)/;
294 if ( length($1) < $indent->[-1] ) {
295 return 1;
296 } elsif ( length($1) > $indent->[-1] ) {
297 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");
298 }
299
300 # Get the key
301 unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) {
302 if ( $lines->[0] =~ /^\s*[?\'\"]/ ) {
303 croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");
304 }
305 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");
306 }
307 my $key = $1;
308
309 # Do we have a value?
310 if ( length $lines->[0] ) {
311 # Yes
312 $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines );
313 } else {
314 # An indent
315 shift @$lines;
316 unless ( @$lines ) {
317 $hash->{$key} = undef;
318 return 1;
319 }
320 if ( $lines->[0] =~ /^(\s*)-/ ) {
321 $hash->{$key} = [];
322 _array( $hash->{$key}, [ @$indent, length($1) ], $lines );
323 } elsif ( $lines->[0] =~ /^(\s*)./ ) {
324 my $indent2 = length("$1");
325 if ( $indent->[-1] >= $indent2 ) {
326 # Null hash entry
327 $hash->{$key} = undef;
328 } else {
329 $hash->{$key} = {};
330 _hash( $hash->{$key}, [ @$indent, length($1) ], $lines );
331 }
332 }
333 }
334 }
335
336 return 1;
337}
338
3391;
340
341__END__
342
343=pod
344
345=head1 NAME
346
347Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files
348
349=head1 SYNOPSIS
350
351 #############################################
352 # In your file
353
354 ---
355 rootproperty: blah
356 section:
357 one: two
358 three: four
359 Foo: Bar
360 empty: ~
361
362
363
364 #############################################
365 # In your program
366
367 use Parse::CPAN::Meta;
368
369 # Create a YAML file
370 my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' );
371
372 # Reading properties
373 my $root = $yaml[0]->{rootproperty};
374 my $one = $yaml[0]->{section}->{one};
375 my $Foo = $yaml[0]->{section}->{Foo};
376
377=head1 DESCRIPTION
378
379B<Parse::CPAN::Meta> is a parser for F<META.yml> files, based on the
380parser half of L<YAML::Tiny>.
381
382It supports a basic subset of the full YAML specification, enough to
383implement parsing of typical F<META.yml> files, and other similarly simple
384YAML files.
385
386If you need something with more power, move up to a full YAML parser such
387as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>.
388
389B<Parse::CPAN::Meta> provides a very simply API of only two functions,
390based on the YAML functions of the same name. Wherever possible,
391identical calling semantics are used.
392
393All error reporting is done with exceptions (die'ing).
394
395=head1 FUNCTIONS
396
397For maintenance clarity, no functions are exported.
398
399=head2 Load
400
401 my @yaml = Load( $string );
402
403Parses a string containing a valid YAML stream into a list of Perl data
404structures.
405
406=head2 LoadFile
407
408 my @yaml = LoadFile( 'META.yml' );
409
410Reads the YAML stream from a file instead of a string.
411
412=head1 SUPPORT
413
414Bugs should be reported via the CPAN bug tracker at
415
416L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta>
417
418=head1 AUTHOR
419
420Adam Kennedy E<lt>adamk@cpan.orgE<gt>
421
422=head1 SEE ALSO
423
424L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,
425L<http://use.perl.org/~Alias/journal/29427>, L<http://ali.as/>
426
427=head1 COPYRIGHT
428
429Copyright 2006 - 2009 Adam Kennedy.
430
431This program is free software; you can redistribute
432it and/or modify it under the same terms as Perl itself.
433
434The full text of the license can be found in the
435LICENSE file included with this module.
436
437=cut