lib/Parse/CPAN/Meta.pm

   1 package Parse::CPAN::Meta;\r
   2 \r
   3 use strict;\r
   4 use Carp 'croak';\r
   5 \r
   6 # UTF Support?\r
   7 sub HAVE_UTF8 () { $] >= 5.007003 }\r
   8 BEGIN {\r
   9         if ( HAVE_UTF8 ) {\r
  10                 # The string eval helps hide this from Test::MinimumVersion\r
  11                 eval "require utf8;";\r
  12                 die "Failed to load UTF-8 support" if $@;\r
  13         }\r
  14 \r
  15         # Class structure\r
  16         require 5.004;\r
  17         require Exporter;\r
  18         $Parse::CPAN::Meta::VERSION   = '1.38';\r
  19         @Parse::CPAN::Meta::ISA       = qw{ Exporter      };\r
  20         @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile };\r
  21 }\r
  22 \r
  23 # Prototypes\r
  24 sub LoadFile ($);\r
  25 sub Load     ($);\r
  26 sub _scalar  ($$$);\r
  27 sub _array   ($$$);\r
  28 sub _hash    ($$$);\r
  29 \r
  30 # Printable characters for escapes\r
  31 my %UNESCAPES = (\r
  32         z => "\x00", a => "\x07", t    => "\x09",\r
  33         n => "\x0a", v => "\x0b", f    => "\x0c",\r
  34         r => "\x0d", e => "\x1b", '\\' => '\\',\r
  35 );\r
  36 \r
  37 \r
  38 \r
  39 \r
  40 \r
  41 #####################################################################\r
  42 # Implementation\r
  43 \r
  44 # Create an object from a file\r
  45 sub LoadFile ($) {\r
  46         # Check the file\r
  47         my $file = shift;\r
  48         croak('You did not specify a file name')            unless $file;\r
  49         croak( "File '$file' does not exist" )              unless -e $file;\r
  50         croak( "'$file' is a directory, not a file" )       unless -f _;\r
  51         croak( "Insufficient permissions to read '$file'" ) unless -r _;\r
  52 \r
  53         # Slurp in the file\r
  54         local $/ = undef;\r
  55         local *CFG;\r
  56         unless ( open( CFG, $file ) ) {\r
  57                 croak("Failed to open file '$file': $!");\r
  58         }\r
  59         my $yaml = <CFG>;\r
  60         unless ( close(CFG) ) {\r
  61                 croak("Failed to close file '$file': $!");\r
  62         }\r
  63 \r
  64         # Hand off to the actual parser\r
  65         Load( $yaml );\r
  66 }\r
  67 \r
  68 # Parse a document from a string.\r
  69 # Doing checks on $_[0] prevents us having to do a string copy.\r
  70 sub Load ($) {\r
  71         my $string = $_[0];\r
  72         unless ( defined $string ) {\r
  73                 croak("Did not provide a string to load");\r
  74         }\r
  75 \r
  76         # Byte order marks\r
  77         if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) {\r
  78                 croak("Stream has a non UTF-8 Unicode Byte Order Mark");\r
  79         } else {\r
  80                 # Strip UTF-8 bom if found, we'll just ignore it\r
  81                 $string =~ s/^\357\273\277//;\r
  82         }\r
  83 \r
  84         # Try to decode as utf8\r
  85         utf8::decode($string) if HAVE_UTF8;\r
  86 \r
  87         # Check for some special cases\r
  88         return () unless length $string;\r
  89         unless ( $string =~ /[\012\015]+\z/ ) {\r
  90                 croak("Stream does not end with newline character");\r
  91         }\r
  92 \r
  93         # Split the file into lines\r
  94         my @lines = grep { ! /^\s*(?:\#.*)?\z/ }\r
  95                     split /(?:\015{1,2}\012|\015|\012)/, $string;\r
  96 \r
  97         # Strip the initial YAML header\r
  98         @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines;\r
  99 \r
 100         # A nibbling parser\r
 101         my @documents = ();\r
 102         while ( @lines ) {\r
 103                 # Do we have a document header?\r
 104                 if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) {\r
 105                         # Handle scalar documents\r
 106                         shift @lines;\r
 107                         if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) {\r
 108                                 push @documents, _scalar( "$1", [ undef ], \@lines );\r
 109                                 next;\r
 110                         }\r
 111                 }\r
 112 \r
 113                 if ( ! @lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) {\r
 114                         # A naked document\r
 115                         push @documents, undef;\r
 116                         while ( @lines and $lines[0] !~ /^---/ ) {\r
 117                                 shift @lines;\r
 118                         }\r
 119 \r
 120                 } elsif ( $lines[0] =~ /^\s*\-/ ) {\r
 121                         # An array at the root\r
 122                         my $document = [ ];\r
 123                         push @documents, $document;\r
 124                         _array( $document, [ 0 ], \@lines );\r
 125 \r
 126                 } elsif ( $lines[0] =~ /^(\s*)\S/ ) {\r
 127                         # A hash at the root\r
 128                         my $document = { };\r
 129                         push @documents, $document;\r
 130                         _hash( $document, [ length($1) ], \@lines );\r
 131 \r
 132                 } else {\r
 133                         croak("Parse::CPAN::Meta failed to classify line '$lines[0]'");\r
 134                 }\r
 135         }\r
 136 \r
 137         if ( wantarray ) {\r
 138                 return @documents;\r
 139         } else {\r
 140                 return $documents[-1];\r
 141         }\r
 142 }\r
 143 \r
 144 # Deparse a scalar string to the actual scalar\r
 145 sub _scalar ($$$) {\r
 146         my ($string, $indent, $lines) = @_;\r
 147 \r
 148         # Trim trailing whitespace\r
 149         $string =~ s/\s*\z//;\r
 150 \r
 151         # Explitic null/undef\r
 152         return undef if $string eq '~';\r
 153 \r
 154         # Quotes\r
 155         if ( $string =~ /^\'(.*?)\'\z/ ) {\r
 156                 return '' unless defined $1;\r
 157                 $string = $1;\r
 158                 $string =~ s/\'\'/\'/g;\r
 159                 return $string;\r
 160         }\r
 161         if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) {\r
 162                 # Reusing the variable is a little ugly,\r
 163                 # but avoids a new variable and a string copy.\r
 164                 $string = $1;\r
 165                 $string =~ s/\\"/"/g;\r
 166                 $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex;\r
 167                 return $string;\r
 168         }\r
 169 \r
 170         # Special cases\r
 171         if ( $string =~ /^[\'\"!&]/ ) {\r
 172                 croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");\r
 173         }\r
 174         return {} if $string eq '{}';\r
 175         return [] if $string eq '[]';\r
 176 \r
 177         # Regular unquoted string\r
 178         return $string unless $string =~ /^[>|]/;\r
 179 \r
 180         # Error\r
 181         croak("Parse::CPAN::Meta failed to find multi-line scalar content") unless @$lines;\r
 182 \r
 183         # Check the indent depth\r
 184         $lines->[0]   =~ /^(\s*)/;\r
 185         $indent->[-1] = length("$1");\r
 186         if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) {\r
 187                 croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r
 188         }\r
 189 \r
 190         # Pull the lines\r
 191         my @multiline = ();\r
 192         while ( @$lines ) {\r
 193                 $lines->[0] =~ /^(\s*)/;\r
 194                 last unless length($1) >= $indent->[-1];\r
 195                 push @multiline, substr(shift(@$lines), length($1));\r
 196         }\r
 197 \r
 198         my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n";\r
 199         my $t = (substr($string, 1, 1) eq '-') ? ''  : "\n";\r
 200         return join( $j, @multiline ) . $t;\r
 201 }\r
 202 \r
 203 # Parse an array\r
 204 sub _array ($$$) {\r
 205         my ($array, $indent, $lines) = @_;\r
 206 \r
 207         while ( @$lines ) {\r
 208                 # Check for a new document\r
 209                 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {\r
 210                         while ( @$lines and $lines->[0] !~ /^---/ ) {\r
 211                                 shift @$lines;\r
 212                         }\r
 213                         return 1;\r
 214                 }\r
 215 \r
 216                 # Check the indent level\r
 217                 $lines->[0] =~ /^(\s*)/;\r
 218                 if ( length($1) < $indent->[-1] ) {\r
 219                         return 1;\r
 220                 } elsif ( length($1) > $indent->[-1] ) {\r
 221                         croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r
 222                 }\r
 223 \r
 224                 if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) {\r
 225                         # Inline nested hash\r
 226                         my $indent2 = length("$1");\r
 227                         $lines->[0] =~ s/-/ /;\r
 228                         push @$array, { };\r
 229                         _hash( $array->[-1], [ @$indent, $indent2 ], $lines );\r
 230 \r
 231                 } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) {\r
 232                         # Array entry with a value\r
 233                         shift @$lines;\r
 234                         push @$array, _scalar( "$2", [ @$indent, undef ], $lines );\r
 235 \r
 236                 } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) {\r
 237                         shift @$lines;\r
 238                         unless ( @$lines ) {\r
 239                                 push @$array, undef;\r
 240                                 return 1;\r
 241                         }\r
 242                         if ( $lines->[0] =~ /^(\s*)\-/ ) {\r
 243                                 my $indent2 = length("$1");\r
 244                                 if ( $indent->[-1] == $indent2 ) {\r
 245                                         # Null array entry\r
 246                                         push @$array, undef;\r
 247                                 } else {\r
 248                                         # Naked indenter\r
 249                                         push @$array, [ ];\r
 250                                         _array( $array->[-1], [ @$indent, $indent2 ], $lines );\r
 251                                 }\r
 252 \r
 253                         } elsif ( $lines->[0] =~ /^(\s*)\S/ ) {\r
 254                                 push @$array, { };\r
 255                                 _hash( $array->[-1], [ @$indent, length("$1") ], $lines );\r
 256 \r
 257                         } else {\r
 258                                 croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r
 259                         }\r
 260 \r
 261                 } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) {\r
 262                         # This is probably a structure like the following...\r
 263                         # ---\r
 264                         # foo:\r
 265                         # - list\r
 266                         # bar: value\r
 267                         #\r
 268                         # ... so lets return and let the hash parser handle it\r
 269                         return 1;\r
 270 \r
 271                 } else {\r
 272                         croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r
 273                 }\r
 274         }\r
 275 \r
 276         return 1;\r
 277 }\r
 278 \r
 279 # Parse an array\r
 280 sub _hash ($$$) {\r
 281         my ($hash, $indent, $lines) = @_;\r
 282 \r
 283         while ( @$lines ) {\r
 284                 # Check for a new document\r
 285                 if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) {\r
 286                         while ( @$lines and $lines->[0] !~ /^---/ ) {\r
 287                                 shift @$lines;\r
 288                         }\r
 289                         return 1;\r
 290                 }\r
 291 \r
 292                 # Check the indent level\r
 293                 $lines->[0] =~ /^(\s*)/;\r
 294                 if ( length($1) < $indent->[-1] ) {\r
 295                         return 1;\r
 296                 } elsif ( length($1) > $indent->[-1] ) {\r
 297                         croak("Parse::CPAN::Meta found bad indenting in line '$lines->[0]'");\r
 298                 }\r
 299 \r
 300                 # Get the key\r
 301                 unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) {\r
 302                         if ( $lines->[0] =~ /^\s*[?\'\"]/ ) {\r
 303                                 croak("Parse::CPAN::Meta does not support a feature in line '$lines->[0]'");\r
 304                         }\r
 305                         croak("Parse::CPAN::Meta failed to classify line '$lines->[0]'");\r
 306                 }\r
 307                 my $key = $1;\r
 308 \r
 309                 # Do we have a value?\r
 310                 if ( length $lines->[0] ) {\r
 311                         # Yes\r
 312                         $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines );\r
 313                 } else {\r
 314                         # An indent\r
 315                         shift @$lines;\r
 316                         unless ( @$lines ) {\r
 317                                 $hash->{$key} = undef;\r
 318                                 return 1;\r
 319                         }\r
 320                         if ( $lines->[0] =~ /^(\s*)-/ ) {\r
 321                                 $hash->{$key} = [];\r
 322                                 _array( $hash->{$key}, [ @$indent, length($1) ], $lines );\r
 323                         } elsif ( $lines->[0] =~ /^(\s*)./ ) {\r
 324                                 my $indent2 = length("$1");\r
 325                                 if ( $indent->[-1] >= $indent2 ) {\r
 326                                         # Null hash entry\r
 327                                         $hash->{$key} = undef;\r
 328                                 } else {\r
 329                                         $hash->{$key} = {};\r
 330                                         _hash( $hash->{$key}, [ @$indent, length($1) ], $lines );\r
 331                                 }\r
 332                         }\r
 333                 }\r
 334         }\r
 335 \r
 336         return 1;\r
 337 }\r
 338 \r
 339 1;\r
 340 \r
 341 __END__\r
 342 \r
 343 =pod\r
 344 \r
 345 =head1 NAME\r
 346 \r
 347 Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files\r
 348 \r
 349 =head1 SYNOPSIS\r
 350 \r
 351     #############################################\r
 352     # In your file\r
 353     \r
 354     ---\r
 355     rootproperty: blah\r
 356     section:\r
 357       one: two\r
 358       three: four\r
 359       Foo: Bar\r
 360       empty: ~\r
 361     \r
 362     \r
 363     \r
 364     #############################################\r
 365     # In your program\r
 366     \r
 367     use Parse::CPAN::Meta;\r
 368     \r
 369     # Create a YAML file\r
 370     my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' );\r
 371     \r
 372     # Reading properties\r
 373     my $root = $yaml[0]->{rootproperty};\r
 374     my $one  = $yaml[0]->{section}->{one};\r
 375     my $Foo  = $yaml[0]->{section}->{Foo};\r
 376 \r
 377 =head1 DESCRIPTION\r
 378 \r
 379 B<Parse::CPAN::Meta> is a parser for F<META.yml> files, based on the\r
 380 parser half of L<YAML::Tiny>.\r
 381 \r
 382 It supports a basic subset of the full YAML specification, enough to\r
 383 implement parsing of typical F<META.yml> files, and other similarly simple\r
 384 YAML files.\r
 385 \r
 386 If you need something with more power, move up to a full YAML parser such\r
 387 as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>.\r
 388 \r
 389 B<Parse::CPAN::Meta> provides a very simply API of only two functions,\r
 390 based on the YAML functions of the same name. Wherever possible,\r
 391 identical calling semantics are used.\r
 392 \r
 393 All error reporting is done with exceptions (die'ing).\r
 394 \r
 395 =head1 FUNCTIONS\r
 396 \r
 397 For maintenance clarity, no functions are exported.\r
 398 \r
 399 =head2 Load\r
 400 \r
 401   my @yaml = Load( $string );\r
 402 \r
 403 Parses a string containing a valid YAML stream into a list of Perl data\r
 404 structures.\r
 405 \r
 406 =head2 LoadFile\r
 407 \r
 408   my @yaml = LoadFile( 'META.yml' );\r
 409 \r
 410 Reads the YAML stream from a file instead of a string.\r
 411 \r
 412 =head1 SUPPORT\r
 413 \r
 414 Bugs should be reported via the CPAN bug tracker at\r
 415 \r
 416 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta>\r
 417 \r
 418 =head1 AUTHOR\r
 419 \r
 420 Adam Kennedy E<lt>adamk@cpan.orgE<gt>\r
 421 \r
 422 =head1 SEE ALSO\r
 423 \r
 424 L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,\r
 425 L<http://use.perl.org/~Alias/journal/29427>, L<http://ali.as/>\r
 426 \r
 427 =head1 COPYRIGHT\r
 428 \r
 429 Copyright 2006 - 2009 Adam Kennedy.\r
 430 \r
 431 This program is free software; you can redistribute\r
 432 it and/or modify it under the same terms as Perl itself.\r
 433 \r
 434 The full text of the license can be found in the\r
 435 LICENSE file included with this module.\r
 436 \r
 437 =cut\r