Update Parse::CPAN::Meta to 0.04_01
Steffen Mueller [Tue, 3 Mar 2009 20:18:30 +0000 (21:18 +0100)]
lib/Parse/CPAN/Meta.pm
lib/Parse/CPAN/Meta/Changes
lib/Parse/CPAN/Meta/t/11_meta_yml.t
lib/Parse/CPAN/Meta/t/15_multibyte.t
lib/Parse/CPAN/Meta/t/data/utf_16_le_bom.yml [new file with mode: 0644]

index 8d5e29b..6a80681 100644 (file)
@@ -5,7 +5,7 @@ use Carp 'croak';
 BEGIN {
        require 5.004;
        require Exporter;
-       $Parse::CPAN::Meta::VERSION   = '0.04';
+       $Parse::CPAN::Meta::VERSION   = '0.04_01';
        @Parse::CPAN::Meta::ISA       = qw{ Exporter      };
        @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile };
 }
@@ -25,7 +25,19 @@ my %UNESCAPES = (
 );
 
 
-
+my %BOM = (                            # byte-order-mark byte sequence => encoding name
+       "\357\273\277" => 'UTF-8',      # bytes EF BB BF
+       "\376\377"     => 'UTF-16BE',   # bytes FE FF
+       "\377\376"     => 'UTF-16LE',   # bytes FF FE
+       "\0\0\376\377" => 'UTF-32BE',   # bytes 00 00 FE FF
+       "\377\376\0\0" => 'UTF-32LE'    # bytes FF FE 00 00 (shares its first two bytes with the UTF-16LE key)
+);
+
+sub BOM_MIN_LENGTH () { 2 }            # length in bytes of the shortest key in %BOM
+sub BOM_MAX_LENGTH () { 4 }            # length in bytes of the longest key in %BOM
+sub HAVE_UTF8      () { $] >= 5.007003 } # true when the running perl is version 5.7.3 or later
+
+BEGIN { require utf8 if HAVE_UTF8 }    # load utf8.pm at compile time, only when HAVE_UTF8 is true
 
 
 #####################################################################
@@ -53,17 +65,32 @@ sub LoadFile ($) {
 # Parse a document from a string.
 # Doing checks on $_[0] prevents us having to do a string copy.
 sub Load ($) {
-       unless ( defined $_[0] ) {
+
+       my $str = $_[0];
+
+       # Handle special cases
+       foreach my $length ( BOM_MIN_LENGTH .. BOM_MAX_LENGTH ) {
+               if ( my $enc = $BOM{substr($str, 0, $length)} ) {
+                       croak("Stream has a non UTF-8 BOM") unless $enc eq 'UTF-8';
+                       substr($str, 0, $length) = ''; # strip UTF-8 bom if found, we'll just ignore it
+               }
+       }
+
+       if ( HAVE_UTF8 ) {
+               utf8::decode($str); # try to decode as utf8
+       }
+
+       unless ( defined $str ) {
                croak("Did not provide a string to Load");
        }
-       return () unless length $_[0];
-       unless ( $_[0] =~ /[\012\015]+$/ ) {
+       return() unless length $str;
+       unless ( $str =~ /[\012\015]+$/ ) {
                croak("Stream does not end with newline character");
        }
 
        # Split the file into lines
        my @lines = grep { ! /^\s*(?:\#.*)?$/ }
-                   split /(?:\015{1,2}\012|\015|\012)/, shift;
+                   split /(?:\015{1,2}\012|\015|\012)/, $str;
 
        # A nibbling parser
        my @documents = ();
index 9bd147c..5a16651 100644 (file)
@@ -1,5 +1,8 @@
 Revision history for Perl extension Parse-CPAN-Meta
 
+0.04_01 Tue  3 Mar 2009
+       - Merge Unicode / BOM fixes from YAML::Tiny
+
 0.04 Wed  7 Jan 2009
        - Matching changes in YAML::Tiny 1.36
        - Fixing missing feature reported by H.Merijn Brand
index 9b789f0..c5a4934 100644 (file)
@@ -95,7 +95,7 @@ generated_by: ExtUtils::MakeMaker version 6.30
 END_YAML
        [ {
                name              => 'ITS-SIN-FIDS-Content-XML',
-               version           => 0.01,
+               version           => "0.01",
                version_from      => 'lib/ITS/SIN/FIDS/Content/XML.pm',
                installdirs       => 'site',
                requires          => {
index c5e0d67..a67c0f9 100644 (file)
@@ -20,7 +20,7 @@ BEGIN {
 
 use File::Spec::Functions ':ALL';
 use Parse::CPAN::Meta::Test;
-use Test::More tests(0, 1, 3);
+use Test::More tests(0, 1, 4);
 
 
 
@@ -46,3 +46,11 @@ SKIP: {
                'XSLoader'   => 0,
        }, 'build_requires ok' );
 }
+
+SKIP: { # one test, skipped when Parse::CPAN::Meta::HAVE_UTF8() is false
+       skip "no utf8 support", 1 unless Parse::CPAN::Meta::HAVE_UTF8();
+       ok( utf8::is_utf8($yaml[0]->{author}), "utf8 decoded" ); # author string must carry perl's internal UTF-8 flag; @yaml is presumably the parsed documents - confirm against the earlier part of this test
+}
+
+exit(0);
+
diff --git a/lib/Parse/CPAN/Meta/t/data/utf_16_le_bom.yml b/lib/Parse/CPAN/Meta/t/data/utf_16_le_bom.yml
new file mode 100644 (file)
index 0000000..b9230eb
Binary files /dev/null and b/lib/Parse/CPAN/Meta/t/data/utf_16_le_bom.yml differ