X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=p5sagit%2FModule-Metadata.git;a=blobdiff_plain;f=lib%2FModule%2FMetadata.pm;h=281d738dda92b64ed1d9b3d5cc05a370de06dd12;hp=ed36891da7e614c5887283413c80ba7141793180;hb=HEAD;hpb=74b897430462fcbb115ae8e82682df7b2407270b diff --git a/lib/Module/Metadata.pm b/lib/Module/Metadata.pm index ed36891..281d738 100644 --- a/lib/Module/Metadata.pm +++ b/lib/Module/Metadata.pm @@ -10,13 +10,19 @@ package Module::Metadata; # parrot future to look at other types of modules). use strict; -use vars qw($VERSION); -$VERSION = '1.000010_001'; +use warnings; + +our $VERSION = '1.000020'; $VERSION = eval $VERSION; use Carp qw/croak/; use File::Spec; -use IO::File; +BEGIN { + # Try really hard to not depend ony any DynaLoaded module, such as IO::File or Fcntl + eval { + require Fcntl; Fcntl->import('SEEK_SET'); 1; + } or *SEEK_SET = sub { 0 } +} use version 0.87; BEGIN { if ($INC{'Log/Contextual.pm'}) { @@ -29,11 +35,39 @@ use File::Find qw(find); my $V_NUM_REGEXP = qr{v?[0-9._]+}; # crudely, a v-string or decimal +my $PKG_FIRST_WORD_REGEXP = qr{ # the FIRST word in a package name + [a-zA-Z_] # the first word CANNOT start with a digit + (?: + [\w']? # can contain letters, digits, _, or ticks + \w # But, NO multi-ticks or trailing ticks + )* +}x; + +my $PKG_ADDL_WORD_REGEXP = qr{ # the 2nd+ word in a package name + \w # the 2nd+ word CAN start with digits + (?: + [\w']? # and can contain letters or ticks + \w # But, NO multi-ticks or trailing ticks + )* +}x; + +my $PKG_NAME_REGEXP = qr{ # match a package name + (?: :: )? # a pkg name can start with arisdottle + $PKG_FIRST_WORD_REGEXP # a package word + (?: + (?: :: )+ ### arisdottle (allow one or many times) + $PKG_ADDL_WORD_REGEXP ### a package word + )* # ^ zero, one or many times + (?: + :: # allow trailing arisdottle + )? +}x; + my $PKG_REGEXP = qr{ # match a package declaration ^[\s\{;]* # intro chars on a line package # the word 'package' \s+ # whitespace - ([\w:]+) # a package name + ($PKG_NAME_REGEXP) # a package name \s* # optional whitespace ($V_NUM_REGEXP)? # optional version number \s* # optional whitesapce @@ -58,7 +92,7 @@ my $VERS_REGEXP = qr{ # match a VERSION definition $VARNAME_REGEXP # without parens ) \s* - =[^=~] # = but not ==, nor =~ + =[^=~>] # = but not ==, nor =~, nor => }x; sub new_from_file { @@ -93,16 +127,16 @@ sub new_from_module { } { - + my $compare_versions = sub { my ($v1, $op, $v2) = @_; $v1 = version->new($v1) unless UNIVERSAL::isa($v1,'version'); - + my $eval_str = "\$v1 $op \$v2"; my $result = eval $eval_str; log_info { "error comparing versions: '$eval_str' $@" } if $@; - + return $result; }; @@ -128,35 +162,35 @@ sub new_from_module { my $resolve_module_versions = sub { my $packages = shift; - + my( $file, $version ); my $err = ''; foreach my $p ( @$packages ) { if ( defined( $p->{version} ) ) { - if ( defined( $version ) ) { - if ( $compare_versions->( $version, '!=', $p->{version} ) ) { - $err .= " $p->{file} ($p->{version})\n"; - } else { - # same version declared multiple times, ignore - } - } else { - $file = $p->{file}; - $version = $p->{version}; - } + if ( defined( $version ) ) { + if ( $compare_versions->( $version, '!=', $p->{version} ) ) { + $err .= " $p->{file} ($p->{version})\n"; + } else { + # same version declared multiple times, ignore + } + } else { + $file = $p->{file}; + $version = $p->{version}; + } } - $file ||= $p->{file} if defined( $p->{file} ); - } - + $file ||= $p->{file} if defined( $p->{file} ); + } + if ( $err ) { $err = " $file ($version)\n" . $err; } - + my %result = ( file => $file, version => $version, err => $err ); - + return \%result; }; @@ -221,16 +255,16 @@ sub new_from_module { my $mapped_filename = File::Spec::Unix->abs2rel( $file, $dir ); my @path = split( /\//, $mapped_filename ); (my $prime_package = join( '::', @path )) =~ s/\.pm$//; - + my $pm_info = $class->new_from_file( $file ); - + foreach my $package ( $pm_info->packages_inside ) { next if $package eq 'main'; # main can appear numerous times, ignore next if $package eq 'DB'; # special debugging package, ignore next if grep /^_/, split( /::/, $package ); # private package, ignore - + my $version = $pm_info->version( $package ); - + $prime_package = $package if lc($prime_package) eq lc($package); if ( $package eq $prime_package ) { if ( exists( $prime{$package} ) ) { @@ -248,84 +282,84 @@ sub new_from_module { } } } - + # Then we iterate over all the packages found above, identifying conflicts # and selecting the "best" candidate for recording the file & version # for each package. foreach my $package ( keys( %alt ) ) { my $result = $resolve_module_versions->( $alt{$package} ); - + if ( exists( $prime{$package} ) ) { # primary package selected - + if ( $result->{err} ) { - # Use the selected primary package, but there are conflicting - # errors among multiple alternative packages that need to be - # reported + # Use the selected primary package, but there are conflicting + # errors among multiple alternative packages that need to be + # reported log_info { - "Found conflicting versions for package '$package'\n" . - " $prime{$package}{file} ($prime{$package}{version})\n" . - $result->{err} + "Found conflicting versions for package '$package'\n" . + " $prime{$package}{file} ($prime{$package}{version})\n" . + $result->{err} }; - + } elsif ( defined( $result->{version} ) ) { - # There is a primary package selected, and exactly one - # alternative package - - if ( exists( $prime{$package}{version} ) && - defined( $prime{$package}{version} ) ) { - # Unless the version of the primary package agrees with the - # version of the alternative package, report a conflict - if ( $compare_versions->( + # There is a primary package selected, and exactly one + # alternative package + + if ( exists( $prime{$package}{version} ) && + defined( $prime{$package}{version} ) ) { + # Unless the version of the primary package agrees with the + # version of the alternative package, report a conflict + if ( $compare_versions->( $prime{$package}{version}, '!=', $result->{version} ) ) { log_info { "Found conflicting versions for package '$package'\n" . - " $prime{$package}{file} ($prime{$package}{version})\n" . - " $result->{file} ($result->{version})\n" + " $prime{$package}{file} ($prime{$package}{version})\n" . + " $result->{file} ($result->{version})\n" }; - } - - } else { - # The prime package selected has no version so, we choose to - # use any alternative package that does have a version - $prime{$package}{file} = $result->{file}; - $prime{$package}{version} = $result->{version}; - } - + } + } else { - # no alt package found with a version, but we have a prime - # package so we use it whether it has a version or not + # The prime package selected has no version so, we choose to + # use any alternative package that does have a version + $prime{$package}{file} = $result->{file}; + $prime{$package}{version} = $result->{version}; } - + + } else { + # no alt package found with a version, but we have a prime + # package so we use it whether it has a version or not + } + } else { # No primary package was selected, use the best alternative - + if ( $result->{err} ) { log_info { "Found conflicting versions for package '$package'\n" . - $result->{err} + $result->{err} }; } - + # Despite possible conflicting versions, we choose to record # something rather than nothing $prime{$package}{file} = $result->{file}; $prime{$package}{version} = $result->{version} - if defined( $result->{version} ); + if defined( $result->{version} ); } } - + # Normalize versions. Can't use exists() here because of bug in YAML::Node. - # XXX "bug in YAML::Node" comment seems irrelvant -- dagolden, 2009-05-18 + # XXX "bug in YAML::Node" comment seems irrelevant -- dagolden, 2009-05-18 for (grep defined $_->{version}, values %prime) { $_->{version} = $normalize_version->( $_->{version} ); } - + return \%prime; } -} - +} + sub _init { my $class = shift; @@ -354,12 +388,14 @@ sub _init { my $self = bless(\%data, $class); - if ( $handle ) { - $self->_parse_fh($handle); - } - else { - $self->_parse_file(); + if ( not $handle ) { + my $filename = $self->{filename}; + open $handle, '<', $filename + or croak( "Can't open '$filename': $!" ); + + $self->_handle_bom($handle, $filename); } + $self->_parse_fh($handle); unless($self->{module} and length($self->{module})) { my ($v, $d, $f) = File::Spec->splitpath($self->{filename}); @@ -394,9 +430,9 @@ sub _do_find_module { foreach my $dir ( @$dirs ) { my $testfile = File::Spec->catfile($dir, $file); return [ File::Spec->rel2abs( $testfile ), $dir ] - if -e $testfile and !-d _; # For stuff like ExtUtils::xsubpp + if -e $testfile and !-d _; # For stuff like ExtUtils::xsubpp return [ File::Spec->rel2abs( "$testfile.pm" ), $dir ] - if -e "$testfile.pm"; + if -e "$testfile.pm"; } return; } @@ -420,28 +456,16 @@ sub _parse_version_expression { my $self = shift; my $line = shift; - my( $sig, $var, $pkg ); + my( $sigil, $variable_name, $package); if ( $line =~ /$VERS_REGEXP/o ) { - ( $sig, $var, $pkg ) = $2 ? ( $1, $2, $3 ) : ( $4, $5, $6 ); - if ( $pkg ) { - $pkg = ($pkg eq '::') ? 'main' : $pkg; - $pkg =~ s/::$//; + ( $sigil, $variable_name, $package) = $2 ? ( $1, $2, $3 ) : ( $4, $5, $6 ); + if ( $package ) { + $package = ($package eq '::') ? 'main' : $package; + $package =~ s/::$//; } } - return ( $sig, $var, $pkg ); -} - -sub _parse_file { - my $self = shift; - - my $filename = $self->{filename}; - my $fh = IO::File->new( $filename ) - or croak( "Can't open '$filename': $!" ); - - $self->_handle_bom($fh, $filename); - - $self->_parse_fh($fh); + return ( $sigil, $variable_name, $package ); } # Look for a UTF-8/UTF-16BE/UTF-16LE BOM at the beginning of the stream. @@ -449,11 +473,11 @@ sub _parse_file { sub _handle_bom { my ($self, $fh, $filename) = @_; - my $pos = $fh->getpos; + my $pos = tell $fh; return unless defined $pos; my $buf = ' ' x 2; - my $count = $fh->read( $buf, length $buf ); + my $count = read $fh, $buf, length $buf; return unless defined $count and $count >= 2; my $encoding; @@ -463,7 +487,7 @@ sub _handle_bom { $encoding = 'UTF-16LE'; } elsif ( $buf eq "\x{EF}\x{BB}" ) { $buf = ' '; - $count = $fh->read( $buf, length $buf ); + $count = read $fh, $buf, length $buf; if ( defined $count and $count >= 1 and $buf eq "\x{BF}" ) { $encoding = 'UTF-8'; } @@ -471,11 +495,10 @@ sub _handle_bom { if ( defined $encoding ) { if ( "$]" >= 5.008 ) { - # $fh->binmode requires perl 5.10 binmode( $fh, ":encoding($encoding)" ); } } else { - $fh->setpos($pos) + seek $fh, $pos, SEEK_SET or croak( sprintf "Can't reset position to the top of '$filename'" ); } @@ -486,10 +509,11 @@ sub _parse_fh { my ($self, $fh) = @_; my( $in_pod, $seen_end, $need_vers ) = ( 0, 0, 0 ); - my( @pkgs, %vers, %pod, @pod ); - my $pkg = 'main'; + my( @packages, %vers, %pod, @pod ); + my $package = 'main'; my $pod_sect = ''; my $pod_data = ''; + my $in_end = 0; while (defined( my $line = <$fh> )) { my $line_num = $.; @@ -510,15 +534,15 @@ sub _parse_fh { if ( $in_pod ) { if ( $line =~ /^=head[1-4]\s+(.+)\s*$/ ) { - push( @pod, $1 ); - if ( $self->{collect_pod} && length( $pod_data ) ) { + push( @pod, $1 ); + if ( $self->{collect_pod} && length( $pod_data ) ) { $pod{$pod_sect} = $pod_data; $pod_data = ''; } - $pod_sect = $1; + $pod_sect = $1; } elsif ( $self->{collect_pod} ) { - $pod_data .= "$line\n"; + $pod_data .= "$line\n"; } @@ -532,68 +556,63 @@ sub _parse_fh { } else { + # Skip after __END__ + next if $in_end; + # Skip comments in code next if $line =~ /^\s*#/; # Would be nice if we could also check $in_string or something too - last if $line =~ /^__(?:DATA|END)__$/; + if ($line eq '__END__') { + $in_end++; + next; + } + last if $line eq '__DATA__'; # parse $line to see if it's a $VERSION declaration - my( $vers_sig, $vers_fullname, $vers_pkg ) = + my( $version_sigil, $version_fullname, $version_package ) = ($line =~ /VERSION/) ? $self->_parse_version_expression( $line ) : (); if ( $line =~ /$PKG_REGEXP/o ) { - $pkg = $1; - push( @pkgs, $pkg ) unless grep( $pkg eq $_, @pkgs ); - $vers{$pkg} = $2 unless exists( $vers{$pkg} ); + $package = $1; + push( @packages, $package ) unless grep( $package eq $_, @packages ); + $vers{$package} = $2 unless exists( $vers{$package} ); $need_vers = defined $2 ? 0 : 1; # VERSION defined with full package spec, i.e. $Module::VERSION - } elsif ( $vers_fullname && $vers_pkg ) { - push( @pkgs, $vers_pkg ) unless grep( $vers_pkg eq $_, @pkgs ); - $need_vers = 0 if $vers_pkg eq $pkg; + } elsif ( $version_fullname && $version_package ) { + push( @packages, $version_package ) unless grep( $version_package eq $_, @packages ); + $need_vers = 0 if $version_package eq $package; - unless ( defined $vers{$vers_pkg} && length $vers{$vers_pkg} ) { - $vers{$vers_pkg} = - $self->_evaluate_version_line( $vers_sig, $vers_fullname, $line ); - } + unless ( defined $vers{$version_package} && length $vers{$version_package} ) { + $vers{$version_package} = $self->_evaluate_version_line( $version_sigil, $version_fullname, $line ); + } # first non-comment line in undeclared package main is VERSION - } elsif ( !exists($vers{main}) && $pkg eq 'main' && $vers_fullname ) { - $need_vers = 0; - my $v = - $self->_evaluate_version_line( $vers_sig, $vers_fullname, $line ); - $vers{$pkg} = $v; - push( @pkgs, 'main' ); + } elsif ( !exists($vers{main}) && $package eq 'main' && $version_fullname ) { + $need_vers = 0; + my $v = $self->_evaluate_version_line( $version_sigil, $version_fullname, $line ); + $vers{$package} = $v; + push( @packages, 'main' ); # first non-comment line in undeclared package defines package main - } elsif ( !exists($vers{main}) && $pkg eq 'main' && $line =~ /\w+/ ) { - $need_vers = 1; - $vers{main} = ''; - push( @pkgs, 'main' ); + } elsif ( !exists($vers{main}) && $package eq 'main' && $line =~ /\w+/ ) { + $need_vers = 1; + $vers{main} = ''; + push( @packages, 'main' ); # only keep if this is the first $VERSION seen - } elsif ( $vers_fullname && $need_vers ) { - $need_vers = 0; - my $v = - $self->_evaluate_version_line( $vers_sig, $vers_fullname, $line ); - - - unless ( defined $vers{$pkg} && length $vers{$pkg} ) { - $vers{$pkg} = $v; - } else { - warn <<"EOM"; -Package '$pkg' already declared with version '$vers{$pkg}' -ignoring new version '$v' on line $line_num. -EOM - } + } elsif ( $version_fullname && $need_vers ) { + $need_vers = 0; + my $v = $self->_evaluate_version_line( $version_sigil, $version_fullname, $line ); + unless ( defined $vers{$package} && length $vers{$package} ) { + $vers{$package} = $v; + } } - } - } if ( $self->{collect_pod} && length($pod_data) ) { @@ -601,7 +620,7 @@ EOM } $self->{versions} = \%vers; - $self->{packages} = \@pkgs; + $self->{packages} = \@packages; $self->{pod} = \%pod; $self->{pod_headings} = \@pod; } @@ -610,7 +629,7 @@ EOM my $pn = 0; sub _evaluate_version_line { my $self = shift; - my( $sigil, $var, $line ) = @_; + my( $sigil, $variable_name, $line ) = @_; # Some of this code came from the ExtUtils:: hierarchy. @@ -618,19 +637,22 @@ sub _evaluate_version_line { # compiletime/runtime issues with local() my $vsub; $pn++; # everybody gets their own package - my $eval = qq{BEGIN { q# Hide from _packages_inside() + my $eval = qq{BEGIN { my \$dummy = q# Hide from _packages_inside() #; package Module::Metadata::_version::p$pn; use version; no strict; + no warnings; \$vsub = sub { - local $sigil$var; - \$$var=undef; + local $sigil$variable_name; + \$$variable_name=undef; $line; - \$$var + \$$variable_name }; }}; + $eval = $1 if $eval =~ m{^(.+)}s; + local $^W; # Try to get the $VERSION eval $eval; @@ -718,22 +740,22 @@ sub _evaluate_version_line { ############################################################ # accessors -sub name { $_[0]->{module} } +sub name { $_[0]->{module} } -sub filename { $_[0]->{filename} } -sub packages_inside { @{$_[0]->{packages}} } -sub pod_inside { @{$_[0]->{pod_headings}} } -sub contains_pod { $#{$_[0]->{pod_headings}} } +sub filename { $_[0]->{filename} } +sub packages_inside { @{$_[0]->{packages}} } +sub pod_inside { @{$_[0]->{pod_headings}} } +sub contains_pod { 0+@{$_[0]->{pod_headings}} } sub version { my $self = shift; my $mod = shift || $self->{module}; my $vers; if ( defined( $mod ) && length( $mod ) && - exists( $self->{versions}{$mod} ) ) { - return $self->{versions}{$mod}; + exists( $self->{versions}{$mod} ) ) { + return $self->{versions}{$mod}; } else { - return undef; + return undef; } } @@ -741,13 +763,25 @@ sub pod { my $self = shift; my $sect = shift; if ( defined( $sect ) && length( $sect ) && - exists( $self->{pod}{$sect} ) ) { - return $self->{pod}{$sect}; + exists( $self->{pod}{$sect} ) ) { + return $self->{pod}{$sect}; } else { - return undef; + return undef; } } +sub is_indexable { + my ($self, $package) = @_; + + my @indexable_packages = grep { $_ ne 'main' } $self->packages_inside; + + # check for specific package, if provided + return !! grep { $_ eq $package } @indexable_packages if $package; + + # otherwise, check for any indexable packages at all + return !! @indexable_packages; +} + 1; =head1 NAME @@ -769,8 +803,10 @@ Module::Metadata - Gather package and POD information from perl module files =head1 DESCRIPTION -This module provides a standard way to gather metadata about a .pm file -without executing unsafe code. +This module provides a standard way to gather metadata about a .pm file through +(mostly) static analysis and (some) code execution. When determining the +version of a module, the C<$VERSION> assignment is Ced, as is traditional +in the CPAN toolchain. =head1 USAGE @@ -919,7 +955,7 @@ Log::Contextual has already been loaded, otherwise simply calls warn. =item C<< name() >> Returns the name of the package represented by this module. If there -are more than one packages, it makes a best guess based on the +is more than one package, it makes a best guess based on the filename. If it's a script (i.e. not a *.pm) the package name is 'main'. @@ -939,7 +975,10 @@ Returns the absolute path to the file. Returns a list of packages. Note: this is a raw list of packages discovered (or assumed, in the case of C
). It is not filtered for C, C
or private packages the way the -C method does. +C method does. Invalid package names are not returned, +for example "Foo:Bar". Strange but valid package names are +returned, for example "Foo::Bar::", and are left up to the caller +on how to handle. =item C<< pod_inside() >> @@ -953,6 +992,13 @@ Returns true if there is any POD in the file. Returns the POD data in the given section. +=item C<< is_indexable($package) >> or C<< is_indexable() >> + +Returns a boolean indicating whether the package (if provided) or any package +(otherwise) is eligible for indexing by PAUSE, the Perl Authors Upload Server. +Note This only checks for valid C declarations, and does not take any +ownership information into account. + =back =head1 AUTHOR