1 # -*- mode: cperl; tab-width: 8; indent-tabs-mode: nil; basic-offset: 2 -*-
2 # vim:ts=8:sw=2:et:sta:sts=2
3 package Module::Metadata;
5 # Adapted from Perl-licensed code originally distributed with
6 # Module-Build by Ken Williams
8 # This module provides routines to gather information about
9 # perl modules (assuming this may be expanded in the distant
10 # parrot future to look at other types of modules).
13 use vars qw($VERSION);
14 $VERSION = '1.000010';
15 $VERSION = eval $VERSION;
22 if ($INC{'Log/Contextual.pm'}) {
23 Log::Contextual->import('log_info');
25 *log_info = sub (&) { warn $_[0]->() };
28 use File::Find qw(find);
30 my $V_NUM_REGEXP = qr{v?[0-9._]+}; # crudely, a v-string or decimal
32 my $PKG_REGEXP = qr{ # match a package declaration
33 ^[\s\{;]* # intro chars on a line
34 package # the word 'package'
36 ([\w:]+) # a package name
37 \s* # optional whitespace
38 ($V_NUM_REGEXP)? # optional version number
39 \s* # optional whitesapce
40 [;\{] # semicolon line terminator or block start (since 5.16)
43 my $VARNAME_REGEXP = qr{ # match fully-qualified VERSION name
44 ([\$*]) # sigil - $ or *
46 ( # optional leading package name
47 (?:::|\')? # possibly starting like just :: (Ì la $::VERSION)
48 (?:\w+(?:::|\'))* # Foo::Bar:: ...
54 my $VERS_REGEXP = qr{ # match a VERSION definition
56 \(\s*$VARNAME_REGEXP\s*\) # with parens
58 $VARNAME_REGEXP # without parens
61 =[^=~] # = but not ==, nor =~
64 my $PODSECT_REGEXP = qr{
65 ^=(cut|pod|head[1-4]|over|item|back|begin|end|for|encoding)\b
70 my $filename = File::Spec->rel2abs( shift );
72 return undef unless defined( $filename ) && -f $filename;
73 return $class->_init(undef, $filename, @_);
80 return undef unless defined($handle) && defined($filename);
81 $filename = File::Spec->rel2abs( $filename );
83 return $class->_init(undef, $filename, @_, handle => $handle);
93 $props{inc} ||= \@INC;
94 my $filename = $class->find_module_by_name( $module, $props{inc} );
95 return undef unless defined( $filename ) && -f $filename;
96 return $class->_init($module, $filename, %props);
101 my $compare_versions = sub {
102 my ($v1, $op, $v2) = @_;
103 $v1 = version->new($v1)
104 unless UNIVERSAL::isa($v1,'version');
106 my $eval_str = "\$v1 $op \$v2";
107 my $result = eval $eval_str;
108 log_info { "error comparing versions: '$eval_str' $@" } if $@;
113 my $normalize_version = sub {
115 if ( $version =~ /[=<>!,]/ ) { # logic, not just version
116 # take as is without modification
118 elsif ( ref $version eq 'version' ) { # version objects
119 $version = $version->is_qv ? $version->normal : $version->stringify;
121 elsif ( $version =~ /^[^v][^.]*\.[^.]+\./ ) { # no leading v, multiple dots
122 # normalize string tuples without "v": "1.2.3" -> "v1.2.3"
123 $version = "v$version";
131 # separate out some of the conflict resolution logic
133 my $resolve_module_versions = sub {
134 my $packages = shift;
136 my( $file, $version );
138 foreach my $p ( @$packages ) {
139 if ( defined( $p->{version} ) ) {
140 if ( defined( $version ) ) {
141 if ( $compare_versions->( $version, '!=', $p->{version} ) ) {
142 $err .= " $p->{file} ($p->{version})\n";
144 # same version declared multiple times, ignore
148 $version = $p->{version};
151 $file ||= $p->{file} if defined( $p->{file} );
155 $err = " $file ($version)\n" . $err;
170 croak "provides() requires key/value pairs \n" if @_ % 2;
173 croak "provides() takes only one of 'dir' or 'files'\n"
174 if $args{dir} && $args{files};
176 croak "provides() requires a 'version' argument"
177 unless defined $args{version};
179 croak "provides() does not support version '$args{version}' metadata"
180 unless grep { $args{version} eq $_ } qw/1.4 2/;
182 $args{prefix} = 'lib' unless defined $args{prefix};
186 $p = $class->package_versions_from_directory($args{dir});
189 croak "provides() requires 'files' to be an array reference\n"
190 unless ref $args{files} eq 'ARRAY';
191 $p = $class->package_versions_from_directory($args{files});
194 # Now, fix up files with prefix
195 if ( length $args{prefix} ) { # check in case disabled with q{}
196 $args{prefix} =~ s{/$}{};
197 for my $v ( values %$p ) {
198 $v->{file} = "$args{prefix}/$v->{file}";
205 sub package_versions_from_directory {
206 my ( $class, $dir, $files ) = @_;
215 push @files, $_ if -f $_ && /\.pm$/;
221 # First, we enumerate all packages & versions,
222 # separating into primary & alternative candidates
224 foreach my $file (@files) {
225 my $mapped_filename = File::Spec::Unix->abs2rel( $file, $dir );
226 my @path = split( /\//, $mapped_filename );
227 (my $prime_package = join( '::', @path )) =~ s/\.pm$//;
229 my $pm_info = $class->new_from_file( $file );
231 foreach my $package ( $pm_info->packages_inside ) {
232 next if $package eq 'main'; # main can appear numerous times, ignore
233 next if $package eq 'DB'; # special debugging package, ignore
234 next if grep /^_/, split( /::/, $package ); # private package, ignore
236 my $version = $pm_info->version( $package );
238 $prime_package = $package if lc($prime_package) eq lc($package);
239 if ( $package eq $prime_package ) {
240 if ( exists( $prime{$package} ) ) {
241 croak "Unexpected conflict in '$package'; multiple versions found.\n";
243 $mapped_filename = "$package.pm" if lc("$package.pm") eq lc($mapped_filename);
244 $prime{$package}{file} = $mapped_filename;
245 $prime{$package}{version} = $version if defined( $version );
248 push( @{$alt{$package}}, {
249 file => $mapped_filename,
256 # Then we iterate over all the packages found above, identifying conflicts
257 # and selecting the "best" candidate for recording the file & version
259 foreach my $package ( keys( %alt ) ) {
260 my $result = $resolve_module_versions->( $alt{$package} );
262 if ( exists( $prime{$package} ) ) { # primary package selected
264 if ( $result->{err} ) {
265 # Use the selected primary package, but there are conflicting
266 # errors among multiple alternative packages that need to be
269 "Found conflicting versions for package '$package'\n" .
270 " $prime{$package}{file} ($prime{$package}{version})\n" .
274 } elsif ( defined( $result->{version} ) ) {
275 # There is a primary package selected, and exactly one
276 # alternative package
278 if ( exists( $prime{$package}{version} ) &&
279 defined( $prime{$package}{version} ) ) {
280 # Unless the version of the primary package agrees with the
281 # version of the alternative package, report a conflict
282 if ( $compare_versions->(
283 $prime{$package}{version}, '!=', $result->{version}
288 "Found conflicting versions for package '$package'\n" .
289 " $prime{$package}{file} ($prime{$package}{version})\n" .
290 " $result->{file} ($result->{version})\n"
295 # The prime package selected has no version so, we choose to
296 # use any alternative package that does have a version
297 $prime{$package}{file} = $result->{file};
298 $prime{$package}{version} = $result->{version};
302 # no alt package found with a version, but we have a prime
303 # package so we use it whether it has a version or not
306 } else { # No primary package was selected, use the best alternative
308 if ( $result->{err} ) {
310 "Found conflicting versions for package '$package'\n" .
315 # Despite possible conflicting versions, we choose to record
316 # something rather than nothing
317 $prime{$package}{file} = $result->{file};
318 $prime{$package}{version} = $result->{version}
319 if defined( $result->{version} );
323 # Normalize versions. Can't use exists() here because of bug in YAML::Node.
324 # XXX "bug in YAML::Node" comment seems irrelvant -- dagolden, 2009-05-18
325 for (grep defined $_->{version}, values %prime) {
326 $_->{version} = $normalize_version->( $_->{version} );
337 my $filename = shift;
340 my $handle = delete $props{handle};
341 my( %valid_props, @valid_props );
342 @valid_props = qw( collect_pod inc );
343 @valid_props{@valid_props} = delete( @props{@valid_props} );
344 warn "Unknown properties: @{[keys %props]}\n" if scalar( %props );
348 filename => $filename,
359 my $self = bless(\%data, $class);
362 $self->_parse_fh($handle);
365 $self->_parse_file();
368 unless($self->{module} and length($self->{module})) {
369 my ($v, $d, $f) = File::Spec->splitpath($self->{filename});
372 my @candidates = grep /$f$/, @{$self->{packages}};
373 $self->{module} = shift(@candidates); # punt
376 if(grep /main/, @{$self->{packages}}) {
377 $self->{module} = 'main';
380 $self->{module} = $self->{packages}[0] || '';
385 $self->{version} = $self->{versions}{$self->{module}}
386 if defined( $self->{module} );
392 sub _do_find_module {
394 my $module = shift || croak 'find_module_by_name() requires a package name';
395 my $dirs = shift || \@INC;
397 my $file = File::Spec->catfile(split( /::/, $module));
398 foreach my $dir ( @$dirs ) {
399 my $testfile = File::Spec->catfile($dir, $file);
400 return [ File::Spec->rel2abs( $testfile ), $dir ]
401 if -e $testfile and !-d _; # For stuff like ExtUtils::xsubpp
402 return [ File::Spec->rel2abs( "$testfile.pm" ), $dir ]
403 if -e "$testfile.pm";
409 sub find_module_by_name {
410 my $found = shift()->_do_find_module(@_) or return;
415 sub find_module_dir_by_name {
416 my $found = shift()->_do_find_module(@_) or return;
421 # given a line of perl code, attempt to parse it if it looks like a
422 # $VERSION assignment, returning sigil, full name, & package name
423 sub _parse_version_expression {
427 my( $sig, $var, $pkg );
428 if ( $line =~ /$VERS_REGEXP/o ) {
429 ( $sig, $var, $pkg ) = $2 ? ( $1, $2, $3 ) : ( $4, $5, $6 );
431 $pkg = ($pkg eq '::') ? 'main' : $pkg;
436 return ( $sig, $var, $pkg );
442 my $filename = $self->{filename};
443 my $fh = IO::File->new( $filename )
444 or croak( "Can't open '$filename': $!" );
446 $self->_handle_bom($fh, $filename);
448 $self->_parse_fh($fh);
451 # Look for a UTF-8/UTF-16BE/UTF-16LE BOM at the beginning of the stream.
452 # If there's one, then skip it and set the :encoding layer appropriately.
454 my ($self, $fh, $filename) = @_;
456 my $pos = $fh->getpos;
457 return unless defined $pos;
460 my $count = $fh->read( $buf, length $buf );
461 return unless defined $count and $count >= 2;
464 if ( $buf eq "\x{FE}\x{FF}" ) {
465 $encoding = 'UTF-16BE';
466 } elsif ( $buf eq "\x{FF}\x{FE}" ) {
467 $encoding = 'UTF-16LE';
468 } elsif ( $buf eq "\x{EF}\x{BB}" ) {
470 $count = $fh->read( $buf, length $buf );
471 if ( defined $count and $count >= 1 and $buf eq "\x{BF}" ) {
476 if ( defined $encoding ) {
477 if ( "$]" >= 5.008 ) {
478 # $fh->binmode requires perl 5.10
479 binmode( $fh, ":encoding($encoding)" );
483 or croak( sprintf "Can't reset position to the top of '$filename'" );
490 my ($self, $fh) = @_;
492 my( $in_pod, $seen_end, $need_vers ) = ( 0, 0, 0 );
493 my( @pkgs, %vers, %pod, @pod );
498 while (defined( my $line = <$fh> )) {
504 if ( $line =~ /$PODSECT_REGEXP/o ) {
505 $is_cut = $1 eq 'cut';
509 # Would be nice if we could also check $in_string or something too
510 last if !$in_pod && $line =~ /^__(?:DATA|END)__$/;
514 if ( $line =~ /^=head[1-4]\s+(.+)\s*$/ ) {
516 if ( $self->{collect_pod} && length( $pod_data ) ) {
517 $pod{$pod_sect} = $pod_data;
522 } elsif ( $self->{collect_pod} ) {
523 $pod_data .= "$line\n";
527 } elsif ( $is_cut ) {
529 if ( $self->{collect_pod} && length( $pod_data ) ) {
530 $pod{$pod_sect} = $pod_data;
537 # Skip comments in code
538 next if $line =~ /^\s*#/;
540 # parse $line to see if it's a $VERSION declaration
541 my( $vers_sig, $vers_fullname, $vers_pkg ) =
543 ? $self->_parse_version_expression( $line )
546 if ( $line =~ /$PKG_REGEXP/o ) {
548 push( @pkgs, $pkg ) unless grep( $pkg eq $_, @pkgs );
549 $vers{$pkg} = $2 unless exists( $vers{$pkg} );
550 $need_vers = defined $2 ? 0 : 1;
552 # VERSION defined with full package spec, i.e. $Module::VERSION
553 } elsif ( $vers_fullname && $vers_pkg ) {
554 push( @pkgs, $vers_pkg ) unless grep( $vers_pkg eq $_, @pkgs );
555 $need_vers = 0 if $vers_pkg eq $pkg;
557 unless ( defined $vers{$vers_pkg} && length $vers{$vers_pkg} ) {
559 $self->_evaluate_version_line( $vers_sig, $vers_fullname, $line );
561 # Warn unless the user is using the "$VERSION = eval
562 # $VERSION" idiom (though there are probably other idioms
563 # that we should watch out for...)
564 warn <<"EOM" unless $line =~ /=\s*eval/;
565 Package '$vers_pkg' already declared with version '$vers{$vers_pkg}',
566 ignoring subsequent declaration on line $line_num.
570 # first non-comment line in undeclared package main is VERSION
571 } elsif ( !exists($vers{main}) && $pkg eq 'main' && $vers_fullname ) {
574 $self->_evaluate_version_line( $vers_sig, $vers_fullname, $line );
576 push( @pkgs, 'main' );
578 # first non-comment line in undeclared package defines package main
579 } elsif ( !exists($vers{main}) && $pkg eq 'main' && $line =~ /\w+/ ) {
582 push( @pkgs, 'main' );
584 # only keep if this is the first $VERSION seen
585 } elsif ( $vers_fullname && $need_vers ) {
588 $self->_evaluate_version_line( $vers_sig, $vers_fullname, $line );
591 unless ( defined $vers{$pkg} && length $vers{$pkg} ) {
595 Package '$pkg' already declared with version '$vers{$pkg}'
596 ignoring new version '$v' on line $line_num.
606 if ( $self->{collect_pod} && length($pod_data) ) {
607 $pod{$pod_sect} = $pod_data;
610 $self->{versions} = \%vers;
611 $self->{packages} = \@pkgs;
612 $self->{pod} = \%pod;
613 $self->{pod_headings} = \@pod;
618 sub _evaluate_version_line {
620 my( $sigil, $var, $line ) = @_;
622 # Some of this code came from the ExtUtils:: hierarchy.
624 # We compile into $vsub because 'use version' would cause
625 # compiletime/runtime issues with local()
627 $pn++; # everybody gets their own package
628 my $eval = qq{BEGIN { q# Hide from _packages_inside()
629 #; package Module::Metadata::_version::p$pn;
642 # Try to get the $VERSION
644 # some modules say $VERSION = $Foo::Bar::VERSION, but Foo::Bar isn't
645 # installed, so we need to hunt in ./lib for it
646 if ( $@ =~ /Can't locate/ && -d 'lib' ) {
647 local @INC = ('lib',@INC);
650 warn "Error evaling version line '$eval' in $self->{filename}: $@\n"
652 (ref($vsub) eq 'CODE') or
653 croak "failed to build version sub for $self->{filename}";
654 my $result = eval { $vsub->() };
655 croak "Could not get version from $self->{filename} by executing:\n$eval\n\nThe fatal error was: $@\n"
658 # Upgrade it into a version object
659 my $version = eval { _dwim_version($result) };
661 croak "Version '$result' from $self->{filename} does not appear to be valid:\n$eval\n\nThe fatal error was: $@\n"
662 unless defined $version; # "0" is OK!
668 # Try to DWIM when things fail the lax version test in obvious ways
671 # Best case, it just works
672 sub { return shift },
674 # If we still don't have a version, try stripping any
675 # trailing junk that is prohibited by lax rules
678 $v =~ s{([0-9])[a-z-].*$}{$1}i; # 1.23-alpha or 1.23b
682 # Activestate apparently creates custom versions like '1.23_45_01', which
683 # cause version.pm to think it's an invalid alpha. So check for that
687 my $num_dots = () = $v =~ m{(\.)}g;
688 my $num_unders = () = $v =~ m{(_)}g;
689 my $leading_v = substr($v,0,1) eq 'v';
690 if ( ! $leading_v && $num_dots < 2 && $num_unders > 1 ) {
692 $num_unders = () = $v =~ m{(_)}g;
697 # Worst case, try numifying it like we would have before version objects
700 no warnings 'numeric';
707 my ($result) = shift;
709 return $result if ref($result) eq 'version';
711 my ($version, $error);
712 for my $f (@version_prep) {
713 $result = $f->($result);
714 $version = eval { version->new($result) };
715 $error ||= $@ if $@; # capture first failure
716 last if defined $version;
719 croak $error unless defined $version;
725 ############################################################
728 sub name { $_[0]->{module} }
730 sub filename { $_[0]->{filename} }
731 sub packages_inside { @{$_[0]->{packages}} }
732 sub pod_inside { @{$_[0]->{pod_headings}} }
733 sub contains_pod { $#{$_[0]->{pod_headings}} }
737 my $mod = shift || $self->{module};
739 if ( defined( $mod ) && length( $mod ) &&
740 exists( $self->{versions}{$mod} ) ) {
741 return $self->{versions}{$mod};
750 if ( defined( $sect ) && length( $sect ) &&
751 exists( $self->{pod}{$sect} ) ) {
752 return $self->{pod}{$sect};
762 Module::Metadata - Gather package and POD information from perl module files
766 use Module::Metadata;
768 # information about a .pm file
769 my $info = Module::Metadata->new_from_file( $file );
770 my $version = $info->version;
772 # CPAN META 'provides' field for .pm files in a directory
773 my $provides = Module::Metadata->provides(
774 dir => 'lib', version => 2
779 This module provides a standard way to gather metadata about a .pm file
780 without executing unsafe code.
788 =item C<< new_from_file($filename, collect_pod => 1) >>
790 Constructs a C<Module::Metadata> object given the path to a file. Returns
791 undef if the filename does not exist.
793 C<collect_pod> is a optional boolean argument that determines whether POD
794 data is collected and stored for reference. POD data is not collected by
795 default. POD headings are always collected.
797 If the file begins by an UTF-8, UTF-16BE or UTF-16LE byte-order mark, then
798 it is skipped before processing, and the content of the file is also decoded
799 appropriately starting from perl 5.8.
801 =item C<< new_from_handle($handle, $filename, collect_pod => 1) >>
803 This works just like C<new_from_file>, except that a handle can be provided
804 as the first argument.
806 Note that there is no validation to confirm that the handle is a handle or
807 something that can act like one. Passing something that isn't a handle will
808 cause a exception when trying to read from it. The C<filename> argument is
809 mandatory or undef will be returned.
811 You are responsible for setting the decoding layers on C<$handle> if
814 =item C<< new_from_module($module, collect_pod => 1, inc => \@dirs) >>
816 Constructs a C<Module::Metadata> object given a module or package name.
817 Returns undef if the module cannot be found.
819 In addition to accepting the C<collect_pod> argument as described above,
820 this method accepts a C<inc> argument which is a reference to an array of
821 directories to search for the module. If none are given, the default is
824 If the file that contains the module begins by an UTF-8, UTF-16BE or
825 UTF-16LE byte-order mark, then it is skipped before processing, and the
826 content of the file is also decoded appropriately starting from perl 5.8.
828 =item C<< find_module_by_name($module, \@dirs) >>
830 Returns the path to a module given the module or package name. A list
831 of directories can be passed in as an optional parameter, otherwise
834 Can be called as either an object or a class method.
836 =item C<< find_module_dir_by_name($module, \@dirs) >>
838 Returns the entry in C<@dirs> (or C<@INC> by default) that contains
839 the module C<$module>. A list of directories can be passed in as an
840 optional parameter, otherwise @INC is searched.
842 Can be called as either an object or a class method.
844 =item C<< provides( %options ) >>
846 This is a convenience wrapper around C<package_versions_from_directory>
847 to generate a CPAN META C<provides> data structure. It takes key/value
848 pairs. Valid option keys include:
852 =item version B<(required)>
854 Specifies which version of the L<CPAN::Meta::Spec> should be used as
855 the format of the C<provides> output. Currently only '1.4' and '2'
856 are supported (and their format is identical). This may change in
857 the future as the definition of C<provides> changes.
859 The C<version> option is required. If it is omitted or if
860 an unsupported version is given, then C<provides> will throw an error.
864 Directory to search recursively for F<.pm> files. May not be specified with
869 Array reference of files to examine. May not be specified with C<dir>.
873 String to prepend to the C<file> field of the resulting output. This defaults
874 to F<lib>, which is the common case for most CPAN distributions with their
875 F<.pm> files in F<lib>. This option ensures the META information has the
876 correct relative path even when the C<dir> or C<files> arguments are
877 absolute or have relative paths from a location other than the distribution
882 For example, given C<dir> of 'lib' and C<prefix> of 'lib', the return value
883 is a hashref of the form:
888 file => 'lib/Package/Name.pm'
890 'OtherPackage::Name' => ...
893 =item C<< package_versions_from_directory($dir, \@files?) >>
895 Scans C<$dir> for .pm files (unless C<@files> is given, in which case looks
896 for those files in C<$dir> - and reads each file for packages and versions,
897 returning a hashref of the form:
902 file => 'Package/Name.pm'
904 'OtherPackage::Name' => ...
907 The C<DB> and C<main> packages are always omitted, as are any "private"
908 packages that have leading underscores in the namespace (e.g.
911 Note that the file path is relative to C<$dir> if that is specified.
912 This B<must not> be used directly for CPAN META C<provides>. See
913 the C<provides> method instead.
915 =item C<< log_info (internal) >>
917 Used internally to perform logging; imported from Log::Contextual if
918 Log::Contextual has already been loaded, otherwise simply calls warn.
922 =head2 Object methods
928 Returns the name of the package represented by this module. If there
929 are more than one packages, it makes a best guess based on the
930 filename. If it's a script (i.e. not a *.pm) the package name is
933 =item C<< version($package) >>
935 Returns the version as defined by the $VERSION variable for the
936 package as returned by the C<name> method if no arguments are
937 given. If given the name of a package it will attempt to return the
938 version of that package if it is specified in the file.
940 =item C<< filename() >>
942 Returns the absolute path to the file.
944 =item C<< packages_inside() >>
946 Returns a list of packages. Note: this is a raw list of packages
947 discovered (or assumed, in the case of C<main>). It is not
948 filtered for C<DB>, C<main> or private packages the way the
949 C<provides> method does.
951 =item C<< pod_inside() >>
953 Returns a list of POD sections.
955 =item C<< contains_pod() >>
957 Returns true if there is any POD in the file.
959 =item C<< pod($section) >>
961 Returns the POD data in the given section.
967 Original code from Module::Build::ModuleInfo by Ken Williams
968 <kwilliams@cpan.org>, Randy W. Sims <RandyS@ThePierianSpring.org>
970 Released as Module::Metadata by Matt S Trout (mst) <mst@shadowcat.co.uk> with
971 assistance from David Golden (xdg) <dagolden@cpan.org>.
973 =head1 COPYRIGHT & LICENSE
975 Original code Copyright (c) 2001-2011 Ken Williams.
976 Additional code Copyright (c) 2010-2011 Matt Trout and David Golden.
979 This library is free software; you can redistribute it and/or
980 modify it under the same terms as Perl itself.