From: Edward Zborowski Date: Wed, 27 Feb 2013 07:36:12 +0000 (-0500) Subject: Improve package detection regular expression X-Git-Tag: v1.000012~9 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=8e4bef0157d39acfa8212ca3c644daea44a815d6;p=p5sagit%2FModule-Metadata.git Improve package detection regular expression Added tests to metadata.t for valid and invalid package names. Updated regular expressions within Module::Metadata so that invalid package names are not passed along as valid package names --- diff --git a/lib/Module/Metadata.pm b/lib/Module/Metadata.pm index d992477..9f92dde 100644 --- a/lib/Module/Metadata.pm +++ b/lib/Module/Metadata.pm @@ -29,11 +29,39 @@ use File::Find qw(find); my $V_NUM_REGEXP = qr{v?[0-9._]+}; # crudely, a v-string or decimal +my $PKG_FIRST_WORD_REGEXP = qr{ # the FIRST word in a package name + [a-zA-Z_] # the first word CANNOT start with a digit + (?: + [\w']? # can contain letters, digits, _, or ticks + \w # But, NO multi-ticks or trailing ticks + )* +}x; + +my $PKG_ADDL_WORD_REGEXP = qr{ # the 2nd+ word in a package name + \w # the 2nd+ word CAN start with digits + (?: + [\w']? # and can contain letters or ticks + \w # But, NO multi-ticks or trailing ticks + )* +}x; + +my $PKG_NAME_REGEXP = qr{ # match a package name + (?: :: )? # a pkg name can start with aristotle + $PKG_FIRST_WORD_REGEXP # a package word + (?: + (?: :: )+ ### aristotle (allow one or many times) + $PKG_ADDL_WORD_REGEXP ### a package word + )* # ^ zero, one or many times + (?: + :: # allow trailing aristotle + )? +}x; + my $PKG_REGEXP = qr{ # match a package declaration ^[\s\{;]* # intro chars on a line package # the word 'package' \s+ # whitespace - ([\w:]+) # a package name + ($PKG_NAME_REGEXP) # a package name \s* # optional whitespace ($V_NUM_REGEXP)? # optional version number \s* # optional whitesapce @@ -934,7 +962,10 @@ Returns the absolute path to the file. Returns a list of packages. Note: this is a raw list of packages discovered (or assumed, in the case of C
). It is not filtered for C, C
or private packages the way the -C method does. +C method does. Invalid package names are not returned, +for example "Foo:Bar". Strange but valid package names are +returned, for example "Foo::Bar::", and are left up to the caller +on how to handle. =item C<< pod_inside() >> diff --git a/t/metadata.t b/t/metadata.t index b7adb1e..286b1ae 100644 --- a/t/metadata.t +++ b/t/metadata.t @@ -212,7 +212,47 @@ package Simple v1.2.3_4 { ); my %modules = reverse @modules; -plan tests => 54 + 2 * keys( %modules ); +my @pkg_names = ( + [ 'Simple' ] => <<'---', # package NAME +package Simple; +--- + [ 'Simple::Edward' ] => <<'---', # package NAME::SUBNAME +package Simple::Edward; +--- + [ 'Simple::Edward::' ] => <<'---', # package NAME::SUBNAME:: +package Simple::Edward::; +--- + [ "Simple'Edward" ] => <<'---', # package NAME'SUBNAME +package Simple'Edward; +--- + [ "Simple'Edward::" ] => <<'---', # package NAME'SUBNAME:: +package Simple'Edward::; +--- + [ 'Simple::::Edward' ] => <<'---', # package NAME::::SUBNAME +package Simple::::Edward; +--- + [ '::Simple::Edward' ] => <<'---', # package ::NAME::SUBNAME +package ::Simple::Edward; +--- + [ 'main' ] => <<'---', # package NAME:SUBNAME (fail) +package Simple:Edward; +--- + [ 'main' ] => <<'---', # package NAME' (fail) +package Simple'; +--- + [ 'main' ] => <<'---', # package NAME::SUBNAME' (fail) +package Simple::Edward'; +--- + [ 'main' ] => <<'---', # package NAME''SUBNAME (fail) +package Simple''Edward; +--- + [ 'main' ] => <<'---', # package NAME-SUBNAME (fail) +package Simple-Edward; +--- +); +my %pkg_names = reverse @pkg_names; + +plan tests => 54 + (2 * keys( %modules )) + (2 * keys( %pkg_names )); require_ok('Module::Metadata'); @@ -298,6 +338,29 @@ foreach my $module ( sort keys %modules ) { # revert to pristine state $dist->regen( clean => 1 ); +foreach my $pkg_name ( sort keys %pkg_names ) { + my $expected = $pkg_names{$pkg_name}; + + $dist->change_file( 'lib/Simple.pm', $pkg_name ); + $dist->regen; + + my $warnings = ''; + local $SIG{__WARN__} = sub { $warnings .= $_ for @_ }; + my $pm_info = Module::Metadata->new_from_file( $file ); + + # Test::Builder will prematurely numify objects, so use this form + my $errs; + my @got = $pm_info->packages_inside(); + is_deeply( \@got, $expected, + "correct package names (expected '" . join(', ', @$expected) . "')" ) + or $errs++; + is( $warnings, '', 'no warnings from parsing' ) or $errs++; + diag "Got: '" . join(', ', @got) . "'\nModule contents:\n$pkg_name" if $errs; +} + +# revert to pristine state +$dist->regen( clean => 1 ); + # Find each package only once $dist->change_file( 'lib/Simple.pm', <<'---' ); package Simple;