From: David Golden Date: Thu, 14 Jan 2010 02:47:30 +0000 (-0500) Subject: Omnibus strict and lax version parsing X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=91152fc19d1c59a1213e39f74ac8a80f4a015f5e;p=p5sagit%2Fp5-mst-13.2.git Omnibus strict and lax version parsing Authors: John Peacock, David Golden and Zefram The goal of this mega-patch is to enforce strict rules for version numbers provided to 'package NAME VERSION' while formalizing the prior, lax rules used for version object creation. Parsing for use() is unchanged. version.pm adds two globals, $STRICT and $LAX, containing regular expressions that define the rules. There are two additional functions -- version::is_strict and version::is_lax -- that test an argument against these rules. However, parsing of strings that might contain version numbers is done in core via the Perl_scan_version function, which may be called during compilation or may be called later when version objects are created by Perl_new_version or Perl_upg_version. A new helper function, Perl_prescan_version, has been added to validate a string under either strict or lax rules. This is used in toke.c for 'package NAME VERSION' in strict mode and by Perl_scan_version in lax mode. It matches the behavior of the verison.pm regular expressions, but does not use them directly. A new test file, comp/packagev.t, validates strict and lax behaviors of 'package NAME VERSION' and 'version->new(VERSION)' respectively and verifies their behavior against the $STRICT and $LAX regular expressions, as well. Validating these two implementation should help ensure they each work as intended. Other files and tests have been modified as necessary to support these changes. There is remaining work to be done in a few areas: * documenting all changes in behavior and new functions * determining proper treatment of "," as decimal separators in various locales * updating diagnostics for new error messages * porting changes back to the version.pm distribution on CPAN, including pure-Perl versions --- diff --git a/dist/Safe/Safe.pm b/dist/Safe/Safe.pm index eb7d68b..476b9fd 100644 --- a/dist/Safe/Safe.pm +++ b/dist/Safe/Safe.pm @@ -62,6 +62,8 @@ my $default_share = [qw[ &utf8::unicode_to_native $version::VERSION $version::CLASS + $version::STRICT + $version::LAX @version::ISA ], ($] >= 5.008001 && qw[ &Regexp::DESTROY diff --git a/dist/XSLoader/t/XSLoader.t b/dist/XSLoader/t/XSLoader.t index 038986e..211c4d8 100644 --- a/dist/XSLoader/t/XSLoader.t +++ b/dist/XSLoader/t/XSLoader.t @@ -30,7 +30,7 @@ my %modules = ( 'Time::HiRes'=> q| ::can_ok( 'Time::HiRes' => 'usleep' ) |, # 5.7.3 ); -plan tests => keys(%modules) * 4 + 5; +plan tests => keys(%modules) * 3 + 5; # Try to load the module use_ok( 'XSLoader' ); @@ -65,11 +65,9 @@ for my $module (sort keys %modules) { SKIP: { skip "$module not available", 4 if $extensions !~ /\b$module\b/; - eval qq{ package $module; XSLoader::load('$module', "qunckkk"); }; - like( $@, "/^$module object version \\S+ does not match bootstrap parameter (?:qunckkk|0)/", + eval qq{ package $module; XSLoader::load('$module', "12345678"); }; + like( $@, "/^$module object version \\S+ does not match bootstrap parameter (?:12345678|0)/", "calling XSLoader::load() with a XS module and an incorrect version" ); - like( $warnings, "/^\$|^Version string 'qunckkk' contains invalid data; ignoring: 'qunckkk'/", - "in Perl 5.10, DynaLoader warns about the incorrect version string" ); eval qq{ package $module; XSLoader::load('$module'); }; is( $@, '', "XSLoader::load($module)"); diff --git a/embed.fnc b/embed.fnc index 17089ff..abfa92b 100644 --- a/embed.fnc +++ b/embed.fnc @@ -745,6 +745,8 @@ Apa |PERL_SI*|new_stackinfo|I32 stitems|I32 cxitems Ap |char* |scan_vstring |NN const char *s|NN const char *const e \ |NN SV *sv Apd |const char* |scan_version |NN const char *s|NN SV *rv|bool qv +Apd |const char* |prescan_version |NN const char *s\ + |bool strict|NULLOK const char** errstr|bool *sqv|int *ssaw_period|int *swidth|bool *salpha Apd |SV* |new_version |NN SV *ver Apd |SV* |upg_version |NN SV *ver|bool qv Apd |bool |vverify |NN SV *vs @@ -1804,6 +1806,7 @@ sRn |PTR_TBL_ENT_t *|ptr_table_find|NN PTR_TBL_t *const tbl|NULLOK const void *c s |void |check_uni s |void |force_next |I32 type s |char* |force_version |NN char *s|int guessing +s |char* |force_strict_version |NN char *s s |char* |force_word |NN char *start|int token|int check_keyword \ |int allow_pack|int allow_tick s |SV* |tokeq |NN SV *sv diff --git a/embed.h b/embed.h index c949c5c..246106b 100644 --- a/embed.h +++ b/embed.h @@ -636,6 +636,7 @@ #define new_stackinfo Perl_new_stackinfo #define scan_vstring Perl_scan_vstring #define scan_version Perl_scan_version +#define prescan_version Perl_prescan_version #define new_version Perl_new_version #define upg_version Perl_upg_version #define vverify Perl_vverify @@ -1591,6 +1592,7 @@ #define check_uni S_check_uni #define force_next S_force_next #define force_version S_force_version +#define force_strict_version S_force_strict_version #define force_word S_force_word #define tokeq S_tokeq #define readpipe_override S_readpipe_override @@ -3036,6 +3038,7 @@ #define new_stackinfo(a,b) Perl_new_stackinfo(aTHX_ a,b) #define scan_vstring(a,b,c) Perl_scan_vstring(aTHX_ a,b,c) #define scan_version(a,b,c) Perl_scan_version(aTHX_ a,b,c) +#define prescan_version(a,b,c,d,e,f,g) Perl_prescan_version(aTHX_ a,b,c,d,e,f,g) #define new_version(a) Perl_new_version(aTHX_ a) #define upg_version(a,b) Perl_upg_version(aTHX_ a,b) #define vverify(a) Perl_vverify(aTHX_ a) @@ -4000,6 +4003,7 @@ #define check_uni() S_check_uni(aTHX) #define force_next(a) S_force_next(aTHX_ a) #define force_version(a,b) S_force_version(aTHX_ a,b) +#define force_strict_version(a) S_force_strict_version(aTHX_ a) #define force_word(a,b,c,d,e) S_force_word(aTHX_ a,b,c,d,e) #define tokeq(a) S_tokeq(aTHX_ a) #define readpipe_override() S_readpipe_override(aTHX) diff --git a/global.sym b/global.sym index ae6a48f..f0361df 100644 --- a/global.sym +++ b/global.sym @@ -376,6 +376,7 @@ Perl_newWHILEOP Perl_new_stackinfo Perl_scan_vstring Perl_scan_version +Perl_prescan_version Perl_new_version Perl_upg_version Perl_vverify diff --git a/handy.h b/handy.h index 63f7fd8..07ab78d 100644 --- a/handy.h +++ b/handy.h @@ -656,6 +656,18 @@ US-ASCII (Basic Latin) range are viewed as not having any case. typedef U32 line_t; #define NOLINE ((line_t) 4294967295UL) +/* Helpful alias for version prescan */ +#define is_LAX_VERSION(a,b) \ + (a != Perl_prescan_version(aTHX_ a, FALSE, b, NULL, NULL, NULL, NULL)) + +#define is_STRICT_VERSION(a,b) \ + (a != Perl_prescan_version(aTHX_ a, TRUE, b, NULL, NULL, NULL, NULL)) + +#define BADVERSION(a,b,c) \ + if (b) { \ + *b = c; \ + } \ + return a; /* =head1 Memory Management diff --git a/lib/version.pm b/lib/version.pm index 9201a02..424463d 100644 --- a/lib/version.pm +++ b/lib/version.pm @@ -4,12 +4,116 @@ package version; use 5.005_04; use strict; -use vars qw(@ISA $VERSION $CLASS *declare *qv); +use vars qw(@ISA $VERSION $CLASS $STRICT $LAX *declare *qv); -$VERSION = 0.77; +$VERSION = 0.81; $CLASS = 'version'; +#--------------------------------------------------------------------------# +# Version regexp components +#--------------------------------------------------------------------------# + +# Fraction part of a decimal version number. This is a common part of +# both strict and lax decimal versions + +my $FRACTION_PART = qr/\.[0-9]+/; + +# First part of either decimal or dotted-decimal strict version number. +# Unsigned integer with no leading zeroes (except for zero itself) to +# avoid confusion with octal. + +my $STRICT_INTEGER_PART = qr/0|[1-9][0-9]*/; + +# First part of either decimal or dotted-decimal lax version number. +# Unsigned integer, but allowing leading zeros. Always interpreted +# as decimal. However, some forms of the resulting syntax give odd +# results if used as ordinary Perl expressions, due to how perl treats +# octals. E.g. +# version->new("010" ) == 10 +# version->new( 010 ) == 8 +# version->new( 010.2) == 82 # "8" . "2" + +my $LAX_INTEGER_PART = qr/[0-9]+/; + +# Second and subsequent part of a strict dotted-decimal version number. +# Leading zeroes are permitted, and the number is always decimal. +# Limited to three digits to avoid overflow when converting to decimal +# form and also avoid problematic style with excessive leading zeroes. + +my $STRICT_DOTTED_DECIMAL_PART = qr/\.[0-9]{1,3}/; + +# Second and subsequent part of a lax dotted-decimal version number. +# Leading zeroes are permitted, and the number is always decimal. No +# limit on the numerical value or number of digits, so there is the +# possibility of overflow when converting to decimal form. + +my $LAX_DOTTED_DECIMAL_PART = qr/\.[0-9]+/; + +# Alpha suffix part of lax version number syntax. Acts like a +# dotted-decimal part. + +my $LAX_ALPHA_PART = qr/_[0-9]+/; + +#--------------------------------------------------------------------------# +# Strict version regexp definitions +#--------------------------------------------------------------------------# + +# Strict decimal version number. + +my $STRICT_DECIMAL_VERSION = + qr/ $STRICT_INTEGER_PART $FRACTION_PART? /x; + +# Strict dotted-decimal version number. Must have both leading "v" and +# at least three parts, to avoid confusion with decimal syntax. + +my $STRICT_DOTTED_DECIMAL_VERSION = + qr/ v $STRICT_INTEGER_PART $STRICT_DOTTED_DECIMAL_PART{2,} /x; + +# Complete strict version number syntax -- should generally be used +# anchored: qr/ \A $STRICT \z /x + +$STRICT = + qr/ $STRICT_DECIMAL_VERSION | $STRICT_DOTTED_DECIMAL_VERSION /x; + +#--------------------------------------------------------------------------# +# Lax version regexp definitions +#--------------------------------------------------------------------------# + +# Lax decimal version number. Just like the strict one except for +# allowing an alpha suffix or allowing a leading or trailing +# decimal-point + +my $LAX_DECIMAL_VERSION = + qr/ $LAX_INTEGER_PART (?: \. | $FRACTION_PART $LAX_ALPHA_PART? )? + | + $FRACTION_PART $LAX_ALPHA_PART? + /x; + +# Lax dotted-decimal version number. Distinguished by having either +# leading "v" or at least three non-alpha parts. Alpha part is only +# permitted if there are at least two non-alpha parts. Strangely +# enough, without the leading "v", Perl takes .1.2 to mean v0.1.2, +# so when there is no "v", the leading part is optional + +my $LAX_DOTTED_DECIMAL_VERSION = + qr/ + v $LAX_INTEGER_PART (?: $LAX_DOTTED_DECIMAL_PART+ $LAX_ALPHA_PART? )? + | + $LAX_INTEGER_PART? $LAX_DOTTED_DECIMAL_PART{2,} $LAX_ALPHA_PART? + /x; + +# Complete lax version number syntax -- should generally be used +# anchored: qr/ \A $LAX \z /x +# +# The string 'undef' is a special case to make for easier handling +# of return values from ExtUtils::MM->parse_version + +$LAX = + qr/ undef | $LAX_DECIMAL_VERSION | $LAX_DOTTED_DECIMAL_VERSION /x; + +#--------------------------------------------------------------------------# + # Preloaded methods go here. sub import { no strict 'refs'; @@ -33,7 +137,7 @@ sub import { 'UNIVERSAL::VERSION' => 1, ); } - + my $callpkg = caller(); if (exists($args{declare})) { @@ -53,4 +157,7 @@ sub import { } } +sub is_strict { defined $_[0] && $_[0] =~ qr/ \A $STRICT \z /x } +sub is_lax { defined $_[0] && $_[0] =~ qr/ \A $LAX \z /x } + 1; diff --git a/lib/version.t b/lib/version.t index 8067f1a..f44cfea 100644 --- a/lib/version.t +++ b/lib/version.t @@ -132,43 +132,32 @@ sub BaseTests { # test illegal formats diag "test illegal formats" unless $ENV{PERL_CORE}; - eval {my $version = $CLASS->$method("1.2_3_4")}; + eval {$version = $CLASS->$method("1.2_3_4")}; like($@, qr/multiple underscores/, "Invalid version format (multiple underscores)"); - eval {my $version = $CLASS->$method("1.2_3.4")}; + eval {$version = $CLASS->$method("1.2_3.4")}; like($@, qr/underscores before decimal/, "Invalid version format (underscores before decimal)"); - eval {my $version = $CLASS->$method("1_2")}; + eval {$version = $CLASS->$method("1_2")}; like($@, qr/alpha without decimal/, "Invalid version format (alpha without decimal)"); - # for this test, upgrade the warn() to die() - eval { - local $SIG{__WARN__} = sub { die $_[0] }; - $version = $CLASS->$method("1.2b3"); - }; - my $warnregex = "Version string '.+' contains invalid data; ". - "ignoring: '.+'"; - - like($@, qr/$warnregex/, - "Version string contains invalid data; ignoring"); + eval { $version = $CLASS->$method("1.2b3")}; + like($@, qr/non-numeric data/, + "Invalid version format (non-numeric data)"); # from here on out capture the warning and test independently { - $version = $CLASS->$method("99 and 44/100 pure"); + eval{$version = $CLASS->$method("99 and 44/100 pure")}; - like($warning, qr/$warnregex/, - "Version string contains invalid data; ignoring"); - is ("$version", "99", '$version eq "99"'); - ok ($version->numify == 99.0, '$version->numify == 99.0'); - ok ($version->normal eq "v99.0.0", '$version->normal eq v99.0.0'); + like($@, qr/non-numeric data/, + "Invalid version format (non-numeric data)"); - $version = $CLASS->$method("something"); - like($warning, qr/$warnregex/, - "Version string contains invalid data; ignoring"); - ok (defined $version, 'defined $version'); + eval{$version = $CLASS->$method("something")}; + like($@, qr/non-numeric data/, + "Invalid version format (non-numeric data)"); # reset the test object to something reasonable $version = $CLASS->$method("1.2.3"); @@ -557,9 +546,8 @@ SKIP: { local $SIG{__WARN__} = sub { $warning = $_[0] }; $DB::single = 1; - my $v = $CLASS->$method('1,7'); - unlike($warning, qr"Version string '1,7' contains invalid data", - 'Directly test comma as decimal compliance'); + my $v = eval { $CLASS->$method('1,7') }; +# is( $@, "", 'Directly test comma as decimal compliance'); my $ver = 1.23; # has to be floating point number my $orig_loc = setlocale( LC_ALL ); diff --git a/proto.h b/proto.h index 02fdd2d..223086c 100644 --- a/proto.h +++ b/proto.h @@ -2383,6 +2383,11 @@ PERL_CALLCONV const char* Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv #define PERL_ARGS_ASSERT_SCAN_VERSION \ assert(s); assert(rv) +PERL_CALLCONV const char* Perl_prescan_version(pTHX_ const char *s, bool strict, const char** errstr, bool *sqv, int *ssaw_period, int *swidth, bool *salpha) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_PRESCAN_VERSION \ + assert(s) + PERL_CALLCONV SV* Perl_new_version(pTHX_ SV *ver) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_NEW_VERSION \ @@ -5802,6 +5807,11 @@ STATIC char* S_force_version(pTHX_ char *s, int guessing) #define PERL_ARGS_ASSERT_FORCE_VERSION \ assert(s) +STATIC char* S_force_strict_version(pTHX_ char *s) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_FORCE_STRICT_VERSION \ + assert(s) + STATIC char* S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_tick) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_FORCE_WORD \ diff --git a/t/comp/package.t b/t/comp/package.t index 85fd1a5..fa28868 100644 --- a/t/comp/package.t +++ b/t/comp/package.t @@ -1,6 +1,6 @@ #!./perl -print "1..22\n"; +print "1..14\n"; $blurfl = 123; $foo = 3; @@ -72,35 +72,3 @@ package bug32562; print __PACKAGE__ eq 'bug32562' ? "ok 13\n" : "not ok 13\n"; print eval '__PACKAGE__' eq 'bug32562' ? "ok 14\n" : "not ok 14\n"; -# test: package NAME VERSION - -my @variations = ( - '1.00', - '1.00_01', - 'v1.2.3', - 'v1.2_3', -); - -my $test_count = 15; - -for my $v ( @variations ) { - my $ok = eval "package withversion $v; $v eq \$withversion::VERSION"; - print $ok ? "ok $test_count\n" : "not ok $test_count\n"; - $test_count++; -} - -eval q/package Foo Bar/; -$@ =~ /syntax error/ or print "not "; -print "ok $test_count\n"; $test_count++; - -eval q/package Foo 1a/; -$@ =~ /syntax error/ or print "not "; -print "ok $test_count\n"; $test_count++; - -eval q/package Foo v/; -$@ =~ /syntax error/ or print "not "; -print "ok $test_count\n"; $test_count++; - -eval q/package Foo $foo/; -$@ =~ /syntax error/ or print "not "; -print "ok $test_count\n"; $test_count++; diff --git a/t/comp/packagev.t b/t/comp/packagev.t new file mode 100644 index 0000000..bc99ec4 --- /dev/null +++ b/t/comp/packagev.t @@ -0,0 +1,169 @@ +#!./perl + +BEGIN { + chdir 't'; + @INC = '../lib'; + require './test.pl'; +} + +# XXX remove this later -- dagolden, 2010-01-13 +# local *STDERR = *STDOUT; + +my @syntax_cases = ( + 'package Foo', + 'package Bar 1.23', + 'package Baz v1.2.3', +); + +my @version_cases = ; + +plan tests => 5 * @syntax_cases + 5 * grep { $_ !~ /^#/ } @version_cases; + +use warnings qw/syntax/; +use version; + +for my $string ( @syntax_cases ) { + eval "$string"; + is( $@, '', qq/eval "$string"/ ); + eval "$string;"; + is( $@, '', qq/eval "$string;"/ ); + eval "$string ;"; + is( $@, '', qq/eval "$string ;"/ ); + eval "{$string}"; + is( $@, '', qq/eval "{$string}"/ ); + eval "{ $string }"; + is( $@, '', qq/eval "{ $string }"/ ); +} + +LINE: +for my $line (@version_cases) { + chomp $line; + # comments in data section are just diagnostics + if ($line =~ /^#/) { + diag $line; + next LINE; + } + + my ($v, $package, $quoted, $bare, $match) = split /\t+/, $line; + my $warning = ""; + local $SIG{__WARN__} = sub { $warning .= $_[0] . "\n" }; + $match = defined $match ? $match : ""; + $match =~ s/\s*\z//; # kill trailing spaces + + # First handle the 'package NAME VERSION' case + $withversion::VERSION = undef; + if ($package eq 'fail') { + eval "package withversion $v"; + like($@, qr/$match/, "package withversion $v -> syntax error ($match)"); + ok(! version::is_strict($v), qq{... and "$v" should also fail STRICT regex}); + } + else { + my $ok = eval "package withversion $v; $v eq \$withversion::VERSION"; + ok($ok, "package withversion $v") + or diag( $@ ? $@ : "and \$VERSION = $withversion::VERSION"); + ok( version::is_strict($v), qq{... and "$v" should pass STRICT regex}); + } + + + # Now check the version->new("V") case + my $ver = undef; + eval qq/\$ver = version->new("$v")/; + if ($quoted eq 'fail') { + like($@, qr/$match/, qq{version->new("$v") -> invalid format ($match)}) + or diag( $@ ? $@ : "and \$ver = $ver" ); + ok( ! version::is_lax($v), qq{... and "$v" should fail LAX regex}); + } + else { + is($@, "", qq{version->new("$v")}); + ok( version::is_lax($v), qq{... and "$v" should pass LAX regex}); + } + + # Now check the version->new(V) case, unless we're skipping it + if ( $bare eq 'na' ) { + pass( "... skipping version->new($v)" ); + next LINE; + } + $ver = undef; + eval qq/\$ver = version->new($v)/; + if ($bare eq 'fail') { + like($@, qr/$match/m, qq{... and unquoted version->new($v) has same error}) + or diag( $@ ? $@ : "and \$ver = $ver" ); + } + else { + is($@, "", qq{... and version->new($v) is ok}); + } +} + + +# The data is organized in tab delimited format with these columns: +# +# value package version->new version->new regex +# quoted unquoted +# +# For each value, it is tested using eval in the following expressions +# +# package foo $value; # column 2 +# and +# my $ver = version->new("$value"); # column 3 +# and +# my $ver = version->new($value); # column 4 +# +# The second through fourth columns can contain 'pass' or 'fail'. +# +# For any column with 'pass', the tests makes sure that no warning/error +# was thrown. For any column with 'fail', the tests make sure that the +# error thrown matches the regex in the last column. The unquoted column +# may also have 'na' indicating that it's pointless to test as behavior +# is subject to the perl parser before a stringifiable value is available +# to version->new +# +# If all columns are marked 'pass', the regex column is left empty. +# +# there are multiple ways that underscores can fail depending on strict +# vs lax format so these test do not distinguish between them +# +# If the DATA line begins with a # mark, it is used as a diag comment +__DATA__ +1.00 pass pass pass +1.00001 pass pass pass +0.123 pass pass pass +12.345 pass pass pass +42 pass pass pass +0 pass pass pass +0.0 pass pass pass +v1.2.3 pass pass pass +v1.2.3.4 pass pass pass +v0.1.2 pass pass pass +v0.0.0 pass pass pass +01 fail pass pass no leading zeros +01.0203 fail pass pass no leading zeros +v01 fail pass pass no leading zeros +v01.02.03 fail pass pass no leading zeros +.1 fail pass pass 0 before decimal required +.1.2 fail pass pass 0 before decimal required +1. fail pass pass fractional part required +1.a fail fail na fractional part required +1._ fail fail na fractional part required +1.02_03 fail pass pass underscore +v1.2_3 fail pass pass underscore +v1.02_03 fail pass pass underscore +v1.2_3_4 fail fail fail underscore +v1.2_3.4 fail fail fail underscore +1.2_3.4 fail fail fail underscore +0_ fail fail na underscore +1_ fail fail na underscore +1_. fail fail na underscore +1.1_ fail fail na underscore +1.02_03_04 fail fail na underscore +1.2.3 fail pass pass dotted-decimal versions must begin with 'v' +v1.2 fail pass pass dotted-decimal versions require at least three parts +v0 fail pass pass dotted-decimal versions require at least three parts +v1 fail pass pass dotted-decimal versions require at least three parts +v.1.2.3 fail fail na dotted-decimal versions require at least three parts +v fail fail na dotted-decimal versions require at least three parts +v1.2345.6 fail pass pass maximum 3 digits between decimals +undef fail pass pass non-numeric data +1a fail fail na non-numeric data +1.2a3 fail fail na non-numeric data +bar fail fail na non-numeric data +_ fail fail na non-numeric data diff --git a/t/porting/diag.t b/t/porting/diag.t index 65e1958..06f9849 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -273,6 +273,12 @@ Invalid type '%c' in pack Invalid type '%c' in %s Invalid type '%c' in unpack Invalid type ',' in %s +Invalid strict version format (0 before decimal required) +Invalid strict version format (no leading zeros) +Invalid strict version format (no underscores) +Invalid strict version format (v1.2.3 required) +Invalid strict version format (version required) +Invalid strict version format (1.[0-9] required) Invalid version format (alpha without decimal) Invalid version format (misplaced _ in number) Invalid version object diff --git a/toke.c b/toke.c index deae6a5..2b98ada 100644 --- a/toke.c +++ b/toke.c @@ -2130,6 +2130,53 @@ S_force_version(pTHX_ char *s, int guessing) } /* + * S_force_strict_version + * Forces the next token to be a version number using strict syntax rules. + */ + +STATIC char * +S_force_strict_version(pTHX_ char *s) +{ + dVAR; + OP *version = NULL; +#ifdef PERL_MAD + I32 startoff = s - SvPVX(PL_linestr); +#endif + const char *errstr = NULL; + + PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; + + while (isSPACE(*s)) /* leading whitespace */ + s++; + + if (is_STRICT_VERSION(s,&errstr)) { + SV *ver = newSV(0); + s = (char *)scan_version(s, ver, 0); + version = newSVOP(OP_CONST, 0, ver); + } + else if ( (*s != ';' && *s != '}' ) && (s = SKIPSPACE1(s), (*s != ';' && *s !='}' ))) { + PL_bufptr = s; + if (errstr) + yyerror(errstr); /* version required */ + return s; + } + +#ifdef PERL_MAD + if (PL_madskills && !version) { + sv_free(PL_nextwhite); /* let next token collect whitespace */ + PL_nextwhite = 0; + s = SvPVX(PL_linestr) + startoff; + } +#endif + /* NOTE: The parser sees the package name and the VERSION swapped */ + start_force(PL_curforce); + NEXTVAL_NEXTTOKE.opval = version; + force_next(WORD); + + return s; +} + +/* * S_tokeq * Tokenize a quoted string passed in as an SV. It finds the next * chunk, up to end of string or a backslash. It may make a new @@ -6961,7 +7008,7 @@ Perl_yylex(pTHX) case KEY_package: s = force_word(s,WORD,FALSE,TRUE,FALSE); - s = force_version(s, FALSE); + s = force_strict_version(s); OPERATOR(PACKAGE); case KEY_pipe: diff --git a/universal.c b/universal.c index 3a91c5c..5a2cddb 100644 --- a/universal.c +++ b/universal.c @@ -546,10 +546,10 @@ XS(XS_version_new) ? HvNAME(SvSTASH(SvRV(ST(0)))) : (char *)SvPV_nolen(ST(0)); - if ( items == 1 || vs == &PL_sv_undef ) { /* no param or explicit undef */ + if ( items == 1 || ! SvOK(vs) ) { /* no param or explicit undef */ /* create empty object */ vs = sv_newmortal(); - sv_setpvs(vs,""); + sv_setpvs(vs, "undef"); } else if ( items == 3 ) { vs = sv_newmortal(); @@ -659,7 +659,7 @@ XS(XS_version_vcmp) if ( ! sv_derived_from(robj, "version") ) { - robj = new_version(robj); + robj = new_version(SvOK(robj) ? robj : newSVpvs("undef")); } rvs = SvRV(robj); @@ -743,7 +743,7 @@ XS(XS_version_qv) SV * ver = ST(0); SV * rv; const char * classname = ""; - if ( items == 2 && (ST(1)) != &PL_sv_undef ) { + if ( items == 2 && SvOK(ST(1)) ) { /* getting called as object or class method */ ver = ST(1); classname = diff --git a/util.c b/util.c index 70f5a26..9b11ada 100644 --- a/util.c +++ b/util.c @@ -4181,6 +4181,205 @@ Perl_getcwd_sv(pTHX_ register SV *sv) } #define VERSION_MAX 0x7FFFFFFF + +const char * +Perl_prescan_version(pTHX_ const char *s, bool strict, + const char **errstr, + bool *sqv, int *ssaw_decimal, int *swidth, bool *salpha) { + bool qv = (sqv ? *sqv : FALSE); + int width = 3; + int saw_decimal = 0; + bool alpha = FALSE; + const char *d = s; + + PERL_ARGS_ASSERT_PRESCAN_VERSION; + + if (qv && isDIGIT(*d)) + goto dotted_decimal_version; + + if (*d == 'v') { /* explicit v-string */ + d++; + if (isDIGIT(*d)) { + qv = TRUE; + } + else { /* degenerate v-string */ + /* requires v1.2.3 */ + BADVERSION(s,errstr,"Invalid version format (dotted-decimal versions require at least three parts)"); + } + +dotted_decimal_version: + if (strict && d[0] == '0' && isDIGIT(d[1])) { + /* no leading zeros allowed */ + BADVERSION(s,errstr,"Invalid version format (no leading zeros)"); + } + + while (isDIGIT(*d)) /* integer part */ + d++; + + if (*d == '.') + { + saw_decimal++; + d++; /* decimal point */ + } + else + { + if (strict) { + /* require v1.2.3 */ + BADVERSION(s,errstr,"Invalid version format (dotted-decimal versions require at least three parts)"); + } + else { + goto version_prescan_finish; + } + } + + { + int i = 0; + int j = 0; + while (isDIGIT(*d)) { /* just keep reading */ + i++; + while (isDIGIT(*d)) { + d++; j++; + /* maximum 3 digits between decimal */ + if (strict && j > 3) { + BADVERSION(s,errstr,"Invalid version format (maximum 3 digits between decimals)"); + } + } + if (*d == '_') { + if (strict) { + BADVERSION(s,errstr,"Invalid version format (no underscores)"); + } + if ( alpha ) { + BADVERSION(s,errstr,"Invalid version format (multiple underscores)"); + } + d++; + alpha = TRUE; + } + else if (*d == '.') { + if (alpha) { + BADVERSION(s,errstr,"Invalid version format (underscores before decimal)"); + } + saw_decimal++; + d++; + } + else if (!isDIGIT(*d)) { + break; + } + j = 0; + } + + if (strict && i < 2) { + /* requires v1.2.3 */ + BADVERSION(s,errstr,"Invalid version format (dotted-decimal versions require at least three parts)"); + } + } + } /* end if dotted-decimal */ + else + { /* decimal versions */ + /* special strict case for leading '.' or '0' */ + if (strict) { + if (*d == '.') { + BADVERSION(s,errstr,"Invalid version format (0 before decimal required)"); + } + if (*d == '0' && isDIGIT(d[1])) { + BADVERSION(s,errstr,"Invalid version format (no leading zeros)"); + } + } + + /* consume all of the integer part */ + while (isDIGIT(*d)) + d++; + + /* look for a fractional part */ + if (*d == '.') { + /* we found it, so consume it */ + saw_decimal++; + d++; + } + else if (!*d || *d == ';' || isSPACE(*d) || *d == '}') { + if ( d == s ) { + /* found nothing */ + BADVERSION(s,errstr,"Invalid version format (version required)"); + } + /* found just an integer */ + goto version_prescan_finish; + } + else if ( d == s ) { + /* didn't find either integer or period */ + BADVERSION(s,errstr,"Invalid version format (non-numeric data)"); + } + else if (*d == '_') { + /* underscore can't come after integer part */ + if (strict) { + BADVERSION(s,errstr,"Invalid version format (no underscores)"); + } + else if (isDIGIT(d[1])) { + BADVERSION(s,errstr,"Invalid version format (alpha without decimal)"); + } + else { + BADVERSION(s,errstr,"Invalid version format (misplaced underscore)"); + } + } + else { + /* anything else after integer part is just invalid data */ + BADVERSION(s,errstr,"Invalid version format (non-numeric data)"); + } + + /* scan the fractional part after the decimal point*/ + + if (!isDIGIT(*d) && (strict || ! (!*d || *d == ';' || isSPACE(*d) || *d == '}') )) { + /* strict or lax-but-not-the-end */ + BADVERSION(s,errstr,"Invalid version format (fractional part required)"); + } + + while (isDIGIT(*d)) { + d++; + if (*d == '.' && isDIGIT(d[-1])) { + if (alpha) { + BADVERSION(s,errstr,"Invalid version format (underscores before decimal)"); + } + if (strict) { + BADVERSION(s,errstr,"Invalid version format (dotted-decimal versions must begin with 'v')"); + } + d = (char *)s; /* start all over again */ + qv = TRUE; + goto dotted_decimal_version; + } + if (*d == '_') { + if (strict) { + BADVERSION(s,errstr,"Invalid version format (no underscores)"); + } + if ( alpha ) { + BADVERSION(s,errstr,"Invalid version format (multiple underscores)"); + } + if ( ! isDIGIT(d[1]) ) { + BADVERSION(s,errstr,"Invalid version format (misplaced underscore)"); + } + d++; + alpha = TRUE; + } + } + } + +version_prescan_finish: + while (isSPACE(*d)) + d++; + + if (!isDIGIT(*d) && (! (!*d || *d == ';' || *d == '}') )) { + /* trailing non-numeric data */ + BADVERSION(s,errstr,"Invalid version format (non-numeric data)"); + } + + if (sqv) + *sqv = qv; + if (swidth) + *swidth = width; + if (ssaw_decimal) + *ssaw_decimal = saw_decimal; + if (salpha) + *salpha = alpha; + return d; +} + /* =for apidoc scan_version @@ -4209,9 +4408,10 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv) const char *start; const char *pos; const char *last; - int saw_period = 0; - int alpha = 0; + const char *errstr = NULL; + int saw_decimal = 0; int width = 3; + bool alpha = FALSE; bool vinf = FALSE; AV * const av = newAV(); SV * const hv = newSVrv(rv, "version"); /* create an SV and upgrade the RV */ @@ -4220,54 +4420,24 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv) (void)sv_upgrade(hv, SVt_PVHV); /* needs to be an HV type */ +#ifndef NODEFAULT_SHAREKEYS + HvSHAREKEYS_on(hv); /* key-sharing on by default */ +#endif + while (isSPACE(*s)) /* leading whitespace is OK */ s++; - start = last = s; - - if (*s == 'v') { - s++; /* get past 'v' */ - qv = 1; /* force quoted version processing */ - } - - pos = s; - - /* pre-scan the input string to check for decimals/underbars */ - while ( *pos == '.' || *pos == '_' || *pos == ',' || isDIGIT(*pos) ) - { - if ( *pos == '.' ) - { - if ( alpha ) - Perl_croak(aTHX_ "Invalid version format (underscores before decimal)"); - saw_period++ ; - last = pos; + last = prescan_version(s, FALSE, &errstr, &qv, &saw_decimal, &width, &alpha); + if (errstr) { + /* "undef" is a special case and not an error */ + if ( ! ( *s == 'u' && strEQ(s,"undef")) ) { + Perl_croak(aTHX_ "%s", errstr); } - else if ( *pos == '_' ) - { - if ( alpha ) - Perl_croak(aTHX_ "Invalid version format (multiple underscores)"); - alpha = 1; - width = pos - last - 1; /* natural width of sub-version */ - } - else if ( *pos == ',' && isDIGIT(pos[1]) ) - { - saw_period++ ; - last = pos; - } - - pos++; } - if ( alpha && !saw_period ) - Perl_croak(aTHX_ "Invalid version format (alpha without decimal)"); - - if ( alpha && saw_period && width == 0 ) - Perl_croak(aTHX_ "Invalid version format (misplaced _ in number)"); - - if ( saw_period > 1 ) - qv = 1; /* force quoted version processing */ - - last = pos; + start = s; + if (*s == 'v') + s++; pos = s; if ( qv ) @@ -4294,7 +4464,7 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv) * point of a version originally created with a bare * floating point number, i.e. not quoted in any way */ - if ( !qv && s > start && saw_period == 1 ) { + if ( !qv && s > start && saw_decimal == 1 ) { mult *= 100; while ( s < end ) { orev = rev; @@ -4384,7 +4554,7 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv) } else if ( s > start ) { SV * orig = newSVpvn(start,s-start); - if ( qv && saw_period == 1 && *start != 'v' ) { + if ( qv && saw_decimal == 1 && *start != 'v' ) { /* need to insert a v to be consistent */ sv_insert(orig, 0, 0, "v", 1); } @@ -4433,6 +4603,9 @@ Perl_new_version(pTHX_ SV *ver) /* This will get reblessed later if a derived class*/ SV * const hv = newSVrv(rv, "version"); (void)sv_upgrade(hv, SVt_PVHV); /* needs to be an HV type */ +#ifndef NODEFAULT_SHAREKEYS + HvSHAREKEYS_on(hv); /* key-sharing on by default */ +#endif if ( SvROK(ver) ) ver = SvRV(ver); @@ -4475,7 +4648,7 @@ Perl_new_version(pTHX_ SV *ver) char * const version = savepvn( (const char*)mg->mg_ptr, len); sv_setpvn(rv,version,len); /* this is for consistency with the pure Perl class */ - if ( *version != 'v' ) + if ( isDIGIT(*version) ) sv_insert(rv, 0, 0, "v", 1); Safefree(version); } @@ -4530,7 +4703,7 @@ Perl_upg_version(pTHX_ SV *ver, bool qv) #ifdef SvVOK else if ( (mg = SvVSTRING_mg(ver)) ) { /* already a v-string */ version = savepvn( (const char*)mg->mg_ptr,mg->mg_len ); - qv = 1; + qv = TRUE; } #endif else /* must be a string or something like a string */ @@ -4540,12 +4713,14 @@ Perl_upg_version(pTHX_ SV *ver, bool qv) #ifndef SvVOK # if PERL_VERSION > 5 /* This will only be executed for 5.6.0 - 5.8.0 inclusive */ - if ( len == 3 && !instr(version,".") && !instr(version,"_") ) { + if ( len >= 3 && !instr(version,".") && !instr(version,"_") + && !(*version == 'u' && strEQ(version, "undef")) + && (*version < '0' || *version > '9') ) { /* may be a v-string */ SV * const nsv = sv_newmortal(); const char *nver; const char *pos; - int saw_period = 0; + int saw_decimal = 0; sv_setpvf(nsv,"v%vd",ver); pos = nver = savepv(SvPV_nolen(nsv)); @@ -4553,12 +4728,12 @@ Perl_upg_version(pTHX_ SV *ver, bool qv) pos++; /* skip the leading 'v' */ while ( *pos == '.' || isDIGIT(*pos) ) { if ( *pos == '.' ) - saw_period++ ; + saw_decimal++ ; pos++; } /* is definitely a v-string */ - if ( saw_period == 2 ) { + if ( saw_decimal >= 2 ) { Safefree(version); version = nver; }