From: Nicholas Clark Date: Tue, 11 Sep 2001 00:00:31 +0000 (+0100) Subject: Re: the remaining bugs in \x escapes (was Re: [PATCH] oct and hex in glorious 64... X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a4c04bdcc508b6a45f83e703d0f82401445aa55b;p=p5sagit%2Fp5-mst-13.2.git Re: the remaining bugs in \x escapes (was Re: [PATCH] oct and hex in glorious 64 bit (with less bugs) (was Re: hex and oct again (was Re: FreeBSD MD5 crypt? Re: crypt/hex/oct and Unicode?))) Message-ID: <20010911000031.G1512@plum.flirble.org> p4raw-id: //depot/perl@11990 --- diff --git a/MANIFEST b/MANIFEST index d56709b..1bad4c8 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2096,6 +2096,7 @@ t/op/pat.t See if esoteric patterns work t/op/pos.t See if pos works t/op/push.t See if push and pop work t/op/pwent.t See if getpw*() functions work +t/op/qq.t See if qq works t/op/quotemeta.t See if quotemeta works t/op/rand.t See if rand works t/op/range.t See if .. works diff --git a/numeric.c b/numeric.c index c71d5b3..2e1e261 100644 --- a/numeric.c +++ b/numeric.c @@ -122,8 +122,9 @@ returns UV_MAX, sets C in the output flags, and writes the value to I<*result> (or the value is discarded if I is NULL). -The hex number may optinally be prefixed with "0b" or "b". If -C is set in I<*flags> on entry then the binary +The hex number may optinally be prefixed with "0b" or "b" unless +C is set in I<*flags> on entry. If +C is set in I<*flags> then the binary number may use '_' characters to separate digits. =cut @@ -140,18 +141,20 @@ Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; - /* strip off leading b or 0b. - for compatibility silently suffer "b" and "0b" as valid binary numbers. - */ - if (len >= 1) { - if (s[0] == 'b') { - s++; - len--; - } - else if (len >= 2 && s[0] == '0' && s[1] == 'b') { - s+=2; - len-=2; - } + if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { + /* strip off leading b or 0b. + for compatibility silently suffer "b" and "0b" as valid binary + numbers. */ + if (len >= 1) { + if (s[0] == 'b') { + s++; + len--; + } + else if (len >= 2 && s[0] == '0' && s[1] == 'b') { + s+=2; + len-=2; + } + } } for (; len-- && *s; s++) { @@ -233,8 +236,9 @@ returns UV_MAX, sets C in the output flags, and writes the value to I<*result> (or the value is discarded if I is NULL). -The hex number may optinally be prefixed with "0x" or "x". If -C is set in I<*flags> on entry then the hex +The hex number may optinally be prefixed with "0x" or "x" unless +C is set in I<*flags> on entry. If +C is set in I<*flags> then the hex number may use '_' characters to separate digits. =cut @@ -252,17 +256,20 @@ Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { bool overflowed = FALSE; const char *hexdigit; - /* strip off leading x or 0x. - for compatibility silently suffer "x" and "0x" as valid hex numbers. */ - if (len >= 1) { - if (s[0] == 'x') { - s++; - len--; - } - else if (len >= 2 && s[0] == '0' && s[1] == 'x') { - s+=2; - len-=2; - } + if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { + /* strip off leading x or 0x. + for compatibility silently suffer "x" and "0x" as valid hex numbers. + */ + if (len >= 1) { + if (s[0] == 'x') { + s++; + len--; + } + else if (len >= 2 && s[0] == '0' && s[1] == 'x') { + s+=2; + len-=2; + } + } } for (; len-- && *s; s++) { diff --git a/perl.h b/perl.h index 0610ae7..cbe2cf3 100644 --- a/perl.h +++ b/perl.h @@ -3860,6 +3860,7 @@ int flock(int fd, int op); /* Input flags: */ #define PERL_SCAN_ALLOW_UNDERSCORES 0x01 /* grok_??? accept _ in numbers */ +#define PERL_SCAN_DISALLOW_PREFIX 0x02 /* grok_??? reject 0x in hex etc */ /* Output flags: */ #define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 /* should this merge with above? */ diff --git a/regcomp.c b/regcomp.c index 3d75a48..4455730 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3039,7 +3039,8 @@ tryagain: vFAIL("Missing right brace on \\x{}"); } else { - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES + | PERL_SCAN_DISALLOW_PREFIX; numlen = e - p - 1; ender = grok_hex(p + 1, &numlen, &flags, NULL); if (ender > 0xff) @@ -3053,7 +3054,7 @@ tryagain: } } else { - I32 flags = 0; + I32 flags = PERL_SCAN_DISALLOW_PREFIX; numlen = 2; ender = grok_hex(p, &numlen, &flags, NULL); p += numlen; @@ -3449,7 +3450,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) case 'a': value = ASCII_TO_NATIVE('\007');break; case 'x': if (*RExC_parse == '{') { - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES + | PERL_SCAN_DISALLOW_PREFIX; e = strchr(RExC_parse++, '}'); if (!e) vFAIL("Missing right brace on \\x{}"); @@ -3459,7 +3461,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) RExC_parse = e + 1; } else { - I32 flags = 0; + I32 flags = PERL_SCAN_DISALLOW_PREFIX; numlen = 2; value = grok_hex(RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; diff --git a/t/op/pat.t b/t/op/pat.t index 2e89225..23d9c85 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..686\n"; +print "1..714\n"; BEGIN { chdir 't' if -d 't'; @@ -2008,3 +2008,113 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; print "not " unless length($y) == 2 && $y eq $x; print "ok 686\n"; } + +my $test = 687; + +# Force scalar context on the patern match +sub ok ($$) { + my($ok, $name) = @_; + + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; + + printf "# Failed test at line %d\n", (caller)[2] unless $ok; + + $test++; + return $ok; +} + +{ + # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. + $x = "\x4e" . "E"; + ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); + + print "# and now again in [] ranges\n"; + + $x = "\x4e" . "E"; + ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); + +} + +{ + # Check that \x{##} works. 5.6.1 fails quite a few of these. + + $x = "\x9b"; + ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); + + print "# and now again in [] ranges\n"; + + $x = "\x9b"; + ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); +} diff --git a/t/op/qq.t b/t/op/qq.t new file mode 100644 index 0000000..651cf18 --- /dev/null +++ b/t/op/qq.t @@ -0,0 +1,63 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +print q(1..21 +); + +# This is() function is written to avoid "" +my $test = 1; +sub is { + my($left, $right) = @_; + + if ($left eq $right) { + printf 'ok %d +', $test++; + return 1; + } + foreach ($left, $right) { + # Comment out these regexps to map non-printables to ord if the perl under + # test is so broken that it's not helping + s/([^-+A-Za-z_0-9])/sprintf q{'.chr(%d).'}, ord $1/ge; + $_ = sprintf q('%s'), $_; + s/^''\.//; + s/\.''$//; + } + printf q(not ok %d - got %s expected %s +), $test++, $left, $right; + + printf q(# Failed test at line %d +), (caller)[2]; + + return 0; +} + +is ("\x53", chr 83); +is ("\x4EE", chr (78) . 'E'); +is ("\x4i", chr (4) . 'i'); # This will warn +is ("\xh", chr (0) . 'h'); # This will warn +is ("\xx", chr (0) . 'x'); # This will warn +is ("\xx9", chr (0) . 'x9'); # This will warn. \x9 is tab in EBCDIC too? +is ("\x9_E", chr (9) . '_E'); # This will warn + +is ("\x{4E}", chr 78); +is ("\x{6_9}", chr 105); +is ("\x{_6_3}", chr 99); +is ("\x{_6B}", chr 107); + +is ("\x{9__0}", chr 9); # multiple underscores not allowed. +is ("\x{77_}", chr 119); # trailing underscore warns. +is ("\x{6FQ}z", chr (111) . 'z'); + +is ("\x{0x4E}", chr 0); +is ("\x{x4E}", chr 0); + +is ("\x{0065}", chr 101); +is ("\x{000000000000000000000000000000000000000000000000000000000000000072}", + chr 114); +is ("\x{0_06_5}", chr 101); +is ("\x{1234}", chr 4660); +is ("\x{98765432}", chr 2557891634); diff --git a/toke.c b/toke.c index f0c0071..d526275 100644 --- a/toke.c +++ b/toke.c @@ -1452,7 +1452,8 @@ S_scan_const(pTHX_ char *start) ++s; if (*s == '{') { char* e = strchr(s, '}'); - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | + PERL_SCAN_DISALLOW_PREFIX; STRLEN len; ++s; @@ -1467,7 +1468,7 @@ S_scan_const(pTHX_ char *start) else { { STRLEN len = 2; - I32 flags = 0; + I32 flags = PERL_SCAN_DISALLOW_PREFIX; uv = grok_hex(s, &len, &flags, NULL); s += len; }