From: Gurusamy Sarathy Date: Tue, 2 May 2000 06:48:19 +0000 (+0000) Subject: change#3798 broke the meaning of "\0_7_7", tr/\0_// etc.; fix it X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b21ed0a92b5a07dd021a85728802e72edfa03699;p=p5sagit%2Fp5-mst-13.2.git change#3798 broke the meaning of "\0_7_7", tr/\0_// etc.; fix it such that underscores are only ignored in literal numbers, "\x{...}", and hex/oct argument p4raw-link: @3798 on //depot/cfgperl: 252aa0820e6bce274b33bd342cfc65e18a59a165 p4raw-id: //depot/perl@6044 --- diff --git a/perl.c b/perl.c index ee71369..ff851b4 100644 --- a/perl.c +++ b/perl.c @@ -1984,6 +1984,7 @@ Perl_moreswitches(pTHX_ char *s) case '0': { dTHR; + numlen = 0; /* disallow underscores */ rschar = (U32)scan_oct(s, 4, &numlen); SvREFCNT_dec(PL_nrs); if (rschar & ~((U8)~0)) @@ -2099,6 +2100,7 @@ Perl_moreswitches(pTHX_ char *s) if (isDIGIT(*s)) { PL_ors = savepv("\n"); PL_orslen = 1; + numlen = 0; /* disallow underscores */ *PL_ors = (char)scan_oct(s, 3 + (*s == '0'), &numlen); s += numlen; } diff --git a/pp.c b/pp.c index 17824bd..a86be7a 100644 --- a/pp.c +++ b/pp.c @@ -1885,6 +1885,7 @@ PP(pp_hex) STRLEN n_a; tmps = POPpx; + argtype = 1; /* allow underscores */ XPUSHn(scan_hex(tmps, 99, &argtype)); RETURN; } @@ -1902,6 +1903,7 @@ PP(pp_oct) tmps++; if (*tmps == '0') tmps++; + argtype = 1; /* allow underscores */ if (*tmps == 'x') value = scan_hex(++tmps, 99, &argtype); else if (*tmps == 'b') diff --git a/regcomp.c b/regcomp.c index 7af090e..9543710 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2605,8 +2605,10 @@ tryagain: if (!e) FAIL("Missing right brace on \\x{}"); else if (UTF) { + numlen = 1; /* allow underscores */ ender = (UV)scan_hex(p + 1, e - p, &numlen); - if (numlen + len >= 127) { /* numlen is generous */ + /* numlen is generous */ + if (numlen + len >= 127) { p--; goto loopdone; } @@ -2616,6 +2618,7 @@ tryagain: FAIL("Can't use \\x{} without 'use utf8' declaration"); } else { + numlen = 0; /* disallow underscores */ ender = (UV)scan_hex(p, 2, &numlen); p += numlen; } @@ -2629,6 +2632,7 @@ tryagain: case '5': case '6': case '7': case '8':case '9': if (*p == '0' || (isDIGIT(p[1]) && atoi(p) >= PL_regnpar) ) { + numlen = 0; /* disallow underscores */ ender = (UV)scan_oct(p, 3, &numlen); p += numlen; } @@ -2940,6 +2944,7 @@ S_regclass(pTHX) case 'a': value = '\057'; break; #endif case 'x': + numlen = 0; /* disallow underscores */ value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen); PL_regcomp_parse += numlen; break; @@ -2949,6 +2954,7 @@ S_regclass(pTHX) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + numlen = 0; /* disallow underscores */ value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen); PL_regcomp_parse += numlen; break; @@ -3414,12 +3420,14 @@ S_regclassutf8(pTHX) e = strchr(PL_regcomp_parse++, '}'); if (!e) FAIL("Missing right brace on \\x{}"); + numlen = 1; /* allow underscores */ value = (UV)scan_hex(PL_regcomp_parse, e - PL_regcomp_parse, &numlen); PL_regcomp_parse = e + 1; } else { + numlen = 0; /* disallow underscores */ value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen); PL_regcomp_parse += numlen; } @@ -3430,6 +3438,7 @@ S_regclassutf8(pTHX) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + numlen = 0; /* disallow underscores */ value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen); PL_regcomp_parse += numlen; break; diff --git a/t/op/oct.t b/t/op/oct.t index 27ac5aa..3a487d8 100755 --- a/t/op/oct.t +++ b/t/op/oct.t @@ -1,53 +1,67 @@ #!./perl -print "1..36\n"; +print "1..44\n"; -print +(oct('0b10101') == 0b10101) ? "ok" : "not ok", " 1\n"; -print +(oct('0b10101') == 025) ? "ok" : "not ok", " 2\n"; -print +(oct('0b10101') == 21) ? "ok" : "not ok", " 3\n"; -print +(oct('0b10101') == 0x15) ? "ok" : "not ok", " 4\n"; +print +(oct('0b1_0101') == 0b101_01) ? "ok" : "not ok", " 1\n"; +print +(oct('0b10_101') == 0_2_5) ? "ok" : "not ok", " 2\n"; +print +(oct('0b101_01') == 2_1) ? "ok" : "not ok", " 3\n"; +print +(oct('0b1010_1') == 0x1_5) ? "ok" : "not ok", " 4\n"; -print +(oct('b10101') == 0b10101) ? "ok" : "not ok", " 5\n"; -print +(oct('b10101') == 025) ? "ok" : "not ok", " 6\n"; -print +(oct('b10101') == 21) ? "ok" : "not ok", " 7\n"; -print +(oct('b10101') == 0x15) ? "ok" : "not ok", " 8\n"; +print +(oct('b1_0101') == 0b10101) ? "ok" : "not ok", " 5\n"; +print +(oct('b10_101') == 025) ? "ok" : "not ok", " 6\n"; +print +(oct('b101_01') == 21) ? "ok" : "not ok", " 7\n"; +print +(oct('b1010_1') == 0x15) ? "ok" : "not ok", " 8\n"; -print +(oct('01234') == 0b1010011100) ? "ok" : "not ok", " 9\n"; -print +(oct('01234') == 01234) ? "ok" : "not ok", " 10\n"; -print +(oct('01234') == 668) ? "ok" : "not ok", " 11\n"; +print +(oct('01_234') == 0b10_1001_1100) ? "ok" : "not ok", " 9\n"; +print +(oct('012_34') == 01234) ? "ok" : "not ok", " 10\n"; +print +(oct('0123_4') == 668) ? "ok" : "not ok", " 11\n"; print +(oct('01234') == 0x29c) ? "ok" : "not ok", " 12\n"; -print +(oct('0x1234') == 0b1001000110100) ? "ok" : "not ok", " 13\n"; -print +(oct('0x1234') == 011064) ? "ok" : "not ok", " 14\n"; -print +(oct('0x1234') == 4660) ? "ok" : "not ok", " 15\n"; -print +(oct('0x1234') == 0x1234) ? "ok" : "not ok", " 16\n"; +print +(oct('0x1_234') == 0b10010_00110100) ? "ok" : "not ok", " 13\n"; +print +(oct('0x12_34') == 01_1064) ? "ok" : "not ok", " 14\n"; +print +(oct('0x123_4') == 4660) ? "ok" : "not ok", " 15\n"; +print +(oct('0x1234') == 0x12_34) ? "ok" : "not ok", " 16\n"; -print +(oct('x1234') == 0b1001000110100) ? "ok" : "not ok", " 17\n"; -print +(oct('x1234') == 011064) ? "ok" : "not ok", " 18\n"; -print +(oct('x1234') == 4660) ? "ok" : "not ok", " 19\n"; -print +(oct('x1234') == 0x1234) ? "ok" : "not ok", " 20\n"; +print +(oct('x1_234') == 0b100100011010_0) ? "ok" : "not ok", " 17\n"; +print +(oct('x12_34') == 0_11064) ? "ok" : "not ok", " 18\n"; +print +(oct('x123_4') == 4660) ? "ok" : "not ok", " 19\n"; +print +(oct('x1234') == 0x_1234) ? "ok" : "not ok", " 20\n"; -print +(hex('01234') == 0b1001000110100) ? "ok" : "not ok", " 21\n"; -print +(hex('01234') == 011064) ? "ok" : "not ok", " 22\n"; -print +(hex('01234') == 4660) ? "ok" : "not ok", " 23\n"; -print +(hex('01234') == 0x1234) ? "ok" : "not ok", " 24\n"; +print +(hex('01_234') == 0b_1001000110100) ? "ok" : "not ok", " 21\n"; +print +(hex('012_34') == 011064) ? "ok" : "not ok", " 22\n"; +print +(hex('0123_4') == 4660) ? "ok" : "not ok", " 23\n"; +print +(hex('01234_') == 0x1234) ? "ok" : "not ok", " 24\n"; -print +(hex('0x1234') == 0b1001000110100) ? "ok" : "not ok", " 25\n"; -print +(hex('0x1234') == 011064) ? "ok" : "not ok", " 26\n"; -print +(hex('0x1234') == 4660) ? "ok" : "not ok", " 27\n"; -print +(hex('0x1234') == 0x1234) ? "ok" : "not ok", " 28\n"; +print +(hex('0x_1234') == 0b1001000110100) ? "ok" : "not ok", " 25\n"; +print +(hex('0x1_234') == 011064) ? "ok" : "not ok", " 26\n"; +print +(hex('0x12_34') == 4660) ? "ok" : "not ok", " 27\n"; +print +(hex('0x1234_') == 0x1234) ? "ok" : "not ok", " 28\n"; -print +(hex('x1234') == 0b1001000110100) ? "ok" : "not ok", " 29\n"; -print +(hex('x1234') == 011064) ? "ok" : "not ok", " 30\n"; -print +(hex('x1234') == 4660) ? "ok" : "not ok", " 31\n"; -print +(hex('x1234') == 0x1234) ? "ok" : "not ok", " 32\n"; +print +(hex('x_1234') == 0b1001000110100) ? "ok" : "not ok", " 29\n"; +print +(hex('x12_34') == 011064) ? "ok" : "not ok", " 30\n"; +print +(hex('x123_4') == 4660) ? "ok" : "not ok", " 31\n"; +print +(hex('x1234_') == 0x1234) ? "ok" : "not ok", " 32\n"; -print +(oct('0b11111111111111111111111111111111') == 4294967295) ? +print +(oct('0b1111_1111_1111_1111_1111_1111_1111_1111') == 4294967295) ? "ok" : "not ok", " 33\n"; -print +(oct('037777777777') == 4294967295) ? +print +(oct('037_777_777_777') == 4294967295) ? "ok" : "not ok", " 34\n"; -print +(oct('0xffffffff') == 4294967295) ? +print +(oct('0xffff_ffff') == 4294967295) ? "ok" : "not ok", " 35\n"; -print +(hex('0xffffffff') == 4294967295) ? +print +(hex('0xff_ff_ff_ff') == 4294967295) ? "ok" : "not ok", " 36\n"; + +$_ = "\0_7_7"; +print length eq 5 ? "ok" : "not ok", " 37\n"; +print $_ eq "\0"."_"."7"."_"."7" ? "ok" : "not ok", " 38\n"; +chop, chop, chop, chop; +print $_ eq "\0" ? "ok" : "not ok", " 39\n"; +print "\077_" eq "?_" ? "ok" : "not ok", " 40\n"; + +$_ = "\x_7_7"; +print length eq 5 ? "ok" : "not ok", " 41\n"; +print $_ eq "\0"."_"."7"."_"."7" ? "ok" : "not ok", " 42\n"; +chop, chop, chop, chop; +print $_ eq "\0" ? "ok" : "not ok", " 43\n"; +print "\x2F_" eq "/_" ? "ok" : "not ok", " 44\n"; diff --git a/toke.c b/toke.c index 860e3c1..10273a0 100644 --- a/toke.c +++ b/toke.c @@ -1389,6 +1389,7 @@ S_scan_const(pTHX_ char *start) /* \132 indicates an octal constant */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': + len = 0; /* disallow underscores */ uv = (UV)scan_oct(s, 3, &len); s += len; goto NUM_ESCAPE_INSERT; @@ -1402,10 +1403,12 @@ S_scan_const(pTHX_ char *start) yyerror("Missing right brace on \\x{}"); e = s; } + len = 1; /* allow underscores */ uv = (UV)scan_hex(s + 1, e - s - 1, &len); s = e + 1; } else { + len = 0; /* disallow underscores */ uv = (UV)scan_hex(s, 2, &len); s += len; } diff --git a/util.c b/util.c index 059d9a4..2dfbfaa 100644 --- a/util.c +++ b/util.c @@ -2877,9 +2877,13 @@ Perl_scan_bin(pTHX_ char *start, I32 len, I32 *retlen) for (; len-- && *s; s++) { if (!(*s == '0' || *s == '1')) { - if (*s == '_') - continue; /* Note: does not check for __ and the like. */ - if (seenb == FALSE && *s == 'b' && ruv == 0) { + if (*s == '_' && len && *retlen + && (s[1] == '0' || s[1] == '1')) + { + --len; + ++s; + } + else if (seenb == FALSE && *s == 'b' && ruv == 0) { /* Disallow 0bbb0b0bbb... */ seenb = TRUE; continue; @@ -2902,7 +2906,8 @@ Perl_scan_bin(pTHX_ char *start, I32 len, I32 *retlen) if (ckWARN_d(WARN_OVERFLOW)) Perl_warner(aTHX_ WARN_OVERFLOW, "Integer overflow in binary number"); - } else + } + else ruv = xuv | (*s - '0'); } if (overflowed) { @@ -2942,8 +2947,12 @@ Perl_scan_oct(pTHX_ char *start, I32 len, I32 *retlen) for (; len-- && *s; s++) { if (!(*s >= '0' && *s <= '7')) { - if (*s == '_') - continue; /* Note: does not check for __ and the like. */ + if (*s == '_' && len && *retlen + && (s[1] >= '0' && s[1] <= '7')) + { + --len; + ++s; + } else { /* Allow \octal to work the DWIM way (that is, stop scanning * as soon as non-octal characters are seen, complain only iff @@ -2967,7 +2976,8 @@ Perl_scan_oct(pTHX_ char *start, I32 len, I32 *retlen) if (ckWARN_d(WARN_OVERFLOW)) Perl_warner(aTHX_ WARN_OVERFLOW, "Integer overflow in octal number"); - } else + } + else ruv = xuv | (*s - '0'); } if (overflowed) { @@ -3010,9 +3020,13 @@ Perl_scan_hex(pTHX_ char *start, I32 len, I32 *retlen) for (; len-- && *s; s++) { hexdigit = strchr((char *) PL_hexdigit, *s); if (!hexdigit) { - if (*s == '_') - continue; /* Note: does not check for __ and the like. */ - if (seenx == FALSE && *s == 'x' && ruv == 0) { + if (*s == '_' && len && *retlen && s[1] + && (hexdigit = strchr((char *) PL_hexdigit, s[1]))) + { + --len; + ++s; + } + else if (seenx == FALSE && *s == 'x' && ruv == 0) { /* Disallow 0xxx0x0xxx... */ seenx = TRUE; continue; @@ -3035,7 +3049,8 @@ Perl_scan_hex(pTHX_ char *start, I32 len, I32 *retlen) if (ckWARN_d(WARN_OVERFLOW)) Perl_warner(aTHX_ WARN_OVERFLOW, "Integer overflow in hexadecimal number"); - } else + } + else ruv = xuv | ((hexdigit - PL_hexdigit) & 15); } if (overflowed) {