From: SADAHIRO Tomoyuki Date: Sun, 15 Oct 2006 22:23:00 +0000 (+0900) Subject: remove leaveit from toke.c:scan_const X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cc74c5bd822c38bbfaf542cfef5f7219e05e467e;p=p5sagit%2Fp5-mst-13.2.git remove leaveit from toke.c:scan_const Message-Id: <20061015222223.BC38.BQW10602@nifty.com> p4raw-id: //depot/perl@29026 --- diff --git a/pod/perlop.pod b/pod/perlop.pod index dcd537b..205556c 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1972,16 +1972,12 @@ is emitted if the C pragma or the B<-w> command-line flag =item C in C, C, C, C, -Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l>, and interpolation -happens (almost) as with C constructs. - -However combinations of C<\> followed by RE-special chars are not -substituted but only skipped. The full list of RE-special chars is -C<\>, C<.>, C<^>, C<$>, C<@>, C, C, C, C, C, C, C, -C, C, C, C, C

, C

, C, C, C<+>, C<*>, C, C<|>, -C<(>, C<)>, C<->, C, C, C, C, C, C, C, C, C, -C, digits (C<0> to C<9>), C<[>, C<{>, C<]>, C<}>, whitespaces -(SPACE, TAB, LF, CR, FF, and VT in addition), and C<#>. +Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l>, C<\E>, +and interpolation happens (almost) as with C constructs. + +However any other combinations of C<\> followed by a character +are not substituted but only skipped, in order to parse them +as regular expressions at the following step. As C<\c> is skipped at this step, C<@> of C<\c@> in RE is possibly treated as an array symbol (for example C<@foo>), even though the same text in C gives interpolation of C<\c@>. diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp index 4982016..6818c62 100644 --- a/t/lib/warnings/regcomp +++ b/t/lib/warnings/regcomp @@ -56,6 +56,17 @@ $a =~ /a$x/ ; EXPECT Unrecognized escape \m passed through in regex; marked by <-- HERE in m/a\m <-- HERE / at - line 4. ######## +# regcomp.c [S_regatom] +# The \q should warn, the \_ should NOT warn. +use warnings 'regexp'; +"foo" =~ /\q/; +"bar" =~ /\_/; +no warnings 'regexp'; +"foo" =~ /\q/; +"bar" =~ /\_/; +EXPECT +Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE / at - line 4. +######## # regcomp.c [S_regpposixcc S_checkposixcc] # use warnings 'regexp' ; diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 5c44df7..e4fa82c 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -737,17 +737,6 @@ EXPECT Possible unintended interpolation of @mjd_previously_unused_array in string at - line 3. ######## # toke.c -# The \q should warn, the \_ should NOT warn. -use warnings 'misc'; -"foo" =~ /\q/; -"bar" =~ /\_/; -no warnings 'misc'; -"foo" =~ /\q/; -"bar" =~ /\_/; -EXPECT -Unrecognized escape \q passed through at - line 4. -######## -# toke.c # 20020328 mjd-perl-patch+@plover.com at behest of jfriedl@yahoo.com use warnings 'regexp'; "foo" =~ /foo/c; diff --git a/t/op/pat.t b/t/op/pat.t index a6ea46c..9bd6553 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -476,27 +476,27 @@ print "not " unless $^R eq '79' and $x eq '12'; print "ok $test\n"; $test++; -print "not " unless qr/\b\v$/i eq '(?i-xsm:\bv$)'; +print "not " unless qr/\b\v$/i eq '(?i-xsm:\b\v$)'; print "ok $test\n"; $test++; -print "not " unless qr/\b\v$/s eq '(?s-xim:\bv$)'; +print "not " unless qr/\b\v$/s eq '(?s-xim:\b\v$)'; print "ok $test\n"; $test++; -print "not " unless qr/\b\v$/m eq '(?m-xis:\bv$)'; +print "not " unless qr/\b\v$/m eq '(?m-xis:\b\v$)'; print "ok $test\n"; $test++; -print "not " unless qr/\b\v$/x eq '(?x-ism:\bv$)'; +print "not " unless qr/\b\v$/x eq '(?x-ism:\b\v$)'; print "ok $test\n"; $test++; -print "not " unless qr/\b\v$/xism eq '(?msix:\bv$)'; +print "not " unless qr/\b\v$/xism eq '(?msix:\b\v$)'; print "ok $test\n"; $test++; -print "not " unless qr/\b\v$/ eq '(?-xism:\bv$)'; +print "not " unless qr/\b\v$/ eq '(?-xism:\b\v$)'; print "ok $test\n"; $test++; @@ -3824,6 +3824,20 @@ sub iseq($$;$) { ok($ok, $msg); } +# \, breaks {3,4} +ok("xaaay" !~ /xa{3\,4}y/, "\, in a pattern"); +ok("xa{3,4}y" =~ /xa{3\,4}y/, "\, in a pattern"); + +# \c\ followed by _ +ok("x\c_y" !~ /x\c\_y/, "\_ in a pattern"); +ok("x\c\_y" =~ /x\c\_y/, "\_ in a pattern"); + +# \c\ followed by other characters +for my $c ("z", "\0", "!", chr(254), chr(256)) { + my $targ = "a\034$c"; + my $reg = "a\\c\\$c"; + ok(eval("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern"); +} # Keep the following tests last -- they may crash perl @@ -3835,5 +3849,5 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/, or print "# Unexpected outcome: should pass or crash perl\n"; # Don't forget to update this! -BEGIN{print "1..1275\n"}; +BEGIN{print "1..1284\n"}; diff --git a/t/op/regmesg.t b/t/op/regmesg.t index 1b613ed..fbfb6b2 100644 --- a/t/op/regmesg.t +++ b/t/op/regmesg.t @@ -47,6 +47,15 @@ my @death = '/(?;x/' => 'Sequence (?;...) not recognized in regex; marked by {#} in m/(?;{#}x/', '/(?<;x/' => 'Sequence (?<;...) not recognized in regex; marked by {#} in m/(?<;{#}x/', + '/(?\ix/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}ix/', + '/(?\mx/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}mx/', + '/(?\:x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}:x/', + '/(?\=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}=x/', + '/(?\!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}!x/', + '/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/', + '/(?\ 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/', + '/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/', "/x{$inf_p1}/" => "Quantifier in {,} bigger than $inf_m1 in regex; marked by {#} in m/x{{#}$inf_p1}/", diff --git a/toke.c b/toke.c index 927f904..4ab58ea 100644 --- a/toke.c +++ b/toke.c @@ -1790,12 +1790,6 @@ S_scan_const(pTHX_ char *start) bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */ #endif - const char * const leaveit = /* set of acceptably-backslashed characters */ - (const char *) - (PL_lex_inpat - ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-Nnrktfeaxcz0123456789[{]} \t\n\r\f\v#" - : ""); - if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { /* If we are doing a trans and we know we want UTF8 set expectation */ has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); @@ -2020,13 +2014,6 @@ S_scan_const(pTHX_ char *start) if (*s == '\\' && s+1 < send) { s++; - /* some backslashes we leave behind */ - if (*leaveit && *s && strchr(leaveit, *s)) { - *d++ = NATIVE_TO_NEED(has_utf8,'\\'); - *d++ = NATIVE_TO_NEED(has_utf8,*s++); - continue; - } - /* deprecate \1 in strings and substitution replacements */ if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) @@ -2042,6 +2029,11 @@ S_scan_const(pTHX_ char *start) --s; break; } + /* skip any other backslash escapes in a pattern */ + else if (PL_lex_inpat) { + *d++ = NATIVE_TO_NEED(has_utf8,'\\'); + goto default_action; + } /* if we get here, it's either a quoted -, or a digit */ switch (*s) {