From: Gurusamy Sarathy Date: Mon, 6 Jul 1998 06:41:17 +0000 (+0000) Subject: allow eval-groups in patterns only if they C X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e4d48cc9bddb8984cf12bdfbcbac9580d192b5a5;p=p5sagit%2Fp5-mst-13.2.git allow eval-groups in patterns only if they C p4raw-id: //depot/perl@1334 --- diff --git a/lib/re.pm b/lib/re.pm index ff38c41..b7375e3 100644 --- a/lib/re.pm +++ b/lib/re.pm @@ -6,26 +6,42 @@ re - Perl pragma to alter regular expression behaviour =head1 SYNOPSIS - ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here + use re 'taint'; + ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here - use re "taint"; - ($x) = ($^X =~ /^(.*)$/s); # $x _is_ tainted here + use re 'eval'; + /foo(?{ $foo = 1 })bar/; # won't fail (when not under -T switch) + + { + no re 'taint'; # the default + ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here + + no re 'eval'; # the default + /foo(?{ $foo = 1 })bar/; # disallowed (with or without -T switch) + } =head1 DESCRIPTION When C is in effect, and a tainted string is the target of a regex, the regex memories (or values returned by the m// operator -in list context) are tainted. +in list context) are tainted. This feature is useful when regex operations +on tainted data aren't meant to extract safe substrings, but to perform +other transformations. -This feature is useful when regex operations on tainted data aren't -meant to extract safe substrings, but to perform other transformations. +When C is in effect, a regex is allowed to contain +C<(?{ ... })> zero-width assertions (which may not be interpolated in +the regex). That is normally disallowed, since it is a potential security +risk. Note that this pragma is ignored when perl detects tainted data, +i.e. evaluation is always disallowed with tainted data. See +L. See L. =cut my %bitmask = ( -taint => 0x00100000 +taint => 0x00100000, +eval => 0x00200000, ); sub bits { diff --git a/perl.h b/perl.h index 7f3fd63..9d982ec 100644 --- a/perl.h +++ b/perl.h @@ -1824,6 +1824,7 @@ typedef enum { #define HINT_LOCALIZE_HH 0x00020000 /* %^H needs to be copied */ #define HINT_RE_TAINT 0x00100000 +#define HINT_RE_EVAL 0x00200000 /* Various states of an input record separator SV (rs, nrs) */ #define RsSNARF(sv) (! SvOK(sv)) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 7c8ab3d..221cc35 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1063,6 +1063,27 @@ single form when it must operate on them directly. Either you've passed an invalid file specification to Perl, or you've found a case the conversion routines don't handle. Drat. +=item %s: Eval-group in insecure regular expression + +(F) Perl detected tainted data when trying to compile a regular expression +that contains the C<(?{ ... })> zero-width assertion, which is unsafe. +See L, and L. + +=item %s: Eval-group not allowed, use re 'eval' + +(F) A regular expression contained the C<(?{ ... })> zero-width assertion, +but that construct is only allowed when the C pragma is +in effect. See L. + +=item %s: Eval-group not allowed at run time + +(F) Perl tried to compile a regular expression containing the C<(?{ ... })> +zero-width assertion at run time, at it would when the pattern contains +interpolated values. Since this is a risk to security, it is not allowed. +If you insist, you may still do this by explicitly building the pattern +from an interpolated string at run time and using that in an eval(). +See L. + =item Excessively long <> operator (F) The contents of a <> operator may not exceed the maximum size of a diff --git a/pod/perlre.pod b/pod/perlre.pod index 30608ce..f6fdc29 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -330,6 +330,10 @@ Experimental "evaluate any Perl code" zero-width assertion. Always succeeds. C is not interpolated. Currently the rules to determine where the C ends are somewhat convoluted. +Owing to the risks to security, this is only available when the +C pragma is used, and then only for patterns that don't +have any variables that must be interpolated at run time. + The C is properly scoped in the following sense: if the assertion is backtracked (compare L<"Backtracking">), all the changes introduced after Cisation are undone, so @@ -360,11 +364,6 @@ other C<(?{ code })> assertions inside the same regular expression. The above assignment to $^R is properly localized, thus the old value of $^R is restored if the assertion is backtracked (compare L<"Backtracking">). -B: This is a grave security risk for arbitrarily interpolated -patterns. It introduces security holes in previously safe programs. -A fix to Perl, and to this documentation, will be forthcoming prior -to the actual 5.005 release. - =item C<(?Epattern)> An "independent" subexpression. Matches the substring that a diff --git a/regcomp.c b/regcomp.c index 0a36cbb..b18740c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1043,6 +1043,13 @@ reg(I32 paren, I32 *flagp) regcomp_rx->data->data[n+2] = (void*)sop; SvREFCNT_dec(sv); } else { /* First pass */ + if (curcop == &compiling) { + if (!(hints & HINT_RE_EVAL)) + FAIL("Eval-group not allowed, use re 'eval'"); + } + else { + FAIL("Eval-group not allowed at run time"); + } if (tainted) FAIL("Eval-group in insecure regular expression"); } diff --git a/t/op/misc.t b/t/op/misc.t index 9ab6831..25f566e 100755 --- a/t/op/misc.t +++ b/t/op/misc.t @@ -336,16 +336,18 @@ sub foo { local $_ = shift; split; @_ } @x = foo(' x y z '); print "you die joe!\n" unless "@x" eq 'x y z'; ######## +use re 'eval'; /(?{"{"})/ # Check it outside of eval too EXPECT -Sequence (?{...}) not terminated or not {}-balanced at - line 1, within pattern -/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1. +Sequence (?{...}) not terminated or not {}-balanced at - line 2, within pattern +/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 2. ######## +use re 'eval'; /(?{"{"}})/ # Check it outside of eval too EXPECT Unmatched right bracket at (re_eval 1) line 1, at end of line syntax error at (re_eval 1) line 1, near ""{"}" -Compilation failed in regexp at - line 1. +Compilation failed in regexp at - line 2. ######## BEGIN { @ARGV = qw(a b c) } BEGIN { print "argv <@ARGV>\nbegin <",shift,">\n" } diff --git a/t/op/pat.t b/t/op/pat.t index fecdf0c..7ee1f09 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,11 +6,14 @@ # $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $ -print "1..123\n"; +print "1..124\n"; -chdir 't' if -d 't'; -@INC = "../lib"; +BEGIN { + chdir 't' if -d 't'; + @INC = "../lib" if -d "../lib"; +} eval 'use Config'; # Defaults assumed if this fails +use re 'eval'; $x = "abc\ndef\n"; @@ -379,7 +382,14 @@ $test++; $code = '{$blah = 45}'; $blah = 12; -/(?$code)/; +eval { /(?$code)/ }; +print "not " unless $@ and $@ =~ /not allowed at run time/ and $blah == 12; +print "ok $test\n"; +$test++; + +$code = '{$blah = 45}'; +$blah = 12; +eval "/(?$code)/"; print "not " if $blah != 45; print "ok $test\n"; $test++; diff --git a/t/op/regexp.t b/t/op/regexp.t index 7e43526..244ed4a 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -26,6 +26,13 @@ # If you want to add a regular expression test that can't be expressed # in this format, don't add it here: put it in op/pat.t instead. +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; +} + +use re 'eval'; + $iters = shift || 1; # Poor man performance suite, 10000 is OK. open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || diff --git a/t/op/subst.t b/t/op/subst.t index 92a848f..1323b2d 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -1,6 +1,10 @@ #!./perl -# $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $ + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; +} print "1..70\n"; @@ -276,6 +280,7 @@ $_ = <<'EOL'; EOL $^R = 'junk'; +use re 'eval'; $foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' . ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' . ' lowercase $@%#MiXeD$@%# ';