=head1 SYNOPSIS
- ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
+ use re 'taint';
+ ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
- use re "taint";
- ($x) = ($^X =~ /^(.*)$/s); # $x _is_ tainted here
+ use re 'eval';
+ /foo(?{ $foo = 1 })bar/; # won't fail (when not under -T switch)
+
+ {
+ no re 'taint'; # the default
+ ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
+
+ no re 'eval'; # the default
+ /foo(?{ $foo = 1 })bar/; # disallowed (with or without -T switch)
+ }
=head1 DESCRIPTION
When C<use re 'taint'> is in effect, and a tainted string is the target
of a regex, the regex memories (or values returned by the m// operator
-in list context) are tainted.
+in list context) are tainted. This feature is useful when regex operations
+on tainted data aren't meant to extract safe substrings, but to perform
+other transformations.
-This feature is useful when regex operations on tainted data aren't
-meant to extract safe substrings, but to perform other transformations.
+When C<use re 'eval'> is in effect, a regex is allowed to contain
+C<(?{ ... })> zero-width assertions (which may not be interpolated in
+the regex). That is normally disallowed, since it is a potential security
+risk. Note that this pragma is ignored when perl detects tainted data,
+i.e. evaluation is always disallowed with tainted data. See
+L<perlre/(?{ code })>.
See L<perlmodlib/Pragmatic Modules>.
=cut
my %bitmask = (
-taint => 0x00100000
+taint => 0x00100000,
+eval => 0x00200000,
);
sub bits {
#define HINT_LOCALIZE_HH 0x00020000 /* %^H needs to be copied */
#define HINT_RE_TAINT 0x00100000
+#define HINT_RE_EVAL 0x00200000
/* Various states of an input record separator SV (rs, nrs) */
#define RsSNARF(sv) (! SvOK(sv))
passed an invalid file specification to Perl, or you've found a
case the conversion routines don't handle. Drat.
+=item %s: Eval-group in insecure regular expression
+
+(F) Perl detected tainted data when trying to compile a regular expression
+that contains the C<(?{ ... })> zero-width assertion, which is unsafe.
+See L<perlre/(?{ code })>, and L<perlsec>.
+
+=item %s: Eval-group not allowed, use re 'eval'
+
+(F) A regular expression contained the C<(?{ ... })> zero-width assertion,
+but that construct is only allowed when the C<use re 'eval'> pragma is
+in effect. See L<perlre/(?{ code })>.
+
+=item %s: Eval-group not allowed at run time
+
+(F) Perl tried to compile a regular expression containing the C<(?{ ... })>
+zero-width assertion at run time, at it would when the pattern contains
+interpolated values. Since this is a risk to security, it is not allowed.
+If you insist, you may still do this by explicitly building the pattern
+from an interpolated string at run time and using that in an eval().
+See L<perlre/(?{ code })>.
+
=item Excessively long <> operator
(F) The contents of a <> operator may not exceed the maximum size of a
succeeds. C<code> is not interpolated. Currently the rules to
determine where the C<code> ends are somewhat convoluted.
+Owing to the risks to security, this is only available when the
+C<use re 'eval'> pragma is used, and then only for patterns that don't
+have any variables that must be interpolated at run time.
+
The C<code> is properly scoped in the following sense: if the assertion
is backtracked (compare L<"Backtracking">), all the changes introduced after
C<local>isation are undone, so
The above assignment to $^R is properly localized, thus the old value of $^R
is restored if the assertion is backtracked (compare L<"Backtracking">).
-B<WARNING>: This is a grave security risk for arbitrarily interpolated
-patterns. It introduces security holes in previously safe programs.
-A fix to Perl, and to this documentation, will be forthcoming prior
-to the actual 5.005 release.
-
=item C<(?E<gt>pattern)>
An "independent" subexpression. Matches the substring that a
regcomp_rx->data->data[n+2] = (void*)sop;
SvREFCNT_dec(sv);
} else { /* First pass */
+ if (curcop == &compiling) {
+ if (!(hints & HINT_RE_EVAL))
+ FAIL("Eval-group not allowed, use re 'eval'");
+ }
+ else {
+ FAIL("Eval-group not allowed at run time");
+ }
if (tainted)
FAIL("Eval-group in insecure regular expression");
}
@x = foo(' x y z ');
print "you die joe!\n" unless "@x" eq 'x y z';
########
+use re 'eval';
/(?{"{"})/ # Check it outside of eval too
EXPECT
-Sequence (?{...}) not terminated or not {}-balanced at - line 1, within pattern
-/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1.
+Sequence (?{...}) not terminated or not {}-balanced at - line 2, within pattern
+/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 2.
########
+use re 'eval';
/(?{"{"}})/ # Check it outside of eval too
EXPECT
Unmatched right bracket at (re_eval 1) line 1, at end of line
syntax error at (re_eval 1) line 1, near ""{"}"
-Compilation failed in regexp at - line 1.
+Compilation failed in regexp at - line 2.
########
BEGIN { @ARGV = qw(a b c) }
BEGIN { print "argv <@ARGV>\nbegin <",shift,">\n" }
# $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $
-print "1..123\n";
+print "1..124\n";
-chdir 't' if -d 't';
-@INC = "../lib";
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = "../lib" if -d "../lib";
+}
eval 'use Config'; # Defaults assumed if this fails
+use re 'eval';
$x = "abc\ndef\n";
$code = '{$blah = 45}';
$blah = 12;
-/(?$code)/;
+eval { /(?$code)/ };
+print "not " unless $@ and $@ =~ /not allowed at run time/ and $blah == 12;
+print "ok $test\n";
+$test++;
+
+$code = '{$blah = 45}';
+$blah = 12;
+eval "/(?$code)/";
print "not " if $blah != 45;
print "ok $test\n";
$test++;
# If you want to add a regular expression test that can't be expressed
# in this format, don't add it here: put it in op/pat.t instead.
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib' if -d '../lib';
+}
+
+use re 'eval';
+
$iters = shift || 1; # Poor man performance suite, 10000 is OK.
open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') ||
#!./perl
-# $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib' if -d '../lib';
+}
print "1..70\n";
EOL
$^R = 'junk';
+use re 'eval';
$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' .
' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' .
' lowercase $@%#MiXeD$@%# ';