Re: [PATCH] Better version of the Aho-Corasick patch and lots of benchmarks.
[p5sagit/p5-mst-13.2.git] / ext / re / re.pm
CommitLineData
b3eb6a9b 1package re;
2
380e0b81 3our $VERSION = 0.06_01;
56953603 4
b3eb6a9b 5=head1 NAME
6
7re - Perl pragma to alter regular expression behaviour
8
9=head1 SYNOPSIS
10
e4d48cc9 11 use re 'taint';
12 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 13
2cd61cdb 14 $pat = '(?{ $foo = 1 })';
e4d48cc9 15 use re 'eval';
2cd61cdb 16 /foo${pat}bar/; # won't fail (when not under -T switch)
e4d48cc9 17
18 {
19 no re 'taint'; # the default
20 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
21
22 no re 'eval'; # the default
2cd61cdb 23 /foo${pat}bar/; # disallowed (with or without -T switch)
e4d48cc9 24 }
b3eb6a9b 25
0a92e3a8 26 use re 'debug'; # NOT lexically scoped (as others are)
27 /^(.*)$/s; # output debugging info during
28 # compile and run time
2cd61cdb 29
02ea72ae 30 use re 'debugcolor'; # same as 'debug', but with colored output
31 ...
32
a3621e74 33 use re qw(Debug All); # Finer tuned debugging options.
34 use re qw(Debug More); # Similarly not lexically scoped.
35 no re qw(Debug ALL); # Turn of all re dugging and unload the module.
36
3ffabb8c 37(We use $^X in these examples because it's tainted by default.)
38
b3eb6a9b 39=head1 DESCRIPTION
40
41When C<use re 'taint'> is in effect, and a tainted string is the target
42of a regex, the regex memories (or values returned by the m// operator
e4d48cc9 43in list context) are tainted. This feature is useful when regex operations
44on tainted data aren't meant to extract safe substrings, but to perform
45other transformations.
b3eb6a9b 46
e4d48cc9 47When C<use re 'eval'> is in effect, a regex is allowed to contain
2cd61cdb 48C<(?{ ... })> zero-width assertions even if regular expression contains
ffbc6a93 49variable interpolation. That is normally disallowed, since it is a
2cd61cdb 50potential security risk. Note that this pragma is ignored when the regular
51expression is obtained from tainted data, i.e. evaluation is always
3c4b39be 52disallowed with tainted regular expressions. See L<perlre/(?{ code })>.
2cd61cdb 53
ffbc6a93 54For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8 55expressions (i.e., the result of C<qr//>) is I<not> considered variable
56interpolation. Thus:
2cd61cdb 57
58 /foo${pat}bar/
59
ffbc6a93 60I<is> allowed if $pat is a precompiled regular expression, even
2cd61cdb 61if $pat contains C<(?{ ... })> assertions.
62
ffbc6a93 63When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb 64compiling and using regular expressions. The output is the same as that
65obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
66B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae 67of the match. Using C<debugcolor> instead of C<debug> enables a
68form of output that can be used to get a colorful display on terminals
69that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
70comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 71strings on/off, pre-point part on/off.
2cd61cdb 72See L<perldebug/"Debugging regular expressions"> for additional info.
73
a3621e74 74Similarly C<use re 'Debug'> produces debugging output, the difference
75being that it allows the fine tuning of what debugging output will be
76emitted. Following the 'Debug' keyword one of several options may be
77provided: COMPILE, EXECUTE, TRIE_COMPILE, TRIE_EXECUTE, TRIE_MORE,
78OPTIMISE, OFFSETS and ALL. Additionally the special keywords 'All' and
79'More' may be provided. 'All' represents everything but OPTIMISE and
80OFFSETS and TRIE_MORE, and 'More' is similar but include TRIE_MORE.
81Saying C<< no re Debug => 'EXECUTE' >> will disable executing debug
82statements and saying C<< use re Debug => 'EXECUTE' >> will turn it on. Note
83that these flags can be set directly via ${^RE_DEBUG_FLAGS} by using the
84following flag values:
85
3dab1dad 86
f2278c82 87 RE_DEBUG_COMPILE 0x001
88 RE_DEBUG_EXECUTE 0x002
89 RE_DEBUG_TRIE_COMPILE 0x004
90 RE_DEBUG_TRIE_EXECUTE 0x008
91 RE_DEBUG_TRIE_MORE 0x010
92 RE_DEBUG_OPTIMISE 0x020
93 RE_DEBUG_OFFSETS 0x040
94 RE_DEBUG_PARSE 0x080
95 RE_DEBUG_OFFSETS_DEBUG 0x100
a3621e74 96
97The directive C<use re 'debug'> and its equivalents are I<not> lexically
98scoped, as the other directives are. They have both compile-time and run-time
99effects.
b3eb6a9b 100
101See L<perlmodlib/Pragmatic Modules>.
102
103=cut
104
918c0b2d 105# N.B. File::Basename contains a literal for 'taint' as a fallback. If
106# taint is changed here, File::Basename must be updated as well.
b3eb6a9b 107my %bitmask = (
9cfe5470 108taint => 0x00100000, # HINT_RE_TAINT
109eval => 0x00200000, # HINT_RE_EVAL
b3eb6a9b 110);
111
02ea72ae 112sub setcolor {
113 eval { # Ignore errors
114 require Term::Cap;
115
116 my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
8d300b32 117 my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
02ea72ae 118 my @props = split /,/, $props;
c712d376 119 my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
02ea72ae 120
c712d376 121 $colors =~ s/\0//g;
122 $ENV{PERL_RE_COLORS} = $colors;
02ea72ae 123 };
02ea72ae 124}
125
a3621e74 126my %flags = (
f2278c82 127 COMPILE => 1,
128 EXECUTE => 2,
129 TRIE_COMPILE => 4,
130 TRIE_EXECUTE => 8,
131 TRIE_MORE => 16,
132 OPTIMISE => 32,
133 OPTIMIZE => 32, # alias
134 OFFSETS => 64,
135 PARSE => 128,
136 OFFSETS_DEBUG => 256,
137 OFFSETS_OLD => 576,
138 ALL => 0xFFFF,
139 All => 15,
140 More => 31,
a3621e74 141);
142
143my $installed = 0;
144
380e0b81 145sub _load_unload {
146 my $on = shift;
147 require XSLoader;
148 XSLoader::load('re');
1839d50f 149 install($on);
380e0b81 150}
151
b3eb6a9b 152sub bits {
56953603 153 my $on = shift;
b3eb6a9b 154 my $bits = 0;
2570cdf1 155 unless (@_) {
b3eb6a9b 156 require Carp;
157 Carp::carp("Useless use of \"re\" pragma");
158 }
a3621e74 159 foreach my $idx (0..$#_){
160 my $s=$_[$idx];
161 if ($s eq 'Debug' or $s eq 'Debugcolor') {
162 setcolor() if $s eq 'Debugcolor';
163 ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
a3621e74 164 for my $idx ($idx+1..$#_) {
165 if ($flags{$_[$idx]}) {
166 if ($on) {
167 ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
3dab1dad 168 ${^RE_DEBUG_FLAGS} |= 1
169 if $flags{$_[$idx]}>2;
a3621e74 170 } else {
171 ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
172 }
173 } else {
174 require Carp;
175 Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
176 join(", ",sort { $flags{$a} <=> $flags{$b} } keys %flags ) );
177 }
178 }
380e0b81 179 _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
a3621e74 180 last;
181 } elsif ($s eq 'debug' or $s eq 'debugcolor') {
182 setcolor() if $s eq 'debugcolor';
380e0b81 183 _load_unload($on);
a3621e74 184 } elsif (exists $bitmask{$s}) {
185 $bits |= $bitmask{$s};
186 } else {
187 require Carp;
188 Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
189 join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
190 ")");
191 }
56953603 192 }
b3eb6a9b 193 $bits;
194}
195
196sub import {
197 shift;
2570cdf1 198 $^H |= bits(1, @_);
b3eb6a9b 199}
200
201sub unimport {
202 shift;
2570cdf1 203 $^H &= ~ bits(0, @_);
b3eb6a9b 204}
205
2061;