Re-order ops to the implementation order in pp_sys.c - this makes a
[p5sagit/p5-mst-13.2.git] / ext / re / re.pm
CommitLineData
b3eb6a9b 1package re;
2
b8f647c3 3our $VERSION = 0.06_03;
56953603 4
b3eb6a9b 5=head1 NAME
6
7re - Perl pragma to alter regular expression behaviour
8
9=head1 SYNOPSIS
10
e4d48cc9 11 use re 'taint';
12 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 13
2cd61cdb 14 $pat = '(?{ $foo = 1 })';
e4d48cc9 15 use re 'eval';
2cd61cdb 16 /foo${pat}bar/; # won't fail (when not under -T switch)
e4d48cc9 17
18 {
19 no re 'taint'; # the default
20 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
21
22 no re 'eval'; # the default
2cd61cdb 23 /foo${pat}bar/; # disallowed (with or without -T switch)
e4d48cc9 24 }
b3eb6a9b 25
1e2e3d02 26 use re 'debug'; # output debugging info during
27 /^(.*)$/s; # compile and run time
28
2cd61cdb 29
02ea72ae 30 use re 'debugcolor'; # same as 'debug', but with colored output
31 ...
32
a3621e74 33 use re qw(Debug All); # Finer tuned debugging options.
1e2e3d02 34 use re qw(Debug More);
fe759410 35 no re qw(Debug ALL); # Turn of all re debugging in this scope
a3621e74 36
3ffabb8c 37(We use $^X in these examples because it's tainted by default.)
38
b3eb6a9b 39=head1 DESCRIPTION
40
41When C<use re 'taint'> is in effect, and a tainted string is the target
42of a regex, the regex memories (or values returned by the m// operator
e4d48cc9 43in list context) are tainted. This feature is useful when regex operations
44on tainted data aren't meant to extract safe substrings, but to perform
45other transformations.
b3eb6a9b 46
e4d48cc9 47When C<use re 'eval'> is in effect, a regex is allowed to contain
2cd61cdb 48C<(?{ ... })> zero-width assertions even if regular expression contains
ffbc6a93 49variable interpolation. That is normally disallowed, since it is a
2cd61cdb 50potential security risk. Note that this pragma is ignored when the regular
51expression is obtained from tainted data, i.e. evaluation is always
3c4b39be 52disallowed with tainted regular expressions. See L<perlre/(?{ code })>.
2cd61cdb 53
ffbc6a93 54For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8 55expressions (i.e., the result of C<qr//>) is I<not> considered variable
56interpolation. Thus:
2cd61cdb 57
58 /foo${pat}bar/
59
ffbc6a93 60I<is> allowed if $pat is a precompiled regular expression, even
2cd61cdb 61if $pat contains C<(?{ ... })> assertions.
62
ffbc6a93 63When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb 64compiling and using regular expressions. The output is the same as that
65obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
66B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae 67of the match. Using C<debugcolor> instead of C<debug> enables a
68form of output that can be used to get a colorful display on terminals
69that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
70comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 71strings on/off, pre-point part on/off.
2cd61cdb 72See L<perldebug/"Debugging regular expressions"> for additional info.
73
a3621e74 74Similarly C<use re 'Debug'> produces debugging output, the difference
75being that it allows the fine tuning of what debugging output will be
be8e71aa 76emitted. Options are divided into three groups, those related to
77compilation, those related to execution and those related to special
78purposes. The options are as follows:
79
80=over 4
81
82=item Compile related options
83
84=over 4
85
86=item COMPILE
87
88Turns on all compile related debug options.
89
90=item PARSE
91
92Turns on debug output related to the process of parsing the pattern.
93
94=item OPTIMISE
95
96Enables output related to the optimisation phase of compilation.
97
24b23f37 98=item TRIEC
be8e71aa 99
100Detailed info about trie compilation.
101
102=item DUMP
103
104Dump the final program out after it is compiled and optimised.
105
be8e71aa 106
107=back
108
109=item Execute related options
110
111=over 4
112
113=item EXECUTE
114
115Turns on all execute related debug options.
116
117=item MATCH
118
119Turns on debugging of the main matching loop.
120
24b23f37 121=item TRIEE
be8e71aa 122
123Extra debugging of how tries execute.
124
125=item INTUIT
126
127Enable debugging of start point optimisations.
128
129=back
130
131=item Extra debugging options
132
133=over 4
134
135=item EXTRA
136
137Turns on all "extra" debugging options.
138
24b23f37 139=item TRIEM
140
141Enable enhanced TRIE debugging. Enhances both TRIEE
142and TRIEC.
143
144=item STATE
145
146Enable debugging of states in the engine.
147
148=item STACK
be8e71aa 149
24b23f37 150Enable debugging of the recursion stack in the engine. Enabling
151or disabling this option automatically does the same for debugging
152states as well. This output from this can be quite large.
153
154=item OPTIMISEM
155
156Enable enhanced optimisation debugging and start point optimisations.
157Probably not useful except when debugging the regex engine itself.
158
159=item OFFSETS
160
161Dump offset information. This can be used to see how regops correlate
162to the pattern. Output format is
163
164 NODENUM:POSITION[LENGTH]
165
166Where 1 is the position of the first char in the string. Note that position
167can be 0, or larger than the actual length of the pattern, likewise length
168can be zero.
be8e71aa 169
24b23f37 170=item OFFSETSDBG
be8e71aa 171
172Enable debugging of offsets information. This emits copious
fe759410 173amounts of trace information and doesn't mesh well with other
be8e71aa 174debug options.
175
fe759410 176Almost definitely only useful to people hacking
be8e71aa 177on the offsets part of the debug engine.
178
179=back
180
181=item Other useful flags
182
183These are useful shortcuts to save on the typing.
184
185=over 4
186
187=item ALL
188
189Enable all compile and execute options at once.
190
191=item All
192
fe759410 193Enable DUMP and all execute options. Equivalent to:
be8e71aa 194
195 use re 'debug';
196
197=item MORE
198
199=item More
200
24b23f37 201Enable TRIEM and all execute compile and execute options.
be8e71aa 202
dba3f186 203=back
be8e71aa 204
dba3f186 205=back
a3621e74 206
1e2e3d02 207As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
208lexically scoped, as the other directives are. However they have both
209compile-time and run-time effects.
b3eb6a9b 210
211See L<perlmodlib/Pragmatic Modules>.
212
213=cut
214
918c0b2d 215# N.B. File::Basename contains a literal for 'taint' as a fallback. If
216# taint is changed here, File::Basename must be updated as well.
b3eb6a9b 217my %bitmask = (
9cfe5470 218taint => 0x00100000, # HINT_RE_TAINT
219eval => 0x00200000, # HINT_RE_EVAL
b3eb6a9b 220);
221
02ea72ae 222sub setcolor {
223 eval { # Ignore errors
224 require Term::Cap;
225
226 my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
8d300b32 227 my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
02ea72ae 228 my @props = split /,/, $props;
c712d376 229 my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
02ea72ae 230
c712d376 231 $colors =~ s/\0//g;
232 $ENV{PERL_RE_COLORS} = $colors;
02ea72ae 233 };
f9f4320a 234 if ($@) {
235 $ENV{PERL_RE_COLORS}||=qq'\t\t> <\t> <\t\t'
236 }
894be9b7 237
02ea72ae 238}
239
a3621e74 240my %flags = (
be8e71aa 241 COMPILE => 0x0000FF,
242 PARSE => 0x000001,
243 OPTIMISE => 0x000002,
a5ca303d 244 TRIEC => 0x000004,
be8e71aa 245 DUMP => 0x000008,
be8e71aa 246
247 EXECUTE => 0x00FF00,
248 INTUIT => 0x000100,
249 MATCH => 0x000200,
a5ca303d 250 TRIEE => 0x000400,
be8e71aa 251
252 EXTRA => 0xFF0000,
a5ca303d 253 TRIEM => 0x010000,
254 OFFSETS => 0x020000,
255 OFFSETSDBG => 0x040000,
256 STATE => 0x080000,
257 OPTIMISEM => 0x100000,
24b23f37 258 STACK => 0x280000,
a3621e74 259);
786e8c11 260$flags{ALL} = -1;
be8e71aa 261$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
894be9b7 262$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
a5ca303d 263$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
ddc5bc0f 264$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
a5ca303d 265$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
a3621e74 266
894be9b7 267my $installed;
9b47c5f6 268my $installed_error;
f9f4320a 269
270sub _load_unload {
271 my ($on)= @_;
272 if ($on) {
894be9b7 273 if ( ! defined($installed) ) {
274 require XSLoader;
9b47c5f6 275 $installed = eval { XSLoader::load('re') } || 0;
276 $installed_error = $@;
894be9b7 277 }
278 if ( ! $installed ) {
9b47c5f6 279 die "'re' not installed!? ($installed_error)";
dba3f186 280 } else {
281 # We call install() every time, as if we didn't, we wouldn't
282 # "see" any changes to the color environment var since
283 # the last time it was called.
284
285 # install() returns an integer, which if casted properly
286 # in C resolves to a structure containing the regex
287 # hooks. Setting it to a random integer will guarantee
288 # segfaults.
289 $^H{regcomp} = install();
894be9b7 290 }
f9f4320a 291 } else {
292 delete $^H{regcomp};
293 }
380e0b81 294}
295
b3eb6a9b 296sub bits {
56953603 297 my $on = shift;
b3eb6a9b 298 my $bits = 0;
2570cdf1 299 unless (@_) {
b8f647c3 300 require Carp;
301 Carp::carp("Useless use of \"re\" pragma");
b3eb6a9b 302 }
a3621e74 303 foreach my $idx (0..$#_){
304 my $s=$_[$idx];
305 if ($s eq 'Debug' or $s eq 'Debugcolor') {
f9f4320a 306 setcolor() if $s =~/color/i;
a3621e74 307 ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
a3621e74 308 for my $idx ($idx+1..$#_) {
309 if ($flags{$_[$idx]}) {
310 if ($on) {
311 ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
312 } else {
313 ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
314 }
315 } else {
316 require Carp;
317 Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
1e2e3d02 318 join(", ",sort keys %flags ) );
a3621e74 319 }
320 }
380e0b81 321 _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
a3621e74 322 last;
323 } elsif ($s eq 'debug' or $s eq 'debugcolor') {
f9f4320a 324 setcolor() if $s =~/color/i;
380e0b81 325 _load_unload($on);
a3621e74 326 } elsif (exists $bitmask{$s}) {
327 $bits |= $bitmask{$s};
328 } else {
329 require Carp;
330 Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
331 join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
332 ")");
333 }
56953603 334 }
b3eb6a9b 335 $bits;
336}
337
338sub import {
339 shift;
2570cdf1 340 $^H |= bits(1, @_);
b3eb6a9b 341}
342
343sub unimport {
344 shift;
2570cdf1 345 $^H &= ~ bits(0, @_);
b3eb6a9b 346}
347
3481;