Spelling nits from Debian bug list...
[p5sagit/p5-mst-13.2.git] / ext / re / re.pm
CommitLineData
b3eb6a9b 1package re;
2
a5ca303d 3our $VERSION = 0.06_02;
56953603 4
b3eb6a9b 5=head1 NAME
6
7re - Perl pragma to alter regular expression behaviour
8
9=head1 SYNOPSIS
10
e4d48cc9 11 use re 'taint';
12 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 13
2cd61cdb 14 $pat = '(?{ $foo = 1 })';
e4d48cc9 15 use re 'eval';
2cd61cdb 16 /foo${pat}bar/; # won't fail (when not under -T switch)
e4d48cc9 17
18 {
19 no re 'taint'; # the default
20 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
21
22 no re 'eval'; # the default
2cd61cdb 23 /foo${pat}bar/; # disallowed (with or without -T switch)
e4d48cc9 24 }
b3eb6a9b 25
1e2e3d02 26 use re 'debug'; # output debugging info during
27 /^(.*)$/s; # compile and run time
28
2cd61cdb 29
02ea72ae 30 use re 'debugcolor'; # same as 'debug', but with colored output
31 ...
32
a3621e74 33 use re qw(Debug All); # Finer tuned debugging options.
1e2e3d02 34 use re qw(Debug More);
35 no re qw(Debug ALL); # Turn of all re dugging in this scope
a3621e74 36
3ffabb8c 37(We use $^X in these examples because it's tainted by default.)
38
b3eb6a9b 39=head1 DESCRIPTION
40
41When C<use re 'taint'> is in effect, and a tainted string is the target
42of a regex, the regex memories (or values returned by the m// operator
e4d48cc9 43in list context) are tainted. This feature is useful when regex operations
44on tainted data aren't meant to extract safe substrings, but to perform
45other transformations.
b3eb6a9b 46
e4d48cc9 47When C<use re 'eval'> is in effect, a regex is allowed to contain
2cd61cdb 48C<(?{ ... })> zero-width assertions even if regular expression contains
ffbc6a93 49variable interpolation. That is normally disallowed, since it is a
2cd61cdb 50potential security risk. Note that this pragma is ignored when the regular
51expression is obtained from tainted data, i.e. evaluation is always
3c4b39be 52disallowed with tainted regular expressions. See L<perlre/(?{ code })>.
2cd61cdb 53
ffbc6a93 54For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8 55expressions (i.e., the result of C<qr//>) is I<not> considered variable
56interpolation. Thus:
2cd61cdb 57
58 /foo${pat}bar/
59
ffbc6a93 60I<is> allowed if $pat is a precompiled regular expression, even
2cd61cdb 61if $pat contains C<(?{ ... })> assertions.
62
ffbc6a93 63When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb 64compiling and using regular expressions. The output is the same as that
65obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
66B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae 67of the match. Using C<debugcolor> instead of C<debug> enables a
68form of output that can be used to get a colorful display on terminals
69that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
70comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 71strings on/off, pre-point part on/off.
2cd61cdb 72See L<perldebug/"Debugging regular expressions"> for additional info.
73
a3621e74 74Similarly C<use re 'Debug'> produces debugging output, the difference
75being that it allows the fine tuning of what debugging output will be
be8e71aa 76emitted. Options are divided into three groups, those related to
77compilation, those related to execution and those related to special
78purposes. The options are as follows:
79
80=over 4
81
82=item Compile related options
83
84=over 4
85
86=item COMPILE
87
88Turns on all compile related debug options.
89
90=item PARSE
91
92Turns on debug output related to the process of parsing the pattern.
93
94=item OPTIMISE
95
96Enables output related to the optimisation phase of compilation.
97
98=item TRIE_COMPILE
99
100Detailed info about trie compilation.
101
102=item DUMP
103
104Dump the final program out after it is compiled and optimised.
105
106=item OFFSETS
107
108Dump offset information. This can be used to see how regops correlate
109to the pattern. Output format is
110
111 NODENUM:POSITION[LENGTH]
112
113Where 1 is the position of the first char in the string. Note that position
114can be 0, or larger than the actual length of the pattern, likewise length
115can be zero.
116
117=back
118
119=item Execute related options
120
121=over 4
122
123=item EXECUTE
124
125Turns on all execute related debug options.
126
127=item MATCH
128
129Turns on debugging of the main matching loop.
130
131=item TRIE_EXECUTE
132
133Extra debugging of how tries execute.
134
135=item INTUIT
136
137Enable debugging of start point optimisations.
138
139=back
140
141=item Extra debugging options
142
143=over 4
144
145=item EXTRA
146
147Turns on all "extra" debugging options.
148
149=item TRIE_MORE
150
151Enable enhanced TRIE debugging. Enhances both TRIE_EXECUTE
152and TRIE_COMPILE.
153
154=item OFFSETS_DEBUG
155
156Enable debugging of offsets information. This emits copious
157amounts of trace information and doesnt mesh well with other
158debug options.
159
160Almost definately only useful to people hacking
161on the offsets part of the debug engine.
162
163=back
164
165=item Other useful flags
166
167These are useful shortcuts to save on the typing.
168
169=over 4
170
171=item ALL
172
173Enable all compile and execute options at once.
174
175=item All
176
177Enable DUMP and all execute options. Equivelent to:
178
179 use re 'debug';
180
181=item MORE
182
183=item More
184
185Enable TRIE_MORE and all execute compile and execute options.
186
dba3f186 187=back
be8e71aa 188
dba3f186 189=back
a3621e74 190
1e2e3d02 191As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
192lexically scoped, as the other directives are. However they have both
193compile-time and run-time effects.
b3eb6a9b 194
195See L<perlmodlib/Pragmatic Modules>.
196
197=cut
198
918c0b2d 199# N.B. File::Basename contains a literal for 'taint' as a fallback. If
200# taint is changed here, File::Basename must be updated as well.
b3eb6a9b 201my %bitmask = (
9cfe5470 202taint => 0x00100000, # HINT_RE_TAINT
203eval => 0x00200000, # HINT_RE_EVAL
b3eb6a9b 204);
205
02ea72ae 206sub setcolor {
207 eval { # Ignore errors
208 require Term::Cap;
209
210 my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
8d300b32 211 my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
02ea72ae 212 my @props = split /,/, $props;
c712d376 213 my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
02ea72ae 214
c712d376 215 $colors =~ s/\0//g;
216 $ENV{PERL_RE_COLORS} = $colors;
02ea72ae 217 };
f9f4320a 218 if ($@) {
219 $ENV{PERL_RE_COLORS}||=qq'\t\t> <\t> <\t\t'
220 }
894be9b7 221
02ea72ae 222}
223
a3621e74 224my %flags = (
be8e71aa 225 COMPILE => 0x0000FF,
226 PARSE => 0x000001,
227 OPTIMISE => 0x000002,
a5ca303d 228 TRIEC => 0x000004,
be8e71aa 229 DUMP => 0x000008,
be8e71aa 230
231 EXECUTE => 0x00FF00,
232 INTUIT => 0x000100,
233 MATCH => 0x000200,
a5ca303d 234 TRIEE => 0x000400,
be8e71aa 235
236 EXTRA => 0xFF0000,
a5ca303d 237 TRIEM => 0x010000,
238 OFFSETS => 0x020000,
239 OFFSETSDBG => 0x040000,
240 STATE => 0x080000,
241 OPTIMISEM => 0x100000,
a3621e74 242);
786e8c11 243$flags{ALL} = -1;
be8e71aa 244$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
894be9b7 245$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
a5ca303d 246$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
ddc5bc0f 247$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
a5ca303d 248$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
a3621e74 249
894be9b7 250my $installed;
9b47c5f6 251my $installed_error;
f9f4320a 252
253sub _load_unload {
254 my ($on)= @_;
255 if ($on) {
894be9b7 256 if ( ! defined($installed) ) {
257 require XSLoader;
9b47c5f6 258 $installed = eval { XSLoader::load('re') } || 0;
259 $installed_error = $@;
894be9b7 260 }
261 if ( ! $installed ) {
9b47c5f6 262 die "'re' not installed!? ($installed_error)";
dba3f186 263 } else {
264 # We call install() every time, as if we didn't, we wouldn't
265 # "see" any changes to the color environment var since
266 # the last time it was called.
267
268 # install() returns an integer, which if casted properly
269 # in C resolves to a structure containing the regex
270 # hooks. Setting it to a random integer will guarantee
271 # segfaults.
272 $^H{regcomp} = install();
894be9b7 273 }
f9f4320a 274 } else {
275 delete $^H{regcomp};
276 }
380e0b81 277}
278
b3eb6a9b 279sub bits {
56953603 280 my $on = shift;
b3eb6a9b 281 my $bits = 0;
2570cdf1 282 unless (@_) {
f9f4320a 283 return;
b3eb6a9b 284 }
a3621e74 285 foreach my $idx (0..$#_){
286 my $s=$_[$idx];
287 if ($s eq 'Debug' or $s eq 'Debugcolor') {
f9f4320a 288 setcolor() if $s =~/color/i;
a3621e74 289 ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
a3621e74 290 for my $idx ($idx+1..$#_) {
291 if ($flags{$_[$idx]}) {
292 if ($on) {
293 ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
294 } else {
295 ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
296 }
297 } else {
298 require Carp;
299 Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
1e2e3d02 300 join(", ",sort keys %flags ) );
a3621e74 301 }
302 }
380e0b81 303 _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
a3621e74 304 last;
305 } elsif ($s eq 'debug' or $s eq 'debugcolor') {
f9f4320a 306 setcolor() if $s =~/color/i;
380e0b81 307 _load_unload($on);
a3621e74 308 } elsif (exists $bitmask{$s}) {
309 $bits |= $bitmask{$s};
310 } else {
311 require Carp;
312 Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
313 join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
314 ")");
315 }
56953603 316 }
b3eb6a9b 317 $bits;
318}
319
320sub import {
321 shift;
2570cdf1 322 $^H |= bits(1, @_);
b3eb6a9b 323}
324
325sub unimport {
326 shift;
2570cdf1 327 $^H &= ~ bits(0, @_);
b3eb6a9b 328}
329
3301;