Commit | Line | Data |
b3eb6a9b |
1 | package re; |
2 | |
b8f647c3 |
3 | our $VERSION = 0.06_03; |
56953603 |
4 | |
b3eb6a9b |
5 | =head1 NAME |
6 | |
7 | re - Perl pragma to alter regular expression behaviour |
8 | |
9 | =head1 SYNOPSIS |
10 | |
e4d48cc9 |
11 | use re 'taint'; |
12 | ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here |
b3eb6a9b |
13 | |
2cd61cdb |
14 | $pat = '(?{ $foo = 1 })'; |
e4d48cc9 |
15 | use re 'eval'; |
2cd61cdb |
16 | /foo${pat}bar/; # won't fail (when not under -T switch) |
e4d48cc9 |
17 | |
18 | { |
19 | no re 'taint'; # the default |
20 | ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here |
21 | |
22 | no re 'eval'; # the default |
2cd61cdb |
23 | /foo${pat}bar/; # disallowed (with or without -T switch) |
e4d48cc9 |
24 | } |
b3eb6a9b |
25 | |
1e2e3d02 |
26 | use re 'debug'; # output debugging info during |
27 | /^(.*)$/s; # compile and run time |
28 | |
2cd61cdb |
29 | |
02ea72ae |
30 | use re 'debugcolor'; # same as 'debug', but with colored output |
31 | ... |
32 | |
a3621e74 |
33 | use re qw(Debug All); # Finer tuned debugging options. |
1e2e3d02 |
34 | use re qw(Debug More); |
fe759410 |
35 | no re qw(Debug ALL); # Turn off all re debugging in this scope |
a3621e74 |
36 | |
3ffabb8c |
37 | (We use $^X in these examples because it's tainted by default.) |
38 | |
b3eb6a9b |
39 | =head1 DESCRIPTION |
40 | |
41 | When C<use re 'taint'> is in effect, and a tainted string is the target |
42 | of a regex, the regex memories (or values returned by the m// operator |
e4d48cc9 |
43 | in list context) are tainted. This feature is useful when regex operations |
44 | on tainted data aren't meant to extract safe substrings, but to perform |
45 | other transformations. |
b3eb6a9b |
46 | |
e4d48cc9 |
47 | When C<use re 'eval'> is in effect, a regex is allowed to contain |
2cd61cdb |
48 | C<(?{ ... })> zero-width assertions even if the regular expression contains |
ffbc6a93 |
49 | variable interpolation. That is normally disallowed, since it is a |
2cd61cdb |
50 | potential security risk. Note that this pragma is ignored when the regular |
51 | expression is obtained from tainted data, i.e. evaluation is always |
3c4b39be |
52 | disallowed with tainted regular expressions. See L<perlre/(?{ code })>. |
2cd61cdb |
53 | |
ffbc6a93 |
54 | For the purpose of this pragma, interpolation of precompiled regular |
0a92e3a8 |
55 | expressions (i.e., the result of C<qr//>) is I<not> considered variable |
56 | interpolation. Thus: |
2cd61cdb |
57 | |
58 | /foo${pat}bar/ |
59 | |
ffbc6a93 |
60 | I<is> allowed if $pat is a precompiled regular expression, even |
2cd61cdb |
61 | if $pat contains C<(?{ ... })> assertions. |
62 | |
ffbc6a93 |
63 | When C<use re 'debug'> is in effect, perl emits debugging messages when |
2cd61cdb |
64 | compiling and using regular expressions. The output is the same as that |
65 | obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the |
66 | B<-Dr> switch. It may be quite voluminous depending on the complexity |
02ea72ae |
67 | of the match. Using C<debugcolor> instead of C<debug> enables a |
68 | form of output that can be used to get a colorful display on terminals |
69 | that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a |
70 | comma-separated list of C<termcap> properties to use for highlighting |
ffbc6a93 |
71 | strings on/off, pre-point part on/off. |
2cd61cdb |
72 | See L<perldebug/"Debugging regular expressions"> for additional info. |
73 | |
a3621e74 |
74 | Similarly C<use re 'Debug'> produces debugging output, the difference |
75 | being that it allows the fine tuning of what debugging output will be |
be8e71aa |
76 | emitted. Options are divided into three groups, those related to |
77 | compilation, those related to execution and those related to special |
78 | purposes. The options are as follows: |
79 | |
80 | =over 4 |
81 | |
82 | =item Compile related options |
83 | |
84 | =over 4 |
85 | |
86 | =item COMPILE |
87 | |
88 | Turns on all compile related debug options. |
89 | |
90 | =item PARSE |
91 | |
92 | Turns on debug output related to the process of parsing the pattern. |
93 | |
94 | =item OPTIMISE |
95 | |
96 | Enables output related to the optimisation phase of compilation. |
97 | |
24b23f37 |
98 | =item TRIEC |
be8e71aa |
99 | |
100 | Detailed info about trie compilation. |
101 | |
102 | =item DUMP |
103 | |
104 | Dump the final program out after it is compiled and optimised. |
105 | |
be8e71aa |
106 | |
107 | =back |
108 | |
109 | =item Execute related options |
110 | |
111 | =over 4 |
112 | |
113 | =item EXECUTE |
114 | |
115 | Turns on all execute related debug options. |
116 | |
117 | =item MATCH |
118 | |
119 | Turns on debugging of the main matching loop. |
120 | |
24b23f37 |
121 | =item TRIEE |
be8e71aa |
122 | |
123 | Extra debugging of how tries execute. |
124 | |
125 | =item INTUIT |
126 | |
127 | Enable debugging of start point optimisations. |
128 | |
129 | =back |
130 | |
131 | =item Extra debugging options |
132 | |
133 | =over 4 |
134 | |
135 | =item EXTRA |
136 | |
137 | Turns on all "extra" debugging options. |
138 | |
24b23f37 |
139 | =item TRIEM |
140 | |
141 | Enable enhanced TRIE debugging. Enhances both TRIEE |
142 | and TRIEC. |
143 | |
144 | =item STATE |
145 | |
146 | Enable debugging of states in the engine. |
147 | |
148 | =item STACK |
be8e71aa |
149 | |
24b23f37 |
150 | Enable debugging of the recursion stack in the engine. Enabling |
151 | or disabling this option automatically does the same for debugging |
152 | states as well. The output from this can be quite large. |
153 | |
154 | =item OPTIMISEM |
155 | |
156 | Enable enhanced optimisation debugging and start point optimisations. |
157 | Probably not useful except when debugging the regex engine itself. |
158 | |
159 | =item OFFSETS |
160 | |
161 | Dump offset information. This can be used to see how regops correlate |
162 | to the pattern. Output format is |
163 | |
164 | NODENUM:POSITION[LENGTH] |
165 | |
166 | Where 1 is the position of the first char in the string. Note that position |
167 | can be 0, or larger than the actual length of the pattern, likewise length |
168 | can be zero. |
be8e71aa |
169 | |
24b23f37 |
170 | =item OFFSETSDBG |
be8e71aa |
171 | |
172 | Enable debugging of offsets information. This emits copious |
fe759410 |
173 | amounts of trace information and doesn't mesh well with other |
be8e71aa |
174 | debug options. |
175 | |
fe759410 |
176 | Almost definitely only useful to people hacking |
be8e71aa |
177 | on the offsets part of the debug engine. |
178 | |
179 | =back |
180 | |
181 | =item Other useful flags |
182 | |
183 | These are useful shortcuts to save on the typing. |
184 | |
185 | =over 4 |
186 | |
187 | =item ALL |
188 | |
189 | Enable all debugging options (compile, execute and extra) at once. |
190 | |
191 | =item All |
192 | |
fe759410 |
193 | Enable DUMP and all execute options. Equivalent to: |
be8e71aa |
194 | |
195 | use re 'debug'; |
196 | |
197 | =item MORE |
198 | |
199 | =item More |
200 | |
24b23f37 |
201 | Enable the options enabled by "All", plus STATE, TRIEC and TRIEM. |
be8e71aa |
202 | |
dba3f186 |
203 | =back |
be8e71aa |
204 | |
dba3f186 |
205 | =back |
a3621e74 |
206 | |
1e2e3d02 |
207 | As of 5.9.5 the directive C<use re 'debug'> and its equivalents are |
208 | lexically scoped, as the other directives are. However they have both |
209 | compile-time and run-time effects. |
b3eb6a9b |
210 | |
211 | See L<perlmodlib/Pragmatic Modules>. |
212 | |
213 | =cut |
214 | |
918c0b2d |
# Hint bits that the 'taint' and 'eval' subpragmas set or clear in $^H.
#
# N.B. File::Basename contains a literal for 'taint' as a fallback.  If
# taint is changed here, File::Basename must be updated as well.
my %bitmask = (
    taint => 1 << 20,    # HINT_RE_TAINT (0x00100000)
    eval  => 1 << 21,    # HINT_RE_EVAL  (0x00200000)
);
221 | |
02ea72ae |
# setcolor()
#
# Populate $ENV{PERL_RE_COLORS} for the 'debugcolor' / 'Debugcolor'
# subpragmas.
#
# The termcap capability names are read from $ENV{PERL_RE_TC} (a
# comma-separated list), defaulting to 'md,me,so,se,us,ue'.  Each capability
# is looked up through Term::Cap, the results are joined with tabs, NUL
# bytes are stripped, and the string is stored in $ENV{PERL_RE_COLORS}.
#
# This is deliberately best effort: if anything in the lookup dies (for
# example Term::Cap cannot be loaded), a plain-text fallback is stored
# instead, unless $ENV{PERL_RE_COLORS} already holds a true value.
#
# Takes no arguments and returns nothing useful.
sub setcolor {
    # Track success through the eval's own result rather than by testing $@
    # afterwards, so the outcome cannot be confused by a clobbered $@.
    my $ok = eval {
        require Term::Cap;

        # Explicit class-method call (no indirect object syntax).
        # Supplying OSPEED avoids a warning from Term::Cap.
        my $terminal = Term::Cap->Tgetent({OSPEED => 9600});
        my $props    = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
        my @props    = split /,/, $props;
        my $colors   = join "\t", map { $terminal->Tputs($_, 1) } @props;

        $colors =~ s/\0//g;
        $ENV{PERL_RE_COLORS} = $colors;
        1;    # the assignment above may be an empty (false) string
    };

    if (!$ok) {
        # Ignore the error itself; just make sure some value is present.
        $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
    }

    return;
}
239 | |
a3621e74 |
# Bit values for the fine-grained 'Debug' subpragma.  Each name maps to the
# bit(s) that bits() sets or clears in ${^RE_DEBUG_FLAGS}.  COMPILE, EXECUTE
# and EXTRA are byte-wide masks covering their whole group.
my %flags = (
    # Compile related options (low byte).
    PARSE      => 0x000001,
    OPTIMISE   => 0x000002,
    TRIEC      => 0x000004,
    DUMP       => 0x000008,
    COMPILE    => 0x0000FF,

    # Execute related options (second byte).
    INTUIT     => 0x000100,
    MATCH      => 0x000200,
    TRIEE      => 0x000400,
    EXECUTE    => 0x00FF00,

    # Extra debugging options (third byte).
    TRIEM      => 0x010000,
    OFFSETS    => 0x020000,
    OFFSETSDBG => 0x040000,
    STATE      => 0x080000,
    OPTIMISEM  => 0x100000,
    STACK      => 0x280000,    # includes the STATE bit as well
    EXTRA      => 0xFF0000,
);

# Shortcut names built from the primitive flags above.
$flags{ALL} = -1;    # every bit set
{
    my $dump_exec = $flags{DUMP} | $flags{EXECUTE};

    $flags{$_}    = $dump_exec for qw(All all);
    $flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
    $flags{$_}    = $dump_exec | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}
        for qw(More MORE);
    $flags{State} = $dump_exec | $flags{STATE};
    $flags{TRIE}  = $dump_exec | $flags{TRIEC};
}
a3621e74 |
266 | |
894be9b7 |
# Result of the one-time attempt to load the XS half of this module:
# undef until tried, then true on success or 0 on failure.
my $installed;
# The $@ captured from that load attempt, reported if loading failed.
my $installed_error;

# _load_unload($on)
#
# With a true $on, make sure the XS code is loaded (dying if it cannot be)
# and store the value returned by install() in $^H{regcomp}.  With a false
# $on, remove $^H{regcomp} again.
sub _load_unload {
    my $enable = shift;

    return delete $^H{regcomp} unless $enable;

    # Only attempt the XS load once; remember the outcome and any error.
    unless (defined $installed) {
        require XSLoader;
        $installed       = eval { XSLoader::load('re') } || 0;
        $installed_error = $@;
    }

    die "'re' not installed!? ($installed_error)" unless $installed;

    # We call install() every time, as if we didn't, we wouldn't
    # "see" any changes to the color environment var since
    # the last time it was called.
    #
    # install() returns an integer, which if cast properly
    # in C resolves to a structure containing the regex
    # hooks. Setting it to a random integer will guarantee
    # segfaults.
    return $^H{regcomp} = install();
}
295 | |
b3eb6a9b |
# bits($on, @subpragmas)
#
# Shared worker for import() and unimport().  $on is true when called for
# "use re" and false for "no re".  Walks the subpragma list and:
#
#   * for 'Debug' / 'Debugcolor', treats every remaining argument as a
#     debug flag name, sets or clears the matching bits in
#     ${^RE_DEBUG_FLAGS}, loads or unloads the debugging engine, and stops;
#   * for 'debug' / 'debugcolor', loads or unloads the debugging engine;
#   * for names found in %bitmask, accumulates their hint bits.
#
# Unknown names and an empty list only produce a Carp warning.
# Returns the accumulated hint bits for the caller to apply to $^H.
sub bits {
    my ($on, @requested) = @_;
    my $bits = 0;

    if (!@requested) {
        require Carp;
        Carp::carp("Useless use of \"re\" pragma");
    }

    while (@requested) {
        my $s = shift @requested;

        if ($s eq 'Debug' or $s eq 'Debugcolor') {
            setcolor() if $s =~ /color/i;
            ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};

            # Everything after 'Debug' is a flag name, not a subpragma.
            for my $name (@requested) {
                my $mask = $flags{$name};
                if (!$mask) {
                    require Carp;
                    Carp::carp("Unknown \"re\" Debug flag '$name', possible flags: ",
                               join(", ",sort keys %flags ) );
                }
                elsif ($on) {
                    ${^RE_DEBUG_FLAGS} |= $mask;
                }
                else {
                    ${^RE_DEBUG_FLAGS} &= ~ $mask;
                }
            }

            # When switching flags off, keep the engine installed for as
            # long as any debug flag is still set.
            _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
            last;
        }

        if ($s eq 'debug' or $s eq 'debugcolor') {
            setcolor() if $s =~ /color/i;
            _load_unload($on);
        }
        elsif (exists $bitmask{$s}) {
            $bits |= $bitmask{$s};
        }
        else {
            require Carp;
            Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
                       join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
                       ")");
        }
    }

    return $bits;
}
337 | |
# import($class, @subpragmas)
#
# Called for "use re ...".  Passes the subpragma names to bits() with the
# "on" indicator and ORs the returned hint bits into $^H.
sub import {
    my ($class, @subpragmas) = @_;
    $^H |= bits(1, @subpragmas);
}
342 | |
# unimport($class, @subpragmas)
#
# Called for "no re ...".  Passes the subpragma names to bits() with the
# "off" indicator and clears the returned hint bits from $^H.
sub unimport {
    my ($class, @subpragmas) = @_;
    $^H &= ~ bits(0, @subpragmas);
}
347 | |
348 | 1; |