Commit | Line | Data |
b3eb6a9b |
1 | package re; |
2 | |
380e0b81 |
3 | our $VERSION = 0.06_01; |
56953603 |
4 | |
b3eb6a9b |
5 | =head1 NAME |
6 | |
7 | re - Perl pragma to alter regular expression behaviour |
8 | |
9 | =head1 SYNOPSIS |
10 | |
e4d48cc9 |
11 | use re 'taint'; |
12 | ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here |
b3eb6a9b |
13 | |
2cd61cdb |
14 | $pat = '(?{ $foo = 1 })'; |
e4d48cc9 |
15 | use re 'eval'; |
2cd61cdb |
16 | /foo${pat}bar/; # won't fail (when not under -T switch) |
e4d48cc9 |
17 | |
18 | { |
19 | no re 'taint'; # the default |
20 | ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here |
21 | |
22 | no re 'eval'; # the default |
2cd61cdb |
23 | /foo${pat}bar/; # disallowed (with or without -T switch) |
e4d48cc9 |
24 | } |
b3eb6a9b |
25 | |
0a92e3a8 |
26 | use re 'debug'; # NOT lexically scoped (as others are) |
27 | /^(.*)$/s; # output debugging info during |
28 | # compile and run time |
2cd61cdb |
29 | |
02ea72ae |
30 | use re 'debugcolor'; # same as 'debug', but with colored output |
31 | ... |
32 | |
a3621e74 |
33 | use re qw(Debug All); # Finer tuned debugging options. |
34 | use re qw(Debug More); # Similarly not lexically scoped. |
35 | no re qw(Debug ALL); # Turn of all re dugging and unload the module. |
36 | |
3ffabb8c |
37 | (We use $^X in these examples because it's tainted by default.) |
38 | |
b3eb6a9b |
39 | =head1 DESCRIPTION |
40 | |
41 | When C<use re 'taint'> is in effect, and a tainted string is the target |
42 | of a regex, the regex memories (or values returned by the m// operator |
e4d48cc9 |
43 | in list context) are tainted. This feature is useful when regex operations |
44 | on tainted data aren't meant to extract safe substrings, but to perform |
45 | other transformations. |
b3eb6a9b |
46 | |
e4d48cc9 |
47 | When C<use re 'eval'> is in effect, a regex is allowed to contain |
2cd61cdb |
48 | C<(?{ ... })> zero-width assertions even if regular expression contains |
ffbc6a93 |
49 | variable interpolation. That is normally disallowed, since it is a |
2cd61cdb |
50 | potential security risk. Note that this pragma is ignored when the regular |
51 | expression is obtained from tainted data, i.e. evaluation is always |
3c4b39be |
52 | disallowed with tainted regular expressions. See L<perlre/(?{ code })>. |
2cd61cdb |
53 | |
ffbc6a93 |
54 | For the purpose of this pragma, interpolation of precompiled regular |
0a92e3a8 |
55 | expressions (i.e., the result of C<qr//>) is I<not> considered variable |
56 | interpolation. Thus: |
2cd61cdb |
57 | |
58 | /foo${pat}bar/ |
59 | |
ffbc6a93 |
60 | I<is> allowed if $pat is a precompiled regular expression, even |
2cd61cdb |
61 | if $pat contains C<(?{ ... })> assertions. |
62 | |
ffbc6a93 |
63 | When C<use re 'debug'> is in effect, perl emits debugging messages when |
2cd61cdb |
64 | compiling and using regular expressions. The output is the same as that |
65 | obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the |
66 | B<-Dr> switch. It may be quite voluminous depending on the complexity |
02ea72ae |
67 | of the match. Using C<debugcolor> instead of C<debug> enables a |
68 | form of output that can be used to get a colorful display on terminals |
69 | that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a |
70 | comma-separated list of C<termcap> properties to use for highlighting |
ffbc6a93 |
71 | strings on/off, pre-point part on/off. |
2cd61cdb |
72 | See L<perldebug/"Debugging regular expressions"> for additional info. |
73 | |
a3621e74 |
74 | Similarly C<use re 'Debug'> produces debugging output, the difference |
75 | being that it allows the fine tuning of what debugging output will be |
76 | emitted. Following the 'Debug' keyword one of several options may be |
77 | provided: COMPILE, EXECUTE, TRIE_COMPILE, TRIE_EXECUTE, TRIE_MORE, |
78 | OPTIMISE, OFFSETS and ALL. Additionally the special keywords 'All' and |
79 | 'More' may be provided. 'All' represents everything but OPTIMISE and |
80 | OFFSETS and TRIE_MORE, and 'More' is similar but include TRIE_MORE. |
81 | Saying C<< no re Debug => 'EXECUTE' >> will disable executing debug |
82 | statements and saying C<< use re Debug => 'EXECUTE' >> will turn it on. Note |
83 | that these flags can be set directly via ${^RE_DEBUG_FLAGS} by using the |
84 | following flag values: |
85 | |
3dab1dad |
86 | |
f2278c82 |
87 | RE_DEBUG_COMPILE 0x001 |
88 | RE_DEBUG_EXECUTE 0x002 |
89 | RE_DEBUG_TRIE_COMPILE 0x004 |
90 | RE_DEBUG_TRIE_EXECUTE 0x008 |
91 | RE_DEBUG_TRIE_MORE 0x010 |
92 | RE_DEBUG_OPTIMISE 0x020 |
93 | RE_DEBUG_OFFSETS 0x040 |
94 | RE_DEBUG_PARSE 0x080 |
95 | RE_DEBUG_OFFSETS_DEBUG 0x100 |
a3621e74 |
96 | |
97 | The directive C<use re 'debug'> and its equivalents are I<not> lexically |
98 | scoped, as the other directives are. They have both compile-time and run-time |
99 | effects. |
b3eb6a9b |
100 | |
101 | See L<perlmodlib/Pragmatic Modules>. |
102 | |
103 | =cut |
104 | |
918c0b2d |
105 | # N.B. File::Basename contains a literal for 'taint' as a fallback. If |
106 | # taint is changed here, File::Basename must be updated as well. |
b3eb6a9b |
107 | my %bitmask = ( |
9cfe5470 |
108 | taint => 0x00100000, # HINT_RE_TAINT |
109 | eval => 0x00200000, # HINT_RE_EVAL |
b3eb6a9b |
110 | ); |
111 | |
02ea72ae |
112 | sub setcolor { |
113 | eval { # Ignore errors |
114 | require Term::Cap; |
115 | |
116 | my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning. |
8d300b32 |
117 | my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue'; |
02ea72ae |
118 | my @props = split /,/, $props; |
c712d376 |
119 | my $colors = join "\t", map {$terminal->Tputs($_,1)} @props; |
02ea72ae |
120 | |
c712d376 |
121 | $colors =~ s/\0//g; |
122 | $ENV{PERL_RE_COLORS} = $colors; |
02ea72ae |
123 | }; |
02ea72ae |
124 | } |
125 | |
a3621e74 |
126 | my %flags = ( |
f2278c82 |
127 | COMPILE => 1, |
128 | EXECUTE => 2, |
129 | TRIE_COMPILE => 4, |
130 | TRIE_EXECUTE => 8, |
131 | TRIE_MORE => 16, |
132 | OPTIMISE => 32, |
133 | OPTIMIZE => 32, # alias |
134 | OFFSETS => 64, |
135 | PARSE => 128, |
136 | OFFSETS_DEBUG => 256, |
137 | OFFSETS_OLD => 576, |
138 | ALL => 0xFFFF, |
139 | All => 15, |
140 | More => 31, |
a3621e74 |
141 | ); |
142 | |
143 | my $installed = 0; |
144 | |
380e0b81 |
145 | sub _load_unload { |
146 | my $on = shift; |
147 | require XSLoader; |
148 | XSLoader::load('re'); |
1839d50f |
149 | install($on); |
380e0b81 |
150 | } |
151 | |
b3eb6a9b |
152 | sub bits { |
56953603 |
153 | my $on = shift; |
b3eb6a9b |
154 | my $bits = 0; |
2570cdf1 |
155 | unless (@_) { |
b3eb6a9b |
156 | require Carp; |
157 | Carp::carp("Useless use of \"re\" pragma"); |
158 | } |
a3621e74 |
159 | foreach my $idx (0..$#_){ |
160 | my $s=$_[$idx]; |
161 | if ($s eq 'Debug' or $s eq 'Debugcolor') { |
162 | setcolor() if $s eq 'Debugcolor'; |
163 | ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS}; |
a3621e74 |
164 | for my $idx ($idx+1..$#_) { |
165 | if ($flags{$_[$idx]}) { |
166 | if ($on) { |
167 | ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]}; |
3dab1dad |
168 | ${^RE_DEBUG_FLAGS} |= 1 |
169 | if $flags{$_[$idx]}>2; |
a3621e74 |
170 | } else { |
171 | ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]}; |
172 | } |
173 | } else { |
174 | require Carp; |
175 | Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ", |
176 | join(", ",sort { $flags{$a} <=> $flags{$b} } keys %flags ) ); |
177 | } |
178 | } |
380e0b81 |
179 | _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS}); |
a3621e74 |
180 | last; |
181 | } elsif ($s eq 'debug' or $s eq 'debugcolor') { |
182 | setcolor() if $s eq 'debugcolor'; |
380e0b81 |
183 | _load_unload($on); |
a3621e74 |
184 | } elsif (exists $bitmask{$s}) { |
185 | $bits |= $bitmask{$s}; |
186 | } else { |
187 | require Carp; |
188 | Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ", |
189 | join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask), |
190 | ")"); |
191 | } |
56953603 |
192 | } |
b3eb6a9b |
193 | $bits; |
194 | } |
195 | |
196 | sub import { |
197 | shift; |
2570cdf1 |
198 | $^H |= bits(1, @_); |
b3eb6a9b |
199 | } |
200 | |
201 | sub unimport { |
202 | shift; |
2570cdf1 |
203 | $^H &= ~ bits(0, @_); |
b3eb6a9b |
204 | } |
205 | |
206 | 1; |