Commit | Line | Data |
b38acab9 |
1 | package Filter::Simple; |
2 | |
dfa18578 |
3 | use Text::Balanced ':ALL'; |
b38acab9 |
4 | |
dfa18578 |
5 | use vars qw{ $VERSION @EXPORT }; |
6 | |
1b629f66 |
7 | $VERSION = '0.82'; |
b38acab9 |
8 | |
9 | use Filter::Util::Call; |
10 | use Carp; |
11 | |
dfa18578 |
12 | @EXPORT = qw( FILTER FILTER_ONLY ); |
13 | |
14 | |
b38acab9 |
15 | sub import { |
1b629f66 |
16 | if (@_>1) { shift; goto &FILTER } |
17 | else { *{caller()."::$_"} = \&$_ foreach @EXPORT } |
b38acab9 |
18 | } |
19 | |
dfa18578 |
20 | sub fail { |
1b629f66 |
21 | croak "FILTER_ONLY: ", @_; |
dfa18578 |
22 | } |
23 | |
24 | my $exql = sub { |
1b629f66 |
25 | my @bits = extract_quotelike $_[0], qr//; |
26 | return unless $bits[0]; |
27 | return \@bits; |
dfa18578 |
28 | }; |
29 | |
1b629f66 |
30 | my $ncws = qr/\s+/; |
31 | my $comment = qr/(?<![\$\@%])#.*/; |
32 | my $ws = qr/(?:$ncws|$comment)+/; |
dfa18578 |
33 | my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; |
34 | my $EOP = qr/\n\n|\Z/; |
35 | my $CUT = qr/\n=cut.*$EOP/; |
36 | my $pod_or_DATA = qr/ |
1b629f66 |
37 | ^=(?:head[1-4]|item) .*? $CUT |
38 | | ^=pod .*? $CUT |
39 | | ^=for .*? $EOP |
40 | | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP |
41 | | ^__(DATA|END)__\r?\n.* |
42 | /smx; |
dfa18578 |
43 | |
44 | my %extractor_for = ( |
1b629f66 |
45 | quotelike => [ $ws, \&extract_variable, $id, { MATCH => \&extract_quotelike } ], |
46 | regex => [ $ws, $pod_or_DATA, $id, $exql ], |
47 | string => [ $ws, $pod_or_DATA, $id, $exql ], |
48 | code => [ $ws, { DONT_MATCH => $pod_or_DATA }, |
49 | \&extract_variable, |
50 | $id, { DONT_MATCH => \&extract_quotelike } ], |
51 | code_no_comments |
52 | => [ { DONT_MATCH => $comment }, |
53 | $ncws, { DONT_MATCH => $pod_or_DATA }, |
54 | \&extract_variable, |
55 | $id, { DONT_MATCH => \&extract_quotelike } ], |
56 | executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], |
57 | executable_no_comments |
58 | => [ { DONT_MATCH => $comment }, |
59 | $ncws, { DONT_MATCH => $pod_or_DATA } ], |
60 | all => [ { MATCH => qr/(?s:.*)/ } ], |
dfa18578 |
61 | ); |
62 | |
63 | my %selector_for = ( |
1b629f66 |
64 | all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, |
65 | executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, |
66 | quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, |
67 | regex => sub { my ($t)=@_; |
68 | sub{ref() or return $_; |
69 | my ($ql,undef,$pre,$op,$ld,$pat) = @$_; |
70 | return $_->[0] unless $op =~ /^(qr|m|s)/ |
71 | || !$op && ($ld eq '/' || $ld eq '?'); |
72 | $_ = $pat; |
73 | $t->(@_); |
74 | $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; |
75 | return "$pre$ql"; |
76 | }; |
77 | }, |
78 | string => sub { my ($t)=@_; |
79 | sub{ref() or return $_; |
80 | local *args = \@_; |
81 | my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; |
82 | return $_->[0] if $op =~ /^(qr|m)/ |
83 | || !$op && ($ld1 eq '/' || $ld1 eq '?'); |
84 | if (!$op || $op eq 'tr' || $op eq 'y') { |
85 | local *_ = \$str1; |
86 | $t->(@args); |
87 | } |
88 | if ($op =~ /^(tr|y|s)/) { |
89 | local *_ = \$str2; |
90 | $t->(@args); |
91 | } |
92 | my $result = "$pre$op$ld1$str1$rd1"; |
93 | $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> |
94 | $result .= "$str2$rd2$flg"; |
95 | return $result; |
96 | }; |
97 | }, |
dfa18578 |
98 | ); |
99 | |
100 | |
101 | sub gen_std_filter_for { |
1b629f66 |
102 | my ($type, $transform) = @_; |
103 | return sub { |
104 | my $instr; |
105 | local @components; |
106 | for (extract_multiple($_,$extractor_for{$type})) { |
107 | if (ref()) { push @components, $_; $instr=0 } |
108 | elsif ($instr) { $components[-1] .= $_ } |
109 | else { push @components, $_; $instr=1 } |
110 | } |
111 | if ($type =~ /^code/) { |
112 | my $count = 0; |
113 | local $placeholder = qr/\Q$;\E(\C{4})\Q$;\E/; |
114 | my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/; |
115 | $_ = join "", |
116 | map { ref $_ ? $;.pack('N',$count++).$; : $_ } |
117 | @components; |
118 | @components = grep { ref $_ } @components; |
119 | $transform->(@_); |
120 | s/$extractor/${$components[unpack('N',$1)]}/g; |
121 | } |
122 | else { |
123 | my $selector = $selector_for{$type}->($transform); |
124 | $_ = join "", map $selector->(@_), @components; |
125 | } |
126 | } |
dfa18578 |
127 | }; |
128 | |
1b629f66 |
129 | sub FILTER (&;$) { |
130 | my $caller = caller; |
131 | my ($filter, $terminator) = @_; |
132 | no warnings 'redefine'; |
133 | *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); |
134 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
135 | } |
136 | |
dfa18578 |
137 | sub FILTER_ONLY { |
1b629f66 |
138 | my $caller = caller; |
139 | while (@_ > 1) { |
140 | my ($what, $how) = splice(@_, 0, 2); |
141 | fail "Unknown selector: $what" |
142 | unless exists $extractor_for{$what}; |
143 | fail "Filter for $what is not a subroutine reference" |
144 | unless ref $how eq 'CODE'; |
145 | push @transforms, gen_std_filter_for($what,$how); |
146 | } |
147 | my $terminator = shift; |
148 | |
149 | my $multitransform = sub { |
150 | foreach my $transform ( @transforms ) { |
151 | $transform->(@_); |
152 | } |
153 | }; |
154 | no warnings 'redefine'; |
155 | *{"${caller}::import"} = |
156 | gen_filter_import($caller,$multitransform,$terminator); |
157 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
dfa18578 |
158 | } |
159 | |
160 | my $ows = qr/(?:[ \t]+|#[^\n]*)*/; |
161 | |
b38acab9 |
162 | sub gen_filter_import { |
fbe2c49e |
163 | my ($class, $filter, $terminator) = @_; |
201f4820 |
164 | my %terminator; |
165 | my $prev_import = *{$class."::import"}{CODE}; |
b38acab9 |
166 | return sub { |
1b629f66 |
167 | my ($imported_class, @args) = @_; |
168 | my $def_terminator = |
169 | qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/; |
170 | if (!defined $terminator) { |
171 | $terminator{terminator} = $def_terminator; |
172 | } |
173 | elsif (!ref $terminator || ref $terminator eq 'Regexp') { |
174 | $terminator{terminator} = $terminator; |
175 | } |
176 | elsif (ref $terminator ne 'HASH') { |
177 | croak "Terminator must be specified as scalar or hash ref" |
178 | } |
179 | elsif (!exists $terminator->{terminator}) { |
180 | $terminator{terminator} = $def_terminator; |
181 | } |
182 | filter_add( |
183 | sub { |
184 | my ($status, $lastline); |
185 | my $count = 0; |
186 | my $data = ""; |
187 | while ($status = filter_read()) { |
188 | return $status if $status < 0; |
189 | if ($terminator{terminator} && |
190 | m/$terminator{terminator}/) { |
191 | $lastline = $_; |
192 | last; |
193 | } |
194 | $data .= $_; |
195 | $count++; |
196 | $_ = ""; |
197 | } |
198 | return $count if not $count; |
199 | $_ = $data; |
200 | $filter->($imported_class, @args) unless $status < 0; |
201 | if (defined $lastline) { |
202 | if (defined $terminator{becomes}) { |
203 | $_ .= $terminator{becomes}; |
204 | } |
205 | elsif ($lastline =~ $def_terminator) { |
206 | $_ .= $lastline; |
207 | } |
208 | } |
209 | return $count; |
210 | } |
211 | ); |
212 | if ($prev_import) { |
213 | goto &$prev_import; |
214 | } |
215 | elsif ($class->isa('Exporter')) { |
216 | $class->export_to_level(1,@_); |
217 | } |
b38acab9 |
218 | } |
219 | } |
220 | |
201f4820 |
221 | sub gen_filter_unimport { |
1b629f66 |
222 | my ($class) = @_; |
223 | return sub { |
224 | filter_del(); |
225 | goto &$prev_unimport if $prev_unimport; |
226 | } |
b38acab9 |
227 | } |
228 | |
229 | 1; |
230 | |
231 | __END__ |
232 | |
233 | =head1 NAME |
234 | |
235 | Filter::Simple - Simplified source filtering |
236 | |
fbe2c49e |
237 | |
b38acab9 |
238 | =head1 SYNOPSIS |
239 | |
240 | # in MyFilter.pm: |
241 | |
1b629f66 |
242 | package MyFilter; |
b38acab9 |
243 | |
1b629f66 |
244 | use Filter::Simple; |
245 | |
246 | FILTER { ... }; |
b38acab9 |
247 | |
1b629f66 |
248 | # or just: |
249 | # |
250 | # use Filter::Simple sub { ... }; |
b38acab9 |
251 | |
252 | # in user's code: |
253 | |
1b629f66 |
254 | use MyFilter; |
b38acab9 |
255 | |
1b629f66 |
256 | # this code is filtered |
b38acab9 |
257 | |
1b629f66 |
258 | no MyFilter; |
b38acab9 |
259 | |
1b629f66 |
260 | # this code is not |
b38acab9 |
261 | |
262 | |
263 | =head1 DESCRIPTION |
264 | |
265 | =head2 The Problem |
266 | |
267 | Source filtering is an immensely powerful feature of recent versions of Perl. |
268 | It allows one to extend the language itself (e.g. the Switch module), to |
269 | simplify the language (e.g. Language::Pythonesque), or to completely recast the |
270 | language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use |
271 | the full power of Perl as its own, recursively applied, macro language. |
272 | |
273 | The excellent Filter::Util::Call module (by Paul Marquess) provides a |
274 | usable Perl interface to source filtering, but it is often too powerful |
275 | and not nearly as simple as it could be. |
276 | |
277 | To use the module it is necessary to do the following: |
278 | |
279 | =over 4 |
280 | |
281 | =item 1. |
282 | |
283 | Download, build, and install the Filter::Util::Call module. |
55a1c97c |
284 | (If you have Perl 5.7.1 or later, this is already done for you.) |
b38acab9 |
285 | |
286 | =item 2. |
287 | |
288 | Set up a module that does a C<use Filter::Util::Call>. |
289 | |
290 | =item 3. |
291 | |
292 | Within that module, create an C<import> subroutine. |
293 | |
294 | =item 4. |
295 | |
296 | Within the C<import> subroutine do a call to C<filter_add>, passing |
297 | it either a subroutine reference. |
298 | |
299 | =item 5. |
300 | |
301 | Within the subroutine reference, call C<filter_read> or C<filter_read_exact> |
302 | to "prime" $_ with source code data from the source file that will |
303 | C<use> your module. Check the status value returned to see if any |
304 | source code was actually read in. |
305 | |
306 | =item 6. |
307 | |
308 | Process the contents of $_ to change the source code in the desired manner. |
309 | |
310 | =item 7. |
311 | |
312 | Return the status value. |
313 | |
314 | =item 8. |
315 | |
316 | If the act of unimporting your module (via a C<no>) should cause source |
317 | code filtering to cease, create an C<unimport> subroutine, and have it call |
318 | C<filter_del>. Make sure that the call to C<filter_read> or |
319 | C<filter_read_exact> in step 5 will not accidentally read past the |
320 | C<no>. Effectively this limits source code filters to line-by-line |
321 | operation, unless the C<import> subroutine does some fancy |
322 | pre-pre-parsing of the source code it's filtering. |
323 | |
324 | =back |
325 | |
326 | For example, here is a minimal source code filter in a module named |
327 | BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> |
328 | to the sequence C<die 'BANG' if $BANG> in any piece of code following a |
329 | C<use BANG;> statement (until the next C<no BANG;> statement, if any): |
330 | |
1b629f66 |
331 | package BANG; |
fbe2c49e |
332 | |
1b629f66 |
333 | use Filter::Util::Call ; |
334 | |
335 | sub import { |
336 | filter_add( sub { |
337 | my $caller = caller; |
338 | my ($status, $no_seen, $data); |
339 | while ($status = filter_read()) { |
340 | if (/^\s*no\s+$caller\s*;\s*?$/) { |
341 | $no_seen=1; |
342 | last; |
343 | } |
344 | $data .= $_; |
345 | $_ = ""; |
b38acab9 |
346 | } |
1b629f66 |
347 | $_ = $data; |
348 | s/BANG\s+BANG/die 'BANG' if \$BANG/g |
349 | unless $status < 0; |
350 | $_ .= "no $class;\n" if $no_seen; |
351 | return 1; |
352 | }) |
353 | } |
b38acab9 |
354 | |
1b629f66 |
355 | sub unimport { |
356 | filter_del(); |
357 | } |
b38acab9 |
358 | |
1b629f66 |
359 | 1 ; |
b38acab9 |
360 | |
7bf0340c |
361 | This level of sophistication puts filtering out of the reach of |
362 | many programmers. |
b38acab9 |
363 | |
364 | |
365 | =head2 A Solution |
366 | |
7bf0340c |
367 | The Filter::Simple module provides a simplified interface to |
b38acab9 |
368 | Filter::Util::Call; one that is sufficient for most common cases. |
369 | |
370 | Instead of the above process, with Filter::Simple the task of setting up |
371 | a source code filter is reduced to: |
372 | |
373 | =over 4 |
374 | |
375 | =item 1. |
376 | |
55a1c97c |
377 | Download and install the Filter::Simple module. |
378 | (If you have Perl 5.7.1 or later, this is already done for you.) |
379 | |
380 | =item 2. |
381 | |
fbe2c49e |
382 | Set up a module that does a C<use Filter::Simple> and then |
383 | calls C<FILTER { ... }>. |
b38acab9 |
384 | |
55a1c97c |
385 | =item 3. |
b38acab9 |
386 | |
fbe2c49e |
387 | Within the anonymous subroutine or block that is passed to |
388 | C<FILTER>, process the contents of $_ to change the source code in |
389 | the desired manner. |
b38acab9 |
390 | |
391 | =back |
392 | |
393 | In other words, the previous example, would become: |
394 | |
1b629f66 |
395 | package BANG; |
396 | use Filter::Simple; |
397 | |
398 | FILTER { |
399 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
400 | }; |
b38acab9 |
401 | |
1b629f66 |
402 | 1 ; |
b38acab9 |
403 | |
55909bcd |
404 | Note that the source code is passed as a single string, so any regex that |
405 | uses C<^> or C<$> to detect line boundaries will need the C</m> flag. |
b38acab9 |
406 | |
fbe2c49e |
407 | =head2 Disabling or changing <no> behaviour |
408 | |
dfa18578 |
409 | By default, the installed filter only filters up to a line consisting of one of |
410 | the three standard source "terminators": |
411 | |
1b629f66 |
412 | no ModuleName; # optional comment |
fbe2c49e |
413 | |
dfa18578 |
414 | or: |
fbe2c49e |
415 | |
1b629f66 |
416 | __END__ |
dfa18578 |
417 | |
418 | or: |
419 | |
1b629f66 |
420 | __DATA__ |
dfa18578 |
421 | |
422 | but this can be altered by passing a second argument to C<use Filter::Simple> |
423 | or C<FILTER> (just remember: there's I<no> comma after the initial block when |
424 | you use C<FILTER>). |
fbe2c49e |
425 | |
426 | That second argument may be either a C<qr>'d regular expression (which is then |
427 | used to match the terminator line), or a defined false value (which indicates |
dfa18578 |
428 | that no terminator line should be looked for), or a reference to a hash |
429 | (in which case the terminator is the value associated with the key |
430 | C<'terminator'>. |
fbe2c49e |
431 | |
432 | For example, to cause the previous filter to filter only up to a line of the |
433 | form: |
434 | |
1b629f66 |
435 | GNAB esu; |
fbe2c49e |
436 | |
437 | you would write: |
438 | |
1b629f66 |
439 | package BANG; |
440 | use Filter::Simple; |
441 | |
442 | FILTER { |
443 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
444 | } |
445 | qr/^\s*GNAB\s+esu\s*;\s*?$/; |
dfa18578 |
446 | |
447 | or: |
448 | |
1b629f66 |
449 | FILTER { |
450 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
451 | } |
452 | { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; |
fbe2c49e |
453 | |
454 | and to prevent the filter's being turned off in any way: |
455 | |
1b629f66 |
456 | package BANG; |
457 | use Filter::Simple; |
458 | |
459 | FILTER { |
460 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
461 | } |
462 | ""; # or: 0 |
dfa18578 |
463 | |
464 | or: |
465 | |
1b629f66 |
466 | FILTER { |
467 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
468 | } |
469 | { terminator => "" }; |
dfa18578 |
470 | |
55909bcd |
471 | B<Note that, no matter what you set the terminator pattern to, |
dfa18578 |
472 | the actual terminator itself I<must> be contained on a single source line.> |
fbe2c49e |
473 | |
474 | |
475 | =head2 All-in-one interface |
476 | |
477 | Separating the loading of Filter::Simple: |
478 | |
1b629f66 |
479 | use Filter::Simple; |
fbe2c49e |
480 | |
481 | from the setting up of the filtering: |
482 | |
1b629f66 |
483 | FILTER { ... }; |
fbe2c49e |
484 | |
485 | is useful because it allows other code (typically parser support code |
486 | or caching variables) to be defined before the filter is invoked. |
487 | However, there is often no need for such a separation. |
488 | |
489 | In those cases, it is easier to just append the filtering subroutine and |
490 | any terminator specification directly to the C<use> statement that loads |
491 | Filter::Simple, like so: |
492 | |
1b629f66 |
493 | use Filter::Simple sub { |
494 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
495 | }; |
fbe2c49e |
496 | |
497 | This is exactly the same as: |
498 | |
1b629f66 |
499 | use Filter::Simple; |
500 | BEGIN { |
501 | Filter::Simple::FILTER { |
502 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
503 | }; |
504 | } |
fbe2c49e |
505 | |
506 | except that the C<FILTER> subroutine is not exported by Filter::Simple. |
507 | |
dfa18578 |
508 | |
509 | =head2 Filtering only specific components of source code |
510 | |
511 | One of the problems with a filter like: |
512 | |
1b629f66 |
513 | use Filter::Simple; |
dfa18578 |
514 | |
1b629f66 |
515 | FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; |
dfa18578 |
516 | |
517 | is that it indiscriminately applies the specified transformation to |
518 | the entire text of your source program. So something like: |
519 | |
1b629f66 |
520 | warn 'BANG BANG, YOU'RE DEAD'; |
521 | BANG BANG; |
dfa18578 |
522 | |
523 | will become: |
524 | |
1b629f66 |
525 | warn 'die 'BANG' if $BANG, YOU'RE DEAD'; |
526 | die 'BANG' if $BANG; |
dfa18578 |
527 | |
528 | It is very common when filtering source to only want to apply the filter |
529 | to the non-character-string parts of the code, or alternatively to I<only> |
530 | the character strings. |
531 | |
532 | Filter::Simple supports this type of filtering by automatically |
533 | exporting the C<FILTER_ONLY> subroutine. |
534 | |
535 | C<FILTER_ONLY> takes a sequence of specifiers that install separate |
536 | (and possibly multiple) filters that act on only parts of the source code. |
537 | For example: |
538 | |
1b629f66 |
539 | use Filter::Simple; |
dfa18578 |
540 | |
1b629f66 |
541 | FILTER_ONLY |
542 | code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, |
543 | quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; |
dfa18578 |
544 | |
545 | The C<"code"> subroutine will only be used to filter parts of the source |
546 | code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike> |
547 | subroutine only filters Perl quotelikes (including here documents). |
548 | |
549 | The full list of alternatives is: |
550 | |
551 | =over |
552 | |
553 | =item C<"code"> |
554 | |
555 | Filters only those sections of the source code that are not quotelikes, POD, or |
556 | C<__DATA__>. |
557 | |
1b629f66 |
558 | =item C<"code_no_comments"> |
559 | |
560 | Filters only those sections of the source code that are not quotelikes, POD, |
561 | comments, or C<__DATA__>. |
562 | |
dfa18578 |
563 | =item C<"executable"> |
564 | |
565 | Filters only those sections of the source code that are not POD or C<__DATA__>. |
566 | |
1b629f66 |
567 | =item C<"executable_no_comments"> |
568 | |
569 | Filters only those sections of the source code that are not POD, comments, or C<__DATA__>. |
570 | |
dfa18578 |
571 | =item C<"quotelike"> |
572 | |
573 | Filters only Perl quotelikes (as interpreted by |
574 | C<&Text::Balanced::extract_quotelike>). |
575 | |
576 | =item C<"string"> |
577 | |
578 | Filters only the string literal parts of a Perl quotelike (i.e. the |
579 | contents of a string literal, either half of a C<tr///>, the second |
580 | half of an C<s///>). |
581 | |
582 | =item C<"regex"> |
583 | |
584 | Filters only the pattern literal parts of a Perl quotelike (i.e. the |
585 | contents of a C<qr//> or an C<m//>, the first half of an C<s///>). |
586 | |
587 | =item C<"all"> |
588 | |
589 | Filters everything. Identical in effect to C<FILTER>. |
590 | |
591 | =back |
592 | |
593 | Except for C<< FILTER_ONLY code => sub {...} >>, each of |
594 | the component filters is called repeatedly, once for each component |
595 | found in the source code. |
596 | |
597 | Note that you can also apply two or more of the same type of filter in |
598 | a single C<FILTER_ONLY>. For example, here's a simple |
599 | macro-preprocessor that is only applied within regexes, |
55909bcd |
600 | with a final debugging pass that prints the resulting source code: |
dfa18578 |
601 | |
1b629f66 |
602 | use Regexp::Common; |
603 | FILTER_ONLY |
604 | regex => sub { s/!\[/[^/g }, |
605 | regex => sub { s/%d/$RE{num}{int}/g }, |
606 | regex => sub { s/%f/$RE{num}{real}/g }, |
607 | all => sub { print if $::DEBUG }; |
dfa18578 |
608 | |
609 | |
610 | |
611 | =head2 Filtering only the code parts of source code |
612 | |
613 | Most source code ceases to be grammatically correct when it is broken up |
614 | into the pieces between string literals and regexes. So the C<'code'> |
1b629f66 |
615 | and C<'code_no_comments'> component filter behave slightly differently |
616 | from the other partial filters described in the previous section. |
dfa18578 |
617 | |
618 | Rather than calling the specified processor on each individual piece of |
1b629f66 |
619 | code (i.e. on the bits between quotelikes), the C<'code...'> partial |
620 | filters operate on the entire source code, but with the quotelike bits |
621 | (and, in the case of C<'code_no_comments'>, the comments) "blanked out". |
dfa18578 |
622 | |
1b629f66 |
623 | That is, a C<'code...'> filter I<replaces> each quoted string, quotelike, |
dfa18578 |
624 | regex, POD, and __DATA__ section with a placeholder. The |
625 | delimiters of this placeholder are the contents of the C<$;> variable |
626 | at the time the filter is applied (normally C<"\034">). The remaining |
627 | four bytes are a unique identifier for the component being replaced. |
628 | |
629 | This approach makes it comparatively easy to write code preprocessors |
630 | without worrying about the form or contents of strings, regexes, etc. |
1b629f66 |
631 | |
632 | For convenience, during a C<'code...'> filtering operation, Filter::Simple |
633 | provides a package variable (C<$Filter::Simple::placeholder>) that |
634 | contains a pre-compiled regex that matches any placeholder...and |
635 | captures the identifier within the placeholder. Placeholders can be |
dfa18578 |
636 | moved and re-ordered within the source code as needed. |
637 | |
1b629f66 |
638 | In addition, a second package variable (C<@Filter::Simple::components>) |
639 | contains a list of the various pieces of C<$_>, as they were originally split |
640 | up to allow placeholders to be inserted. |
641 | |
642 | Once the filtering has been applied, the original strings, regexes, POD, |
643 | etc. are re-inserted into the code, by replacing each placeholder with |
644 | the corresponding original component (from C<@components>). Note that |
645 | this means that the C<@components> variable must be treated with extreme |
646 | care within the filter. The C<@components> array stores the "back- |
647 | translations" of each placeholder inserted into C<$_>, as well as the |
648 | interstitial source code between placeholders. If the placeholder |
649 | backtranslations are altered in C<@components>, they will be similarly |
650 | changed when the placeholders are removed from C<$_> after the filter |
651 | is complete. |
dfa18578 |
652 | |
3c4b39be |
653 | For example, the following filter detects concatenated pairs of |
dfa18578 |
654 | strings/quotelikes and reverses the order in which they are |
655 | concatenated: |
656 | |
1b629f66 |
657 | package DemoRevCat; |
658 | use Filter::Simple; |
dfa18578 |
659 | |
1b629f66 |
660 | FILTER_ONLY code => sub { |
661 | my $ph = $Filter::Simple::placeholder; |
662 | s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx |
663 | }; |
dfa18578 |
664 | |
665 | Thus, the following code: |
666 | |
1b629f66 |
667 | use DemoRevCat; |
dfa18578 |
668 | |
1b629f66 |
669 | my $str = "abc" . q(def); |
dfa18578 |
670 | |
1b629f66 |
671 | print "$str\n"; |
dfa18578 |
672 | |
673 | would become: |
674 | |
1b629f66 |
675 | my $str = q(def)."abc"; |
dfa18578 |
676 | |
1b629f66 |
677 | print "$str\n"; |
dfa18578 |
678 | |
679 | and hence print: |
680 | |
1b629f66 |
681 | defabc |
dfa18578 |
682 | |
683 | |
201f4820 |
684 | =head2 Using Filter::Simple with an explicit C<import> subroutine |
55a1c97c |
685 | |
201f4820 |
686 | Filter::Simple generates a special C<import> subroutine for |
687 | your module (see L<"How it works">) which would normally replace any |
688 | C<import> subroutine you might have explicitly declared. |
55a1c97c |
689 | |
201f4820 |
690 | However, Filter::Simple is smart enough to notice your existing |
691 | C<import> and Do The Right Thing with it. |
55909bcd |
692 | That is, if you explicitly define an C<import> subroutine in a package |
201f4820 |
693 | that's using Filter::Simple, that C<import> subroutine will still |
694 | be invoked immediately after any filter you install. |
55a1c97c |
695 | |
201f4820 |
696 | The only thing you have to remember is that the C<import> subroutine |
697 | I<must> be declared I<before> the filter is installed. If you use C<FILTER> |
698 | to install the filter: |
55a1c97c |
699 | |
1b629f66 |
700 | package Filter::TurnItUpTo11; |
55a1c97c |
701 | |
1b629f66 |
702 | use Filter::Simple; |
55a1c97c |
703 | |
1b629f66 |
704 | FILTER { s/(\w+)/\U$1/ }; |
705 | |
201f4820 |
706 | that will almost never be a problem, but if you install a filtering |
707 | subroutine by passing it directly to the C<use Filter::Simple> |
708 | statement: |
55a1c97c |
709 | |
1b629f66 |
710 | package Filter::TurnItUpTo11; |
55a1c97c |
711 | |
1b629f66 |
712 | use Filter::Simple sub{ s/(\w+)/\U$1/ }; |
55a1c97c |
713 | |
201f4820 |
714 | then you must make sure that your C<import> subroutine appears before |
715 | that C<use> statement. |
716 | |
717 | |
718 | =head2 Using Filter::Simple and Exporter together |
719 | |
720 | Likewise, Filter::Simple is also smart enough |
721 | to Do The Right Thing if you use Exporter: |
722 | |
1b629f66 |
723 | package Switch; |
724 | use base Exporter; |
725 | use Filter::Simple; |
201f4820 |
726 | |
1b629f66 |
727 | @EXPORT = qw(switch case); |
728 | @EXPORT_OK = qw(given when); |
201f4820 |
729 | |
1b629f66 |
730 | FILTER { $_ = magic_Perl_filter($_) } |
201f4820 |
731 | |
732 | Immediately after the filter has been applied to the source, |
733 | Filter::Simple will pass control to Exporter, so it can do its magic too. |
734 | |
735 | Of course, here too, Filter::Simple has to know you're using Exporter |
736 | before it applies the filter. That's almost never a problem, but if you're |
737 | nervous about it, you can guarantee that things will work correctly by |
738 | ensuring that your C<use base Exporter> always precedes your |
739 | C<use Filter::Simple>. |
55a1c97c |
740 | |
fbe2c49e |
741 | |
b38acab9 |
742 | =head2 How it works |
743 | |
fbe2c49e |
744 | The Filter::Simple module exports into the package that calls C<FILTER> |
745 | (or C<use>s it directly) -- such as package "BANG" in the above example -- |
746 | two automagically constructed |
b38acab9 |
747 | subroutines -- C<import> and C<unimport> -- which take care of all the |
748 | nasty details. |
749 | |
750 | In addition, the generated C<import> subroutine passes its own argument |
751 | list to the filtering subroutine, so the BANG.pm filter could easily |
752 | be made parametric: |
753 | |
1b629f66 |
754 | package BANG; |
fbe2c49e |
755 | |
1b629f66 |
756 | use Filter::Simple; |
757 | |
758 | FILTER { |
759 | my ($die_msg, $var_name) = @_; |
760 | s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; |
761 | }; |
b38acab9 |
762 | |
1b629f66 |
763 | # and in some user code: |
b38acab9 |
764 | |
1b629f66 |
765 | use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM |
b38acab9 |
766 | |
767 | |
fbe2c49e |
768 | The specified filtering subroutine is called every time a C<use BANG> is |
769 | encountered, and passed all the source code following that call, up to |
770 | either the next C<no BANG;> (or whatever terminator you've set) or the |
771 | end of the source file, whichever occurs first. By default, any C<no |
772 | BANG;> call must appear by itself on a separate line, or it is ignored. |
b38acab9 |
773 | |
774 | |
775 | =head1 AUTHOR |
776 | |
777 | Damian Conway (damian@conway.org) |
778 | |
779 | =head1 COPYRIGHT |
780 | |
55a1c97c |
781 | Copyright (c) 2000-2001, Damian Conway. All Rights Reserved. |
782 | This module is free software. It may be used, redistributed |
1b629f66 |
783 | and/or modified under the same terms as Perl itself. |