Commit | Line | Data |
b38acab9 |
1 | package Filter::Simple; |
2 | |
dfa18578 |
3 | use Text::Balanced ':ALL'; |
b38acab9 |
4 | |
dfa18578 |
5 | use vars qw{ $VERSION @EXPORT }; |
6 | |
55909bcd |
7 | $VERSION = '0.78'; |
b38acab9 |
8 | |
9 | use Filter::Util::Call; |
10 | use Carp; |
11 | |
dfa18578 |
12 | @EXPORT = qw( FILTER FILTER_ONLY ); |
13 | |
14 | |
b38acab9 |
15 | sub import { |
fbe2c49e |
16 | if (@_>1) { shift; goto &FILTER } |
dfa18578 |
17 | else { *{caller()."::$_"} = \&$_ foreach @EXPORT } |
fbe2c49e |
18 | } |
19 | |
20 | sub FILTER (&;$) { |
b38acab9 |
21 | my $caller = caller; |
fbe2c49e |
22 | my ($filter, $terminator) = @_; |
55909bcd |
23 | local $SIG{__WARN__} = sub{}; |
fbe2c49e |
24 | *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); |
201f4820 |
25 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
b38acab9 |
26 | } |
27 | |
dfa18578 |
28 | sub fail { |
29 | croak "FILTER_ONLY: ", @_; |
30 | } |
31 | |
32 | my $exql = sub { |
33 | my @bits = extract_quotelike $_[0], qr//; |
34 | return unless $bits[0]; |
35 | return \@bits; |
36 | }; |
37 | |
38 | my $ws = qr/\s+/; |
39 | my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; |
40 | my $EOP = qr/\n\n|\Z/; |
41 | my $CUT = qr/\n=cut.*$EOP/; |
42 | my $pod_or_DATA = qr/ |
43 | ^=(?:head[1-4]|item) .*? $CUT |
44 | | ^=pod .*? $CUT |
45 | | ^=for .*? $EOP |
46 | | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP |
126dd235 |
47 | | ^__(DATA|END)__\r?\n.* |
dfa18578 |
48 | /smx; |
49 | |
50 | my %extractor_for = ( |
51 | quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ], |
52 | regex => [ $ws, $pod_or_DATA, $id, $exql ], |
53 | string => [ $ws, $pod_or_DATA, $id, $exql ], |
54 | code => [ $ws, { DONT_MATCH => $pod_or_DATA }, |
55 | $id, { DONT_MATCH => \&extract_quotelike } ], |
56 | executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], |
57 | all => [ { MATCH => qr/(?s:.*)/ } ], |
58 | ); |
59 | |
60 | my %selector_for = ( |
61 | all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, |
62 | executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, |
63 | quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, |
64 | regex => sub { my ($t)=@_; |
65 | sub{ref() or return $_; |
66 | my ($ql,undef,$pre,$op,$ld,$pat) = @$_; |
67 | return $_->[0] unless $op =~ /^(qr|m|s)/ |
68 | || !$op && ($ld eq '/' || $ld eq '?'); |
69 | $_ = $pat; |
70 | $t->(@_); |
71 | $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; |
72 | return "$pre$ql"; |
73 | }; |
74 | }, |
75 | string => sub { my ($t)=@_; |
76 | sub{ref() or return $_; |
77 | local *args = \@_; |
78 | my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; |
79 | return $_->[0] if $op =~ /^(qr|m)/ |
80 | || !$op && ($ld1 eq '/' || $ld1 eq '?'); |
81 | if (!$op || $op eq 'tr' || $op eq 'y') { |
82 | local *_ = \$str1; |
83 | $t->(@args); |
84 | } |
85 | if ($op =~ /^(tr|y|s)/) { |
86 | local *_ = \$str2; |
87 | $t->(@args); |
88 | } |
89 | my $result = "$pre$op$ld1$str1$rd1"; |
90 | $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> |
91 | $result .= "$str2$rd2$flg"; |
92 | return $result; |
93 | }; |
94 | }, |
95 | ); |
96 | |
97 | |
98 | sub gen_std_filter_for { |
99 | my ($type, $transform) = @_; |
100 | return sub { my (@pieces, $instr); |
55909bcd |
101 | $DB::single=1; |
dfa18578 |
102 | for (extract_multiple($_,$extractor_for{$type})) { |
103 | if (ref()) { push @pieces, $_; $instr=0 } |
104 | elsif ($instr) { $pieces[-1] .= $_ } |
105 | else { push @pieces, $_; $instr=1 } |
106 | } |
107 | if ($type eq 'code') { |
108 | my $count = 0; |
109 | local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/; |
110 | my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/; |
111 | $_ = join "", |
112 | map { ref $_ ? $;.pack('N',$count++).$; : $_ } |
113 | @pieces; |
114 | @pieces = grep { ref $_ } @pieces; |
115 | $transform->(@_); |
116 | s/$extractor/${$pieces[unpack('N',$1)]}/g; |
117 | } |
118 | else { |
dfa18578 |
119 | my $selector = $selector_for{$type}->($transform); |
120 | $_ = join "", map $selector->(@_), @pieces; |
121 | } |
122 | } |
123 | }; |
124 | |
125 | sub FILTER_ONLY { |
dfa18578 |
126 | my $caller = caller; |
127 | while (@_ > 1) { |
128 | my ($what, $how) = splice(@_, 0, 2); |
129 | fail "Unknown selector: $what" |
130 | unless exists $extractor_for{$what}; |
131 | fail "Filter for $what is not a subroutine reference" |
132 | unless ref $how eq 'CODE'; |
133 | push @transforms, gen_std_filter_for($what,$how); |
134 | } |
135 | my $terminator = shift; |
136 | |
137 | my $multitransform = sub { |
138 | foreach my $transform ( @transforms ) { |
139 | $transform->(@_); |
140 | } |
141 | }; |
142 | no warnings 'redefine'; |
143 | *{"${caller}::import"} = |
144 | gen_filter_import($caller,$multitransform,$terminator); |
201f4820 |
145 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
dfa18578 |
146 | } |
147 | |
148 | my $ows = qr/(?:[ \t]+|#[^\n]*)*/; |
149 | |
b38acab9 |
150 | sub gen_filter_import { |
fbe2c49e |
151 | my ($class, $filter, $terminator) = @_; |
201f4820 |
152 | my %terminator; |
153 | my $prev_import = *{$class."::import"}{CODE}; |
b38acab9 |
154 | return sub { |
155 | my ($imported_class, @args) = @_; |
dfa18578 |
156 | my $def_terminator = |
126dd235 |
157 | qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/; |
dfa18578 |
158 | if (!defined $terminator) { |
201f4820 |
159 | $terminator{terminator} = $def_terminator; |
dfa18578 |
160 | } |
201f4820 |
161 | elsif (!ref $terminator || ref $terminator eq 'Regexp') { |
162 | $terminator{terminator} = $terminator; |
dfa18578 |
163 | } |
164 | elsif (ref $terminator ne 'HASH') { |
165 | croak "Terminator must be specified as scalar or hash ref" |
166 | } |
167 | elsif (!exists $terminator->{terminator}) { |
201f4820 |
168 | $terminator{terminator} = $def_terminator; |
dfa18578 |
169 | } |
b38acab9 |
170 | filter_add( |
171 | sub { |
dfa18578 |
172 | my ($status, $lastline); |
fbe2c49e |
173 | my $count = 0; |
b38acab9 |
174 | my $data = ""; |
175 | while ($status = filter_read()) { |
fbe2c49e |
176 | return $status if $status < 0; |
201f4820 |
177 | if ($terminator{terminator} && |
178 | m/$terminator{terminator}/) { |
dfa18578 |
179 | $lastline = $_; |
b38acab9 |
180 | last; |
181 | } |
182 | $data .= $_; |
fbe2c49e |
183 | $count++; |
b38acab9 |
184 | $_ = ""; |
185 | } |
186 | $_ = $data; |
dfa18578 |
187 | $filter->($imported_class, @args) unless $status < 0; |
188 | if (defined $lastline) { |
201f4820 |
189 | if (defined $terminator{becomes}) { |
190 | $_ .= $terminator{becomes}; |
dfa18578 |
191 | } |
192 | elsif ($lastline =~ $def_terminator) { |
193 | $_ .= $lastline; |
194 | } |
195 | } |
fbe2c49e |
196 | return $count; |
b38acab9 |
197 | } |
198 | ); |
201f4820 |
199 | if ($prev_import) { |
200 | goto &$prev_import; |
201 | } |
202 | elsif ($class->isa('Exporter')) { |
203 | $class->export_to_level(1,@_); |
204 | } |
b38acab9 |
205 | } |
206 | } |
207 | |
201f4820 |
208 | sub gen_filter_unimport { |
209 | my ($class) = @_; |
210 | my $prev_unimport = *{$class."::unimport"}{CODE}; |
211 | return sub { |
212 | filter_del(); |
213 | goto &$prev_unimport if $prev_unimport; |
214 | } |
b38acab9 |
215 | } |
216 | |
217 | 1; |
218 | |
219 | __END__ |
220 | |
221 | =head1 NAME |
222 | |
223 | Filter::Simple - Simplified source filtering |
224 | |
fbe2c49e |
225 | |
b38acab9 |
226 | =head1 SYNOPSIS |
227 | |
228 | # in MyFilter.pm: |
229 | |
230 | package MyFilter; |
231 | |
fbe2c49e |
232 | use Filter::Simple; |
233 | |
234 | FILTER { ... }; |
b38acab9 |
235 | |
fbe2c49e |
236 | # or just: |
237 | # |
238 | # use Filter::Simple sub { ... }; |
b38acab9 |
239 | |
240 | # in user's code: |
241 | |
242 | use MyFilter; |
243 | |
244 | # this code is filtered |
245 | |
246 | no MyFilter; |
247 | |
248 | # this code is not |
249 | |
250 | |
251 | =head1 DESCRIPTION |
252 | |
253 | =head2 The Problem |
254 | |
255 | Source filtering is an immensely powerful feature of recent versions of Perl. |
256 | It allows one to extend the language itself (e.g. the Switch module), to |
257 | simplify the language (e.g. Language::Pythonesque), or to completely recast the |
258 | language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use |
259 | the full power of Perl as its own, recursively applied, macro language. |
260 | |
261 | The excellent Filter::Util::Call module (by Paul Marquess) provides a |
262 | usable Perl interface to source filtering, but it is often too powerful |
263 | and not nearly as simple as it could be. |
264 | |
265 | To use the module it is necessary to do the following: |
266 | |
267 | =over 4 |
268 | |
269 | =item 1. |
270 | |
271 | Download, build, and install the Filter::Util::Call module. |
55a1c97c |
272 | (If you have Perl 5.7.1 or later, this is already done for you.) |
b38acab9 |
273 | |
274 | =item 2. |
275 | |
276 | Set up a module that does a C<use Filter::Util::Call>. |
277 | |
278 | =item 3. |
279 | |
280 | Within that module, create an C<import> subroutine. |
281 | |
282 | =item 4. |
283 | |
284 | Within the C<import> subroutine do a call to C<filter_add>, passing |
285 | it either a subroutine reference. |
286 | |
287 | =item 5. |
288 | |
289 | Within the subroutine reference, call C<filter_read> or C<filter_read_exact> |
290 | to "prime" $_ with source code data from the source file that will |
291 | C<use> your module. Check the status value returned to see if any |
292 | source code was actually read in. |
293 | |
294 | =item 6. |
295 | |
296 | Process the contents of $_ to change the source code in the desired manner. |
297 | |
298 | =item 7. |
299 | |
300 | Return the status value. |
301 | |
302 | =item 8. |
303 | |
304 | If the act of unimporting your module (via a C<no>) should cause source |
305 | code filtering to cease, create an C<unimport> subroutine, and have it call |
306 | C<filter_del>. Make sure that the call to C<filter_read> or |
307 | C<filter_read_exact> in step 5 will not accidentally read past the |
308 | C<no>. Effectively this limits source code filters to line-by-line |
309 | operation, unless the C<import> subroutine does some fancy |
310 | pre-pre-parsing of the source code it's filtering. |
311 | |
312 | =back |
313 | |
314 | For example, here is a minimal source code filter in a module named |
315 | BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> |
316 | to the sequence C<die 'BANG' if $BANG> in any piece of code following a |
317 | C<use BANG;> statement (until the next C<no BANG;> statement, if any): |
318 | |
319 | package BANG; |
fbe2c49e |
320 | |
b38acab9 |
321 | use Filter::Util::Call ; |
322 | |
323 | sub import { |
324 | filter_add( sub { |
325 | my $caller = caller; |
326 | my ($status, $no_seen, $data); |
327 | while ($status = filter_read()) { |
fbe2c49e |
328 | if (/^\s*no\s+$caller\s*;\s*?$/) { |
b38acab9 |
329 | $no_seen=1; |
330 | last; |
331 | } |
332 | $data .= $_; |
333 | $_ = ""; |
334 | } |
335 | $_ = $data; |
336 | s/BANG\s+BANG/die 'BANG' if \$BANG/g |
337 | unless $status < 0; |
338 | $_ .= "no $class;\n" if $no_seen; |
339 | return 1; |
340 | }) |
341 | } |
342 | |
343 | sub unimport { |
344 | filter_del(); |
345 | } |
346 | |
347 | 1 ; |
348 | |
7bf0340c |
349 | This level of sophistication puts filtering out of the reach of |
350 | many programmers. |
b38acab9 |
351 | |
352 | |
353 | =head2 A Solution |
354 | |
7bf0340c |
355 | The Filter::Simple module provides a simplified interface to |
b38acab9 |
356 | Filter::Util::Call; one that is sufficient for most common cases. |
357 | |
358 | Instead of the above process, with Filter::Simple the task of setting up |
359 | a source code filter is reduced to: |
360 | |
361 | =over 4 |
362 | |
363 | =item 1. |
364 | |
55a1c97c |
365 | Download and install the Filter::Simple module. |
366 | (If you have Perl 5.7.1 or later, this is already done for you.) |
367 | |
368 | =item 2. |
369 | |
fbe2c49e |
370 | Set up a module that does a C<use Filter::Simple> and then |
371 | calls C<FILTER { ... }>. |
b38acab9 |
372 | |
55a1c97c |
373 | =item 3. |
b38acab9 |
374 | |
fbe2c49e |
375 | Within the anonymous subroutine or block that is passed to |
376 | C<FILTER>, process the contents of $_ to change the source code in |
377 | the desired manner. |
b38acab9 |
378 | |
379 | =back |
380 | |
381 | In other words, the previous example, would become: |
382 | |
383 | package BANG; |
fbe2c49e |
384 | use Filter::Simple; |
385 | |
386 | FILTER { |
b38acab9 |
387 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
388 | }; |
389 | |
390 | 1 ; |
391 | |
55909bcd |
392 | Note that the source code is passed as a single string, so any regex that |
393 | uses C<^> or C<$> to detect line boundaries will need the C</m> flag. |
b38acab9 |
394 | |
fbe2c49e |
395 | =head2 Disabling or changing <no> behaviour |
396 | |
dfa18578 |
397 | By default, the installed filter only filters up to a line consisting of one of |
398 | the three standard source "terminators": |
399 | |
400 | no ModuleName; # optional comment |
fbe2c49e |
401 | |
dfa18578 |
402 | or: |
fbe2c49e |
403 | |
dfa18578 |
404 | __END__ |
405 | |
406 | or: |
407 | |
408 | __DATA__ |
409 | |
410 | but this can be altered by passing a second argument to C<use Filter::Simple> |
411 | or C<FILTER> (just remember: there's I<no> comma after the initial block when |
412 | you use C<FILTER>). |
fbe2c49e |
413 | |
414 | That second argument may be either a C<qr>'d regular expression (which is then |
415 | used to match the terminator line), or a defined false value (which indicates |
dfa18578 |
416 | that no terminator line should be looked for), or a reference to a hash |
417 | (in which case the terminator is the value associated with the key |
418 | C<'terminator'>. |
fbe2c49e |
419 | |
420 | For example, to cause the previous filter to filter only up to a line of the |
421 | form: |
422 | |
423 | GNAB esu; |
424 | |
425 | you would write: |
426 | |
427 | package BANG; |
428 | use Filter::Simple; |
429 | |
430 | FILTER { |
431 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
432 | } |
dfa18578 |
433 | qr/^\s*GNAB\s+esu\s*;\s*?$/; |
434 | |
435 | or: |
436 | |
437 | FILTER { |
438 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
439 | } |
440 | { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; |
fbe2c49e |
441 | |
442 | and to prevent the filter's being turned off in any way: |
443 | |
444 | package BANG; |
445 | use Filter::Simple; |
446 | |
447 | FILTER { |
448 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
449 | } |
dfa18578 |
450 | ""; # or: 0 |
451 | |
452 | or: |
453 | |
454 | FILTER { |
455 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
456 | } |
457 | { terminator => "" }; |
458 | |
55909bcd |
459 | B<Note that, no matter what you set the terminator pattern to, |
dfa18578 |
460 | the actual terminator itself I<must> be contained on a single source line.> |
fbe2c49e |
461 | |
462 | |
463 | =head2 All-in-one interface |
464 | |
465 | Separating the loading of Filter::Simple: |
466 | |
467 | use Filter::Simple; |
468 | |
469 | from the setting up of the filtering: |
470 | |
471 | FILTER { ... }; |
472 | |
473 | is useful because it allows other code (typically parser support code |
474 | or caching variables) to be defined before the filter is invoked. |
475 | However, there is often no need for such a separation. |
476 | |
477 | In those cases, it is easier to just append the filtering subroutine and |
478 | any terminator specification directly to the C<use> statement that loads |
479 | Filter::Simple, like so: |
480 | |
481 | use Filter::Simple sub { |
482 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
483 | }; |
484 | |
485 | This is exactly the same as: |
486 | |
487 | use Filter::Simple; |
488 | BEGIN { |
489 | Filter::Simple::FILTER { |
490 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
491 | }; |
492 | } |
493 | |
494 | except that the C<FILTER> subroutine is not exported by Filter::Simple. |
495 | |
dfa18578 |
496 | |
497 | =head2 Filtering only specific components of source code |
498 | |
499 | One of the problems with a filter like: |
500 | |
501 | use Filter::Simple; |
502 | |
503 | FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; |
504 | |
505 | is that it indiscriminately applies the specified transformation to |
506 | the entire text of your source program. So something like: |
507 | |
508 | warn 'BANG BANG, YOU'RE DEAD'; |
509 | BANG BANG; |
510 | |
511 | will become: |
512 | |
513 | warn 'die 'BANG' if $BANG, YOU'RE DEAD'; |
514 | die 'BANG' if $BANG; |
515 | |
516 | It is very common when filtering source to only want to apply the filter |
517 | to the non-character-string parts of the code, or alternatively to I<only> |
518 | the character strings. |
519 | |
520 | Filter::Simple supports this type of filtering by automatically |
521 | exporting the C<FILTER_ONLY> subroutine. |
522 | |
523 | C<FILTER_ONLY> takes a sequence of specifiers that install separate |
524 | (and possibly multiple) filters that act on only parts of the source code. |
525 | For example: |
526 | |
527 | use Filter::Simple; |
528 | |
529 | FILTER_ONLY |
530 | code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, |
55909bcd |
531 | quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; |
dfa18578 |
532 | |
533 | The C<"code"> subroutine will only be used to filter parts of the source |
534 | code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike> |
535 | subroutine only filters Perl quotelikes (including here documents). |
536 | |
537 | The full list of alternatives is: |
538 | |
539 | =over |
540 | |
541 | =item C<"code"> |
542 | |
543 | Filters only those sections of the source code that are not quotelikes, POD, or |
544 | C<__DATA__>. |
545 | |
546 | =item C<"executable"> |
547 | |
548 | Filters only those sections of the source code that are not POD or C<__DATA__>. |
549 | |
550 | =item C<"quotelike"> |
551 | |
552 | Filters only Perl quotelikes (as interpreted by |
553 | C<&Text::Balanced::extract_quotelike>). |
554 | |
555 | =item C<"string"> |
556 | |
557 | Filters only the string literal parts of a Perl quotelike (i.e. the |
558 | contents of a string literal, either half of a C<tr///>, the second |
559 | half of an C<s///>). |
560 | |
561 | =item C<"regex"> |
562 | |
563 | Filters only the pattern literal parts of a Perl quotelike (i.e. the |
564 | contents of a C<qr//> or an C<m//>, the first half of an C<s///>). |
565 | |
566 | =item C<"all"> |
567 | |
568 | Filters everything. Identical in effect to C<FILTER>. |
569 | |
570 | =back |
571 | |
572 | Except for C<< FILTER_ONLY code => sub {...} >>, each of |
573 | the component filters is called repeatedly, once for each component |
574 | found in the source code. |
575 | |
576 | Note that you can also apply two or more of the same type of filter in |
577 | a single C<FILTER_ONLY>. For example, here's a simple |
578 | macro-preprocessor that is only applied within regexes, |
55909bcd |
579 | with a final debugging pass that prints the resulting source code: |
dfa18578 |
580 | |
581 | use Regexp::Common; |
582 | FILTER_ONLY |
583 | regex => sub { s/!\[/[^/g }, |
584 | regex => sub { s/%d/$RE{num}{int}/g }, |
585 | regex => sub { s/%f/$RE{num}{real}/g }, |
586 | all => sub { print if $::DEBUG }; |
587 | |
588 | |
589 | |
590 | =head2 Filtering only the code parts of source code |
591 | |
592 | Most source code ceases to be grammatically correct when it is broken up |
593 | into the pieces between string literals and regexes. So the C<'code'> |
594 | component filter behaves slightly differently from the other partial filters |
595 | described in the previous section. |
596 | |
597 | Rather than calling the specified processor on each individual piece of |
598 | code (i.e. on the bits between quotelikes), the C<'code'> partial filter |
599 | operates on the entire source code, but with the quotelike bits |
600 | "blanked out". |
601 | |
602 | That is, a C<'code'> filter I<replaces> each quoted string, quotelike, |
603 | regex, POD, and __DATA__ section with a placeholder. The |
604 | delimiters of this placeholder are the contents of the C<$;> variable |
605 | at the time the filter is applied (normally C<"\034">). The remaining |
606 | four bytes are a unique identifier for the component being replaced. |
607 | |
608 | This approach makes it comparatively easy to write code preprocessors |
609 | without worrying about the form or contents of strings, regexes, etc. |
610 | For convenience, during a C<'code'> filtering operation, Filter::Simple |
611 | provides a package variable (C<$Filter::Simple::placeholder>) that contains |
612 | a pre-compiled regex that matches any placeholder. Placeholders can be |
613 | moved and re-ordered within the source code as needed. |
614 | |
615 | Once the filtering has been applied, the original strings, regexes, |
616 | POD, etc. are re-inserted into the code, by replacing each |
617 | placeholder with the corresponding original component. |
618 | |
619 | For example, the following filter detects concatentated pairs of |
620 | strings/quotelikes and reverses the order in which they are |
621 | concatenated: |
622 | |
623 | package DemoRevCat; |
624 | use Filter::Simple; |
625 | |
626 | FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder; |
627 | s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx |
628 | }; |
629 | |
630 | Thus, the following code: |
631 | |
632 | use DemoRevCat; |
633 | |
634 | my $str = "abc" . q(def); |
635 | |
636 | print "$str\n"; |
637 | |
638 | would become: |
639 | |
640 | my $str = q(def)."abc"; |
641 | |
642 | print "$str\n"; |
643 | |
644 | and hence print: |
645 | |
646 | defabc |
647 | |
648 | |
201f4820 |
649 | =head2 Using Filter::Simple with an explicit C<import> subroutine |
55a1c97c |
650 | |
201f4820 |
651 | Filter::Simple generates a special C<import> subroutine for |
652 | your module (see L<"How it works">) which would normally replace any |
653 | C<import> subroutine you might have explicitly declared. |
55a1c97c |
654 | |
201f4820 |
655 | However, Filter::Simple is smart enough to notice your existing |
656 | C<import> and Do The Right Thing with it. |
55909bcd |
657 | That is, if you explicitly define an C<import> subroutine in a package |
201f4820 |
658 | that's using Filter::Simple, that C<import> subroutine will still |
659 | be invoked immediately after any filter you install. |
55a1c97c |
660 | |
201f4820 |
661 | The only thing you have to remember is that the C<import> subroutine |
662 | I<must> be declared I<before> the filter is installed. If you use C<FILTER> |
663 | to install the filter: |
55a1c97c |
664 | |
201f4820 |
665 | package Filter::TurnItUpTo11; |
55a1c97c |
666 | |
201f4820 |
667 | use Filter::Simple; |
55a1c97c |
668 | |
201f4820 |
669 | FILTER { s/(\w+)/\U$1/ }; |
670 | |
671 | that will almost never be a problem, but if you install a filtering |
672 | subroutine by passing it directly to the C<use Filter::Simple> |
673 | statement: |
55a1c97c |
674 | |
201f4820 |
675 | package Filter::TurnItUpTo11; |
55a1c97c |
676 | |
201f4820 |
677 | use Filter::Simple sub{ s/(\w+)/\U$1/ }; |
55a1c97c |
678 | |
201f4820 |
679 | then you must make sure that your C<import> subroutine appears before |
680 | that C<use> statement. |
681 | |
682 | |
683 | =head2 Using Filter::Simple and Exporter together |
684 | |
685 | Likewise, Filter::Simple is also smart enough |
686 | to Do The Right Thing if you use Exporter: |
687 | |
688 | package Switch; |
689 | use base Exporter; |
690 | use Filter::Simple; |
691 | |
692 | @EXPORT = qw(switch case); |
693 | @EXPORT_OK = qw(given when); |
694 | |
695 | FILTER { $_ = magic_Perl_filter($_) } |
696 | |
697 | Immediately after the filter has been applied to the source, |
698 | Filter::Simple will pass control to Exporter, so it can do its magic too. |
699 | |
700 | Of course, here too, Filter::Simple has to know you're using Exporter |
701 | before it applies the filter. That's almost never a problem, but if you're |
702 | nervous about it, you can guarantee that things will work correctly by |
703 | ensuring that your C<use base Exporter> always precedes your |
704 | C<use Filter::Simple>. |
55a1c97c |
705 | |
fbe2c49e |
706 | |
b38acab9 |
707 | =head2 How it works |
708 | |
fbe2c49e |
709 | The Filter::Simple module exports into the package that calls C<FILTER> |
710 | (or C<use>s it directly) -- such as package "BANG" in the above example -- |
711 | two automagically constructed |
b38acab9 |
712 | subroutines -- C<import> and C<unimport> -- which take care of all the |
713 | nasty details. |
714 | |
715 | In addition, the generated C<import> subroutine passes its own argument |
716 | list to the filtering subroutine, so the BANG.pm filter could easily |
717 | be made parametric: |
718 | |
719 | package BANG; |
fbe2c49e |
720 | |
721 | use Filter::Simple; |
722 | |
723 | FILTER { |
b38acab9 |
724 | my ($die_msg, $var_name) = @_; |
725 | s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; |
726 | }; |
727 | |
728 | # and in some user code: |
729 | |
fbe2c49e |
730 | use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM |
b38acab9 |
731 | |
732 | |
fbe2c49e |
733 | The specified filtering subroutine is called every time a C<use BANG> is |
734 | encountered, and passed all the source code following that call, up to |
735 | either the next C<no BANG;> (or whatever terminator you've set) or the |
736 | end of the source file, whichever occurs first. By default, any C<no |
737 | BANG;> call must appear by itself on a separate line, or it is ignored. |
b38acab9 |
738 | |
739 | |
740 | =head1 AUTHOR |
741 | |
742 | Damian Conway (damian@conway.org) |
743 | |
744 | =head1 COPYRIGHT |
745 | |
55a1c97c |
746 | Copyright (c) 2000-2001, Damian Conway. All Rights Reserved. |
747 | This module is free software. It may be used, redistributed |
748 | and/or modified under the same terms as Perl itself. |