1 package Filter::Simple;
3 use Text::Balanced ':ALL';
5 use vars qw{ $VERSION @EXPORT };
9 use Filter::Util::Call;
12 @EXPORT = qw( FILTER FILTER_ONLY );
16 if (@_>1) { shift; goto &FILTER }
17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
22 my ($filter, $terminator) = @_;
23 no warnings 'redefine';
24 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
25 *{"${caller}::unimport"} = gen_filter_unimport($caller);
29 croak "FILTER_ONLY: ", @_;
33 my @bits = extract_quotelike $_[0], qr//;
34 return unless $bits[0];
39 my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
40 my $EOP = qr/\n\n|\Z/;
41 my $CUT = qr/\n=cut.*$EOP/;
43 ^=(?:head[1-4]|item) .*? $CUT
46 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
47 | ^__(DATA|END)__\r?\n.*
51 quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ],
52 regex => [ $ws, $pod_or_DATA, $id, $exql ],
53 string => [ $ws, $pod_or_DATA, $id, $exql ],
54 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 all => [ { MATCH => qr/(?s:.*)/ } ],
61 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
62 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
63 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
64 regex => sub { my ($t)=@_;
65 sub{ref() or return $_;
66 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
67 return $_->[0] unless $op =~ /^(qr|m|s)/
68 || !$op && ($ld eq '/' || $ld eq '?');
71 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
75 string => sub { my ($t)=@_;
76 sub{ref() or return $_;
78 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
79 return $_->[0] if $op =~ /^(qr|m)/
80 || !$op && ($ld1 eq '/' || $ld1 eq '?');
81 if (!$op || $op eq 'tr' || $op eq 'y') {
85 if ($op =~ /^(tr|y|s)/) {
89 my $result = "$pre$op$ld1$str1$rd1";
90 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
91 $result .= "$str2$rd2$flg";
98 sub gen_std_filter_for {
99 my ($type, $transform) = @_;
100 return sub { my (@pieces, $instr);
101 for (extract_multiple($_,$extractor_for{$type})) {
102 if (ref()) { push @pieces, $_; $instr=0 }
103 elsif ($instr) { $pieces[-1] .= $_ }
104 else { push @pieces, $_; $instr=1 }
106 if ($type eq 'code') {
108 local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/;
109 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
111 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
113 @pieces = grep { ref $_ } @pieces;
115 s/$extractor/${$pieces[unpack('N',$1)]}/g;
118 my $selector = $selector_for{$type}->($transform);
119 $_ = join "", map $selector->(@_), @pieces;
127 my ($what, $how) = splice(@_, 0, 2);
128 fail "Unknown selector: $what"
129 unless exists $extractor_for{$what};
130 fail "Filter for $what is not a subroutine reference"
131 unless ref $how eq 'CODE';
132 push @transforms, gen_std_filter_for($what,$how);
134 my $terminator = shift;
136 my $multitransform = sub {
137 foreach my $transform ( @transforms ) {
141 no warnings 'redefine';
142 *{"${caller}::import"} =
143 gen_filter_import($caller,$multitransform,$terminator);
144 *{"${caller}::unimport"} = gen_filter_unimport($caller);
147 my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
149 sub gen_filter_import {
150 my ($class, $filter, $terminator) = @_;
152 my $prev_import = *{$class."::import"}{CODE};
154 my ($imported_class, @args) = @_;
156 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
157 if (!defined $terminator) {
158 $terminator{terminator} = $def_terminator;
160 elsif (!ref $terminator || ref $terminator eq 'Regexp') {
161 $terminator{terminator} = $terminator;
163 elsif (ref $terminator ne 'HASH') {
164 croak "Terminator must be specified as scalar or hash ref"
166 elsif (!exists $terminator->{terminator}) {
167 $terminator{terminator} = $def_terminator;
171 my ($status, $lastline);
174 while ($status = filter_read()) {
175 return $status if $status < 0;
176 if ($terminator{terminator} &&
177 m/$terminator{terminator}/) {
186 $filter->($imported_class, @args) unless $status < 0;
187 if (defined $lastline) {
188 if (defined $terminator{becomes}) {
189 $_ .= $terminator{becomes};
191 elsif ($lastline =~ $def_terminator) {
201 elsif ($class->isa('Exporter')) {
202 $class->export_to_level(1,@_);
207 sub gen_filter_unimport {
209 my $prev_unimport = *{$class."::unimport"}{CODE};
212 goto &$prev_unimport if $prev_unimport;
222 Filter::Simple - Simplified source filtering
237 # use Filter::Simple sub { ... };
243 # this code is filtered
254 Source filtering is an immensely powerful feature of recent versions of Perl.
255 It allows one to extend the language itself (e.g. the Switch module), to
256 simplify the language (e.g. Language::Pythonesque), or to completely recast the
257 language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
258 the full power of Perl as its own, recursively applied, macro language.
260 The excellent Filter::Util::Call module (by Paul Marquess) provides a
261 usable Perl interface to source filtering, but it is often too powerful
262 and not nearly as simple as it could be.
264 To use the module it is necessary to do the following:
270 Download, build, and install the Filter::Util::Call module.
271 (If you have Perl 5.7.1 or later, this is already done for you.)
275 Set up a module that does a C<use Filter::Util::Call>.
279 Within that module, create an C<import> subroutine.
283 Within the C<import> subroutine do a call to C<filter_add>, passing
284 it either a subroutine reference.
288 Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
289 to "prime" $_ with source code data from the source file that will
290 C<use> your module. Check the status value returned to see if any
291 source code was actually read in.
295 Process the contents of $_ to change the source code in the desired manner.
299 Return the status value.
303 If the act of unimporting your module (via a C<no>) should cause source
304 code filtering to cease, create an C<unimport> subroutine, and have it call
305 C<filter_del>. Make sure that the call to C<filter_read> or
306 C<filter_read_exact> in step 5 will not accidentally read past the
307 C<no>. Effectively this limits source code filters to line-by-line
308 operation, unless the C<import> subroutine does some fancy
309 pre-pre-parsing of the source code it's filtering.
313 For example, here is a minimal source code filter in a module named
314 BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
315 to the sequence C<die 'BANG' if $BANG> in any piece of code following a
316 C<use BANG;> statement (until the next C<no BANG;> statement, if any):
320 use Filter::Util::Call ;
325 my ($status, $no_seen, $data);
326 while ($status = filter_read()) {
327 if (/^\s*no\s+$caller\s*;\s*?$/) {
335 s/BANG\s+BANG/die 'BANG' if \$BANG/g
337 $_ .= "no $class;\n" if $no_seen;
348 This level of sophistication puts filtering out of the reach of
354 The Filter::Simple module provides a simplified interface to
355 Filter::Util::Call; one that is sufficient for most common cases.
357 Instead of the above process, with Filter::Simple the task of setting up
358 a source code filter is reduced to:
364 Download and install the Filter::Simple module.
365 (If you have Perl 5.7.1 or later, this is already done for you.)
369 Set up a module that does a C<use Filter::Simple> and then
370 calls C<FILTER { ... }>.
374 Within the anonymous subroutine or block that is passed to
375 C<FILTER>, process the contents of $_ to change the source code in
380 In other words, the previous example, would become:
386 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
392 =head2 Disabling or changing <no> behaviour
394 By default, the installed filter only filters up to a line consisting of one of
395 the three standard source "terminators":
397 no ModuleName; # optional comment
407 but this can be altered by passing a second argument to C<use Filter::Simple>
408 or C<FILTER> (just remember: there's I<no> comma after the initial block when
411 That second argument may be either a C<qr>'d regular expression (which is then
412 used to match the terminator line), or a defined false value (which indicates
413 that no terminator line should be looked for), or a reference to a hash
414 (in which case the terminator is the value associated with the key
417 For example, to cause the previous filter to filter only up to a line of the
428 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
430 qr/^\s*GNAB\s+esu\s*;\s*?$/;
435 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
437 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
439 and to prevent the filter's being turned off in any way:
445 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
452 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
454 { terminator => "" };
456 B<Note that, no matter what you set the terminator pattern too,
457 the actual terminator itself I<must> be contained on a single source line.>
460 =head2 All-in-one interface
462 Separating the loading of Filter::Simple:
466 from the setting up of the filtering:
470 is useful because it allows other code (typically parser support code
471 or caching variables) to be defined before the filter is invoked.
472 However, there is often no need for such a separation.
474 In those cases, it is easier to just append the filtering subroutine and
475 any terminator specification directly to the C<use> statement that loads
476 Filter::Simple, like so:
478 use Filter::Simple sub {
479 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
482 This is exactly the same as:
486 Filter::Simple::FILTER {
487 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
491 except that the C<FILTER> subroutine is not exported by Filter::Simple.
494 =head2 Filtering only specific components of source code
496 One of the problems with a filter like:
500 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
502 is that it indiscriminately applies the specified transformation to
503 the entire text of your source program. So something like:
505 warn 'BANG BANG, YOU'RE DEAD';
510 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
513 It is very common when filtering source to only want to apply the filter
514 to the non-character-string parts of the code, or alternatively to I<only>
515 the character strings.
517 Filter::Simple supports this type of filtering by automatically
518 exporting the C<FILTER_ONLY> subroutine.
520 C<FILTER_ONLY> takes a sequence of specifiers that install separate
521 (and possibly multiple) filters that act on only parts of the source code.
527 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
528 quotelike => sub { s/BANG\s+BANG/CHITTY CHITYY/g };
530 The C<"code"> subroutine will only be used to filter parts of the source
531 code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
532 subroutine only filters Perl quotelikes (including here documents).
534 The full list of alternatives is:
540 Filters only those sections of the source code that are not quotelikes, POD, or
543 =item C<"executable">
545 Filters only those sections of the source code that are not POD or C<__DATA__>.
549 Filters only Perl quotelikes (as interpreted by
550 C<&Text::Balanced::extract_quotelike>).
554 Filters only the string literal parts of a Perl quotelike (i.e. the
555 contents of a string literal, either half of a C<tr///>, the second
560 Filters only the pattern literal parts of a Perl quotelike (i.e. the
561 contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
565 Filters everything. Identical in effect to C<FILTER>.
569 Except for C<< FILTER_ONLY code => sub {...} >>, each of
570 the component filters is called repeatedly, once for each component
571 found in the source code.
573 Note that you can also apply two or more of the same type of filter in
574 a single C<FILTER_ONLY>. For example, here's a simple
575 macro-preprocessor that is only applied within regexes,
576 with a final debugging pass that printd the resulting source code:
580 regex => sub { s/!\[/[^/g },
581 regex => sub { s/%d/$RE{num}{int}/g },
582 regex => sub { s/%f/$RE{num}{real}/g },
583 all => sub { print if $::DEBUG };
587 =head2 Filtering only the code parts of source code
589 Most source code ceases to be grammatically correct when it is broken up
590 into the pieces between string literals and regexes. So the C<'code'>
591 component filter behaves slightly differently from the other partial filters
592 described in the previous section.
594 Rather than calling the specified processor on each individual piece of
595 code (i.e. on the bits between quotelikes), the C<'code'> partial filter
596 operates on the entire source code, but with the quotelike bits
599 That is, a C<'code'> filter I<replaces> each quoted string, quotelike,
600 regex, POD, and __DATA__ section with a placeholder. The
601 delimiters of this placeholder are the contents of the C<$;> variable
602 at the time the filter is applied (normally C<"\034">). The remaining
603 four bytes are a unique identifier for the component being replaced.
605 This approach makes it comparatively easy to write code preprocessors
606 without worrying about the form or contents of strings, regexes, etc.
607 For convenience, during a C<'code'> filtering operation, Filter::Simple
608 provides a package variable (C<$Filter::Simple::placeholder>) that contains
609 a pre-compiled regex that matches any placeholder. Placeholders can be
610 moved and re-ordered within the source code as needed.
612 Once the filtering has been applied, the original strings, regexes,
613 POD, etc. are re-inserted into the code, by replacing each
614 placeholder with the corresponding original component.
616 For example, the following filter detects concatentated pairs of
617 strings/quotelikes and reverses the order in which they are
623 FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder;
624 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
627 Thus, the following code:
631 my $str = "abc" . q(def);
637 my $str = q(def)."abc";
646 =head2 Using Filter::Simple with an explicit C<import> subroutine
648 Filter::Simple generates a special C<import> subroutine for
649 your module (see L<"How it works">) which would normally replace any
650 C<import> subroutine you might have explicitly declared.
652 However, Filter::Simple is smart enough to notice your existing
653 C<import> and Do The Right Thing with it.
654 That is, if you explcitly define an C<import> subroutine in a package
655 that's using Filter::Simple, that C<import> subroutine will still
656 be invoked immediately after any filter you install.
658 The only thing you have to remember is that the C<import> subroutine
659 I<must> be declared I<before> the filter is installed. If you use C<FILTER>
660 to install the filter:
662 package Filter::TurnItUpTo11;
666 FILTER { s/(\w+)/\U$1/ };
668 that will almost never be a problem, but if you install a filtering
669 subroutine by passing it directly to the C<use Filter::Simple>
672 package Filter::TurnItUpTo11;
674 use Filter::Simple sub{ s/(\w+)/\U$1/ };
676 then you must make sure that your C<import> subroutine appears before
677 that C<use> statement.
680 =head2 Using Filter::Simple and Exporter together
682 Likewise, Filter::Simple is also smart enough
683 to Do The Right Thing if you use Exporter:
689 @EXPORT = qw(switch case);
690 @EXPORT_OK = qw(given when);
692 FILTER { $_ = magic_Perl_filter($_) }
694 Immediately after the filter has been applied to the source,
695 Filter::Simple will pass control to Exporter, so it can do its magic too.
697 Of course, here too, Filter::Simple has to know you're using Exporter
698 before it applies the filter. That's almost never a problem, but if you're
699 nervous about it, you can guarantee that things will work correctly by
700 ensuring that your C<use base Exporter> always precedes your
701 C<use Filter::Simple>.
706 The Filter::Simple module exports into the package that calls C<FILTER>
707 (or C<use>s it directly) -- such as package "BANG" in the above example --
708 two automagically constructed
709 subroutines -- C<import> and C<unimport> -- which take care of all the
712 In addition, the generated C<import> subroutine passes its own argument
713 list to the filtering subroutine, so the BANG.pm filter could easily
721 my ($die_msg, $var_name) = @_;
722 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
725 # and in some user code:
727 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
730 The specified filtering subroutine is called every time a C<use BANG> is
731 encountered, and passed all the source code following that call, up to
732 either the next C<no BANG;> (or whatever terminator you've set) or the
733 end of the source file, whichever occurs first. By default, any C<no
734 BANG;> call must appear by itself on a separate line, or it is ignored.
739 Damian Conway (damian@conway.org)
743 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
744 This module is free software. It may be used, redistributed
745 and/or modified under the same terms as Perl itself.