1 package Filter::Simple;
3 use Text::Balanced ':ALL';
5 use vars qw{ $VERSION @EXPORT };
9 use Filter::Util::Call;
12 @EXPORT = qw( FILTER FILTER_ONLY );
16 if (@_>1) { shift; goto &FILTER }
17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
21 croak "FILTER_ONLY: ", @_;
25 my @bits = extract_quotelike $_[0], qr//;
26 return unless $bits[0];
31 my $comment = qr/(?<![\$\@%])#.*/;
32 my $ws = qr/(?:$ncws|$comment)+/;
33 my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
34 my $EOP = qr/\n\n|\Z/;
35 my $CUT = qr/\n=cut.*$EOP/;
37 ^=(?:head[1-4]|item) .*? $CUT
40 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
41 | ^__(DATA|END)__\r?\n.*
45 quotelike => [ $ws, \&extract_variable, $id, { MATCH => \&extract_quotelike } ],
46 regex => [ $ws, $pod_or_DATA, $id, $exql ],
47 string => [ $ws, $pod_or_DATA, $id, $exql ],
48 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
50 $id, { DONT_MATCH => \&extract_quotelike } ],
52 => [ { DONT_MATCH => $comment },
53 $ncws, { DONT_MATCH => $pod_or_DATA },
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 executable_no_comments
58 => [ { DONT_MATCH => $comment },
59 $ncws, { DONT_MATCH => $pod_or_DATA } ],
60 all => [ { MATCH => qr/(?s:.*)/ } ],
64 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
65 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
66 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
67 regex => sub { my ($t)=@_;
68 sub{ref() or return $_;
69 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
70 return $_->[0] unless $op =~ /^(qr|m|s)/
71 || !$op && ($ld eq '/' || $ld eq '?');
74 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
78 string => sub { my ($t)=@_;
79 sub{ref() or return $_;
81 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
82 return $_->[0] if $op =~ /^(qr|m)/
83 || !$op && ($ld1 eq '/' || $ld1 eq '?');
84 if (!$op || $op eq 'tr' || $op eq 'y') {
88 if ($op =~ /^(tr|y|s)/) {
92 my $result = "$pre$op$ld1$str1$rd1";
93 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
94 $result .= "$str2$rd2$flg";
101 sub gen_std_filter_for {
102 my ($type, $transform) = @_;
106 for (extract_multiple($_,$extractor_for{$type})) {
107 if (ref()) { push @components, $_; $instr=0 }
108 elsif ($instr) { $components[-1] .= $_ }
109 else { push @components, $_; $instr=1 }
111 if ($type =~ /^code/) {
113 local $placeholder = qr/\Q$;\E(\C{4})\Q$;\E/;
114 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
116 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
118 @components = grep { ref $_ } @components;
120 s/$extractor/${$components[unpack('N',$1)]}/g;
123 my $selector = $selector_for{$type}->($transform);
124 $_ = join "", map $selector->(@_), @components;
131 my ($filter, $terminator) = @_;
132 no warnings 'redefine';
133 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
134 *{"${caller}::unimport"} = gen_filter_unimport($caller);
140 my ($what, $how) = splice(@_, 0, 2);
141 fail "Unknown selector: $what"
142 unless exists $extractor_for{$what};
143 fail "Filter for $what is not a subroutine reference"
144 unless ref $how eq 'CODE';
145 push @transforms, gen_std_filter_for($what,$how);
147 my $terminator = shift;
149 my $multitransform = sub {
150 foreach my $transform ( @transforms ) {
154 no warnings 'redefine';
155 *{"${caller}::import"} =
156 gen_filter_import($caller,$multitransform,$terminator);
157 *{"${caller}::unimport"} = gen_filter_unimport($caller);
160 my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
162 sub gen_filter_import {
163 my ($class, $filter, $terminator) = @_;
165 my $prev_import = *{$class."::import"}{CODE};
167 my ($imported_class, @args) = @_;
169 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
170 if (!defined $terminator) {
171 $terminator{terminator} = $def_terminator;
173 elsif (!ref $terminator || ref $terminator eq 'Regexp') {
174 $terminator{terminator} = $terminator;
176 elsif (ref $terminator ne 'HASH') {
177 croak "Terminator must be specified as scalar or hash ref"
179 elsif (!exists $terminator->{terminator}) {
180 $terminator{terminator} = $def_terminator;
184 my ($status, $lastline);
187 while ($status = filter_read()) {
188 return $status if $status < 0;
189 if ($terminator{terminator} &&
190 m/$terminator{terminator}/) {
198 return $count if not $count;
200 $filter->($imported_class, @args) unless $status < 0;
201 if (defined $lastline) {
202 if (defined $terminator{becomes}) {
203 $_ .= $terminator{becomes};
205 elsif ($lastline =~ $def_terminator) {
215 elsif ($class->isa('Exporter')) {
216 $class->export_to_level(1,@_);
221 sub gen_filter_unimport {
225 goto &$prev_unimport if $prev_unimport;
235 Filter::Simple - Simplified source filtering
250 # use Filter::Simple sub { ... };
256 # this code is filtered
267 Source filtering is an immensely powerful feature of recent versions of Perl.
268 It allows one to extend the language itself (e.g. the Switch module), to
269 simplify the language (e.g. Language::Pythonesque), or to completely recast the
270 language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
271 the full power of Perl as its own, recursively applied, macro language.
273 The excellent Filter::Util::Call module (by Paul Marquess) provides a
274 usable Perl interface to source filtering, but it is often too powerful
275 and not nearly as simple as it could be.
277 To use the module it is necessary to do the following:
283 Download, build, and install the Filter::Util::Call module.
284 (If you have Perl 5.7.1 or later, this is already done for you.)
288 Set up a module that does a C<use Filter::Util::Call>.
292 Within that module, create an C<import> subroutine.
296 Within the C<import> subroutine do a call to C<filter_add>, passing
297 it a subroutine reference.
301 Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
302 to "prime" $_ with source code data from the source file that will
303 C<use> your module. Check the status value returned to see if any
304 source code was actually read in.
308 Process the contents of $_ to change the source code in the desired manner.
312 Return the status value.
316 If the act of unimporting your module (via a C<no>) should cause source
317 code filtering to cease, create an C<unimport> subroutine, and have it call
318 C<filter_del>. Make sure that the call to C<filter_read> or
319 C<filter_read_exact> in step 5 will not accidentally read past the
320 C<no>. Effectively this limits source code filters to line-by-line
321 operation, unless the C<import> subroutine does some fancy
322 pre-pre-parsing of the source code it's filtering.
326 For example, here is a minimal source code filter in a module named
327 BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
328 to the sequence C<die 'BANG' if $BANG> in any piece of code following a
329 C<use BANG;> statement (until the next C<no BANG;> statement, if any):
333 use Filter::Util::Call ;
338 my ($status, $no_seen, $data);
339 while ($status = filter_read()) {
340 if (/^\s*no\s+$caller\s*;\s*?$/) {
348 s/BANG\s+BANG/die 'BANG' if \$BANG/g
350 $_ .= "no $class;\n" if $no_seen;
361 This level of sophistication puts filtering out of the reach of
367 The Filter::Simple module provides a simplified interface to
368 Filter::Util::Call; one that is sufficient for most common cases.
370 Instead of the above process, with Filter::Simple the task of setting up
371 a source code filter is reduced to:
377 Download and install the Filter::Simple module.
378 (If you have Perl 5.7.1 or later, this is already done for you.)
382 Set up a module that does a C<use Filter::Simple> and then
383 calls C<FILTER { ... }>.
387 Within the anonymous subroutine or block that is passed to
388 C<FILTER>, process the contents of $_ to change the source code in
393 In other words, the previous example would become:
399 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
404 Note that the source code is passed as a single string, so any regex that
405 uses C<^> or C<$> to detect line boundaries will need the C</m> flag.
407 =head2 Disabling or changing C<no> behaviour
409 By default, the installed filter only filters up to a line consisting of one of
410 the three standard source "terminators":
412 no ModuleName; # optional comment
422 but this can be altered by passing a second argument to C<use Filter::Simple>
423 or C<FILTER> (just remember: there's I<no> comma after the initial block when
426 That second argument may be either a C<qr>'d regular expression (which is then
427 used to match the terminator line), or a defined false value (which indicates
428 that no terminator line should be looked for), or a reference to a hash
429 (in which case the terminator is the value associated with the key
432 For example, to cause the previous filter to filter only up to a line of the
443 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
445 qr/^\s*GNAB\s+esu\s*;\s*?$/;
450 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
452 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
454 and to prevent the filter's being turned off in any way:
460 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
467 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
469 { terminator => "" };
471 B<Note that, no matter what you set the terminator pattern to,
472 the actual terminator itself I<must> be contained on a single source line.>
475 =head2 All-in-one interface
477 Separating the loading of Filter::Simple:
481 from the setting up of the filtering:
485 is useful because it allows other code (typically parser support code
486 or caching variables) to be defined before the filter is invoked.
487 However, there is often no need for such a separation.
489 In those cases, it is easier to just append the filtering subroutine and
490 any terminator specification directly to the C<use> statement that loads
491 Filter::Simple, like so:
493 use Filter::Simple sub {
494 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
497 This is exactly the same as:
501 Filter::Simple::FILTER {
502 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
506 except that the C<FILTER> subroutine is not exported by Filter::Simple.
509 =head2 Filtering only specific components of source code
511 One of the problems with a filter like:
515 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
517 is that it indiscriminately applies the specified transformation to
518 the entire text of your source program. So something like:
520 warn 'BANG BANG, YOU'RE DEAD';
525 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
528 It is very common when filtering source to only want to apply the filter
529 to the non-character-string parts of the code, or alternatively to I<only>
530 the character strings.
532 Filter::Simple supports this type of filtering by automatically
533 exporting the C<FILTER_ONLY> subroutine.
535 C<FILTER_ONLY> takes a sequence of specifiers that install separate
536 (and possibly multiple) filters that act on only parts of the source code.
542 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
543 quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g };
545 The C<"code"> subroutine will only be used to filter parts of the source
546 code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
547 subroutine only filters Perl quotelikes (including here documents).
549 The full list of alternatives is:
555 Filters only those sections of the source code that are not quotelikes, POD, or
558 =item C<"code_no_comments">
560 Filters only those sections of the source code that are not quotelikes, POD,
561 comments, or C<__DATA__>.
563 =item C<"executable">
565 Filters only those sections of the source code that are not POD or C<__DATA__>.
567 =item C<"executable_no_comments">
569 Filters only those sections of the source code that are not POD, comments, or C<__DATA__>.
573 Filters only Perl quotelikes (as interpreted by
574 C<&Text::Balanced::extract_quotelike>).
578 Filters only the string literal parts of a Perl quotelike (i.e. the
579 contents of a string literal, either half of a C<tr///>, the second
584 Filters only the pattern literal parts of a Perl quotelike (i.e. the
585 contents of a C<qr//> or an C<m//>, or the first half of an C<s///>).
589 Filters everything. Identical in effect to C<FILTER>.
593 Except for C<< FILTER_ONLY code => sub {...} >>, each of
594 the component filters is called repeatedly, once for each component
595 found in the source code.
597 Note that you can also apply two or more of the same type of filter in
598 a single C<FILTER_ONLY>. For example, here's a simple
599 macro-preprocessor that is only applied within regexes,
600 with a final debugging pass that prints the resulting source code:
604 regex => sub { s/!\[/[^/g },
605 regex => sub { s/%d/$RE{num}{int}/g },
606 regex => sub { s/%f/$RE{num}{real}/g },
607 all => sub { print if $::DEBUG };
611 =head2 Filtering only the code parts of source code
613 Most source code ceases to be grammatically correct when it is broken up
614 into the pieces between string literals and regexes. So the C<'code'>
615 and C<'code_no_comments'> component filters behave slightly differently
616 from the other partial filters described in the previous section.
618 Rather than calling the specified processor on each individual piece of
619 code (i.e. on the bits between quotelikes), the C<'code...'> partial
620 filters operate on the entire source code, but with the quotelike bits
621 (and, in the case of C<'code_no_comments'>, the comments) "blanked out".
623 That is, a C<'code...'> filter I<replaces> each quoted string, quotelike,
624 regex, POD, and __DATA__ section with a placeholder. The
625 delimiters of this placeholder are the contents of the C<$;> variable
626 at the time the filter is applied (normally C<"\034">). The remaining
627 four bytes are a unique identifier for the component being replaced.
629 This approach makes it comparatively easy to write code preprocessors
630 without worrying about the form or contents of strings, regexes, etc.
632 For convenience, during a C<'code...'> filtering operation, Filter::Simple
633 provides a package variable (C<$Filter::Simple::placeholder>) that
634 contains a pre-compiled regex that matches any placeholder...and
635 captures the identifier within the placeholder. Placeholders can be
636 moved and re-ordered within the source code as needed.
638 In addition, a second package variable (C<@Filter::Simple::components>)
639 contains a list of the various pieces of C<$_>, as they were originally split
640 up to allow placeholders to be inserted.
642 Once the filtering has been applied, the original strings, regexes, POD,
643 etc. are re-inserted into the code, by replacing each placeholder with
644 the corresponding original component (from C<@components>). Note that
645 this means that the C<@components> variable must be treated with extreme
646 care within the filter. The C<@components> array stores the "back-
647 translations" of each placeholder inserted into C<$_>, as well as the
648 interstitial source code between placeholders. If the placeholder
649 backtranslations are altered in C<@components>, they will be similarly
650 changed when the placeholders are removed from C<$_> after the filter
653 For example, the following filter detects concatenated pairs of
654 strings/quotelikes and reverses the order in which they are
660 FILTER_ONLY code => sub {
661 my $ph = $Filter::Simple::placeholder;
662 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
665 Thus, the following code:
669 my $str = "abc" . q(def);
675 my $str = q(def)."abc";
684 =head2 Using Filter::Simple with an explicit C<import> subroutine
686 Filter::Simple generates a special C<import> subroutine for
687 your module (see L<"How it works">) which would normally replace any
688 C<import> subroutine you might have explicitly declared.
690 However, Filter::Simple is smart enough to notice your existing
691 C<import> and Do The Right Thing with it.
692 That is, if you explicitly define an C<import> subroutine in a package
693 that's using Filter::Simple, that C<import> subroutine will still
694 be invoked immediately after any filter you install.
696 The only thing you have to remember is that the C<import> subroutine
697 I<must> be declared I<before> the filter is installed. If you use C<FILTER>
698 to install the filter:
700 package Filter::TurnItUpTo11;
704 FILTER { s/(\w+)/\U$1/ };
706 that will almost never be a problem, but if you install a filtering
707 subroutine by passing it directly to the C<use Filter::Simple>
710 package Filter::TurnItUpTo11;
712 use Filter::Simple sub{ s/(\w+)/\U$1/ };
714 then you must make sure that your C<import> subroutine appears before
715 that C<use> statement.
718 =head2 Using Filter::Simple and Exporter together
720 Likewise, Filter::Simple is also smart enough
721 to Do The Right Thing if you use Exporter:
727 @EXPORT = qw(switch case);
728 @EXPORT_OK = qw(given when);
730 FILTER { $_ = magic_Perl_filter($_) }
732 Immediately after the filter has been applied to the source,
733 Filter::Simple will pass control to Exporter, so it can do its magic too.
735 Of course, here too, Filter::Simple has to know you're using Exporter
736 before it applies the filter. That's almost never a problem, but if you're
737 nervous about it, you can guarantee that things will work correctly by
738 ensuring that your C<use base Exporter> always precedes your
739 C<use Filter::Simple>.
744 The Filter::Simple module exports into the package that calls C<FILTER>
745 (or C<use>s it directly) -- such as package "BANG" in the above example --
746 two automagically constructed
747 subroutines -- C<import> and C<unimport> -- which take care of all the
750 In addition, the generated C<import> subroutine passes its own argument
751 list to the filtering subroutine, so the BANG.pm filter could easily
759 my ($die_msg, $var_name) = @_;
760 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
763 # and in some user code:
765 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
768 The specified filtering subroutine is called every time a C<use BANG> is
769 encountered, and passed all the source code following that call, up to
770 either the next C<no BANG;> (or whatever terminator you've set) or the
771 end of the source file, whichever occurs first. By default, any C<no
772 BANG;> call must appear by itself on a separate line, or it is ignored.
777 Damian Conway (damian@conway.org)
781 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
782 This module is free software. It may be used, redistributed
783 and/or modified under the same terms as Perl itself.