1 package Filter::Simple;
3 use Text::Balanced ':ALL';
5 use vars qw{ $VERSION @EXPORT };
9 use Filter::Util::Call;
12 @EXPORT = qw( FILTER FILTER_ONLY );
16 if (@_>1) { shift; goto &FILTER }
17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
22 my ($filter, $terminator) = @_;
23 local $SIG{__WARN__} = sub{};
24 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
25 *{"${caller}::unimport"} = gen_filter_unimport($caller);
29 croak "FILTER_ONLY: ", @_;
33 my @bits = extract_quotelike $_[0], qr//;
34 return unless $bits[0];
39 my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
40 my $EOP = qr/\n\n|\Z/;
41 my $CUT = qr/\n=cut.*$EOP/;
43 ^=(?:head[1-4]|item) .*? $CUT
46 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
47 | ^__(DATA|END)__\r?\n.*
51 quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ],
52 regex => [ $ws, $pod_or_DATA, $id, $exql ],
53 string => [ $ws, $pod_or_DATA, $id, $exql ],
54 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 all => [ { MATCH => qr/(?s:.*)/ } ],
61 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
62 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
63 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
64 regex => sub { my ($t)=@_;
65 sub{ref() or return $_;
66 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
67 return $_->[0] unless $op =~ /^(qr|m|s)/
68 || !$op && ($ld eq '/' || $ld eq '?');
71 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
75 string => sub { my ($t)=@_;
76 sub{ref() or return $_;
78 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
79 return $_->[0] if $op =~ /^(qr|m)/
80 || !$op && ($ld1 eq '/' || $ld1 eq '?');
81 if (!$op || $op eq 'tr' || $op eq 'y') {
85 if ($op =~ /^(tr|y|s)/) {
89 my $result = "$pre$op$ld1$str1$rd1";
90 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
91 $result .= "$str2$rd2$flg";
98 sub gen_std_filter_for {
99 my ($type, $transform) = @_;
100 return sub { my (@pieces, $instr);
102 for (extract_multiple($_,$extractor_for{$type})) {
103 if (ref()) { push @pieces, $_; $instr=0 }
104 elsif ($instr) { $pieces[-1] .= $_ }
105 else { push @pieces, $_; $instr=1 }
107 if ($type eq 'code') {
109 local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/;
110 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
112 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
114 @pieces = grep { ref $_ } @pieces;
116 s/$extractor/${$pieces[unpack('N',$1)]}/g;
119 my $selector = $selector_for{$type}->($transform);
120 $_ = join "", map $selector->(@_), @pieces;
128 my ($what, $how) = splice(@_, 0, 2);
129 fail "Unknown selector: $what"
130 unless exists $extractor_for{$what};
131 fail "Filter for $what is not a subroutine reference"
132 unless ref $how eq 'CODE';
133 push @transforms, gen_std_filter_for($what,$how);
135 my $terminator = shift;
137 my $multitransform = sub {
138 foreach my $transform ( @transforms ) {
142 no warnings 'redefine';
143 *{"${caller}::import"} =
144 gen_filter_import($caller,$multitransform,$terminator);
145 *{"${caller}::unimport"} = gen_filter_unimport($caller);
148 my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
150 sub gen_filter_import {
151 my ($class, $filter, $terminator) = @_;
153 my $prev_import = *{$class."::import"}{CODE};
155 my ($imported_class, @args) = @_;
157 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/;
158 if (!defined $terminator) {
159 $terminator{terminator} = $def_terminator;
161 elsif (!ref $terminator || ref $terminator eq 'Regexp') {
162 $terminator{terminator} = $terminator;
164 elsif (ref $terminator ne 'HASH') {
165 croak "Terminator must be specified as scalar or hash ref"
167 elsif (!exists $terminator->{terminator}) {
168 $terminator{terminator} = $def_terminator;
172 my ($status, $lastline);
175 while ($status = filter_read()) {
176 return $status if $status < 0;
177 if ($terminator{terminator} &&
178 m/$terminator{terminator}/) {
187 $filter->($imported_class, @args) unless $status < 0;
188 if (defined $lastline) {
189 if (defined $terminator{becomes}) {
190 $_ .= $terminator{becomes};
192 elsif ($lastline =~ $def_terminator) {
202 elsif ($class->isa('Exporter')) {
203 $class->export_to_level(1,@_);
208 sub gen_filter_unimport {
210 my $prev_unimport = *{$class."::unimport"}{CODE};
213 goto &$prev_unimport if $prev_unimport;
223 Filter::Simple - Simplified source filtering
238 # use Filter::Simple sub { ... };
244 # this code is filtered
255 Source filtering is an immensely powerful feature of recent versions of Perl.
256 It allows one to extend the language itself (e.g. the Switch module), to
257 simplify the language (e.g. Language::Pythonesque), or to completely recast the
258 language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
259 the full power of Perl as its own, recursively applied, macro language.
261 The excellent Filter::Util::Call module (by Paul Marquess) provides a
262 usable Perl interface to source filtering, but it is often too powerful
263 and not nearly as simple as it could be.
265 To use the module it is necessary to do the following:
271 Download, build, and install the Filter::Util::Call module.
272 (If you have Perl 5.7.1 or later, this is already done for you.)
276 Set up a module that does a C<use Filter::Util::Call>.
280 Within that module, create an C<import> subroutine.
284 Within the C<import> subroutine do a call to C<filter_add>, passing
285 it either a subroutine reference.
289 Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
290 to "prime" $_ with source code data from the source file that will
291 C<use> your module. Check the status value returned to see if any
292 source code was actually read in.
296 Process the contents of $_ to change the source code in the desired manner.
300 Return the status value.
304 If the act of unimporting your module (via a C<no>) should cause source
305 code filtering to cease, create an C<unimport> subroutine, and have it call
306 C<filter_del>. Make sure that the call to C<filter_read> or
307 C<filter_read_exact> in step 5 will not accidentally read past the
308 C<no>. Effectively this limits source code filters to line-by-line
309 operation, unless the C<import> subroutine does some fancy
310 pre-pre-parsing of the source code it's filtering.
314 For example, here is a minimal source code filter in a module named
315 BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
316 to the sequence C<die 'BANG' if $BANG> in any piece of code following a
317 C<use BANG;> statement (until the next C<no BANG;> statement, if any):
321 use Filter::Util::Call ;
326 my ($status, $no_seen, $data);
327 while ($status = filter_read()) {
328 if (/^\s*no\s+$caller\s*;\s*?$/) {
336 s/BANG\s+BANG/die 'BANG' if \$BANG/g
338 $_ .= "no $class;\n" if $no_seen;
349 This level of sophistication puts filtering out of the reach of
355 The Filter::Simple module provides a simplified interface to
356 Filter::Util::Call; one that is sufficient for most common cases.
358 Instead of the above process, with Filter::Simple the task of setting up
359 a source code filter is reduced to:
365 Download and install the Filter::Simple module.
366 (If you have Perl 5.7.1 or later, this is already done for you.)
370 Set up a module that does a C<use Filter::Simple> and then
371 calls C<FILTER { ... }>.
375 Within the anonymous subroutine or block that is passed to
376 C<FILTER>, process the contents of $_ to change the source code in
381 In other words, the previous example, would become:
387 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
392 Note that the source code is passed as a single string, so any regex that
393 uses C<^> or C<$> to detect line boundaries will need the C</m> flag.
395 =head2 Disabling or changing <no> behaviour
397 By default, the installed filter only filters up to a line consisting of one of
398 the three standard source "terminators":
400 no ModuleName; # optional comment
410 but this can be altered by passing a second argument to C<use Filter::Simple>
411 or C<FILTER> (just remember: there's I<no> comma after the initial block when
414 That second argument may be either a C<qr>'d regular expression (which is then
415 used to match the terminator line), or a defined false value (which indicates
416 that no terminator line should be looked for), or a reference to a hash
417 (in which case the terminator is the value associated with the key
420 For example, to cause the previous filter to filter only up to a line of the
431 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
433 qr/^\s*GNAB\s+esu\s*;\s*?$/;
438 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
440 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
442 and to prevent the filter's being turned off in any way:
448 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
455 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
457 { terminator => "" };
459 B<Note that, no matter what you set the terminator pattern to,
460 the actual terminator itself I<must> be contained on a single source line.>
463 =head2 All-in-one interface
465 Separating the loading of Filter::Simple:
469 from the setting up of the filtering:
473 is useful because it allows other code (typically parser support code
474 or caching variables) to be defined before the filter is invoked.
475 However, there is often no need for such a separation.
477 In those cases, it is easier to just append the filtering subroutine and
478 any terminator specification directly to the C<use> statement that loads
479 Filter::Simple, like so:
481 use Filter::Simple sub {
482 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
485 This is exactly the same as:
489 Filter::Simple::FILTER {
490 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
494 except that the C<FILTER> subroutine is not exported by Filter::Simple.
497 =head2 Filtering only specific components of source code
499 One of the problems with a filter like:
503 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
505 is that it indiscriminately applies the specified transformation to
506 the entire text of your source program. So something like:
508 warn 'BANG BANG, YOU'RE DEAD';
513 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
516 It is very common when filtering source to only want to apply the filter
517 to the non-character-string parts of the code, or alternatively to I<only>
518 the character strings.
520 Filter::Simple supports this type of filtering by automatically
521 exporting the C<FILTER_ONLY> subroutine.
523 C<FILTER_ONLY> takes a sequence of specifiers that install separate
524 (and possibly multiple) filters that act on only parts of the source code.
530 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
531 quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g };
533 The C<"code"> subroutine will only be used to filter parts of the source
534 code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
535 subroutine only filters Perl quotelikes (including here documents).
537 The full list of alternatives is:
543 Filters only those sections of the source code that are not quotelikes, POD, or
546 =item C<"executable">
548 Filters only those sections of the source code that are not POD or C<__DATA__>.
552 Filters only Perl quotelikes (as interpreted by
553 C<&Text::Balanced::extract_quotelike>).
557 Filters only the string literal parts of a Perl quotelike (i.e. the
558 contents of a string literal, either half of a C<tr///>, the second
563 Filters only the pattern literal parts of a Perl quotelike (i.e. the
564 contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
568 Filters everything. Identical in effect to C<FILTER>.
572 Except for C<< FILTER_ONLY code => sub {...} >>, each of
573 the component filters is called repeatedly, once for each component
574 found in the source code.
576 Note that you can also apply two or more of the same type of filter in
577 a single C<FILTER_ONLY>. For example, here's a simple
578 macro-preprocessor that is only applied within regexes,
579 with a final debugging pass that prints the resulting source code:
583 regex => sub { s/!\[/[^/g },
584 regex => sub { s/%d/$RE{num}{int}/g },
585 regex => sub { s/%f/$RE{num}{real}/g },
586 all => sub { print if $::DEBUG };
590 =head2 Filtering only the code parts of source code
592 Most source code ceases to be grammatically correct when it is broken up
593 into the pieces between string literals and regexes. So the C<'code'>
594 component filter behaves slightly differently from the other partial filters
595 described in the previous section.
597 Rather than calling the specified processor on each individual piece of
598 code (i.e. on the bits between quotelikes), the C<'code'> partial filter
599 operates on the entire source code, but with the quotelike bits
602 That is, a C<'code'> filter I<replaces> each quoted string, quotelike,
603 regex, POD, and __DATA__ section with a placeholder. The
604 delimiters of this placeholder are the contents of the C<$;> variable
605 at the time the filter is applied (normally C<"\034">). The remaining
606 four bytes are a unique identifier for the component being replaced.
608 This approach makes it comparatively easy to write code preprocessors
609 without worrying about the form or contents of strings, regexes, etc.
610 For convenience, during a C<'code'> filtering operation, Filter::Simple
611 provides a package variable (C<$Filter::Simple::placeholder>) that contains
612 a pre-compiled regex that matches any placeholder. Placeholders can be
613 moved and re-ordered within the source code as needed.
615 Once the filtering has been applied, the original strings, regexes,
616 POD, etc. are re-inserted into the code, by replacing each
617 placeholder with the corresponding original component.
619 For example, the following filter detects concatentated pairs of
620 strings/quotelikes and reverses the order in which they are
626 FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder;
627 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
630 Thus, the following code:
634 my $str = "abc" . q(def);
640 my $str = q(def)."abc";
649 =head2 Using Filter::Simple with an explicit C<import> subroutine
651 Filter::Simple generates a special C<import> subroutine for
652 your module (see L<"How it works">) which would normally replace any
653 C<import> subroutine you might have explicitly declared.
655 However, Filter::Simple is smart enough to notice your existing
656 C<import> and Do The Right Thing with it.
657 That is, if you explicitly define an C<import> subroutine in a package
658 that's using Filter::Simple, that C<import> subroutine will still
659 be invoked immediately after any filter you install.
661 The only thing you have to remember is that the C<import> subroutine
662 I<must> be declared I<before> the filter is installed. If you use C<FILTER>
663 to install the filter:
665 package Filter::TurnItUpTo11;
669 FILTER { s/(\w+)/\U$1/ };
671 that will almost never be a problem, but if you install a filtering
672 subroutine by passing it directly to the C<use Filter::Simple>
675 package Filter::TurnItUpTo11;
677 use Filter::Simple sub{ s/(\w+)/\U$1/ };
679 then you must make sure that your C<import> subroutine appears before
680 that C<use> statement.
683 =head2 Using Filter::Simple and Exporter together
685 Likewise, Filter::Simple is also smart enough
686 to Do The Right Thing if you use Exporter:
692 @EXPORT = qw(switch case);
693 @EXPORT_OK = qw(given when);
695 FILTER { $_ = magic_Perl_filter($_) }
697 Immediately after the filter has been applied to the source,
698 Filter::Simple will pass control to Exporter, so it can do its magic too.
700 Of course, here too, Filter::Simple has to know you're using Exporter
701 before it applies the filter. That's almost never a problem, but if you're
702 nervous about it, you can guarantee that things will work correctly by
703 ensuring that your C<use base Exporter> always precedes your
704 C<use Filter::Simple>.
709 The Filter::Simple module exports into the package that calls C<FILTER>
710 (or C<use>s it directly) -- such as package "BANG" in the above example --
711 two automagically constructed
712 subroutines -- C<import> and C<unimport> -- which take care of all the
715 In addition, the generated C<import> subroutine passes its own argument
716 list to the filtering subroutine, so the BANG.pm filter could easily
724 my ($die_msg, $var_name) = @_;
725 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
728 # and in some user code:
730 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
733 The specified filtering subroutine is called every time a C<use BANG> is
734 encountered, and passed all the source code following that call, up to
735 either the next C<no BANG;> (or whatever terminator you've set) or the
736 end of the source file, whichever occurs first. By default, any C<no
737 BANG;> call must appear by itself on a separate line, or it is ignored.
742 Damian Conway (damian@conway.org)
746 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
747 This module is free software. It may be used, redistributed
748 and/or modified under the same terms as Perl itself.