package Filter::Simple;
-use vars qw{ $VERSION };
+use Text::Balanced ':ALL';
-$VERSION = '0.61';
+use vars qw{ $VERSION @EXPORT };
+
+$VERSION = '0.70';
use Filter::Util::Call;
use Carp;
+@EXPORT = qw( FILTER FILTER_ONLY );
+
+
sub import {
if (@_>1) { shift; goto &FILTER }
- else { *{caller()."::FILTER"} = \&FILTER }
+ else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
}
sub FILTER (&;$) {
my $caller = caller;
my ($filter, $terminator) = @_;
+ no warnings 'redefine';
*{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
*{"${caller}::unimport"} = \*filter_unimport;
}
+sub fail {
+ croak "FILTER_ONLY: ", @_;
+}
+
+my $exql = sub {
+ my @bits = extract_quotelike $_[0], qr//;
+ return unless $bits[0];
+ return \@bits;
+};
+
+my $ws = qr/\s+/;
+my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
+my $EOP = qr/\n\n|\Z/;
+my $CUT = qr/\n=cut.*$EOP/;
+my $pod_or_DATA = qr/
+ ^=(?:head[1-4]|item) .*? $CUT
+ | ^=pod .*? $CUT
+ | ^=for .*? $EOP
+ | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
+ | ^__(DATA|END)__\n.*
+ /smx;
+
+my %extractor_for = (
+ quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ],
+ regex => [ $ws, $pod_or_DATA, $id, $exql ],
+ string => [ $ws, $pod_or_DATA, $id, $exql ],
+ code => [ $ws, { DONT_MATCH => $pod_or_DATA },
+ $id, { DONT_MATCH => \&extract_quotelike } ],
+ executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
+ all => [ { MATCH => qr/(?s:.*)/ } ],
+);
+
+my %selector_for = (
+ all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
+ executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
+ quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
+ regex => sub { my ($t)=@_;
+ sub{ref() or return $_;
+ my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
+ return $_->[0] unless $op =~ /^(qr|m|s)/
+ || !$op && ($ld eq '/' || $ld eq '?');
+ $_ = $pat;
+ $t->(@_);
+ $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
+ return "$pre$ql";
+ };
+ },
+ string => sub { my ($t)=@_;
+ sub{ref() or return $_;
+ local *args = \@_;
+ my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
+ return $_->[0] if $op =~ /^(qr|m)/
+ || !$op && ($ld1 eq '/' || $ld1 eq '?');
+ if (!$op || $op eq 'tr' || $op eq 'y') {
+ local *_ = \$str1;
+ $t->(@args);
+ }
+ if ($op =~ /^(tr|y|s)/) {
+ local *_ = \$str2;
+ $t->(@args);
+ }
+ my $result = "$pre$op$ld1$str1$rd1";
+ $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
+ $result .= "$str2$rd2$flg";
+ return $result;
+ };
+ },
+);
+
+
+sub gen_std_filter_for {
+ my ($type, $transform) = @_;
+ return sub { my (@pieces, $instr);
+ for (extract_multiple($_,$extractor_for{$type})) {
+ if (ref()) { push @pieces, $_; $instr=0 }
+ elsif ($instr) { $pieces[-1] .= $_ }
+ else { push @pieces, $_; $instr=1 }
+ }
+ if ($type eq 'code') {
+ my $count = 0;
+ local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/;
+ my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
+ $_ = join "",
+ map { ref $_ ? $;.pack('N',$count++).$; : $_ }
+ @pieces;
+ @pieces = grep { ref $_ } @pieces;
+ $transform->(@_);
+ s/$extractor/${$pieces[unpack('N',$1)]}/g;
+ }
+ else {
+ $DB::single=1;
+ my $selector = $selector_for{$type}->($transform);
+ $_ = join "", map $selector->(@_), @pieces;
+ }
+ }
+};
+
+sub FILTER_ONLY {
+ $DB::single = 1;
+ my $caller = caller;
+ while (@_ > 1) {
+ my ($what, $how) = splice(@_, 0, 2);
+ fail "Unknown selector: $what"
+ unless exists $extractor_for{$what};
+ fail "Filter for $what is not a subroutine reference"
+ unless ref $how eq 'CODE';
+ push @transforms, gen_std_filter_for($what,$how);
+ }
+ my $terminator = shift;
+
+ my $multitransform = sub {
+ foreach my $transform ( @transforms ) {
+ $transform->(@_);
+ }
+ };
+ no warnings 'redefine';
+ *{"${caller}::import"} =
+ gen_filter_import($caller,$multitransform,$terminator);
+ *{"${caller}::unimport"} = \*filter_unimport;
+}
+
+my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
+
sub gen_filter_import {
my ($class, $filter, $terminator) = @_;
return sub {
my ($imported_class, @args) = @_;
- $terminator = qr/^\s*no\s+$imported_class\s*;\s*$/
- unless defined $terminator;
+ my $def_terminator =
+ qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)$/;
+ if (!defined $terminator) {
+ $terminator->{terminator} = $def_terminator;
+ }
+ elsif (!ref $terminator) {
+ $terminator->{terminator} = $terminator;
+ }
+ elsif (ref $terminator ne 'HASH') {
+ croak "Terminator must be specified as scalar or hash ref"
+ }
+ elsif (!exists $terminator->{terminator}) {
+ $terminator->{terminator} = $def_terminator;
+ }
filter_add(
sub {
- my ($status, $off);
+ my ($status, $lastline);
my $count = 0;
my $data = "";
while ($status = filter_read()) {
return $status if $status < 0;
- if ($terminator && m/$terminator/) {
- $off=1;
+ if ($terminator->{terminator} &&
+ m/$terminator->{terminator}/) {
+ $lastline = $_;
last;
}
$data .= $_;
$count++;
$_ = "";
}
+ $DB::single=1;
$_ = $data;
- $filter->(@args) unless $status < 0;
- $_ .= "no $imported_class;\n" if $off;
+ $filter->($imported_class, @args) unless $status < 0;
+ if (defined $lastline) {
+ if (defined $terminator->{becomes}) {
+ $_ .= $terminator->{becomes};
+ }
+ elsif ($lastline =~ $def_terminator) {
+ $_ .= $lastline;
+ }
+ }
return $count;
}
);
=head2 Disabling or changing <no> behaviour
-By default, the installed filter only filters to a line of the form:
+By default, the installed filter only filters up to a line consisting of one of
+the three standard source "terminators":
+
+ no ModuleName; # optional comment
- no ModuleName;
+or:
-but this can be altered by passing a second argument to C<use Filter::Simple>.
+ __END__
+
+or:
+
+ __DATA__
+
+but this can be altered by passing a second argument to C<use Filter::Simple>
+or C<FILTER> (just remember: there's I<no> comma after the initial block when
+you use C<FILTER>).
That second argument may be either a C<qr>'d regular expression (which is then
used to match the terminator line), or a defined false value (which indicates
-that no terminator line should be looked for).
+that no terminator line should be looked for), or a reference to a hash
+(in which case the terminator is the value associated with the key
+C<'terminator'>.
For example, to cause the previous filter to filter only up to a line of the
form:
FILTER {
s/BANG\s+BANG/die 'BANG' if \$BANG/g;
}
- => qr/^\s*GNAB\s+esu\s*;\s*?$/;
+ qr/^\s*GNAB\s+esu\s*;\s*?$/;
+
+or:
+
+ FILTER {
+ s/BANG\s+BANG/die 'BANG' if \$BANG/g;
+ }
+ { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
and to prevent the filter's being turned off in any way:
FILTER {
s/BANG\s+BANG/die 'BANG' if \$BANG/g;
}
- => "";
- # or: => 0;
+ ""; # or: 0
+
+or:
+
+ FILTER {
+ s/BANG\s+BANG/die 'BANG' if \$BANG/g;
+ }
+ { terminator => "" };
+
+B<Note that, no matter what you set the terminator pattern too,
+the actual terminator itself I<must> be contained on a single source line.>
=head2 All-in-one interface
except that the C<FILTER> subroutine is not exported by Filter::Simple.
+
+=head2 Filtering only specific components of source code
+
+One of the problems with a filter like:
+
+ use Filter::Simple;
+
+ FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
+
+is that it indiscriminately applies the specified transformation to
+the entire text of your source program. So something like:
+
+ warn 'BANG BANG, YOU'RE DEAD';
+ BANG BANG;
+
+will become:
+
+ warn 'die 'BANG' if $BANG, YOU'RE DEAD';
+ die 'BANG' if $BANG;
+
+It is very common when filtering source to only want to apply the filter
+to the non-character-string parts of the code, or alternatively to I<only>
+the character strings.
+
+Filter::Simple supports this type of filtering by automatically
+exporting the C<FILTER_ONLY> subroutine.
+
+C<FILTER_ONLY> takes a sequence of specifiers that install separate
+(and possibly multiple) filters that act on only parts of the source code.
+For example:
+
+ use Filter::Simple;
+
+ FILTER_ONLY
+ code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
+ quotelike => sub { s/BANG\s+BANG/CHITTY CHITYY/g };
+
+The C<"code"> subroutine will only be used to filter parts of the source
+code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
+subroutine only filters Perl quotelikes (including here documents).
+
+The full list of alternatives is:
+
+=over
+
+=item C<"code">
+
+Filters only those sections of the source code that are not quotelikes, POD, or
+C<__DATA__>.
+
+=item C<"executable">
+
+Filters only those sections of the source code that are not POD or C<__DATA__>.
+
+=item C<"quotelike">
+
+Filters only Perl quotelikes (as interpreted by
+C<&Text::Balanced::extract_quotelike>).
+
+=item C<"string">
+
+Filters only the string literal parts of a Perl quotelike (i.e. the
+contents of a string literal, either half of a C<tr///>, the second
+half of an C<s///>).
+
+=item C<"regex">
+
+Filters only the pattern literal parts of a Perl quotelike (i.e. the
+contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
+
+=item C<"all">
+
+Filters everything. Identical in effect to C<FILTER>.
+
+=back
+
+Except for C<< FILTER_ONLY code => sub {...} >>, each of
+the component filters is called repeatedly, once for each component
+found in the source code.
+
+Note that you can also apply two or more of the same type of filter in
+a single C<FILTER_ONLY>. For example, here's a simple
+macro-preprocessor that is only applied within regexes,
+with a final debugging pass that printd the resulting source code:
+
+ use Regexp::Common;
+ FILTER_ONLY
+ regex => sub { s/!\[/[^/g },
+ regex => sub { s/%d/$RE{num}{int}/g },
+ regex => sub { s/%f/$RE{num}{real}/g },
+ all => sub { print if $::DEBUG };
+
+
+
+=head2 Filtering only the code parts of source code
+
+Most source code ceases to be grammatically correct when it is broken up
+into the pieces between string literals and regexes. So the C<'code'>
+component filter behaves slightly differently from the other partial filters
+described in the previous section.
+
+Rather than calling the specified processor on each individual piece of
+code (i.e. on the bits between quotelikes), the C<'code'> partial filter
+operates on the entire source code, but with the quotelike bits
+"blanked out".
+
+That is, a C<'code'> filter I<replaces> each quoted string, quotelike,
+regex, POD, and __DATA__ section with a placeholder. The
+delimiters of this placeholder are the contents of the C<$;> variable
+at the time the filter is applied (normally C<"\034">). The remaining
+four bytes are a unique identifier for the component being replaced.
+
+This approach makes it comparatively easy to write code preprocessors
+without worrying about the form or contents of strings, regexes, etc.
+For convenience, during a C<'code'> filtering operation, Filter::Simple
+provides a package variable (C<$Filter::Simple::placeholder>) that contains
+a pre-compiled regex that matches any placeholder. Placeholders can be
+moved and re-ordered within the source code as needed.
+
+Once the filtering has been applied, the original strings, regexes,
+POD, etc. are re-inserted into the code, by replacing each
+placeholder with the corresponding original component.
+
+For example, the following filter detects concatentated pairs of
+strings/quotelikes and reverses the order in which they are
+concatenated:
+
+ package DemoRevCat;
+ use Filter::Simple;
+
+ FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder;
+ s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
+ };
+
+Thus, the following code:
+
+ use DemoRevCat;
+
+ my $str = "abc" . q(def);
+
+ print "$str\n";
+
+would become:
+
+ my $str = q(def)."abc";
+
+ print "$str\n";
+
+and hence print:
+
+ defabc
+
+
=head2 Using Filter::Simple and Exporter together
You can't directly use Exporter when Filter::Simple.
Filter::Simple generates an C<import> subroutine for your module
(which hides the one inherited from Exporter).
-The C<FILTER> code you specify will, however, receive the C<import>'s argument
-list, so you can use that filter block as your C<import> subroutine.
+The C<FILTER> code you specify will, however, receive the C<import>'s
+complete argument list (including the package name in $_[0]),
+so you can use that filter block as your C<import> subroutine.
You'll need to call C<Exporter::export_to_level> from your C<FILTER> code
to make it work correctly.
FILTER {
# Your filtering code here
- __PACKAGE__->export_to_level(2,undef,@_);
+ __PACKAGE__->export_to_level(2,@_);
}