From: Jarkko Hietaniemi Date: Thu, 15 Nov 2001 00:42:25 +0000 (+0000) Subject: Upgrade to Filter::Simple 0.70. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=dfa18578110766f088afe62030a123af8750b980;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Filter::Simple 0.70. p4raw-id: //depot/perl@13012 --- diff --git a/MANIFEST b/MANIFEST index d6732af..bad7140 100644 --- a/MANIFEST +++ b/MANIFEST @@ -977,6 +977,8 @@ lib/Filter/Simple.pm Simple frontend to Filter::Util::Call lib/Filter/Simple/Changes Filter::Simple lib/Filter/Simple/README Filter::Simple lib/Filter/Simple/t/filter.t See if Filter::Simple works +lib/Filter/Simple/t/data.t See if Filter::Simple works +lib/Filter/Simple/t/filter_only.t See if Filter::Simple works lib/find.pl A find emulator--used by find2perl lib/FindBin.pm Find name of currently executing program lib/FindBin.t See if FindBin works @@ -2044,6 +2046,7 @@ t/lib/dprof/test6_v Perl code profiler tests t/lib/dprof/V.pm Perl code profiler tests t/lib/filter-util.pl See if Filter::Util::Call works t/lib/FilterTest.pm Helper file for lib/Filter/Simple/t/filter.t +t/lib/FilterOnlyTest.pm Helper file for lib/Filter/Simple/t/filter_only.t t/lib/h2ph.h Test header file for h2ph t/lib/h2ph.pht Generated output from h2ph.h by h2ph, for comparison t/lib/locale/latin1 Part of locale.t in Latin 1 diff --git a/lib/Filter/Simple.pm b/lib/Filter/Simple.pm index a92615d..a0b4a5c 100644 --- a/lib/Filter/Simple.pm +++ b/lib/Filter/Simple.pm @@ -1,48 +1,198 @@ package Filter::Simple; -use vars qw{ $VERSION }; +use Text::Balanced ':ALL'; -$VERSION = '0.61'; +use vars qw{ $VERSION @EXPORT }; + +$VERSION = '0.70'; use Filter::Util::Call; use Carp; +@EXPORT = qw( FILTER FILTER_ONLY ); + + sub import { if (@_>1) { shift; goto &FILTER } - else { *{caller()."::FILTER"} = \&FILTER } + else { *{caller()."::$_"} = \&$_ foreach @EXPORT } } sub FILTER (&;$) { my $caller = caller; my ($filter, $terminator) = @_; + no warnings 'redefine'; *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); *{"${caller}::unimport"} = \*filter_unimport; } +sub fail { + croak "FILTER_ONLY: ", @_; +} + +my $exql = sub { + my @bits = extract_quotelike $_[0], qr//; + return unless $bits[0]; + return \@bits; +}; + +my $ws = qr/\s+/; +my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; +my $EOP = qr/\n\n|\Z/; +my $CUT = qr/\n=cut.*$EOP/; +my $pod_or_DATA = qr/ + ^=(?:head[1-4]|item) .*? $CUT + | ^=pod .*? $CUT + | ^=for .*? $EOP + | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP + | ^__(DATA|END)__\n.* + /smx; + +my %extractor_for = ( + quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ], + regex => [ $ws, $pod_or_DATA, $id, $exql ], + string => [ $ws, $pod_or_DATA, $id, $exql ], + code => [ $ws, { DONT_MATCH => $pod_or_DATA }, + $id, { DONT_MATCH => \&extract_quotelike } ], + executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], + all => [ { MATCH => qr/(?s:.*)/ } ], +); + +my %selector_for = ( + all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, + executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, + quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, + regex => sub { my ($t)=@_; + sub{ref() or return $_; + my ($ql,undef,$pre,$op,$ld,$pat) = @$_; + return $_->[0] unless $op =~ /^(qr|m|s)/ + || !$op && ($ld eq '/' || $ld eq '?'); + $_ = $pat; + $t->(@_); + $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; + return "$pre$ql"; + }; + }, + string => sub { my ($t)=@_; + sub{ref() or return $_; + local *args = \@_; + my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; + return $_->[0] if $op =~ /^(qr|m)/ + || !$op && ($ld1 eq '/' || $ld1 eq '?'); + if (!$op || $op eq 'tr' || $op eq 'y') { + local *_ = \$str1; + $t->(@args); + } + if ($op =~ /^(tr|y|s)/) { + local *_ = \$str2; + $t->(@args); + } + my $result = "$pre$op$ld1$str1$rd1"; + $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> + $result .= "$str2$rd2$flg"; + return $result; + }; + }, +); + + +sub gen_std_filter_for { + my ($type, $transform) = @_; + return sub { my (@pieces, $instr); + for (extract_multiple($_,$extractor_for{$type})) { + if (ref()) { push @pieces, $_; $instr=0 } + elsif ($instr) { $pieces[-1] .= $_ } + else { push @pieces, $_; $instr=1 } + } + if ($type eq 'code') { + my $count = 0; + local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/; + my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/; + $_ = join "", + map { ref $_ ? $;.pack('N',$count++).$; : $_ } + @pieces; + @pieces = grep { ref $_ } @pieces; + $transform->(@_); + s/$extractor/${$pieces[unpack('N',$1)]}/g; + } + else { + $DB::single=1; + my $selector = $selector_for{$type}->($transform); + $_ = join "", map $selector->(@_), @pieces; + } + } +}; + +sub FILTER_ONLY { + $DB::single = 1; + my $caller = caller; + while (@_ > 1) { + my ($what, $how) = splice(@_, 0, 2); + fail "Unknown selector: $what" + unless exists $extractor_for{$what}; + fail "Filter for $what is not a subroutine reference" + unless ref $how eq 'CODE'; + push @transforms, gen_std_filter_for($what,$how); + } + my $terminator = shift; + + my $multitransform = sub { + foreach my $transform ( @transforms ) { + $transform->(@_); + } + }; + no warnings 'redefine'; + *{"${caller}::import"} = + gen_filter_import($caller,$multitransform,$terminator); + *{"${caller}::unimport"} = \*filter_unimport; +} + +my $ows = qr/(?:[ \t]+|#[^\n]*)*/; + sub gen_filter_import { my ($class, $filter, $terminator) = @_; return sub { my ($imported_class, @args) = @_; - $terminator = qr/^\s*no\s+$imported_class\s*;\s*$/ - unless defined $terminator; + my $def_terminator = + qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)$/; + if (!defined $terminator) { + $terminator->{terminator} = $def_terminator; + } + elsif (!ref $terminator) { + $terminator->{terminator} = $terminator; + } + elsif (ref $terminator ne 'HASH') { + croak "Terminator must be specified as scalar or hash ref" + } + elsif (!exists $terminator->{terminator}) { + $terminator->{terminator} = $def_terminator; + } filter_add( sub { - my ($status, $off); + my ($status, $lastline); my $count = 0; my $data = ""; while ($status = filter_read()) { return $status if $status < 0; - if ($terminator && m/$terminator/) { - $off=1; + if ($terminator->{terminator} && + m/$terminator->{terminator}/) { + $lastline = $_; last; } $data .= $_; $count++; $_ = ""; } + $DB::single=1; $_ = $data; - $filter->(@args) unless $status < 0; - $_ .= "no $imported_class;\n" if $off; + $filter->($imported_class, @args) unless $status < 0; + if (defined $lastline) { + if (defined $terminator->{becomes}) { + $_ .= $terminator->{becomes}; + } + elsif ($lastline =~ $def_terminator) { + $_ .= $lastline; + } + } return $count; } ); @@ -231,15 +381,28 @@ In other words, the previous example, would become: =head2 Disabling or changing behaviour -By default, the installed filter only filters to a line of the form: +By default, the installed filter only filters up to a line consisting of one of +the three standard source "terminators": + + no ModuleName; # optional comment - no ModuleName; +or: -but this can be altered by passing a second argument to C. + __END__ + +or: + + __DATA__ + +but this can be altered by passing a second argument to C +or C (just remember: there's I comma after the initial block when +you use C). That second argument may be either a C'd regular expression (which is then used to match the terminator line), or a defined false value (which indicates -that no terminator line should be looked for). +that no terminator line should be looked for), or a reference to a hash +(in which case the terminator is the value associated with the key +C<'terminator'>. For example, to cause the previous filter to filter only up to a line of the form: @@ -254,7 +417,14 @@ you would write: FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; } - => qr/^\s*GNAB\s+esu\s*;\s*?$/; + qr/^\s*GNAB\s+esu\s*;\s*?$/; + +or: + + FILTER { + s/BANG\s+BANG/die 'BANG' if \$BANG/g; + } + { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; and to prevent the filter's being turned off in any way: @@ -264,8 +434,17 @@ and to prevent the filter's being turned off in any way: FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g; } - => ""; - # or: => 0; + ""; # or: 0 + +or: + + FILTER { + s/BANG\s+BANG/die 'BANG' if \$BANG/g; + } + { terminator => "" }; + +B be contained on a single source line.> =head2 All-in-one interface @@ -301,6 +480,159 @@ This is exactly the same as: except that the C subroutine is not exported by Filter::Simple. + +=head2 Filtering only specific components of source code + +One of the problems with a filter like: + + use Filter::Simple; + + FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; + +is that it indiscriminately applies the specified transformation to +the entire text of your source program. So something like: + + warn 'BANG BANG, YOU'RE DEAD'; + BANG BANG; + +will become: + + warn 'die 'BANG' if $BANG, YOU'RE DEAD'; + die 'BANG' if $BANG; + +It is very common when filtering source to only want to apply the filter +to the non-character-string parts of the code, or alternatively to I +the character strings. + +Filter::Simple supports this type of filtering by automatically +exporting the C subroutine. + +C takes a sequence of specifiers that install separate +(and possibly multiple) filters that act on only parts of the source code. +For example: + + use Filter::Simple; + + FILTER_ONLY + code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, + quotelike => sub { s/BANG\s+BANG/CHITTY CHITYY/g }; + +The C<"code"> subroutine will only be used to filter parts of the source +code that are not quotelikes, POD, or C<__DATA__>. The C +subroutine only filters Perl quotelikes (including here documents). + +The full list of alternatives is: + +=over + +=item C<"code"> + +Filters only those sections of the source code that are not quotelikes, POD, or +C<__DATA__>. + +=item C<"executable"> + +Filters only those sections of the source code that are not POD or C<__DATA__>. + +=item C<"quotelike"> + +Filters only Perl quotelikes (as interpreted by +C<&Text::Balanced::extract_quotelike>). + +=item C<"string"> + +Filters only the string literal parts of a Perl quotelike (i.e. the +contents of a string literal, either half of a C, the second +half of an C). + +=item C<"regex"> + +Filters only the pattern literal parts of a Perl quotelike (i.e. the +contents of a C or an C, the first half of an C). + +=item C<"all"> + +Filters everything. Identical in effect to C. + +=back + +Except for C<< FILTER_ONLY code => sub {...} >>, each of +the component filters is called repeatedly, once for each component +found in the source code. + +Note that you can also apply two or more of the same type of filter in +a single C. For example, here's a simple +macro-preprocessor that is only applied within regexes, +with a final debugging pass that printd the resulting source code: + + use Regexp::Common; + FILTER_ONLY + regex => sub { s/!\[/[^/g }, + regex => sub { s/%d/$RE{num}{int}/g }, + regex => sub { s/%f/$RE{num}{real}/g }, + all => sub { print if $::DEBUG }; + + + +=head2 Filtering only the code parts of source code + +Most source code ceases to be grammatically correct when it is broken up +into the pieces between string literals and regexes. So the C<'code'> +component filter behaves slightly differently from the other partial filters +described in the previous section. + +Rather than calling the specified processor on each individual piece of +code (i.e. on the bits between quotelikes), the C<'code'> partial filter +operates on the entire source code, but with the quotelike bits +"blanked out". + +That is, a C<'code'> filter I each quoted string, quotelike, +regex, POD, and __DATA__ section with a placeholder. The +delimiters of this placeholder are the contents of the C<$;> variable +at the time the filter is applied (normally C<"\034">). The remaining +four bytes are a unique identifier for the component being replaced. + +This approach makes it comparatively easy to write code preprocessors +without worrying about the form or contents of strings, regexes, etc. +For convenience, during a C<'code'> filtering operation, Filter::Simple +provides a package variable (C<$Filter::Simple::placeholder>) that contains +a pre-compiled regex that matches any placeholder. Placeholders can be +moved and re-ordered within the source code as needed. + +Once the filtering has been applied, the original strings, regexes, +POD, etc. are re-inserted into the code, by replacing each +placeholder with the corresponding original component. + +For example, the following filter detects concatentated pairs of +strings/quotelikes and reverses the order in which they are +concatenated: + + package DemoRevCat; + use Filter::Simple; + + FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder; + s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx + }; + +Thus, the following code: + + use DemoRevCat; + + my $str = "abc" . q(def); + + print "$str\n"; + +would become: + + my $str = q(def)."abc"; + + print "$str\n"; + +and hence print: + + defabc + + =head2 Using Filter::Simple and Exporter together You can't directly use Exporter when Filter::Simple. @@ -308,8 +640,9 @@ You can't directly use Exporter when Filter::Simple. Filter::Simple generates an C subroutine for your module (which hides the one inherited from Exporter). -The C code you specify will, however, receive the C's argument -list, so you can use that filter block as your C subroutine. +The C code you specify will, however, receive the C's +complete argument list (including the package name in $_[0]), +so you can use that filter block as your C subroutine. You'll need to call C from your C code to make it work correctly. @@ -327,7 +660,7 @@ For example: FILTER { # Your filtering code here - __PACKAGE__->export_to_level(2,undef,@_); + __PACKAGE__->export_to_level(2,@_); } diff --git a/lib/Filter/Simple/Changes b/lib/Filter/Simple/Changes index e15c37b..9113bdc 100644 --- a/lib/Filter/Simple/Changes +++ b/lib/Filter/Simple/Changes @@ -30,3 +30,17 @@ Revision history for Perl extension Filter::Simple - Added documentation for using F::S and Exporter together +0.70 Wed Nov 14 23:36:18 2001 + + - Added FILTER_ONLY for fine-grained filtering of code, + strings, or regexes + + - Fixed document snafu regarding optional terminators + + - Fixed bug so that FILTER now receives *all* import args + (i.e. including the class name in $_[0]) + + - Allowed default terminator to allow comments embedded in it + (thanks, Christian) and to handle __DATA__ and __END__ + + - Fixed handling of __DATA__ and *DATA diff --git a/lib/Filter/Simple/README b/lib/Filter/Simple/README index 03e4599..5f22642 100644 --- a/lib/Filter/Simple/README +++ b/lib/Filter/Simple/README @@ -1,5 +1,5 @@ ============================================================================== - Release of version 0.61 of Filter::Simple + Release of version 0.70 of Filter::Simple ============================================================================== @@ -46,9 +46,22 @@ COPYRIGHT ============================================================================== -CHANGES IN VERSION 0.61 +CHANGES IN VERSION 0.70 + + + - Added FILTER_ONLY for fine-grained filtering of code, + strings, or regexes + + - Fixed document snafu regarding optional terminators + + - Fixed bug so that FILTER now receives *all* import args + (i.e. including the class name in $_[0]) + + - Allowed default terminator to allow comments embedded in it + (thanks, Christian) and to handle __DATA__ and __END__ + + - Fixed handling of __DATA__ and *DATA -(No changes have been documented for this version) ============================================================================== diff --git a/lib/Filter/Simple/t/data.t b/lib/Filter/Simple/t/data.t new file mode 100644 index 0000000..4618e36 --- /dev/null +++ b/lib/Filter/Simple/t/data.t @@ -0,0 +1,12 @@ +use FilterOnlyTest qr/ok/ => "not ok", "bad" => "ok"; +print "1..6\n"; + +print "bad 1\n"; +print "bad 2\n"; +print "bad 3\n"; +print ; + +__DATA__ +ok 4 +ok 5 +ok 6 diff --git a/lib/Filter/Simple/t/filter.t b/lib/Filter/Simple/t/filter.t index 5f6e382..d16c7fa 100644 --- a/lib/Filter/Simple/t/filter.t +++ b/lib/Filter/Simple/t/filter.t @@ -1,9 +1,5 @@ -BEGIN { - chdir('t') if -d 't'; - @INC = 'lib'; -} - use FilterTest qr/not ok/ => "ok", fail => "ok"; + print "1..6\n"; sub fail { print "fail ", $_[0], "\n" } diff --git a/lib/Filter/Simple/t/filter_only.t b/lib/Filter/Simple/t/filter_only.t new file mode 100644 index 0000000..45745b0 --- /dev/null +++ b/lib/Filter/Simple/t/filter_only.t @@ -0,0 +1,24 @@ +use FilterOnlyTest qr/not ok/ => "ok", "bad" => "ok", fail => "die"; +print "1..9\n"; + +sub fail { print "ok ", $_[0], "\n" } +sub ok { print "ok ", $_[0], "\n" } + +print "not ok 1\n"; +print "bad 2\n"; + +fail(3); +&fail(4); + +print "not " unless "whatnot okapi" eq "whatokapi"; +print "ok 5\n"; + +ok 7 unless not ok 6; + +no FilterOnlyTest; # THE FUN STOPS HERE + +print "not " unless "not ok" =~ /^not /; +print "ok 8\n"; + +print "not " unless "bad" =~ /bad/; +print "ok 9\n"; diff --git a/t/lib/FilterOnlyTest.pm b/t/lib/FilterOnlyTest.pm new file mode 100644 index 0000000..856e79d --- /dev/null +++ b/t/lib/FilterOnlyTest.pm @@ -0,0 +1,11 @@ +package FilterOnlyTest; + +use Filter::Simple; + +FILTER_ONLY + string => sub { + my $class = shift; + while (my($pat, $str) = splice @_, 0, 2) { + s/$pat/$str/g; + } + }; diff --git a/t/lib/FilterTest.pm b/t/lib/FilterTest.pm index 4e99772..c49e280 100644 --- a/t/lib/FilterTest.pm +++ b/t/lib/FilterTest.pm @@ -1,14 +1,12 @@ package FilterTest; -BEGIN { - chdir('t') if -d 't'; - @INC = '../lib'; -} +use Filter::Simple; -use Filter::Simple sub { - while (my ($from, $to) = splice @_, 0, 2) { - s/$from/$to/g; - } +FILTER { + my $class = shift; + while (my($pat, $str) = splice @_, 0, 2) { + s/$pat/$str/g; + } }; 1;