Missing p4 add.
[p5sagit/p5-mst-13.2.git] / lib / Filter / Simple.pm
CommitLineData
b38acab9 1package Filter::Simple;
2
dfa18578 3use Text::Balanced ':ALL';
b38acab9 4
dfa18578 5use vars qw{ $VERSION @EXPORT };
6
7$VERSION = '0.70';
b38acab9 8
9use Filter::Util::Call;
10use Carp;
11
dfa18578 12@EXPORT = qw( FILTER FILTER_ONLY );
13
14
b38acab9 15sub import {
fbe2c49e 16 if (@_>1) { shift; goto &FILTER }
dfa18578 17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
fbe2c49e 18}
19
20sub FILTER (&;$) {
b38acab9 21 my $caller = caller;
fbe2c49e 22 my ($filter, $terminator) = @_;
dfa18578 23 no warnings 'redefine';
fbe2c49e 24 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
b38acab9 25 *{"${caller}::unimport"} = \*filter_unimport;
26}
27
dfa18578 28sub fail {
29 croak "FILTER_ONLY: ", @_;
30}
31
32my $exql = sub {
33 my @bits = extract_quotelike $_[0], qr//;
34 return unless $bits[0];
35 return \@bits;
36};
37
38my $ws = qr/\s+/;
39my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
40my $EOP = qr/\n\n|\Z/;
41my $CUT = qr/\n=cut.*$EOP/;
42my $pod_or_DATA = qr/
43 ^=(?:head[1-4]|item) .*? $CUT
44 | ^=pod .*? $CUT
45 | ^=for .*? $EOP
46 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
47 | ^__(DATA|END)__\n.*
48 /smx;
49
50my %extractor_for = (
51 quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ],
52 regex => [ $ws, $pod_or_DATA, $id, $exql ],
53 string => [ $ws, $pod_or_DATA, $id, $exql ],
54 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 all => [ { MATCH => qr/(?s:.*)/ } ],
58);
59
60my %selector_for = (
61 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
62 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
63 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
64 regex => sub { my ($t)=@_;
65 sub{ref() or return $_;
66 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
67 return $_->[0] unless $op =~ /^(qr|m|s)/
68 || !$op && ($ld eq '/' || $ld eq '?');
69 $_ = $pat;
70 $t->(@_);
71 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
72 return "$pre$ql";
73 };
74 },
75 string => sub { my ($t)=@_;
76 sub{ref() or return $_;
77 local *args = \@_;
78 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
79 return $_->[0] if $op =~ /^(qr|m)/
80 || !$op && ($ld1 eq '/' || $ld1 eq '?');
81 if (!$op || $op eq 'tr' || $op eq 'y') {
82 local *_ = \$str1;
83 $t->(@args);
84 }
85 if ($op =~ /^(tr|y|s)/) {
86 local *_ = \$str2;
87 $t->(@args);
88 }
89 my $result = "$pre$op$ld1$str1$rd1";
90 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
91 $result .= "$str2$rd2$flg";
92 return $result;
93 };
94 },
95);
96
97
98sub gen_std_filter_for {
99 my ($type, $transform) = @_;
100 return sub { my (@pieces, $instr);
101 for (extract_multiple($_,$extractor_for{$type})) {
102 if (ref()) { push @pieces, $_; $instr=0 }
103 elsif ($instr) { $pieces[-1] .= $_ }
104 else { push @pieces, $_; $instr=1 }
105 }
106 if ($type eq 'code') {
107 my $count = 0;
108 local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/;
109 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
110 $_ = join "",
111 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
112 @pieces;
113 @pieces = grep { ref $_ } @pieces;
114 $transform->(@_);
115 s/$extractor/${$pieces[unpack('N',$1)]}/g;
116 }
117 else {
118 $DB::single=1;
119 my $selector = $selector_for{$type}->($transform);
120 $_ = join "", map $selector->(@_), @pieces;
121 }
122 }
123};
124
125sub FILTER_ONLY {
126 $DB::single = 1;
127 my $caller = caller;
128 while (@_ > 1) {
129 my ($what, $how) = splice(@_, 0, 2);
130 fail "Unknown selector: $what"
131 unless exists $extractor_for{$what};
132 fail "Filter for $what is not a subroutine reference"
133 unless ref $how eq 'CODE';
134 push @transforms, gen_std_filter_for($what,$how);
135 }
136 my $terminator = shift;
137
138 my $multitransform = sub {
139 foreach my $transform ( @transforms ) {
140 $transform->(@_);
141 }
142 };
143 no warnings 'redefine';
144 *{"${caller}::import"} =
145 gen_filter_import($caller,$multitransform,$terminator);
146 *{"${caller}::unimport"} = \*filter_unimport;
147}
148
149my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
150
b38acab9 151sub gen_filter_import {
fbe2c49e 152 my ($class, $filter, $terminator) = @_;
b38acab9 153 return sub {
154 my ($imported_class, @args) = @_;
dfa18578 155 my $def_terminator =
156 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)$/;
157 if (!defined $terminator) {
158 $terminator->{terminator} = $def_terminator;
159 }
160 elsif (!ref $terminator) {
161 $terminator->{terminator} = $terminator;
162 }
163 elsif (ref $terminator ne 'HASH') {
164 croak "Terminator must be specified as scalar or hash ref"
165 }
166 elsif (!exists $terminator->{terminator}) {
167 $terminator->{terminator} = $def_terminator;
168 }
b38acab9 169 filter_add(
170 sub {
dfa18578 171 my ($status, $lastline);
fbe2c49e 172 my $count = 0;
b38acab9 173 my $data = "";
174 while ($status = filter_read()) {
fbe2c49e 175 return $status if $status < 0;
dfa18578 176 if ($terminator->{terminator} &&
177 m/$terminator->{terminator}/) {
178 $lastline = $_;
b38acab9 179 last;
180 }
181 $data .= $_;
fbe2c49e 182 $count++;
b38acab9 183 $_ = "";
184 }
dfa18578 185 $DB::single=1;
b38acab9 186 $_ = $data;
dfa18578 187 $filter->($imported_class, @args) unless $status < 0;
188 if (defined $lastline) {
189 if (defined $terminator->{becomes}) {
190 $_ .= $terminator->{becomes};
191 }
192 elsif ($lastline =~ $def_terminator) {
193 $_ .= $lastline;
194 }
195 }
fbe2c49e 196 return $count;
b38acab9 197 }
198 );
199 }
200}
201
202sub filter_unimport {
203 filter_del();
204}
205
2061;
207
208__END__
209
210=head1 NAME
211
212Filter::Simple - Simplified source filtering
213
fbe2c49e 214
b38acab9 215=head1 SYNOPSIS
216
217 # in MyFilter.pm:
218
219 package MyFilter;
220
fbe2c49e 221 use Filter::Simple;
222
223 FILTER { ... };
b38acab9 224
fbe2c49e 225 # or just:
226 #
227 # use Filter::Simple sub { ... };
b38acab9 228
229 # in user's code:
230
231 use MyFilter;
232
233 # this code is filtered
234
235 no MyFilter;
236
237 # this code is not
238
239
240=head1 DESCRIPTION
241
242=head2 The Problem
243
244Source filtering is an immensely powerful feature of recent versions of Perl.
245It allows one to extend the language itself (e.g. the Switch module), to
246simplify the language (e.g. Language::Pythonesque), or to completely recast the
247language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
248the full power of Perl as its own, recursively applied, macro language.
249
250The excellent Filter::Util::Call module (by Paul Marquess) provides a
251usable Perl interface to source filtering, but it is often too powerful
252and not nearly as simple as it could be.
253
254To use the module it is necessary to do the following:
255
256=over 4
257
258=item 1.
259
260Download, build, and install the Filter::Util::Call module.
55a1c97c 261(If you have Perl 5.7.1 or later, this is already done for you.)
b38acab9 262
263=item 2.
264
265Set up a module that does a C<use Filter::Util::Call>.
266
267=item 3.
268
269Within that module, create an C<import> subroutine.
270
271=item 4.
272
273Within the C<import> subroutine do a call to C<filter_add>, passing
274it either a subroutine reference.
275
276=item 5.
277
278Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
279to "prime" $_ with source code data from the source file that will
280C<use> your module. Check the status value returned to see if any
281source code was actually read in.
282
283=item 6.
284
285Process the contents of $_ to change the source code in the desired manner.
286
287=item 7.
288
289Return the status value.
290
291=item 8.
292
293If the act of unimporting your module (via a C<no>) should cause source
294code filtering to cease, create an C<unimport> subroutine, and have it call
295C<filter_del>. Make sure that the call to C<filter_read> or
296C<filter_read_exact> in step 5 will not accidentally read past the
297C<no>. Effectively this limits source code filters to line-by-line
298operation, unless the C<import> subroutine does some fancy
299pre-pre-parsing of the source code it's filtering.
300
301=back
302
303For example, here is a minimal source code filter in a module named
304BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
305to the sequence C<die 'BANG' if $BANG> in any piece of code following a
306C<use BANG;> statement (until the next C<no BANG;> statement, if any):
307
308 package BANG;
fbe2c49e 309
b38acab9 310 use Filter::Util::Call ;
311
312 sub import {
313 filter_add( sub {
314 my $caller = caller;
315 my ($status, $no_seen, $data);
316 while ($status = filter_read()) {
fbe2c49e 317 if (/^\s*no\s+$caller\s*;\s*?$/) {
b38acab9 318 $no_seen=1;
319 last;
320 }
321 $data .= $_;
322 $_ = "";
323 }
324 $_ = $data;
325 s/BANG\s+BANG/die 'BANG' if \$BANG/g
326 unless $status < 0;
327 $_ .= "no $class;\n" if $no_seen;
328 return 1;
329 })
330 }
331
332 sub unimport {
333 filter_del();
334 }
335
336 1 ;
337
7bf0340c 338This level of sophistication puts filtering out of the reach of
339many programmers.
b38acab9 340
341
342=head2 A Solution
343
7bf0340c 344The Filter::Simple module provides a simplified interface to
b38acab9 345Filter::Util::Call; one that is sufficient for most common cases.
346
347Instead of the above process, with Filter::Simple the task of setting up
348a source code filter is reduced to:
349
350=over 4
351
352=item 1.
353
55a1c97c 354Download and install the Filter::Simple module.
355(If you have Perl 5.7.1 or later, this is already done for you.)
356
357=item 2.
358
fbe2c49e 359Set up a module that does a C<use Filter::Simple> and then
360calls C<FILTER { ... }>.
b38acab9 361
55a1c97c 362=item 3.
b38acab9 363
fbe2c49e 364Within the anonymous subroutine or block that is passed to
365C<FILTER>, process the contents of $_ to change the source code in
366the desired manner.
b38acab9 367
368=back
369
370In other words, the previous example, would become:
371
372 package BANG;
fbe2c49e 373 use Filter::Simple;
374
375 FILTER {
b38acab9 376 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
377 };
378
379 1 ;
380
381
fbe2c49e 382=head2 Disabling or changing <no> behaviour
383
dfa18578 384By default, the installed filter only filters up to a line consisting of one of
385the three standard source "terminators":
386
387 no ModuleName; # optional comment
fbe2c49e 388
dfa18578 389or:
fbe2c49e 390
dfa18578 391 __END__
392
393or:
394
395 __DATA__
396
397but this can be altered by passing a second argument to C<use Filter::Simple>
398or C<FILTER> (just remember: there's I<no> comma after the initial block when
399you use C<FILTER>).
fbe2c49e 400
401That second argument may be either a C<qr>'d regular expression (which is then
402used to match the terminator line), or a defined false value (which indicates
dfa18578 403that no terminator line should be looked for), or a reference to a hash
404(in which case the terminator is the value associated with the key
405C<'terminator'>.
fbe2c49e 406
407For example, to cause the previous filter to filter only up to a line of the
408form:
409
410 GNAB esu;
411
412you would write:
413
414 package BANG;
415 use Filter::Simple;
416
417 FILTER {
418 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
419 }
dfa18578 420 qr/^\s*GNAB\s+esu\s*;\s*?$/;
421
422or:
423
424 FILTER {
425 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
426 }
427 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
fbe2c49e 428
429and to prevent the filter's being turned off in any way:
430
431 package BANG;
432 use Filter::Simple;
433
434 FILTER {
435 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
436 }
dfa18578 437 ""; # or: 0
438
439or:
440
441 FILTER {
442 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
443 }
444 { terminator => "" };
445
446B<Note that, no matter what you set the terminator pattern too,
447the actual terminator itself I<must> be contained on a single source line.>
fbe2c49e 448
449
450=head2 All-in-one interface
451
452Separating the loading of Filter::Simple:
453
454 use Filter::Simple;
455
456from the setting up of the filtering:
457
458 FILTER { ... };
459
460is useful because it allows other code (typically parser support code
461or caching variables) to be defined before the filter is invoked.
462However, there is often no need for such a separation.
463
464In those cases, it is easier to just append the filtering subroutine and
465any terminator specification directly to the C<use> statement that loads
466Filter::Simple, like so:
467
468 use Filter::Simple sub {
469 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
470 };
471
472This is exactly the same as:
473
474 use Filter::Simple;
475 BEGIN {
476 Filter::Simple::FILTER {
477 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
478 };
479 }
480
481except that the C<FILTER> subroutine is not exported by Filter::Simple.
482
dfa18578 483
484=head2 Filtering only specific components of source code
485
486One of the problems with a filter like:
487
488 use Filter::Simple;
489
490 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
491
492is that it indiscriminately applies the specified transformation to
493the entire text of your source program. So something like:
494
495 warn 'BANG BANG, YOU'RE DEAD';
496 BANG BANG;
497
498will become:
499
500 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
501 die 'BANG' if $BANG;
502
503It is very common when filtering source to only want to apply the filter
504to the non-character-string parts of the code, or alternatively to I<only>
505the character strings.
506
507Filter::Simple supports this type of filtering by automatically
508exporting the C<FILTER_ONLY> subroutine.
509
510C<FILTER_ONLY> takes a sequence of specifiers that install separate
511(and possibly multiple) filters that act on only parts of the source code.
512For example:
513
514 use Filter::Simple;
515
516 FILTER_ONLY
517 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
518 quotelike => sub { s/BANG\s+BANG/CHITTY CHITYY/g };
519
520The C<"code"> subroutine will only be used to filter parts of the source
521code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
522subroutine only filters Perl quotelikes (including here documents).
523
524The full list of alternatives is:
525
526=over
527
528=item C<"code">
529
530Filters only those sections of the source code that are not quotelikes, POD, or
531C<__DATA__>.
532
533=item C<"executable">
534
535Filters only those sections of the source code that are not POD or C<__DATA__>.
536
537=item C<"quotelike">
538
539Filters only Perl quotelikes (as interpreted by
540C<&Text::Balanced::extract_quotelike>).
541
542=item C<"string">
543
544Filters only the string literal parts of a Perl quotelike (i.e. the
545contents of a string literal, either half of a C<tr///>, the second
546half of an C<s///>).
547
548=item C<"regex">
549
550Filters only the pattern literal parts of a Perl quotelike (i.e. the
551contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
552
553=item C<"all">
554
555Filters everything. Identical in effect to C<FILTER>.
556
557=back
558
559Except for C<< FILTER_ONLY code => sub {...} >>, each of
560the component filters is called repeatedly, once for each component
561found in the source code.
562
563Note that you can also apply two or more of the same type of filter in
564a single C<FILTER_ONLY>. For example, here's a simple
565macro-preprocessor that is only applied within regexes,
566with a final debugging pass that printd the resulting source code:
567
568 use Regexp::Common;
569 FILTER_ONLY
570 regex => sub { s/!\[/[^/g },
571 regex => sub { s/%d/$RE{num}{int}/g },
572 regex => sub { s/%f/$RE{num}{real}/g },
573 all => sub { print if $::DEBUG };
574
575
576
577=head2 Filtering only the code parts of source code
578
579Most source code ceases to be grammatically correct when it is broken up
580into the pieces between string literals and regexes. So the C<'code'>
581component filter behaves slightly differently from the other partial filters
582described in the previous section.
583
584Rather than calling the specified processor on each individual piece of
585code (i.e. on the bits between quotelikes), the C<'code'> partial filter
586operates on the entire source code, but with the quotelike bits
587"blanked out".
588
589That is, a C<'code'> filter I<replaces> each quoted string, quotelike,
590regex, POD, and __DATA__ section with a placeholder. The
591delimiters of this placeholder are the contents of the C<$;> variable
592at the time the filter is applied (normally C<"\034">). The remaining
593four bytes are a unique identifier for the component being replaced.
594
595This approach makes it comparatively easy to write code preprocessors
596without worrying about the form or contents of strings, regexes, etc.
597For convenience, during a C<'code'> filtering operation, Filter::Simple
598provides a package variable (C<$Filter::Simple::placeholder>) that contains
599a pre-compiled regex that matches any placeholder. Placeholders can be
600moved and re-ordered within the source code as needed.
601
602Once the filtering has been applied, the original strings, regexes,
603POD, etc. are re-inserted into the code, by replacing each
604placeholder with the corresponding original component.
605
606For example, the following filter detects concatentated pairs of
607strings/quotelikes and reverses the order in which they are
608concatenated:
609
610 package DemoRevCat;
611 use Filter::Simple;
612
613 FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder;
614 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
615 };
616
617Thus, the following code:
618
619 use DemoRevCat;
620
621 my $str = "abc" . q(def);
622
623 print "$str\n";
624
625would become:
626
627 my $str = q(def)."abc";
628
629 print "$str\n";
630
631and hence print:
632
633 defabc
634
635
55a1c97c 636=head2 Using Filter::Simple and Exporter together
637
638You can't directly use Exporter when Filter::Simple.
639
640Filter::Simple generates an C<import> subroutine for your module
641(which hides the one inherited from Exporter).
642
dfa18578 643The C<FILTER> code you specify will, however, receive the C<import>'s
644complete argument list (including the package name in $_[0]),
645so you can use that filter block as your C<import> subroutine.
55a1c97c 646
647You'll need to call C<Exporter::export_to_level> from your C<FILTER> code
648to make it work correctly.
649
650For example:
651
652 use Filter::Simple;
653
654 use base Exporter;
655 @EXPORT = qw(foo);
656 @EXPORT_OK = qw(bar);
657
658 sub foo { print "foo\n" }
659 sub bar { print "bar\n" }
660
661 FILTER {
662 # Your filtering code here
dfa18578 663 __PACKAGE__->export_to_level(2,@_);
55a1c97c 664 }
665
fbe2c49e 666
b38acab9 667=head2 How it works
668
fbe2c49e 669The Filter::Simple module exports into the package that calls C<FILTER>
670(or C<use>s it directly) -- such as package "BANG" in the above example --
671two automagically constructed
b38acab9 672subroutines -- C<import> and C<unimport> -- which take care of all the
673nasty details.
674
675In addition, the generated C<import> subroutine passes its own argument
676list to the filtering subroutine, so the BANG.pm filter could easily
677be made parametric:
678
679 package BANG;
fbe2c49e 680
681 use Filter::Simple;
682
683 FILTER {
b38acab9 684 my ($die_msg, $var_name) = @_;
685 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
686 };
687
688 # and in some user code:
689
fbe2c49e 690 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
b38acab9 691
692
fbe2c49e 693The specified filtering subroutine is called every time a C<use BANG> is
694encountered, and passed all the source code following that call, up to
695either the next C<no BANG;> (or whatever terminator you've set) or the
696end of the source file, whichever occurs first. By default, any C<no
697BANG;> call must appear by itself on a separate line, or it is ignored.
b38acab9 698
699
700=head1 AUTHOR
701
702Damian Conway (damian@conway.org)
703
704=head1 COPYRIGHT
705
55a1c97c 706 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
707 This module is free software. It may be used, redistributed
708 and/or modified under the same terms as Perl itself.