Fix/band-aid for op.c's anon CV leak fix co-existing with threads.
[p5sagit/p5-mst-13.2.git] / lib / Filter / Simple.pm
CommitLineData
b38acab9 1package Filter::Simple;
2
dfa18578 3use Text::Balanced ':ALL';
b38acab9 4
dfa18578 5use vars qw{ $VERSION @EXPORT };
6
201f4820 7$VERSION = '0.77';
b38acab9 8
9use Filter::Util::Call;
10use Carp;
11
dfa18578 12@EXPORT = qw( FILTER FILTER_ONLY );
13
14
b38acab9 15sub import {
fbe2c49e 16 if (@_>1) { shift; goto &FILTER }
dfa18578 17 else { *{caller()."::$_"} = \&$_ foreach @EXPORT }
fbe2c49e 18}
19
20sub FILTER (&;$) {
b38acab9 21 my $caller = caller;
fbe2c49e 22 my ($filter, $terminator) = @_;
dfa18578 23 no warnings 'redefine';
fbe2c49e 24 *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator);
201f4820 25 *{"${caller}::unimport"} = gen_filter_unimport($caller);
b38acab9 26}
27
dfa18578 28sub fail {
29 croak "FILTER_ONLY: ", @_;
30}
31
32my $exql = sub {
33 my @bits = extract_quotelike $_[0], qr//;
34 return unless $bits[0];
35 return \@bits;
36};
37
38my $ws = qr/\s+/;
39my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/;
40my $EOP = qr/\n\n|\Z/;
41my $CUT = qr/\n=cut.*$EOP/;
42my $pod_or_DATA = qr/
43 ^=(?:head[1-4]|item) .*? $CUT
44 | ^=pod .*? $CUT
45 | ^=for .*? $EOP
46 | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP
47 | ^__(DATA|END)__\n.*
48 /smx;
49
50my %extractor_for = (
51 quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ],
52 regex => [ $ws, $pod_or_DATA, $id, $exql ],
53 string => [ $ws, $pod_or_DATA, $id, $exql ],
54 code => [ $ws, { DONT_MATCH => $pod_or_DATA },
55 $id, { DONT_MATCH => \&extract_quotelike } ],
56 executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ],
57 all => [ { MATCH => qr/(?s:.*)/ } ],
58);
59
60my %selector_for = (
61 all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} },
62 executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} },
63 quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} },
64 regex => sub { my ($t)=@_;
65 sub{ref() or return $_;
66 my ($ql,undef,$pre,$op,$ld,$pat) = @$_;
67 return $_->[0] unless $op =~ /^(qr|m|s)/
68 || !$op && ($ld eq '/' || $ld eq '?');
69 $_ = $pat;
70 $t->(@_);
71 $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/;
72 return "$pre$ql";
73 };
74 },
75 string => sub { my ($t)=@_;
76 sub{ref() or return $_;
77 local *args = \@_;
78 my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10];
79 return $_->[0] if $op =~ /^(qr|m)/
80 || !$op && ($ld1 eq '/' || $ld1 eq '?');
81 if (!$op || $op eq 'tr' || $op eq 'y') {
82 local *_ = \$str1;
83 $t->(@args);
84 }
85 if ($op =~ /^(tr|y|s)/) {
86 local *_ = \$str2;
87 $t->(@args);
88 }
89 my $result = "$pre$op$ld1$str1$rd1";
90 $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}>
91 $result .= "$str2$rd2$flg";
92 return $result;
93 };
94 },
95);
96
97
98sub gen_std_filter_for {
99 my ($type, $transform) = @_;
100 return sub { my (@pieces, $instr);
101 for (extract_multiple($_,$extractor_for{$type})) {
102 if (ref()) { push @pieces, $_; $instr=0 }
103 elsif ($instr) { $pieces[-1] .= $_ }
104 else { push @pieces, $_; $instr=1 }
105 }
106 if ($type eq 'code') {
107 my $count = 0;
108 local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/;
109 my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/;
110 $_ = join "",
111 map { ref $_ ? $;.pack('N',$count++).$; : $_ }
112 @pieces;
113 @pieces = grep { ref $_ } @pieces;
114 $transform->(@_);
115 s/$extractor/${$pieces[unpack('N',$1)]}/g;
116 }
117 else {
dfa18578 118 my $selector = $selector_for{$type}->($transform);
119 $_ = join "", map $selector->(@_), @pieces;
120 }
121 }
122};
123
124sub FILTER_ONLY {
dfa18578 125 my $caller = caller;
126 while (@_ > 1) {
127 my ($what, $how) = splice(@_, 0, 2);
128 fail "Unknown selector: $what"
129 unless exists $extractor_for{$what};
130 fail "Filter for $what is not a subroutine reference"
131 unless ref $how eq 'CODE';
132 push @transforms, gen_std_filter_for($what,$how);
133 }
134 my $terminator = shift;
135
136 my $multitransform = sub {
137 foreach my $transform ( @transforms ) {
138 $transform->(@_);
139 }
140 };
141 no warnings 'redefine';
142 *{"${caller}::import"} =
143 gen_filter_import($caller,$multitransform,$terminator);
201f4820 144 *{"${caller}::unimport"} = gen_filter_unimport($caller);
dfa18578 145}
146
147my $ows = qr/(?:[ \t]+|#[^\n]*)*/;
148
b38acab9 149sub gen_filter_import {
fbe2c49e 150 my ($class, $filter, $terminator) = @_;
201f4820 151 my %terminator;
152 my $prev_import = *{$class."::import"}{CODE};
b38acab9 153 return sub {
154 my ($imported_class, @args) = @_;
dfa18578 155 my $def_terminator =
156 qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)$/;
157 if (!defined $terminator) {
201f4820 158 $terminator{terminator} = $def_terminator;
dfa18578 159 }
201f4820 160 elsif (!ref $terminator || ref $terminator eq 'Regexp') {
161 $terminator{terminator} = $terminator;
dfa18578 162 }
163 elsif (ref $terminator ne 'HASH') {
164 croak "Terminator must be specified as scalar or hash ref"
165 }
166 elsif (!exists $terminator->{terminator}) {
201f4820 167 $terminator{terminator} = $def_terminator;
dfa18578 168 }
b38acab9 169 filter_add(
170 sub {
dfa18578 171 my ($status, $lastline);
fbe2c49e 172 my $count = 0;
b38acab9 173 my $data = "";
174 while ($status = filter_read()) {
fbe2c49e 175 return $status if $status < 0;
201f4820 176 if ($terminator{terminator} &&
177 m/$terminator{terminator}/) {
dfa18578 178 $lastline = $_;
b38acab9 179 last;
180 }
181 $data .= $_;
fbe2c49e 182 $count++;
b38acab9 183 $_ = "";
184 }
185 $_ = $data;
dfa18578 186 $filter->($imported_class, @args) unless $status < 0;
187 if (defined $lastline) {
201f4820 188 if (defined $terminator{becomes}) {
189 $_ .= $terminator{becomes};
dfa18578 190 }
191 elsif ($lastline =~ $def_terminator) {
192 $_ .= $lastline;
193 }
194 }
fbe2c49e 195 return $count;
b38acab9 196 }
197 );
201f4820 198 if ($prev_import) {
199 goto &$prev_import;
200 }
201 elsif ($class->isa('Exporter')) {
202 $class->export_to_level(1,@_);
203 }
b38acab9 204 }
205}
206
201f4820 207sub gen_filter_unimport {
208 my ($class) = @_;
209 my $prev_unimport = *{$class."::unimport"}{CODE};
210 return sub {
211 filter_del();
212 goto &$prev_unimport if $prev_unimport;
213 }
b38acab9 214}
215
2161;
217
218__END__
219
220=head1 NAME
221
222Filter::Simple - Simplified source filtering
223
fbe2c49e 224
b38acab9 225=head1 SYNOPSIS
226
227 # in MyFilter.pm:
228
229 package MyFilter;
230
fbe2c49e 231 use Filter::Simple;
232
233 FILTER { ... };
b38acab9 234
fbe2c49e 235 # or just:
236 #
237 # use Filter::Simple sub { ... };
b38acab9 238
239 # in user's code:
240
241 use MyFilter;
242
243 # this code is filtered
244
245 no MyFilter;
246
247 # this code is not
248
249
250=head1 DESCRIPTION
251
252=head2 The Problem
253
254Source filtering is an immensely powerful feature of recent versions of Perl.
255It allows one to extend the language itself (e.g. the Switch module), to
256simplify the language (e.g. Language::Pythonesque), or to completely recast the
257language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use
258the full power of Perl as its own, recursively applied, macro language.
259
260The excellent Filter::Util::Call module (by Paul Marquess) provides a
261usable Perl interface to source filtering, but it is often too powerful
262and not nearly as simple as it could be.
263
264To use the module it is necessary to do the following:
265
266=over 4
267
268=item 1.
269
270Download, build, and install the Filter::Util::Call module.
55a1c97c 271(If you have Perl 5.7.1 or later, this is already done for you.)
b38acab9 272
273=item 2.
274
275Set up a module that does a C<use Filter::Util::Call>.
276
277=item 3.
278
279Within that module, create an C<import> subroutine.
280
281=item 4.
282
283Within the C<import> subroutine do a call to C<filter_add>, passing
284it either a subroutine reference.
285
286=item 5.
287
288Within the subroutine reference, call C<filter_read> or C<filter_read_exact>
289to "prime" $_ with source code data from the source file that will
290C<use> your module. Check the status value returned to see if any
291source code was actually read in.
292
293=item 6.
294
295Process the contents of $_ to change the source code in the desired manner.
296
297=item 7.
298
299Return the status value.
300
301=item 8.
302
303If the act of unimporting your module (via a C<no>) should cause source
304code filtering to cease, create an C<unimport> subroutine, and have it call
305C<filter_del>. Make sure that the call to C<filter_read> or
306C<filter_read_exact> in step 5 will not accidentally read past the
307C<no>. Effectively this limits source code filters to line-by-line
308operation, unless the C<import> subroutine does some fancy
309pre-pre-parsing of the source code it's filtering.
310
311=back
312
313For example, here is a minimal source code filter in a module named
314BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG>
315to the sequence C<die 'BANG' if $BANG> in any piece of code following a
316C<use BANG;> statement (until the next C<no BANG;> statement, if any):
317
318 package BANG;
fbe2c49e 319
b38acab9 320 use Filter::Util::Call ;
321
322 sub import {
323 filter_add( sub {
324 my $caller = caller;
325 my ($status, $no_seen, $data);
326 while ($status = filter_read()) {
fbe2c49e 327 if (/^\s*no\s+$caller\s*;\s*?$/) {
b38acab9 328 $no_seen=1;
329 last;
330 }
331 $data .= $_;
332 $_ = "";
333 }
334 $_ = $data;
335 s/BANG\s+BANG/die 'BANG' if \$BANG/g
336 unless $status < 0;
337 $_ .= "no $class;\n" if $no_seen;
338 return 1;
339 })
340 }
341
342 sub unimport {
343 filter_del();
344 }
345
346 1 ;
347
7bf0340c 348This level of sophistication puts filtering out of the reach of
349many programmers.
b38acab9 350
351
352=head2 A Solution
353
7bf0340c 354The Filter::Simple module provides a simplified interface to
b38acab9 355Filter::Util::Call; one that is sufficient for most common cases.
356
357Instead of the above process, with Filter::Simple the task of setting up
358a source code filter is reduced to:
359
360=over 4
361
362=item 1.
363
55a1c97c 364Download and install the Filter::Simple module.
365(If you have Perl 5.7.1 or later, this is already done for you.)
366
367=item 2.
368
fbe2c49e 369Set up a module that does a C<use Filter::Simple> and then
370calls C<FILTER { ... }>.
b38acab9 371
55a1c97c 372=item 3.
b38acab9 373
fbe2c49e 374Within the anonymous subroutine or block that is passed to
375C<FILTER>, process the contents of $_ to change the source code in
376the desired manner.
b38acab9 377
378=back
379
380In other words, the previous example, would become:
381
382 package BANG;
fbe2c49e 383 use Filter::Simple;
384
385 FILTER {
b38acab9 386 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
387 };
388
389 1 ;
390
391
fbe2c49e 392=head2 Disabling or changing <no> behaviour
393
dfa18578 394By default, the installed filter only filters up to a line consisting of one of
395the three standard source "terminators":
396
397 no ModuleName; # optional comment
fbe2c49e 398
dfa18578 399or:
fbe2c49e 400
dfa18578 401 __END__
402
403or:
404
405 __DATA__
406
407but this can be altered by passing a second argument to C<use Filter::Simple>
408or C<FILTER> (just remember: there's I<no> comma after the initial block when
409you use C<FILTER>).
fbe2c49e 410
411That second argument may be either a C<qr>'d regular expression (which is then
412used to match the terminator line), or a defined false value (which indicates
dfa18578 413that no terminator line should be looked for), or a reference to a hash
414(in which case the terminator is the value associated with the key
415C<'terminator'>.
fbe2c49e 416
417For example, to cause the previous filter to filter only up to a line of the
418form:
419
420 GNAB esu;
421
422you would write:
423
424 package BANG;
425 use Filter::Simple;
426
427 FILTER {
428 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
429 }
dfa18578 430 qr/^\s*GNAB\s+esu\s*;\s*?$/;
431
432or:
433
434 FILTER {
435 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
436 }
437 { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ };
fbe2c49e 438
439and to prevent the filter's being turned off in any way:
440
441 package BANG;
442 use Filter::Simple;
443
444 FILTER {
445 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
446 }
dfa18578 447 ""; # or: 0
448
449or:
450
451 FILTER {
452 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
453 }
454 { terminator => "" };
455
456B<Note that, no matter what you set the terminator pattern too,
457the actual terminator itself I<must> be contained on a single source line.>
fbe2c49e 458
459
460=head2 All-in-one interface
461
462Separating the loading of Filter::Simple:
463
464 use Filter::Simple;
465
466from the setting up of the filtering:
467
468 FILTER { ... };
469
470is useful because it allows other code (typically parser support code
471or caching variables) to be defined before the filter is invoked.
472However, there is often no need for such a separation.
473
474In those cases, it is easier to just append the filtering subroutine and
475any terminator specification directly to the C<use> statement that loads
476Filter::Simple, like so:
477
478 use Filter::Simple sub {
479 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
480 };
481
482This is exactly the same as:
483
484 use Filter::Simple;
485 BEGIN {
486 Filter::Simple::FILTER {
487 s/BANG\s+BANG/die 'BANG' if \$BANG/g;
488 };
489 }
490
491except that the C<FILTER> subroutine is not exported by Filter::Simple.
492
dfa18578 493
494=head2 Filtering only specific components of source code
495
496One of the problems with a filter like:
497
498 use Filter::Simple;
499
500 FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g };
501
502is that it indiscriminately applies the specified transformation to
503the entire text of your source program. So something like:
504
505 warn 'BANG BANG, YOU'RE DEAD';
506 BANG BANG;
507
508will become:
509
510 warn 'die 'BANG' if $BANG, YOU'RE DEAD';
511 die 'BANG' if $BANG;
512
513It is very common when filtering source to only want to apply the filter
514to the non-character-string parts of the code, or alternatively to I<only>
515the character strings.
516
517Filter::Simple supports this type of filtering by automatically
518exporting the C<FILTER_ONLY> subroutine.
519
520C<FILTER_ONLY> takes a sequence of specifiers that install separate
521(and possibly multiple) filters that act on only parts of the source code.
522For example:
523
524 use Filter::Simple;
525
526 FILTER_ONLY
527 code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g },
528 quotelike => sub { s/BANG\s+BANG/CHITTY CHITYY/g };
529
530The C<"code"> subroutine will only be used to filter parts of the source
531code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike>
532subroutine only filters Perl quotelikes (including here documents).
533
534The full list of alternatives is:
535
536=over
537
538=item C<"code">
539
540Filters only those sections of the source code that are not quotelikes, POD, or
541C<__DATA__>.
542
543=item C<"executable">
544
545Filters only those sections of the source code that are not POD or C<__DATA__>.
546
547=item C<"quotelike">
548
549Filters only Perl quotelikes (as interpreted by
550C<&Text::Balanced::extract_quotelike>).
551
552=item C<"string">
553
554Filters only the string literal parts of a Perl quotelike (i.e. the
555contents of a string literal, either half of a C<tr///>, the second
556half of an C<s///>).
557
558=item C<"regex">
559
560Filters only the pattern literal parts of a Perl quotelike (i.e. the
561contents of a C<qr//> or an C<m//>, the first half of an C<s///>).
562
563=item C<"all">
564
565Filters everything. Identical in effect to C<FILTER>.
566
567=back
568
569Except for C<< FILTER_ONLY code => sub {...} >>, each of
570the component filters is called repeatedly, once for each component
571found in the source code.
572
573Note that you can also apply two or more of the same type of filter in
574a single C<FILTER_ONLY>. For example, here's a simple
575macro-preprocessor that is only applied within regexes,
576with a final debugging pass that printd the resulting source code:
577
578 use Regexp::Common;
579 FILTER_ONLY
580 regex => sub { s/!\[/[^/g },
581 regex => sub { s/%d/$RE{num}{int}/g },
582 regex => sub { s/%f/$RE{num}{real}/g },
583 all => sub { print if $::DEBUG };
584
585
586
587=head2 Filtering only the code parts of source code
588
589Most source code ceases to be grammatically correct when it is broken up
590into the pieces between string literals and regexes. So the C<'code'>
591component filter behaves slightly differently from the other partial filters
592described in the previous section.
593
594Rather than calling the specified processor on each individual piece of
595code (i.e. on the bits between quotelikes), the C<'code'> partial filter
596operates on the entire source code, but with the quotelike bits
597"blanked out".
598
599That is, a C<'code'> filter I<replaces> each quoted string, quotelike,
600regex, POD, and __DATA__ section with a placeholder. The
601delimiters of this placeholder are the contents of the C<$;> variable
602at the time the filter is applied (normally C<"\034">). The remaining
603four bytes are a unique identifier for the component being replaced.
604
605This approach makes it comparatively easy to write code preprocessors
606without worrying about the form or contents of strings, regexes, etc.
607For convenience, during a C<'code'> filtering operation, Filter::Simple
608provides a package variable (C<$Filter::Simple::placeholder>) that contains
609a pre-compiled regex that matches any placeholder. Placeholders can be
610moved and re-ordered within the source code as needed.
611
612Once the filtering has been applied, the original strings, regexes,
613POD, etc. are re-inserted into the code, by replacing each
614placeholder with the corresponding original component.
615
616For example, the following filter detects concatentated pairs of
617strings/quotelikes and reverses the order in which they are
618concatenated:
619
620 package DemoRevCat;
621 use Filter::Simple;
622
623 FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder;
624 s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx
625 };
626
627Thus, the following code:
628
629 use DemoRevCat;
630
631 my $str = "abc" . q(def);
632
633 print "$str\n";
634
635would become:
636
637 my $str = q(def)."abc";
638
639 print "$str\n";
640
641and hence print:
642
643 defabc
644
645
201f4820 646=head2 Using Filter::Simple with an explicit C<import> subroutine
55a1c97c 647
201f4820 648Filter::Simple generates a special C<import> subroutine for
649your module (see L<"How it works">) which would normally replace any
650C<import> subroutine you might have explicitly declared.
55a1c97c 651
201f4820 652However, Filter::Simple is smart enough to notice your existing
653C<import> and Do The Right Thing with it.
654That is, if you explcitly define an C<import> subroutine in a package
655that's using Filter::Simple, that C<import> subroutine will still
656be invoked immediately after any filter you install.
55a1c97c 657
201f4820 658The only thing you have to remember is that the C<import> subroutine
659I<must> be declared I<before> the filter is installed. If you use C<FILTER>
660to install the filter:
55a1c97c 661
201f4820 662 package Filter::TurnItUpTo11;
55a1c97c 663
201f4820 664 use Filter::Simple;
55a1c97c 665
201f4820 666 FILTER { s/(\w+)/\U$1/ };
667
668that will almost never be a problem, but if you install a filtering
669subroutine by passing it directly to the C<use Filter::Simple>
670statement:
55a1c97c 671
201f4820 672 package Filter::TurnItUpTo11;
55a1c97c 673
201f4820 674 use Filter::Simple sub{ s/(\w+)/\U$1/ };
55a1c97c 675
201f4820 676then you must make sure that your C<import> subroutine appears before
677that C<use> statement.
678
679
680=head2 Using Filter::Simple and Exporter together
681
682Likewise, Filter::Simple is also smart enough
683to Do The Right Thing if you use Exporter:
684
685 package Switch;
686 use base Exporter;
687 use Filter::Simple;
688
689 @EXPORT = qw(switch case);
690 @EXPORT_OK = qw(given when);
691
692 FILTER { $_ = magic_Perl_filter($_) }
693
694Immediately after the filter has been applied to the source,
695Filter::Simple will pass control to Exporter, so it can do its magic too.
696
697Of course, here too, Filter::Simple has to know you're using Exporter
698before it applies the filter. That's almost never a problem, but if you're
699nervous about it, you can guarantee that things will work correctly by
700ensuring that your C<use base Exporter> always precedes your
701C<use Filter::Simple>.
55a1c97c 702
fbe2c49e 703
b38acab9 704=head2 How it works
705
fbe2c49e 706The Filter::Simple module exports into the package that calls C<FILTER>
707(or C<use>s it directly) -- such as package "BANG" in the above example --
708two automagically constructed
b38acab9 709subroutines -- C<import> and C<unimport> -- which take care of all the
710nasty details.
711
712In addition, the generated C<import> subroutine passes its own argument
713list to the filtering subroutine, so the BANG.pm filter could easily
714be made parametric:
715
716 package BANG;
fbe2c49e 717
718 use Filter::Simple;
719
720 FILTER {
b38acab9 721 my ($die_msg, $var_name) = @_;
722 s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g;
723 };
724
725 # and in some user code:
726
fbe2c49e 727 use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM
b38acab9 728
729
fbe2c49e 730The specified filtering subroutine is called every time a C<use BANG> is
731encountered, and passed all the source code following that call, up to
732either the next C<no BANG;> (or whatever terminator you've set) or the
733end of the source file, whichever occurs first. By default, any C<no
734BANG;> call must appear by itself on a separate line, or it is ignored.
b38acab9 735
736
737=head1 AUTHOR
738
739Damian Conway (damian@conway.org)
740
741=head1 COPYRIGHT
742
55a1c97c 743 Copyright (c) 2000-2001, Damian Conway. All Rights Reserved.
744 This module is free software. It may be used, redistributed
745 and/or modified under the same terms as Perl itself.