perl 5.0 alpha 6
[p5sagit/p5-mst-13.2.git] / x2p / s2p
CommitLineData
79072805 1#!/usr/local/bin/perl
2
3eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
4 if $running_under_some_shell;
5
6$bin = '/usr/local/bin';
7
8# $RCSfile: s2p.SH,v $$Revision: 4.1 $$Date: 92/08/07 18:29:23 $
9#
10# $Log: s2p.SH,v $
11# Revision 4.1 92/08/07 18:29:23 lwall
12#
13# Revision 4.0.1.2 92/06/08 17:26:31 lwall
14# patch20: s2p didn't output portable startup code
15# patch20: added ... as variant on ..
16# patch20: s2p didn't translate s/pat/\&/ or s/pat/\$/ or s/pat/\\1/ right
17#
18# Revision 4.0.1.1 91/06/07 12:19:18 lwall
19# patch4: s2p now handles embedded newlines better and optimizes common idioms
20#
21# Revision 4.0 91/03/20 01:57:59 lwall
22# 4.0 baseline.
23#
24#
25
26$indent = 4;
27$shiftwidth = 4;
28$l = '{'; $r = '}';
29
30while ($ARGV[0] =~ /^-/) {
31 $_ = shift;
32 last if /^--/;
33 if (/^-D/) {
34 $debug++;
35 open(BODY,'>-');
36 next;
37 }
38 if (/^-n/) {
39 $assumen++;
40 next;
41 }
42 if (/^-p/) {
43 $assumep++;
44 next;
45 }
46 die "I don't recognize this switch: $_\n";
47}
48
49unless ($debug) {
50 open(BODY,">/tmp/sperl$$") ||
51 &Die("Can't open temp file: $!\n");
52}
53
54if (!$assumen && !$assumep) {
55 print BODY &q(<<'EOT');
56: while ($ARGV[0] =~ /^-/) {
57: $_ = shift;
58: last if /^--/;
59: if (/^-n/) {
60: $nflag++;
61: next;
62: }
63: die "I don't recognize this switch: $_\\n";
64: }
65:
66EOT
67}
68
69print BODY &q(<<'EOT');
70: #ifdef PRINTIT
71: #ifdef ASSUMEP
72: $printit++;
73: #else
74: $printit++ unless $nflag;
75: #endif
76: #endif
77: <><>
78: $\ = "\n"; # automatically add newline on print
79: <><>
80: #ifdef TOPLABEL
81: LINE:
82: while (chop($_ = <>)) {
83: #else
84: LINE:
85: while (<>) {
86: chop;
87: #endif
88EOT
89
90LINE:
91while (<>) {
92
93 # Wipe out surrounding whitespace.
94
95 s/[ \t]*(.*)\n$/$1/;
96
97 # Perhaps it's a label/comment.
98
99 if (/^:/) {
100 s/^:[ \t]*//;
101 $label = &make_label($_);
102 if ($. == 1) {
103 $toplabel = $label;
104 if (/^(top|(re)?start|redo|begin(ning)|again|input)$/i) {
105 $_ = <>;
106 redo LINE; # Never referenced, so delete it if not a comment.
107 }
108 }
109 $_ = "$label:";
110 if ($lastlinewaslabel++) {
111 $indent += 4;
112 print BODY &tab, ";\n";
113 $indent -= 4;
114 }
115 if ($indent >= 2) {
116 $indent -= 2;
117 $indmod = 2;
118 }
119 next;
120 } else {
121 $lastlinewaslabel = '';
122 }
123
124 # Look for one or two address clauses
125
126 $addr1 = '';
127 $addr2 = '';
128 if (s/^([0-9]+)//) {
129 $addr1 = "$1";
130 $addr1 = "\$. == $addr1" unless /^,/;
131 }
132 elsif (s/^\$//) {
133 $addr1 = 'eof()';
134 }
135 elsif (s|^/||) {
136 $addr1 = &fetchpat('/');
137 }
138 if (s/^,//) {
139 if (s/^([0-9]+)//) {
140 $addr2 = "$1";
141 } elsif (s/^\$//) {
142 $addr2 = "eof()";
143 } elsif (s|^/||) {
144 $addr2 = &fetchpat('/');
145 } else {
146 &Die("Invalid second address at line $.\n");
147 }
148 if ($addr2 =~ /^\d+$/) {
149 $addr1 .= "..$addr2";
150 }
151 else {
152 $addr1 .= "...$addr2";
153 }
154 }
155
156 # Now we check for metacommands {, }, and ! and worry
157 # about indentation.
158
159 s/^[ \t]+//;
160 # a { to keep vi happy
161 if ($_ eq '}') {
162 $indent -= 4;
163 next;
164 }
165 if (s/^!//) {
166 $if = 'unless';
167 $else = "$r else $l\n";
168 } else {
169 $if = 'if';
170 $else = '';
171 }
172 if (s/^{//) { # a } to keep vi happy
173 $indmod = 4;
174 $redo = $_;
175 $_ = '';
176 $rmaybe = '';
177 } else {
178 $rmaybe = "\n$r";
179 if ($addr2 || $addr1) {
180 $space = ' ' x $shiftwidth;
181 } else {
182 $space = '';
183 }
184 $_ = &transmogrify();
185 }
186
187 # See if we can optimize to modifier form.
188
189 if ($addr1) {
190 if ($_ !~ /[\n{}]/ && $rmaybe && !$change &&
191 $_ !~ / if / && $_ !~ / unless /) {
192 s/;$/ $if $addr1;/;
193 $_ = substr($_,$shiftwidth,1000);
194 } else {
195 $_ = "$if ($addr1) $l\n$change$_$rmaybe";
196 }
197 $change = '';
198 next LINE;
199 }
200} continue {
201 @lines = split(/\n/,$_);
202 for (@lines) {
203 unless (s/^ *<<--//) {
204 print BODY &tab;
205 }
206 print BODY $_, "\n";
207 }
208 $indent += $indmod;
209 $indmod = 0;
210 if ($redo) {
211 $_ = $redo;
212 $redo = '';
213 redo LINE;
214 }
215}
216if ($lastlinewaslabel++) {
217 $indent += 4;
218 print BODY &tab, ";\n";
219 $indent -= 4;
220}
221
222if ($appendseen || $tseen || !$assumen) {
223 $printit++ if $dseen || (!$assumen && !$assumep);
224 print BODY &q(<<'EOT');
225: #ifdef SAWNEXT
226: }
227: continue {
228: #endif
229: #ifdef PRINTIT
230: #ifdef DSEEN
231: #ifdef ASSUMEP
232: print if $printit++;
233: #else
234: if ($printit)
235: { print; }
236: else
237: { $printit++ unless $nflag; }
238: #endif
239: #else
240: print if $printit;
241: #endif
242: #else
243: print;
244: #endif
245: #ifdef TSEEN
246: $tflag = 0;
247: #endif
248: #ifdef APPENDSEEN
249: if ($atext) { chop $atext; print $atext; $atext = ''; }
250: #endif
251EOT
252
253print BODY &q(<<'EOT');
254: }
255EOT
256}
257
258close BODY;
259
260unless ($debug) {
261 open(HEAD,">/tmp/sperl2$$.c")
262 || &Die("Can't open temp file 2: $!\n");
263 print HEAD "#define PRINTIT\n" if $printit;
264 print HEAD "#define APPENDSEEN\n" if $appendseen;
265 print HEAD "#define TSEEN\n" if $tseen;
266 print HEAD "#define DSEEN\n" if $dseen;
267 print HEAD "#define ASSUMEN\n" if $assumen;
268 print HEAD "#define ASSUMEP\n" if $assumep;
269 print HEAD "#define TOPLABEL\n" if $toplabel;
270 print HEAD "#define SAWNEXT\n" if $sawnext;
271 if ($opens) {print HEAD "$opens\n";}
272 open(BODY,"/tmp/sperl$$")
273 || &Die("Can't reopen temp file: $!\n");
274 while (<BODY>) {
275 print HEAD $_;
276 }
277 close HEAD;
278
279 print &q(<<"EOT");
280: #!$bin/perl
281: eval 'exec $bin/perl -S \$0 \${1+"\$@"}'
282: if \$running_under_some_shell;
283:
284EOT
285 open(BODY,"cc -E /tmp/sperl2$$.c |") ||
286 &Die("Can't reopen temp file: $!\n");
287 while (<BODY>) {
288 /^# [0-9]/ && next;
289 /^[ \t]*$/ && next;
290 s/^<><>//;
291 print;
292 }
293}
294
295&Cleanup;
296exit;
297
298sub Cleanup {
299 chdir "/tmp";
300 unlink "sperl$$", "sperl2$$", "sperl2$$.c";
301}
302sub Die {
303 &Cleanup;
304 die $_[0];
305}
306sub tab {
307 "\t" x ($indent / 8) . ' ' x ($indent % 8);
308}
309sub make_filehandle {
310 local($_) = $_[0];
311 local($fname) = $_;
312 if (!$seen{$fname}) {
313 $_ = "FH_" . $_ if /^\d/;
314 s/[^a-zA-Z0-9]/_/g;
315 s/^_*//;
316 $_ = "\U$_";
317 if ($fhseen{$_}) {
318 for ($tmp = "a"; $fhseen{"$_$tmp"}; $a++) {}
319 $_ .= $tmp;
320 }
321 $fhseen{$_} = 1;
322 $opens .= &q(<<"EOT");
323: open($_, '>$fname') || die "Can't create $fname: \$!";
324EOT
325 $seen{$fname} = $_;
326 }
327 $seen{$fname};
328}
329
330sub make_label {
331 local($label) = @_;
332 $label =~ s/[^a-zA-Z0-9]/_/g;
333 if ($label =~ /^[0-9_]/) { $label = 'L' . $label; }
334 $label = substr($label,0,8);
335
336 # Could be a reserved word, so capitalize it.
337 substr($label,0,1) =~ y/a-z/A-Z/
338 if $label =~ /^[a-z]/;
339
340 $label;
341}
342
343sub transmogrify {
344 { # case
345 if (/^d/) {
346 $dseen++;
347 chop($_ = &q(<<'EOT'));
348: <<--#ifdef PRINTIT
349: $printit = 0;
350: <<--#endif
351: next LINE;
352EOT
353 $sawnext++;
354 next;
355 }
356
357 if (/^n/) {
358 chop($_ = &q(<<'EOT'));
359: <<--#ifdef PRINTIT
360: <<--#ifdef DSEEN
361: <<--#ifdef ASSUMEP
362: print if $printit++;
363: <<--#else
364: if ($printit)
365: { print; }
366: else
367: { $printit++ unless $nflag; }
368: <<--#endif
369: <<--#else
370: print if $printit;
371: <<--#endif
372: <<--#else
373: print;
374: <<--#endif
375: <<--#ifdef APPENDSEEN
376: if ($atext) {chop $atext; print $atext; $atext = '';}
377: <<--#endif
378: $_ = <>;
379: chop;
380: <<--#ifdef TSEEN
381: $tflag = 0;
382: <<--#endif
383EOT
384 next;
385 }
386
387 if (/^a/) {
388 $appendseen++;
389 $command = $space . "\$atext .= <<'End_Of_Text';\n<<--";
390 $lastline = 0;
391 while (<>) {
392 s/^[ \t]*//;
393 s/^[\\]//;
394 unless (s|\\$||) { $lastline = 1;}
395 s/^([ \t]*\n)/<><>$1/;
396 $command .= $_;
397 $command .= '<<--';
398 last if $lastline;
399 }
400 $_ = $command . "End_Of_Text";
401 last;
402 }
403
404 if (/^[ic]/) {
405 if (/^c/) { $change = 1; }
406 $addr1 = 1 if $addr1 eq '';
407 $addr1 = '$iter = (' . $addr1 . ')';
408 $command = $space .
409 " if (\$iter == 1) { print <<'End_Of_Text'; }\n<<--";
410 $lastline = 0;
411 while (<>) {
412 s/^[ \t]*//;
413 s/^[\\]//;
414 unless (s/\\$//) { $lastline = 1;}
415 s/'/\\'/g;
416 s/^([ \t]*\n)/<><>$1/;
417 $command .= $_;
418 $command .= '<<--';
419 last if $lastline;
420 }
421 $_ = $command . "End_Of_Text";
422 if ($change) {
423 $dseen++;
424 $change = "$_\n";
425 chop($_ = &q(<<"EOT"));
426: <<--#ifdef PRINTIT
427: $space\$printit = 0;
428: <<--#endif
429: ${space}next LINE;
430EOT
431 $sawnext++;
432 }
433 last;
434 }
435
436 if (/^s/) {
437 $delim = substr($_,1,1);
438 $len = length($_);
439 $repl = $end = 0;
440 $inbracket = 0;
441 for ($i = 2; $i < $len; $i++) {
442 $c = substr($_,$i,1);
443 if ($c eq $delim) {
444 if ($inbracket) {
445 substr($_, $i, 0) = '\\';
446 $i++;
447 $len++;
448 }
449 else {
450 if ($repl) {
451 $end = $i;
452 last;
453 } else {
454 $repl = $i;
455 }
456 }
457 }
458 elsif ($c eq '\\') {
459 $i++;
460 if ($i >= $len) {
461 $_ .= 'n';
462 $_ .= <>;
463 $len = length($_);
464 $_ = substr($_,0,--$len);
465 }
466 elsif (substr($_,$i,1) =~ /^[n]$/) {
467 ;
468 }
469 elsif (!$repl &&
470 substr($_,$i,1) =~ /^[(){}\w]$/) {
471 $i--;
472 $len--;
473 substr($_, $i, 1) = '';
474 }
475 elsif (!$repl &&
476 substr($_,$i,1) =~ /^[<>]$/) {
477 substr($_,$i,1) = 'b';
478 }
479 elsif ($repl && substr($_,$i,1) =~ /^\d$/) {
480 substr($_,$i-1,1) = '$';
481 }
482 }
483 elsif ($c eq '&' && $repl) {
484 substr($_, $i, 0) = '$';
485 $i++;
486 $len++;
487 }
488 elsif ($c eq '$' && $repl) {
489 substr($_, $i, 0) = '\\';
490 $i++;
491 $len++;
492 }
493 elsif ($c eq '[' && !$repl) {
494 $i++ if substr($_,$i,1) eq '^';
495 $i++ if substr($_,$i,1) eq ']';
496 $inbracket = 1;
497 }
498 elsif ($c eq ']') {
499 $inbracket = 0;
500 }
501 elsif ($c eq "\t") {
502 substr($_, $i, 1) = '\\t';
503 $i++;
504 $len++;
505 }
506 elsif (!$repl && index("()+",$c) >= 0) {
507 substr($_, $i, 0) = '\\';
508 $i++;
509 $len++;
510 }
511 }
512 &Die("Malformed substitution at line $.\n")
513 unless $end;
514 $pat = substr($_, 0, $repl + 1);
515 $repl = substr($_, $repl+1, $end-$repl-1);
516 $end = substr($_, $end + 1, 1000);
517 &simplify($pat);
518 $dol = '$';
519 $subst = "$pat$repl$delim";
520 $cmd = '';
521 while ($end) {
522 if ($end =~ s/^g//) {
523 $subst .= 'g';
524 next;
525 }
526 if ($end =~ s/^p//) {
527 $cmd .= ' && (print)';
528 next;
529 }
530 if ($end =~ s/^w[ \t]*//) {
531 $fh = &make_filehandle($end);
532 $cmd .= " && (print $fh \$_)";
533 $end = '';
534 next;
535 }
536 &Die("Unrecognized substitution command".
537 "($end) at line $.\n");
538 }
539 chop ($_ = &q(<<"EOT"));
540: <<--#ifdef TSEEN
541: $subst && \$tflag++$cmd;
542: <<--#else
543: $subst$cmd;
544: <<--#endif
545EOT
546 next;
547 }
548
549 if (/^p/) {
550 $_ = 'print;';
551 next;
552 }
553
554 if (/^w/) {
555 s/^w[ \t]*//;
556 $fh = &make_filehandle($_);
557 $_ = "print $fh \$_;";
558 next;
559 }
560
561 if (/^r/) {
562 $appendseen++;
563 s/^r[ \t]*//;
564 $file = $_;
565 $_ = "\$atext .= `cat $file 2>/dev/null`;";
566 next;
567 }
568
569 if (/^P/) {
570 $_ = 'print $1 if /^(.*)/;';
571 next;
572 }
573
574 if (/^D/) {
575 chop($_ = &q(<<'EOT'));
576: s/^.*\n?//;
577: redo LINE if $_;
578: next LINE;
579EOT
580 $sawnext++;
581 next;
582 }
583
584 if (/^N/) {
585 chop($_ = &q(<<'EOT'));
586: $_ .= "\n";
587: $len1 = length;
588: $_ .= <>;
589: chop if $len1 < length;
590: <<--#ifdef TSEEN
591: $tflag = 0;
592: <<--#endif
593EOT
594 next;
595 }
596
597 if (/^h/) {
598 $_ = '$hold = $_;';
599 next;
600 }
601
602 if (/^H/) {
603 $_ = '$hold .= "\n"; $hold .= $_;';
604 next;
605 }
606
607 if (/^g/) {
608 $_ = '$_ = $hold;';
609 next;
610 }
611
612 if (/^G/) {
613 $_ = '$_ .= "\n"; $_ .= $hold;';
614 next;
615 }
616
617 if (/^x/) {
618 $_ = '($_, $hold) = ($hold, $_);';
619 next;
620 }
621
622 if (/^b$/) {
623 $_ = 'next LINE;';
624 $sawnext++;
625 next;
626 }
627
628 if (/^b/) {
629 s/^b[ \t]*//;
630 $lab = &make_label($_);
631 if ($lab eq $toplabel) {
632 $_ = 'redo LINE;';
633 } else {
634 $_ = "goto $lab;";
635 }
636 next;
637 }
638
639 if (/^t$/) {
640 $_ = 'next LINE if $tflag;';
641 $sawnext++;
642 $tseen++;
643 next;
644 }
645
646 if (/^t/) {
647 s/^t[ \t]*//;
648 $lab = &make_label($_);
649 $_ = q/if ($tflag) {$tflag = 0; /;
650 if ($lab eq $toplabel) {
651 $_ .= 'redo LINE;}';
652 } else {
653 $_ .= "goto $lab;}";
654 }
655 $tseen++;
656 next;
657 }
658
659 if (/^y/) {
660 s/abcdefghijklmnopqrstuvwxyz/a-z/g;
661 s/ABCDEFGHIJKLMNOPQRSTUVWXYZ/A-Z/g;
662 s/abcdef/a-f/g;
663 s/ABCDEF/A-F/g;
664 s/0123456789/0-9/g;
665 s/01234567/0-7/g;
666 $_ .= ';';
667 }
668
669 if (/^=/) {
670 $_ = 'print $.;';
671 next;
672 }
673
674 if (/^q/) {
675 chop($_ = &q(<<'EOT'));
676: close(ARGV);
677: @ARGV = ();
678: next LINE;
679EOT
680 $sawnext++;
681 next;
682 }
683 } continue {
684 if ($space) {
685 s/^/$space/;
686 s/(\n)(.)/$1$space$2/g;
687 }
688 last;
689 }
690 $_;
691}
692
693sub fetchpat {
694 local($outer) = @_;
695 local($addr) = $outer;
696 local($inbracket);
697 local($prefix,$delim,$ch);
698
699 # Process pattern one potential delimiter at a time.
700
701 DELIM: while (s#^([^\]+(|)[\\/]*)([]+(|)[\\/])##) {
702 $prefix = $1;
703 $delim = $2;
704 if ($delim eq '\\') {
705 s/(.)//;
706 $ch = $1;
707 $delim = '' if $ch =~ /^[(){}A-Za-mo-z]$/;
708 $ch = 'b' if $ch =~ /^[<>]$/;
709 $delim .= $ch;
710 }
711 elsif ($delim eq '[') {
712 $inbracket = 1;
713 s/^\^// && ($delim .= '^');
714 s/^]// && ($delim .= ']');
715 }
716 elsif ($delim eq ']') {
717 $inbracket = 0;
718 }
719 elsif ($inbracket || $delim ne $outer) {
720 $delim = '\\' . $delim;
721 }
722 $addr .= $prefix;
723 $addr .= $delim;
724 if ($delim eq $outer && !$inbracket) {
725 last DELIM;
726 }
727 }
728 $addr =~ s/\t/\\t/g;
729 &simplify($addr);
730 $addr;
731}
732
733sub q {
734 local($string) = @_;
735 local($*) = 1;
736 $string =~ s/^:\t?//g;
737 $string;
738}
739
740sub simplify {
741 $_[0] =~ s/_a-za-z0-9/\\w/ig;
742 $_[0] =~ s/a-z_a-z0-9/\\w/ig;
743 $_[0] =~ s/a-za-z_0-9/\\w/ig;
744 $_[0] =~ s/a-za-z0-9_/\\w/ig;
745 $_[0] =~ s/_0-9a-za-z/\\w/ig;
746 $_[0] =~ s/0-9_a-za-z/\\w/ig;
747 $_[0] =~ s/0-9a-z_a-z/\\w/ig;
748 $_[0] =~ s/0-9a-za-z_/\\w/ig;
749 $_[0] =~ s/\[\\w\]/\\w/g;
750 $_[0] =~ s/\[^\\w\]/\\W/g;
751 $_[0] =~ s/\[0-9\]/\\d/g;
752 $_[0] =~ s/\[^0-9\]/\\D/g;
753 $_[0] =~ s/\\d\\d\*/\\d+/g;
754 $_[0] =~ s/\\D\\D\*/\\D+/g;
755 $_[0] =~ s/\\w\\w\*/\\w+/g;
756 $_[0] =~ s/\\t\\t\*/\\t+/g;
757 $_[0] =~ s/(\[.[^]]*\])\1\*/$1+/g;
758 $_[0] =~ s/([\w\s!@#%^&-=,:;'"])\1\*/$1+/g;
759}
760