3 # Copyright (c) 1996-1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
13 use B qw(minus_c main_cv main_root main_start comppadlist
14 class peekop walkoptree svref_2object cstring walksymtable
15 SVf_POK SVp_POK SVf_IOK SVp_IOK
17 use B::Asmdata qw(@optype @specialsv_name);
18 use B::Assembler qw(assemble_fh);
22 for ($i = 0; $i < @optype; $i++) {
23 $optype_enum{$optype[$i]} = $i;
26 # Following is SVf_POK|SVp_POK
27 # XXX Shouldn't be hardwired
28 sub POK () { SVf_POK|SVp_POK }
30 # Following is SVf_IOK|SVp_IOK
31 # XXX Shouldn't be hardwired
32 sub IOK () { SVf_IOK|SVp_IOK }
34 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
37 # Optimisation options. On the command line, use hyphens instead of
38 # underscores for compatibility with gcc-style options. We use
39 # underscores here because they are OK in (strict) barewords.
40 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
41 my %optimise = (strip_syntax_tree => \$strip_syntree,
42 compress_nullops => \$compress_nullops,
43 omit_sequence_numbers => \$omit_seq,
44 bypass_nullops => \$bypass_nullops);
47 my %symtable; # maps object addresses to object indices.
48 # Filled in at allocation (newsv/newop) time.
49 my %saved; # maps object addresses (for SVish classes) to "saved yet?"
50 # flag. Set at FOO::bytecode time usually by SV::bytecode.
51 # Manipulated via saved(), mark_saved(), unmark_saved().
53 my $svix = -1; # we keep track of when the sv register contains an element
54 # of the object table to avoid unnecessary repeated
55 # consecutive ldsv instructions.
56 my $opix = -1; # Ditto for the op register.
97 return cstring($str . "\0");
103 sub saved { $saved{${$_[0]}} }
104 sub mark_saved { $saved{${$_[0]}} = 1 }
105 sub unmark_saved { $saved{${$_[0]}} = 0 }
107 sub debug { $debug_bc = shift }
111 warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
116 # objix may stomp on the op register (for op objects)
117 # or the sv register (for SV objects)
119 sub B::OBJECT::objix {
121 my $ix = $symtable{$$obj};
125 $obj->newix($nextix);
126 return $symtable{$$obj} = $nextix++;
132 printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
138 my $gvname = $gv->NAME;
139 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
140 print "gv_fetchpv $name\n";
146 my $name = $hv->NAME;
149 printf "gv_stashpv %s\n", cstring($name);
152 # It's an ordinary HV. Fall back to ordinary newix method
153 $hv->B::SV::newix($ix);
157 sub B::SPECIAL::newix {
159 # Special case. $$sv is not the address of the SV but an
160 # index into svspecialsv_list.
161 printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
167 my $class = class($op);
168 my $typenum = $optype_enum{$class};
169 croak "OP::newix: can't understand class $class" unless defined($typenum);
170 print "newop $typenum\t# $class\n";
174 sub B::OP::walkoptree_debug {
176 warn(sprintf("walkoptree: %s\n", peekop($op)));
179 sub B::OP::bytecode {
181 my $next = $op->next;
183 my $sibix = $op->sibling->objix;
185 my $type = $op->type;
187 if ($bypass_nullops) {
188 $next = $next->next while $$next && $next->type == 0;
190 $nextix = $next->objix;
192 printf "# %s\n", peekop($op) if $debug_bc;
194 print "op_next $nextix\n";
195 print "op_sibling $sibix\n" unless $strip_syntree;
196 printf "op_type %s\t# %d\n", $op->ppaddr, $type;
197 printf("op_seq %d\n", $op->seq) unless $omit_seq;
198 if ($type || !$compress_nullops) {
199 printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
200 $op->targ, $op->flags, $op->private;
204 sub B::UNOP::bytecode {
206 my $firstix = $op->first->objix;
207 $op->B::OP::bytecode;
208 if (($op->type || !$compress_nullops) && !$strip_syntree) {
209 print "op_first $firstix\n";
213 sub B::LOGOP::bytecode {
215 my $otherix = $op->other->objix;
216 $op->B::UNOP::bytecode;
217 print "op_other $otherix\n";
220 sub B::SVOP::bytecode {
223 my $svix = $sv->objix;
224 $op->B::OP::bytecode;
225 print "op_sv $svix\n";
229 sub B::GVOP::bytecode {
232 my $gvix = $gv->objix;
233 $op->B::OP::bytecode;
234 print "op_gv $gvix\n";
238 sub B::PVOP::bytecode {
241 $op->B::OP::bytecode;
243 # This would be easy except that OP_TRANS uses a PVOP to store an
244 # endian-dependent array of 256 shorts instead of a plain string.
246 if ($op->ppaddr eq "pp_trans") {
247 my @shorts = unpack("s256", $pv); # assembler handles endianness
248 print "op_pv_tr ", join(",", @shorts), "\n";
250 printf "newpv %s\nop_pv\n", pvstring($pv);
254 sub B::BINOP::bytecode {
256 my $lastix = $op->last->objix;
257 $op->B::UNOP::bytecode;
258 if (($op->type || !$compress_nullops) && !$strip_syntree) {
259 print "op_last $lastix\n";
263 sub B::CONDOP::bytecode {
265 my $trueix = $op->true->objix;
266 my $falseix = $op->false->objix;
267 $op->B::UNOP::bytecode;
268 print "op_true $trueix\nop_false $falseix\n";
271 sub B::LISTOP::bytecode {
273 my $children = $op->children;
274 $op->B::BINOP::bytecode;
275 if (($op->type || !$compress_nullops) && !$strip_syntree) {
276 print "op_children $children\n";
280 sub B::LOOP::bytecode {
282 my $redoopix = $op->redoop->objix;
283 my $nextopix = $op->nextop->objix;
284 my $lastopix = $op->lastop->objix;
285 $op->B::LISTOP::bytecode;
286 print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
289 sub B::COP::bytecode {
291 my $stash = $op->stash;
292 my $stashix = $stash->objix;
293 my $filegv = $op->filegv;
294 my $filegvix = $filegv->objix;
295 my $line = $op->line;
297 printf "# line %s:%d\n", $filegv->SV->PV, $line;
299 $op->B::OP::bytecode;
300 printf <<"EOT", pvstring($op->label), $op->cop_seq, $op->arybase;
313 sub B::PMOP::bytecode {
315 my $replroot = $op->pmreplroot;
316 my $replrootix = $replroot->objix;
317 my $replstartix = $op->pmreplstart->objix;
318 my $ppaddr = $op->ppaddr;
319 # pmnext is corrupt in some PMOPs (see misc.t for example)
320 #my $pmnextix = $op->pmnext->objix;
323 # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
324 # argument to a split) stores a GV in op_pmreplroot instead
325 # of a substitution syntax tree. We don't want to walk that...
326 if ($ppaddr eq "pp_pushre") {
329 walkoptree($replroot, "bytecode");
332 $op->B::LISTOP::bytecode;
333 if ($ppaddr eq "pp_pushre") {
334 printf "op_pmreplrootgv $replrootix\n";
336 print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
338 my $re = pvstring($op->precomp);
339 # op_pmnext omitted since a perl bug means it's sometime corrupt
340 printf <<"EOT", $op->pmflags, $op->pmpermflags;
348 sub B::SV::bytecode {
350 return if saved($sv);
352 my $refcnt = $sv->REFCNT;
353 my $flags = sprintf("0x%x", $sv->FLAGS);
355 print "sv_refcnt $refcnt\nsv_flags $flags\n";
359 sub B::PV::bytecode {
361 return if saved($sv);
362 $sv->B::SV::bytecode;
363 printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
366 sub B::IV::bytecode {
368 return if saved($sv);
370 $sv->B::SV::bytecode;
371 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
374 sub B::NV::bytecode {
376 return if saved($sv);
377 $sv->B::SV::bytecode;
378 printf "xnv %s\n", $sv->NVX;
381 sub B::RV::bytecode {
383 return if saved($sv);
385 my $rvix = $rv->objix;
387 $sv->B::SV::bytecode;
391 sub B::PVIV::bytecode {
393 return if saved($sv);
395 $sv->B::PV::bytecode;
396 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
399 sub B::PVNV::bytecode {
400 my ($sv, $flag) = @_;
401 # The $flag argument is passed through PVMG::bytecode by BM::bytecode
402 # and AV::bytecode and indicates special handling. $flag = 1 is used by
403 # BM::bytecode and means that we should ensure we save the whole B-M
404 # table. It consists of 257 bytes (256 char array plus a final \0)
405 # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
406 # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
407 # call SV::bytecode instead of saving PV and calling NV::bytecode since
408 # PV/NV/IV stuff is different for AVs.
409 return if saved($sv);
411 $sv->B::SV::bytecode;
414 $sv->B::IV::bytecode;
415 printf "xnv %s\n", $sv->NVX;
417 $pv .= "\0" . $sv->TABLE;
418 printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
420 printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
425 sub B::PVMG::bytecode {
426 my ($sv, $flag) = @_;
427 # See B::PVNV::bytecode for an explanation of $flag.
428 return if saved($sv);
429 # XXX We assume SvSTASH is already saved and don't save it later ourselves
430 my $stashix = $sv->SvSTASH->objix;
431 my @mgchain = $sv->MAGIC;
434 # We need to traverse the magic chain and get objix for each OBJ
435 # field *before* we do B::PVNV::bytecode since objix overwrites
436 # the sv register. However, we need to write the magic-saving
437 # bytecode *after* B::PVNV::bytecode since sv isn't initialised
438 # to refer to $sv until then.
440 @mgobjix = map($_->OBJ->objix, @mgchain);
441 $sv->B::PVNV::bytecode($flag);
442 print "xmg_stash $stashix\n";
443 foreach $mg (@mgchain) {
444 printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
445 cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
449 sub B::PVLV::bytecode {
451 return if saved($sv);
452 $sv->B::PVMG::bytecode;
453 printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
460 sub B::BM::bytecode {
462 return if saved($sv);
463 # See PVNV::bytecode for an explanation of what the argument does
464 $sv->B::PVMG::bytecode(1);
465 printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
466 $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
469 sub B::GV::bytecode {
471 return if saved($gv);
474 my $gvname = $gv->NAME;
475 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
477 my $egvix = $egv->objix;
479 printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS, $gv->LINE;
484 my $refcnt = $gv->REFCNT;
485 printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
486 my $gvrefcnt = $gv->GvREFCNT;
487 printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
488 if ($gvrefcnt > 1 && $ix != $egvix) {
489 print "gp_share $egvix\n";
491 if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
493 my @subfield_names = qw(SV AV HV CV FILEGV FORM IO);
494 my @subfields = map($gv->$_(), @subfield_names);
495 my @ixes = map($_->objix, @subfields);
496 # Reset sv register for $gv
498 for ($i = 0; $i < @ixes; $i++) {
499 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
501 # Now save all the subfields
503 foreach $sv (@subfields) {
510 sub B::HV::bytecode {
512 return if saved($hv);
514 my $name = $hv->NAME;
517 # It's an ordinary HV. Stashes have NAME set and need no further
518 # saving beyond the gv_stashpv that $hv->objix already ensures.
519 my @contents = $hv->ARRAY;
521 for ($i = 1; $i < @contents; $i += 2) {
522 push(@ixes, $contents[$i]->objix);
524 for ($i = 1; $i < @contents; $i += 2) {
525 $contents[$i]->bytecode;
528 for ($i = 0; $i < @contents; $i += 2) {
529 printf("newpv %s\nhv_store %d\n",
530 pvstring($contents[$i]), $ixes[$i / 2]);
532 printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
536 sub B::AV::bytecode {
538 return if saved($av);
540 my $fill = $av->FILL;
545 @ixes = map($_->objix, @array);
547 foreach $sv (@array) {
551 # See PVNV::bytecode for the meaning of the flag argument of 2.
552 $av->B::PVMG::bytecode(2);
553 # Recover sv register and set AvMAX and AvFILL to -1 (since we
554 # create an AV with NEWSV and SvUPGRADE rather than doing newAV
555 # which is what sets AvMAX and AvFILL.
557 printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
560 foreach $elix (@ixes) {
561 print "av_push $elix\n";
565 print "av_extend $max\n";
570 sub B::CV::bytecode {
572 return if saved($cv);
574 $cv->B::PVMG::bytecode;
576 my @subfield_names = qw(ROOT START STASH GV FILEGV PADLIST OUTSIDE);
577 my @subfields = map($cv->$_(), @subfield_names);
578 my @ixes = map($_->objix, @subfields);
579 # Save OP tree from CvROOT (first element of @subfields)
580 my $root = shift @subfields;
582 walkoptree($root, "bytecode");
584 # Reset sv register for $cv (since above ->objix calls stomped on it)
586 for ($i = 0; $i < @ixes; $i++) {
587 printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
589 printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->FLAGS;
590 # Now save all the subfields (except for CvROOT which was handled
591 # above) and CvSTART (now the initial element of @subfields).
592 shift @subfields; # bye-bye CvSTART
594 foreach $sv (@subfields) {
599 sub B::IO::bytecode {
601 return if saved($io);
603 my $top_gv = $io->TOP_GV;
604 my $top_gvix = $top_gv->objix;
605 my $fmt_gv = $io->FMT_GV;
606 my $fmt_gvix = $fmt_gv->objix;
607 my $bottom_gv = $io->BOTTOM_GV;
608 my $bottom_gvix = $bottom_gv->objix;
610 $io->B::PVMG::bytecode;
612 print "xio_top_gv $top_gvix\n";
613 print "xio_fmt_gv $fmt_gvix\n";
614 print "xio_bottom_gv $bottom_gvix\n";
616 foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
617 printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
619 foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
620 printf "xio_%s %d\n", lc($field), $io->$field();
622 printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
625 $bottom_gv->bytecode;
628 sub B::SPECIAL::bytecode {
629 # nothing extra needs doing
632 sub bytecompile_object {
635 svref_2object($sv)->bytecode;
639 sub B::GV::bytecodecv {
642 if ($$cv && !saved($cv)) {
644 warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
645 $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
651 sub bytecompile_main {
652 my $curpad = (comppadlist->ARRAY)[1];
653 my $curpadix = $curpad->objix;
655 walkoptree(main_root, "bytecode");
656 warn "done main program, now walking symbol table\n" if $debug_bc;
657 my ($pack, %exclude);
658 foreach $pack (qw(B O AutoLoader DynaLoader Config DB VMS strict vars
659 FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol
660 SelectSaver blib Cwd))
662 $exclude{$pack."::"} = 1;
664 no strict qw(vars refs);
665 walksymtable(\%{"main::"}, "bytecodecv", sub {
666 warn "considering $_[0]\n" if $debug_bc;
667 return !defined($exclude{$_[0]});
670 printf "main_root %d\n", main_root->objix;
671 printf "main_start %d\n", main_start->objix;
672 printf "curpad $curpadix\n";
673 # XXX Do min_intro_pending and max_intro_pending matter?
677 sub prepare_assemble {
678 my $newfh = IO::File->new_tmpfile;
686 seek($fh, 0, 0); # rewind the temporary file
687 assemble_fh($fh, sub { print OUT @_ });
692 my ($option, $opt, $arg);
693 open(OUT, ">&STDOUT");
697 while ($option = shift @options) {
698 if ($option =~ /^-(.)(.*)/) {
702 unshift @options, $option;
705 if ($opt eq "-" && $arg eq "-") {
708 } elsif ($opt eq "o") {
709 $arg ||= shift @options;
710 open(OUT, ">$arg") or return "$arg: $!\n";
712 } elsif ($opt eq "D") {
713 $arg ||= shift @options;
714 foreach $arg (split(//, $arg)) {
718 } elsif ($arg eq "o") {
720 } elsif ($arg eq "a") {
721 B::Assembler::debug(1);
722 } elsif ($arg eq "C") {
726 } elsif ($opt eq "v") {
728 } elsif ($opt eq "m") {
730 } elsif ($opt eq "S") {
732 } elsif ($opt eq "f") {
733 $arg ||= shift @options;
734 my $value = $arg !~ s/^no-//;
736 my $ref = $optimise{$arg};
740 warn qq(ignoring unknown optimisation option "$arg"\n);
742 } elsif ($opt eq "O") {
743 $arg = 1 if $arg eq "";
745 foreach $ref (values %optimise) {
755 $compress_nullops = 1;
764 $newfh = prepare_assemble() unless $no_assemble;
765 foreach $objname (@options) {
766 eval "bytecompile_object(\\$objname)";
768 do_assemble($newfh) unless $no_assemble;
773 $newfh = prepare_assemble() unless $no_assemble;
775 do_assemble($newfh) unless $no_assemble;
786 B::Bytecode - Perl compiler's bytecode backend
790 perl -MO=Bytecode[,OPTIONS] foo.pl
794 This compiler backend takes Perl source and generates a
795 platform-independent bytecode encapsulating code to load the
796 internal structures perl uses to run your program. When the
797 generated bytecode is loaded in, your program is ready to run,
798 reducing the time which perl would have taken to load and parse
799 your program into its internal semi-compiled form. That means that
800 compiling with this backend will not help improve the runtime
801 execution speed of your program but may improve the start-up time.
802 Depending on the environment in which your program runs this may
803 or may not be a help.
805 The resulting bytecode can be run with a special byteperl executable
806 or (for non-main programs) be loaded via the C<byteload_fh> function
811 If there are any non-option arguments, they are taken to be names of
812 objects to be saved (probably doesn't work properly yet). Without
813 extra arguments, it saves the main program.
819 Output to filename instead of STDOUT.
823 Force end of options.
827 Force optimisations on or off one at a time. Each can be preceded
828 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
830 =item B<-fcompress-nullops>
832 Only fills in the necessary fields of ops which have
833 been optimised away by perl's internal compiler.
835 =item B<-fomit-sequence-numbers>
837 Leaves out code to fill in the op_seq field of all ops
838 which is only used by perl's internal compiler.
840 =item B<-fbypass-nullops>
842 If op->op_next ever points to a NULLOP, replaces the op_next field
843 with the first non-NULLOP in the path of execution.
845 =item B<-fstrip-syntax-tree>
847 Leaves out code to fill in the pointers which link the internal syntax
848 tree together. They're not needed at run-time but leaving them out
849 will make it impossible to recompile or disassemble the resulting
850 program. It will also stop C<goto label> statements from working.
854 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
855 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
856 B<-O6> adds B<-fstrip-syntax-tree>.
860 Debug options (concatenated or separate flags like C<perl -D>).
864 Prints each OP as it's processed.
868 Print debugging information about bytecompiler progress.
872 Tells the (bytecode) assembler to include source assembler lines
873 in its output as bytecode comments.
877 Prints each CV taken from the final symbol tree walk.
881 Output (bytecode) assembler source rather than piping it
882 through the assembler and outputting bytecode.
886 Compile as a module rather than a standalone program. Currently this
887 just means that the bytecodes for initialising C<main_start>,
888 C<main_root> and C<curpad> are omitted.
894 perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
896 perl -MO=Bytecode,-S foo.pl > foo.S
897 assemble foo.S > foo.plc
900 perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
904 Plenty. Current status: experimental.
908 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>