3 # Copyright (c) 1996-1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
13 use B qw(minus_c main_cv main_root main_start comppadlist
14 class peekop walkoptree svref_2object cstring walksymtable
15 SVf_POK SVp_POK SVf_IOK SVp_IOK
17 use B::Asmdata qw(@optype @specialsv_name);
18 use B::Assembler qw(assemble_fh);
22 for ($i = 0; $i < @optype; $i++) {
23 $optype_enum{$optype[$i]} = $i;
26 # Following is SVf_POK|SVp_POK
27 # XXX Shouldn't be hardwired
28 sub POK () { SVf_POK|SVp_POK }
30 # Following is SVf_IOK|SVp_IOK
31 # XXX Shouldn't be hardwired
32 sub IOK () { SVf_IOK|SVp_IOK }
34 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
37 # Optimisation options. On the command line, use hyphens instead of
38 # underscores for compatibility with gcc-style options. We use
39 # underscores here because they are OK in (strict) barewords.
40 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
41 my %optimise = (strip_syntax_tree => \$strip_syntree,
42 compress_nullops => \$compress_nullops,
43 omit_sequence_numbers => \$omit_seq,
44 bypass_nullops => \$bypass_nullops);
47 my %symtable; # maps object addresses to object indices.
48 # Filled in at allocation (newsv/newop) time.
49 my %saved; # maps object addresses (for SVish classes) to "saved yet?"
50 # flag. Set at FOO::bytecode time usually by SV::bytecode.
51 # Manipulated via saved(), mark_saved(), unmark_saved().
53 my $svix = -1; # we keep track of when the sv register contains an element
54 # of the object table to avoid unnecessary repeated
55 # consecutive ldsv instructions.
56 my $opix = -1; # Ditto for the op register.
97 return cstring($str . "\0");
103 sub saved { $saved{${$_[0]}} }
104 sub mark_saved { $saved{${$_[0]}} = 1 }
105 sub unmark_saved { $saved{${$_[0]}} = 0 }
107 sub debug { $debug_bc = shift }
111 warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
116 # objix may stomp on the op register (for op objects)
117 # or the sv register (for SV objects)
119 sub B::OBJECT::objix {
121 my $ix = $symtable{$$obj};
125 $obj->newix($nextix);
126 return $symtable{$$obj} = $nextix++;
132 printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
138 my $gvname = $gv->NAME;
139 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
140 print "gv_fetchpv $name\n";
146 my $name = $hv->NAME;
149 printf "gv_stashpv %s\n", cstring($name);
152 # It's an ordinary HV. Fall back to ordinary newix method
153 $hv->B::SV::newix($ix);
157 sub B::SPECIAL::newix {
159 # Special case. $$sv is not the address of the SV but an
160 # index into svspecialsv_list.
161 printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
167 my $class = class($op);
168 my $typenum = $optype_enum{$class};
169 croak "OP::newix: can't understand class $class" unless defined($typenum);
170 print "newop $typenum\t# $class\n";
174 sub B::OP::walkoptree_debug {
176 warn(sprintf("walkoptree: %s\n", peekop($op)));
179 sub B::OP::bytecode {
181 my $next = $op->next;
183 my $sibix = $op->sibling->objix;
185 my $type = $op->type;
187 if ($bypass_nullops) {
188 $next = $next->next while $$next && $next->type == 0;
190 $nextix = $next->objix;
192 printf "# %s\n", peekop($op) if $debug_bc;
194 print "op_next $nextix\n";
195 print "op_sibling $sibix\n" unless $strip_syntree;
196 printf "op_type %s\t# %d\n", $op->ppaddr, $type;
197 printf("op_seq %d\n", $op->seq) unless $omit_seq;
198 if ($type || !$compress_nullops) {
199 printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
200 $op->targ, $op->flags, $op->private;
204 sub B::UNOP::bytecode {
206 my $firstix = $op->first->objix;
207 $op->B::OP::bytecode;
208 if (($op->type || !$compress_nullops) && !$strip_syntree) {
209 print "op_first $firstix\n";
213 sub B::LOGOP::bytecode {
215 my $otherix = $op->other->objix;
216 $op->B::UNOP::bytecode;
217 print "op_other $otherix\n";
220 sub B::SVOP::bytecode {
223 my $svix = $sv->objix;
224 $op->B::OP::bytecode;
225 print "op_sv $svix\n";
229 sub B::GVOP::bytecode {
232 my $gvix = $gv->objix;
233 $op->B::OP::bytecode;
234 print "op_gv $gvix\n";
238 sub B::PVOP::bytecode {
241 $op->B::OP::bytecode;
243 # This would be easy except that OP_TRANS uses a PVOP to store an
244 # endian-dependent array of 256 shorts instead of a plain string.
246 if ($op->ppaddr eq "pp_trans") {
247 my @shorts = unpack("s256", $pv); # assembler handles endianness
248 print "op_pv_tr ", join(",", @shorts), "\n";
250 printf "newpv %s\nop_pv\n", pvstring($pv);
254 sub B::BINOP::bytecode {
256 my $lastix = $op->last->objix;
257 $op->B::UNOP::bytecode;
258 if (($op->type || !$compress_nullops) && !$strip_syntree) {
259 print "op_last $lastix\n";
263 sub B::CONDOP::bytecode {
265 my $trueix = $op->true->objix;
266 my $falseix = $op->false->objix;
267 $op->B::UNOP::bytecode;
268 print "op_true $trueix\nop_false $falseix\n";
271 sub B::LISTOP::bytecode {
273 my $children = $op->children;
274 $op->B::BINOP::bytecode;
275 if (($op->type || !$compress_nullops) && !$strip_syntree) {
276 print "op_children $children\n";
280 sub B::LOOP::bytecode {
282 my $redoopix = $op->redoop->objix;
283 my $nextopix = $op->nextop->objix;
284 my $lastopix = $op->lastop->objix;
285 $op->B::LISTOP::bytecode;
286 print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
289 sub B::COP::bytecode {
291 my $stash = $op->stash;
292 my $stashix = $stash->objix;
293 my $filegv = $op->filegv;
294 my $filegvix = $filegv->objix;
295 my $line = $op->line;
296 my $warnings = $op->warnings;
297 my $warningsix = $warnings->objix;
299 printf "# line %s:%d\n", $filegv->SV->PV, $line;
301 $op->B::OP::bytecode;
302 printf <<"EOT", pvstring($op->label), $op->cop_seq, $op->arybase;
310 cop_warnings $warningsix
316 sub B::PMOP::bytecode {
318 my $replroot = $op->pmreplroot;
319 my $replrootix = $replroot->objix;
320 my $replstartix = $op->pmreplstart->objix;
321 my $ppaddr = $op->ppaddr;
322 # pmnext is corrupt in some PMOPs (see misc.t for example)
323 #my $pmnextix = $op->pmnext->objix;
326 # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
327 # argument to a split) stores a GV in op_pmreplroot instead
328 # of a substitution syntax tree. We don't want to walk that...
329 if ($ppaddr eq "pp_pushre") {
332 walkoptree($replroot, "bytecode");
335 $op->B::LISTOP::bytecode;
336 if ($ppaddr eq "pp_pushre") {
337 printf "op_pmreplrootgv $replrootix\n";
339 print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
341 my $re = pvstring($op->precomp);
342 # op_pmnext omitted since a perl bug means it's sometime corrupt
343 printf <<"EOT", $op->pmflags, $op->pmpermflags;
351 sub B::SV::bytecode {
353 return if saved($sv);
355 my $refcnt = $sv->REFCNT;
356 my $flags = sprintf("0x%x", $sv->FLAGS);
358 print "sv_refcnt $refcnt\nsv_flags $flags\n";
362 sub B::PV::bytecode {
364 return if saved($sv);
365 $sv->B::SV::bytecode;
366 printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
369 sub B::IV::bytecode {
371 return if saved($sv);
373 $sv->B::SV::bytecode;
374 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
377 sub B::NV::bytecode {
379 return if saved($sv);
380 $sv->B::SV::bytecode;
381 printf "xnv %s\n", $sv->NVX;
384 sub B::RV::bytecode {
386 return if saved($sv);
388 my $rvix = $rv->objix;
390 $sv->B::SV::bytecode;
394 sub B::PVIV::bytecode {
396 return if saved($sv);
398 $sv->B::PV::bytecode;
399 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
402 sub B::PVNV::bytecode {
403 my ($sv, $flag) = @_;
404 # The $flag argument is passed through PVMG::bytecode by BM::bytecode
405 # and AV::bytecode and indicates special handling. $flag = 1 is used by
406 # BM::bytecode and means that we should ensure we save the whole B-M
407 # table. It consists of 257 bytes (256 char array plus a final \0)
408 # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
409 # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
410 # call SV::bytecode instead of saving PV and calling NV::bytecode since
411 # PV/NV/IV stuff is different for AVs.
412 return if saved($sv);
414 $sv->B::SV::bytecode;
417 $sv->B::IV::bytecode;
418 printf "xnv %s\n", $sv->NVX;
420 $pv .= "\0" . $sv->TABLE;
421 printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
423 printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
428 sub B::PVMG::bytecode {
429 my ($sv, $flag) = @_;
430 # See B::PVNV::bytecode for an explanation of $flag.
431 return if saved($sv);
432 # XXX We assume SvSTASH is already saved and don't save it later ourselves
433 my $stashix = $sv->SvSTASH->objix;
434 my @mgchain = $sv->MAGIC;
437 # We need to traverse the magic chain and get objix for each OBJ
438 # field *before* we do B::PVNV::bytecode since objix overwrites
439 # the sv register. However, we need to write the magic-saving
440 # bytecode *after* B::PVNV::bytecode since sv isn't initialised
441 # to refer to $sv until then.
443 @mgobjix = map($_->OBJ->objix, @mgchain);
444 $sv->B::PVNV::bytecode($flag);
445 print "xmg_stash $stashix\n";
446 foreach $mg (@mgchain) {
447 printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
448 cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
452 sub B::PVLV::bytecode {
454 return if saved($sv);
455 $sv->B::PVMG::bytecode;
456 printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
463 sub B::BM::bytecode {
465 return if saved($sv);
466 # See PVNV::bytecode for an explanation of what the argument does
467 $sv->B::PVMG::bytecode(1);
468 printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
469 $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
472 sub B::GV::bytecode {
474 return if saved($gv);
477 my $gvname = $gv->NAME;
478 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
480 my $egvix = $egv->objix;
482 printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS, $gv->LINE;
487 my $refcnt = $gv->REFCNT;
488 printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
489 my $gvrefcnt = $gv->GvREFCNT;
490 printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
491 if ($gvrefcnt > 1 && $ix != $egvix) {
492 print "gp_share $egvix\n";
494 if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
496 my @subfield_names = qw(SV AV HV CV FILEGV FORM IO);
497 my @subfields = map($gv->$_(), @subfield_names);
498 my @ixes = map($_->objix, @subfields);
499 # Reset sv register for $gv
501 for ($i = 0; $i < @ixes; $i++) {
502 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
504 # Now save all the subfields
506 foreach $sv (@subfields) {
513 sub B::HV::bytecode {
515 return if saved($hv);
517 my $name = $hv->NAME;
520 # It's an ordinary HV. Stashes have NAME set and need no further
521 # saving beyond the gv_stashpv that $hv->objix already ensures.
522 my @contents = $hv->ARRAY;
524 for ($i = 1; $i < @contents; $i += 2) {
525 push(@ixes, $contents[$i]->objix);
527 for ($i = 1; $i < @contents; $i += 2) {
528 $contents[$i]->bytecode;
531 for ($i = 0; $i < @contents; $i += 2) {
532 printf("newpv %s\nhv_store %d\n",
533 pvstring($contents[$i]), $ixes[$i / 2]);
535 printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
539 sub B::AV::bytecode {
541 return if saved($av);
543 my $fill = $av->FILL;
548 @ixes = map($_->objix, @array);
550 foreach $sv (@array) {
554 # See PVNV::bytecode for the meaning of the flag argument of 2.
555 $av->B::PVMG::bytecode(2);
556 # Recover sv register and set AvMAX and AvFILL to -1 (since we
557 # create an AV with NEWSV and SvUPGRADE rather than doing newAV
558 # which is what sets AvMAX and AvFILL.
560 printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
563 foreach $elix (@ixes) {
564 print "av_push $elix\n";
568 print "av_extend $max\n";
573 sub B::CV::bytecode {
575 return if saved($cv);
577 $cv->B::PVMG::bytecode;
579 my @subfield_names = qw(ROOT START STASH GV FILEGV PADLIST OUTSIDE);
580 my @subfields = map($cv->$_(), @subfield_names);
581 my @ixes = map($_->objix, @subfields);
582 # Save OP tree from CvROOT (first element of @subfields)
583 my $root = shift @subfields;
585 walkoptree($root, "bytecode");
587 # Reset sv register for $cv (since above ->objix calls stomped on it)
589 for ($i = 0; $i < @ixes; $i++) {
590 printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
592 printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->FLAGS;
593 # Now save all the subfields (except for CvROOT which was handled
594 # above) and CvSTART (now the initial element of @subfields).
595 shift @subfields; # bye-bye CvSTART
597 foreach $sv (@subfields) {
602 sub B::IO::bytecode {
604 return if saved($io);
606 my $top_gv = $io->TOP_GV;
607 my $top_gvix = $top_gv->objix;
608 my $fmt_gv = $io->FMT_GV;
609 my $fmt_gvix = $fmt_gv->objix;
610 my $bottom_gv = $io->BOTTOM_GV;
611 my $bottom_gvix = $bottom_gv->objix;
613 $io->B::PVMG::bytecode;
615 print "xio_top_gv $top_gvix\n";
616 print "xio_fmt_gv $fmt_gvix\n";
617 print "xio_bottom_gv $bottom_gvix\n";
619 foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
620 printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
622 foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
623 printf "xio_%s %d\n", lc($field), $io->$field();
625 printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
628 $bottom_gv->bytecode;
631 sub B::SPECIAL::bytecode {
632 # nothing extra needs doing
635 sub bytecompile_object {
638 svref_2object($sv)->bytecode;
642 sub B::GV::bytecodecv {
645 if ($$cv && !saved($cv)) {
647 warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
648 $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
654 sub bytecompile_main {
655 my $curpad = (comppadlist->ARRAY)[1];
656 my $curpadix = $curpad->objix;
658 walkoptree(main_root, "bytecode");
659 warn "done main program, now walking symbol table\n" if $debug_bc;
660 my ($pack, %exclude);
661 foreach $pack (qw(B O AutoLoader DynaLoader Config DB VMS strict vars
662 FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol
663 SelectSaver blib Cwd))
665 $exclude{$pack."::"} = 1;
667 no strict qw(vars refs);
668 walksymtable(\%{"main::"}, "bytecodecv", sub {
669 warn "considering $_[0]\n" if $debug_bc;
670 return !defined($exclude{$_[0]});
673 printf "main_root %d\n", main_root->objix;
674 printf "main_start %d\n", main_start->objix;
675 printf "curpad $curpadix\n";
676 # XXX Do min_intro_pending and max_intro_pending matter?
680 sub prepare_assemble {
681 my $newfh = IO::File->new_tmpfile;
689 seek($fh, 0, 0); # rewind the temporary file
690 assemble_fh($fh, sub { print OUT @_ });
695 my ($option, $opt, $arg);
696 open(OUT, ">&STDOUT");
700 while ($option = shift @options) {
701 if ($option =~ /^-(.)(.*)/) {
705 unshift @options, $option;
708 if ($opt eq "-" && $arg eq "-") {
711 } elsif ($opt eq "o") {
712 $arg ||= shift @options;
713 open(OUT, ">$arg") or return "$arg: $!\n";
715 } elsif ($opt eq "D") {
716 $arg ||= shift @options;
717 foreach $arg (split(//, $arg)) {
721 } elsif ($arg eq "o") {
723 } elsif ($arg eq "a") {
724 B::Assembler::debug(1);
725 } elsif ($arg eq "C") {
729 } elsif ($opt eq "v") {
731 } elsif ($opt eq "m") {
733 } elsif ($opt eq "S") {
735 } elsif ($opt eq "f") {
736 $arg ||= shift @options;
737 my $value = $arg !~ s/^no-//;
739 my $ref = $optimise{$arg};
743 warn qq(ignoring unknown optimisation option "$arg"\n);
745 } elsif ($opt eq "O") {
746 $arg = 1 if $arg eq "";
748 foreach $ref (values %optimise) {
758 $compress_nullops = 1;
767 $newfh = prepare_assemble() unless $no_assemble;
768 foreach $objname (@options) {
769 eval "bytecompile_object(\\$objname)";
771 do_assemble($newfh) unless $no_assemble;
776 $newfh = prepare_assemble() unless $no_assemble;
778 do_assemble($newfh) unless $no_assemble;
789 B::Bytecode - Perl compiler's bytecode backend
793 perl -MO=Bytecode[,OPTIONS] foo.pl
797 This compiler backend takes Perl source and generates a
798 platform-independent bytecode encapsulating code to load the
799 internal structures perl uses to run your program. When the
800 generated bytecode is loaded in, your program is ready to run,
801 reducing the time which perl would have taken to load and parse
802 your program into its internal semi-compiled form. That means that
803 compiling with this backend will not help improve the runtime
804 execution speed of your program but may improve the start-up time.
805 Depending on the environment in which your program runs this may
806 or may not be a help.
808 The resulting bytecode can be run with a special byteperl executable
809 or (for non-main programs) be loaded via the C<byteload_fh> function
814 If there are any non-option arguments, they are taken to be names of
815 objects to be saved (probably doesn't work properly yet). Without
816 extra arguments, it saves the main program.
822 Output to filename instead of STDOUT.
826 Force end of options.
830 Force optimisations on or off one at a time. Each can be preceded
831 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
833 =item B<-fcompress-nullops>
835 Only fills in the necessary fields of ops which have
836 been optimised away by perl's internal compiler.
838 =item B<-fomit-sequence-numbers>
840 Leaves out code to fill in the op_seq field of all ops
841 which is only used by perl's internal compiler.
843 =item B<-fbypass-nullops>
845 If op->op_next ever points to a NULLOP, replaces the op_next field
846 with the first non-NULLOP in the path of execution.
848 =item B<-fstrip-syntax-tree>
850 Leaves out code to fill in the pointers which link the internal syntax
851 tree together. They're not needed at run-time but leaving them out
852 will make it impossible to recompile or disassemble the resulting
853 program. It will also stop C<goto label> statements from working.
857 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
858 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
859 B<-O6> adds B<-fstrip-syntax-tree>.
863 Debug options (concatenated or separate flags like C<perl -D>).
867 Prints each OP as it's processed.
871 Print debugging information about bytecompiler progress.
875 Tells the (bytecode) assembler to include source assembler lines
876 in its output as bytecode comments.
880 Prints each CV taken from the final symbol tree walk.
884 Output (bytecode) assembler source rather than piping it
885 through the assembler and outputting bytecode.
889 Compile as a module rather than a standalone program. Currently this
890 just means that the bytecodes for initialising C<main_start>,
891 C<main_root> and C<curpad> are omitted.
897 perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
899 perl -MO=Bytecode,-S foo.pl > foo.S
900 assemble foo.S > foo.plc
903 perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
907 Plenty. Current status: experimental.
911 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>