3 # Copyright (c) 1996-1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
13 use B qw(minus_c main_cv main_root main_start comppadlist
14 class peekop walkoptree svref_2object cstring walksymtable);
15 use B::Asmdata qw(@optype @specialsv_name);
16 use B::Assembler qw(assemble_fh);
20 for ($i = 0; $i < @optype; $i++) {
21 $optype_enum{$optype[$i]} = $i;
24 # Following is SVf_POK|SVp_POK
25 # XXX Shouldn't be hardwired
26 sub POK () { 0x04040000 }
28 # Following is SVf_IOK|SVp_OK
29 # XXX Shouldn't be hardwired
30 sub IOK () { 0x01010000 }
32 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
35 # Optimisation options. On the command line, use hyphens instead of
36 # underscores for compatibility with gcc-style options. We use
37 # underscores here because they are OK in (strict) barewords.
38 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
39 my %optimise = (strip_syntax_tree => \$strip_syntree,
40 compress_nullops => \$compress_nullops,
41 omit_sequence_numbers => \$omit_seq,
42 bypass_nullops => \$bypass_nullops);
45 my %symtable; # maps object addresses to object indices.
46 # Filled in at allocation (newsv/newop) time.
47 my %saved; # maps object addresses (for SVish classes) to "saved yet?"
48 # flag. Set at FOO::bytecode time usually by SV::bytecode.
49 # Manipulated via saved(), mark_saved(), unmark_saved().
51 my $svix = -1; # we keep track of when the sv register contains an element
52 # of the object table to avoid unnecessary repeated
53 # consecutive ldsv instructions.
54 my $opix = -1; # Ditto for the op register.
95 return cstring($str . "\0");
101 sub saved { $saved{${$_[0]}} }
102 sub mark_saved { $saved{${$_[0]}} = 1 }
103 sub unmark_saved { $saved{${$_[0]}} = 0 }
105 sub debug { $debug_bc = shift }
109 warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
114 # objix may stomp on the op register (for op objects)
115 # or the sv register (for SV objects)
117 sub B::OBJECT::objix {
119 my $ix = $symtable{$$obj};
123 $obj->newix($nextix);
124 return $symtable{$$obj} = $nextix++;
130 printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
136 my $gvname = $gv->NAME;
137 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
138 print "gv_fetchpv $name\n";
144 my $name = $hv->NAME;
147 printf "gv_stashpv %s\n", cstring($name);
150 # It's an ordinary HV. Fall back to ordinary newix method
151 $hv->B::SV::newix($ix);
155 sub B::SPECIAL::newix {
157 # Special case. $$sv is not the address of the SV but an
158 # index into svspecialsv_list.
159 printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
165 my $class = class($op);
166 my $typenum = $optype_enum{$class};
167 croak "OP::newix: can't understand class $class" unless defined($typenum);
168 print "newop $typenum\t# $class\n";
172 sub B::OP::walkoptree_debug {
174 warn(sprintf("walkoptree: %s\n", peekop($op)));
177 sub B::OP::bytecode {
179 my $next = $op->next;
181 my $sibix = $op->sibling->objix;
183 my $type = $op->type;
185 if ($bypass_nullops) {
186 $next = $next->next while $$next && $next->type == 0;
188 $nextix = $next->objix;
190 printf "# %s\n", peekop($op) if $debug_bc;
192 print "op_next $nextix\n";
193 print "op_sibling $sibix\n" unless $strip_syntree;
194 printf "op_type %s\t# %d\n", $op->ppaddr, $type;
195 printf("op_seq %d\n", $op->seq) unless $omit_seq;
196 if ($type || !$compress_nullops) {
197 printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
198 $op->targ, $op->flags, $op->private;
202 sub B::UNOP::bytecode {
204 my $firstix = $op->first->objix;
205 $op->B::OP::bytecode;
206 if (($op->type || !$compress_nullops) && !$strip_syntree) {
207 print "op_first $firstix\n";
211 sub B::LOGOP::bytecode {
213 my $otherix = $op->other->objix;
214 $op->B::UNOP::bytecode;
215 print "op_other $otherix\n";
218 sub B::SVOP::bytecode {
221 my $svix = $sv->objix;
222 $op->B::OP::bytecode;
223 print "op_sv $svix\n";
227 sub B::GVOP::bytecode {
230 my $gvix = $gv->objix;
231 $op->B::OP::bytecode;
232 print "op_gv $gvix\n";
236 sub B::PVOP::bytecode {
239 $op->B::OP::bytecode;
241 # This would be easy except that OP_TRANS uses a PVOP to store an
242 # endian-dependent array of 256 shorts instead of a plain string.
244 if ($op->ppaddr eq "pp_trans") {
245 my @shorts = unpack("s256", $pv); # assembler handles endianness
246 print "op_pv_tr ", join(",", @shorts), "\n";
248 printf "newpv %s\nop_pv\n", pvstring($pv);
252 sub B::BINOP::bytecode {
254 my $lastix = $op->last->objix;
255 $op->B::UNOP::bytecode;
256 if (($op->type || !$compress_nullops) && !$strip_syntree) {
257 print "op_last $lastix\n";
261 sub B::CONDOP::bytecode {
263 my $trueix = $op->true->objix;
264 my $falseix = $op->false->objix;
265 $op->B::UNOP::bytecode;
266 print "op_true $trueix\nop_false $falseix\n";
269 sub B::LISTOP::bytecode {
271 my $children = $op->children;
272 $op->B::BINOP::bytecode;
273 if (($op->type || !$compress_nullops) && !$strip_syntree) {
274 print "op_children $children\n";
278 sub B::LOOP::bytecode {
280 my $redoopix = $op->redoop->objix;
281 my $nextopix = $op->nextop->objix;
282 my $lastopix = $op->lastop->objix;
283 $op->B::LISTOP::bytecode;
284 print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
287 sub B::COP::bytecode {
289 my $stash = $op->stash;
290 my $stashix = $stash->objix;
291 my $filegv = $op->filegv;
292 my $filegvix = $filegv->objix;
293 my $line = $op->line;
295 printf "# line %s:%d\n", $filegv->SV->PV, $line;
297 $op->B::OP::bytecode;
298 printf <<"EOT", pvstring($op->label), $op->cop_seq, $op->arybase;
311 sub B::PMOP::bytecode {
313 my $replroot = $op->pmreplroot;
314 my $replrootix = $replroot->objix;
315 my $replstartix = $op->pmreplstart->objix;
316 my $ppaddr = $op->ppaddr;
317 # pmnext is corrupt in some PMOPs (see misc.t for example)
318 #my $pmnextix = $op->pmnext->objix;
321 # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
322 # argument to a split) stores a GV in op_pmreplroot instead
323 # of a substitution syntax tree. We don't want to walk that...
324 if ($ppaddr eq "pp_pushre") {
327 walkoptree($replroot, "bytecode");
330 $op->B::LISTOP::bytecode;
331 if ($ppaddr eq "pp_pushre") {
332 printf "op_pmreplrootgv $replrootix\n";
334 print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
336 my $re = pvstring($op->precomp);
337 # op_pmnext omitted since a perl bug means it's sometime corrupt
338 printf <<"EOT", $op->pmflags, $op->pmpermflags;
346 sub B::SV::bytecode {
348 return if saved($sv);
350 my $refcnt = $sv->REFCNT;
351 my $flags = sprintf("0x%x", $sv->FLAGS);
353 print "sv_refcnt $refcnt\nsv_flags $flags\n";
357 sub B::PV::bytecode {
359 return if saved($sv);
360 $sv->B::SV::bytecode;
361 printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
364 sub B::IV::bytecode {
366 return if saved($sv);
368 $sv->B::SV::bytecode;
369 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
372 sub B::NV::bytecode {
374 return if saved($sv);
375 $sv->B::SV::bytecode;
376 printf "xnv %s\n", $sv->NVX;
379 sub B::RV::bytecode {
381 return if saved($sv);
383 my $rvix = $rv->objix;
385 $sv->B::SV::bytecode;
389 sub B::PVIV::bytecode {
391 return if saved($sv);
393 $sv->B::PV::bytecode;
394 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
397 sub B::PVNV::bytecode {
398 my ($sv, $flag) = @_;
399 # The $flag argument is passed through PVMG::bytecode by BM::bytecode
400 # and AV::bytecode and indicates special handling. $flag = 1 is used by
401 # BM::bytecode and means that we should ensure we save the whole B-M
402 # table. It consists of 257 bytes (256 char array plus a final \0)
403 # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
404 # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
405 # call SV::bytecode instead of saving PV and calling NV::bytecode since
406 # PV/NV/IV stuff is different for AVs.
407 return if saved($sv);
409 $sv->B::SV::bytecode;
412 $sv->B::IV::bytecode;
413 printf "xnv %s\n", $sv->NVX;
415 $pv .= "\0" . $sv->TABLE;
416 printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
418 printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
423 sub B::PVMG::bytecode {
424 my ($sv, $flag) = @_;
425 # See B::PVNV::bytecode for an explanation of $flag.
426 return if saved($sv);
427 # XXX We assume SvSTASH is already saved and don't save it later ourselves
428 my $stashix = $sv->SvSTASH->objix;
429 my @mgchain = $sv->MAGIC;
432 # We need to traverse the magic chain and get objix for each OBJ
433 # field *before* we do B::PVNV::bytecode since objix overwrites
434 # the sv register. However, we need to write the magic-saving
435 # bytecode *after* B::PVNV::bytecode since sv isn't initialised
436 # to refer to $sv until then.
438 @mgobjix = map($_->OBJ->objix, @mgchain);
439 $sv->B::PVNV::bytecode($flag);
440 print "xmg_stash $stashix\n";
441 foreach $mg (@mgchain) {
442 printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
443 cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
447 sub B::PVLV::bytecode {
449 return if saved($sv);
450 $sv->B::PVMG::bytecode;
451 printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
458 sub B::BM::bytecode {
460 return if saved($sv);
461 # See PVNV::bytecode for an explanation of what the argument does
462 $sv->B::PVMG::bytecode(1);
463 printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
464 $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
467 sub B::GV::bytecode {
469 return if saved($gv);
472 my $gvname = $gv->NAME;
473 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
475 my $egvix = $egv->objix;
477 printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS, $gv->LINE;
482 my $refcnt = $gv->REFCNT;
483 printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
484 my $gvrefcnt = $gv->GvREFCNT;
485 printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
486 if ($gvrefcnt > 1 && $ix != $egvix) {
487 print "gp_share $egvix\n";
489 if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
491 my @subfield_names = qw(SV AV HV CV FILEGV FORM IO);
492 my @subfields = map($gv->$_(), @subfield_names);
493 my @ixes = map($_->objix, @subfields);
494 # Reset sv register for $gv
496 for ($i = 0; $i < @ixes; $i++) {
497 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
499 # Now save all the subfields
501 foreach $sv (@subfields) {
508 sub B::HV::bytecode {
510 return if saved($hv);
512 my $name = $hv->NAME;
515 # It's an ordinary HV. Stashes have NAME set and need no further
516 # saving beyond the gv_stashpv that $hv->objix already ensures.
517 my @contents = $hv->ARRAY;
519 for ($i = 1; $i < @contents; $i += 2) {
520 push(@ixes, $contents[$i]->objix);
522 for ($i = 1; $i < @contents; $i += 2) {
523 $contents[$i]->bytecode;
526 for ($i = 0; $i < @contents; $i += 2) {
527 printf("newpv %s\nhv_store %d\n",
528 pvstring($contents[$i]), $ixes[$i / 2]);
530 printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
534 sub B::AV::bytecode {
536 return if saved($av);
538 my $fill = $av->FILL;
543 @ixes = map($_->objix, @array);
545 foreach $sv (@array) {
549 # See PVNV::bytecode for the meaning of the flag argument of 2.
550 $av->B::PVMG::bytecode(2);
551 # Recover sv register and set AvMAX and AvFILL to -1 (since we
552 # create an AV with NEWSV and SvUPGRADE rather than doing newAV
553 # which is what sets AvMAX and AvFILL.
555 printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
558 foreach $elix (@ixes) {
559 print "av_push $elix\n";
563 print "av_extend $max\n";
568 sub B::CV::bytecode {
570 return if saved($cv);
572 $cv->B::PVMG::bytecode;
574 my @subfield_names = qw(ROOT START STASH GV FILEGV PADLIST OUTSIDE);
575 my @subfields = map($cv->$_(), @subfield_names);
576 my @ixes = map($_->objix, @subfields);
577 # Save OP tree from CvROOT (first element of @subfields)
578 my $root = shift @subfields;
580 walkoptree($root, "bytecode");
582 # Reset sv register for $cv (since above ->objix calls stomped on it)
584 for ($i = 0; $i < @ixes; $i++) {
585 printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
587 printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->FLAGS;
588 # Now save all the subfields (except for CvROOT which was handled
589 # above) and CvSTART (now the initial element of @subfields).
590 shift @subfields; # bye-bye CvSTART
592 foreach $sv (@subfields) {
597 sub B::IO::bytecode {
599 return if saved($io);
601 my $top_gv = $io->TOP_GV;
602 my $top_gvix = $top_gv->objix;
603 my $fmt_gv = $io->FMT_GV;
604 my $fmt_gvix = $fmt_gv->objix;
605 my $bottom_gv = $io->BOTTOM_GV;
606 my $bottom_gvix = $bottom_gv->objix;
608 $io->B::PVMG::bytecode;
610 print "xio_top_gv $top_gvix\n";
611 print "xio_fmt_gv $fmt_gvix\n";
612 print "xio_bottom_gv $bottom_gvix\n";
614 foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
615 printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
617 foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
618 printf "xio_%s %d\n", lc($field), $io->$field();
620 printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
623 $bottom_gv->bytecode;
626 sub B::SPECIAL::bytecode {
627 # nothing extra needs doing
630 sub bytecompile_object {
633 svref_2object($sv)->bytecode;
637 sub B::GV::bytecodecv {
640 if ($$cv && !saved($cv)) {
642 warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
643 $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
649 sub bytecompile_main {
650 my $curpad = (comppadlist->ARRAY)[1];
651 my $curpadix = $curpad->objix;
653 walkoptree(main_root, "bytecode");
654 warn "done main program, now walking symbol table\n" if $debug_bc;
655 my ($pack, %exclude);
656 foreach $pack (qw(B O AutoLoader DynaLoader Config DB VMS strict vars
657 FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol
658 SelectSaver blib Cwd))
660 $exclude{$pack."::"} = 1;
662 no strict qw(vars refs);
663 walksymtable(\%{"main::"}, "bytecodecv", sub {
664 warn "considering $_[0]\n" if $debug_bc;
665 return !defined($exclude{$_[0]});
668 printf "main_root %d\n", main_root->objix;
669 printf "main_start %d\n", main_start->objix;
670 printf "curpad $curpadix\n";
671 # XXX Do min_intro_pending and max_intro_pending matter?
675 sub prepare_assemble {
676 my $newfh = IO::File->new_tmpfile;
684 seek($fh, 0, 0); # rewind the temporary file
685 assemble_fh($fh, sub { print OUT @_ });
690 my ($option, $opt, $arg);
691 open(OUT, ">&STDOUT");
695 while ($option = shift @options) {
696 if ($option =~ /^-(.)(.*)/) {
700 unshift @options, $option;
703 if ($opt eq "-" && $arg eq "-") {
706 } elsif ($opt eq "o") {
707 $arg ||= shift @options;
708 open(OUT, ">$arg") or return "$arg: $!\n";
710 } elsif ($opt eq "D") {
711 $arg ||= shift @options;
712 foreach $arg (split(//, $arg)) {
716 } elsif ($arg eq "o") {
718 } elsif ($arg eq "a") {
719 B::Assembler::debug(1);
720 } elsif ($arg eq "C") {
724 } elsif ($opt eq "v") {
726 } elsif ($opt eq "m") {
728 } elsif ($opt eq "S") {
730 } elsif ($opt eq "f") {
731 $arg ||= shift @options;
732 my $value = $arg !~ s/^no-//;
734 my $ref = $optimise{$arg};
738 warn qq(ignoring unknown optimisation option "$arg"\n);
740 } elsif ($opt eq "O") {
741 $arg = 1 if $arg eq "";
743 foreach $ref (values %optimise) {
753 $compress_nullops = 1;
762 $newfh = prepare_assemble() unless $no_assemble;
763 foreach $objname (@options) {
764 eval "bytecompile_object(\\$objname)";
766 do_assemble($newfh) unless $no_assemble;
771 $newfh = prepare_assemble() unless $no_assemble;
773 do_assemble($newfh) unless $no_assemble;
784 B::Bytecode - Perl compiler's bytecode backend
788 perl -MO=Bytecode[,OPTIONS] foo.pl
792 This compiler backend takes Perl source and generates a
793 platform-independent bytecode encapsulating code to load the
794 internal structures perl uses to run your program. When the
795 generated bytecode is loaded in, your program is ready to run,
796 reducing the time which perl would have taken to load and parse
797 your program into its internal semi-compiled form. That means that
798 compiling with this backend will not help improve the runtime
799 execution speed of your program but may improve the start-up time.
800 Depending on the environment in which your program runs this may
801 or may not be a help.
803 The resulting bytecode can be run with a special byteperl executable
804 or (for non-main programs) be loaded via the C<byteload_fh> function
809 If there are any non-option arguments, they are taken to be names of
810 objects to be saved (probably doesn't work properly yet). Without
811 extra arguments, it saves the main program.
817 Output to filename instead of STDOUT.
821 Force end of options.
825 Force optimisations on or off one at a time. Each can be preceded
826 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
828 =item B<-fcompress-nullops>
830 Only fills in the necessary fields of ops which have
831 been optimised away by perl's internal compiler.
833 =item B<-fomit-sequence-numbers>
835 Leaves out code to fill in the op_seq field of all ops
836 which is only used by perl's internal compiler.
838 =item B<-fbypass-nullops>
840 If op->op_next ever points to a NULLOP, replaces the op_next field
841 with the first non-NULLOP in the path of execution.
843 =item B<-fstrip-syntax-tree>
845 Leaves out code to fill in the pointers which link the internal syntax
846 tree together. They're not needed at run-time but leaving them out
847 will make it impossible to recompile or disassemble the resulting
848 program. It will also stop C<goto label> statements from working.
852 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
853 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
854 B<-O6> adds B<-fstrip-syntax-tree>.
858 Debug options (concatenated or separate flags like C<perl -D>).
862 Prints each OP as it's processed.
866 Print debugging information about bytecompiler progress.
870 Tells the (bytecode) assembler to include source assembler lines
871 in its output as bytecode comments.
875 Prints each CV taken from the final symbol tree walk.
879 Output (bytecode) assembler source rather than piping it
880 through the assembler and outputting bytecode.
884 Compile as a module rather than a standalone program. Currently this
885 just means that the bytecodes for initialising C<main_start>,
886 C<main_root> and C<curpad> are omitted.
892 perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
894 perl -MO=Bytecode,-S foo.pl > foo.S
895 assemble foo.S > foo.plc
898 perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
902 Plenty. Current status: experimental.
906 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>