3 # Copyright (c) 1996-1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
13 use B qw(minus_c main_cv main_root main_start comppadlist
14 class peekop walkoptree svref_2object cstring walksymtable
15 SVf_POK SVp_POK SVf_IOK SVp_IOK
17 use B::Asmdata qw(@optype @specialsv_name);
18 use B::Assembler qw(assemble_fh);
22 for ($i = 0; $i < @optype; $i++) {
23 $optype_enum{$optype[$i]} = $i;
26 # Following is SVf_POK|SVp_POK
27 # XXX Shouldn't be hardwired
28 sub POK () { SVf_POK|SVp_POK }
30 # Following is SVf_IOK|SVp_IOK
31 # XXX Shouldn't be hardwired
32 sub IOK () { SVf_IOK|SVp_IOK }
34 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
37 # Optimisation options. On the command line, use hyphens instead of
38 # underscores for compatibility with gcc-style options. We use
39 # underscores here because they are OK in (strict) barewords.
40 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
41 my %optimise = (strip_syntax_tree => \$strip_syntree,
42 compress_nullops => \$compress_nullops,
43 omit_sequence_numbers => \$omit_seq,
44 bypass_nullops => \$bypass_nullops);
47 my %symtable; # maps object addresses to object indices.
48 # Filled in at allocation (newsv/newop) time.
49 my %saved; # maps object addresses (for SVish classes) to "saved yet?"
50 # flag. Set at FOO::bytecode time usually by SV::bytecode.
51 # Manipulated via saved(), mark_saved(), unmark_saved().
53 my $svix = -1; # we keep track of when the sv register contains an element
54 # of the object table to avoid unnecessary repeated
55 # consecutive ldsv instructions.
56 my $opix = -1; # Ditto for the op register.
97 return cstring($str . "\0");
103 sub saved { $saved{${$_[0]}} }
104 sub mark_saved { $saved{${$_[0]}} = 1 }
105 sub unmark_saved { $saved{${$_[0]}} = 0 }
107 sub debug { $debug_bc = shift }
111 warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
116 # objix may stomp on the op register (for op objects)
117 # or the sv register (for SV objects)
119 sub B::OBJECT::objix {
121 my $ix = $symtable{$$obj};
125 $obj->newix($nextix);
126 return $symtable{$$obj} = $nextix++;
132 printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
138 my $gvname = $gv->NAME;
139 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
140 print "gv_fetchpv $name\n";
146 my $name = $hv->NAME;
149 printf "gv_stashpv %s\n", cstring($name);
152 # It's an ordinary HV. Fall back to ordinary newix method
153 $hv->B::SV::newix($ix);
157 sub B::SPECIAL::newix {
159 # Special case. $$sv is not the address of the SV but an
160 # index into svspecialsv_list.
161 printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
167 my $class = class($op);
168 my $typenum = $optype_enum{$class};
169 croak "OP::newix: can't understand class $class" unless defined($typenum);
170 print "newop $typenum\t# $class\n";
174 sub B::OP::walkoptree_debug {
176 warn(sprintf("walkoptree: %s\n", peekop($op)));
179 sub B::OP::bytecode {
181 my $next = $op->next;
183 my $sibix = $op->sibling->objix;
185 my $type = $op->type;
187 if ($bypass_nullops) {
188 $next = $next->next while $$next && $next->type == 0;
190 $nextix = $next->objix;
192 printf "# %s\n", peekop($op) if $debug_bc;
194 print "op_next $nextix\n";
195 print "op_sibling $sibix\n" unless $strip_syntree;
196 printf "op_type %s\t# %d\n", "pp_" . $op->name, $type;
197 printf("op_seq %d\n", $op->seq) unless $omit_seq;
198 if ($type || !$compress_nullops) {
199 printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
200 $op->targ, $op->flags, $op->private;
204 sub B::UNOP::bytecode {
206 my $firstix = $op->first->objix;
207 $op->B::OP::bytecode;
208 if (($op->type || !$compress_nullops) && !$strip_syntree) {
209 print "op_first $firstix\n";
213 sub B::LOGOP::bytecode {
215 my $otherix = $op->other->objix;
216 $op->B::UNOP::bytecode;
217 print "op_other $otherix\n";
220 sub B::SVOP::bytecode {
223 my $svix = $sv->objix;
224 $op->B::OP::bytecode;
225 print "op_sv $svix\n";
229 sub B::GVOP::bytecode {
232 my $gvix = $gv->objix;
233 $op->B::OP::bytecode;
234 print "op_gv $gvix\n";
238 sub B::PVOP::bytecode {
241 $op->B::OP::bytecode;
243 # This would be easy except that OP_TRANS uses a PVOP to store an
244 # endian-dependent array of 256 shorts instead of a plain string.
246 if ($op->name eq "trans") {
247 my @shorts = unpack("s256", $pv); # assembler handles endianness
248 print "op_pv_tr ", join(",", @shorts), "\n";
250 printf "newpv %s\nop_pv\n", pvstring($pv);
254 sub B::BINOP::bytecode {
256 my $lastix = $op->last->objix;
257 $op->B::UNOP::bytecode;
258 if (($op->type || !$compress_nullops) && !$strip_syntree) {
259 print "op_last $lastix\n";
263 sub B::LISTOP::bytecode {
265 my $children = $op->children;
266 $op->B::BINOP::bytecode;
267 if (($op->type || !$compress_nullops) && !$strip_syntree) {
268 print "op_children $children\n";
272 sub B::LOOP::bytecode {
274 my $redoopix = $op->redoop->objix;
275 my $nextopix = $op->nextop->objix;
276 my $lastopix = $op->lastop->objix;
277 $op->B::LISTOP::bytecode;
278 print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
281 sub B::COP::bytecode {
283 my $stash = $op->stash;
284 my $stashix = $stash->objix;
285 my $filegv = $op->filegv;
286 my $filegvix = $filegv->objix;
287 my $line = $op->line;
288 my $warnings = $op->warnings;
289 my $warningsix = $warnings->objix;
291 printf "# line %s:%d\n", $filegv->SV->PV, $line;
293 $op->B::OP::bytecode;
294 printf <<"EOT", pvstring($op->label), $op->cop_seq, $op->arybase;
302 cop_warnings $warningsix
308 sub B::PMOP::bytecode {
310 my $replroot = $op->pmreplroot;
311 my $replrootix = $replroot->objix;
312 my $replstartix = $op->pmreplstart->objix;
313 my $opname = $op->name;
314 # pmnext is corrupt in some PMOPs (see misc.t for example)
315 #my $pmnextix = $op->pmnext->objix;
318 # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
319 # argument to a split) stores a GV in op_pmreplroot instead
320 # of a substitution syntax tree. We don't want to walk that...
321 if ($opname eq "pushre") {
324 walkoptree($replroot, "bytecode");
327 $op->B::LISTOP::bytecode;
328 if ($opname eq "pushre") {
329 printf "op_pmreplrootgv $replrootix\n";
331 print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
333 my $re = pvstring($op->precomp);
334 # op_pmnext omitted since a perl bug means it's sometime corrupt
335 printf <<"EOT", $op->pmflags, $op->pmpermflags;
343 sub B::SV::bytecode {
345 return if saved($sv);
347 my $refcnt = $sv->REFCNT;
348 my $flags = sprintf("0x%x", $sv->FLAGS);
350 print "sv_refcnt $refcnt\nsv_flags $flags\n";
354 sub B::PV::bytecode {
356 return if saved($sv);
357 $sv->B::SV::bytecode;
358 printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
361 sub B::IV::bytecode {
363 return if saved($sv);
365 $sv->B::SV::bytecode;
366 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
369 sub B::NV::bytecode {
371 return if saved($sv);
372 $sv->B::SV::bytecode;
373 printf "xnv %s\n", $sv->NVX;
376 sub B::RV::bytecode {
378 return if saved($sv);
380 my $rvix = $rv->objix;
382 $sv->B::SV::bytecode;
386 sub B::PVIV::bytecode {
388 return if saved($sv);
390 $sv->B::PV::bytecode;
391 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
394 sub B::PVNV::bytecode {
396 my $flag = shift || 0;
397 # The $flag argument is passed through PVMG::bytecode by BM::bytecode
398 # and AV::bytecode and indicates special handling. $flag = 1 is used by
399 # BM::bytecode and means that we should ensure we save the whole B-M
400 # table. It consists of 257 bytes (256 char array plus a final \0)
401 # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
402 # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
403 # call SV::bytecode instead of saving PV and calling NV::bytecode since
404 # PV/NV/IV stuff is different for AVs.
405 return if saved($sv);
407 $sv->B::SV::bytecode;
410 $sv->B::IV::bytecode;
411 printf "xnv %s\n", $sv->NVX;
413 $pv .= "\0" . $sv->TABLE;
414 printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
416 printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
421 sub B::PVMG::bytecode {
422 my ($sv, $flag) = @_;
423 # See B::PVNV::bytecode for an explanation of $flag.
424 return if saved($sv);
425 # XXX We assume SvSTASH is already saved and don't save it later ourselves
426 my $stashix = $sv->SvSTASH->objix;
427 my @mgchain = $sv->MAGIC;
430 # We need to traverse the magic chain and get objix for each OBJ
431 # field *before* we do B::PVNV::bytecode since objix overwrites
432 # the sv register. However, we need to write the magic-saving
433 # bytecode *after* B::PVNV::bytecode since sv isn't initialised
434 # to refer to $sv until then.
436 @mgobjix = map($_->OBJ->objix, @mgchain);
437 $sv->B::PVNV::bytecode($flag);
438 print "xmg_stash $stashix\n";
439 foreach $mg (@mgchain) {
440 printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
441 cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
445 sub B::PVLV::bytecode {
447 return if saved($sv);
448 $sv->B::PVMG::bytecode;
449 printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
456 sub B::BM::bytecode {
458 return if saved($sv);
459 # See PVNV::bytecode for an explanation of what the argument does
460 $sv->B::PVMG::bytecode(1);
461 printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
462 $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
465 sub B::GV::bytecode {
467 return if saved($gv);
470 my $gvname = $gv->NAME;
471 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
473 my $egvix = $egv->objix;
475 printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS, $gv->LINE;
480 my $refcnt = $gv->REFCNT;
481 printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
482 my $gvrefcnt = $gv->GvREFCNT;
483 printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
484 if ($gvrefcnt > 1 && $ix != $egvix) {
485 print "gp_share $egvix\n";
487 if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
489 my @subfield_names = qw(SV AV HV CV FILEGV FORM IO);
490 my @subfields = map($gv->$_(), @subfield_names);
491 my @ixes = map($_->objix, @subfields);
492 # Reset sv register for $gv
494 for ($i = 0; $i < @ixes; $i++) {
495 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
497 # Now save all the subfields
499 foreach $sv (@subfields) {
506 sub B::HV::bytecode {
508 return if saved($hv);
510 my $name = $hv->NAME;
513 # It's an ordinary HV. Stashes have NAME set and need no further
514 # saving beyond the gv_stashpv that $hv->objix already ensures.
515 my @contents = $hv->ARRAY;
517 for ($i = 1; $i < @contents; $i += 2) {
518 push(@ixes, $contents[$i]->objix);
520 for ($i = 1; $i < @contents; $i += 2) {
521 $contents[$i]->bytecode;
524 for ($i = 0; $i < @contents; $i += 2) {
525 printf("newpv %s\nhv_store %d\n",
526 pvstring($contents[$i]), $ixes[$i / 2]);
528 printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
532 sub B::AV::bytecode {
534 return if saved($av);
536 my $fill = $av->FILL;
541 @ixes = map($_->objix, @array);
543 foreach $sv (@array) {
547 # See PVNV::bytecode for the meaning of the flag argument of 2.
548 $av->B::PVMG::bytecode(2);
549 # Recover sv register and set AvMAX and AvFILL to -1 (since we
550 # create an AV with NEWSV and SvUPGRADE rather than doing newAV
551 # which is what sets AvMAX and AvFILL.
553 printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
556 foreach $elix (@ixes) {
557 print "av_push $elix\n";
561 print "av_extend $max\n";
566 sub B::CV::bytecode {
568 return if saved($cv);
570 $cv->B::PVMG::bytecode;
572 my @subfield_names = qw(ROOT START STASH GV FILEGV PADLIST OUTSIDE);
573 my @subfields = map($cv->$_(), @subfield_names);
574 my @ixes = map($_->objix, @subfields);
575 # Save OP tree from CvROOT (first element of @subfields)
576 my $root = shift @subfields;
578 walkoptree($root, "bytecode");
580 # Reset sv register for $cv (since above ->objix calls stomped on it)
582 for ($i = 0; $i < @ixes; $i++) {
583 printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
585 printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->FLAGS;
586 # Now save all the subfields (except for CvROOT which was handled
587 # above) and CvSTART (now the initial element of @subfields).
588 shift @subfields; # bye-bye CvSTART
590 foreach $sv (@subfields) {
595 sub B::IO::bytecode {
597 return if saved($io);
599 my $top_gv = $io->TOP_GV;
600 my $top_gvix = $top_gv->objix;
601 my $fmt_gv = $io->FMT_GV;
602 my $fmt_gvix = $fmt_gv->objix;
603 my $bottom_gv = $io->BOTTOM_GV;
604 my $bottom_gvix = $bottom_gv->objix;
606 $io->B::PVMG::bytecode;
608 print "xio_top_gv $top_gvix\n";
609 print "xio_fmt_gv $fmt_gvix\n";
610 print "xio_bottom_gv $bottom_gvix\n";
612 foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
613 printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
615 foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
616 printf "xio_%s %d\n", lc($field), $io->$field();
618 printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
621 $bottom_gv->bytecode;
624 sub B::SPECIAL::bytecode {
625 # nothing extra needs doing
628 sub bytecompile_object {
631 svref_2object($sv)->bytecode;
635 sub B::GV::bytecodecv {
638 if ($$cv && !saved($cv)) {
640 warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
641 $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
647 sub bytecompile_main {
648 my $curpad = (comppadlist->ARRAY)[1];
649 my $curpadix = $curpad->objix;
651 walkoptree(main_root, "bytecode");
652 warn "done main program, now walking symbol table\n" if $debug_bc;
653 my ($pack, %exclude);
654 foreach $pack (qw(B O AutoLoader DynaLoader Config DB VMS strict vars
655 FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol
656 SelectSaver blib Cwd))
658 $exclude{$pack."::"} = 1;
660 no strict qw(vars refs);
661 walksymtable(\%{"main::"}, "bytecodecv", sub {
662 warn "considering $_[0]\n" if $debug_bc;
663 return !defined($exclude{$_[0]});
666 printf "main_root %d\n", main_root->objix;
667 printf "main_start %d\n", main_start->objix;
668 printf "curpad $curpadix\n";
669 # XXX Do min_intro_pending and max_intro_pending matter?
673 sub prepare_assemble {
674 my $newfh = IO::File->new_tmpfile;
682 seek($fh, 0, 0); # rewind the temporary file
683 assemble_fh($fh, sub { print OUT @_ });
688 my ($option, $opt, $arg);
689 open(OUT, ">&STDOUT");
693 while ($option = shift @options) {
694 if ($option =~ /^-(.)(.*)/) {
698 unshift @options, $option;
701 if ($opt eq "-" && $arg eq "-") {
704 } elsif ($opt eq "o") {
705 $arg ||= shift @options;
706 open(OUT, ">$arg") or return "$arg: $!\n";
708 } elsif ($opt eq "D") {
709 $arg ||= shift @options;
710 foreach $arg (split(//, $arg)) {
714 } elsif ($arg eq "o") {
716 } elsif ($arg eq "a") {
717 B::Assembler::debug(1);
718 } elsif ($arg eq "C") {
722 } elsif ($opt eq "v") {
724 } elsif ($opt eq "m") {
726 } elsif ($opt eq "S") {
728 } elsif ($opt eq "f") {
729 $arg ||= shift @options;
730 my $value = $arg !~ s/^no-//;
732 my $ref = $optimise{$arg};
736 warn qq(ignoring unknown optimisation option "$arg"\n);
738 } elsif ($opt eq "O") {
739 $arg = 1 if $arg eq "";
741 foreach $ref (values %optimise) {
751 $compress_nullops = 1;
760 $newfh = prepare_assemble() unless $no_assemble;
761 foreach $objname (@options) {
762 eval "bytecompile_object(\\$objname)";
764 do_assemble($newfh) unless $no_assemble;
769 $newfh = prepare_assemble() unless $no_assemble;
771 do_assemble($newfh) unless $no_assemble;
782 B::Bytecode - Perl compiler's bytecode backend
786 perl -MO=Bytecode[,OPTIONS] foo.pl
790 This compiler backend takes Perl source and generates a
791 platform-independent bytecode encapsulating code to load the
792 internal structures perl uses to run your program. When the
793 generated bytecode is loaded in, your program is ready to run,
794 reducing the time which perl would have taken to load and parse
795 your program into its internal semi-compiled form. That means that
796 compiling with this backend will not help improve the runtime
797 execution speed of your program but may improve the start-up time.
798 Depending on the environment in which your program runs this may
799 or may not be a help.
801 The resulting bytecode can be run with a special byteperl executable
802 or (for non-main programs) be loaded via the C<byteload_fh> function
807 If there are any non-option arguments, they are taken to be names of
808 objects to be saved (probably doesn't work properly yet). Without
809 extra arguments, it saves the main program.
815 Output to filename instead of STDOUT.
819 Force end of options.
823 Force optimisations on or off one at a time. Each can be preceded
824 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
826 =item B<-fcompress-nullops>
828 Only fills in the necessary fields of ops which have
829 been optimised away by perl's internal compiler.
831 =item B<-fomit-sequence-numbers>
833 Leaves out code to fill in the op_seq field of all ops
834 which is only used by perl's internal compiler.
836 =item B<-fbypass-nullops>
838 If op->op_next ever points to a NULLOP, replaces the op_next field
839 with the first non-NULLOP in the path of execution.
841 =item B<-fstrip-syntax-tree>
843 Leaves out code to fill in the pointers which link the internal syntax
844 tree together. They're not needed at run-time but leaving them out
845 will make it impossible to recompile or disassemble the resulting
846 program. It will also stop C<goto label> statements from working.
850 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
851 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
852 B<-O6> adds B<-fstrip-syntax-tree>.
856 Debug options (concatenated or separate flags like C<perl -D>).
860 Prints each OP as it's processed.
864 Print debugging information about bytecompiler progress.
868 Tells the (bytecode) assembler to include source assembler lines
869 in its output as bytecode comments.
873 Prints each CV taken from the final symbol tree walk.
877 Output (bytecode) assembler source rather than piping it
878 through the assembler and outputting bytecode.
882 Compile as a module rather than a standalone program. Currently this
883 just means that the bytecodes for initialising C<main_start>,
884 C<main_root> and C<curpad> are omitted.
890 perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
892 perl -MO=Bytecode,-S foo.pl > foo.S
893 assemble foo.S > foo.plc
895 Note that C<assemble> lives in the C<B> subdirectory of your perl
896 library directory. The utility called perlcc may also be used to
897 help make use of this compiler.
899 perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
903 Plenty. Current status: experimental.
907 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>