3 # Copyright (c) 1996-1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
13 use B qw(minus_c main_cv main_root main_start comppadlist
14 class peekop walkoptree svref_2object cstring walksymtable
15 SVf_POK SVp_POK SVf_IOK SVp_IOK
17 use B::Asmdata qw(@optype @specialsv_name);
18 use B::Assembler qw(assemble_fh);
22 for ($i = 0; $i < @optype; $i++) {
23 $optype_enum{$optype[$i]} = $i;
26 # Following is SVf_POK|SVp_POK
27 # XXX Shouldn't be hardwired
28 sub POK () { SVf_POK|SVp_POK }
30 # Following is SVf_IOK|SVp_IOK
31 # XXX Shouldn't be hardwired
32 sub IOK () { SVf_IOK|SVp_IOK }
34 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
37 # Optimisation options. On the command line, use hyphens instead of
38 # underscores for compatibility with gcc-style options. We use
39 # underscores here because they are OK in (strict) barewords.
40 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
41 my %optimise = (strip_syntax_tree => \$strip_syntree,
42 compress_nullops => \$compress_nullops,
43 omit_sequence_numbers => \$omit_seq,
44 bypass_nullops => \$bypass_nullops);
47 my %symtable; # maps object addresses to object indices.
48 # Filled in at allocation (newsv/newop) time.
49 my %saved; # maps object addresses (for SVish classes) to "saved yet?"
50 # flag. Set at FOO::bytecode time usually by SV::bytecode.
51 # Manipulated via saved(), mark_saved(), unmark_saved().
53 my $svix = -1; # we keep track of when the sv register contains an element
54 # of the object table to avoid unnecessary repeated
55 # consecutive ldsv instructions.
56 my $opix = -1; # Ditto for the op register.
97 return cstring($str . "\0");
103 sub saved { $saved{${$_[0]}} }
104 sub mark_saved { $saved{${$_[0]}} = 1 }
105 sub unmark_saved { $saved{${$_[0]}} = 0 }
107 sub debug { $debug_bc = shift }
111 warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
116 # objix may stomp on the op register (for op objects)
117 # or the sv register (for SV objects)
119 sub B::OBJECT::objix {
121 my $ix = $symtable{$$obj};
125 $obj->newix($nextix);
126 return $symtable{$$obj} = $nextix++;
132 printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
138 my $gvname = $gv->NAME;
139 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
140 print "gv_fetchpv $name\n";
146 my $name = $hv->NAME;
149 printf "gv_stashpv %s\n", cstring($name);
152 # It's an ordinary HV. Fall back to ordinary newix method
153 $hv->B::SV::newix($ix);
157 sub B::SPECIAL::newix {
159 # Special case. $$sv is not the address of the SV but an
160 # index into svspecialsv_list.
161 printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
167 my $class = class($op);
168 my $typenum = $optype_enum{$class};
169 croak "OP::newix: can't understand class $class" unless defined($typenum);
170 print "newop $typenum\t# $class\n";
174 sub B::OP::walkoptree_debug {
176 warn(sprintf("walkoptree: %s\n", peekop($op)));
179 sub B::OP::bytecode {
181 my $next = $op->next;
183 my $sibix = $op->sibling->objix;
185 my $type = $op->type;
187 if ($bypass_nullops) {
188 $next = $next->next while $$next && $next->type == 0;
190 $nextix = $next->objix;
192 printf "# %s\n", peekop($op) if $debug_bc;
194 print "op_next $nextix\n";
195 print "op_sibling $sibix\n" unless $strip_syntree;
196 printf "op_type %s\t# %d\n", "pp_" . $op->name, $type;
197 printf("op_seq %d\n", $op->seq) unless $omit_seq;
198 if ($type || !$compress_nullops) {
199 printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
200 $op->targ, $op->flags, $op->private;
204 sub B::UNOP::bytecode {
206 my $firstix = $op->first->objix;
207 $op->B::OP::bytecode;
208 if (($op->type || !$compress_nullops) && !$strip_syntree) {
209 print "op_first $firstix\n";
213 sub B::LOGOP::bytecode {
215 my $otherix = $op->other->objix;
216 $op->B::UNOP::bytecode;
217 print "op_other $otherix\n";
220 sub B::SVOP::bytecode {
223 my $svix = $sv->objix;
224 $op->B::OP::bytecode;
225 print "op_sv $svix\n";
229 sub B::PADOP::bytecode {
231 my $padix = $op->padix;
232 $op->B::OP::bytecode;
233 print "op_padix $padix\n";
236 sub B::PVOP::bytecode {
239 $op->B::OP::bytecode;
241 # This would be easy except that OP_TRANS uses a PVOP to store an
242 # endian-dependent array of 256 shorts instead of a plain string.
244 if ($op->name eq "trans") {
245 my @shorts = unpack("s256", $pv); # assembler handles endianness
246 print "op_pv_tr ", join(",", @shorts), "\n";
248 printf "newpv %s\nop_pv\n", pvstring($pv);
252 sub B::BINOP::bytecode {
254 my $lastix = $op->last->objix;
255 $op->B::UNOP::bytecode;
256 if (($op->type || !$compress_nullops) && !$strip_syntree) {
257 print "op_last $lastix\n";
261 sub B::LISTOP::bytecode {
263 my $children = $op->children;
264 $op->B::BINOP::bytecode;
265 if (($op->type || !$compress_nullops) && !$strip_syntree) {
266 print "op_children $children\n";
270 sub B::LOOP::bytecode {
272 my $redoopix = $op->redoop->objix;
273 my $nextopix = $op->nextop->objix;
274 my $lastopix = $op->lastop->objix;
275 $op->B::LISTOP::bytecode;
276 print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
279 sub B::COP::bytecode {
281 my $stash = $op->stash;
282 my $stashix = $stash->objix;
283 my $filegv = $op->filegv;
284 my $filegvix = $filegv->objix;
285 my $line = $op->line;
286 my $warnings = $op->warnings;
287 my $warningsix = $warnings->objix;
289 printf "# line %s:%d\n", $filegv->SV->PV, $line;
291 $op->B::OP::bytecode;
292 printf <<"EOT", pvstring($op->label), $op->cop_seq, $op->arybase;
300 cop_warnings $warningsix
306 sub B::PMOP::bytecode {
308 my $replroot = $op->pmreplroot;
309 my $replrootix = $replroot->objix;
310 my $replstartix = $op->pmreplstart->objix;
311 my $opname = $op->name;
312 # pmnext is corrupt in some PMOPs (see misc.t for example)
313 #my $pmnextix = $op->pmnext->objix;
316 # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
317 # argument to a split) stores a GV in op_pmreplroot instead
318 # of a substitution syntax tree. We don't want to walk that...
319 if ($opname eq "pushre") {
322 walkoptree($replroot, "bytecode");
325 $op->B::LISTOP::bytecode;
326 if ($opname eq "pushre") {
327 printf "op_pmreplrootgv $replrootix\n";
329 print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
331 my $re = pvstring($op->precomp);
332 # op_pmnext omitted since a perl bug means it's sometime corrupt
333 printf <<"EOT", $op->pmflags, $op->pmpermflags;
341 sub B::SV::bytecode {
343 return if saved($sv);
345 my $refcnt = $sv->REFCNT;
346 my $flags = sprintf("0x%x", $sv->FLAGS);
348 print "sv_refcnt $refcnt\nsv_flags $flags\n";
352 sub B::PV::bytecode {
354 return if saved($sv);
355 $sv->B::SV::bytecode;
356 printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
359 sub B::IV::bytecode {
361 return if saved($sv);
363 $sv->B::SV::bytecode;
364 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
367 sub B::NV::bytecode {
369 return if saved($sv);
370 $sv->B::SV::bytecode;
371 printf "xnv %s\n", $sv->NVX;
374 sub B::RV::bytecode {
376 return if saved($sv);
378 my $rvix = $rv->objix;
380 $sv->B::SV::bytecode;
384 sub B::PVIV::bytecode {
386 return if saved($sv);
388 $sv->B::PV::bytecode;
389 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
392 sub B::PVNV::bytecode {
394 my $flag = shift || 0;
395 # The $flag argument is passed through PVMG::bytecode by BM::bytecode
396 # and AV::bytecode and indicates special handling. $flag = 1 is used by
397 # BM::bytecode and means that we should ensure we save the whole B-M
398 # table. It consists of 257 bytes (256 char array plus a final \0)
399 # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
400 # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
401 # call SV::bytecode instead of saving PV and calling NV::bytecode since
402 # PV/NV/IV stuff is different for AVs.
403 return if saved($sv);
405 $sv->B::SV::bytecode;
408 $sv->B::IV::bytecode;
409 printf "xnv %s\n", $sv->NVX;
411 $pv .= "\0" . $sv->TABLE;
412 printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
414 printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
419 sub B::PVMG::bytecode {
420 my ($sv, $flag) = @_;
421 # See B::PVNV::bytecode for an explanation of $flag.
422 return if saved($sv);
423 # XXX We assume SvSTASH is already saved and don't save it later ourselves
424 my $stashix = $sv->SvSTASH->objix;
425 my @mgchain = $sv->MAGIC;
428 # We need to traverse the magic chain and get objix for each OBJ
429 # field *before* we do B::PVNV::bytecode since objix overwrites
430 # the sv register. However, we need to write the magic-saving
431 # bytecode *after* B::PVNV::bytecode since sv isn't initialised
432 # to refer to $sv until then.
434 @mgobjix = map($_->OBJ->objix, @mgchain);
435 $sv->B::PVNV::bytecode($flag);
436 print "xmg_stash $stashix\n";
437 foreach $mg (@mgchain) {
438 printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
439 cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
443 sub B::PVLV::bytecode {
445 return if saved($sv);
446 $sv->B::PVMG::bytecode;
447 printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
454 sub B::BM::bytecode {
456 return if saved($sv);
457 # See PVNV::bytecode for an explanation of what the argument does
458 $sv->B::PVMG::bytecode(1);
459 printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
460 $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
463 sub B::GV::bytecode {
465 return if saved($gv);
468 my $gvname = $gv->NAME;
469 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
471 my $egvix = $egv->objix;
473 printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS, $gv->LINE;
478 my $refcnt = $gv->REFCNT;
479 printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
480 my $gvrefcnt = $gv->GvREFCNT;
481 printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
482 if ($gvrefcnt > 1 && $ix != $egvix) {
483 print "gp_share $egvix\n";
485 if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
487 my @subfield_names = qw(SV AV HV CV FILEGV FORM IO);
488 my @subfields = map($gv->$_(), @subfield_names);
489 my @ixes = map($_->objix, @subfields);
490 # Reset sv register for $gv
492 for ($i = 0; $i < @ixes; $i++) {
493 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
495 # Now save all the subfields
497 foreach $sv (@subfields) {
504 sub B::HV::bytecode {
506 return if saved($hv);
508 my $name = $hv->NAME;
511 # It's an ordinary HV. Stashes have NAME set and need no further
512 # saving beyond the gv_stashpv that $hv->objix already ensures.
513 my @contents = $hv->ARRAY;
515 for ($i = 1; $i < @contents; $i += 2) {
516 push(@ixes, $contents[$i]->objix);
518 for ($i = 1; $i < @contents; $i += 2) {
519 $contents[$i]->bytecode;
522 for ($i = 0; $i < @contents; $i += 2) {
523 printf("newpv %s\nhv_store %d\n",
524 pvstring($contents[$i]), $ixes[$i / 2]);
526 printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
530 sub B::AV::bytecode {
532 return if saved($av);
534 my $fill = $av->FILL;
539 @ixes = map($_->objix, @array);
541 foreach $sv (@array) {
545 # See PVNV::bytecode for the meaning of the flag argument of 2.
546 $av->B::PVMG::bytecode(2);
547 # Recover sv register and set AvMAX and AvFILL to -1 (since we
548 # create an AV with NEWSV and SvUPGRADE rather than doing newAV
549 # which is what sets AvMAX and AvFILL.
551 printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
554 foreach $elix (@ixes) {
555 print "av_push $elix\n";
559 print "av_extend $max\n";
564 sub B::CV::bytecode {
566 return if saved($cv);
568 $cv->B::PVMG::bytecode;
570 my @subfield_names = qw(ROOT START STASH GV FILEGV PADLIST OUTSIDE);
571 my @subfields = map($cv->$_(), @subfield_names);
572 my @ixes = map($_->objix, @subfields);
573 # Save OP tree from CvROOT (first element of @subfields)
574 my $root = shift @subfields;
576 walkoptree($root, "bytecode");
578 # Reset sv register for $cv (since above ->objix calls stomped on it)
580 for ($i = 0; $i < @ixes; $i++) {
581 printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
583 printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->FLAGS;
584 # Now save all the subfields (except for CvROOT which was handled
585 # above) and CvSTART (now the initial element of @subfields).
586 shift @subfields; # bye-bye CvSTART
588 foreach $sv (@subfields) {
593 sub B::IO::bytecode {
595 return if saved($io);
597 my $top_gv = $io->TOP_GV;
598 my $top_gvix = $top_gv->objix;
599 my $fmt_gv = $io->FMT_GV;
600 my $fmt_gvix = $fmt_gv->objix;
601 my $bottom_gv = $io->BOTTOM_GV;
602 my $bottom_gvix = $bottom_gv->objix;
604 $io->B::PVMG::bytecode;
606 print "xio_top_gv $top_gvix\n";
607 print "xio_fmt_gv $fmt_gvix\n";
608 print "xio_bottom_gv $bottom_gvix\n";
610 foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
611 printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
613 foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
614 printf "xio_%s %d\n", lc($field), $io->$field();
616 printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
619 $bottom_gv->bytecode;
622 sub B::SPECIAL::bytecode {
623 # nothing extra needs doing
626 sub bytecompile_object {
629 svref_2object($sv)->bytecode;
633 sub B::GV::bytecodecv {
636 if ($$cv && !saved($cv)) {
638 warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
639 $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
645 sub bytecompile_main {
646 my $curpad = (comppadlist->ARRAY)[1];
647 my $curpadix = $curpad->objix;
649 walkoptree(main_root, "bytecode");
650 warn "done main program, now walking symbol table\n" if $debug_bc;
651 my ($pack, %exclude);
652 foreach $pack (qw(B O AutoLoader DynaLoader Config DB VMS strict vars
653 FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol
654 SelectSaver blib Cwd))
656 $exclude{$pack."::"} = 1;
658 no strict qw(vars refs);
659 walksymtable(\%{"main::"}, "bytecodecv", sub {
660 warn "considering $_[0]\n" if $debug_bc;
661 return !defined($exclude{$_[0]});
664 printf "main_root %d\n", main_root->objix;
665 printf "main_start %d\n", main_start->objix;
666 printf "curpad $curpadix\n";
667 # XXX Do min_intro_pending and max_intro_pending matter?
671 sub prepare_assemble {
672 my $newfh = IO::File->new_tmpfile;
680 seek($fh, 0, 0); # rewind the temporary file
681 assemble_fh($fh, sub { print OUT @_ });
686 my ($option, $opt, $arg);
687 open(OUT, ">&STDOUT");
691 while ($option = shift @options) {
692 if ($option =~ /^-(.)(.*)/) {
696 unshift @options, $option;
699 if ($opt eq "-" && $arg eq "-") {
702 } elsif ($opt eq "o") {
703 $arg ||= shift @options;
704 open(OUT, ">$arg") or return "$arg: $!\n";
706 } elsif ($opt eq "D") {
707 $arg ||= shift @options;
708 foreach $arg (split(//, $arg)) {
712 } elsif ($arg eq "o") {
714 } elsif ($arg eq "a") {
715 B::Assembler::debug(1);
716 } elsif ($arg eq "C") {
720 } elsif ($opt eq "v") {
722 } elsif ($opt eq "m") {
724 } elsif ($opt eq "S") {
726 } elsif ($opt eq "f") {
727 $arg ||= shift @options;
728 my $value = $arg !~ s/^no-//;
730 my $ref = $optimise{$arg};
734 warn qq(ignoring unknown optimisation option "$arg"\n);
736 } elsif ($opt eq "O") {
737 $arg = 1 if $arg eq "";
739 foreach $ref (values %optimise) {
749 $compress_nullops = 1;
758 $newfh = prepare_assemble() unless $no_assemble;
759 foreach $objname (@options) {
760 eval "bytecompile_object(\\$objname)";
762 do_assemble($newfh) unless $no_assemble;
767 $newfh = prepare_assemble() unless $no_assemble;
769 do_assemble($newfh) unless $no_assemble;
780 B::Bytecode - Perl compiler's bytecode backend
784 perl -MO=Bytecode[,OPTIONS] foo.pl
788 This compiler backend takes Perl source and generates a
789 platform-independent bytecode encapsulating code to load the
790 internal structures perl uses to run your program. When the
791 generated bytecode is loaded in, your program is ready to run,
792 reducing the time which perl would have taken to load and parse
793 your program into its internal semi-compiled form. That means that
794 compiling with this backend will not help improve the runtime
795 execution speed of your program but may improve the start-up time.
796 Depending on the environment in which your program runs this may
797 or may not be a help.
799 The resulting bytecode can be run with a special byteperl executable
800 or (for non-main programs) be loaded via the C<byteload_fh> function
805 If there are any non-option arguments, they are taken to be names of
806 objects to be saved (probably doesn't work properly yet). Without
807 extra arguments, it saves the main program.
813 Output to filename instead of STDOUT.
817 Force end of options.
821 Force optimisations on or off one at a time. Each can be preceded
822 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
824 =item B<-fcompress-nullops>
826 Only fills in the necessary fields of ops which have
827 been optimised away by perl's internal compiler.
829 =item B<-fomit-sequence-numbers>
831 Leaves out code to fill in the op_seq field of all ops
832 which is only used by perl's internal compiler.
834 =item B<-fbypass-nullops>
836 If op->op_next ever points to a NULLOP, replaces the op_next field
837 with the first non-NULLOP in the path of execution.
839 =item B<-fstrip-syntax-tree>
841 Leaves out code to fill in the pointers which link the internal syntax
842 tree together. They're not needed at run-time but leaving them out
843 will make it impossible to recompile or disassemble the resulting
844 program. It will also stop C<goto label> statements from working.
848 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
849 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
850 B<-O6> adds B<-fstrip-syntax-tree>.
854 Debug options (concatenated or separate flags like C<perl -D>).
858 Prints each OP as it's processed.
862 Print debugging information about bytecompiler progress.
866 Tells the (bytecode) assembler to include source assembler lines
867 in its output as bytecode comments.
871 Prints each CV taken from the final symbol tree walk.
875 Output (bytecode) assembler source rather than piping it
876 through the assembler and outputting bytecode.
880 Compile as a module rather than a standalone program. Currently this
881 just means that the bytecodes for initialising C<main_start>,
882 C<main_root> and C<curpad> are omitted.
888 perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
890 perl -MO=Bytecode,-S foo.pl > foo.S
891 assemble foo.S > foo.plc
893 Note that C<assemble> lives in the C<B> subdirectory of your perl
894 library directory. The utility called perlcc may also be used to
895 help make use of this compiler.
897 perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
901 Plenty. Current status: experimental.
905 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>