3 # Copyright (c) 1996-1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
13 use B qw(minus_c main_cv main_root main_start comppadlist
14 class peekop walkoptree svref_2object cstring walksymtable
15 SVf_POK SVp_POK SVf_IOK SVp_IOK
17 use B::Asmdata qw(@optype @specialsv_name);
18 use B::Assembler qw(assemble_fh);
22 for ($i = 0; $i < @optype; $i++) {
23 $optype_enum{$optype[$i]} = $i;
26 # Following is SVf_POK|SVp_POK
27 # XXX Shouldn't be hardwired
28 sub POK () { SVf_POK|SVp_POK }
30 # Following is SVf_IOK|SVp_IOK
31 # XXX Shouldn't be hardwired
32 sub IOK () { SVf_IOK|SVp_IOK }
34 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
37 # Optimisation options. On the command line, use hyphens instead of
38 # underscores for compatibility with gcc-style options. We use
39 # underscores here because they are OK in (strict) barewords.
40 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
41 my %optimise = (strip_syntax_tree => \$strip_syntree,
42 compress_nullops => \$compress_nullops,
43 omit_sequence_numbers => \$omit_seq,
44 bypass_nullops => \$bypass_nullops);
47 my %symtable; # maps object addresses to object indices.
48 # Filled in at allocation (newsv/newop) time.
49 my %saved; # maps object addresses (for SVish classes) to "saved yet?"
50 # flag. Set at FOO::bytecode time usually by SV::bytecode.
51 # Manipulated via saved(), mark_saved(), unmark_saved().
53 my $svix = -1; # we keep track of when the sv register contains an element
54 # of the object table to avoid unnecessary repeated
55 # consecutive ldsv instructions.
56 my $opix = -1; # Ditto for the op register.
97 return cstring($str . "\0");
103 sub saved { $saved{${$_[0]}} }
104 sub mark_saved { $saved{${$_[0]}} = 1 }
105 sub unmark_saved { $saved{${$_[0]}} = 0 }
107 sub debug { $debug_bc = shift }
111 warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
116 # objix may stomp on the op register (for op objects)
117 # or the sv register (for SV objects)
119 sub B::OBJECT::objix {
121 my $ix = $symtable{$$obj};
125 $obj->newix($nextix);
126 return $symtable{$$obj} = $nextix++;
132 printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
138 my $gvname = $gv->NAME;
139 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
140 print "gv_fetchpv $name\n";
146 my $name = $hv->NAME;
149 printf "gv_stashpv %s\n", cstring($name);
152 # It's an ordinary HV. Fall back to ordinary newix method
153 $hv->B::SV::newix($ix);
157 sub B::SPECIAL::newix {
159 # Special case. $$sv is not the address of the SV but an
160 # index into svspecialsv_list.
161 printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
167 my $class = class($op);
168 my $typenum = $optype_enum{$class};
169 croak "OP::newix: can't understand class $class" unless defined($typenum);
170 print "newop $typenum\t# $class\n";
174 sub B::OP::walkoptree_debug {
176 warn(sprintf("walkoptree: %s\n", peekop($op)));
179 sub B::OP::bytecode {
181 my $next = $op->next;
183 my $sibix = $op->sibling->objix;
185 my $type = $op->type;
187 if ($bypass_nullops) {
188 $next = $next->next while $$next && $next->type == 0;
190 $nextix = $next->objix;
192 printf "# %s\n", peekop($op) if $debug_bc;
194 print "op_next $nextix\n";
195 print "op_sibling $sibix\n" unless $strip_syntree;
196 printf "op_type %s\t# %d\n", "pp_" . $op->name, $type;
197 printf("op_seq %d\n", $op->seq) unless $omit_seq;
198 if ($type || !$compress_nullops) {
199 printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
200 $op->targ, $op->flags, $op->private;
204 sub B::UNOP::bytecode {
206 my $firstix = $op->first->objix;
207 $op->B::OP::bytecode;
208 if (($op->type || !$compress_nullops) && !$strip_syntree) {
209 print "op_first $firstix\n";
213 sub B::LOGOP::bytecode {
215 my $otherix = $op->other->objix;
216 $op->B::UNOP::bytecode;
217 print "op_other $otherix\n";
220 sub B::SVOP::bytecode {
223 my $svix = $sv->objix;
224 $op->B::OP::bytecode;
225 print "op_sv $svix\n";
229 sub B::PADOP::bytecode {
231 my $padix = $op->padix;
232 $op->B::OP::bytecode;
233 print "op_padix $padix\n";
236 sub B::PVOP::bytecode {
239 $op->B::OP::bytecode;
241 # This would be easy except that OP_TRANS uses a PVOP to store an
242 # endian-dependent array of 256 shorts instead of a plain string.
244 if ($op->name eq "trans") {
245 my @shorts = unpack("s256", $pv); # assembler handles endianness
246 print "op_pv_tr ", join(",", @shorts), "\n";
248 printf "newpv %s\nop_pv\n", pvstring($pv);
252 sub B::BINOP::bytecode {
254 my $lastix = $op->last->objix;
255 $op->B::UNOP::bytecode;
256 if (($op->type || !$compress_nullops) && !$strip_syntree) {
257 print "op_last $lastix\n";
261 sub B::LISTOP::bytecode {
263 my $children = $op->children;
264 $op->B::BINOP::bytecode;
265 if (($op->type || !$compress_nullops) && !$strip_syntree) {
266 print "op_children $children\n";
270 sub B::LOOP::bytecode {
272 my $redoopix = $op->redoop->objix;
273 my $nextopix = $op->nextop->objix;
274 my $lastopix = $op->lastop->objix;
275 $op->B::LISTOP::bytecode;
276 print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
279 sub B::COP::bytecode {
281 my $stashpv = $op->stashpv;
282 my $file = $op->file;
283 my $line = $op->line;
284 my $warnings = $op->warnings;
285 my $warningsix = $warnings->objix;
287 printf "# line %s:%d\n", $file, $line;
289 $op->B::OP::bytecode;
290 printf <<"EOT", pvstring($op->label), pvstring($stashpv), $op->cop_seq, pvstring($file), $op->arybase;
300 cop_warnings $warningsix
304 sub B::PMOP::bytecode {
306 my $replroot = $op->pmreplroot;
307 my $replrootix = $replroot->objix;
308 my $replstartix = $op->pmreplstart->objix;
309 my $opname = $op->name;
310 # pmnext is corrupt in some PMOPs (see misc.t for example)
311 #my $pmnextix = $op->pmnext->objix;
314 # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
315 # argument to a split) stores a GV in op_pmreplroot instead
316 # of a substitution syntax tree. We don't want to walk that...
317 if ($opname eq "pushre") {
320 walkoptree($replroot, "bytecode");
323 $op->B::LISTOP::bytecode;
324 if ($opname eq "pushre") {
325 printf "op_pmreplrootgv $replrootix\n";
327 print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
329 my $re = pvstring($op->precomp);
330 # op_pmnext omitted since a perl bug means it's sometime corrupt
331 printf <<"EOT", $op->pmflags, $op->pmpermflags;
339 sub B::SV::bytecode {
341 return if saved($sv);
343 my $refcnt = $sv->REFCNT;
344 my $flags = sprintf("0x%x", $sv->FLAGS);
346 print "sv_refcnt $refcnt\nsv_flags $flags\n";
350 sub B::PV::bytecode {
352 return if saved($sv);
353 $sv->B::SV::bytecode;
354 printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
357 sub B::IV::bytecode {
359 return if saved($sv);
361 $sv->B::SV::bytecode;
362 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
365 sub B::NV::bytecode {
367 return if saved($sv);
368 $sv->B::SV::bytecode;
369 printf "xnv %s\n", $sv->NVX;
372 sub B::RV::bytecode {
374 return if saved($sv);
376 my $rvix = $rv->objix;
378 $sv->B::SV::bytecode;
382 sub B::PVIV::bytecode {
384 return if saved($sv);
386 $sv->B::PV::bytecode;
387 printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
390 sub B::PVNV::bytecode {
392 my $flag = shift || 0;
393 # The $flag argument is passed through PVMG::bytecode by BM::bytecode
394 # and AV::bytecode and indicates special handling. $flag = 1 is used by
395 # BM::bytecode and means that we should ensure we save the whole B-M
396 # table. It consists of 257 bytes (256 char array plus a final \0)
397 # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
398 # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
399 # call SV::bytecode instead of saving PV and calling NV::bytecode since
400 # PV/NV/IV stuff is different for AVs.
401 return if saved($sv);
403 $sv->B::SV::bytecode;
406 $sv->B::IV::bytecode;
407 printf "xnv %s\n", $sv->NVX;
409 $pv .= "\0" . $sv->TABLE;
410 printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
412 printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
417 sub B::PVMG::bytecode {
418 my ($sv, $flag) = @_;
419 # See B::PVNV::bytecode for an explanation of $flag.
420 return if saved($sv);
421 # XXX We assume SvSTASH is already saved and don't save it later ourselves
422 my $stashix = $sv->SvSTASH->objix;
423 my @mgchain = $sv->MAGIC;
426 # We need to traverse the magic chain and get objix for each OBJ
427 # field *before* we do B::PVNV::bytecode since objix overwrites
428 # the sv register. However, we need to write the magic-saving
429 # bytecode *after* B::PVNV::bytecode since sv isn't initialised
430 # to refer to $sv until then.
432 @mgobjix = map($_->OBJ->objix, @mgchain);
433 $sv->B::PVNV::bytecode($flag);
434 print "xmg_stash $stashix\n";
435 foreach $mg (@mgchain) {
436 printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
437 cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
441 sub B::PVLV::bytecode {
443 return if saved($sv);
444 $sv->B::PVMG::bytecode;
445 printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
452 sub B::BM::bytecode {
454 return if saved($sv);
455 # See PVNV::bytecode for an explanation of what the argument does
456 $sv->B::PVMG::bytecode(1);
457 printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
458 $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
461 sub B::GV::bytecode {
463 return if saved($gv);
467 printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS;
471 my $refcnt = $gv->REFCNT;
472 printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
473 return if $gv->is_empty;
474 printf <<"EOT", $gv->LINE, pvstring($gv->FILE);
479 my $gvname = $gv->NAME;
480 my $name = cstring($gv->STASH->NAME . "::" . $gvname);
482 my $egvix = $egv->objix;
483 my $gvrefcnt = $gv->GvREFCNT;
484 printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
485 if ($gvrefcnt > 1 && $ix != $egvix) {
486 print "gp_share $egvix\n";
488 if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
490 my @subfield_names = qw(SV AV HV CV FORM IO);
491 my @subfields = map($gv->$_(), @subfield_names);
492 my @ixes = map($_->objix, @subfields);
493 # Reset sv register for $gv
495 for ($i = 0; $i < @ixes; $i++) {
496 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
498 # Now save all the subfields
500 foreach $sv (@subfields) {
507 sub B::HV::bytecode {
509 return if saved($hv);
511 my $name = $hv->NAME;
514 # It's an ordinary HV. Stashes have NAME set and need no further
515 # saving beyond the gv_stashpv that $hv->objix already ensures.
516 my @contents = $hv->ARRAY;
518 for ($i = 1; $i < @contents; $i += 2) {
519 push(@ixes, $contents[$i]->objix);
521 for ($i = 1; $i < @contents; $i += 2) {
522 $contents[$i]->bytecode;
525 for ($i = 0; $i < @contents; $i += 2) {
526 printf("newpv %s\nhv_store %d\n",
527 pvstring($contents[$i]), $ixes[$i / 2]);
529 printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
533 sub B::AV::bytecode {
535 return if saved($av);
537 my $fill = $av->FILL;
542 @ixes = map($_->objix, @array);
544 foreach $sv (@array) {
548 # See PVNV::bytecode for the meaning of the flag argument of 2.
549 $av->B::PVMG::bytecode(2);
550 # Recover sv register and set AvMAX and AvFILL to -1 (since we
551 # create an AV with NEWSV and SvUPGRADE rather than doing newAV
552 # which is what sets AvMAX and AvFILL.
554 printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
557 foreach $elix (@ixes) {
558 print "av_push $elix\n";
562 print "av_extend $max\n";
567 sub B::CV::bytecode {
569 return if saved($cv);
571 $cv->B::PVMG::bytecode;
573 my @subfield_names = qw(ROOT START STASH GV PADLIST OUTSIDE);
574 my @subfields = map($cv->$_(), @subfield_names);
575 my @ixes = map($_->objix, @subfields);
576 # Save OP tree from CvROOT (first element of @subfields)
577 my $root = shift @subfields;
579 walkoptree($root, "bytecode");
581 # Reset sv register for $cv (since above ->objix calls stomped on it)
583 for ($i = 0; $i < @ixes; $i++) {
584 printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
586 printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->CvFLAGS;
587 printf "newpv %s\nxcv_file\n", pvstring($cv->FILE);
588 # Now save all the subfields (except for CvROOT which was handled
589 # above) and CvSTART (now the initial element of @subfields).
590 shift @subfields; # bye-bye CvSTART
592 foreach $sv (@subfields) {
597 sub B::IO::bytecode {
599 return if saved($io);
601 my $top_gv = $io->TOP_GV;
602 my $top_gvix = $top_gv->objix;
603 my $fmt_gv = $io->FMT_GV;
604 my $fmt_gvix = $fmt_gv->objix;
605 my $bottom_gv = $io->BOTTOM_GV;
606 my $bottom_gvix = $bottom_gv->objix;
608 $io->B::PVMG::bytecode;
610 print "xio_top_gv $top_gvix\n";
611 print "xio_fmt_gv $fmt_gvix\n";
612 print "xio_bottom_gv $bottom_gvix\n";
614 foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
615 printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
617 foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
618 printf "xio_%s %d\n", lc($field), $io->$field();
620 printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
623 $bottom_gv->bytecode;
626 sub B::SPECIAL::bytecode {
627 # nothing extra needs doing
630 sub bytecompile_object {
633 svref_2object($sv)->bytecode;
637 sub B::GV::bytecodecv {
640 if ($$cv && !saved($cv)) {
642 warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
643 $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
649 sub bytecompile_main {
650 my $curpad = (comppadlist->ARRAY)[1];
651 my $curpadix = $curpad->objix;
653 walkoptree(main_root, "bytecode");
654 warn "done main program, now walking symbol table\n" if $debug_bc;
655 my ($pack, %exclude);
656 foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS strict vars
657 FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol warnings
658 attributes File::Spec SelectSaver blib Cwd))
660 $exclude{$pack."::"} = 1;
662 no strict qw(vars refs);
663 walksymtable(\%{"main::"}, "bytecodecv", sub {
664 warn "considering $_[0]\n" if $debug_bc;
665 return !defined($exclude{$_[0]});
668 printf "main_root %d\n", main_root->objix;
669 printf "main_start %d\n", main_start->objix;
670 printf "curpad $curpadix\n";
671 # XXX Do min_intro_pending and max_intro_pending matter?
675 sub prepare_assemble {
676 my $newfh = IO::File->new_tmpfile;
684 seek($fh, 0, 0); # rewind the temporary file
685 assemble_fh($fh, sub { print OUT @_ });
690 my ($option, $opt, $arg);
691 open(OUT, ">&STDOUT");
695 while ($option = shift @options) {
696 if ($option =~ /^-(.)(.*)/) {
700 unshift @options, $option;
703 if ($opt eq "-" && $arg eq "-") {
706 } elsif ($opt eq "o") {
707 $arg ||= shift @options;
708 open(OUT, ">$arg") or return "$arg: $!\n";
710 } elsif ($opt eq "a") {
711 $arg ||= shift @options;
712 open(OUT, ">>$arg") or return "$arg: $!\n";
714 } elsif ($opt eq "D") {
715 $arg ||= shift @options;
716 foreach $arg (split(//, $arg)) {
720 } elsif ($arg eq "o") {
722 } elsif ($arg eq "a") {
723 B::Assembler::debug(1);
724 } elsif ($arg eq "C") {
728 } elsif ($opt eq "v") {
730 } elsif ($opt eq "m") {
732 } elsif ($opt eq "S") {
734 } elsif ($opt eq "f") {
735 $arg ||= shift @options;
736 my $value = $arg !~ s/^no-//;
738 my $ref = $optimise{$arg};
742 warn qq(ignoring unknown optimisation option "$arg"\n);
744 } elsif ($opt eq "O") {
745 $arg = 1 if $arg eq "";
747 foreach $ref (values %optimise) {
757 $compress_nullops = 1;
766 $newfh = prepare_assemble() unless $no_assemble;
767 foreach $objname (@options) {
768 eval "bytecompile_object(\\$objname)";
770 do_assemble($newfh) unless $no_assemble;
775 $newfh = prepare_assemble() unless $no_assemble;
777 do_assemble($newfh) unless $no_assemble;
788 B::Bytecode - Perl compiler's bytecode backend
792 perl -MO=Bytecode[,OPTIONS] foo.pl
796 This compiler backend takes Perl source and generates a
797 platform-independent bytecode encapsulating code to load the
798 internal structures perl uses to run your program. When the
799 generated bytecode is loaded in, your program is ready to run,
800 reducing the time which perl would have taken to load and parse
801 your program into its internal semi-compiled form. That means that
802 compiling with this backend will not help improve the runtime
803 execution speed of your program but may improve the start-up time.
804 Depending on the environment in which your program runs this may
805 or may not be a help.
807 The resulting bytecode can be run with a special byteperl executable
808 or (for non-main programs) be loaded via the C<byteload_fh> function
813 If there are any non-option arguments, they are taken to be names of
814 objects to be saved (probably doesn't work properly yet). Without
815 extra arguments, it saves the main program.
821 Output to filename instead of STDOUT.
825 Append output to filename.
829 Force end of options.
833 Force optimisations on or off one at a time. Each can be preceded
834 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
836 =item B<-fcompress-nullops>
838 Only fills in the necessary fields of ops which have
839 been optimised away by perl's internal compiler.
841 =item B<-fomit-sequence-numbers>
843 Leaves out code to fill in the op_seq field of all ops
844 which is only used by perl's internal compiler.
846 =item B<-fbypass-nullops>
848 If op->op_next ever points to a NULLOP, replaces the op_next field
849 with the first non-NULLOP in the path of execution.
851 =item B<-fstrip-syntax-tree>
853 Leaves out code to fill in the pointers which link the internal syntax
854 tree together. They're not needed at run-time but leaving them out
855 will make it impossible to recompile or disassemble the resulting
856 program. It will also stop C<goto label> statements from working.
860 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
861 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
862 B<-O6> adds B<-fstrip-syntax-tree>.
866 Debug options (concatenated or separate flags like C<perl -D>).
870 Prints each OP as it's processed.
874 Print debugging information about bytecompiler progress.
878 Tells the (bytecode) assembler to include source assembler lines
879 in its output as bytecode comments.
883 Prints each CV taken from the final symbol tree walk.
887 Output (bytecode) assembler source rather than piping it
888 through the assembler and outputting bytecode.
892 Compile as a module rather than a standalone program. Currently this
893 just means that the bytecodes for initialising C<main_start>,
894 C<main_root> and C<curpad> are omitted.
900 perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
902 perl -MO=Bytecode,-S foo.pl > foo.S
903 assemble foo.S > foo.plc
905 Note that C<assemble> lives in the C<B> subdirectory of your perl
906 library directory. The utility called perlcc may also be used to
907 help make use of this compiler.
909 perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
913 Plenty. Current status: experimental.
917 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>