ext/B/B/Bytecode.pm

   1 #      Bytecode.pm
   2 #
   3 #      Copyright (c) 1996-1998 Malcolm Beattie
   4 #
   5 #      You may distribute under the terms of either the GNU General Public
   6 #      License or the Artistic License, as specified in the README file.
   7 #
   8 package B::Bytecode;
   9 use strict;
  10 use Carp;
  11 use IO::File;
  12
  13 use B qw(minus_c main_cv main_root main_start comppadlist
  14          class peekop walkoptree svref_2object cstring walksymtable
  15          SVf_POK SVp_POK SVf_IOK SVp_IOK
  16         );
  17 use B::Asmdata qw(@optype @specialsv_name);
  18 use B::Assembler qw(assemble_fh);
  19
  20 my %optype_enum;
  21 my $i;
  22 for ($i = 0; $i < @optype; $i++) {
  23     $optype_enum{$optype[$i]} = $i;
  24 }
  25
  26 # Following is SVf_POK|SVp_POK
  27 # XXX Shouldn't be hardwired
  28 sub POK () { SVf_POK|SVp_POK }
  29
  30 # Following is SVf_IOK|SVp_IOK
  31 # XXX Shouldn't be hardwired
  32 sub IOK () { SVf_IOK|SVp_IOK }
  33
  34 my ($verbose, $module_only, $no_assemble, $debug_bc, $debug_cv);
  35 my $assembler_pid;
  36
  37 # Optimisation options. On the command line, use hyphens instead of
  38 # underscores for compatibility with gcc-style options. We use
  39 # underscores here because they are OK in (strict) barewords.
  40 my ($strip_syntree, $compress_nullops, $omit_seq, $bypass_nullops);
  41 my %optimise = (strip_syntax_tree       => \$strip_syntree,
  42                 compress_nullops        => \$compress_nullops,
  43                 omit_sequence_numbers   => \$omit_seq,
  44                 bypass_nullops          => \$bypass_nullops);
  45
  46 my $nextix = 0;
  47 my %symtable;   # maps object addresses to object indices.
  48                 # Filled in at allocation (newsv/newop) time.
  49 my %saved;      # maps object addresses (for SVish classes) to "saved yet?"
  50                 # flag. Set at FOO::bytecode time usually by SV::bytecode.
  51                 # Manipulated via saved(), mark_saved(), unmark_saved().
  52
  53 my $svix = -1;  # we keep track of when the sv register contains an element
  54                 # of the object table to avoid unnecessary repeated
  55                 # consecutive ldsv instructions.
  56 my $opix = -1;  # Ditto for the op register.
  57
  58 sub ldsv {
  59     my $ix = shift;
  60     if ($ix != $svix) {
  61         print "ldsv $ix\n";
  62         $svix = $ix;
  63     }
  64 }
  65
  66 sub stsv {
  67     my $ix = shift;
  68     print "stsv $ix\n";
  69     $svix = $ix;
  70 }
  71
  72 sub set_svix {
  73     $svix = shift;
  74 }
  75
  76 sub ldop {
  77     my $ix = shift;
  78     if ($ix != $opix) {
  79         print "ldop $ix\n";
  80         $opix = $ix;
  81     }
  82 }
  83
  84 sub stop {
  85     my $ix = shift;
  86     print "stop $ix\n";
  87     $opix = $ix;
  88 }
  89
  90 sub set_opix {
  91     $opix = shift;
  92 }
  93
  94 sub pvstring {
  95     my $str = shift;
  96     if (defined($str)) {
  97         return cstring($str . "\0");
  98     } else {
  99         return '""';
 100     }
 101 }
 102
 103 sub saved { $saved{${$_[0]}} }
 104 sub mark_saved { $saved{${$_[0]}} = 1 }
 105 sub unmark_saved { $saved{${$_[0]}} = 0 }
 106
 107 sub debug { $debug_bc = shift }
 108
 109 sub B::OBJECT::nyi {
 110     my $obj = shift;
 111     warn sprintf("bytecode save method for %s (0x%x) not yet implemented\n",
 112                  class($obj), $$obj);
 113 }
 114
 115 #
 116 # objix may stomp on the op register (for op objects)
 117 # or the sv register (for SV objects)
 118 #
 119 sub B::OBJECT::objix {
 120     my $obj = shift;
 121     my $ix = $symtable{$$obj};
 122     if (defined($ix)) {
 123         return $ix;
 124     } else {
 125         $obj->newix($nextix);
 126         return $symtable{$$obj} = $nextix++;
 127     }
 128 }
 129
 130 sub B::SV::newix {
 131     my ($sv, $ix) = @_;
 132     printf "newsv %d\t# %s\n", $sv->FLAGS & 0xf, class($sv);
 133     stsv($ix);
 134 }
 135
 136 sub B::GV::newix {
 137     my ($gv, $ix) = @_;
 138     my $gvname = $gv->NAME;
 139     my $name = cstring($gv->STASH->NAME . "::" . $gvname);
 140     print "gv_fetchpv $name\n";
 141     stsv($ix);
 142 }
 143
 144 sub B::HV::newix {
 145     my ($hv, $ix) = @_;
 146     my $name = $hv->NAME;
 147     if ($name) {
 148         # It's a stash
 149         printf "gv_stashpv %s\n", cstring($name);
 150         stsv($ix);
 151     } else {
 152         # It's an ordinary HV. Fall back to ordinary newix method
 153         $hv->B::SV::newix($ix);
 154     }
 155 }
 156
 157 sub B::SPECIAL::newix {
 158     my ($sv, $ix) = @_;
 159     # Special case. $$sv is not the address of the SV but an
 160     # index into svspecialsv_list.
 161     printf "ldspecsv $$sv\t# %s\n", $specialsv_name[$$sv];
 162     stsv($ix);
 163 }
 164
 165 sub B::OP::newix {
 166     my ($op, $ix) = @_;
 167     my $class = class($op);
 168     my $typenum = $optype_enum{$class};
 169     croak "OP::newix: can't understand class $class" unless defined($typenum);
 170     print "newop $typenum\t# $class\n";
 171     stop($ix);
 172 }
 173
 174 sub B::OP::walkoptree_debug {
 175     my $op = shift;
 176     warn(sprintf("walkoptree: %s\n", peekop($op)));
 177 }
 178
 179 sub B::OP::bytecode {
 180     my $op = shift;
 181     my $next = $op->next;
 182     my $nextix;
 183     my $sibix = $op->sibling->objix;
 184     my $ix = $op->objix;
 185     my $type = $op->type;
 186
 187     if ($bypass_nullops) {
 188         $next = $next->next while $$next && $next->type == 0;
 189     }
 190     $nextix = $next->objix;
 191
 192     printf "# %s\n", peekop($op) if $debug_bc;
 193     ldop($ix);
 194     print "op_next $nextix\n";
 195     print "op_sibling $sibix\n" unless $strip_syntree;
 196     printf "op_type %s\t# %d\n", "pp_" . $op->name, $type;
 197     printf("op_seq %d\n", $op->seq) unless $omit_seq;
 198     if ($type || !$compress_nullops) {
 199         printf "op_targ %d\nop_flags 0x%x\nop_private 0x%x\n",
 200             $op->targ, $op->flags, $op->private;
 201     }
 202 }
 203
 204 sub B::UNOP::bytecode {
 205     my $op = shift;
 206     my $firstix = $op->first->objix;
 207     $op->B::OP::bytecode;
 208     if (($op->type || !$compress_nullops) && !$strip_syntree) {
 209         print "op_first $firstix\n";
 210     }
 211 }
 212
 213 sub B::LOGOP::bytecode {
 214     my $op = shift;
 215     my $otherix = $op->other->objix;
 216     $op->B::UNOP::bytecode;
 217     print "op_other $otherix\n";
 218 }
 219
 220 sub B::SVOP::bytecode {
 221     my $op = shift;
 222     my $sv = $op->sv;
 223     my $svix = $sv->objix;
 224     $op->B::OP::bytecode;
 225     print "op_sv $svix\n";
 226     $sv->bytecode;
 227 }
 228
 229 sub B::PADOP::bytecode {
 230     my $op = shift;
 231     my $padix = $op->padix;
 232     $op->B::OP::bytecode;
 233     print "op_padix $padix\n";
 234 }
 235
 236 sub B::PVOP::bytecode {
 237     my $op = shift;
 238     my $pv = $op->pv;
 239     $op->B::OP::bytecode;
 240     #
 241     # This would be easy except that OP_TRANS uses a PVOP to store an
 242     # endian-dependent array of 256 shorts instead of a plain string.
 243     #
 244     if ($op->name eq "trans") {
 245         my @shorts = unpack("s256", $pv); # assembler handles endianness
 246         print "op_pv_tr ", join(",", @shorts), "\n";
 247     } else {
 248         printf "newpv %s\nop_pv\n", pvstring($pv);
 249     }
 250 }
 251
 252 sub B::BINOP::bytecode {
 253     my $op = shift;
 254     my $lastix = $op->last->objix;
 255     $op->B::UNOP::bytecode;
 256     if (($op->type || !$compress_nullops) && !$strip_syntree) {
 257         print "op_last $lastix\n";
 258     }
 259 }
 260
 261 sub B::LISTOP::bytecode {
 262     my $op = shift;
 263     my $children = $op->children;
 264     $op->B::BINOP::bytecode;
 265     if (($op->type || !$compress_nullops) && !$strip_syntree) {
 266         print "op_children $children\n";
 267     }
 268 }
 269
 270 sub B::LOOP::bytecode {
 271     my $op = shift;
 272     my $redoopix = $op->redoop->objix;
 273     my $nextopix = $op->nextop->objix;
 274     my $lastopix = $op->lastop->objix;
 275     $op->B::LISTOP::bytecode;
 276     print "op_redoop $redoopix\nop_nextop $nextopix\nop_lastop $lastopix\n";
 277 }
 278
 279 sub B::COP::bytecode {
 280     my $op = shift;
 281     my $stashpv = $op->stashpv;
 282     my $file = $op->file;
 283     my $line = $op->line;
 284     my $warnings = $op->warnings;
 285     my $warningsix = $warnings->objix;
 286     if ($debug_bc) {
 287         printf "# line %s:%d\n", $file, $line;
 288     }
 289     $op->B::OP::bytecode;
 290     printf <<"EOT", pvstring($op->label), pvstring($stashpv), $op->cop_seq, pvstring($file), $op->arybase;
 291 newpv %s
 292 cop_label
 293 newpv %s
 294 cop_stashpv
 295 cop_seq %d
 296 newpv %s
 297 cop_file
 298 cop_arybase %d
 299 cop_line $line
 300 cop_warnings $warningsix
 301 EOT
 302 }
 303
 304 sub B::PMOP::bytecode {
 305     my $op = shift;
 306     my $replroot = $op->pmreplroot;
 307     my $replrootix = $replroot->objix;
 308     my $replstartix = $op->pmreplstart->objix;
 309     my $opname = $op->name;
 310     # pmnext is corrupt in some PMOPs (see misc.t for example)
 311     #my $pmnextix = $op->pmnext->objix;
 312
 313     if ($$replroot) {
 314         # OP_PUSHRE (a mutated version of OP_MATCH for the regexp
 315         # argument to a split) stores a GV in op_pmreplroot instead
 316         # of a substitution syntax tree. We don't want to walk that...
 317         if ($opname eq "pushre") {
 318             $replroot->bytecode;
 319         } else {
 320             walkoptree($replroot, "bytecode");
 321         }
 322     }
 323     $op->B::LISTOP::bytecode;
 324     if ($opname eq "pushre") {
 325         printf "op_pmreplrootgv $replrootix\n";
 326     } else {
 327         print "op_pmreplroot $replrootix\nop_pmreplstart $replstartix\n";
 328     }
 329     my $re = pvstring($op->precomp);
 330     # op_pmnext omitted since a perl bug means it's sometime corrupt
 331     printf <<"EOT", $op->pmflags, $op->pmpermflags;
 332 op_pmflags 0x%x
 333 op_pmpermflags 0x%x
 334 newpv $re
 335 pregcomp
 336 EOT
 337 }
 338
 339 sub B::SV::bytecode {
 340     my $sv = shift;
 341     return if saved($sv);
 342     my $ix = $sv->objix;
 343     my $refcnt = $sv->REFCNT;
 344     my $flags = sprintf("0x%x", $sv->FLAGS);
 345     ldsv($ix);
 346     print "sv_refcnt $refcnt\nsv_flags $flags\n";
 347     mark_saved($sv);
 348 }
 349
 350 sub B::PV::bytecode {
 351     my $sv = shift;
 352     return if saved($sv);
 353     $sv->B::SV::bytecode;
 354     printf("newpv %s\nxpv\n", pvstring($sv->PV)) if $sv->FLAGS & POK;
 355 }
 356
 357 sub B::IV::bytecode {
 358     my $sv = shift;
 359     return if saved($sv);
 360     my $iv = $sv->IVX;
 361     $sv->B::SV::bytecode;
 362     printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
 363 }
 364
 365 sub B::NV::bytecode {
 366     my $sv = shift;
 367     return if saved($sv);
 368     $sv->B::SV::bytecode;
 369     printf "xnv %s\n", $sv->NVX;
 370 }
 371
 372 sub B::RV::bytecode {
 373     my $sv = shift;
 374     return if saved($sv);
 375     my $rv = $sv->RV;
 376     my $rvix = $rv->objix;
 377     $rv->bytecode;
 378     $sv->B::SV::bytecode;
 379     print "xrv $rvix\n";
 380 }
 381
 382 sub B::PVIV::bytecode {
 383     my $sv = shift;
 384     return if saved($sv);
 385     my $iv = $sv->IVX;
 386     $sv->B::PV::bytecode;
 387     printf "%s $iv\n", $sv->needs64bits ? "xiv64" : "xiv32";
 388 }
 389
 390 sub B::PVNV::bytecode {
 391     my $sv = shift;
 392     my $flag = shift || 0;
 393     # The $flag argument is passed through PVMG::bytecode by BM::bytecode
 394     # and AV::bytecode and indicates special handling. $flag = 1 is used by
 395     # BM::bytecode and means that we should ensure we save the whole B-M
 396     # table. It consists of 257 bytes (256 char array plus a final \0)
 397     # which follow the ordinary PV+\0 and the 257 bytes are *not* reflected
 398     # in SvCUR. $flag = 2 is used by AV::bytecode and means that we only
 399     # call SV::bytecode instead of saving PV and calling NV::bytecode since
 400     # PV/NV/IV stuff is different for AVs.
 401     return if saved($sv);
 402     if ($flag == 2) {
 403         $sv->B::SV::bytecode;
 404     } else {
 405         my $pv = $sv->PV;
 406         $sv->B::IV::bytecode;
 407         printf "xnv %s\n", $sv->NVX;
 408         if ($flag == 1) {
 409             $pv .= "\0" . $sv->TABLE;
 410             printf "newpv %s\npv_cur %d\nxpv\n", pvstring($pv),length($pv)-257;
 411         } else {
 412             printf("newpv %s\nxpv\n", pvstring($pv)) if $sv->FLAGS & POK;
 413         }
 414     }
 415 }
 416
 417 sub B::PVMG::bytecode {
 418     my ($sv, $flag) = @_;
 419     # See B::PVNV::bytecode for an explanation of $flag.
 420     return if saved($sv);
 421     # XXX We assume SvSTASH is already saved and don't save it later ourselves
 422     my $stashix = $sv->SvSTASH->objix;
 423     my @mgchain = $sv->MAGIC;
 424     my (@mgobjix, $mg);
 425     #
 426     # We need to traverse the magic chain and get objix for each OBJ
 427     # field *before* we do B::PVNV::bytecode since objix overwrites
 428     # the sv register. However, we need to write the magic-saving
 429     # bytecode *after* B::PVNV::bytecode since sv isn't initialised
 430     # to refer to $sv until then.
 431     #
 432     @mgobjix = map($_->OBJ->objix, @mgchain);
 433     $sv->B::PVNV::bytecode($flag);
 434     print "xmg_stash $stashix\n";
 435     foreach $mg (@mgchain) {
 436         printf "sv_magic %s\nmg_obj %d\nnewpv %s\nmg_pv\n",
 437             cstring($mg->TYPE), shift(@mgobjix), pvstring($mg->PTR);
 438     }
 439 }
 440
 441 sub B::PVLV::bytecode {
 442     my $sv = shift;
 443     return if saved($sv);
 444     $sv->B::PVMG::bytecode;
 445     printf <<'EOT', $sv->TARGOFF, $sv->TARGLEN, cstring($sv->TYPE);
 446 xlv_targoff %d
 447 xlv_targlen %d
 448 xlv_type %s
 449 EOT
 450 }
 451
 452 sub B::BM::bytecode {
 453     my $sv = shift;
 454     return if saved($sv);
 455     # See PVNV::bytecode for an explanation of what the argument does
 456     $sv->B::PVMG::bytecode(1);
 457     printf "xbm_useful %d\nxbm_previous %d\nxbm_rare %d\n",
 458         $sv->USEFUL, $sv->PREVIOUS, $sv->RARE;
 459 }
 460
 461 sub B::GV::bytecode {
 462     my $gv = shift;
 463     return if saved($gv);
 464     my $ix = $gv->objix;
 465     mark_saved($gv);
 466     ldsv($ix);
 467     printf <<"EOT", $gv->FLAGS, $gv->GvFLAGS;
 468 sv_flags 0x%x
 469 xgv_flags 0x%x
 470 EOT
 471     my $refcnt = $gv->REFCNT;
 472     printf("sv_refcnt_add %d\n", $refcnt - 1) if $refcnt > 1;
 473     return if $gv->is_empty;
 474     printf <<"EOT", $gv->LINE, pvstring($gv->FILE);
 475 gp_line %d
 476 newpv %s
 477 gp_file
 478 EOT
 479     my $gvname = $gv->NAME;
 480     my $name = cstring($gv->STASH->NAME . "::" . $gvname);
 481     my $egv = $gv->EGV;
 482     my $egvix = $egv->objix;
 483     my $gvrefcnt = $gv->GvREFCNT;
 484     printf("gp_refcnt_add %d\n", $gvrefcnt - 1) if $gvrefcnt > 1;
 485     if ($gvrefcnt > 1 &&  $ix != $egvix) {
 486         print "gp_share $egvix\n";
 487     } else {
 488         if ($gvname !~ /^([^A-Za-z]|STDIN|STDOUT|STDERR|ARGV|SIG|ENV)$/) {
 489             my $i;
 490             my @subfield_names = qw(SV AV HV CV FORM IO);
 491             my @subfields = map($gv->$_(), @subfield_names);
 492             my @ixes = map($_->objix, @subfields);
 493             # Reset sv register for $gv
 494             ldsv($ix);
 495             for ($i = 0; $i < @ixes; $i++) {
 496                 printf "gp_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
 497             }
 498             # Now save all the subfields
 499             my $sv;
 500             foreach $sv (@subfields) {
 501                 $sv->bytecode;
 502             }
 503         }
 504     }
 505 }
 506
 507 sub B::HV::bytecode {
 508     my $hv = shift;
 509     return if saved($hv);
 510     mark_saved($hv);
 511     my $name = $hv->NAME;
 512     my $ix = $hv->objix;
 513     if (!$name) {
 514         # It's an ordinary HV. Stashes have NAME set and need no further
 515         # saving beyond the gv_stashpv that $hv->objix already ensures.
 516         my @contents = $hv->ARRAY;
 517         my ($i, @ixes);
 518         for ($i = 1; $i < @contents; $i += 2) {
 519             push(@ixes, $contents[$i]->objix);
 520         }
 521         for ($i = 1; $i < @contents; $i += 2) {
 522             $contents[$i]->bytecode;
 523         }
 524         ldsv($ix);
 525         for ($i = 0; $i < @contents; $i += 2) {
 526             printf("newpv %s\nhv_store %d\n",
 527                    pvstring($contents[$i]), $ixes[$i / 2]);
 528         }
 529         printf "sv_refcnt %d\nsv_flags 0x%x\n", $hv->REFCNT, $hv->FLAGS;
 530     }
 531 }
 532
 533 sub B::AV::bytecode {
 534     my $av = shift;
 535     return if saved($av);
 536     my $ix = $av->objix;
 537     my $fill = $av->FILL;
 538     my $max = $av->MAX;
 539     my (@array, @ixes);
 540     if ($fill > -1) {
 541         @array = $av->ARRAY;
 542         @ixes = map($_->objix, @array);
 543         my $sv;
 544         foreach $sv (@array) {
 545             $sv->bytecode;
 546         }
 547     }
 548     # See PVNV::bytecode for the meaning of the flag argument of 2.
 549     $av->B::PVMG::bytecode(2);
 550     # Recover sv register and set AvMAX and AvFILL to -1 (since we
 551     # create an AV with NEWSV and SvUPGRADE rather than doing newAV
 552     # which is what sets AvMAX and AvFILL.
 553     ldsv($ix);
 554     printf "xav_flags 0x%x\nxav_max -1\nxav_fill -1\n", $av->AvFLAGS;
 555     if ($fill > -1) {
 556         my $elix;
 557         foreach $elix (@ixes) {
 558             print "av_push $elix\n";
 559         }
 560     } else {
 561         if ($max > -1) {
 562             print "av_extend $max\n";
 563         }
 564     }
 565 }
 566
 567 sub B::CV::bytecode {
 568     my $cv = shift;
 569     return if saved($cv);
 570     my $ix = $cv->objix;
 571     $cv->B::PVMG::bytecode;
 572     my $i;
 573     my @subfield_names = qw(ROOT START STASH GV PADLIST OUTSIDE);
 574     my @subfields = map($cv->$_(), @subfield_names);
 575     my @ixes = map($_->objix, @subfields);
 576     # Save OP tree from CvROOT (first element of @subfields)
 577     my $root = shift @subfields;
 578     if ($$root) {
 579         walkoptree($root, "bytecode");
 580     }
 581     # Reset sv register for $cv (since above ->objix calls stomped on it)
 582     ldsv($ix);
 583     for ($i = 0; $i < @ixes; $i++) {
 584         printf "xcv_%s %d\n", lc($subfield_names[$i]), $ixes[$i];
 585     }
 586     printf "xcv_depth %d\nxcv_flags 0x%x\n", $cv->DEPTH, $cv->CvFLAGS;
 587     printf "newpv %s\nxcv_file\n", pvstring($cv->FILE);
 588     # Now save all the subfields (except for CvROOT which was handled
 589     # above) and CvSTART (now the initial element of @subfields).
 590     shift @subfields; # bye-bye CvSTART
 591     my $sv;
 592     foreach $sv (@subfields) {
 593         $sv->bytecode;
 594     }
 595 }
 596
 597 sub B::IO::bytecode {
 598     my $io = shift;
 599     return if saved($io);
 600     my $ix = $io->objix;
 601     my $top_gv = $io->TOP_GV;
 602     my $top_gvix = $top_gv->objix;
 603     my $fmt_gv = $io->FMT_GV;
 604     my $fmt_gvix = $fmt_gv->objix;
 605     my $bottom_gv = $io->BOTTOM_GV;
 606     my $bottom_gvix = $bottom_gv->objix;
 607
 608     $io->B::PVMG::bytecode;
 609     ldsv($ix);
 610     print "xio_top_gv $top_gvix\n";
 611     print "xio_fmt_gv $fmt_gvix\n";
 612     print "xio_bottom_gv $bottom_gvix\n";
 613     my $field;
 614     foreach $field (qw(TOP_NAME FMT_NAME BOTTOM_NAME)) {
 615         printf "newpv %s\nxio_%s\n", pvstring($io->$field()), lc($field);
 616     }
 617     foreach $field (qw(LINES PAGE PAGE_LEN LINES_LEFT SUBPROCESS)) {
 618         printf "xio_%s %d\n", lc($field), $io->$field();
 619     }
 620     printf "xio_type %s\nxio_flags 0x%x\n", cstring($io->IoTYPE), $io->IoFLAGS;
 621     $top_gv->bytecode;
 622     $fmt_gv->bytecode;
 623     $bottom_gv->bytecode;
 624 }
 625
 626 sub B::SPECIAL::bytecode {
 627     # nothing extra needs doing
 628 }
 629
 630 sub bytecompile_object {
 631     my $sv;
 632     foreach $sv (@_) {
 633         svref_2object($sv)->bytecode;
 634     }
 635 }
 636
 637 sub B::GV::bytecodecv {
 638     my $gv = shift;
 639     my $cv = $gv->CV;
 640     if ($$cv && !saved($cv)) {
 641         if ($debug_cv) {
 642             warn sprintf("saving extra CV &%s::%s (0x%x) from GV 0x%x\n",
 643                          $gv->STASH->NAME, $gv->NAME, $$cv, $$gv);
 644         }
 645         $gv->bytecode;
 646     }
 647 }
 648
 649 sub bytecompile_main {
 650     my $curpad = (comppadlist->ARRAY)[1];
 651     my $curpadix = $curpad->objix;
 652     $curpad->bytecode;
 653     walkoptree(main_root, "bytecode");
 654     warn "done main program, now walking symbol table\n" if $debug_bc;
 655     my ($pack, %exclude);
 656     foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS strict vars
 657                       FileHandle Exporter Carp UNIVERSAL IO Fcntl Symbol warnings
 658                       attributes File::Spec SelectSaver blib Cwd))
 659     {
 660         $exclude{$pack."::"} = 1;
 661     }
 662     no strict qw(vars refs);
 663     walksymtable(\%{"main::"}, "bytecodecv", sub {
 664         warn "considering $_[0]\n" if $debug_bc;
 665         return !defined($exclude{$_[0]});
 666     });
 667     if (!$module_only) {
 668         printf "main_root %d\n", main_root->objix;
 669         printf "main_start %d\n", main_start->objix;
 670         printf "curpad $curpadix\n";
 671         # XXX Do min_intro_pending and max_intro_pending matter?
 672     }
 673 }
 674
 675 sub prepare_assemble {
 676     my $newfh = IO::File->new_tmpfile;
 677     select($newfh);
 678     binmode $newfh;
 679     return $newfh;
 680 }
 681
 682 sub do_assemble {
 683     my $fh = shift;
 684     seek($fh, 0, 0); # rewind the temporary file
 685     assemble_fh($fh, sub { print OUT @_ });
 686 }
 687
 688 sub compile {
 689     my @options = @_;
 690     my ($option, $opt, $arg);
 691     open(OUT, ">&STDOUT");
 692     binmode OUT;
 693     select(OUT);
 694   OPTION:
 695     while ($option = shift @options) {
 696         if ($option =~ /^-(.)(.*)/) {
 697             $opt = $1;
 698             $arg = $2;
 699         } else {
 700             unshift @options, $option;
 701             last OPTION;
 702         }
 703         if ($opt eq "-" && $arg eq "-") {
 704             shift @options;
 705             last OPTION;
 706         } elsif ($opt eq "o") {
 707             $arg ||= shift @options;
 708             open(OUT, ">$arg") or return "$arg: $!\n";
 709             binmode OUT;
 710         } elsif ($opt eq "a") {
 711             $arg ||= shift @options;
 712             open(OUT, ">>$arg") or return "$arg: $!\n";
 713             binmode OUT;
 714         } elsif ($opt eq "D") {
 715             $arg ||= shift @options;
 716             foreach $arg (split(//, $arg)) {
 717                 if ($arg eq "b") {
 718                     $| = 1;
 719                     debug(1);
 720                 } elsif ($arg eq "o") {
 721                     B->debug(1);
 722                 } elsif ($arg eq "a") {
 723                     B::Assembler::debug(1);
 724                 } elsif ($arg eq "C") {
 725                     $debug_cv = 1;
 726                 }
 727             }
 728         } elsif ($opt eq "v") {
 729             $verbose = 1;
 730         } elsif ($opt eq "m") {
 731             $module_only = 1;
 732         } elsif ($opt eq "S") {
 733             $no_assemble = 1;
 734         } elsif ($opt eq "f") {
 735             $arg ||= shift @options;
 736             my $value = $arg !~ s/^no-//;
 737             $arg =~ s/-/_/g;
 738             my $ref = $optimise{$arg};
 739             if (defined($ref)) {
 740                 $$ref = $value;
 741             } else {
 742                 warn qq(ignoring unknown optimisation option "$arg"\n);
 743             }
 744         } elsif ($opt eq "O") {
 745             $arg = 1 if $arg eq "";
 746             my $ref;
 747             foreach $ref (values %optimise) {
 748                 $$ref = 0;
 749             }
 750             if ($arg >= 6) {
 751                 $strip_syntree = 1;
 752             }
 753             if ($arg >= 2) {
 754                 $bypass_nullops = 1;
 755             }
 756             if ($arg >= 1) {
 757                 $compress_nullops = 1;
 758                 $omit_seq = 1;
 759             }
 760         }
 761     }
 762     if (@options) {
 763         return sub {
 764             my $objname;
 765             my $newfh;
 766             $newfh = prepare_assemble() unless $no_assemble;
 767             foreach $objname (@options) {
 768                 eval "bytecompile_object(\\$objname)";
 769             }
 770             do_assemble($newfh) unless $no_assemble;
 771         }
 772     } else {
 773         return sub {
 774             my $newfh;
 775             $newfh = prepare_assemble() unless $no_assemble;
 776             bytecompile_main();
 777             do_assemble($newfh) unless $no_assemble;
 778         }
 779     }
 780 }
 781
 782 1;
 783
 784 __END__
 785
 786 =head1 NAME
 787
 788 B::Bytecode - Perl compiler's bytecode backend
 789
 790 =head1 SYNOPSIS
 791
 792         perl -MO=Bytecode[,OPTIONS] foo.pl
 793
 794 =head1 DESCRIPTION
 795
 796 This compiler backend takes Perl source and generates a
 797 platform-independent bytecode encapsulating code to load the
 798 internal structures perl uses to run your program. When the
 799 generated bytecode is loaded in, your program is ready to run,
 800 reducing the time which perl would have taken to load and parse
 801 your program into its internal semi-compiled form. That means that
 802 compiling with this backend will not help improve the runtime
 803 execution speed of your program but may improve the start-up time.
 804 Depending on the environment in which your program runs this may
 805 or may not be a help.
 806
 807 The resulting bytecode can be run with a special byteperl executable
 808 or (for non-main programs) be loaded via the C<byteload_fh> function
 809 in the F<B> module.
 810
 811 =head1 OPTIONS
 812
 813 If there are any non-option arguments, they are taken to be names of
 814 objects to be saved (probably doesn't work properly yet).  Without
 815 extra arguments, it saves the main program.
 816
 817 =over 4
 818
 819 =item B<-ofilename>
 820
 821 Output to filename instead of STDOUT.
 822
 823 =item B<-afilename>
 824
 825 Append output to filename.
 826
 827 =item B<-->
 828
 829 Force end of options.
 830
 831 =item B<-f>
 832
 833 Force optimisations on or off one at a time. Each can be preceded
 834 by B<no-> to turn the option off (e.g. B<-fno-compress-nullops>).
 835
 836 =item B<-fcompress-nullops>
 837
 838 Only fills in the necessary fields of ops which have
 839 been optimised away by perl's internal compiler.
 840
 841 =item B<-fomit-sequence-numbers>
 842
 843 Leaves out code to fill in the op_seq field of all ops
 844 which is only used by perl's internal compiler.
 845
 846 =item B<-fbypass-nullops>
 847
 848 If op->op_next ever points to a NULLOP, replaces the op_next field
 849 with the first non-NULLOP in the path of execution.
 850
 851 =item B<-fstrip-syntax-tree>
 852
 853 Leaves out code to fill in the pointers which link the internal syntax
 854 tree together. They're not needed at run-time but leaving them out
 855 will make it impossible to recompile or disassemble the resulting
 856 program.  It will also stop C<goto label> statements from working.
 857
 858 =item B<-On>
 859
 860 Optimisation level (n = 0, 1, 2, ...). B<-O> means B<-O1>.
 861 B<-O1> sets B<-fcompress-nullops> B<-fomit-sequence numbers>.
 862 B<-O6> adds B<-fstrip-syntax-tree>.
 863
 864 =item B<-D>
 865
 866 Debug options (concatenated or separate flags like C<perl -D>).
 867
 868 =item B<-Do>
 869
 870 Prints each OP as it's processed.
 871
 872 =item B<-Db>
 873
 874 Print debugging information about bytecompiler progress.
 875
 876 =item B<-Da>
 877
 878 Tells the (bytecode) assembler to include source assembler lines
 879 in its output as bytecode comments.
 880
 881 =item B<-DC>
 882
 883 Prints each CV taken from the final symbol tree walk.
 884
 885 =item B<-S>
 886
 887 Output (bytecode) assembler source rather than piping it
 888 through the assembler and outputting bytecode.
 889
 890 =item B<-m>
 891
 892 Compile as a module rather than a standalone program. Currently this
 893 just means that the bytecodes for initialising C<main_start>,
 894 C<main_root> and C<curpad> are omitted.
 895
 896 =back
 897
 898 =head1 EXAMPLES
 899
 900     perl -MO=Bytecode,-O6,-o,foo.plc foo.pl
 901
 902     perl -MO=Bytecode,-S foo.pl > foo.S
 903     assemble foo.S > foo.plc
 904
 905 Note that C<assemble> lives in the C<B> subdirectory of your perl
 906 library directory. The utility called perlcc may also be used to
 907 help make use of this compiler.
 908
 909     perl -MO=Bytecode,-m,-oFoo.pmc Foo.pm
 910
 911 =head1 BUGS
 912
 913 Plenty. Current status: experimental.
 914
 915 =head1 AUTHOR
 916
 917 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>
 918
 919 =cut