package B::Concise;
-# Copyright (C) 2000, 2001 Stephen McCamant. All rights reserved.
+# Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
# This program is free software; you can redistribute and/or modify it
# under the same terms as Perl itself.
-our $VERSION = "0.51";
use strict;
+use warnings;
+
+use Exporter ();
+
+our $VERSION = "0.55";
+our @ISA = qw(Exporter);
+our @EXPORT_OK = qw(set_style set_style_standard add_callback
+ concise_cv concise_main);
+
use B qw(class ppname main_start main_root main_cv cstring svref_2object
- SVf_IOK SVf_NOK SVf_POK OPf_KIDS);
+ SVf_IOK SVf_NOK SVf_POK SVf_IVisUV OPf_KIDS);
my %style =
("terse" =>
my($format, $gotofmt, $treefmt);
my $curcv;
-my($seq_base, $cop_seq_base);
+my $cop_seq_base;
+my @callbacks;
+
+sub set_style {
+ ($format, $gotofmt, $treefmt) = @_;
+}
+
+sub set_style_standard {
+ my($name) = @_;
+ set_style(@{$style{$name}});
+}
+
+sub add_callback {
+ push @callbacks, @_;
+}
sub concise_cv {
my ($order, $cvref) = @_;
my $cv = svref_2object($cvref);
$curcv = $cv;
+ sequence($cv->START);
if ($order eq "exec") {
walk_exec($cv->START);
} elsif ($order eq "basic") {
}
}
+sub concise_main {
+ my($order) = @_;
+ sequence(main_start);
+ $curcv = main_cv;
+ if ($order eq "exec") {
+ return if class(main_start) eq "NULL";
+ walk_exec(main_start);
+ } elsif ($order eq "tree") {
+ return if class(main_root) eq "NULL";
+ print tree(main_root, 0);
+ } elsif ($order eq "basic") {
+ return if class(main_root) eq "NULL";
+ walk_topdown(main_root,
+ sub { $_[0]->concise($_[1]) }, 0);
+ }
+}
+
my $start_sym = "\e(0"; # "\cN" sometimes also works
my $end_sym = "\e(B"; # "\cO" respectively
my $order = "basic";
+set_style_standard("concise");
+
sub compile {
my @options = grep(/^-/, @_);
my @args = grep(!/^-/, @_);
my $do_main = 0;
- ($format, $gotofmt, $treefmt) = @{$style{"concise"}};
for my $o (@options) {
if ($o eq "-basic") {
$order = "basic";
} elsif ($o eq "-littleendian") {
$big_endian = 0;
} elsif (exists $style{substr($o, 1)}) {
- ($format, $gotofmt, $treefmt) = @{$style{substr($o, 1)}};
+ set_style(@{$style{substr($o, 1)}});
} else {
warn "Option $o unrecognized";
}
}
- if (@args) {
- return sub {
+ return sub {
+ if (@args) {
for my $objname (@args) {
$objname = "main::" . $objname unless $objname =~ /::/;
+ print "$objname:\n";
eval "concise_cv(\$order, \\&$objname)";
die "concise_cv($order, \\&$objname) failed: $@" if $@;
}
}
- }
- if (!@args or $do_main) {
- if ($order eq "exec") {
- return sub { return if class(main_start) eq "NULL";
- $curcv = main_cv;
- walk_exec(main_start) }
- } elsif ($order eq "tree") {
- return sub { return if class(main_root) eq "NULL";
- $curcv = main_cv;
- print tree(main_root, 0) }
- } elsif ($order eq "basic") {
- return sub { return if class(main_root) eq "NULL";
- $curcv = main_cv;
- walk_topdown(main_root,
- sub { $_[0]->concise($_[1]) }, 0); }
+ if (!@args or $do_main) {
+ print "main program:\n" if $do_main;
+ concise_main($order);
}
}
}
my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
- 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";");
+ 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#");
+no warnings 'qw'; # "Possible attempt to put comments..."
my @linenoise =
qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl
` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I
co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
- Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n>';
+ Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
return $str;
}
-sub seq { return $_[0]->seq ? base_n($_[0]->seq - $seq_base) : "-" }
+my %sequence_num;
+my $seq_max = 1;
+
+sub seq {
+ my($op) = @_;
+ return "-" if not exists $sequence_num{$$op};
+ return base_n($sequence_num{$$op});
+}
sub walk_topdown {
my($op, $sub, $level) = @_;
walk_topdown($kid, $sub, $level + 1);
}
}
- if (class($op) eq "PMOP" and $ {$op->pmreplroot}
+ if (class($op) eq "PMOP" and $op->pmreplroot and $ {$op->pmreplroot}
and $op->pmreplroot->isa("B::OP")) {
walk_topdown($op->pmreplroot, $sub, $level + 1);
}
last if $opsseen{$$op}++;
push @$targ, $op;
my $name = $op->name;
- if ($name
- =~ /^(or|and|(map|grep)while|entertry|range|cond_expr)$/) {
+ if (class($op) eq "LOGOP") {
my $ar = [];
push @$targ, $ar;
push @todo, [$op->other, $ar];
walklines(\@lines, 0);
}
+# The structure of this routine is purposely modeled after op.c's peep()
+sub sequence {
+ my($op) = @_;
+ my $oldop = 0;
+ return if class($op) eq "NULL" or exists $sequence_num{$$op};
+ for (; $$op; $op = $op->next) {
+ last if exists $sequence_num{$$op};
+ my $name = $op->name;
+ if ($name =~ /^(null|scalar|lineseq|scope)$/) {
+ next if $oldop and $ {$op->next};
+ } else {
+ $sequence_num{$$op} = $seq_max++;
+ if (class($op) eq "LOGOP") {
+ my $other = $op->other;
+ $other = $other->next while $other->name eq "null";
+ sequence($other);
+ } elsif (class($op) eq "LOOP") {
+ my $redoop = $op->redoop;
+ $redoop = $redoop->next while $redoop->name eq "null";
+ sequence($redoop);
+ my $nextop = $op->nextop;
+ $nextop = $nextop->next while $nextop->name eq "null";
+ sequence($nextop);
+ my $lastop = $op->lastop;
+ $lastop = $lastop->next while $lastop->name eq "null";
+ sequence($lastop);
+ } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
+ my $replstart = $op->pmreplstart;
+ $replstart = $replstart->next while $replstart->name eq "null";
+ sequence($replstart);
+ }
+ }
+ $oldop = $op;
+ }
+}
+
sub fmt_line {
my($hr, $fmt, $level) = @_;
my $text = $fmt;
"padav", "padhv");
$priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite");
$priv{"aassign"}{64} = "COMMON";
-$priv{"aassign"}{32} = "PHASH";
$priv{"sassign"}{64} = "BKWARD";
$priv{$_}{64} = "RTIME" for ("match", "subst", "substcont");
@{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL",
"link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system",
"exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority",
"setpriority", "time", "sleep");
-@{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", "$[", "BARE", "WARN");
+@{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN");
$priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM";
$priv{"list"}{64} = "GUESSED";
$priv{"delete"}{64} = "SLICE";
"scmp", "lc", "uc", "lcfirst", "ucfirst");
@{$priv{"sort"}}{1,2,4} = ("NUM", "INT", "REV");
$priv{"threadsv"}{64} = "SVREFd";
-$priv{$_}{16} = "INBIN" for ("open", "backtick");
-$priv{$_}{32} = "INCR" for ("open", "backtick");
-$priv{$_}{64} = "OUTBIN" for ("open", "backtick");
-$priv{$_}{128} = "OUTCR" for ("open", "backtick");
+@{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR")
+ for ("open", "backtick");
$priv{"exit"}{128} = "VMS";
sub private_flags {
return join(",", @s);
}
+sub concise_sv {
+ my($sv, $hr) = @_;
+ $hr->{svclass} = class($sv);
+ $hr->{svclass} = "UV"
+ if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
+ $hr->{svaddr} = sprintf("%#x", $$sv);
+ if ($hr->{svclass} eq "GV") {
+ my $gv = $sv;
+ my $stash = $gv->STASH->NAME;
+ if ($stash eq "main") {
+ $stash = "";
+ } else {
+ $stash = $stash . "::";
+ }
+ $hr->{svval} = "*$stash" . $gv->SAFENAME;
+ return "*$stash" . $gv->SAFENAME;
+ } else {
+ while (class($sv) eq "RV") {
+ $hr->{svval} .= "\\";
+ $sv = $sv->RV;
+ }
+ if (class($sv) eq "SPECIAL") {
+ $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
+ } elsif ($sv->FLAGS & SVf_NOK) {
+ $hr->{svval} .= $sv->NV;
+ } elsif ($sv->FLAGS & SVf_IOK) {
+ $hr->{svval} .= $sv->int_value;
+ } elsif ($sv->FLAGS & SVf_POK) {
+ $hr->{svval} .= cstring($sv->PV);
+ } elsif (class($sv) eq "HV") {
+ $hr->{svval} .= 'HASH';
+ }
+ return $hr->{svclass} . " " . $hr->{svval};
+ }
+}
+
sub concise_op {
my ($op, $level, $format) = @_;
my %h;
} elsif ($h{targ}) {
my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}];
if (defined $padname and class($padname) ne "SPECIAL") {
- $h{targarg} = $padname->PV;
+ $h{targarg} = $padname->PVX;
my $intro = $padname->NVX - $cop_seq_base;
my $finish = int($padname->IVX) - $cop_seq_base;
$finish = "end" if $finish == 999999999 - $cop_seq_base;
$h{svclass} = $h{svaddr} = $h{svval} = "";
if ($h{class} eq "PMOP") {
my $precomp = $op->precomp;
- $precomp = defined($precomp) ? "/$precomp/" : "";
+ if (defined $precomp) {
+ $precomp = cstring($precomp); # Escape literal control sequences
+ $precomp = "/$precomp/";
+ } else {
+ $precomp = "";
+ }
my $pmreplroot = $op->pmreplroot;
- my ($pmreplroot, $pmreplstart);
- if ($ {$pmreplroot = $op->pmreplroot} && $pmreplroot->isa("B::GV")) {
+ my $pmreplstart;
+ if ($pmreplroot && $$pmreplroot && $pmreplroot->isa("B::GV")) {
# with C<@stash_array = split(/pat/, str);>,
# *stash_array is stored in pmreplroot.
$h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")";
undef $lastnext;
$h{arg} = "(other->" . seq($op->other) . ")";
} elsif ($h{class} eq "SVOP") {
- my $sv = $op->sv;
- $h{svclass} = class($sv);
- $h{svaddr} = sprintf("%#x", $$sv);
- if ($h{svclass} eq "GV") {
- my $gv = $sv;
- my $stash = $gv->STASH->NAME;
- if ($stash eq "main") {
- $stash = "";
- } else {
- $stash = $stash . "::";
- }
- $h{arg} = "(*$stash" . $gv->NAME . ")";
- $h{svval} = "*$stash" . $gv->NAME;
+ if (! ${$op->sv}) {
+ my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
+ $h{arg} = "[" . concise_sv($sv, \%h) . "]";
+ $h{targarglife} = $h{targarg} = "";
} else {
- while (class($sv) eq "RV") {
- $h{svval} .= "\\";
- $sv = $sv->RV;
- }
- if (class($sv) eq "SPECIAL") {
- $h{svval} = ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
- } elsif ($sv->FLAGS & SVf_NOK) {
- $h{svval} = $sv->NV;
- } elsif ($sv->FLAGS & SVf_IOK) {
- $h{svval} = $sv->IV;
- } elsif ($sv->FLAGS & SVf_POK) {
- $h{svval} = cstring($sv->PV);
- }
- $h{arg} = "($h{svclass} $h{svval})";
+ $h{arg} = "(" . concise_sv($op->sv, \%h) . ")";
}
+ } elsif ($h{class} eq "PADOP") {
+ my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->padix];
+ $h{arg} = "[" . concise_sv($sv, \%h) . "]";
}
$h{seq} = $h{hyphseq} = seq($op);
$h{seq} = "" if $h{seq} eq "-";
$h{label} = $labels{$op->seq};
$h{typenum} = $op->type;
$h{noise} = $linenoise[$op->type];
+ $_->(\%h, $op, \$format, \$level) for @callbacks;
return fmt_line(\%h, $format, $level);
}
print concise_op($op, $level, $format);
}
+# B::OP::terse (see Terse.pm) now just calls this
+sub b_terse {
+ my($op, $level) = @_;
+
+ # This isn't necessarily right, but there's no easy way to get
+ # from an OP to the right CV. This is a limitation of the
+ # ->terse() interface style, and there isn't much to do about
+ # it. In particular, we can die in concise_op if the main pad
+ # isn't long enough, or has the wrong kind of entries, compared to
+ # the pad a sub was compiled with. The fix for that would be to
+ # make a backwards compatible "terse" format that never even
+ # looked at the pad, just like the old B::Terse. I don't think
+ # that's worth the effort, though.
+ $curcv = main_cv unless $curcv;
+
+ if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
+ my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
+ "addr" => sprintf("%#x", $$lastnext)};
+ print fmt_line($h, $style{"terse"}[1], $level+1);
+ }
+ $lastnext = $op->next;
+ print concise_op($op, $level, $style{"terse"}[0]);
+}
+
sub tree {
my $op = shift;
my $level = shift;
map(" " x (length($name)+$size) . $_, @lines));
}
-# This is a bit of a hack; the 2 and 15 were determined empirically.
-# These need to stay the last things in the module.
-$cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + 2;
-$seq_base = svref_2object(eval 'sub{}')->START->seq + 15;
+# *** Warning: fragile kludge ahead ***
+# Because the B::* modules run in the same interpreter as the code
+# they're compiling, their presence tends to distort the view we have
+# of the code we're looking at. In particular, perl gives sequence
+# numbers to both OPs in general and COPs in particular. If the
+# program we're looking at were run on its own, these numbers would
+# start at 1. Because all of B::Concise and all the modules it uses
+# are compiled first, though, by the time we get to the user's program
+# the sequence numbers are alreay at pretty high numbers, which would
+# be distracting if you're trying to tell OPs apart. Therefore we'd
+# like to subtract an offset from all the sequence numbers we display,
+# to restore the simpler view of the world. The trick is to know what
+# that offset will be, when we're still compiling B::Concise! If we
+# hardcoded a value, it would have to change every time B::Concise or
+# other modules we use do. To help a little, what we do here is
+# compile a little code at the end of the module, and compute the base
+# sequence number for the user's program as being a small offset
+# later, so all we have to worry about are changes in the offset.
+# (Note that we now only play this game with COP sequence numbers. OP
+# sequence numbers aren't used to refer to OPs from a distance, and
+# they don't have much significance, so we just generate our own
+# sequence numbers which are easier to control. This way we also don't
+# stand in the way of a possible future removal of OP sequence
+# numbers).
+
+# When you say "perl -MO=Concise -e '$a'", the output should look like:
+
+# 4 <@> leave[t1] vKP/REFC ->(end)
+# 1 <0> enter ->2
+ #^ smallest OP sequence number should be 1
+# 2 <;> nextstate(main 1 -e:1) v ->3
+ # ^ smallest COP sequence number should be 1
+# - <1> ex-rv2sv vK/1 ->4
+# 3 <$> gvsv(*a) s ->4
+
+# If the second of the marked numbers there isn't 1, it means you need
+# to update the corresponding magic number in the next line.
+# Remember, this needs to stay the last things in the module.
+
+# Why is this different for MacOS? Does it matter?
+my $cop_seq_mnum = $^O eq 'MacOS' ? 10 : 9;
+$cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
1;
perl -MO=Concise[,OPTIONS] foo.pl
+ use B::Concise qw(set_style add_callback);
+
=head1 DESCRIPTION
This compiler backend prints the internal OPs of a Perl program's syntax
perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
sophisticated and flexible.
+=head1 EXAMPLE
+
+Here's is a short example of output, using the default formatting
+conventions :
+
+ % perl -MO=Concise -e '$a = $b + 42'
+ 8 <@> leave[t1] vKP/REFC ->(end)
+ 1 <0> enter ->2
+ 2 <;> nextstate(main 1 -e:1) v ->3
+ 7 <2> sassign vKS/2 ->8
+ 5 <2> add[t1] sK/2 ->6
+ - <1> ex-rv2sv sK/1 ->4
+ 3 <$> gvsv(*b) s ->4
+ 4 <$> const(IV 42) s ->5
+ - <1> ex-rv2sv sKRM*/1 ->7
+ 6 <$> gvsv(*a) s ->7
+
+Each line corresponds to an operator. Null ops appear as C<ex-opname>,
+where I<opname> is the op that has been optimized away by perl.
+
+The number on the first row indicates the op's sequence number. It's
+given in base 36 by default.
+
+The symbol between angle brackets indicates the op's type : for example,
+<2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">).
+
+The opname may be followed by op-specific information in parentheses
+(e.g. C<gvsv(*b)>), and by targ information in brackets (e.g.
+C<leave[t1]>).
+
+Next come the op flags. The common flags are listed below
+(L</"OP flags abbreviations">). The private flags follow, separated
+by a slash. For example, C<vKP/REFC> means that the leave op has
+public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private
+flag OPpREFCOUNTED.
+
+Finally an arrow points to the sequence number of the next op.
+
=head1 OPTIONS
Arguments that don't start with a hyphen are taken to be the names of
=item B<-terse>
-Use formatting conventions that emulate the ouput of B<B::Terse>. The
+Use formatting conventions that emulate the output of B<B::Terse>. The
basic mode is almost indistinguishable from the real B<B::Terse>, and the
exec mode looks very similar, but is in a more logical order and lacks
curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
The B-determined class of the OP, in all caps.
-=item B<#classym>
+=item B<#classsym>
A single symbol abbreviating the class of the OP.
The numeric value of the OP's flags.
-=item B<#hyphenseq>
+=item B<#hyphseq>
The sequence number of the OP, or a hyphen if it doesn't have one.
=item B<#noise>
-The two-character abbreviation for the OP's name.
+A one- or two-character abbreviation for the OP's name.
=item B<#private>
=item B<#seq>
-The sequence number of the OP.
+The sequence number of the OP. Note that this is now a sequence number
+generated by B::Concise, rather than the real op_seq value (for which
+see B<#seqnum>).
=item B<#seqnum>
" PVOP An OP with a string
{ LOOP An OP that holds pointers for a loop
; COP An OP that marks the start of a statement
+ # PADOP An OP with a GV on the pad
+
+=head1 Using B::Concise outside of the O framework
+
+It is possible to extend B<B::Concise> by using it outside of the B<O>
+framework and providing new styles and new variables.
+
+ use B::Concise qw(set_style add_callback);
+ set_style($format, $gotofmt, $treefmt);
+ add_callback
+ (
+ sub
+ {
+ my ($h, $op, $level, $format) = @_;
+ $h->{variable} = some_func($op);
+ }
+ );
+ B::Concise::compile(@options)->();
+
+You can specify a style by calling the B<set_style> subroutine. If you
+have a new variable in your style, or you want to change the value of an
+existing variable, you will need to add a callback to specify the value
+for that variable.
+
+This is done by calling B<add_callback> passing references to any
+callback subroutines. The subroutines are called in the same order as
+they are added. Each subroutine is passed four parameters. These are a
+reference to a hash, the keys of which are the names of the variables
+and the values of which are their values, the op, the level and the
+format.
+
+To define your own variables, simply add them to the hash, or change
+existing values if you need to. The level and format are passed in as
+references to scalars, but it is unlikely that they will need to be
+changed or even used.
+
+To switch back to one of the standard styles like C<concise> or
+C<terse>, use C<set_style_standard>.
+
+To see the output, call the subroutine returned by B<compile> in the
+same way that B<O> does.
=head1 AUTHOR
-Stephen McCamant, C<smcc@CSUA.Berkeley.EDU>
+Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.
=cut