2 # Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
3 # This program is free software; you can redistribute and/or modify it
4 # under the same terms as Perl itself.
6 # Note: we need to keep track of how many use declarations/BEGIN
7 # blocks this module uses, so we can avoid printing them when user
8 # asks for the BEGIN blocks in her program. Update the comments and
9 # the count in concise_specials if you add or delete one. The
10 # -MO=Concise counts as use #1.
13 use warnings; # uses #3 and #4, since warnings uses Carp
15 use Exporter (); # use #5
17 our $VERSION = "0.61";
18 our @ISA = qw(Exporter);
19 our @EXPORT_OK = qw(set_style set_style_standard add_callback
20 concise_subref concise_cv concise_main
21 add_style walk_output);
24 use B qw(class ppname main_start main_root main_cv cstring svref_2object
25 SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS OPf_SPECIAL
30 ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) "
31 . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n",
32 "(*( )*)goto #class (#addr)\n",
35 ["#hyphseq2 (*( (x( ;)x))*)<#classsym> "
36 . "#exname#arg(?([#targarglife])?)~#flags(?(/#private)?)(x(;~->#next)x)\n",
37 " (*( )*) goto #seq\n",
38 "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"],
40 ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)",
42 "(?(#seq)?)#noise#arg(?([#targarg])?)"],
44 ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t"
45 . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n"
46 . "\top_flags\t#flagval\n\top_private\t#privval\n"
47 . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)"
48 . "(?(\top_sv\t\t#svaddr\n)?)",
51 "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT},
52 $ENV{B_CONCISE_TREE_FORMAT}],
55 # Renderings, ie how Concise prints, is controlled by these vars
57 our $stylename; # selects current style from %style
58 my $order = "basic"; # how optree is walked & printed: basic, exec, tree
60 # rendering mechanics:
61 # these 'formats' are the line-rendering templates
62 # they're updated from %style when $stylename changes
63 my ($format, $gotofmt, $treefmt);
66 my $base = 36; # how <sequence#> is displayed
67 my $big_endian = 1; # more <sequence#> display
68 my $tree_style = 0; # tree-order details
69 my $banner = 1; # print banner before optree is traversed
72 our @callbacks; # allow external management
74 set_style_standard("concise");
80 ($format, $gotofmt, $treefmt) = @_;
81 #warn "set_style: deprecated, use set_style_standard instead\n"; # someday
82 die "expecting 3 style-format args\n" unless @_ == 3;
86 my ($newstyle,@args) = @_;
87 die "style '$newstyle' already exists, choose a new name\n"
88 if exists $style{$newstyle};
89 die "expecting 3 style-format args\n" unless @args == 3;
90 $style{$newstyle} = [@args];
91 $stylename = $newstyle; # update rendering state
94 sub set_style_standard {
95 ($stylename) = @_; # update rendering state
96 die "err: style '$stylename' unknown\n" unless exists $style{$stylename};
97 set_style(@{$style{$stylename}});
104 # output handle, used with all Concise-output printing
105 our $walkHandle = \*STDOUT; # public for your convenience
107 sub walk_output { # updates $walkHandle
109 if (ref $handle eq 'SCALAR') {
110 # in 5.8+, open(FILEHANDLE,MODE,REFERENCE) writes to string
111 open my $tmp, '>', $handle; # but cant re-set an existing filehandle
112 $walkHandle = $tmp; # so use my $tmp as intermediate var
115 $walkHandle = $handle;
116 my $iotype = ref $walkHandle;
117 die "expecting argument/object that can print\n"
118 unless $iotype eq 'GLOB' or $iotype and $walkHandle->can('print');
122 my($order, $coderef) = @_;
123 my $codeobj = svref_2object($coderef);
124 die "err: not a coderef: $coderef\n" unless ref $codeobj eq 'B::CV';#CODE';
125 concise_cv_obj($order, $codeobj);
128 # This should have been called concise_subref, but it was exported
129 # under this name in versions before 0.56
130 sub concise_cv { concise_subref(@_); }
133 my ($order, $cv) = @_;
135 die "err: coderef has no START\n" if class($cv->START) eq "NULL";
136 sequence($cv->START);
137 if ($order eq "exec") {
138 walk_exec($cv->START);
139 } elsif ($order eq "basic") {
140 walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0);
142 print $walkHandle tree($cv->ROOT, 0);
148 sequence(main_start);
150 if ($order eq "exec") {
151 return if class(main_start) eq "NULL";
152 walk_exec(main_start);
153 } elsif ($order eq "tree") {
154 return if class(main_root) eq "NULL";
155 print $walkHandle tree(main_root, 0);
156 } elsif ($order eq "basic") {
157 return if class(main_root) eq "NULL";
158 walk_topdown(main_root,
159 sub { $_[0]->concise($_[1]) }, 0);
163 sub concise_specials {
164 my($name, $order, @cv_s) = @_;
166 if ($name eq "BEGIN") {
167 splice(@cv_s, 0, 7); # skip 7 BEGIN blocks in this file
168 } elsif ($name eq "CHECK") {
169 pop @cv_s; # skip the CHECK block that calls us
172 print $walkHandle "$name $i:\n";
174 concise_cv_obj($order, $cv);
178 my $start_sym = "\e(0"; # "\cN" sometimes also works
179 my $end_sym = "\e(B"; # "\cO" respectively
181 my @tree_decorations =
182 ([" ", "--", "+-", "|-", "| ", "`-", "-", 1],
183 [" ", "-", "+", "+", "|", "`", "", 0],
184 [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1],
185 [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0],
189 my @options = grep(/^-/, @_);
190 my @args = grep(!/^-/, @_);
192 for my $o (@options) {
193 if ($o eq "-basic") {
195 } elsif ($o eq "-exec") {
197 } elsif ($o eq "-tree") {
199 } elsif ($o eq "-compact") {
201 } elsif ($o eq "-loose") {
203 } elsif ($o eq "-vt") {
205 } elsif ($o eq "-ascii") {
207 } elsif ($o eq "-main") {
209 } elsif ($o =~ /^-base(\d+)$/) {
211 } elsif ($o eq "-bigendian") {
213 } elsif ($o eq "-littleendian") {
215 } elsif ($o eq "-banner") {
218 elsif (exists $style{substr($o, 1)}) {
219 $stylename = substr($o, 1);
220 set_style_standard($stylename);
222 warn "Option $o unrecognized";
227 for my $objname (@args) {
228 if ($objname eq "BEGIN") {
229 concise_specials("BEGIN", $order,
230 B::begin_av->isa("B::AV") ?
231 B::begin_av->ARRAY : ());
232 } elsif ($objname eq "INIT") {
233 concise_specials("INIT", $order,
234 B::init_av->isa("B::AV") ?
235 B::init_av->ARRAY : ());
236 } elsif ($objname eq "CHECK") {
237 concise_specials("CHECK", $order,
238 B::check_av->isa("B::AV") ?
239 B::check_av->ARRAY : ());
240 } elsif ($objname eq "END") {
241 concise_specials("END", $order,
242 B::end_av->isa("B::AV") ?
243 B::end_av->ARRAY : ());
245 # convert function names to subrefs
248 print $walkHandle "B::Concise::compile($objname)\n"
252 $objname = "main::" . $objname unless $objname =~ /::/;
253 print $walkHandle "$objname:\n";
255 die "err: unknown function ($objname)\n"
256 unless *{$objname}{CODE};
257 $objref = \&$objname;
259 concise_subref($order, $objref);
263 if (!@args or $do_main) {
264 print $walkHandle "main program:\n" if $do_main;
265 concise_main($order);
271 my $lastnext; # remembers op-chain, used to insert gotos
273 my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
274 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
275 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#");
277 no warnings 'qw'; # "Possible attempt to put comments..."; use #7
279 qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl
280 ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I
281 -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i<
282 > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i
283 ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy
284 uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@
285 a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s}
286 v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o
287 ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v
288 ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r
289 -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd
290 co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
291 g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
292 e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
293 Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
295 my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
300 push @v, "v" if ($x & 3) == 1;
301 push @v, "s" if ($x & 3) == 2;
302 push @v, "l" if ($x & 3) == 3;
303 push @v, "K" if $x & 4;
304 push @v, "P" if $x & 8;
305 push @v, "R" if $x & 16;
306 push @v, "M" if $x & 32;
307 push @v, "S" if $x & 64;
308 push @v, "*" if $x & 128;
314 return "-" . base_n(-$x) if $x < 0;
316 do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base);
317 $str = reverse $str if $big_endian;
332 return "-" if not exists $sequence_num{$$op};
333 return base_n($sequence_num{$$op});
337 my($op, $sub, $level) = @_;
339 if ($op->flags & OPf_KIDS) {
340 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
341 walk_topdown($kid, $sub, $level + 1);
344 if (class($op) eq "PMOP") {
345 my $maybe_root = $op->pmreplroot;
346 if (ref($maybe_root) and $maybe_root->isa("B::OP")) {
347 # It really is the root of the replacement, not something
348 # else stored here for lack of space elsewhere
349 walk_topdown($maybe_root, $sub, $level + 1);
355 my($ar, $level) = @_;
357 if (ref($l) eq "ARRAY") {
358 walklines($l, $level + 1);
366 my($top, $level) = @_;
369 my @todo = ([$top, \@lines]);
370 while (@todo and my($op, $targ) = @{shift @todo}) {
371 for (; $$op; $op = $op->next) {
372 last if $opsseen{$$op}++;
374 my $name = $op->name;
375 if (class($op) eq "LOGOP") {
378 push @todo, [$op->other, $ar];
379 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
382 push @todo, [$op->pmreplstart, $ar];
383 } elsif ($name =~ /^enter(loop|iter)$/) {
384 $labels{${$op->nextop}} = "NEXT";
385 $labels{${$op->lastop}} = "LAST";
386 $labels{${$op->redoop}} = "REDO";
390 walklines(\@lines, 0);
393 # The structure of this routine is purposely modeled after op.c's peep()
397 return if class($op) eq "NULL" or exists $sequence_num{$$op};
398 for (; $$op; $op = $op->next) {
399 last if exists $sequence_num{$$op};
400 my $name = $op->name;
401 if ($name =~ /^(null|scalar|lineseq|scope)$/) {
402 next if $oldop and $ {$op->next};
404 $sequence_num{$$op} = $seq_max++;
405 if (class($op) eq "LOGOP") {
406 my $other = $op->other;
407 $other = $other->next while $other->name eq "null";
409 } elsif (class($op) eq "LOOP") {
410 my $redoop = $op->redoop;
411 $redoop = $redoop->next while $redoop->name eq "null";
413 my $nextop = $op->nextop;
414 $nextop = $nextop->next while $nextop->name eq "null";
416 my $lastop = $op->lastop;
417 $lastop = $lastop->next while $lastop->name eq "null";
419 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
420 my $replstart = $op->pmreplstart;
421 $replstart = $replstart->next while $replstart->name eq "null";
422 sequence($replstart);
429 sub fmt_line { # generate text-line for op.
430 my($hr, $text, $level) = @_;
431 return '' if $hr->{SKIP}; # suppress line if a callback said so
433 $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/
434 $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg;
436 $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs;
437 $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs;
438 $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs;
439 $text =~ s/\#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg;
441 $text =~ s/\#([a-zA-Z]+)/$hr->{$1}/eg; # populate data into template
442 $text =~ s/[ \t]*~+[ \t]*/ /g;
444 return "$text\n" if $text ne "";
445 return $text; # suppress empty lines
449 $priv{$_}{128} = "LVINTRO"
450 for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv",
451 "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv",
452 "padav", "padhv", "enteriter");
453 $priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite");
454 $priv{"aassign"}{64} = "COMMON";
455 $priv{"aassign"}{32} = "PHASH" if $] < 5.009;
456 $priv{"sassign"}{64} = "BKWARD";
457 $priv{$_}{64} = "RTIME" for ("match", "subst", "substcont");
458 @{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL",
460 $priv{"repeat"}{64} = "DOLIST";
461 $priv{"leaveloop"}{64} = "CONT";
462 @{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV")
463 for (qw(rv2gv rv2sv padsv aelem helem));
464 $priv{"entersub"}{16} = "DBG";
465 $priv{"entersub"}{32} = "TARG";
466 @{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv");
467 $priv{"gv"}{32} = "EARLYCV";
468 $priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER";
469 $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv",
471 $priv{$_}{16} = "TARGMY"
472 for (map(($_,"s$_"),"chop", "chomp"),
473 map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo",
474 "add", "subtract", "negate"), "pow", "concat", "stringify",
475 "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or",
476 "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt",
477 "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf",
478 "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock",
479 "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename",
480 "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system",
481 "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority",
482 "setpriority", "time", "sleep");
483 @{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN");
484 $priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM";
485 $priv{"list"}{64} = "GUESSED";
486 $priv{"delete"}{64} = "SLICE";
487 $priv{"exists"}{64} = "SUB";
488 $priv{$_}{64} = "LOCALE"
489 for ("sort", "prtf", "sprintf", "slt", "sle", "seq", "sne", "sgt", "sge",
490 "scmp", "lc", "uc", "lcfirst", "ucfirst");
491 @{$priv{"sort"}}{1,2,4,8} = ("NUM", "INT", "REV", "INPLACE");
492 $priv{"threadsv"}{64} = "SVREFd";
493 @{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR")
494 for ("open", "backtick");
495 $priv{"exit"}{128} = "VMS";
496 $priv{$_}{2} = "FTACCESS"
497 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec");
499 # Stacked filetests are post 5.8.x
500 $priv{$_}{4} = "FTSTACKED"
501 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec",
502 "ftis", "fteowned", "ftrowned", "ftzero", "ftsize", "ftmtime",
503 "ftatime", "ftctime", "ftsock", "ftchr", "ftblk", "ftfile", "ftdir",
504 "ftpipe", "ftlink", "ftsuid", "ftsgid", "ftsvtx", "fttty", "fttext",
506 # Lexical $_ is post 5.8.x
507 $priv{$_}{2} = "GREPLEX"
508 for ("mapwhile", "mapstart", "grepwhile", "grepstart");
514 for my $flag (128, 96, 64, 32, 16, 8, 4, 2, 1) {
515 if ($priv{$name}{$flag} and $x & $flag and $x >= $flag) {
517 push @s, $priv{$name}{$flag};
521 return join(",", @s);
526 $hr->{svclass} = class($sv);
527 $hr->{svclass} = "UV"
528 if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
529 $hr->{svaddr} = sprintf("%#x", $$sv);
530 if ($hr->{svclass} eq "GV") {
532 my $stash = $gv->STASH->NAME;
533 if ($stash eq "main") {
536 $stash = $stash . "::";
538 $hr->{svval} = "*$stash" . $gv->SAFENAME;
539 return "*$stash" . $gv->SAFENAME;
541 while (class($sv) eq "RV") {
542 $hr->{svval} .= "\\";
545 if (class($sv) eq "SPECIAL") {
546 $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
547 } elsif ($sv->FLAGS & SVf_NOK) {
548 $hr->{svval} .= $sv->NV;
549 } elsif ($sv->FLAGS & SVf_IOK) {
550 $hr->{svval} .= $sv->int_value;
551 } elsif ($sv->FLAGS & SVf_POK) {
552 $hr->{svval} .= cstring($sv->PV);
553 } elsif (class($sv) eq "HV") {
554 $hr->{svval} .= 'HASH';
556 return $hr->{svclass} . " " . $hr->{svval};
561 my ($op, $level, $format) = @_;
563 $h{exname} = $h{name} = $op->name;
564 $h{NAME} = uc $h{name};
565 $h{class} = class($op);
566 $h{extarg} = $h{targ} = $op->targ;
567 $h{extarg} = "" unless $h{extarg};
568 if ($h{name} eq "null" and $h{targ}) {
569 # targ holds the old type
570 $h{exname} = "ex-" . substr(ppname($h{targ}), 3);
572 } elsif ($op->name =~ /^leave(sub(lv)?|write)?$/) {
573 # targ potentially holds a reference count
574 if ($op->private & 64) {
575 my $refs = "ref" . ($h{targ} != 1 ? "s" : "");
576 $h{targarglife} = $h{targarg} = "$h{targ} $refs";
579 my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}];
580 if (defined $padname and class($padname) ne "SPECIAL") {
581 $h{targarg} = $padname->PVX;
582 if ($padname->FLAGS & SVf_FAKE) {
584 $h{targarglife} = "$h{targarg}:FAKE";
586 # These changes relate to the jumbo closure fix.
587 # See changes 19939 and 20005
589 $fake .= 'a' if $padname->IVX & 1; # PAD_FAKELEX_ANON
590 $fake .= 'm' if $padname->IVX & 2; # PAD_FAKELEX_MULTI
591 $fake .= ':' . $padname->NVX if $curcv->CvFLAGS & CVf_ANON;
592 $h{targarglife} = "$h{targarg}:FAKE:$fake";
596 my $intro = $padname->NVX - $cop_seq_base;
597 my $finish = int($padname->IVX) - $cop_seq_base;
598 $finish = "end" if $finish == 999999999 - $cop_seq_base;
599 $h{targarglife} = "$h{targarg}:$intro,$finish";
602 $h{targarglife} = $h{targarg} = "t" . $h{targ};
606 $h{svclass} = $h{svaddr} = $h{svval} = "";
607 if ($h{class} eq "PMOP") {
608 my $precomp = $op->precomp;
609 if (defined $precomp) {
610 $precomp = cstring($precomp); # Escape literal control sequences
611 $precomp = "/$precomp/";
615 my $pmreplroot = $op->pmreplroot;
617 if (ref($pmreplroot) eq "B::GV") {
618 # with C<@stash_array = split(/pat/, str);>,
619 # *stash_array is stored in /pat/'s pmreplroot.
620 $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")";
621 } elsif (!ref($pmreplroot) and $pmreplroot) {
622 # same as the last case, except the value is actually a
623 # pad offset for where the GV is kept (this happens under
625 my $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$pmreplroot];
626 $h{arg} = "($precomp => \@" . $gv->NAME . ")";
627 } elsif ($ {$op->pmreplstart}) {
629 $pmreplstart = "replstart->" . seq($op->pmreplstart);
630 $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")";
632 $h{arg} = "($precomp)";
634 } elsif ($h{class} eq "PVOP" and $h{name} ne "trans") {
635 $h{arg} = '("' . $op->pv . '")';
636 $h{svval} = '"' . $op->pv . '"';
637 } elsif ($h{class} eq "COP") {
638 my $label = $op->label;
639 $h{coplabel} = $label;
640 $label = $label ? "$label: " : "";
643 $loc .= ":" . $op->line;
644 my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base);
645 my $arybase = $op->arybase;
646 $arybase = $arybase ? ' $[=' . $arybase : "";
647 $h{arg} = "($label$stash $cseq $loc$arybase)";
648 } elsif ($h{class} eq "LOOP") {
649 $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop)
650 . " redo->" . seq($op->redoop) . ")";
651 } elsif ($h{class} eq "LOGOP") {
653 $h{arg} = "(other->" . seq($op->other) . ")";
654 } elsif ($h{class} eq "SVOP") {
655 unless ($h{name} eq 'aelemfast' and $op->flags & OPf_SPECIAL) {
657 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
658 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
659 $h{targarglife} = $h{targarg} = "";
661 $h{arg} = "(" . concise_sv($op->sv, \%h) . ")";
664 } elsif ($h{class} eq "PADOP") {
665 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->padix];
666 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
668 $h{seq} = $h{hyphseq} = seq($op);
669 $h{seq} = "" if $h{seq} eq "-";
671 $h{static} = $op->static;
672 $h{next} = $op->next;
673 $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next});
674 $h{nextaddr} = sprintf("%#x", $ {$op->next});
675 $h{sibaddr} = sprintf("%#x", $ {$op->sibling});
676 $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first");
677 $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last");
679 $h{classsym} = $opclass{$h{class}};
680 $h{flagval} = $op->flags;
681 $h{flags} = op_flags($op->flags);
682 $h{privval} = $op->private;
683 $h{private} = private_flags($h{name}, $op->private);
684 $h{addr} = sprintf("%#x", $$op);
685 $h{label} = $labels{$$op};
686 $h{typenum} = $op->type;
687 $h{noise} = $linenoise[$op->type];
689 $_->(\%h, $op, \$format, \$level, $stylename) for @callbacks;
690 return fmt_line(\%h, $format, $level);
694 my($op, $level) = @_;
695 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
696 # insert a 'goto' line
697 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
698 "addr" => sprintf("%#x", $$lastnext)};
699 print $walkHandle fmt_line($h, $gotofmt, $level+1);
701 $lastnext = $op->next;
702 print $walkHandle concise_op($op, $level, $format);
705 # B::OP::terse (see Terse.pm) now just calls this
707 my($op, $level) = @_;
709 # This isn't necessarily right, but there's no easy way to get
710 # from an OP to the right CV. This is a limitation of the
711 # ->terse() interface style, and there isn't much to do about
712 # it. In particular, we can die in concise_op if the main pad
713 # isn't long enough, or has the wrong kind of entries, compared to
714 # the pad a sub was compiled with. The fix for that would be to
715 # make a backwards compatible "terse" format that never even
716 # looked at the pad, just like the old B::Terse. I don't think
717 # that's worth the effort, though.
718 $curcv = main_cv unless $curcv;
720 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
722 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
723 "addr" => sprintf("%#x", $$lastnext)};
724 print fmt_line($h, $style{"terse"}[1], $level+1);
726 $lastnext = $op->next;
727 print concise_op($op, $level, $style{"terse"}[0]);
733 my $style = $tree_decorations[$tree_style];
734 my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style;
735 my $name = concise_op($op, $level, $treefmt);
736 if (not $op->flags & OPf_KIDS) {
740 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
741 push @lines, tree($kid, $level+1);
744 for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) {
745 $lines[$i] = $space . $lines[$i];
748 $lines[$i] = $last . $lines[$i];
750 if (substr($lines[$i], 0, 1) eq " ") {
751 $lines[$i] = $nokid . $lines[$i];
753 $lines[$i] = $kid . $lines[$i];
756 $lines[$i] = $kids . $lines[$i];
758 $lines[0] = $single . $lines[0];
760 return("$name$lead" . shift @lines,
761 map(" " x (length($name)+$size) . $_, @lines));
764 # *** Warning: fragile kludge ahead ***
765 # Because the B::* modules run in the same interpreter as the code
766 # they're compiling, their presence tends to distort the view we have of
767 # the code we're looking at. In particular, perl gives sequence numbers
768 # to COPs. If the program we're looking at were run on its own, this
769 # would start at 1. Because all of B::Concise and all the modules it
770 # uses are compiled first, though, by the time we get to the user's
771 # program the sequence number is already pretty high, which could be
772 # distracting if you're trying to tell OPs apart. Therefore we'd like to
773 # subtract an offset from all the sequence numbers we display, to
774 # restore the simpler view of the world. The trick is to know what that
775 # offset will be, when we're still compiling B::Concise! If we
776 # hardcoded a value, it would have to change every time B::Concise or
777 # other modules we use do. To help a little, what we do here is compile
778 # a little code at the end of the module, and compute the base sequence
779 # number for the user's program as being a small offset later, so all we
780 # have to worry about are changes in the offset.
782 # When you say "perl -MO=Concise -e '$a'", the output should look like:
784 # 4 <@> leave[t1] vKP/REFC ->(end)
786 #^ smallest OP sequence number should be 1
787 # 2 <;> nextstate(main 1 -e:1) v ->3
788 # ^ smallest COP sequence number should be 1
789 # - <1> ex-rv2sv vK/1 ->4
790 # 3 <$> gvsv(*a) s ->4
792 # If the second of the marked numbers there isn't 1, it means you need
793 # to update the corresponding magic number in the next line.
794 # Remember, this needs to stay the last things in the module.
796 # Why is this different for MacOS? Does it matter?
797 my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11;
798 $cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
806 B::Concise - Walk Perl syntax tree, printing concise info about ops
810 perl -MO=Concise[,OPTIONS] foo.pl
812 use B::Concise qw(set_style add_callback);
816 This compiler backend prints the internal OPs of a Perl program's syntax
817 tree in one of several space-efficient text formats suitable for debugging
818 the inner workings of perl or other compiler backends. It can print OPs in
819 the order they appear in the OP tree, in the order they will execute, or
820 in a text approximation to their tree structure, and the format of the
821 information displyed is customizable. Its function is similar to that of
822 perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
823 sophisticated and flexible.
827 Here's is a short example of output (aka 'rendering'), using the
828 default formatting conventions :
830 % perl -MO=Concise -e '$a = $b + 42'
831 8 <@> leave[1 ref] vKP/REFC ->(end)
833 2 <;> nextstate(main 1 -e:1) v ->3
834 7 <2> sassign vKS/2 ->8
835 5 <2> add[t1] sK/2 ->6
836 - <1> ex-rv2sv sK/1 ->4
838 4 <$> const(IV 42) s ->5
839 - <1> ex-rv2sv sKRM*/1 ->7
842 Each line corresponds to an opcode. Null ops appear as C<ex-opname>,
843 where I<opname> is the op that has been optimized away by perl.
845 The number on the first row indicates the op's sequence number. It's
846 given in base 36 by default.
848 The symbol between angle brackets indicates the op's type : for example,
849 <2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">).
851 The opname may be followed by op-specific information in parentheses
852 (e.g. C<gvsv(*b)>), and by targ information in brackets (e.g.
855 Next come the op flags. The common flags are listed below
856 (L</"OP flags abbreviations">). The private flags follow, separated
857 by a slash. For example, C<vKP/REFC> means that the leave op has
858 public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private
861 Finally an arrow points to the sequence number of the next op.
865 Arguments that don't start with a hyphen are taken to be the names of
866 subroutines to print the OPs of; if no such functions are specified,
867 the main body of the program (outside any subroutines, and not
868 including use'd or require'd files) is printed. Passing C<BEGIN>,
869 C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding
870 special blocks to be printed.
872 Options affect how things are rendered (ie printed). They're presented
873 here by their visual effect, 1st being strongest. They're grouped
874 according to how they interrelate; within each group the options are
875 mutually exclusive (unless otherwise stated).
877 =head2 Options for Opcode Ordering
879 These options control the 'vertical display' of opcodes. The display
880 'order' is also called 'mode' elsewhere in this document.
886 Print OPs in the order they appear in the OP tree (a preorder
887 traversal, starting at the root). The indentation of each OP shows its
888 level in the tree. This mode is the default, so the flag is included
889 simply for completeness.
893 Print OPs in the order they would normally execute (for the majority
894 of constructs this is a postorder traversal of the tree, ending at the
895 root). In most cases the OP that usually follows a given OP will
896 appear directly below it; alternate paths are shown by indentation. In
897 cases like loops when control jumps out of a linear path, a 'goto'
902 Print OPs in a text approximation of a tree, with the root of the tree
903 at the left and 'left-to-right' order of children transformed into
904 'top-to-bottom'. Because this mode grows both to the right and down,
905 it isn't suitable for large programs (unless you have a very wide
910 =head2 Options for Line-Style
912 These options select the line-style (or just style) used to render
913 each opcode, and dictates what info is actually printed into each line.
919 Use the author's favorite set of formatting conventions. This is the
924 Use formatting conventions that emulate the output of B<B::Terse>. The
925 basic mode is almost indistinguishable from the real B<B::Terse>, and the
926 exec mode looks very similar, but is in a more logical order and lacks
927 curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
928 is only vaguely reminiscent of B<B::Terse>.
932 Use formatting conventions in which the name of each OP, rather than being
933 written out in full, is represented by a one- or two-character abbreviation.
934 This is mainly a joke.
938 Use formatting conventions reminiscent of B<B::Debug>; these aren't
943 Use formatting conventions read from the environment variables
944 C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>.
948 =head2 Options for tree-specific formatting
954 Use a tree format in which the minimum amount of space is used for the
955 lines connecting nodes (one character in most cases). This squeezes out
956 a few precious columns of screen real estate.
960 Use a tree format that uses longer edges to separate OP nodes. This format
961 tends to look better than the compact one, especially in ASCII, and is
966 Use tree connecting characters drawn from the VT100 line-drawing set.
967 This looks better if your terminal supports it.
971 Draw the tree with standard ASCII characters like C<+> and C<|>. These don't
972 look as clean as the VT100 characters, but they'll work with almost any
973 terminal (or the horizontal scrolling mode of less(1)) and are suitable
974 for text documentation or email. This is the default.
978 These are pairwise exclusive, i.e. compact or loose, vt or ascii.
980 =head2 Options controlling sequence numbering
986 Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the
987 digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit
988 for 37 will be 'A', and so on until 62. Values greater than 62 are not
989 currently supported. The default is 36.
993 Print sequence numbers with the most significant digit first. This is the
994 usual convention for Arabic numerals, and the default.
996 =item B<-littleendian>
998 Print seqence numbers with the least significant digit first. This is
999 obviously mutually exclusive with bigendian.
1003 =head2 Other options
1009 Include the main program in the output, even if subroutines were also
1010 specified. This is the only option that is not sticky (see below)
1014 B::Concise::compile normally prints a banner line identifying the
1015 function name, or in case of a subref, a generic message including
1016 (unfortunately) the stringified coderef. This option suppresses the
1017 printing of the banner.
1021 =head2 Option Stickiness
1023 If you invoke Concise more than once in a program, you should know that
1024 the options are 'sticky'. This means that the options you provide in
1025 the first call will be remembered for the 2nd call, unless you
1026 re-specify or change them.
1028 =head1 FORMATTING SPECIFICATIONS
1030 For each line-style ('concise', 'terse', 'linenoise', etc.) there are
1031 3 format-specs which control how OPs are rendered.
1033 The first is the 'default' format, which is used in both basic and exec
1034 modes to print all opcodes. The 2nd, goto-format, is used in exec
1035 mode when branches are encountered. They're not real opcodes, and are
1036 inserted to look like a closing curly brace. The tree-format is tree
1039 When a line is rendered, the correct format string is scanned for the
1040 following items, and data is substituted in, or other manipulations,
1041 like basic indenting. Any text that doesn't match a special pattern
1042 (the items below) is copied verbatim. (Yes, it's a set of s///g steps.)
1046 =item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)>
1048 Generates I<exec_text> in exec mode, or I<basic_text> in basic mode.
1050 =item B<(*(>I<text>B<)*)>
1052 Generates one copy of I<text> for each indentation level.
1054 =item B<(*(>I<text1>B<;>I<text2>B<)*)>
1056 Generates one fewer copies of I<text1> than the indentation level, followed
1057 by one copy of I<text2> if the indentation level is more than 0.
1059 =item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)>
1061 If the value of I<var> is true (not empty or zero), generates the
1062 value of I<var> surrounded by I<text1> and I<Text2>, otherwise
1067 Generates the value of the variable I<var>.
1069 =item B<#>I<var>I<N>
1071 Generates the value of I<var>, left jutified to fill I<N> spaces.
1075 Any number of tildes and surrounding whitespace will be collapsed to
1080 The following variables are recognized:
1086 The address of the OP, in hexidecimal.
1090 The OP-specific information of the OP (such as the SV for an SVOP, the
1091 non-local exit pointers for a LOOP, etc.) enclosed in paretheses.
1095 The B-determined class of the OP, in all caps.
1099 A single symbol abbreviating the class of the OP.
1103 The label of the statement or block the OP is the start of, if any.
1107 The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo.
1111 The target of the OP, or nothing for a nulled OP.
1115 The address of the OP's first child, in hexidecimal.
1119 The OP's flags, abbreviated as a series of symbols.
1123 The numeric value of the OP's flags.
1127 The sequence number of the OP, or a hyphen if it doesn't have one.
1131 'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec
1132 mode, or empty otherwise.
1136 The address of the OP's last child, in hexidecimal.
1144 The OP's name, in all caps.
1148 The sequence number of the OP's next OP.
1152 The address of the OP's next OP, in hexidecimal.
1156 A one- or two-character abbreviation for the OP's name.
1160 The OP's private flags, rendered with abbreviated names if possible.
1164 The numeric value of the OP's private flags.
1168 The sequence number of the OP. Note that this is a sequence number
1169 generated by B::Concise.
1173 Whether or not the op has been optimised by the peephole optimiser.
1177 Whether or not the op is statically defined. This flag is used by the
1178 B::C compiler backend and indicates that the op should not be freed.
1182 The address of the OP's next youngest sibling, in hexidecimal.
1186 The address of the OP's SV, if it has an SV, in hexidecimal.
1190 The class of the OP's SV, if it has one, in all caps (e.g., 'IV').
1194 The value of the OP's SV, if it has one, in a short human-readable format.
1198 The numeric value of the OP's targ.
1202 The name of the variable the OP's targ refers to, if any, otherwise the
1203 letter t followed by the OP's targ in decimal.
1205 =item B<#targarglife>
1207 Same as B<#targarg>, but followed by the COP sequence numbers that delimit
1208 the variable's lifetime (or 'end' for a variable in an open scope) for a
1213 The numeric value of the OP's type, in decimal.
1217 =head1 ABBREVIATIONS
1219 =head2 OP flags abbreviations
1221 v OPf_WANT_VOID Want nothing (void context)
1222 s OPf_WANT_SCALAR Want single value (scalar context)
1223 l OPf_WANT_LIST Want list of any length (list context)
1224 K OPf_KIDS There is a firstborn child.
1225 P OPf_PARENS This operator was parenthesized.
1226 (Or block needs explicit scope entry.)
1227 R OPf_REF Certified reference.
1228 (Return container, not containee).
1229 M OPf_MOD Will modify (lvalue).
1230 S OPf_STACKED Some arg is arriving on the stack.
1231 * OPf_SPECIAL Do something weird for this op (see op.h)
1233 =head2 OP class abbreviations
1235 0 OP (aka BASEOP) An OP with no children
1236 1 UNOP An OP with one child
1237 2 BINOP An OP with two children
1238 | LOGOP A control branch OP
1239 @ LISTOP An OP that could have lots of children
1240 / PMOP An OP with a regular expression
1241 $ SVOP An OP with an SV
1242 " PVOP An OP with a string
1243 { LOOP An OP that holds pointers for a loop
1244 ; COP An OP that marks the start of a statement
1245 # PADOP An OP with a GV on the pad
1247 =head1 Using B::Concise outside of the O framework
1249 You can use B<B::Concise>, and call compile() directly, and
1250 repeatedly. By doing so, you can avoid the compile-time only
1251 operation of 'perl -MO=Concise ..'. For example, you can use the
1252 debugger to step through B::Concise::compile() itself.
1254 When doing so, you can alter Concise output by providing new output
1255 styles, and optionally by adding callback routines which populate new
1256 variables that may be rendered as part of those styles. For all
1257 following sections, please review L</FORMATTING SPECIFICATIONS>.
1259 =head2 Example: Altering Concise Renderings
1261 use B::Concise qw(set_style add_callback);
1262 set_style($your_format, $your_gotofmt, $your_treefmt);
1265 my ($h, $op, $format, $level, $stylename) = @_;
1266 $h->{variable} = some_func($op);
1269 B::Concise::compile(@options)->();
1273 B<set_style> accepts 3 arguments, and updates the three format-specs
1274 comprising a line-style (basic-exec, goto, tree). It has one minor
1275 drawback though; it doesn't register the style under a new name. This
1276 can become an issue if you render more than once and switch styles.
1277 Thus you may prefer to use add_style() and/or set_style_standard()
1280 =head2 set_style_standard($name)
1282 This restores one of the standard line-styles: C<terse>, C<concise>,
1283 C<linenoise>, C<debug>, C<env>, into effect. It also accepts style
1284 names previously defined with add_style().
1288 This subroutine accepts a new style name and three style arguments as
1289 above, and creates, registers, and selects the newly named style. It is
1290 an error to re-add a style; call set_style_standard() to switch between
1293 =head2 add_callback()
1295 If your newly minted styles refer to any #variables, you'll need to
1296 define a callback subroutine that will populate (or modify) those
1297 variables. They are then available for use in the style you've chosen.
1299 The callbacks are called for each opcode visited by Concise, in the
1300 same order as they are added. Each subroutine is passed five
1303 1. A hashref, containing the variable names and values which are
1304 populated into the report-line for the op
1305 2. the op, as a B<B::OP> object
1306 3. a reference to the format string
1307 4. the formatting (indent) level
1308 5. the selected stylename
1310 To define your own variables, simply add them to the hash, or change
1311 existing values if you need to. The level and format are passed in as
1312 references to scalars, but it is unlikely that they will need to be
1313 changed or even used.
1315 =head2 Running B::Concise::compile()
1317 B<compile> accepts options as described above in L</OPTIONS>, and
1318 arguments, which are either coderefs, or subroutine names.
1320 compile() constructs and returns a coderef, which when invoked, scans
1321 the optree, and prints the results to STDOUT. Once you have the
1322 coderef, you may change the output style; thereafter the coderef renders
1325 B<walk_output> lets you change the print destination from STDOUT to
1326 another open filehandle, or into a string passed as a ref.
1328 walk_output(\my $buf);
1329 my $walker = B::Concise::compile('-concise','funcName', \&aSubRef);
1330 print "Concise Banner for Functions: $buf\n";
1332 print "Concise Rendering(s)?: $buf\n";
1334 For each subroutine visited by Concise, the $buf will contain a
1335 banner naming the function or coderef about to be traversed.
1336 Once $walker is invoked, it prints the actual renderings for each.
1338 To switch back to one of the standard styles like C<concise> or
1339 C<terse>, call C<set_style_standard>, or pass the style name into
1340 B::Concise::compile() (as done above).
1342 =head2 B::Concise::reset_sequence()
1344 This function (not exported) lets you reset the sequence numbers (note
1345 that they're numbered arbitrarily, their goal being to be human
1346 readable). Its purpose is mostly to support testing, i.e. to compare
1347 the concise output from two identical anonymous subroutines (but
1348 different instances). Without the reset, B::Concise, seeing that
1349 they're separate optrees, generates different sequence numbers in
1354 All detected errors, (invalid arguments, internal errors, etc.) are
1355 resolved with a die($message). Use an eval if you wish to catch these
1356 errors and continue processing.
1358 In particular, B<compile> will die if you've asked for a non-existent
1359 function-name, a non-existent coderef, or a non-CODE reference.
1363 Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.