2 # Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
3 # This program is free software; you can redistribute and/or modify it
4 # under the same terms as Perl itself.
6 # Note: we need to keep track of how many use declarations/BEGIN
7 # blocks this module uses, so we can avoid printing them when user
8 # asks for the BEGIN blocks in her program. Update the comments and
9 # the count in concise_specials if you add or delete one. The
10 # -MO=Concise counts as use #1.
13 use warnings; # uses #3 and #4, since warnings uses Carp
15 use Exporter (); # use #5
17 our $VERSION = "0.60";
18 our @ISA = qw(Exporter);
19 our @EXPORT_OK = qw(set_style set_style_standard add_callback
20 concise_subref concise_cv concise_main
21 add_style walk_output);
24 use B qw(class ppname main_start main_root main_cv cstring svref_2object
25 SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS OPf_SPECIAL
30 ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) "
31 . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n",
32 "(*( )*)goto #class (#addr)\n",
35 ["#hyphseq2 (*( (x( ;)x))*)<#classsym> "
36 . "#exname#arg(?([#targarglife])?)~#flags(?(/#private)?)(x(;~->#next)x)\n",
37 " (*( )*) goto #seq\n",
38 "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"],
40 ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)",
42 "(?(#seq)?)#noise#arg(?([#targarg])?)"],
44 ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t"
45 . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n"
46 . "\top_flags\t#flagval\n\top_private\t#privval\n"
47 . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)"
48 . "(?(\top_sv\t\t#svaddr\n)?)",
51 "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT},
52 $ENV{B_CONCISE_TREE_FORMAT}],
55 my($format, $gotofmt, $treefmt);
62 ($format, $gotofmt, $treefmt) = @_;
63 die "expecting 3 style-format args\n" unless @_ == 3;
67 my ($newstyle,@args) = @_;
68 die "style '$newstyle' already exists, choose a new name\n"
69 if exists $style{$newstyle};
70 die "expecting 3 style-format args\n" unless @args == 3;
71 $style{$newstyle} = [@args];
74 sub set_style_standard {
76 die "err: style '$stylename' unknown\n" unless exists $style{$stylename};
77 set_style(@{$style{$stylename}});
84 # output handle, used with all Concise-output printing
85 our $walkHandle = \*STDOUT; # public for your convenience
87 sub walk_output { # updates $walkHandle
89 if (ref $handle eq 'SCALAR') {
90 # in 5.8+, open(FILEHANDLE,MODE,REFERENCE) writes to string
91 open my $tmp, '>', $handle; # but cant re-set an existing filehandle
92 $walkHandle = $tmp; # so use my $tmp as intermediate var
95 $walkHandle = $handle;
96 my $iotype = ref $walkHandle;
97 die "expecting argument/object that can print\n"
98 unless $iotype eq 'GLOB' or $iotype and $walkHandle->can('print');
102 my($order, $coderef) = @_;
103 my $codeobj = svref_2object($coderef);
104 die "err: not a coderef: $coderef\n" unless ref $codeobj eq 'B::CV';#CODE';
105 concise_cv_obj($order, $codeobj);
108 # This should have been called concise_subref, but it was exported
109 # under this name in versions before 0.56
110 sub concise_cv { concise_subref(@_); }
113 my ($order, $cv) = @_;
115 die "err: coderef has no START\n" if class($cv->START) eq "NULL";
116 sequence($cv->START);
117 if ($order eq "exec") {
118 walk_exec($cv->START);
119 } elsif ($order eq "basic") {
120 walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0);
122 print $walkHandle tree($cv->ROOT, 0);
128 sequence(main_start);
130 if ($order eq "exec") {
131 return if class(main_start) eq "NULL";
132 walk_exec(main_start);
133 } elsif ($order eq "tree") {
134 return if class(main_root) eq "NULL";
135 print $walkHandle tree(main_root, 0);
136 } elsif ($order eq "basic") {
137 return if class(main_root) eq "NULL";
138 walk_topdown(main_root,
139 sub { $_[0]->concise($_[1]) }, 0);
143 sub concise_specials {
144 my($name, $order, @cv_s) = @_;
146 if ($name eq "BEGIN") {
147 splice(@cv_s, 0, 7); # skip 7 BEGIN blocks in this file
148 } elsif ($name eq "CHECK") {
149 pop @cv_s; # skip the CHECK block that calls us
152 print $walkHandle "$name $i:\n";
154 concise_cv_obj($order, $cv);
158 my $start_sym = "\e(0"; # "\cN" sometimes also works
159 my $end_sym = "\e(B"; # "\cO" respectively
161 my @tree_decorations =
162 ([" ", "--", "+-", "|-", "| ", "`-", "-", 1],
163 [" ", "-", "+", "+", "|", "`", "", 0],
164 [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1],
165 [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0],
174 set_style_standard("concise");
177 my @options = grep(/^-/, @_);
178 my @args = grep(!/^-/, @_);
180 for my $o (@options) {
181 if ($o eq "-basic") {
183 } elsif ($o eq "-exec") {
185 } elsif ($o eq "-tree") {
187 } elsif ($o eq "-compact") {
189 } elsif ($o eq "-loose") {
191 } elsif ($o eq "-vt") {
193 } elsif ($o eq "-ascii") {
195 } elsif ($o eq "-main") {
197 } elsif ($o =~ /^-base(\d+)$/) {
199 } elsif ($o eq "-bigendian") {
201 } elsif ($o eq "-littleendian") {
203 } elsif (exists $style{substr($o, 1)}) {
204 $stylename = substr($o, 1);
205 set_style(@{$style{$stylename}});
207 warn "Option $o unrecognized";
212 for my $objname (@args) {
213 if ($objname eq "BEGIN") {
214 concise_specials("BEGIN", $order,
215 B::begin_av->isa("B::AV") ?
216 B::begin_av->ARRAY : ());
217 } elsif ($objname eq "INIT") {
218 concise_specials("INIT", $order,
219 B::init_av->isa("B::AV") ?
220 B::init_av->ARRAY : ());
221 } elsif ($objname eq "CHECK") {
222 concise_specials("CHECK", $order,
223 B::check_av->isa("B::AV") ?
224 B::check_av->ARRAY : ());
225 } elsif ($objname eq "END") {
226 concise_specials("END", $order,
227 B::end_av->isa("B::AV") ?
228 B::end_av->ARRAY : ());
230 # convert function names to subrefs
233 print $walkHandle "B::Concise::compile($objname)\n";
236 $objname = "main::" . $objname unless $objname =~ /::/;
237 print $walkHandle "$objname:\n";
239 die "err: unknown function ($objname)\n"
240 unless *{$objname}{CODE};
241 $objref = \&$objname;
243 concise_subref($order, $objref);
247 if (!@args or $do_main) {
248 print $walkHandle "main program:\n" if $do_main;
249 concise_main($order);
257 my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
258 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
259 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#");
261 no warnings 'qw'; # "Possible attempt to put comments..."; use #7
263 qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl
264 ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I
265 -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i<
266 > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i
267 ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy
268 uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@
269 a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s}
270 v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o
271 ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v
272 ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r
273 -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd
274 co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
275 g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
276 e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
277 Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
279 my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
284 push @v, "v" if ($x & 3) == 1;
285 push @v, "s" if ($x & 3) == 2;
286 push @v, "l" if ($x & 3) == 3;
287 push @v, "K" if $x & 4;
288 push @v, "P" if $x & 8;
289 push @v, "R" if $x & 16;
290 push @v, "M" if $x & 32;
291 push @v, "S" if $x & 64;
292 push @v, "*" if $x & 128;
298 return "-" . base_n(-$x) if $x < 0;
300 do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base);
301 $str = reverse $str if $big_endian;
316 return "-" if not exists $sequence_num{$$op};
317 return base_n($sequence_num{$$op});
321 my($op, $sub, $level) = @_;
323 if ($op->flags & OPf_KIDS) {
324 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
325 walk_topdown($kid, $sub, $level + 1);
328 if (class($op) eq "PMOP") {
329 my $maybe_root = $op->pmreplroot;
330 if (ref($maybe_root) and $maybe_root->isa("B::OP")) {
331 # It really is the root of the replacement, not something
332 # else stored here for lack of space elsewhere
333 walk_topdown($maybe_root, $sub, $level + 1);
339 my($ar, $level) = @_;
341 if (ref($l) eq "ARRAY") {
342 walklines($l, $level + 1);
350 my($top, $level) = @_;
353 my @todo = ([$top, \@lines]);
354 while (@todo and my($op, $targ) = @{shift @todo}) {
355 for (; $$op; $op = $op->next) {
356 last if $opsseen{$$op}++;
358 my $name = $op->name;
359 if (class($op) eq "LOGOP") {
362 push @todo, [$op->other, $ar];
363 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
366 push @todo, [$op->pmreplstart, $ar];
367 } elsif ($name =~ /^enter(loop|iter)$/) {
368 $labels{${$op->nextop}} = "NEXT";
369 $labels{${$op->lastop}} = "LAST";
370 $labels{${$op->redoop}} = "REDO";
374 walklines(\@lines, 0);
377 # The structure of this routine is purposely modeled after op.c's peep()
381 return if class($op) eq "NULL" or exists $sequence_num{$$op};
382 for (; $$op; $op = $op->next) {
383 last if exists $sequence_num{$$op};
384 my $name = $op->name;
385 if ($name =~ /^(null|scalar|lineseq|scope)$/) {
386 next if $oldop and $ {$op->next};
388 $sequence_num{$$op} = $seq_max++;
389 if (class($op) eq "LOGOP") {
390 my $other = $op->other;
391 $other = $other->next while $other->name eq "null";
393 } elsif (class($op) eq "LOOP") {
394 my $redoop = $op->redoop;
395 $redoop = $redoop->next while $redoop->name eq "null";
397 my $nextop = $op->nextop;
398 $nextop = $nextop->next while $nextop->name eq "null";
400 my $lastop = $op->lastop;
401 $lastop = $lastop->next while $lastop->name eq "null";
403 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
404 my $replstart = $op->pmreplstart;
405 $replstart = $replstart->next while $replstart->name eq "null";
406 sequence($replstart);
414 my($hr, $text, $level) = @_;
415 return '' if $hr->{SKIP}; # another way to suppress lines of output
417 $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/
418 $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg;
420 $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs;
421 $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs;
422 $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs;
423 $text =~ s/#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg;
424 $text =~ s/#([a-zA-Z]+)/$hr->{$1}/eg;
425 $text =~ s/[ \t]*~+[ \t]*/ /g;
427 return "$text\n" if $text ne "";
428 return $text; # suppress empty lines
432 $priv{$_}{128} = "LVINTRO"
433 for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv",
434 "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv",
435 "padav", "padhv", "enteriter");
436 $priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite");
437 $priv{"aassign"}{64} = "COMMON";
438 $priv{"aassign"}{32} = "PHASH" if $] < 5.009;
439 $priv{"sassign"}{64} = "BKWARD";
440 $priv{$_}{64} = "RTIME" for ("match", "subst", "substcont");
441 @{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL",
443 $priv{"repeat"}{64} = "DOLIST";
444 $priv{"leaveloop"}{64} = "CONT";
445 @{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV")
446 for ("entersub", map("rv2${_}v", "a", "s", "h", "g"), "aelem", "helem");
447 $priv{"entersub"}{16} = "DBG";
448 $priv{"entersub"}{32} = "TARG";
449 @{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv");
450 $priv{"gv"}{32} = "EARLYCV";
451 $priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER";
452 $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv",
454 $priv{$_}{16} = "TARGMY"
455 for (map(($_,"s$_"),"chop", "chomp"),
456 map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo",
457 "add", "subtract", "negate"), "pow", "concat", "stringify",
458 "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or",
459 "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt",
460 "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf",
461 "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock",
462 "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename",
463 "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system",
464 "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority",
465 "setpriority", "time", "sleep");
466 @{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN");
467 $priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM";
468 $priv{"list"}{64} = "GUESSED";
469 $priv{"delete"}{64} = "SLICE";
470 $priv{"exists"}{64} = "SUB";
471 $priv{$_}{64} = "LOCALE"
472 for ("sort", "prtf", "sprintf", "slt", "sle", "seq", "sne", "sgt", "sge",
473 "scmp", "lc", "uc", "lcfirst", "ucfirst");
474 @{$priv{"sort"}}{1,2,4,8} = ("NUM", "INT", "REV", "INPLACE");
475 $priv{"threadsv"}{64} = "SVREFd";
476 @{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR")
477 for ("open", "backtick");
478 $priv{"exit"}{128} = "VMS";
479 $priv{$_}{2} = "FTACCESS"
480 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec");
481 $priv{$_}{4} = "FTSTACKED"
482 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec",
483 "ftis", "fteowned", "ftrowned", "ftzero", "ftsize", "ftmtime",
484 "ftatime", "ftctime", "ftsock", "ftchr", "ftblk", "ftfile", "ftdir",
485 "ftpipe", "ftlink", "ftsuid", "ftsgid", "ftsvtx", "fttty", "fttext",
487 $priv{$_}{2} = "GREPLEX"
488 for ("mapwhile", "mapstart", "grepwhile", "grepstart");
493 for my $flag (128, 96, 64, 32, 16, 8, 4, 2, 1) {
494 if ($priv{$name}{$flag} and $x & $flag and $x >= $flag) {
496 push @s, $priv{$name}{$flag};
500 return join(",", @s);
505 $hr->{svclass} = class($sv);
506 $hr->{svclass} = "UV"
507 if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
508 $hr->{svaddr} = sprintf("%#x", $$sv);
509 if ($hr->{svclass} eq "GV") {
511 my $stash = $gv->STASH->NAME;
512 if ($stash eq "main") {
515 $stash = $stash . "::";
517 $hr->{svval} = "*$stash" . $gv->SAFENAME;
518 return "*$stash" . $gv->SAFENAME;
520 while (class($sv) eq "RV") {
521 $hr->{svval} .= "\\";
524 if (class($sv) eq "SPECIAL") {
525 $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
526 } elsif ($sv->FLAGS & SVf_NOK) {
527 $hr->{svval} .= $sv->NV;
528 } elsif ($sv->FLAGS & SVf_IOK) {
529 $hr->{svval} .= $sv->int_value;
530 } elsif ($sv->FLAGS & SVf_POK) {
531 $hr->{svval} .= cstring($sv->PV);
532 } elsif (class($sv) eq "HV") {
533 $hr->{svval} .= 'HASH';
535 return $hr->{svclass} . " " . $hr->{svval};
540 my ($op, $level, $format) = @_;
542 $h{exname} = $h{name} = $op->name;
543 $h{NAME} = uc $h{name};
544 $h{class} = class($op);
545 $h{extarg} = $h{targ} = $op->targ;
546 $h{extarg} = "" unless $h{extarg};
547 if ($h{name} eq "null" and $h{targ}) {
548 # targ holds the old type
549 $h{exname} = "ex-" . substr(ppname($h{targ}), 3);
551 } elsif ($op->name =~ /^leave(sub(lv)?|write)?$/) {
552 # targ potentially holds a reference count
553 if ($op->private & 64) {
554 my $refs = "ref" . ($h{targ} != 1 ? "s" : "");
555 $h{targarglife} = $h{targarg} = "$h{targ} $refs";
558 my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}];
559 if (defined $padname and class($padname) ne "SPECIAL") {
560 $h{targarg} = $padname->PVX;
561 if ($padname->FLAGS & SVf_FAKE) {
563 $h{targarglife} = "$h{targarg}:FAKE";
565 # These changes relate to the jumbo closure fix.
566 # See changes 19939 and 20005
568 $fake .= 'a' if $padname->IVX & 1; # PAD_FAKELEX_ANON
569 $fake .= 'm' if $padname->IVX & 2; # PAD_FAKELEX_MULTI
570 $fake .= ':' . $padname->NVX if $curcv->CvFLAGS & CVf_ANON;
571 $h{targarglife} = "$h{targarg}:FAKE:$fake";
575 my $intro = $padname->NVX - $cop_seq_base;
576 my $finish = int($padname->IVX) - $cop_seq_base;
577 $finish = "end" if $finish == 999999999 - $cop_seq_base;
578 $h{targarglife} = "$h{targarg}:$intro,$finish";
581 $h{targarglife} = $h{targarg} = "t" . $h{targ};
585 $h{svclass} = $h{svaddr} = $h{svval} = "";
586 if ($h{class} eq "PMOP") {
587 my $precomp = $op->precomp;
588 if (defined $precomp) {
589 $precomp = cstring($precomp); # Escape literal control sequences
590 $precomp = "/$precomp/";
594 my $pmreplroot = $op->pmreplroot;
596 if (ref($pmreplroot) eq "B::GV") {
597 # with C<@stash_array = split(/pat/, str);>,
598 # *stash_array is stored in /pat/'s pmreplroot.
599 $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")";
600 } elsif (!ref($pmreplroot) and $pmreplroot) {
601 # same as the last case, except the value is actually a
602 # pad offset for where the GV is kept (this happens under
604 my $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$pmreplroot];
605 $h{arg} = "($precomp => \@" . $gv->NAME . ")";
606 } elsif ($ {$op->pmreplstart}) {
608 $pmreplstart = "replstart->" . seq($op->pmreplstart);
609 $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")";
611 $h{arg} = "($precomp)";
613 } elsif ($h{class} eq "PVOP" and $h{name} ne "trans") {
614 $h{arg} = '("' . $op->pv . '")';
615 $h{svval} = '"' . $op->pv . '"';
616 } elsif ($h{class} eq "COP") {
617 my $label = $op->label;
618 $h{coplabel} = $label;
619 $label = $label ? "$label: " : "";
622 $loc .= ":" . $op->line;
623 my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base);
624 my $arybase = $op->arybase;
625 $arybase = $arybase ? ' $[=' . $arybase : "";
626 $h{arg} = "($label$stash $cseq $loc$arybase)";
627 } elsif ($h{class} eq "LOOP") {
628 $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop)
629 . " redo->" . seq($op->redoop) . ")";
630 } elsif ($h{class} eq "LOGOP") {
632 $h{arg} = "(other->" . seq($op->other) . ")";
633 } elsif ($h{class} eq "SVOP") {
634 unless ($h{name} eq 'aelemfast' and $op->flags & OPf_SPECIAL) {
636 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
637 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
638 $h{targarglife} = $h{targarg} = "";
640 $h{arg} = "(" . concise_sv($op->sv, \%h) . ")";
643 } elsif ($h{class} eq "PADOP") {
644 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->padix];
645 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
647 $h{seq} = $h{hyphseq} = seq($op);
648 $h{seq} = "" if $h{seq} eq "-";
650 $h{static} = $op->static;
651 $h{next} = $op->next;
652 $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next});
653 $h{nextaddr} = sprintf("%#x", $ {$op->next});
654 $h{sibaddr} = sprintf("%#x", $ {$op->sibling});
655 $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first");
656 $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last");
658 $h{classsym} = $opclass{$h{class}};
659 $h{flagval} = $op->flags;
660 $h{flags} = op_flags($op->flags);
661 $h{privval} = $op->private;
662 $h{private} = private_flags($h{name}, $op->private);
663 $h{addr} = sprintf("%#x", $$op);
664 $h{label} = $labels{$$op};
665 $h{typenum} = $op->type;
666 $h{noise} = $linenoise[$op->type];
668 $_->(\%h, $op, \$format, \$level, $stylename) for @callbacks;
669 return fmt_line(\%h, $format, $level);
673 my($op, $level) = @_;
674 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
675 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
676 "addr" => sprintf("%#x", $$lastnext)};
677 print $walkHandle fmt_line($h, $gotofmt, $level+1);
679 $lastnext = $op->next;
680 print $walkHandle concise_op($op, $level, $format);
683 # B::OP::terse (see Terse.pm) now just calls this
685 my($op, $level) = @_;
687 # This isn't necessarily right, but there's no easy way to get
688 # from an OP to the right CV. This is a limitation of the
689 # ->terse() interface style, and there isn't much to do about
690 # it. In particular, we can die in concise_op if the main pad
691 # isn't long enough, or has the wrong kind of entries, compared to
692 # the pad a sub was compiled with. The fix for that would be to
693 # make a backwards compatible "terse" format that never even
694 # looked at the pad, just like the old B::Terse. I don't think
695 # that's worth the effort, though.
696 $curcv = main_cv unless $curcv;
698 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
699 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
700 "addr" => sprintf("%#x", $$lastnext)};
701 print fmt_line($h, $style{"terse"}[1], $level+1);
703 $lastnext = $op->next;
704 print concise_op($op, $level, $style{"terse"}[0]);
710 my $style = $tree_decorations[$tree_style];
711 my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style;
712 my $name = concise_op($op, $level, $treefmt);
713 if (not $op->flags & OPf_KIDS) {
717 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
718 push @lines, tree($kid, $level+1);
721 for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) {
722 $lines[$i] = $space . $lines[$i];
725 $lines[$i] = $last . $lines[$i];
727 if (substr($lines[$i], 0, 1) eq " ") {
728 $lines[$i] = $nokid . $lines[$i];
730 $lines[$i] = $kid . $lines[$i];
733 $lines[$i] = $kids . $lines[$i];
735 $lines[0] = $single . $lines[0];
737 return("$name$lead" . shift @lines,
738 map(" " x (length($name)+$size) . $_, @lines));
741 # *** Warning: fragile kludge ahead ***
742 # Because the B::* modules run in the same interpreter as the code
743 # they're compiling, their presence tends to distort the view we have of
744 # the code we're looking at. In particular, perl gives sequence numbers
745 # to COPs. If the program we're looking at were run on its own, this
746 # would start at 1. Because all of B::Concise and all the modules it
747 # uses are compiled first, though, by the time we get to the user's
748 # program the sequence number is already pretty high, which could be
749 # distracting if you're trying to tell OPs apart. Therefore we'd like to
750 # subtract an offset from all the sequence numbers we display, to
751 # restore the simpler view of the world. The trick is to know what that
752 # offset will be, when we're still compiling B::Concise! If we
753 # hardcoded a value, it would have to change every time B::Concise or
754 # other modules we use do. To help a little, what we do here is compile
755 # a little code at the end of the module, and compute the base sequence
756 # number for the user's program as being a small offset later, so all we
757 # have to worry about are changes in the offset.
759 # When you say "perl -MO=Concise -e '$a'", the output should look like:
761 # 4 <@> leave[t1] vKP/REFC ->(end)
763 #^ smallest OP sequence number should be 1
764 # 2 <;> nextstate(main 1 -e:1) v ->3
765 # ^ smallest COP sequence number should be 1
766 # - <1> ex-rv2sv vK/1 ->4
767 # 3 <$> gvsv(*a) s ->4
769 # If the second of the marked numbers there isn't 1, it means you need
770 # to update the corresponding magic number in the next line.
771 # Remember, this needs to stay the last things in the module.
773 # Why is this different for MacOS? Does it matter?
774 my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11;
775 $cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
783 B::Concise - Walk Perl syntax tree, printing concise info about ops
787 perl -MO=Concise[,OPTIONS] foo.pl
789 use B::Concise qw(set_style add_callback);
793 This compiler backend prints the internal OPs of a Perl program's syntax
794 tree in one of several space-efficient text formats suitable for debugging
795 the inner workings of perl or other compiler backends. It can print OPs in
796 the order they appear in the OP tree, in the order they will execute, or
797 in a text approximation to their tree structure, and the format of the
798 information displyed is customizable. Its function is similar to that of
799 perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
800 sophisticated and flexible.
804 Here's is a short example of output, using the default formatting
807 % perl -MO=Concise -e '$a = $b + 42'
808 8 <@> leave[1 ref] vKP/REFC ->(end)
810 2 <;> nextstate(main 1 -e:1) v ->3
811 7 <2> sassign vKS/2 ->8
812 5 <2> add[t1] sK/2 ->6
813 - <1> ex-rv2sv sK/1 ->4
815 4 <$> const(IV 42) s ->5
816 - <1> ex-rv2sv sKRM*/1 ->7
819 Each line corresponds to an operator. Null ops appear as C<ex-opname>,
820 where I<opname> is the op that has been optimized away by perl.
822 The number on the first row indicates the op's sequence number. It's
823 given in base 36 by default.
825 The symbol between angle brackets indicates the op's type : for example,
826 <2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">).
828 The opname may be followed by op-specific information in parentheses
829 (e.g. C<gvsv(*b)>), and by targ information in brackets (e.g.
832 Next come the op flags. The common flags are listed below
833 (L</"OP flags abbreviations">). The private flags follow, separated
834 by a slash. For example, C<vKP/REFC> means that the leave op has
835 public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private
838 Finally an arrow points to the sequence number of the next op.
842 Arguments that don't start with a hyphen are taken to be the names of
843 subroutines to print the OPs of; if no such functions are specified,
844 the main body of the program (outside any subroutines, and not
845 including use'd or require'd files) is printed. Passing C<BEGIN>,
846 C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding
847 special blocks to be printed.
853 Print OPs in the order they appear in the OP tree (a preorder
854 traversal, starting at the root). The indentation of each OP shows its
855 level in the tree. This mode is the default, so the flag is included
856 simply for completeness.
860 Print OPs in the order they would normally execute (for the majority
861 of constructs this is a postorder traversal of the tree, ending at the
862 root). In most cases the OP that usually follows a given OP will
863 appear directly below it; alternate paths are shown by indentation. In
864 cases like loops when control jumps out of a linear path, a 'goto'
869 Print OPs in a text approximation of a tree, with the root of the tree
870 at the left and 'left-to-right' order of children transformed into
871 'top-to-bottom'. Because this mode grows both to the right and down,
872 it isn't suitable for large programs (unless you have a very wide
877 Use a tree format in which the minimum amount of space is used for the
878 lines connecting nodes (one character in most cases). This squeezes out
879 a few precious columns of screen real estate.
883 Use a tree format that uses longer edges to separate OP nodes. This format
884 tends to look better than the compact one, especially in ASCII, and is
889 Use tree connecting characters drawn from the VT100 line-drawing set.
890 This looks better if your terminal supports it.
894 Draw the tree with standard ASCII characters like C<+> and C<|>. These don't
895 look as clean as the VT100 characters, but they'll work with almost any
896 terminal (or the horizontal scrolling mode of less(1)) and are suitable
897 for text documentation or email. This is the default.
901 Include the main program in the output, even if subroutines were also
906 Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the
907 digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit
908 for 37 will be 'A', and so on until 62. Values greater than 62 are not
909 currently supported. The default is 36.
913 Print sequence numbers with the most significant digit first. This is the
914 usual convention for Arabic numerals, and the default.
916 =item B<-littleendian>
918 Print seqence numbers with the least significant digit first.
922 Use the author's favorite set of formatting conventions. This is the
927 Use formatting conventions that emulate the output of B<B::Terse>. The
928 basic mode is almost indistinguishable from the real B<B::Terse>, and the
929 exec mode looks very similar, but is in a more logical order and lacks
930 curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
931 is only vaguely reminiscient of B<B::Terse>.
935 Use formatting conventions in which the name of each OP, rather than being
936 written out in full, is represented by a one- or two-character abbreviation.
937 This is mainly a joke.
941 Use formatting conventions reminiscient of B<B::Debug>; these aren't
946 Use formatting conventions read from the environment variables
947 C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>.
951 =head1 FORMATTING SPECIFICATIONS
953 For each general style ('concise', 'terse', 'linenoise', etc.) there are
954 three specifications: one of how OPs should appear in the basic or exec
955 modes, one of how 'goto' lines should appear (these occur in the exec
956 mode only), and one of how nodes should appear in tree mode. Each has the
957 same format, described below. Any text that doesn't match a special
958 pattern is copied verbatim.
962 =item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)>
964 Generates I<exec_text> in exec mode, or I<basic_text> in basic mode.
966 =item B<(*(>I<text>B<)*)>
968 Generates one copy of I<text> for each indentation level.
970 =item B<(*(>I<text1>B<;>I<text2>B<)*)>
972 Generates one fewer copies of I<text1> than the indentation level, followed
973 by one copy of I<text2> if the indentation level is more than 0.
975 =item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)>
977 If the value of I<var> is true (not empty or zero), generates the
978 value of I<var> surrounded by I<text1> and I<Text2>, otherwise
983 Generates the value of the variable I<var>.
987 Generates the value of I<var>, left jutified to fill I<N> spaces.
991 Any number of tildes and surrounding whitespace will be collapsed to
996 The following variables are recognized:
1002 The address of the OP, in hexidecimal.
1006 The OP-specific information of the OP (such as the SV for an SVOP, the
1007 non-local exit pointers for a LOOP, etc.) enclosed in paretheses.
1011 The B-determined class of the OP, in all caps.
1015 A single symbol abbreviating the class of the OP.
1019 The label of the statement or block the OP is the start of, if any.
1023 The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo.
1027 The target of the OP, or nothing for a nulled OP.
1031 The address of the OP's first child, in hexidecimal.
1035 The OP's flags, abbreviated as a series of symbols.
1039 The numeric value of the OP's flags.
1043 The sequence number of the OP, or a hyphen if it doesn't have one.
1047 'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec
1048 mode, or empty otherwise.
1052 The address of the OP's last child, in hexidecimal.
1060 The OP's name, in all caps.
1064 The sequence number of the OP's next OP.
1068 The address of the OP's next OP, in hexidecimal.
1072 A one- or two-character abbreviation for the OP's name.
1076 The OP's private flags, rendered with abbreviated names if possible.
1080 The numeric value of the OP's private flags.
1084 The sequence number of the OP. Note that this is a sequence number
1085 generated by B::Concise.
1089 Whether or not the op has been optimised by the peephole optimiser.
1093 Whether or not the op is statically defined. This flag is used by the
1094 B::C compiler backend and indicates that the op should not be freed.
1098 The address of the OP's next youngest sibling, in hexidecimal.
1102 The address of the OP's SV, if it has an SV, in hexidecimal.
1106 The class of the OP's SV, if it has one, in all caps (e.g., 'IV').
1110 The value of the OP's SV, if it has one, in a short human-readable format.
1114 The numeric value of the OP's targ.
1118 The name of the variable the OP's targ refers to, if any, otherwise the
1119 letter t followed by the OP's targ in decimal.
1121 =item B<#targarglife>
1123 Same as B<#targarg>, but followed by the COP sequence numbers that delimit
1124 the variable's lifetime (or 'end' for a variable in an open scope) for a
1129 The numeric value of the OP's type, in decimal.
1133 =head1 ABBREVIATIONS
1135 =head2 OP flags abbreviations
1137 v OPf_WANT_VOID Want nothing (void context)
1138 s OPf_WANT_SCALAR Want single value (scalar context)
1139 l OPf_WANT_LIST Want list of any length (list context)
1140 K OPf_KIDS There is a firstborn child.
1141 P OPf_PARENS This operator was parenthesized.
1142 (Or block needs explicit scope entry.)
1143 R OPf_REF Certified reference.
1144 (Return container, not containee).
1145 M OPf_MOD Will modify (lvalue).
1146 S OPf_STACKED Some arg is arriving on the stack.
1147 * OPf_SPECIAL Do something weird for this op (see op.h)
1149 =head2 OP class abbreviations
1151 0 OP (aka BASEOP) An OP with no children
1152 1 UNOP An OP with one child
1153 2 BINOP An OP with two children
1154 | LOGOP A control branch OP
1155 @ LISTOP An OP that could have lots of children
1156 / PMOP An OP with a regular expression
1157 $ SVOP An OP with an SV
1158 " PVOP An OP with a string
1159 { LOOP An OP that holds pointers for a loop
1160 ; COP An OP that marks the start of a statement
1161 # PADOP An OP with a GV on the pad
1163 =head1 Using B::Concise outside of the O framework
1165 You can use B<B::Concise>, and call compile() directly, thereby
1166 avoiding the compile-only operation of O. For example, you could use
1167 the debugger to step through B::Concise::compile() itself.
1169 When doing so, you can alter Concise output by providing new output
1170 styles, and optionally by adding callback routines which populate new
1171 variables that may be rendered as part of those styles. For all
1172 following sections, please review L</FORMATTING SPECIFICATIONS>.
1174 =head2 example: Altering Concise Output
1176 use B::Concise qw(set_style add_callback);
1177 set_style($your_format, $your_gotofmt, $your_treefmt);
1180 my ($h, $op, $format, $level, $stylename) = @_;
1181 $h->{variable} = some_func($op);
1184 B::Concise::compile(@options)->();
1188 B<set_style> accepts 3 arguments, and updates the three components of an
1189 output style (basic-exec, goto, tree). It has one minor drawback though:
1190 it doesn't register the style under a new name, thus you may prefer to use
1191 add_style() and/or set_style_standard() instead.
1195 This subroutine accepts a new style name and three style arguments as
1196 above, and creates, registers, and selects the newly named style. It is
1197 an error to re-add a style; call set_style_standard() to switch between
1200 =head2 set_style_standard($name)
1202 This restores one of the standard styles: C<terse>, C<concise>,
1203 C<linenoise>, C<debug>, C<env>, into effect. It also accepts style
1204 names previously defined with add_style().
1206 =head2 add_callback()
1208 If your newly minted styles refer to any #variables, you'll need to
1209 define a callback subroutine that will populate (or modify) those
1210 variables. They are then available for use in the style you've chosen.
1212 The callbacks are called for each opcode visited by Concise, in the
1213 same order as they are added. Each subroutine is passed five
1216 1. A hashref, containing the variable names and values which are
1217 populated into the report-line for the op
1218 2. the op, as a B<B::OP> object
1219 3. a reference to the format string
1220 4. the formatting (indent) level
1221 5. the selected stylename
1223 To define your own variables, simply add them to the hash, or change
1224 existing values if you need to. The level and format are passed in as
1225 references to scalars, but it is unlikely that they will need to be
1226 changed or even used.
1228 =head2 running B::Concise::compile()
1230 B<compile> accepts options as described above in L</OPTIONS>, and
1231 arguments, which are either coderefs, or subroutine names.
1233 compile() constructs and returns a coderef, which when invoked, scans
1234 the optree, and prints the results to STDOUT. Once you have the
1235 coderef, you may change the output style; thereafter the coderef renders
1238 B<walk_output> lets you change the print destination from STDOUT to
1239 another open filehandle, or into a string passed as a ref.
1241 walk_output(\my $buf);
1242 B::Concise::compile('-concise','funcName', \&aSubRef)->();
1243 print "Concise Results: $buf\n";
1245 For each subroutine visited, the opcode info is preceded by a single
1246 line containing either the subroutine name or the stringified coderef.
1248 To switch back to one of the standard styles like C<concise> or
1249 C<terse>, call C<set_style_standard>, or pass the style name into
1250 B::Concise::compile() (as done above).
1252 =head2 B::Concise::reset_sequence()
1254 This function (not exported) lets you reset the sequence numbers (note
1255 that they're numbered arbitrarily, their goal being to be human
1256 readable). Its purpose is mostly to support testing, i.e. to compare
1257 the concise output from two identical anonymous subroutines (but
1258 different instances). Without the reset, B::Concise, seeing that
1259 they're separate optrees, generates different sequence numbers in
1264 All detected errors, (invalid arguments, internal errors, etc.) are
1265 resolved with a die($message). Use an eval if you wish to catch these
1266 errors and continue processing.
1268 In particular, B<compile> will die as follows if you've asked for a
1269 non-existent function-name, a non-existent coderef, or a non-CODE
1274 Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.