open.pm: allow upper-cased EUC
[p5sagit/p5-mst-13.2.git] / ext / B / B / Concise.pm
CommitLineData
c99ca59a 1package B::Concise;
c27ea44e 2# Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
c99ca59a 3# This program is free software; you can redistribute and/or modify it
4# under the same terms as Perl itself.
5
8ec8fbef 6# Note: we need to keep track of how many use declarations/BEGIN
7# blocks this module uses, so we can avoid printing them when user
8# asks for the BEGIN blocks in her program. Update the comments and
9# the count in concise_specials if you add or delete one. The
10# -MO=Concise counts as use #1.
78ad9108 11
8ec8fbef 12use strict; # use #2
13use warnings; # uses #3 and #4, since warnings uses Carp
78ad9108 14
8ec8fbef 15use Exporter (); # use #5
16
2814eb74 17our $VERSION = "0.59";
78ad9108 18our @ISA = qw(Exporter);
31b49ad4 19our @EXPORT_OK = qw(set_style set_style_standard add_callback
8ec8fbef 20 concise_subref concise_cv concise_main);
78ad9108 21
8ec8fbef 22# use #6
c99ca59a 23use B qw(class ppname main_start main_root main_cv cstring svref_2object
6a077020 24 SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS OPf_SPECIAL
25 CVf_ANON);
c99ca59a 26
27my %style =
28 ("terse" =>
c3caa09d 29 ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) "
30 . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n",
c99ca59a 31 "(*( )*)goto #class (#addr)\n",
32 "#class pp_#name"],
33 "concise" =>
34 ["#hyphseq2 (*( (x( ;)x))*)<#classsym> "
35 . "#exname#arg(?([#targarglife])?)~#flags(?(/#private)?)(x(;~->#next)x)\n",
36 " (*( )*) goto #seq\n",
37 "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"],
38 "linenoise" =>
39 ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)",
40 "gt_#seq ",
41 "(?(#seq)?)#noise#arg(?([#targarg])?)"],
42 "debug" =>
43 ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t"
2814eb74 44 . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n"
45 . "\top_flags\t#flagval\n\top_private\t#privval\n"
c99ca59a 46 . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)"
47 . "(?(\top_sv\t\t#svaddr\n)?)",
48 " GOTO #addr\n",
49 "#addr"],
50 "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT},
51 $ENV{B_CONCISE_TREE_FORMAT}],
52 );
53
54my($format, $gotofmt, $treefmt);
55my $curcv;
c27ea44e 56my $cop_seq_base;
78ad9108 57my @callbacks;
58
59sub set_style {
60 ($format, $gotofmt, $treefmt) = @_;
61}
62
31b49ad4 63sub set_style_standard {
64 my($name) = @_;
65 set_style(@{$style{$name}});
66}
67
78ad9108 68sub add_callback {
69 push @callbacks, @_;
70}
c99ca59a 71
8ec8fbef 72sub concise_subref {
73 my($order, $subref) = @_;
74 concise_cv_obj($order, svref_2object($subref));
75}
76
77# This should have been called concise_subref, but it was exported
78# under this name in versions before 0.56
79sub concise_cv { concise_subref(@_); }
80
81sub concise_cv_obj {
82 my ($order, $cv) = @_;
c99ca59a 83 $curcv = $cv;
c27ea44e 84 sequence($cv->START);
c99ca59a 85 if ($order eq "exec") {
86 walk_exec($cv->START);
87 } elsif ($order eq "basic") {
88 walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0);
89 } else {
90 print tree($cv->ROOT, 0)
91 }
92}
93
31b49ad4 94sub concise_main {
95 my($order) = @_;
96 sequence(main_start);
97 $curcv = main_cv;
98 if ($order eq "exec") {
99 return if class(main_start) eq "NULL";
100 walk_exec(main_start);
101 } elsif ($order eq "tree") {
102 return if class(main_root) eq "NULL";
103 print tree(main_root, 0);
104 } elsif ($order eq "basic") {
105 return if class(main_root) eq "NULL";
106 walk_topdown(main_root,
107 sub { $_[0]->concise($_[1]) }, 0);
108 }
109}
110
8ec8fbef 111sub concise_specials {
112 my($name, $order, @cv_s) = @_;
113 my $i = 1;
114 if ($name eq "BEGIN") {
115 splice(@cv_s, 0, 7); # skip 7 BEGIN blocks in this file
116 } elsif ($name eq "CHECK") {
117 pop @cv_s; # skip the CHECK block that calls us
118 }
119 for my $cv (@cv_s) {
120 print "$name $i:\n";
121 $i++;
122 concise_cv_obj($order, $cv);
123 }
124}
125
c99ca59a 126my $start_sym = "\e(0"; # "\cN" sometimes also works
127my $end_sym = "\e(B"; # "\cO" respectively
128
129my @tree_decorations =
130 ([" ", "--", "+-", "|-", "| ", "`-", "-", 1],
131 [" ", "-", "+", "+", "|", "`", "", 0],
132 [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1],
133 [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0],
134 );
135my $tree_style = 0;
136
137my $base = 36;
138my $big_endian = 1;
139
140my $order = "basic";
141
31b49ad4 142set_style_standard("concise");
78ad9108 143
c99ca59a 144sub compile {
145 my @options = grep(/^-/, @_);
146 my @args = grep(!/^-/, @_);
147 my $do_main = 0;
c99ca59a 148 for my $o (@options) {
149 if ($o eq "-basic") {
150 $order = "basic";
151 } elsif ($o eq "-exec") {
152 $order = "exec";
153 } elsif ($o eq "-tree") {
154 $order = "tree";
155 } elsif ($o eq "-compact") {
156 $tree_style |= 1;
157 } elsif ($o eq "-loose") {
158 $tree_style &= ~1;
159 } elsif ($o eq "-vt") {
160 $tree_style |= 2;
161 } elsif ($o eq "-ascii") {
162 $tree_style &= ~2;
163 } elsif ($o eq "-main") {
164 $do_main = 1;
165 } elsif ($o =~ /^-base(\d+)$/) {
166 $base = $1;
167 } elsif ($o eq "-bigendian") {
168 $big_endian = 1;
169 } elsif ($o eq "-littleendian") {
170 $big_endian = 0;
171 } elsif (exists $style{substr($o, 1)}) {
78ad9108 172 set_style(@{$style{substr($o, 1)}});
c99ca59a 173 } else {
174 warn "Option $o unrecognized";
175 }
176 }
c27ea44e 177 return sub {
178 if (@args) {
c99ca59a 179 for my $objname (@args) {
8ec8fbef 180 if ($objname eq "BEGIN") {
181 concise_specials("BEGIN", $order,
182 B::begin_av->isa("B::AV") ?
183 B::begin_av->ARRAY : ());
184 } elsif ($objname eq "INIT") {
185 concise_specials("INIT", $order,
186 B::init_av->isa("B::AV") ?
187 B::init_av->ARRAY : ());
188 } elsif ($objname eq "CHECK") {
189 concise_specials("CHECK", $order,
190 B::check_av->isa("B::AV") ?
191 B::check_av->ARRAY : ());
192 } elsif ($objname eq "END") {
193 concise_specials("END", $order,
194 B::end_av->isa("B::AV") ?
195 B::end_av->ARRAY : ());
196 } else {
197 $objname = "main::" . $objname unless $objname =~ /::/;
198 print "$objname:\n";
199 eval "concise_subref(\$order, \\&$objname)";
200 die "concise_subref($order, \\&$objname) failed: $@" if $@;
201 }
c99ca59a 202 }
203 }
c27ea44e 204 if (!@args or $do_main) {
205 print "main program:\n" if $do_main;
31b49ad4 206 concise_main($order);
c99ca59a 207 }
208 }
209}
210
211my %labels;
212my $lastnext;
213
214my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
215 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
051f02e9 216 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#");
c99ca59a 217
8ec8fbef 218no warnings 'qw'; # "Possible attempt to put comments..."; use #7
35fc55f1 219my @linenoise =
220 qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl
c99ca59a 221 ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I
222 -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i<
223 > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i
224 ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy
225 uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@
226 a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s}
227 v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o
228 ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v
229 ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r
230 -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd
231 co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
232 g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
233 e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
c27ea44e 234 Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
c99ca59a 235
236my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
237
238sub op_flags {
239 my($x) = @_;
240 my(@v);
241 push @v, "v" if ($x & 3) == 1;
242 push @v, "s" if ($x & 3) == 2;
243 push @v, "l" if ($x & 3) == 3;
244 push @v, "K" if $x & 4;
245 push @v, "P" if $x & 8;
246 push @v, "R" if $x & 16;
247 push @v, "M" if $x & 32;
248 push @v, "S" if $x & 64;
249 push @v, "*" if $x & 128;
250 return join("", @v);
251}
252
253sub base_n {
254 my $x = shift;
255 return "-" . base_n(-$x) if $x < 0;
256 my $str = "";
257 do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base);
258 $str = reverse $str if $big_endian;
259 return $str;
260}
261
c27ea44e 262my %sequence_num;
263my $seq_max = 1;
264
265sub seq {
266 my($op) = @_;
267 return "-" if not exists $sequence_num{$$op};
268 return base_n($sequence_num{$$op});
269}
c99ca59a 270
271sub walk_topdown {
272 my($op, $sub, $level) = @_;
273 $sub->($op, $level);
274 if ($op->flags & OPf_KIDS) {
275 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
276 walk_topdown($kid, $sub, $level + 1);
277 }
278 }
c6e79e55 279 if (class($op) eq "PMOP") {
280 my $maybe_root = $op->pmreplroot;
281 if (ref($maybe_root) and $maybe_root->isa("B::OP")) {
282 # It really is the root of the replacement, not something
283 # else stored here for lack of space elsewhere
284 walk_topdown($maybe_root, $sub, $level + 1);
285 }
c99ca59a 286 }
287}
288
289sub walklines {
290 my($ar, $level) = @_;
291 for my $l (@$ar) {
292 if (ref($l) eq "ARRAY") {
293 walklines($l, $level + 1);
294 } else {
295 $l->concise($level);
296 }
297 }
298}
299
300sub walk_exec {
301 my($top, $level) = @_;
302 my %opsseen;
303 my @lines;
304 my @todo = ([$top, \@lines]);
305 while (@todo and my($op, $targ) = @{shift @todo}) {
306 for (; $$op; $op = $op->next) {
307 last if $opsseen{$$op}++;
308 push @$targ, $op;
309 my $name = $op->name;
62e36f8a 310 if (class($op) eq "LOGOP") {
c99ca59a 311 my $ar = [];
312 push @$targ, $ar;
313 push @todo, [$op->other, $ar];
314 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
315 my $ar = [];
316 push @$targ, $ar;
317 push @todo, [$op->pmreplstart, $ar];
318 } elsif ($name =~ /^enter(loop|iter)$/) {
2814eb74 319 $labels{${$op->nextop}} = "NEXT";
320 $labels{${$op->lastop}} = "LAST";
321 $labels{${$op->redoop}} = "REDO";
c99ca59a 322 }
323 }
324 }
325 walklines(\@lines, 0);
326}
327
c27ea44e 328# The structure of this routine is purposely modeled after op.c's peep()
329sub sequence {
330 my($op) = @_;
331 my $oldop = 0;
332 return if class($op) eq "NULL" or exists $sequence_num{$$op};
333 for (; $$op; $op = $op->next) {
334 last if exists $sequence_num{$$op};
335 my $name = $op->name;
336 if ($name =~ /^(null|scalar|lineseq|scope)$/) {
337 next if $oldop and $ {$op->next};
338 } else {
339 $sequence_num{$$op} = $seq_max++;
340 if (class($op) eq "LOGOP") {
341 my $other = $op->other;
342 $other = $other->next while $other->name eq "null";
343 sequence($other);
344 } elsif (class($op) eq "LOOP") {
345 my $redoop = $op->redoop;
346 $redoop = $redoop->next while $redoop->name eq "null";
347 sequence($redoop);
348 my $nextop = $op->nextop;
349 $nextop = $nextop->next while $nextop->name eq "null";
350 sequence($nextop);
351 my $lastop = $op->lastop;
352 $lastop = $lastop->next while $lastop->name eq "null";
353 sequence($lastop);
354 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
355 my $replstart = $op->pmreplstart;
356 $replstart = $replstart->next while $replstart->name eq "null";
357 sequence($replstart);
358 }
359 }
360 $oldop = $op;
361 }
362}
363
c99ca59a 364sub fmt_line {
365 my($hr, $fmt, $level) = @_;
366 my $text = $fmt;
367 $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/
368 $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg;
369 $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs;
370 $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs;
371 $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs;
372 $text =~ s/#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg;
373 $text =~ s/#([a-zA-Z]+)/$hr->{$1}/eg;
374 $text =~ s/[ \t]*~+[ \t]*/ /g;
375 return $text;
376}
377
378my %priv;
379$priv{$_}{128} = "LVINTRO"
380 for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv",
381 "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv",
241416b8 382 "padav", "padhv", "enteriter");
c99ca59a 383$priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite");
384$priv{"aassign"}{64} = "COMMON";
c99ca59a 385$priv{"sassign"}{64} = "BKWARD";
386$priv{$_}{64} = "RTIME" for ("match", "subst", "substcont");
387@{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL",
388 "COMPL", "GROWS");
389$priv{"repeat"}{64} = "DOLIST";
390$priv{"leaveloop"}{64} = "CONT";
391@{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV")
392 for ("entersub", map("rv2${_}v", "a", "s", "h", "g"), "aelem", "helem");
393$priv{"entersub"}{16} = "DBG";
394$priv{"entersub"}{32} = "TARG";
395@{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv");
396$priv{"gv"}{32} = "EARLYCV";
397$priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER";
241416b8 398$priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv",
399 "enteriter");
c99ca59a 400$priv{$_}{16} = "TARGMY"
401 for (map(($_,"s$_"),"chop", "chomp"),
402 map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo",
403 "add", "subtract", "negate"), "pow", "concat", "stringify",
404 "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or",
405 "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt",
406 "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf",
407 "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock",
408 "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename",
409 "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system",
410 "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority",
411 "setpriority", "time", "sleep");
7a9b44b9 412@{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN");
c99ca59a 413$priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM";
414$priv{"list"}{64} = "GUESSED";
415$priv{"delete"}{64} = "SLICE";
416$priv{"exists"}{64} = "SUB";
417$priv{$_}{64} = "LOCALE"
418 for ("sort", "prtf", "sprintf", "slt", "sle", "seq", "sne", "sgt", "sge",
419 "scmp", "lc", "uc", "lcfirst", "ucfirst");
2b6e98cb 420@{$priv{"sort"}}{1,2,4,8} = ("NUM", "INT", "REV", "INPLACE");
c99ca59a 421$priv{"threadsv"}{64} = "SVREFd";
c27ea44e 422@{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR")
423 for ("open", "backtick");
c99ca59a 424$priv{"exit"}{128} = "VMS";
feaeca78 425$priv{$_}{2} = "FTACCESS"
426 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec");
fbb0b3b3 427$priv{$_}{4} = "FTSTACKED"
428 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec",
429 "ftis", "fteowned", "ftrowned", "ftzero", "ftsize", "ftmtime",
430 "ftatime", "ftctime", "ftsock", "ftchr", "ftblk", "ftfile", "ftdir",
431 "ftpipe", "ftlink", "ftsuid", "ftsgid", "ftsvtx", "fttty", "fttext",
432 "ftbinary");
bf2b43ff 433$priv{$_}{2} = "GREPLEX"
434 for ("mapwhile", "mapstart", "grepwhile", "grepstart");
c99ca59a 435
436sub private_flags {
437 my($name, $x) = @_;
438 my @s;
439 for my $flag (128, 96, 64, 32, 16, 8, 4, 2, 1) {
440 if ($priv{$name}{$flag} and $x & $flag and $x >= $flag) {
441 $x -= $flag;
442 push @s, $priv{$name}{$flag};
443 }
444 }
445 push @s, $x if $x;
446 return join(",", @s);
447}
448
c27ea44e 449sub concise_sv {
450 my($sv, $hr) = @_;
451 $hr->{svclass} = class($sv);
31b49ad4 452 $hr->{svclass} = "UV"
453 if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
c27ea44e 454 $hr->{svaddr} = sprintf("%#x", $$sv);
455 if ($hr->{svclass} eq "GV") {
456 my $gv = $sv;
457 my $stash = $gv->STASH->NAME;
458 if ($stash eq "main") {
459 $stash = "";
460 } else {
461 $stash = $stash . "::";
462 }
463 $hr->{svval} = "*$stash" . $gv->SAFENAME;
464 return "*$stash" . $gv->SAFENAME;
465 } else {
466 while (class($sv) eq "RV") {
467 $hr->{svval} .= "\\";
468 $sv = $sv->RV;
469 }
470 if (class($sv) eq "SPECIAL") {
40b5b14f 471 $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
c27ea44e 472 } elsif ($sv->FLAGS & SVf_NOK) {
40b5b14f 473 $hr->{svval} .= $sv->NV;
c27ea44e 474 } elsif ($sv->FLAGS & SVf_IOK) {
31b49ad4 475 $hr->{svval} .= $sv->int_value;
c27ea44e 476 } elsif ($sv->FLAGS & SVf_POK) {
40b5b14f 477 $hr->{svval} .= cstring($sv->PV);
31b49ad4 478 } elsif (class($sv) eq "HV") {
479 $hr->{svval} .= 'HASH';
c27ea44e 480 }
481 return $hr->{svclass} . " " . $hr->{svval};
482 }
483}
484
c99ca59a 485sub concise_op {
486 my ($op, $level, $format) = @_;
487 my %h;
488 $h{exname} = $h{name} = $op->name;
489 $h{NAME} = uc $h{name};
490 $h{class} = class($op);
491 $h{extarg} = $h{targ} = $op->targ;
492 $h{extarg} = "" unless $h{extarg};
493 if ($h{name} eq "null" and $h{targ}) {
8ec8fbef 494 # targ holds the old type
c99ca59a 495 $h{exname} = "ex-" . substr(ppname($h{targ}), 3);
496 $h{extarg} = "";
8ec8fbef 497 } elsif ($op->name =~ /^leave(sub(lv)?|write)?$/) {
498 # targ potentially holds a reference count
499 if ($op->private & 64) {
500 my $refs = "ref" . ($h{targ} != 1 ? "s" : "");
501 $h{targarglife} = $h{targarg} = "$h{targ} $refs";
502 }
c99ca59a 503 } elsif ($h{targ}) {
504 my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}];
505 if (defined $padname and class($padname) ne "SPECIAL") {
0b40bd6d 506 $h{targarg} = $padname->PVX;
127212b2 507 if ($padname->FLAGS & SVf_FAKE) {
508 my $fake = '';
509 $fake .= 'a' if $padname->IVX & 1; # PAD_FAKELEX_ANON
510 $fake .= 'm' if $padname->IVX & 2; # PAD_FAKELEX_MULTI
511 $fake .= ':' . $padname->NVX if $curcv->CvFLAGS & CVf_ANON;
512 $h{targarglife} = "$h{targarg}:FAKE:$fake";
513 }
514 else {
515 my $intro = $padname->NVX - $cop_seq_base;
516 my $finish = int($padname->IVX) - $cop_seq_base;
517 $finish = "end" if $finish == 999999999 - $cop_seq_base;
518 $h{targarglife} = "$h{targarg}:$intro,$finish";
519 }
c99ca59a 520 } else {
521 $h{targarglife} = $h{targarg} = "t" . $h{targ};
522 }
523 }
524 $h{arg} = "";
525 $h{svclass} = $h{svaddr} = $h{svval} = "";
526 if ($h{class} eq "PMOP") {
527 my $precomp = $op->precomp;
7a9b44b9 528 if (defined $precomp) {
c27ea44e 529 $precomp = cstring($precomp); # Escape literal control sequences
530 $precomp = "/$precomp/";
531 } else {
532 $precomp = "";
7a9b44b9 533 }
b2a3cfdd 534 my $pmreplroot = $op->pmreplroot;
34a48b4b 535 my $pmreplstart;
c6e79e55 536 if (ref($pmreplroot) eq "B::GV") {
b2a3cfdd 537 # with C<@stash_array = split(/pat/, str);>,
c6e79e55 538 # *stash_array is stored in /pat/'s pmreplroot.
b2a3cfdd 539 $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")";
c6e79e55 540 } elsif (!ref($pmreplroot) and $pmreplroot) {
541 # same as the last case, except the value is actually a
542 # pad offset for where the GV is kept (this happens under
543 # ithreads)
544 my $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$pmreplroot];
545 $h{arg} = "($precomp => \@" . $gv->NAME . ")";
b2a3cfdd 546 } elsif ($ {$op->pmreplstart}) {
c99ca59a 547 undef $lastnext;
548 $pmreplstart = "replstart->" . seq($op->pmreplstart);
549 $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")";
550 } else {
551 $h{arg} = "($precomp)";
552 }
553 } elsif ($h{class} eq "PVOP" and $h{name} ne "trans") {
554 $h{arg} = '("' . $op->pv . '")';
555 $h{svval} = '"' . $op->pv . '"';
556 } elsif ($h{class} eq "COP") {
557 my $label = $op->label;
c3caa09d 558 $h{coplabel} = $label;
c99ca59a 559 $label = $label ? "$label: " : "";
560 my $loc = $op->file;
561 $loc =~ s[.*/][];
562 $loc .= ":" . $op->line;
563 my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base);
564 my $arybase = $op->arybase;
565 $arybase = $arybase ? ' $[=' . $arybase : "";
566 $h{arg} = "($label$stash $cseq $loc$arybase)";
567 } elsif ($h{class} eq "LOOP") {
568 $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop)
569 . " redo->" . seq($op->redoop) . ")";
570 } elsif ($h{class} eq "LOGOP") {
571 undef $lastnext;
572 $h{arg} = "(other->" . seq($op->other) . ")";
573 } elsif ($h{class} eq "SVOP") {
6a077020 574 unless ($h{name} eq 'aelemfast' and $op->flags & OPf_SPECIAL) {
575 if (! ${$op->sv}) {
576 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
577 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
578 $h{targarglife} = $h{targarg} = "";
579 } else {
580 $h{arg} = "(" . concise_sv($op->sv, \%h) . ")";
581 }
c99ca59a 582 }
31b49ad4 583 } elsif ($h{class} eq "PADOP") {
584 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->padix];
585 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
c99ca59a 586 }
587 $h{seq} = $h{hyphseq} = seq($op);
588 $h{seq} = "" if $h{seq} eq "-";
2814eb74 589 $h{opt} = $op->opt;
590 $h{static} = $op->static;
c99ca59a 591 $h{next} = $op->next;
592 $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next});
593 $h{nextaddr} = sprintf("%#x", $ {$op->next});
594 $h{sibaddr} = sprintf("%#x", $ {$op->sibling});
595 $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first");
596 $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last");
597
598 $h{classsym} = $opclass{$h{class}};
599 $h{flagval} = $op->flags;
600 $h{flags} = op_flags($op->flags);
601 $h{privval} = $op->private;
602 $h{private} = private_flags($h{name}, $op->private);
603 $h{addr} = sprintf("%#x", $$op);
2814eb74 604 $h{label} = $labels{$$op};
c99ca59a 605 $h{typenum} = $op->type;
606 $h{noise} = $linenoise[$op->type];
78ad9108 607 $_->(\%h, $op, \$format, \$level) for @callbacks;
c99ca59a 608 return fmt_line(\%h, $format, $level);
609}
610
611sub B::OP::concise {
612 my($op, $level) = @_;
613 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
614 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
615 "addr" => sprintf("%#x", $$lastnext)};
616 print fmt_line($h, $gotofmt, $level+1);
617 }
618 $lastnext = $op->next;
619 print concise_op($op, $level, $format);
620}
621
31b49ad4 622# B::OP::terse (see Terse.pm) now just calls this
623sub b_terse {
624 my($op, $level) = @_;
625
626 # This isn't necessarily right, but there's no easy way to get
627 # from an OP to the right CV. This is a limitation of the
628 # ->terse() interface style, and there isn't much to do about
629 # it. In particular, we can die in concise_op if the main pad
630 # isn't long enough, or has the wrong kind of entries, compared to
631 # the pad a sub was compiled with. The fix for that would be to
632 # make a backwards compatible "terse" format that never even
633 # looked at the pad, just like the old B::Terse. I don't think
634 # that's worth the effort, though.
635 $curcv = main_cv unless $curcv;
636
637 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
638 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
639 "addr" => sprintf("%#x", $$lastnext)};
640 print fmt_line($h, $style{"terse"}[1], $level+1);
641 }
642 $lastnext = $op->next;
643 print concise_op($op, $level, $style{"terse"}[0]);
644}
645
c99ca59a 646sub tree {
647 my $op = shift;
648 my $level = shift;
649 my $style = $tree_decorations[$tree_style];
650 my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style;
651 my $name = concise_op($op, $level, $treefmt);
652 if (not $op->flags & OPf_KIDS) {
653 return $name . "\n";
654 }
655 my @lines;
656 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
657 push @lines, tree($kid, $level+1);
658 }
659 my $i;
660 for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) {
661 $lines[$i] = $space . $lines[$i];
662 }
663 if ($i > 0) {
664 $lines[$i] = $last . $lines[$i];
665 while ($i-- > 1) {
666 if (substr($lines[$i], 0, 1) eq " ") {
667 $lines[$i] = $nokid . $lines[$i];
668 } else {
669 $lines[$i] = $kid . $lines[$i];
670 }
671 }
672 $lines[$i] = $kids . $lines[$i];
673 } else {
674 $lines[0] = $single . $lines[0];
675 }
676 return("$name$lead" . shift @lines,
677 map(" " x (length($name)+$size) . $_, @lines));
678}
679
213a1a26 680# *** Warning: fragile kludge ahead ***
681# Because the B::* modules run in the same interpreter as the code
2814eb74 682# they're compiling, their presence tends to distort the view we have of
683# the code we're looking at. In particular, perl gives sequence numbers
684# to COPs. If the program we're looking at were run on its own, this
685# would start at 1. Because all of B::Concise and all the modules it
686# uses are compiled first, though, by the time we get to the user's
687# program the sequence number is already pretty high, which could be
688# distracting if you're trying to tell OPs apart. Therefore we'd like to
689# subtract an offset from all the sequence numbers we display, to
690# restore the simpler view of the world. The trick is to know what that
691# offset will be, when we're still compiling B::Concise! If we
213a1a26 692# hardcoded a value, it would have to change every time B::Concise or
2814eb74 693# other modules we use do. To help a little, what we do here is compile
694# a little code at the end of the module, and compute the base sequence
695# number for the user's program as being a small offset later, so all we
696# have to worry about are changes in the offset.
213a1a26 697
698# When you say "perl -MO=Concise -e '$a'", the output should look like:
699
700# 4 <@> leave[t1] vKP/REFC ->(end)
701# 1 <0> enter ->2
702 #^ smallest OP sequence number should be 1
703# 2 <;> nextstate(main 1 -e:1) v ->3
704 # ^ smallest COP sequence number should be 1
705# - <1> ex-rv2sv vK/1 ->4
706# 3 <$> gvsv(*a) s ->4
707
c27ea44e 708# If the second of the marked numbers there isn't 1, it means you need
709# to update the corresponding magic number in the next line.
710# Remember, this needs to stay the last things in the module.
e69a2255 711
c27ea44e 712# Why is this different for MacOS? Does it matter?
8ec8fbef 713my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11;
e69a2255 714$cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
c99ca59a 715
7161;
717
718__END__
719
720=head1 NAME
721
722B::Concise - Walk Perl syntax tree, printing concise info about ops
723
724=head1 SYNOPSIS
725
726 perl -MO=Concise[,OPTIONS] foo.pl
727
78ad9108 728 use B::Concise qw(set_style add_callback);
729
c99ca59a 730=head1 DESCRIPTION
731
732This compiler backend prints the internal OPs of a Perl program's syntax
733tree in one of several space-efficient text formats suitable for debugging
734the inner workings of perl or other compiler backends. It can print OPs in
735the order they appear in the OP tree, in the order they will execute, or
736in a text approximation to their tree structure, and the format of the
737information displyed is customizable. Its function is similar to that of
738perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
739sophisticated and flexible.
740
f8a679e6 741=head1 EXAMPLE
742
743Here's is a short example of output, using the default formatting
744conventions :
745
746 % perl -MO=Concise -e '$a = $b + 42'
8ec8fbef 747 8 <@> leave[1 ref] vKP/REFC ->(end)
f8a679e6 748 1 <0> enter ->2
749 2 <;> nextstate(main 1 -e:1) v ->3
750 7 <2> sassign vKS/2 ->8
751 5 <2> add[t1] sK/2 ->6
752 - <1> ex-rv2sv sK/1 ->4
753 3 <$> gvsv(*b) s ->4
754 4 <$> const(IV 42) s ->5
755 - <1> ex-rv2sv sKRM*/1 ->7
756 6 <$> gvsv(*a) s ->7
757
758Each line corresponds to an operator. Null ops appear as C<ex-opname>,
759where I<opname> is the op that has been optimized away by perl.
760
761The number on the first row indicates the op's sequence number. It's
762given in base 36 by default.
763
764The symbol between angle brackets indicates the op's type : for example,
765<2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">).
766
767The opname may be followed by op-specific information in parentheses
768(e.g. C<gvsv(*b)>), and by targ information in brackets (e.g.
769C<leave[t1]>).
770
771Next come the op flags. The common flags are listed below
772(L</"OP flags abbreviations">). The private flags follow, separated
773by a slash. For example, C<vKP/REFC> means that the leave op has
774public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private
775flag OPpREFCOUNTED.
776
777Finally an arrow points to the sequence number of the next op.
778
c99ca59a 779=head1 OPTIONS
780
781Arguments that don't start with a hyphen are taken to be the names of
8ec8fbef 782subroutines to print the OPs of; if no such functions are specified,
783the main body of the program (outside any subroutines, and not
784including use'd or require'd files) is printed. Passing C<BEGIN>,
785C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding
786special blocks to be printed.
c99ca59a 787
788=over 4
789
790=item B<-basic>
791
792Print OPs in the order they appear in the OP tree (a preorder
793traversal, starting at the root). The indentation of each OP shows its
794level in the tree. This mode is the default, so the flag is included
795simply for completeness.
796
797=item B<-exec>
798
799Print OPs in the order they would normally execute (for the majority
800of constructs this is a postorder traversal of the tree, ending at the
801root). In most cases the OP that usually follows a given OP will
802appear directly below it; alternate paths are shown by indentation. In
803cases like loops when control jumps out of a linear path, a 'goto'
804line is generated.
805
806=item B<-tree>
807
808Print OPs in a text approximation of a tree, with the root of the tree
809at the left and 'left-to-right' order of children transformed into
810'top-to-bottom'. Because this mode grows both to the right and down,
811it isn't suitable for large programs (unless you have a very wide
812terminal).
813
814=item B<-compact>
815
816Use a tree format in which the minimum amount of space is used for the
817lines connecting nodes (one character in most cases). This squeezes out
818a few precious columns of screen real estate.
819
820=item B<-loose>
821
822Use a tree format that uses longer edges to separate OP nodes. This format
823tends to look better than the compact one, especially in ASCII, and is
824the default.
825
826=item B<-vt>
827
828Use tree connecting characters drawn from the VT100 line-drawing set.
829This looks better if your terminal supports it.
830
831=item B<-ascii>
832
833Draw the tree with standard ASCII characters like C<+> and C<|>. These don't
834look as clean as the VT100 characters, but they'll work with almost any
835terminal (or the horizontal scrolling mode of less(1)) and are suitable
836for text documentation or email. This is the default.
837
838=item B<-main>
839
840Include the main program in the output, even if subroutines were also
841specified.
842
843=item B<-base>I<n>
844
845Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the
846digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit
847for 37 will be 'A', and so on until 62. Values greater than 62 are not
848currently supported. The default is 36.
849
850=item B<-bigendian>
851
852Print sequence numbers with the most significant digit first. This is the
853usual convention for Arabic numerals, and the default.
854
855=item B<-littleendian>
856
857Print seqence numbers with the least significant digit first.
858
859=item B<-concise>
860
861Use the author's favorite set of formatting conventions. This is the
862default, of course.
863
864=item B<-terse>
865
a6d05634 866Use formatting conventions that emulate the output of B<B::Terse>. The
c99ca59a 867basic mode is almost indistinguishable from the real B<B::Terse>, and the
868exec mode looks very similar, but is in a more logical order and lacks
869curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
870is only vaguely reminiscient of B<B::Terse>.
871
872=item B<-linenoise>
873
874Use formatting conventions in which the name of each OP, rather than being
875written out in full, is represented by a one- or two-character abbreviation.
876This is mainly a joke.
877
878=item B<-debug>
879
880Use formatting conventions reminiscient of B<B::Debug>; these aren't
881very concise at all.
882
883=item B<-env>
884
885Use formatting conventions read from the environment variables
886C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>.
887
888=back
889
890=head1 FORMATTING SPECIFICATIONS
891
892For each general style ('concise', 'terse', 'linenoise', etc.) there are
893three specifications: one of how OPs should appear in the basic or exec
894modes, one of how 'goto' lines should appear (these occur in the exec
895mode only), and one of how nodes should appear in tree mode. Each has the
896same format, described below. Any text that doesn't match a special
897pattern is copied verbatim.
898
899=over 4
900
901=item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)>
902
903Generates I<exec_text> in exec mode, or I<basic_text> in basic mode.
904
905=item B<(*(>I<text>B<)*)>
906
907Generates one copy of I<text> for each indentation level.
908
909=item B<(*(>I<text1>B<;>I<text2>B<)*)>
910
911Generates one fewer copies of I<text1> than the indentation level, followed
912by one copy of I<text2> if the indentation level is more than 0.
913
914=item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)>
915
916If the value of I<var> is true (not empty or zero), generates the
917value of I<var> surrounded by I<text1> and I<Text2>, otherwise
918nothing.
919
920=item B<#>I<var>
921
922Generates the value of the variable I<var>.
923
924=item B<#>I<var>I<N>
925
926Generates the value of I<var>, left jutified to fill I<N> spaces.
927
928=item B<~>
929
930Any number of tildes and surrounding whitespace will be collapsed to
931a single space.
932
933=back
934
935The following variables are recognized:
936
937=over 4
938
939=item B<#addr>
940
941The address of the OP, in hexidecimal.
942
943=item B<#arg>
944
945The OP-specific information of the OP (such as the SV for an SVOP, the
946non-local exit pointers for a LOOP, etc.) enclosed in paretheses.
947
948=item B<#class>
949
950The B-determined class of the OP, in all caps.
951
f8a679e6 952=item B<#classsym>
c99ca59a 953
954A single symbol abbreviating the class of the OP.
955
c3caa09d 956=item B<#coplabel>
957
958The label of the statement or block the OP is the start of, if any.
959
c99ca59a 960=item B<#exname>
961
962The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo.
963
964=item B<#extarg>
965
966The target of the OP, or nothing for a nulled OP.
967
968=item B<#firstaddr>
969
970The address of the OP's first child, in hexidecimal.
971
972=item B<#flags>
973
974The OP's flags, abbreviated as a series of symbols.
975
976=item B<#flagval>
977
978The numeric value of the OP's flags.
979
f8a679e6 980=item B<#hyphseq>
c99ca59a 981
982The sequence number of the OP, or a hyphen if it doesn't have one.
983
984=item B<#label>
985
986'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec
987mode, or empty otherwise.
988
989=item B<#lastaddr>
990
991The address of the OP's last child, in hexidecimal.
992
993=item B<#name>
994
995The OP's name.
996
997=item B<#NAME>
998
999The OP's name, in all caps.
1000
1001=item B<#next>
1002
1003The sequence number of the OP's next OP.
1004
1005=item B<#nextaddr>
1006
1007The address of the OP's next OP, in hexidecimal.
1008
1009=item B<#noise>
1010
c27ea44e 1011A one- or two-character abbreviation for the OP's name.
c99ca59a 1012
1013=item B<#private>
1014
1015The OP's private flags, rendered with abbreviated names if possible.
1016
1017=item B<#privval>
1018
1019The numeric value of the OP's private flags.
1020
1021=item B<#seq>
1022
2814eb74 1023The sequence number of the OP. Note that this is a sequence number
1024generated by B::Concise.
c99ca59a 1025
2814eb74 1026=item B<#opt>
c99ca59a 1027
2814eb74 1028Whether or not the op has been optimised by the peephole optimiser.
1029
1030=item B<#static>
1031
1032Whether or not the op is statically defined. This flag is used by the
1033B::C compiler backend and indicates that the op should not be freed.
c99ca59a 1034
1035=item B<#sibaddr>
1036
1037The address of the OP's next youngest sibling, in hexidecimal.
1038
1039=item B<#svaddr>
1040
1041The address of the OP's SV, if it has an SV, in hexidecimal.
1042
1043=item B<#svclass>
1044
1045The class of the OP's SV, if it has one, in all caps (e.g., 'IV').
1046
1047=item B<#svval>
1048
1049The value of the OP's SV, if it has one, in a short human-readable format.
1050
1051=item B<#targ>
1052
1053The numeric value of the OP's targ.
1054
1055=item B<#targarg>
1056
1057The name of the variable the OP's targ refers to, if any, otherwise the
1058letter t followed by the OP's targ in decimal.
1059
1060=item B<#targarglife>
1061
1062Same as B<#targarg>, but followed by the COP sequence numbers that delimit
1063the variable's lifetime (or 'end' for a variable in an open scope) for a
1064variable.
1065
1066=item B<#typenum>
1067
1068The numeric value of the OP's type, in decimal.
1069
1070=back
1071
1072=head1 ABBREVIATIONS
1073
1074=head2 OP flags abbreviations
1075
1076 v OPf_WANT_VOID Want nothing (void context)
1077 s OPf_WANT_SCALAR Want single value (scalar context)
1078 l OPf_WANT_LIST Want list of any length (list context)
1079 K OPf_KIDS There is a firstborn child.
1080 P OPf_PARENS This operator was parenthesized.
1081 (Or block needs explicit scope entry.)
1082 R OPf_REF Certified reference.
1083 (Return container, not containee).
1084 M OPf_MOD Will modify (lvalue).
1085 S OPf_STACKED Some arg is arriving on the stack.
1086 * OPf_SPECIAL Do something weird for this op (see op.h)
1087
1088=head2 OP class abbreviations
1089
1090 0 OP (aka BASEOP) An OP with no children
1091 1 UNOP An OP with one child
1092 2 BINOP An OP with two children
1093 | LOGOP A control branch OP
1094 @ LISTOP An OP that could have lots of children
1095 / PMOP An OP with a regular expression
1096 $ SVOP An OP with an SV
1097 " PVOP An OP with a string
1098 { LOOP An OP that holds pointers for a loop
1099 ; COP An OP that marks the start of a statement
051f02e9 1100 # PADOP An OP with a GV on the pad
c99ca59a 1101
78ad9108 1102=head1 Using B::Concise outside of the O framework
1103
1104It is possible to extend B<B::Concise> by using it outside of the B<O>
1105framework and providing new styles and new variables.
1106
1107 use B::Concise qw(set_style add_callback);
1108 set_style($format, $gotofmt, $treefmt);
1109 add_callback
1110 (
1111 sub
1112 {
1113 my ($h, $op, $level, $format) = @_;
1114 $h->{variable} = some_func($op);
1115 }
1116 );
1117 B::Concise::compile(@options)->();
1118
1119You can specify a style by calling the B<set_style> subroutine. If you
1120have a new variable in your style, or you want to change the value of an
1121existing variable, you will need to add a callback to specify the value
1122for that variable.
1123
1124This is done by calling B<add_callback> passing references to any
1125callback subroutines. The subroutines are called in the same order as
1126they are added. Each subroutine is passed four parameters. These are a
1127reference to a hash, the keys of which are the names of the variables
1128and the values of which are their values, the op, the level and the
1129format.
1130
1131To define your own variables, simply add them to the hash, or change
1132existing values if you need to. The level and format are passed in as
1133references to scalars, but it is unlikely that they will need to be
1134changed or even used.
1135
31b49ad4 1136To switch back to one of the standard styles like C<concise> or
1137C<terse>, use C<set_style_standard>.
1138
78ad9108 1139To see the output, call the subroutine returned by B<compile> in the
1140same way that B<O> does.
1141
c99ca59a 1142=head1 AUTHOR
1143
31b49ad4 1144Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.
c99ca59a 1145
1146=cut