Re: [perl #26073] sprintf miscounts padding when format is utf8
[p5sagit/p5-mst-13.2.git] / ext / B / B / Concise.pm
CommitLineData
c99ca59a 1package B::Concise;
c27ea44e 2# Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
c99ca59a 3# This program is free software; you can redistribute and/or modify it
4# under the same terms as Perl itself.
5
8ec8fbef 6# Note: we need to keep track of how many use declarations/BEGIN
7# blocks this module uses, so we can avoid printing them when user
8# asks for the BEGIN blocks in her program. Update the comments and
9# the count in concise_specials if you add or delete one. The
10# -MO=Concise counts as use #1.
78ad9108 11
8ec8fbef 12use strict; # use #2
13use warnings; # uses #3 and #4, since warnings uses Carp
78ad9108 14
8ec8fbef 15use Exporter (); # use #5
16
bf2b43ff 17our $VERSION = "0.58";
78ad9108 18our @ISA = qw(Exporter);
31b49ad4 19our @EXPORT_OK = qw(set_style set_style_standard add_callback
8ec8fbef 20 concise_subref concise_cv concise_main);
78ad9108 21
8ec8fbef 22# use #6
c99ca59a 23use B qw(class ppname main_start main_root main_cv cstring svref_2object
127212b2 24 SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS CVf_ANON);
c99ca59a 25
26my %style =
27 ("terse" =>
c3caa09d 28 ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) "
29 . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n",
c99ca59a 30 "(*( )*)goto #class (#addr)\n",
31 "#class pp_#name"],
32 "concise" =>
33 ["#hyphseq2 (*( (x( ;)x))*)<#classsym> "
34 . "#exname#arg(?([#targarglife])?)~#flags(?(/#private)?)(x(;~->#next)x)\n",
35 " (*( )*) goto #seq\n",
36 "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"],
37 "linenoise" =>
38 ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)",
39 "gt_#seq ",
40 "(?(#seq)?)#noise#arg(?([#targarg])?)"],
41 "debug" =>
42 ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t"
43 . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n\top_seq\t\t"
44 . "#seqnum\n\top_flags\t#flagval\n\top_private\t#privval\n"
45 . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)"
46 . "(?(\top_sv\t\t#svaddr\n)?)",
47 " GOTO #addr\n",
48 "#addr"],
49 "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT},
50 $ENV{B_CONCISE_TREE_FORMAT}],
51 );
52
53my($format, $gotofmt, $treefmt);
54my $curcv;
c27ea44e 55my $cop_seq_base;
78ad9108 56my @callbacks;
57
58sub set_style {
59 ($format, $gotofmt, $treefmt) = @_;
60}
61
31b49ad4 62sub set_style_standard {
63 my($name) = @_;
64 set_style(@{$style{$name}});
65}
66
78ad9108 67sub add_callback {
68 push @callbacks, @_;
69}
c99ca59a 70
8ec8fbef 71sub concise_subref {
72 my($order, $subref) = @_;
73 concise_cv_obj($order, svref_2object($subref));
74}
75
76# This should have been called concise_subref, but it was exported
77# under this name in versions before 0.56
78sub concise_cv { concise_subref(@_); }
79
80sub concise_cv_obj {
81 my ($order, $cv) = @_;
c99ca59a 82 $curcv = $cv;
c27ea44e 83 sequence($cv->START);
c99ca59a 84 if ($order eq "exec") {
85 walk_exec($cv->START);
86 } elsif ($order eq "basic") {
87 walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0);
88 } else {
89 print tree($cv->ROOT, 0)
90 }
91}
92
31b49ad4 93sub concise_main {
94 my($order) = @_;
95 sequence(main_start);
96 $curcv = main_cv;
97 if ($order eq "exec") {
98 return if class(main_start) eq "NULL";
99 walk_exec(main_start);
100 } elsif ($order eq "tree") {
101 return if class(main_root) eq "NULL";
102 print tree(main_root, 0);
103 } elsif ($order eq "basic") {
104 return if class(main_root) eq "NULL";
105 walk_topdown(main_root,
106 sub { $_[0]->concise($_[1]) }, 0);
107 }
108}
109
8ec8fbef 110sub concise_specials {
111 my($name, $order, @cv_s) = @_;
112 my $i = 1;
113 if ($name eq "BEGIN") {
114 splice(@cv_s, 0, 7); # skip 7 BEGIN blocks in this file
115 } elsif ($name eq "CHECK") {
116 pop @cv_s; # skip the CHECK block that calls us
117 }
118 for my $cv (@cv_s) {
119 print "$name $i:\n";
120 $i++;
121 concise_cv_obj($order, $cv);
122 }
123}
124
c99ca59a 125my $start_sym = "\e(0"; # "\cN" sometimes also works
126my $end_sym = "\e(B"; # "\cO" respectively
127
128my @tree_decorations =
129 ([" ", "--", "+-", "|-", "| ", "`-", "-", 1],
130 [" ", "-", "+", "+", "|", "`", "", 0],
131 [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1],
132 [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0],
133 );
134my $tree_style = 0;
135
136my $base = 36;
137my $big_endian = 1;
138
139my $order = "basic";
140
31b49ad4 141set_style_standard("concise");
78ad9108 142
c99ca59a 143sub compile {
144 my @options = grep(/^-/, @_);
145 my @args = grep(!/^-/, @_);
146 my $do_main = 0;
c99ca59a 147 for my $o (@options) {
148 if ($o eq "-basic") {
149 $order = "basic";
150 } elsif ($o eq "-exec") {
151 $order = "exec";
152 } elsif ($o eq "-tree") {
153 $order = "tree";
154 } elsif ($o eq "-compact") {
155 $tree_style |= 1;
156 } elsif ($o eq "-loose") {
157 $tree_style &= ~1;
158 } elsif ($o eq "-vt") {
159 $tree_style |= 2;
160 } elsif ($o eq "-ascii") {
161 $tree_style &= ~2;
162 } elsif ($o eq "-main") {
163 $do_main = 1;
164 } elsif ($o =~ /^-base(\d+)$/) {
165 $base = $1;
166 } elsif ($o eq "-bigendian") {
167 $big_endian = 1;
168 } elsif ($o eq "-littleendian") {
169 $big_endian = 0;
170 } elsif (exists $style{substr($o, 1)}) {
78ad9108 171 set_style(@{$style{substr($o, 1)}});
c99ca59a 172 } else {
173 warn "Option $o unrecognized";
174 }
175 }
c27ea44e 176 return sub {
177 if (@args) {
c99ca59a 178 for my $objname (@args) {
8ec8fbef 179 if ($objname eq "BEGIN") {
180 concise_specials("BEGIN", $order,
181 B::begin_av->isa("B::AV") ?
182 B::begin_av->ARRAY : ());
183 } elsif ($objname eq "INIT") {
184 concise_specials("INIT", $order,
185 B::init_av->isa("B::AV") ?
186 B::init_av->ARRAY : ());
187 } elsif ($objname eq "CHECK") {
188 concise_specials("CHECK", $order,
189 B::check_av->isa("B::AV") ?
190 B::check_av->ARRAY : ());
191 } elsif ($objname eq "END") {
192 concise_specials("END", $order,
193 B::end_av->isa("B::AV") ?
194 B::end_av->ARRAY : ());
195 } else {
196 $objname = "main::" . $objname unless $objname =~ /::/;
197 print "$objname:\n";
198 eval "concise_subref(\$order, \\&$objname)";
199 die "concise_subref($order, \\&$objname) failed: $@" if $@;
200 }
c99ca59a 201 }
202 }
c27ea44e 203 if (!@args or $do_main) {
204 print "main program:\n" if $do_main;
31b49ad4 205 concise_main($order);
c99ca59a 206 }
207 }
208}
209
210my %labels;
211my $lastnext;
212
213my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
214 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
051f02e9 215 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#");
c99ca59a 216
8ec8fbef 217no warnings 'qw'; # "Possible attempt to put comments..."; use #7
35fc55f1 218my @linenoise =
219 qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl
c99ca59a 220 ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I
221 -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i<
222 > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i
223 ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy
224 uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@
225 a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s}
226 v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o
227 ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v
228 ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r
229 -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd
230 co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
231 g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
232 e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
c27ea44e 233 Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
c99ca59a 234
235my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
236
237sub op_flags {
238 my($x) = @_;
239 my(@v);
240 push @v, "v" if ($x & 3) == 1;
241 push @v, "s" if ($x & 3) == 2;
242 push @v, "l" if ($x & 3) == 3;
243 push @v, "K" if $x & 4;
244 push @v, "P" if $x & 8;
245 push @v, "R" if $x & 16;
246 push @v, "M" if $x & 32;
247 push @v, "S" if $x & 64;
248 push @v, "*" if $x & 128;
249 return join("", @v);
250}
251
252sub base_n {
253 my $x = shift;
254 return "-" . base_n(-$x) if $x < 0;
255 my $str = "";
256 do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base);
257 $str = reverse $str if $big_endian;
258 return $str;
259}
260
c27ea44e 261my %sequence_num;
262my $seq_max = 1;
263
264sub seq {
265 my($op) = @_;
266 return "-" if not exists $sequence_num{$$op};
267 return base_n($sequence_num{$$op});
268}
c99ca59a 269
270sub walk_topdown {
271 my($op, $sub, $level) = @_;
272 $sub->($op, $level);
273 if ($op->flags & OPf_KIDS) {
274 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
275 walk_topdown($kid, $sub, $level + 1);
276 }
277 }
c6e79e55 278 if (class($op) eq "PMOP") {
279 my $maybe_root = $op->pmreplroot;
280 if (ref($maybe_root) and $maybe_root->isa("B::OP")) {
281 # It really is the root of the replacement, not something
282 # else stored here for lack of space elsewhere
283 walk_topdown($maybe_root, $sub, $level + 1);
284 }
c99ca59a 285 }
286}
287
288sub walklines {
289 my($ar, $level) = @_;
290 for my $l (@$ar) {
291 if (ref($l) eq "ARRAY") {
292 walklines($l, $level + 1);
293 } else {
294 $l->concise($level);
295 }
296 }
297}
298
299sub walk_exec {
300 my($top, $level) = @_;
301 my %opsseen;
302 my @lines;
303 my @todo = ([$top, \@lines]);
304 while (@todo and my($op, $targ) = @{shift @todo}) {
305 for (; $$op; $op = $op->next) {
306 last if $opsseen{$$op}++;
307 push @$targ, $op;
308 my $name = $op->name;
62e36f8a 309 if (class($op) eq "LOGOP") {
c99ca59a 310 my $ar = [];
311 push @$targ, $ar;
312 push @todo, [$op->other, $ar];
313 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
314 my $ar = [];
315 push @$targ, $ar;
316 push @todo, [$op->pmreplstart, $ar];
317 } elsif ($name =~ /^enter(loop|iter)$/) {
318 $labels{$op->nextop->seq} = "NEXT";
319 $labels{$op->lastop->seq} = "LAST";
320 $labels{$op->redoop->seq} = "REDO";
321 }
322 }
323 }
324 walklines(\@lines, 0);
325}
326
c27ea44e 327# The structure of this routine is purposely modeled after op.c's peep()
328sub sequence {
329 my($op) = @_;
330 my $oldop = 0;
331 return if class($op) eq "NULL" or exists $sequence_num{$$op};
332 for (; $$op; $op = $op->next) {
333 last if exists $sequence_num{$$op};
334 my $name = $op->name;
335 if ($name =~ /^(null|scalar|lineseq|scope)$/) {
336 next if $oldop and $ {$op->next};
337 } else {
338 $sequence_num{$$op} = $seq_max++;
339 if (class($op) eq "LOGOP") {
340 my $other = $op->other;
341 $other = $other->next while $other->name eq "null";
342 sequence($other);
343 } elsif (class($op) eq "LOOP") {
344 my $redoop = $op->redoop;
345 $redoop = $redoop->next while $redoop->name eq "null";
346 sequence($redoop);
347 my $nextop = $op->nextop;
348 $nextop = $nextop->next while $nextop->name eq "null";
349 sequence($nextop);
350 my $lastop = $op->lastop;
351 $lastop = $lastop->next while $lastop->name eq "null";
352 sequence($lastop);
353 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
354 my $replstart = $op->pmreplstart;
355 $replstart = $replstart->next while $replstart->name eq "null";
356 sequence($replstart);
357 }
358 }
359 $oldop = $op;
360 }
361}
362
c99ca59a 363sub fmt_line {
364 my($hr, $fmt, $level) = @_;
365 my $text = $fmt;
366 $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/
367 $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg;
368 $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs;
369 $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs;
370 $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs;
371 $text =~ s/#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg;
372 $text =~ s/#([a-zA-Z]+)/$hr->{$1}/eg;
373 $text =~ s/[ \t]*~+[ \t]*/ /g;
374 return $text;
375}
376
377my %priv;
378$priv{$_}{128} = "LVINTRO"
379 for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv",
380 "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv",
241416b8 381 "padav", "padhv", "enteriter");
c99ca59a 382$priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite");
383$priv{"aassign"}{64} = "COMMON";
c99ca59a 384$priv{"sassign"}{64} = "BKWARD";
385$priv{$_}{64} = "RTIME" for ("match", "subst", "substcont");
386@{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL",
387 "COMPL", "GROWS");
388$priv{"repeat"}{64} = "DOLIST";
389$priv{"leaveloop"}{64} = "CONT";
390@{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV")
391 for ("entersub", map("rv2${_}v", "a", "s", "h", "g"), "aelem", "helem");
392$priv{"entersub"}{16} = "DBG";
393$priv{"entersub"}{32} = "TARG";
394@{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv");
395$priv{"gv"}{32} = "EARLYCV";
396$priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER";
241416b8 397$priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv",
398 "enteriter");
c99ca59a 399$priv{$_}{16} = "TARGMY"
400 for (map(($_,"s$_"),"chop", "chomp"),
401 map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo",
402 "add", "subtract", "negate"), "pow", "concat", "stringify",
403 "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or",
404 "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt",
405 "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf",
406 "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock",
407 "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename",
408 "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system",
409 "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority",
410 "setpriority", "time", "sleep");
7a9b44b9 411@{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN");
c99ca59a 412$priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM";
413$priv{"list"}{64} = "GUESSED";
414$priv{"delete"}{64} = "SLICE";
415$priv{"exists"}{64} = "SUB";
416$priv{$_}{64} = "LOCALE"
417 for ("sort", "prtf", "sprintf", "slt", "sle", "seq", "sne", "sgt", "sge",
418 "scmp", "lc", "uc", "lcfirst", "ucfirst");
419@{$priv{"sort"}}{1,2,4} = ("NUM", "INT", "REV");
420$priv{"threadsv"}{64} = "SVREFd";
c27ea44e 421@{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR")
422 for ("open", "backtick");
c99ca59a 423$priv{"exit"}{128} = "VMS";
feaeca78 424$priv{$_}{2} = "FTACCESS"
425 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec");
bf2b43ff 426$priv{$_}{2} = "GREPLEX"
427 for ("mapwhile", "mapstart", "grepwhile", "grepstart");
c99ca59a 428
429sub private_flags {
430 my($name, $x) = @_;
431 my @s;
432 for my $flag (128, 96, 64, 32, 16, 8, 4, 2, 1) {
433 if ($priv{$name}{$flag} and $x & $flag and $x >= $flag) {
434 $x -= $flag;
435 push @s, $priv{$name}{$flag};
436 }
437 }
438 push @s, $x if $x;
439 return join(",", @s);
440}
441
c27ea44e 442sub concise_sv {
443 my($sv, $hr) = @_;
444 $hr->{svclass} = class($sv);
31b49ad4 445 $hr->{svclass} = "UV"
446 if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
c27ea44e 447 $hr->{svaddr} = sprintf("%#x", $$sv);
448 if ($hr->{svclass} eq "GV") {
449 my $gv = $sv;
450 my $stash = $gv->STASH->NAME;
451 if ($stash eq "main") {
452 $stash = "";
453 } else {
454 $stash = $stash . "::";
455 }
456 $hr->{svval} = "*$stash" . $gv->SAFENAME;
457 return "*$stash" . $gv->SAFENAME;
458 } else {
459 while (class($sv) eq "RV") {
460 $hr->{svval} .= "\\";
461 $sv = $sv->RV;
462 }
463 if (class($sv) eq "SPECIAL") {
40b5b14f 464 $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
c27ea44e 465 } elsif ($sv->FLAGS & SVf_NOK) {
40b5b14f 466 $hr->{svval} .= $sv->NV;
c27ea44e 467 } elsif ($sv->FLAGS & SVf_IOK) {
31b49ad4 468 $hr->{svval} .= $sv->int_value;
c27ea44e 469 } elsif ($sv->FLAGS & SVf_POK) {
40b5b14f 470 $hr->{svval} .= cstring($sv->PV);
31b49ad4 471 } elsif (class($sv) eq "HV") {
472 $hr->{svval} .= 'HASH';
c27ea44e 473 }
474 return $hr->{svclass} . " " . $hr->{svval};
475 }
476}
477
c99ca59a 478sub concise_op {
479 my ($op, $level, $format) = @_;
480 my %h;
481 $h{exname} = $h{name} = $op->name;
482 $h{NAME} = uc $h{name};
483 $h{class} = class($op);
484 $h{extarg} = $h{targ} = $op->targ;
485 $h{extarg} = "" unless $h{extarg};
486 if ($h{name} eq "null" and $h{targ}) {
8ec8fbef 487 # targ holds the old type
c99ca59a 488 $h{exname} = "ex-" . substr(ppname($h{targ}), 3);
489 $h{extarg} = "";
8ec8fbef 490 } elsif ($op->name =~ /^leave(sub(lv)?|write)?$/) {
491 # targ potentially holds a reference count
492 if ($op->private & 64) {
493 my $refs = "ref" . ($h{targ} != 1 ? "s" : "");
494 $h{targarglife} = $h{targarg} = "$h{targ} $refs";
495 }
c99ca59a 496 } elsif ($h{targ}) {
497 my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}];
498 if (defined $padname and class($padname) ne "SPECIAL") {
0b40bd6d 499 $h{targarg} = $padname->PVX;
127212b2 500 if ($padname->FLAGS & SVf_FAKE) {
501 my $fake = '';
502 $fake .= 'a' if $padname->IVX & 1; # PAD_FAKELEX_ANON
503 $fake .= 'm' if $padname->IVX & 2; # PAD_FAKELEX_MULTI
504 $fake .= ':' . $padname->NVX if $curcv->CvFLAGS & CVf_ANON;
505 $h{targarglife} = "$h{targarg}:FAKE:$fake";
506 }
507 else {
508 my $intro = $padname->NVX - $cop_seq_base;
509 my $finish = int($padname->IVX) - $cop_seq_base;
510 $finish = "end" if $finish == 999999999 - $cop_seq_base;
511 $h{targarglife} = "$h{targarg}:$intro,$finish";
512 }
c99ca59a 513 } else {
514 $h{targarglife} = $h{targarg} = "t" . $h{targ};
515 }
516 }
517 $h{arg} = "";
518 $h{svclass} = $h{svaddr} = $h{svval} = "";
519 if ($h{class} eq "PMOP") {
520 my $precomp = $op->precomp;
7a9b44b9 521 if (defined $precomp) {
c27ea44e 522 $precomp = cstring($precomp); # Escape literal control sequences
523 $precomp = "/$precomp/";
524 } else {
525 $precomp = "";
7a9b44b9 526 }
b2a3cfdd 527 my $pmreplroot = $op->pmreplroot;
34a48b4b 528 my $pmreplstart;
c6e79e55 529 if (ref($pmreplroot) eq "B::GV") {
b2a3cfdd 530 # with C<@stash_array = split(/pat/, str);>,
c6e79e55 531 # *stash_array is stored in /pat/'s pmreplroot.
b2a3cfdd 532 $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")";
c6e79e55 533 } elsif (!ref($pmreplroot) and $pmreplroot) {
534 # same as the last case, except the value is actually a
535 # pad offset for where the GV is kept (this happens under
536 # ithreads)
537 my $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$pmreplroot];
538 $h{arg} = "($precomp => \@" . $gv->NAME . ")";
b2a3cfdd 539 } elsif ($ {$op->pmreplstart}) {
c99ca59a 540 undef $lastnext;
541 $pmreplstart = "replstart->" . seq($op->pmreplstart);
542 $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")";
543 } else {
544 $h{arg} = "($precomp)";
545 }
546 } elsif ($h{class} eq "PVOP" and $h{name} ne "trans") {
547 $h{arg} = '("' . $op->pv . '")';
548 $h{svval} = '"' . $op->pv . '"';
549 } elsif ($h{class} eq "COP") {
550 my $label = $op->label;
c3caa09d 551 $h{coplabel} = $label;
c99ca59a 552 $label = $label ? "$label: " : "";
553 my $loc = $op->file;
554 $loc =~ s[.*/][];
555 $loc .= ":" . $op->line;
556 my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base);
557 my $arybase = $op->arybase;
558 $arybase = $arybase ? ' $[=' . $arybase : "";
559 $h{arg} = "($label$stash $cseq $loc$arybase)";
560 } elsif ($h{class} eq "LOOP") {
561 $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop)
562 . " redo->" . seq($op->redoop) . ")";
563 } elsif ($h{class} eq "LOGOP") {
564 undef $lastnext;
565 $h{arg} = "(other->" . seq($op->other) . ")";
566 } elsif ($h{class} eq "SVOP") {
c27ea44e 567 if (! ${$op->sv}) {
568 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
569 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
570 $h{targarglife} = $h{targarg} = "";
c99ca59a 571 } else {
c27ea44e 572 $h{arg} = "(" . concise_sv($op->sv, \%h) . ")";
c99ca59a 573 }
31b49ad4 574 } elsif ($h{class} eq "PADOP") {
575 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->padix];
576 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
c99ca59a 577 }
578 $h{seq} = $h{hyphseq} = seq($op);
579 $h{seq} = "" if $h{seq} eq "-";
580 $h{seqnum} = $op->seq;
581 $h{next} = $op->next;
582 $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next});
583 $h{nextaddr} = sprintf("%#x", $ {$op->next});
584 $h{sibaddr} = sprintf("%#x", $ {$op->sibling});
585 $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first");
586 $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last");
587
588 $h{classsym} = $opclass{$h{class}};
589 $h{flagval} = $op->flags;
590 $h{flags} = op_flags($op->flags);
591 $h{privval} = $op->private;
592 $h{private} = private_flags($h{name}, $op->private);
593 $h{addr} = sprintf("%#x", $$op);
594 $h{label} = $labels{$op->seq};
595 $h{typenum} = $op->type;
596 $h{noise} = $linenoise[$op->type];
78ad9108 597 $_->(\%h, $op, \$format, \$level) for @callbacks;
c99ca59a 598 return fmt_line(\%h, $format, $level);
599}
600
601sub B::OP::concise {
602 my($op, $level) = @_;
603 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
604 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
605 "addr" => sprintf("%#x", $$lastnext)};
606 print fmt_line($h, $gotofmt, $level+1);
607 }
608 $lastnext = $op->next;
609 print concise_op($op, $level, $format);
610}
611
31b49ad4 612# B::OP::terse (see Terse.pm) now just calls this
613sub b_terse {
614 my($op, $level) = @_;
615
616 # This isn't necessarily right, but there's no easy way to get
617 # from an OP to the right CV. This is a limitation of the
618 # ->terse() interface style, and there isn't much to do about
619 # it. In particular, we can die in concise_op if the main pad
620 # isn't long enough, or has the wrong kind of entries, compared to
621 # the pad a sub was compiled with. The fix for that would be to
622 # make a backwards compatible "terse" format that never even
623 # looked at the pad, just like the old B::Terse. I don't think
624 # that's worth the effort, though.
625 $curcv = main_cv unless $curcv;
626
627 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
628 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
629 "addr" => sprintf("%#x", $$lastnext)};
630 print fmt_line($h, $style{"terse"}[1], $level+1);
631 }
632 $lastnext = $op->next;
633 print concise_op($op, $level, $style{"terse"}[0]);
634}
635
c99ca59a 636sub tree {
637 my $op = shift;
638 my $level = shift;
639 my $style = $tree_decorations[$tree_style];
640 my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style;
641 my $name = concise_op($op, $level, $treefmt);
642 if (not $op->flags & OPf_KIDS) {
643 return $name . "\n";
644 }
645 my @lines;
646 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
647 push @lines, tree($kid, $level+1);
648 }
649 my $i;
650 for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) {
651 $lines[$i] = $space . $lines[$i];
652 }
653 if ($i > 0) {
654 $lines[$i] = $last . $lines[$i];
655 while ($i-- > 1) {
656 if (substr($lines[$i], 0, 1) eq " ") {
657 $lines[$i] = $nokid . $lines[$i];
658 } else {
659 $lines[$i] = $kid . $lines[$i];
660 }
661 }
662 $lines[$i] = $kids . $lines[$i];
663 } else {
664 $lines[0] = $single . $lines[0];
665 }
666 return("$name$lead" . shift @lines,
667 map(" " x (length($name)+$size) . $_, @lines));
668}
669
213a1a26 670# *** Warning: fragile kludge ahead ***
671# Because the B::* modules run in the same interpreter as the code
672# they're compiling, their presence tends to distort the view we have
673# of the code we're looking at. In particular, perl gives sequence
674# numbers to both OPs in general and COPs in particular. If the
675# program we're looking at were run on its own, these numbers would
676# start at 1. Because all of B::Concise and all the modules it uses
677# are compiled first, though, by the time we get to the user's program
678# the sequence numbers are alreay at pretty high numbers, which would
679# be distracting if you're trying to tell OPs apart. Therefore we'd
680# like to subtract an offset from all the sequence numbers we display,
681# to restore the simpler view of the world. The trick is to know what
682# that offset will be, when we're still compiling B::Concise! If we
683# hardcoded a value, it would have to change every time B::Concise or
684# other modules we use do. To help a little, what we do here is
685# compile a little code at the end of the module, and compute the base
686# sequence number for the user's program as being a small offset
687# later, so all we have to worry about are changes in the offset.
c27ea44e 688# (Note that we now only play this game with COP sequence numbers. OP
689# sequence numbers aren't used to refer to OPs from a distance, and
690# they don't have much significance, so we just generate our own
691# sequence numbers which are easier to control. This way we also don't
692# stand in the way of a possible future removal of OP sequence
693# numbers).
213a1a26 694
695# When you say "perl -MO=Concise -e '$a'", the output should look like:
696
697# 4 <@> leave[t1] vKP/REFC ->(end)
698# 1 <0> enter ->2
699 #^ smallest OP sequence number should be 1
700# 2 <;> nextstate(main 1 -e:1) v ->3
701 # ^ smallest COP sequence number should be 1
702# - <1> ex-rv2sv vK/1 ->4
703# 3 <$> gvsv(*a) s ->4
704
c27ea44e 705# If the second of the marked numbers there isn't 1, it means you need
706# to update the corresponding magic number in the next line.
707# Remember, this needs to stay the last things in the module.
e69a2255 708
c27ea44e 709# Why is this different for MacOS? Does it matter?
8ec8fbef 710my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11;
e69a2255 711$cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
c99ca59a 712
7131;
714
715__END__
716
717=head1 NAME
718
719B::Concise - Walk Perl syntax tree, printing concise info about ops
720
721=head1 SYNOPSIS
722
723 perl -MO=Concise[,OPTIONS] foo.pl
724
78ad9108 725 use B::Concise qw(set_style add_callback);
726
c99ca59a 727=head1 DESCRIPTION
728
729This compiler backend prints the internal OPs of a Perl program's syntax
730tree in one of several space-efficient text formats suitable for debugging
731the inner workings of perl or other compiler backends. It can print OPs in
732the order they appear in the OP tree, in the order they will execute, or
733in a text approximation to their tree structure, and the format of the
734information displyed is customizable. Its function is similar to that of
735perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
736sophisticated and flexible.
737
f8a679e6 738=head1 EXAMPLE
739
740Here's is a short example of output, using the default formatting
741conventions :
742
743 % perl -MO=Concise -e '$a = $b + 42'
8ec8fbef 744 8 <@> leave[1 ref] vKP/REFC ->(end)
f8a679e6 745 1 <0> enter ->2
746 2 <;> nextstate(main 1 -e:1) v ->3
747 7 <2> sassign vKS/2 ->8
748 5 <2> add[t1] sK/2 ->6
749 - <1> ex-rv2sv sK/1 ->4
750 3 <$> gvsv(*b) s ->4
751 4 <$> const(IV 42) s ->5
752 - <1> ex-rv2sv sKRM*/1 ->7
753 6 <$> gvsv(*a) s ->7
754
755Each line corresponds to an operator. Null ops appear as C<ex-opname>,
756where I<opname> is the op that has been optimized away by perl.
757
758The number on the first row indicates the op's sequence number. It's
759given in base 36 by default.
760
761The symbol between angle brackets indicates the op's type : for example,
762<2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">).
763
764The opname may be followed by op-specific information in parentheses
765(e.g. C<gvsv(*b)>), and by targ information in brackets (e.g.
766C<leave[t1]>).
767
768Next come the op flags. The common flags are listed below
769(L</"OP flags abbreviations">). The private flags follow, separated
770by a slash. For example, C<vKP/REFC> means that the leave op has
771public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private
772flag OPpREFCOUNTED.
773
774Finally an arrow points to the sequence number of the next op.
775
c99ca59a 776=head1 OPTIONS
777
778Arguments that don't start with a hyphen are taken to be the names of
8ec8fbef 779subroutines to print the OPs of; if no such functions are specified,
780the main body of the program (outside any subroutines, and not
781including use'd or require'd files) is printed. Passing C<BEGIN>,
782C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding
783special blocks to be printed.
c99ca59a 784
785=over 4
786
787=item B<-basic>
788
789Print OPs in the order they appear in the OP tree (a preorder
790traversal, starting at the root). The indentation of each OP shows its
791level in the tree. This mode is the default, so the flag is included
792simply for completeness.
793
794=item B<-exec>
795
796Print OPs in the order they would normally execute (for the majority
797of constructs this is a postorder traversal of the tree, ending at the
798root). In most cases the OP that usually follows a given OP will
799appear directly below it; alternate paths are shown by indentation. In
800cases like loops when control jumps out of a linear path, a 'goto'
801line is generated.
802
803=item B<-tree>
804
805Print OPs in a text approximation of a tree, with the root of the tree
806at the left and 'left-to-right' order of children transformed into
807'top-to-bottom'. Because this mode grows both to the right and down,
808it isn't suitable for large programs (unless you have a very wide
809terminal).
810
811=item B<-compact>
812
813Use a tree format in which the minimum amount of space is used for the
814lines connecting nodes (one character in most cases). This squeezes out
815a few precious columns of screen real estate.
816
817=item B<-loose>
818
819Use a tree format that uses longer edges to separate OP nodes. This format
820tends to look better than the compact one, especially in ASCII, and is
821the default.
822
823=item B<-vt>
824
825Use tree connecting characters drawn from the VT100 line-drawing set.
826This looks better if your terminal supports it.
827
828=item B<-ascii>
829
830Draw the tree with standard ASCII characters like C<+> and C<|>. These don't
831look as clean as the VT100 characters, but they'll work with almost any
832terminal (or the horizontal scrolling mode of less(1)) and are suitable
833for text documentation or email. This is the default.
834
835=item B<-main>
836
837Include the main program in the output, even if subroutines were also
838specified.
839
840=item B<-base>I<n>
841
842Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the
843digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit
844for 37 will be 'A', and so on until 62. Values greater than 62 are not
845currently supported. The default is 36.
846
847=item B<-bigendian>
848
849Print sequence numbers with the most significant digit first. This is the
850usual convention for Arabic numerals, and the default.
851
852=item B<-littleendian>
853
854Print seqence numbers with the least significant digit first.
855
856=item B<-concise>
857
858Use the author's favorite set of formatting conventions. This is the
859default, of course.
860
861=item B<-terse>
862
a6d05634 863Use formatting conventions that emulate the output of B<B::Terse>. The
c99ca59a 864basic mode is almost indistinguishable from the real B<B::Terse>, and the
865exec mode looks very similar, but is in a more logical order and lacks
866curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
867is only vaguely reminiscient of B<B::Terse>.
868
869=item B<-linenoise>
870
871Use formatting conventions in which the name of each OP, rather than being
872written out in full, is represented by a one- or two-character abbreviation.
873This is mainly a joke.
874
875=item B<-debug>
876
877Use formatting conventions reminiscient of B<B::Debug>; these aren't
878very concise at all.
879
880=item B<-env>
881
882Use formatting conventions read from the environment variables
883C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>.
884
885=back
886
887=head1 FORMATTING SPECIFICATIONS
888
889For each general style ('concise', 'terse', 'linenoise', etc.) there are
890three specifications: one of how OPs should appear in the basic or exec
891modes, one of how 'goto' lines should appear (these occur in the exec
892mode only), and one of how nodes should appear in tree mode. Each has the
893same format, described below. Any text that doesn't match a special
894pattern is copied verbatim.
895
896=over 4
897
898=item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)>
899
900Generates I<exec_text> in exec mode, or I<basic_text> in basic mode.
901
902=item B<(*(>I<text>B<)*)>
903
904Generates one copy of I<text> for each indentation level.
905
906=item B<(*(>I<text1>B<;>I<text2>B<)*)>
907
908Generates one fewer copies of I<text1> than the indentation level, followed
909by one copy of I<text2> if the indentation level is more than 0.
910
911=item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)>
912
913If the value of I<var> is true (not empty or zero), generates the
914value of I<var> surrounded by I<text1> and I<Text2>, otherwise
915nothing.
916
917=item B<#>I<var>
918
919Generates the value of the variable I<var>.
920
921=item B<#>I<var>I<N>
922
923Generates the value of I<var>, left jutified to fill I<N> spaces.
924
925=item B<~>
926
927Any number of tildes and surrounding whitespace will be collapsed to
928a single space.
929
930=back
931
932The following variables are recognized:
933
934=over 4
935
936=item B<#addr>
937
938The address of the OP, in hexidecimal.
939
940=item B<#arg>
941
942The OP-specific information of the OP (such as the SV for an SVOP, the
943non-local exit pointers for a LOOP, etc.) enclosed in paretheses.
944
945=item B<#class>
946
947The B-determined class of the OP, in all caps.
948
f8a679e6 949=item B<#classsym>
c99ca59a 950
951A single symbol abbreviating the class of the OP.
952
c3caa09d 953=item B<#coplabel>
954
955The label of the statement or block the OP is the start of, if any.
956
c99ca59a 957=item B<#exname>
958
959The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo.
960
961=item B<#extarg>
962
963The target of the OP, or nothing for a nulled OP.
964
965=item B<#firstaddr>
966
967The address of the OP's first child, in hexidecimal.
968
969=item B<#flags>
970
971The OP's flags, abbreviated as a series of symbols.
972
973=item B<#flagval>
974
975The numeric value of the OP's flags.
976
f8a679e6 977=item B<#hyphseq>
c99ca59a 978
979The sequence number of the OP, or a hyphen if it doesn't have one.
980
981=item B<#label>
982
983'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec
984mode, or empty otherwise.
985
986=item B<#lastaddr>
987
988The address of the OP's last child, in hexidecimal.
989
990=item B<#name>
991
992The OP's name.
993
994=item B<#NAME>
995
996The OP's name, in all caps.
997
998=item B<#next>
999
1000The sequence number of the OP's next OP.
1001
1002=item B<#nextaddr>
1003
1004The address of the OP's next OP, in hexidecimal.
1005
1006=item B<#noise>
1007
c27ea44e 1008A one- or two-character abbreviation for the OP's name.
c99ca59a 1009
1010=item B<#private>
1011
1012The OP's private flags, rendered with abbreviated names if possible.
1013
1014=item B<#privval>
1015
1016The numeric value of the OP's private flags.
1017
1018=item B<#seq>
1019
c27ea44e 1020The sequence number of the OP. Note that this is now a sequence number
1021generated by B::Concise, rather than the real op_seq value (for which
1022see B<#seqnum>).
c99ca59a 1023
1024=item B<#seqnum>
1025
1026The real sequence number of the OP, as a regular number and not adjusted
1027to be relative to the start of the real program. (This will generally be
1028a fairly large number because all of B<B::Concise> is compiled before
1029your program is).
1030
1031=item B<#sibaddr>
1032
1033The address of the OP's next youngest sibling, in hexidecimal.
1034
1035=item B<#svaddr>
1036
1037The address of the OP's SV, if it has an SV, in hexidecimal.
1038
1039=item B<#svclass>
1040
1041The class of the OP's SV, if it has one, in all caps (e.g., 'IV').
1042
1043=item B<#svval>
1044
1045The value of the OP's SV, if it has one, in a short human-readable format.
1046
1047=item B<#targ>
1048
1049The numeric value of the OP's targ.
1050
1051=item B<#targarg>
1052
1053The name of the variable the OP's targ refers to, if any, otherwise the
1054letter t followed by the OP's targ in decimal.
1055
1056=item B<#targarglife>
1057
1058Same as B<#targarg>, but followed by the COP sequence numbers that delimit
1059the variable's lifetime (or 'end' for a variable in an open scope) for a
1060variable.
1061
1062=item B<#typenum>
1063
1064The numeric value of the OP's type, in decimal.
1065
1066=back
1067
1068=head1 ABBREVIATIONS
1069
1070=head2 OP flags abbreviations
1071
1072 v OPf_WANT_VOID Want nothing (void context)
1073 s OPf_WANT_SCALAR Want single value (scalar context)
1074 l OPf_WANT_LIST Want list of any length (list context)
1075 K OPf_KIDS There is a firstborn child.
1076 P OPf_PARENS This operator was parenthesized.
1077 (Or block needs explicit scope entry.)
1078 R OPf_REF Certified reference.
1079 (Return container, not containee).
1080 M OPf_MOD Will modify (lvalue).
1081 S OPf_STACKED Some arg is arriving on the stack.
1082 * OPf_SPECIAL Do something weird for this op (see op.h)
1083
1084=head2 OP class abbreviations
1085
1086 0 OP (aka BASEOP) An OP with no children
1087 1 UNOP An OP with one child
1088 2 BINOP An OP with two children
1089 | LOGOP A control branch OP
1090 @ LISTOP An OP that could have lots of children
1091 / PMOP An OP with a regular expression
1092 $ SVOP An OP with an SV
1093 " PVOP An OP with a string
1094 { LOOP An OP that holds pointers for a loop
1095 ; COP An OP that marks the start of a statement
051f02e9 1096 # PADOP An OP with a GV on the pad
c99ca59a 1097
78ad9108 1098=head1 Using B::Concise outside of the O framework
1099
1100It is possible to extend B<B::Concise> by using it outside of the B<O>
1101framework and providing new styles and new variables.
1102
1103 use B::Concise qw(set_style add_callback);
1104 set_style($format, $gotofmt, $treefmt);
1105 add_callback
1106 (
1107 sub
1108 {
1109 my ($h, $op, $level, $format) = @_;
1110 $h->{variable} = some_func($op);
1111 }
1112 );
1113 B::Concise::compile(@options)->();
1114
1115You can specify a style by calling the B<set_style> subroutine. If you
1116have a new variable in your style, or you want to change the value of an
1117existing variable, you will need to add a callback to specify the value
1118for that variable.
1119
1120This is done by calling B<add_callback> passing references to any
1121callback subroutines. The subroutines are called in the same order as
1122they are added. Each subroutine is passed four parameters. These are a
1123reference to a hash, the keys of which are the names of the variables
1124and the values of which are their values, the op, the level and the
1125format.
1126
1127To define your own variables, simply add them to the hash, or change
1128existing values if you need to. The level and format are passed in as
1129references to scalars, but it is unlikely that they will need to be
1130changed or even used.
1131
31b49ad4 1132To switch back to one of the standard styles like C<concise> or
1133C<terse>, use C<set_style_standard>.
1134
78ad9108 1135To see the output, call the subroutine returned by B<compile> in the
1136same way that B<O> does.
1137
c99ca59a 1138=head1 AUTHOR
1139
31b49ad4 1140Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.
c99ca59a 1141
1142=cut