Add S & E tokens, plus timing.
[p5sagit/Devel-Size.git] / bin / sizeme_store.pl
CommitLineData
5aa3ad8e 1#!/usr/bin/env perl
2c631ee0 2
d3b8a135 3# Read the raw memory data from Devel::Memory and process the tree
4# (as a stack, propagating data such as totals, up the tree).
5# Output completed nodes in the request formats.
6# Needs to be generalized to support pluggable output formats.
7# Making nodes into (lightweight fast) objects would be smart.
8# Tests would be even smarter!
9#
10# When working on this code it's important to have a sense of the flow.
11# Specifically the way that depth drives the completion of nodes.
12# It's a depth-first stream processing machine, which only ever holds
13# a single stack of the currently incomplete nodes, which is always the same as
14# the current depth. I.e., when a node of depth N arrives, all nodes >N are
15# popped off the stack and 'completed', each rippling data up to its parent.
16
2c631ee0 17use strict;
18use warnings;
5aa3ad8e 19use autodie;
2c631ee0 20
e8f4c506 21use DBI qw(looks_like_number);
b2fc39a5 22use DBD::SQLite;
f60f09e5 23use JSON::XS;
de73b186 24use Devel::Dwarn;
d3b8a135 25use HTML::Entities qw(encode_entities);;
5e2e22f3 26use Data::Dumper;
fc6614ee 27use Getopt::Long;
5e2e22f3 28use Carp qw(carp croak confess);
fc6614ee 29
de73b186 30# XXX import these from the XS code
31use constant NPtype_NAME => 0x01;
32use constant NPtype_LINK => 0x02;
33use constant NPtype_SV => 0x03;
34use constant NPtype_MAGIC => 0x04;
35use constant NPtype_OP => 0x05;
36
37use constant NPattr_LEAFSIZE => 0x00;
38use constant NPattr_NAME => 0x01;
39use constant NPattr_PADFAKE => 0x02;
40use constant NPattr_PADNAME => 0x03;
41use constant NPattr_PADTMP => 0x04;
42use constant NPattr_NOTE => 0x05;
68cafb30 43use constant NPattr_PRE_ATTR => 0x06;
de73b186 44
45
fc6614ee 46GetOptions(
5aa3ad8e 47 'text!' => \my $opt_text,
48 'dot=s' => \my $opt_dot,
b2fc39a5 49 'db=s' => \my $opt_db,
e8f4c506 50 'verbose|v!' => \my $opt_verbose,
51 'debug|d!' => \my $opt_debug,
98128850 52 'showid!' => \my $opt_showid,
fc6614ee 53) or exit 1;
94fab3d1 54
5e2e22f3 55$| = 1 if $opt_debug;
0e977dbc 56my $run_size = 0;
57my $total_size = 0;
58
f60f09e5 59my $j = JSON::XS->new->ascii->pretty(0);
60
1915946b 61my ($dbh, $node_ins_sth);
62if ($opt_db) {
63 $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db","","", {
64 RaiseError => 1, PrintError => 0, AutoCommit => 0
65 });
66 $dbh->do("PRAGMA synchronous = OFF");
1915946b 67}
b2fc39a5 68
2c631ee0 69my @stack;
70my %seqn2node;
71
7020702a 72my $dotnode = sub {
73 my $name = encode_entities(shift);
74 $name =~ s/"/\\"/g;
7020702a 75 return '"'.$name.'"';
76};
ee2793c1 77
ee2793c1 78
5aa3ad8e 79my $dot_fh;
ee2793c1 80
0741448c 81sub fmt_size {
82 my $size = shift;
83 my $kb = $size / 1024;
84 return $size if $kb < 5;
85 return sprintf "%.1fKb", $kb if $kb < 1000;
86 return sprintf "%.1fMb", $kb/1024;
87}
88
ee2793c1 89
94fab3d1 90sub enter_node {
91 my $x = shift;
5e2e22f3 92 warn "enter_node $x->{id}\n" if $opt_debug;
de73b186 93
94 my $parent = $stack[-1];
95 if ($parent) {
96
37836f2a 97 if ($x->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)') {
68cafb30 98 my $index = $x->{attr}{index};
99 # If node is an AVelem of a CvPADLIST propagate pad name to AVelem
100 if (@stack >= 4 and (my $cvpl = $stack[-4])->{name} eq 'CvPADLIST') {
68cafb30 101 my $padnames = $cvpl->{_cached}{padnames} ||= do {
102 my @names = @{ $cvpl->{attr}{+NPattr_PADNAME} || []};
103 $_ = "my(".($_||'').")" for @names;
104 $names[0] = '@_';
105 \@names;
106 };
68cafb30 107 $x->{name} = $padnames->[$index] || "?";
108 $x->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness
109 }
110 else {
111 $x->{name} = "[$index]";
de73b186 112 }
113 }
ee2793c1 114 }
de73b186 115
116 return $x;
94fab3d1 117}
118
de73b186 119
94fab3d1 120sub leave_node {
121 my $x = shift;
5e2e22f3 122 confess unless defined $x->{id};
123 warn "leave_node $x->{id}\n" if $opt_debug;
b2fc39a5 124 delete $seqn2node{$x->{id}};
ee2793c1 125
d3b8a135 126 my $self_size = 0; $self_size += $_ for values %{$x->{leaves}};
94fab3d1 127 $x->{self_size} = $self_size;
ee2793c1 128
129 my $parent = $stack[-1];
130 if ($parent) {
2c631ee0 131 # link to parent
5a78486c 132 $x->{parent_id} = $parent->{id};
2c631ee0 133 # accumulate into parent
134 $parent->{kids_node_count} += 1 + ($x->{kids_node_count}||0);
94fab3d1 135 $parent->{kids_size} += $self_size + $x->{kids_size};
5a78486c 136 push @{$parent->{child_id}}, $x->{id};
2c631ee0 137 }
de73b186 138
2c631ee0 139 # output
140 # ...
ee2793c1 141 if ($opt_dot) {
1915946b 142 printf "// n%d parent=%s(type=%s)\n", $x->{id},
0741448c 143 $parent ? $parent->{id} : "",
144 $parent ? $parent->{type} : ""
145 if 0;
de73b186 146 if ($x->{type} != NPtype_LINK) {
0741448c 147 my $name = $x->{title} ? "\"$x->{title}\" $x->{name}" : $x->{name};
148
149 if ($x->{kids_size}) {
150 $name .= sprintf " %s+%s=%s", fmt_size($x->{self_size}), fmt_size($x->{kids_size}), fmt_size($x->{self_size}+$x->{kids_size});
151 }
152 else {
153 $name .= sprintf " +%s", fmt_size($x->{self_size});
154 }
98128850 155 $name .= " $x->{id}" if $opt_showid;
0741448c 156
157 my @node_attr = (
158 sprintf("label=%s", $dotnode->($name)),
159 "id=$x->{id}",
160 );
161 my @link_attr;
162 #if ($x->{name} eq 'hek') { push @node_attr, "shape=point"; push @node_attr, "labelfontsize=6"; }
1915946b 163 if ($parent) { # probably a link
1915946b 164 my $parent_id = $parent->{id};
0741448c 165 my @link_attr = ("id=$parent_id");
de73b186 166 if ($parent->{type} == NPtype_LINK) { # link
1915946b 167 (my $link_name = $parent->{name}) =~ s/->$//;
168 push @link_attr, (sprintf "label=%s", $dotnode->($link_name));
169 $parent_id = ($stack[-2]||die "panic")->{id};
170 }
5aa3ad8e 171 printf $dot_fh qq{n%d -> n%d [%s];\n},
1915946b 172 $parent_id, $x->{id}, join(",", @link_attr);
173 }
5aa3ad8e 174 printf $dot_fh qq{n%d [ %s ];\n}, $x->{id}, join(",", @node_attr);
1915946b 175 }
176
ee2793c1 177 }
b2fc39a5 178 if ($dbh) {
f60f09e5 179 my $attr_json = $j->encode($x->{attr});
e78b28ca 180 my $leaves_json = $j->encode($x->{leaves});
b2fc39a5 181 $node_ins_sth->execute(
98128850 182 $x->{id}, $x->{name}, $x->{title}, $x->{type}, $x->{depth}, $x->{parent_id},
b2fc39a5 183 $x->{self_size}, $x->{kids_size}, $x->{kids_node_count},
f60f09e5 184 $x->{child_id} ? join(",", @{$x->{child_id}}) : undef,
e78b28ca 185 $attr_json, $leaves_json,
b2fc39a5 186 );
187 # XXX attribs
188 }
d3b8a135 189
190 return $x;
2c631ee0 191}
192
5aa3ad8e 193my $indent = ": ";
d3b8a135 194my @attr_type_name = (qw(size NAME PADFAKE my PADTMP NOTE)); # XXX get from XS in some way
68cafb30 195my $pending_pre_attr = {};
94fab3d1 196
2c631ee0 197while (<>) {
5e2e22f3 198 warn $_ if $opt_debug;
2c631ee0 199 chomp;
de73b186 200
b2fc39a5 201 my ($type, $id, $val, $name, $extra) = split / /, $_, 5;
de73b186 202
ee2793c1 203 if ($type =~ s/^-//) { # Node type ($val is depth)
d3b8a135 204
65b2cf7d 205 printf "%s%s%s %s [#%d @%d]\n", $indent x $val, $name,
206 ($type == NPtype_LINK) ? "->" : "",
207 $extra||'', $id, $val
5aa3ad8e 208 if $opt_text;
d3b8a135 209
210 # this is the core driving logic
2c631ee0 211 while ($val < @stack) {
d3b8a135 212 my $x = leave_node(pop @stack);
e8f4c506 213 warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n"
214 if $opt_verbose;
2c631ee0 215 }
c5078bcb 216 die "panic: stack already has item at depth $val"
217 if $stack[$val];
5e2e22f3 218 die "Depth out of sync\n" if $val != @stack;
de73b186 219 my $node = enter_node({
220 id => $id, type => $type, name => $name, extra => $extra,
68cafb30 221 attr => { %$pending_pre_attr },
222 leaves => {}, depth => $val, self_size=>0, kids_size=>0
de73b186 223 });
68cafb30 224 %$pending_pre_attr = ();
de73b186 225 $stack[$val] = $node;
b2fc39a5 226 $seqn2node{$id} = $node;
2c631ee0 227 }
d3b8a135 228
de73b186 229 # --- Leaf name and memory size
230 elsif ($type eq "L") {
b2fc39a5 231 my $node = $seqn2node{$id} || die;
2c631ee0 232 $node->{leaves}{$name} += $val;
0e977dbc 233 $run_size += $val;
234 printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name
5aa3ad8e 235 if $opt_text;
2c631ee0 236 }
d3b8a135 237
238 # --- Attribute type, name and value (all rather hackish)
de73b186 239 elsif (looks_like_number($type)) {
b2fc39a5 240 my $node = $seqn2node{$id} || die;
e8f4c506 241 my $attr = $node->{attr} || die;
de73b186 242
68cafb30 243 # attributes to queue up and apply to the next node
244 if (NPattr_PRE_ATTR == $type) {
245 $pending_pre_attr->{$name} = $val;
246 }
247 # attributes where the string is a key (or always empty and the type is the key)
248 elsif ($type == NPattr_NAME or $type == NPattr_NOTE) {
de73b186 249 printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
250 if $opt_text;
e8f4c506 251 warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n"
252 if exists $attr->{$type}{$name};
253 $attr->{$type}{$name} = $val || $id;
e8f4c506 254 $node->{title} = $name if $type == 1 and !$val;
255 }
68cafb30 256 # attributes where the number is a key (or always zero)
de73b186 257 elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) {
258 printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
259 if $opt_text;
e8f4c506 260 warn "Node $id already has attribute $type:$name (value $attr->{$type}[$val])\n"
261 if defined $attr->{$type}[$val];
de73b186 262 $attr->{+NPattr_PADNAME}[$val] = $name; # store all as NPattr_PADNAME
e8f4c506 263 }
264 else {
de73b186 265 printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type
266 if $opt_text;
e8f4c506 267 warn "Invalid attribute type '$type' on line $. ($_)";
268 }
2c631ee0 269 }
5e2e22f3 270 elsif ($type eq 'S') { # start of a run
271 die "Unexpected start token" if @stack;
272 if ($opt_dot) {
273 open $dot_fh, ">$opt_dot";
274 print $dot_fh "digraph {\n"; # }
275 print $dot_fh "graph [overlap=false]\n"; # target="???", URL="???"
276 }
277 if ($dbh) {
278 # XXX add a size_run table records each run
279 # XXX pick a table name to store the run nodes in
280 #$run_ins_sth->execute(
281 my $table = "node";
282 $dbh->do("DROP TABLE IF EXISTS $table");
283 $dbh->do(qq{
284 CREATE TABLE $table (
285 id integer primary key,
286 name text,
287 title text,
288 type integer,
289 depth integer,
290 parent_id integer,
291
292 self_size integer,
293 kids_size integer,
294 kids_node_count integer,
295 child_ids text,
296 attr_json text,
297 leaves_json text
298 )
299 });
300 $node_ins_sth = $dbh->prepare(qq{
301 INSERT INTO $table VALUES (?,?,?,?,?,?, ?,?,?,?,?,?)
302 });
303 }
304 }
305 elsif ($type eq 'E') { # end of a run
306
307 my $top = $stack[0]; # grab top node before we pop all the nodes
308 leave_node(pop @stack) while @stack;
309 my $top_size = $top->{self_size}+$top->{kids_size};
310
311 printf "Stored %d nodes sizing %s (%d)\n",
312 $top->{kids_node_count}, fmt_size($top_size), $top_size;
313
314 if ($opt_verbose or $run_size != $top_size) {
315 warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n";
316 warn Dumper($top);
317 }
318 die "panic: seqn2node should be empty ". Dumper(\%seqn2node)
319 if %seqn2node;
320 %$pending_pre_attr = ();
321
322 if ($dot_fh) {
323 print $dot_fh "}\n";
324 close $dot_fh;
325 system("open -a Graphviz $opt_dot") if $^O eq 'darwin'; # OSX
326 }
327
328 $dbh->commit if $dbh;
329 }
2c631ee0 330 else {
331 warn "Invalid type '$type' on line $. ($_)";
e8f4c506 332 next;
2c631ee0 333 }
d3b8a135 334
b2fc39a5 335 $dbh->commit if $dbh and $id % 10_000 == 0;
2c631ee0 336}
5e2e22f3 337die "EOF without end token" if @stack;
b2fc39a5 338
2c631ee0 339
d3b8a135 340=for This is out of date but gives you an idea of the data and stream
341
2c631ee0 342SV(PVAV) fill=1/1 [#1 @0]
343: +64 sv =64
344: +16 av_max =80
345: AVelem-> [#2 @1]
346: : SV(RV) [#3 @2]
347: : : +24 sv =104
348: : : RV-> [#4 @3]
349: : : : SV(PVAV) fill=-1/-1 [#5 @4]
350: : : : : +64 sv =168
351: AVelem-> [#6 @1]
352: : SV(IV) [#7 @2]
353: : : +24 sv =192
354192 at -e line 1.
355=cut
356__DATA__
357N 1 0 SV(PVAV) fill=1/1
358L 1 64 sv
359L 1 16 av_max
360N 2 1 AVelem->
361N 3 2 SV(RV)
362L 3 24 sv
363N 4 3 RV->
364N 5 4 SV(PVAV) fill=-1/-1
365L 5 64 sv
366N 6 1 AVelem->
367N 7 2 SV(IV)
368L 7 24 sv