Remove 'pre attr'. Other minor cleanup.
[p5sagit/Devel-Size.git] / bin / sizeme_store.pl
CommitLineData
5aa3ad8e 1#!/usr/bin/env perl
2c631ee0 2
d3b8a135 3# Read the raw memory data from Devel::Memory and process the tree
4# (as a stack, propagating data such as totals, up the tree).
5# Output completed nodes in the requested formats.
d81c4128 6#
d3b8a135 7# Needs to be generalized to support pluggable output formats.
d81c4128 8# Actually it needs to be split so sizeme_store.pl only does the store
9# and another program drives the output with plugins.
d3b8a135 10# Making nodes into (lightweight fast) objects would be smart.
11# Tests would be even smarter!
12#
13# When working on this code it's important to have a sense of the flow.
14# Specifically the way that depth drives the completion of nodes.
15# It's a depth-first stream processing machine, which only ever holds
16# a single stack of the currently incomplete nodes, which is always the same as
17# the current depth. I.e., when a node of depth N arrives, all nodes >N are
18# popped off the stack and 'completed', each rippling data up to its parent.
19
2c631ee0 20use strict;
21use warnings;
5aa3ad8e 22use autodie;
2c631ee0 23
e8f4c506 24use DBI qw(looks_like_number);
b2fc39a5 25use DBD::SQLite;
f60f09e5 26use JSON::XS;
de73b186 27use Devel::Dwarn;
d3b8a135 28use HTML::Entities qw(encode_entities);;
5e2e22f3 29use Data::Dumper;
fc6614ee 30use Getopt::Long;
5e2e22f3 31use Carp qw(carp croak confess);
fc6614ee 32
# XXX import these from the XS code

# Node types: the numeric value that follows the leading '-' in the
# first field of a node line.
use constant {
    NPtype_NAME  => 0x01,
    NPtype_LINK  => 0x02,
    NPtype_SV    => 0x03,
    NPtype_MAGIC => 0x04,
    NPtype_OP    => 0x05,
};

# Attribute types: the numeric first field of an attribute line.
use constant {
    NPattr_LEAFSIZE => 0x00,
    NPattr_NAME     => 0x01,
    NPattr_PADFAKE  => 0x02,
    NPattr_PADNAME  => 0x03,
    NPattr_PADTMP   => 0x04,
    NPattr_NOTE     => 0x05,
    NPattr_PRE_ATTR => 0x06,
};

# Label used in the --text output for each attribute type, indexed by
# the NPattr_* value.
# XXX get from XS in some way
my @attr_type_name = qw(size NAME PADFAKE my PADTMP NOTE PREATTR);
de73b186 48
49
# Command-line switches:
#   --text      print an indented text rendering of the stream on STDOUT
#   --dot=FILE  write a Graphviz 'dot' description of the nodes to FILE
#   --db=NAME   store the completed nodes in the SQLite database NAME
#   --verbose   warn as each node is completed
#   --debug     trace every input line and node entry/exit (unbuffered)
#   --showid    append "#<id>" to the labels in the dot output
my ($opt_text, $opt_dot, $opt_db, $opt_verbose, $opt_debug, $opt_showid);
GetOptions(
    'text!'      => \$opt_text,
    'dot=s'      => \$opt_dot,
    'db=s'       => \$opt_db,
    'verbose|v!' => \$opt_verbose,
    'debug|d!'   => \$opt_debug,
    'showid!'    => \$opt_showid,
) or exit 1;

if ($opt_debug) {
    $| = 1;
}

# Running sum of every leaf size seen; compared against the size of the
# top node when an end token arrives.
my $run_size   = 0;
my $total_size = 0;

# Encoder for the attr_json / leaves_json columns.
my $j = JSON::XS->new->ascii->pretty(0);

# Database handle and the prepared node INSERT statement; the statement
# is only prepared once a start token has been read.
my $dbh;
my $node_ins_sth;
if ($opt_db) {
    my %connect_attr = (
        RaiseError => 1,
        PrintError => 0,
        AutoCommit => 0,
    );
    $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db", "", "", \%connect_attr);
    $dbh->do("PRAGMA synchronous = OFF");
}

# The incomplete nodes, indexed by depth, and a lookup of those same
# nodes by their id.
my @stack;
my %seqn2node;
75
# Turn a label into a double-quoted string for the dot output: the text
# is entity-encoded, any double quote still present is backslash-escaped,
# and the result is wrapped in double quotes.
my $dotnode = sub {
    my ($label) = @_;
    my $quoted = encode_entities($label);
    $quoted =~ s/"/\\"/g;
    return qq{"$quoted"};
};


# Output handle for the dot file (opened when a start token is read).
my $dot_fh;
ee2793c1 84
# Format a byte count for display.
#
# Sizes below 5Kb are returned unchanged as a plain number, sizes below
# 1000Kb as "N.NKb", and anything larger as "N.NMb".
sub fmt_size {
    my ($bytes) = @_;
    my $kilobytes = $bytes / 1024;

    if ($kilobytes < 5) {
        return $bytes;
    }
    elsif ($kilobytes < 1000) {
        return sprintf("%.1fKb", $kilobytes);
    }
    return sprintf("%.1fMb", $kilobytes / 1024);
}
92
ee2793c1 93
# Called for each new node before it is placed on @stack, so $stack[-1]
# is still the node's parent.  May rename the node; returns the node.
#
# An 'AVelem' node whose parent is an 'SV(PVAV)' is renamed:
#   - when the node four levels up the stack is a 'CvPADLIST', to the pad
#     name recorded on that CvPADLIST for the element's index (or "?");
#   - otherwise to "[<index>]", if the index is known.
#
# NOTE(review): the only caller visible in this file passes a node whose
# attr hash is still empty, so $index appears to always be undef here --
# confirm whether the index is expected to be available on entry.
sub enter_node {
    my ($node) = @_;
    warn ">> enter_node $node->{id}\n" if $opt_debug;

    my $parent = $stack[-1];
    return $node unless $parent;

    return $node
        unless $node->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)';

    my $index = $node->{attr}{+NPattr_NOTE}{i};
    #Dwarn $node->{attr};
    #Dwarn $index;

    my $padlist = (@stack >= 4) ? $stack[-4] : undef;

    # If node is an AVelem of a CvPADLIST propagate pad name to AVelem
    if ($padlist and $padlist->{name} eq 'CvPADLIST') {
        # Build the list of display names once per CvPADLIST and cache it.
        my $padnames = $padlist->{_cached}{padnames} ||= do {
            my @names = map { "my(" . ($_ || '') . ")" }
                        @{ $padlist->{attr}{+NPattr_PADNAME} || [] };
            $names[0] = '@_';
            \@names;
        };
        my $padname = defined $index ? $padnames->[$index] : undef;
        $node->{name} = $padname || "?";
        $node->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness
    }
    elsif (defined $index) {
        $node->{name} = "[$index]";
    }

    return $node;
}
125
de73b186 126
# Complete a node that has just been popped off @stack, so $stack[-1]
# is now the node's parent (if it has one).
#
# Computes the node's self_size from its leaves, ripples its sizes and
# node count up into the parent, then writes the node to the dot file
# (with --dot) and/or the database (with --db).  Dies (via confess) if
# the node has no id.  Returns the node.
sub leave_node {
    my ($node) = @_;
    confess unless defined $node->{id};
    warn "<< leave_node $node->{id}\n" if $opt_debug;
    delete $seqn2node{ $node->{id} };

    # A node's own size is the sum of its leaf sizes.
    my $self_size = 0;
    foreach my $leaf_size (values %{ $node->{leaves} }) {
        $self_size += $leaf_size;
    }
    $node->{self_size} = $self_size;

    # Array elements still carrying the generic name get "[<index>]".
    if ($node->{name} eq 'AVelem') {
        my $index = $node->{attr}{+NPattr_NOTE}{i};
        if (defined $index) {
            $node->{name} = "[$index]";
        }
    }

    my $parent = $stack[-1];
    if (not $parent) {
        $node->{kids_node_count} ||= 0;
    }
    else {
        # link to parent
        $node->{parent_id} = $parent->{id};
        # accumulate into parent
        $parent->{kids_node_count} += 1 + ($node->{kids_node_count} || 0);
        $parent->{kids_size}       += $self_size + $node->{kids_size};
        push @{ $parent->{child_id} }, $node->{id};
    }

    # output
    if ($opt_dot) {
        # Disabled debugging aid.
        printf "// n%d parent=%s(type=%s)\n", $node->{id},
                $parent ? $parent->{id} : "",
                $parent ? $parent->{type} : ""
            if 0;

        if ($node->{type} == NPtype_LINK) {
            # A link becomes an edge from its parent to its (only) child.
            my @kids = @{ $node->{child_id} || [] };
            die "panic: NPtype_LINK has more than one child: @kids"
                if @kids > 1;
            foreach my $child_id (@kids) { # wouldn't work right, eg id= attr
                (my $link_name = $node->{name}) =~ s/->$//;
                $link_name .= " #$node->{id}" if $opt_showid;
                my $attrs = join ",",
                    "id=$node->{id}",
                    "label=" . $dotnode->($link_name);
                printf {$dot_fh} qq{n%d -> n%d [%s];\n},
                    $node->{parent_id}, $child_id, $attrs;
            }
        }
        else {
            # Any other node becomes a dot node labelled with its sizes.
            my $label = $node->{name};
            $label = "\"$node->{title}\" $label" if $node->{title};

            my $own = fmt_size($node->{self_size});
            if ($node->{kids_size}) {
                my $kids = fmt_size($node->{kids_size});
                my $both = fmt_size($node->{self_size} + $node->{kids_size});
                $label .= " $own+$kids=$both";
            }
            else {
                $label .= " +$own";
            }
            $label .= " #$node->{id}" if $opt_showid;

            my $attrs = join ",",
                "label=" . $dotnode->($label),
                "id=$node->{id}";
            printf {$dot_fh} qq{n%d [ %s ];\n}, $node->{id}, $attrs;
        }
    }

    if ($dbh) {
        my $attr_json   = $j->encode($node->{attr});
        my $leaves_json = $j->encode($node->{leaves});
        my $child_ids   = $node->{child_id}
                        ? join(",", @{ $node->{child_id} })
                        : undef;
        # One value per column, in the order the node table declares them.
        my @row = (
            $node->{id},
            $node->{name},
            $node->{title},
            $node->{type},
            $node->{depth},
            $node->{parent_id},
            $node->{self_size},
            $node->{kids_size},
            $node->{kids_node_count},
            $child_ids,
            $attr_json,
            $leaves_json,
        );
        $node_ins_sth->execute(@row);
        # XXX attribs
    }

    return $node;
}
209
# Indent unit for the --text output, repeated once per level of depth.
my $indent = ": ";

# Main loop: read the stream one line at a time.  Each line is up to five
# space-separated fields (type, id, val, name, extra) and the type field
# selects the handling:
#   -<n>      a node of type <n>; val is its depth
#   L         a leaf: val bytes, called name, belonging to node id
#   <number>  an attribute of type <number> for node id
#   S         start of a run
#   E         end of a run; val is the write duration
while (my $line = <>) {
    warn "\t\t\t\t== $line" if $opt_debug;
    chomp $line;

    my ($type, $id, $val, $name, $extra) = split / /, $line, 5;

    if ($type =~ s/^-//) {     # Node type ($val is depth)

        printf "%s%s%s %s [#%d @%d]\n", $indent x $val, $name,
                ($type == NPtype_LINK) ? "->" : "",
                $extra||'', $id, $val
            if $opt_text;

        # this is the core driving logic: a node arriving at depth N
        # completes every node still on the stack at depth >= N
        while ($val < @stack) {
            my $x = leave_node(pop @stack);
            warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n"
                if $opt_verbose;
        }
        die "panic: stack already has item at depth $val"
            if $stack[$val];
        die "Depth out of sync\n" if $val != @stack;
        my $node = enter_node({
            id => $id, type => $type, name => $name, extra => $extra,
            attr => { }, leaves => {}, depth => $val, self_size=>0, kids_size=>0
        });
        $stack[$val] = $node;
        $seqn2node{$id} = $node;
    }

    # --- Leaf name and memory size
    elsif ($type eq "L") {
        my $node = $seqn2node{$id}
            or die "Leaf refers to unknown node '$id'";
        $node->{leaves}{$name} += $val;
        $run_size += $val;
        printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name
            if $opt_text;
    }

    # --- Attribute type, name and value (all rather hackish)
    elsif (looks_like_number($type)) {
        my $node = $seqn2node{$id}
            or die "Attribute refers to unknown node '$id'";
        my $attr = $node->{attr}
            or die "Node '$id' has no attribute store";

        # attributes where the string is a key (or always empty and the type is the key)
        if ($type == NPattr_NAME or $type == NPattr_NOTE) {
            printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
                if $opt_text;
            warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n"
                if exists $attr->{$type}{$name};
            $attr->{$type}{$name} = $val;
            #Dwarn $attr;
            $node->{title} = $name if $type == NPattr_NAME and !$val; # XXX hack
        }
        # attributes where the number is a key (or always zero)
        elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) {
            printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
                if $opt_text;
            # All three pad attribute types are stored as NPattr_PADNAME,
            # so that is the slot the duplicate check has to look at.
            my $padnames = $attr->{+NPattr_PADNAME} ||= [];
            warn "Node $id already has attribute $type:$name (value $padnames->[$val])\n"
                if defined $padnames->[$val];
            $padnames->[$val] = $name;
        }
        else {
            printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type
                if $opt_text;
            warn "Invalid attribute type '$type' on line $. ($line)";
        }
    }
    elsif ($type eq 'S') { # start of a run
        die "Unexpected start token" if @stack;

        # The leaf total is checked against the top node at the end of
        # each run, so it has to start from zero for every run.
        $run_size = 0;

        if ($opt_dot) {
            # Three-arg open: the file name is never parsed for a mode.
            # Failure is fatal because autodie is in effect.
            open $dot_fh, '>', $opt_dot;
            print {$dot_fh} "digraph {\n"; # }
            print {$dot_fh} "graph [overlap=false]\n"; # target="???", URL="???"
        }
        if ($dbh) {
            # XXX add a size_run table records each run
            # XXX pick a table name to store the run nodes in
            #$run_ins_sth->execute(
            my $table = "node";
            $dbh->do("DROP TABLE IF EXISTS $table");
            $dbh->do(qq{
                CREATE TABLE $table (
                    id integer primary key,
                    name text,
                    title text,
                    type integer,
                    depth integer,
                    parent_id integer,

                    self_size integer,
                    kids_size integer,
                    kids_node_count integer,
                    child_ids text,
                    attr_json text,
                    leaves_json text
                )
            });
            $node_ins_sth = $dbh->prepare(qq{
                INSERT INTO $table VALUES (?,?,?,?,?,?, ?,?,?,?,?,?)
            });
        }
    }
    elsif ($type eq 'E') { # end of a run

        my $top = $stack[0]; # grab top node before we pop all the nodes
        leave_node(pop @stack) while @stack;

        # if nothing output (ie size(undef))
        $top ||= { self_size=>0, kids_size=>0, kids_node_count=>0 };

        my $top_size = $top->{self_size}+$top->{kids_size};

        printf "Stored %d nodes totalling %s [lines=%d size=%d write=%.2fs]\n",
            1+$top->{kids_node_count}, fmt_size($top_size),
            $., $top_size, $val;
        # the duration here ($val) is from Devel::SizeMe perspective
        # ie doesn't include time to read file/pipe and commit to database.

        if ($opt_verbose or $run_size != $top_size) {
            warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n";
            warn Dumper($top);
        }
        die "panic: seqn2node should be empty ". Dumper(\%seqn2node)
            if %seqn2node;

        if ($dot_fh) {
            print {$dot_fh} "}\n";
            close $dot_fh;
            # List form: the file name is passed as a single argument and
            # is never interpreted by a shell.
            system('open', '-a', 'Graphviz', $opt_dot) if $^O eq 'darwin'; # OSX
        }

        $dbh->commit if $dbh;
    }
    else {
        warn "Invalid type '$type' on line $. ($line)";
        next;
    }

    # Commit periodically so a large run is not one huge transaction.
    $dbh->commit if $dbh and $id % 10_000 == 0;
}
die "EOF without end token" if @stack;
b2fc39a5 354
2c631ee0 355
d3b8a135 356=for This is out of date but gives you an idea of the data and stream
357
2c631ee0 358SV(PVAV) fill=1/1 [#1 @0]
359: +64 sv =64
360: +16 av_max =80
361: AVelem-> [#2 @1]
362: : SV(RV) [#3 @2]
363: : : +24 sv =104
364: : : RV-> [#4 @3]
365: : : : SV(PVAV) fill=-1/-1 [#5 @4]
366: : : : : +64 sv =168
367: AVelem-> [#6 @1]
368: : SV(IV) [#7 @2]
369: : : +24 sv =192
370192 at -e line 1.
371=cut
372__DATA__
373N 1 0 SV(PVAV) fill=1/1
374L 1 64 sv
375L 1 16 av_max
376N 2 1 AVelem->
377N 3 2 SV(RV)
378L 3 24 sv
379N 4 3 RV->
380N 5 4 SV(PVAV) fill=-1/-1
381L 5 64 sv
382N 6 1 AVelem->
383N 7 2 SV(IV)
384L 7 24 sv