First steps migrating to SizeMe
[p5sagit/Devel-Size.git] / bin / sizeme_store.pl
CommitLineData
5aa3ad8e 1#!/usr/bin/env perl
2c631ee0 2
# Read the raw memory data from Devel::Memory and process the tree
# (as a stack, propagating data such as totals, up the tree).
# Output completed nodes in the requested formats.
# Needs to be generalized to support pluggable output formats.
# Making nodes into (lightweight fast) objects would be smart.
# Tests would be even smarter!
#
# When working on this code it's important to have a sense of the flow.
# Specifically the way that depth drives the completion of nodes.
# It's a depth-first stream processing machine, which only ever holds
# a single stack of the currently incomplete nodes, whose height is always the
# same as the current depth. I.e., when a node of depth N arrives, all nodes
# of depth >=N are popped off the stack and 'completed', each rippling data up
# to its parent.
16
2c631ee0 17use strict;
18use warnings;
5aa3ad8e 19use autodie;
2c631ee0 20
e8f4c506 21use DBI qw(looks_like_number);
b2fc39a5 22use DBD::SQLite;
f60f09e5 23use JSON::XS;
de73b186 24use Devel::Dwarn;
d3b8a135 25use HTML::Entities qw(encode_entities);;
b2fc39a5 26
fc6614ee 27use Getopt::Long;
28
# Numeric codes used in the input stream.
# XXX import these from the XS code

# Node types: the number that follows the leading '-' on a node line.
use constant {
    NPtype_NAME  => 0x01,
    NPtype_LINK  => 0x02,
    NPtype_SV    => 0x03,
    NPtype_MAGIC => 0x04,
    NPtype_OP    => 0x05,
};

# Attribute types: the (numeric) first field of an attribute line.
use constant {
    NPattr_LEAFSIZE => 0x00,
    NPattr_NAME     => 0x01,
    NPattr_PADFAKE  => 0x02,
    NPattr_PADNAME  => 0x03,
    NPattr_PADTMP   => 0x04,
    NPattr_NOTE     => 0x05,
    NPattr_PRE_ATTR => 0x06,
};
de73b186 43
44
# Command-line options:
#   --text      print an indented text rendering of the stream to STDOUT
#   --dot=FILE  write a Graphviz 'dot' description of the tree to FILE
#   --db=FILE   store every completed node in the SQLite database FILE
#   --verbose   warn as each node is completed (and dump the top node at EOF)
#   --debug     accepted, but not referenced anywhere else in this file
#   --showid    append the node id to each node label in the dot output
GetOptions(
    'text!'      => \my $opt_text,
    'dot=s'      => \my $opt_dot,
    'db=s'       => \my $opt_db,
    'verbose|v!' => \my $opt_verbose,
    'debug|d!'   => \my $opt_debug,
    'showid!'    => \my $opt_showid,
) or exit 1;

# Running total of every leaf size seen; compared with the top node's
# rolled-up total once the whole stream has been read.
my $run_size = 0;
# NOTE(review): $total_size is not referenced anywhere else in this file.
my $total_size = 0;

# Compact (non-pretty), ASCII-only JSON encoder used for the attr_json and
# leaves_json database columns.
my $j = JSON::XS->new->ascii->pretty(0);
58
# When --db is given, (re)create the 'node' table in that SQLite database and
# prepare the insert statement that leave_node() executes for each node.
# AutoCommit is off, so rows only become permanent at an explicit commit.
my ($dbh, $node_ins_sth);
if ($opt_db) {
    $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db","","", {
        RaiseError => 1, PrintError => 0, AutoCommit => 0
    });
    $dbh->do("PRAGMA synchronous = OFF");
    # Any node table left by a previous run is discarded.
    $dbh->do("DROP TABLE IF EXISTS node");
    $dbh->do(q{
        CREATE TABLE node (
            id integer primary key,
            name text,
            title text,
            type integer,
            depth integer,
            parent_id integer,

            self_size integer,
            kids_size integer,
            kids_node_count integer,
            child_ids text,
            attr_json text,
            leaves_json text
        )
    });
    # One placeholder per column, in the column order declared above.
    $node_ins_sth = $dbh->prepare(q{
        INSERT INTO node VALUES (?,?,?,?,?,?, ?,?,?,?,?,?)
    });
}
b2fc39a5 87
# The stack of currently incomplete nodes, indexed by depth.
my @stack;
# Maps a node id to its node hash for every node that is still incomplete;
# leave_node() removes the entry when the node is completed.
my %seqn2node;

# Turn a string into a double-quoted value for the dot file: HTML-encode it,
# backslash-escape any remaining double quotes, then wrap it in quotes.
my $dotnode = sub {
    my $name = encode_entities(shift);
    # NOTE(review): encode_entities' default set already turns '"' into
    # '&quot;', so this substitution is probably a no-op -- confirm.
    $name =~ s/"/\\"/g;
    return '"'.$name.'"';
};
ee2793c1 96
ee2793c1 97
# When --dot is given, open the output file and write the graph preamble.
# The closing brace of the digraph is written after the input is exhausted.
my $dot_fh;
if ($opt_dot) {
    # Three-argument open: the filename can never be mistaken for a mode.
    open $dot_fh, '>', $opt_dot
        or die "Can't open $opt_dot for writing: $!\n";
    print {$dot_fh} "digraph {\n"; # }
    print {$dot_fh} "graph [overlap=false]\n"; # target="???", URL="???"
}
104
# Format a byte count for display.
#   below 5Kb     the number itself, unchanged
#   below 1000Kb  kilobytes with one decimal and a 'Kb' suffix
#   otherwise     megabytes with one decimal and an 'Mb' suffix
sub fmt_size {
    my ($size) = @_;
    my $kb = $size / 1024;
    return $size if $kb < 5;
    return sprintf("%.1fKb", $kb) if $kb < 1000;
    return sprintf("%.1fMb", $kb / 1024);
}
112
ee2793c1 113
# Called for each new node just before it is placed on @stack, so $stack[-1]
# is still the node's parent.  Rewrites the name of an 'AVelem' node whose
# parent is an 'SV(PVAV)': to the pad name when the node four levels up the
# stack is a 'CvPADLIST', otherwise to "[index]".
# Takes the node hashref and returns the same hashref.
sub enter_node {
    my $x = shift;

    my $parent = $stack[-1];
    return $x unless $parent;
    return $x
        unless ($x->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)');

    my $index = $x->{attr}{index};

    # If node is an AVelem of a CvPADLIST propagate pad name to AVelem
    my $cvpl = (@stack >= 4) ? $stack[-4] : undef;
    if ($cvpl and $cvpl->{name} eq 'CvPADLIST') {
        # Build the list of display names once per CvPADLIST node and cache
        # it on that node.
        my $padnames = $cvpl->{_cached}{padnames} ||= do {
            my @names = @{ $cvpl->{attr}{+NPattr_PADNAME} || []};
            $_ = "my(".($_||'').")" for @names;
            $names[0] = '@_';
            \@names;
        };
        $x->{name} = $padnames->[$index] || "?";
        $x->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness
    }
    else {
        $x->{name} = "[$index]";
    }

    return $x;
}
141
de73b186 142
# Complete a node that has just been popped off @stack: total its own leaf
# sizes, roll its totals up into its parent, and emit it to whichever outputs
# are enabled (the dot file and/or the SQLite database).
# Takes the node hashref and returns the same hashref.
sub leave_node {
    my $x = shift;
    delete $seqn2node{$x->{id}};

    my $self_size = 0;
    $self_size += $_ for values %{$x->{leaves}};
    $x->{self_size} = $self_size;

    # Callers pop the node before passing it in, so the top of the stack is
    # now this node's parent (there is none for the outermost node).
    my $parent = $stack[-1];
    if ($parent) {
        # link to parent
        $x->{parent_id} = $parent->{id};
        # accumulate into parent
        $parent->{kids_node_count} += 1 + ($x->{kids_node_count}||0);
        $parent->{kids_size} += $self_size + $x->{kids_size};
        push @{$parent->{child_id}}, $x->{id};
    }

    # output
    # ...
    if ($opt_dot) {
        # Debug trace, disabled: the trailing 'if 0' governs the whole printf.
        printf "// n%d parent=%s(type=%s)\n", $x->{id},
                $parent ? $parent->{id} : "",
                $parent ? $parent->{type} : ""
            if 0;
        # Link nodes are not drawn as graph nodes; they become the label of
        # the edge to their child (see below).
        if ($x->{type} != NPtype_LINK) {
            my $name = $x->{title} ? "\"$x->{title}\" $x->{name}" : $x->{name};

            if ($x->{kids_size}) {
                $name .= sprintf " %s+%s=%s", fmt_size($x->{self_size}), fmt_size($x->{kids_size}), fmt_size($x->{self_size}+$x->{kids_size});
            }
            else {
                $name .= sprintf " +%s", fmt_size($x->{self_size});
            }
            $name .= " $x->{id}" if $opt_showid;

            my @node_attr = (
                sprintf("label=%s", $dotnode->($name)),
                "id=$x->{id}",
            );
            #if ($x->{name} eq 'hek') { push @node_attr, "shape=point"; push @node_attr, "labelfontsize=6"; }
            if ($parent) { # probably a link
                my $parent_id = $parent->{id};
                my @link_attr = ("id=$parent_id");
                if ($parent->{type} == NPtype_LINK) { # link
                    # Label the edge with the link's name and draw it from
                    # the node above the link instead of from the link.
                    (my $link_name = $parent->{name}) =~ s/->$//;
                    push @link_attr, (sprintf "label=%s", $dotnode->($link_name));
                    $parent_id = ($stack[-2]||die "panic")->{id};
                }
                printf {$dot_fh} qq{n%d -> n%d [%s];\n},
                    $parent_id, $x->{id}, join(",", @link_attr);
            }
            printf {$dot_fh} qq{n%d [ %s ];\n}, $x->{id}, join(",", @node_attr);
        }

    }
    if ($dbh) {
        my $attr_json   = $j->encode($x->{attr});
        my $leaves_json = $j->encode($x->{leaves});
        $node_ins_sth->execute(
            $x->{id}, $x->{name}, $x->{title}, $x->{type}, $x->{depth}, $x->{parent_id},
            $x->{self_size}, $x->{kids_size}, $x->{kids_node_count},
            $x->{child_id} ? join(",", @{$x->{child_id}}) : undef,
            $attr_json, $leaves_json,
        );
        # XXX attribs
    }

    return $x;
}
213
my $indent = ": ";
my @attr_type_name = (qw(size NAME PADFAKE my PADTMP NOTE)); # XXX get from XS in some way
# Attributes received with type NPattr_PRE_ATTR; they are copied into the
# attr hash of the next node to arrive and then cleared.
my $pending_pre_attr = {};

# Main loop: one record per input line, split on single spaces into at most
# five fields: type, id, val, name and extra (extra keeps any later spaces).
#   type '-' followed by a number:  a node;  val is its depth
#   type 'L':                       a leaf;  name is its label, val its size
#   type a plain number:            an attribute of node id
while (my $line = <>) {
    chomp $line;

    my ($type, $id, $val, $name, $extra) = split / /, $line, 5;

    if ($type =~ s/^-//) {     # Node type ($val is depth)

        printf "%s%s%s %s [#%d \@%d]\n", $indent x $val, $name,
                ($type == NPtype_LINK) ? "->" : "",
                $extra||'', $id, $val
            if $opt_text;

        # this is the core driving logic
        # Every node still on the stack at this depth or deeper is finished.
        while ($val < @stack) {
            my $x = leave_node(pop @stack);
            warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n"
                if $opt_verbose;
        }
        die "panic: stack already has item at depth $val"
            if $stack[$val];
        my $node = enter_node({
            id => $id, type => $type, name => $name, extra => $extra,
            attr => { %$pending_pre_attr },
            leaves => {}, depth => $val, self_size=>0, kids_size=>0
        });
        %$pending_pre_attr = ();
        $stack[$val] = $node;
        $seqn2node{$id} = $node;
    }

    # --- Leaf name and memory size
    elsif ($type eq "L") {
        my $node = $seqn2node{$id}
            || die "panic: leaf for unknown node id '$id' on line $. ($line)\n";
        $node->{leaves}{$name} += $val;
        $run_size += $val;
        printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name
            if $opt_text;
    }

    # --- Attribute type, name and value (all rather hackish)
    elsif (looks_like_number($type)) {
        my $node = $seqn2node{$id}
            || die "panic: attribute for unknown node id '$id' on line $. ($line)\n";
        my $attr = $node->{attr}
            || die "panic: node $id has no attr hash on line $. ($line)\n";

        # attributes to queue up and apply to the next node
        if (NPattr_PRE_ATTR == $type) {
            $pending_pre_attr->{$name} = $val;
        }
        # attributes where the string is a key (or always empty and the type is the key)
        elsif ($type == NPattr_NAME or $type == NPattr_NOTE) {
            printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
                if $opt_text;
            warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n"
                if exists $attr->{$type}{$name};
            $attr->{$type}{$name} = $val || $id;
            # A NAME attribute with a false value becomes the node's title.
            $node->{title} = $name if $type == NPattr_NAME and !$val;
        }
        # attributes where the number is a key (or always zero)
        elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) {
            printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
                if $opt_text;
            # All three types are stored in the NPattr_PADNAME list, so that
            # is the list to check for an existing entry.  Checking under
            # $type would miss duplicates and leave empty arrays in attr.
            my $padnames = $attr->{+NPattr_PADNAME} ||= [];
            warn "Node $id already has attribute $type:$name (value $padnames->[$val])\n"
                if defined $padnames->[$val];
            $padnames->[$val] = $name; # store all as NPattr_PADNAME
        }
        else {
            printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type
                if $opt_text;
            warn "Invalid attribute type '$type' on line $. ($line)";
        }
    }
    else {
        warn "Invalid type '$type' on line $. ($line)";
        next;
    }

    # Commit periodically rather than holding one huge transaction.
    $dbh->commit if $dbh and $id % 10_000 == 0;
}
# 'use' takes effect at compile time, so Dumper is available throughout.
use Data::Dumper;

# End of input: complete every node still on the stack, report the totals,
# and close off the outputs.
my $top = $stack[0]; # grab top node before we pop all the nodes
leave_node(pop @stack) while @stack;
my $top_size = $top->{self_size}+$top->{kids_size};

# kids_node_count is only set once a node has had a child completed.
printf "Stored %d nodes recording %s (%d)\n",
    ($top->{kids_node_count}||0), fmt_size($top_size), $top_size;

# The sum of all leaf sizes should equal the top node's rolled-up total;
# dump the top node if it doesn't (or whenever --verbose is on).
if ($opt_verbose or $run_size != $top_size) {
    warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n";
    warn Dumper($top);
}

if ($dot_fh) {
    print {$dot_fh} "}\n";
    close $dot_fh;
    # List form: the filename is passed as a single argument and never
    # goes through a shell.
    system('open', '-a', 'Graphviz', $opt_dot) if $^O eq 'darwin'; # OSX
}

$dbh->commit if $dbh;

warn Dumper(\%seqn2node) if %seqn2node; # should be empty
2c631ee0 318
=for This is out of date but gives you an idea of the data and stream

SV(PVAV) fill=1/1 [#1 @0]
: +64 sv =64
: +16 av_max =80
: AVelem-> [#2 @1]
: : SV(RV) [#3 @2]
: : : +24 sv =104
: : : RV-> [#4 @3]
: : : : SV(PVAV) fill=-1/-1 [#5 @4]
: : : : : +64 sv =168
: AVelem-> [#6 @1]
: : SV(IV) [#7 @2]
: : : +24 sv =192
192 at -e line 1.
=cut
__DATA__
N 1 0 SV(PVAV) fill=1/1
L 1 64 sv
L 1 16 av_max
N 2 1 AVelem->
N 3 2 SV(RV)
L 3 24 sv
N 4 3 RV->
N 5 4 SV(PVAV) fill=-1/-1
L 5 64 sv
N 6 1 AVelem->
N 7 2 SV(IV)
L 7 24 sv