5 sizeme_store.pl - process and store the raw data stream from Devel::SizeMe
9 sizeme_store.pl [--text] [--dot=sizeme.dot] [--db=sizeme.db]
11 Typically used via the C<SIZEME> env var:
13 export SIZEME='|./sizeme_store.pl --text'
14 export SIZEME='|./sizeme_store.pl --dot=sizeme.dot'
15 export SIZEME='|./sizeme_store.pl --db=sizeme.db'
19 Reads the raw memory data from Devel::SizeMe and processes the tree
20 via a stack, propagating data such as totals, up the tree nodes.
21 Output completed nodes in the request formats.
23 The --text output is similar to the textual representation output by the module
24 when the SIZEME env var is set to an empty string.
26 The --dot output is suitable for feeding to Graphviz. (On OSX the Graphviz
27 application will be started automatically.)
29 The --db output is a SQLite database. The db schema is very subject to change.
30 This output is destined to be the primary one. The other output types will
31 probably become separate programs that read the db.
35 # Needs to be generalized to support pluggable output formats.
36 # Actually it needs to be split so sizeme_store.pl only does the store
37 # and another program drives the output with plugins.
38 # Making nodes into (lightweight fast) objects would be smart.
39 # Tests would be even smarter!
41 # When working on this code it's important to have a sense of the flow.
42 # Specifically the way that depth drives the completion of nodes.
43 # It's a depth-first stream processing machine, which only ever holds
44 # a single stack of the currently incomplete nodes, which is always the same as
45 # the current depth. I.e., when a node of depth N arrives, all nodes >N are
46 # popped off the stack and 'completed', each rippling data up to its parent.
52 use DBI qw(looks_like_number);
56 use HTML::Entities qw(encode_entities);;
59 use Carp qw(carp croak confess);
61 # XXX import these from the XS code
62 use constant NPtype_NAME => 0x01;
63 use constant NPtype_LINK => 0x02;
64 use constant NPtype_SV => 0x03;
65 use constant NPtype_MAGIC => 0x04;
66 use constant NPtype_OP => 0x05;
68 use constant NPattr_LEAFSIZE => 0x00;
69 use constant NPattr_NAME => 0x01;
70 use constant NPattr_PADFAKE => 0x02;
71 use constant NPattr_PADNAME => 0x03;
72 use constant NPattr_PADTMP => 0x04;
73 use constant NPattr_NOTE => 0x05;
74 use constant NPattr_PRE_ATTR => 0x06;
75 my @attr_type_name = (qw(size NAME PADFAKE my PADTMP NOTE PREATTR)); # XXX get from XS in some way
79 'text!' => \my $opt_text,
80 'dot=s' => \my $opt_dot,
81 'db=s' => \my $opt_db,
82 'verbose|v!' => \my $opt_verbose,
83 'debug|d!' => \my $opt_debug,
84 'showid!' => \my $opt_showid,
91 my $j = JSON::XS->new->ascii->pretty(0);
93 my ($dbh, $node_ins_sth);
95 $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db","","", {
96 RaiseError => 1, PrintError => 0, AutoCommit => 0
98 $dbh->do("PRAGMA synchronous = OFF");
105 my $name = encode_entities(shift);
107 return '"'.$name.'"';
115 my $kb = $size / 1024;
116 return $size if $kb < 5;
117 return sprintf "%.1fKb", $kb if $kb < 1000;
118 return sprintf "%.1fMb", $kb/1024;
124 warn ">> enter_node $x->{id}\n" if $opt_debug;
126 my $parent = $stack[-1];
129 if ($x->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)') {
130 my $index = $x->{attr}{+NPattr_NOTE}{i};
133 # If node is an AVelem of a CvPADLIST propagate pad name to AVelem
134 if (@stack >= 4 and (my $cvpl = $stack[-4])->{name} eq 'CvPADLIST') {
135 my $padnames = $cvpl->{_cached}{padnames} ||= do {
136 my @names = @{ $cvpl->{attr}{+NPattr_PADNAME} || []};
137 $_ = "my(".($_||'').")" for @names;
141 $x->{name} = (defined $index and $padnames->[$index]) || "?";
142 $x->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness
145 $x->{name} = "[$index]" if defined $index;
157 confess unless defined $x->{id};
158 warn "<< leave_node $x->{id}\n" if $opt_debug;
159 delete $seqn2node{$x->{id}};
161 my $self_size = 0; $self_size += $_ for values %{$x->{leaves}};
162 $x->{self_size} = $self_size;
164 if ($x->{name} eq 'AVelem') {
165 my $index = $x->{attr}{+NPattr_NOTE}{i};
166 $x->{name} = "[$index]" if defined $index;
169 my $parent = $stack[-1];
172 $x->{parent_id} = $parent->{id};
173 # accumulate into parent
174 $parent->{kids_node_count} += 1 + ($x->{kids_node_count}||0);
175 $parent->{kids_size} += $self_size + $x->{kids_size};
176 push @{$parent->{child_id}}, $x->{id};
179 $x->{kids_node_count} ||= 0;
185 printf "// n%d parent=%s(type=%s)\n", $x->{id},
186 $parent ? $parent->{id} : "",
187 $parent ? $parent->{type} : ""
190 if ($x->{type} != NPtype_LINK) {
191 my $name = $x->{title} ? "\"$x->{title}\" $x->{name}" : $x->{name};
193 if ($x->{kids_size}) {
194 $name .= sprintf " %s+%s=%s", fmt_size($x->{self_size}), fmt_size($x->{kids_size}), fmt_size($x->{self_size}+$x->{kids_size});
197 $name .= sprintf " +%s", fmt_size($x->{self_size});
199 $name .= " #$x->{id}" if $opt_showid;
202 sprintf("label=%s", $dotnode->($name)),
205 printf $dot_fh qq{n%d [ %s ];\n}, $x->{id}, join(",", @node_attr);
208 my @kids = @{$x->{child_id}||[]};
209 die "panic: NPtype_LINK has more than one child: @kids"
211 for my $child_id (@kids) { # wouldn't work right, eg id= attr
213 my @link_attr = ("id=$x->{id}");
214 (my $link_name = $x->{name}) =~ s/->$//;
215 $link_name .= " #$x->{id}" if $opt_showid;
216 push @link_attr, (sprintf "label=%s", $dotnode->($link_name));
217 printf $dot_fh qq{n%d -> n%d [%s];\n},
218 $x->{parent_id}, $child_id, join(",", @link_attr);
224 my $attr_json = $j->encode($x->{attr});
225 my $leaves_json = $j->encode($x->{leaves});
226 $node_ins_sth->execute(
227 $x->{id}, $x->{name}, $x->{title}, $x->{type}, $x->{depth}, $x->{parent_id},
228 $x->{self_size}, $x->{kids_size}, $x->{kids_node_count},
229 $x->{child_id} ? join(",", @{$x->{child_id}}) : undef,
230 $attr_json, $leaves_json,
241 warn "\t\t\t\t== $_" if $opt_debug;
244 my ($type, $id, $val, $name, $extra) = split / /, $_, 5;
246 if ($type =~ s/^-//) { # Node type ($val is depth)
248 printf "%s%s%s %s [#%d @%d]\n", $indent x $val, $name,
249 ($type == NPtype_LINK) ? "->" : "",
250 $extra||'', $id, $val
253 # this is the core driving logic
254 while ($val < @stack) {
255 my $x = leave_node(pop @stack);
256 warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n"
259 die "panic: stack already has item at depth $val"
261 die "Depth out of sync\n" if $val != @stack;
262 my $node = enter_node({
263 id => $id, type => $type, name => $name, extra => $extra,
264 attr => { }, leaves => {}, depth => $val, self_size=>0, kids_size=>0
266 $stack[$val] = $node;
267 $seqn2node{$id} = $node;
270 # --- Leaf name and memory size
271 elsif ($type eq "L") {
272 my $node = $seqn2node{$id} || die;
273 $node->{leaves}{$name} += $val;
275 printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name
279 # --- Attribute type, name and value (all rather hackish)
280 elsif (looks_like_number($type)) {
281 my $node = $seqn2node{$id} || die;
282 my $attr = $node->{attr} || die;
284 # attributes where the string is a key (or always empty and the type is the key)
285 if ($type == NPattr_NAME or $type == NPattr_NOTE) {
286 printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
288 warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n"
289 if exists $attr->{$type}{$name};
290 $attr->{$type}{$name} = $val;
292 $node->{title} = $name if $type == NPattr_NAME and !$val; # XXX hack
294 # attributes where the number is a key (or always zero)
295 elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) {
296 printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
298 warn "Node $id already has attribute $type:$name (value $attr->{$type}[$val])\n"
299 if defined $attr->{$type}[$val];
300 $attr->{+NPattr_PADNAME}[$val] = $name; # store all as NPattr_PADNAME
303 printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type
305 warn "Invalid attribute type '$type' on line $. ($_)";
308 elsif ($type eq 'S') { # start of a run
309 die "Unexpected start token" if @stack;
311 open $dot_fh, ">$opt_dot";
312 print $dot_fh "digraph {\n"; # }
313 print $dot_fh "graph [overlap=false]\n"; # target="???", URL="???"
316 # XXX add a size_run table records each run
317 # XXX pick a table name to store the run nodes in
318 #$run_ins_sth->execute(
320 $dbh->do("DROP TABLE IF EXISTS $table");
322 CREATE TABLE $table (
323 id integer primary key,
332 kids_node_count integer,
338 $node_ins_sth = $dbh->prepare(qq{
339 INSERT INTO $table VALUES (?,?,?,?,?,?, ?,?,?,?,?,?)
343 elsif ($type eq 'E') { # end of a run
345 my $top = $stack[0]; # grab top node before we pop all the nodes
346 leave_node(pop @stack) while @stack;
348 # if nothing output (ie size(undef))
349 $top ||= { self_size=>0, kids_size=>0, kids_node_count=>0 };
351 my $top_size = $top->{self_size}+$top->{kids_size};
353 printf "Stored %d nodes totalling %s [lines=%d size=%d write=%.2fs]\n",
354 1+$top->{kids_node_count}, fmt_size($top_size),
356 # the duration here ($val) is from Devel::SizeMe perspective
357 # ie doesn't include time to read file/pipe and commit to database.
359 if ($opt_verbose or $run_size != $top_size) {
360 warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n";
363 die "panic: seqn2node should be empty ". Dumper(\%seqn2node)
369 system("open -a Graphviz $opt_dot") if $^O eq 'darwin'; # OSX
372 $dbh->commit if $dbh;
375 warn "Invalid type '$type' on line $. ($_)";
379 $dbh->commit if $dbh and $id % 10_000 == 0;
381 die "EOF without end token" if @stack;
384 =for This is out of date but gives you an idea of the data and stream
386 SV(PVAV) fill=1/1 [#1 @0]
393 : : : : SV(PVAV) fill=-1/-1 [#5 @4]
394 : : : : : +64 sv =168
401 N 1 0 SV(PVAV) fill=1/1
408 N 5 4 SV(PVAV) fill=-1/-1