Polish docs and bump version for 0.03.
[p5sagit/Devel-Size.git] / bin / sizeme_store.pl
CommitLineData
5aa3ad8e 1#!/usr/bin/env perl
2c631ee0 2
12ae883b 3=head1 NAME
4
5sizeme_store.pl - process and store the raw data stream from Devel::SizeMe
6
7=head1 SYNOPSIS
8
9 sizeme_store.pl [--text] [--dot=sizeme.dot] [--db=sizeme.db]
10
591aba53 11Typically used with Devel::SizeMe via the C<SIZEME> env var:
12ae883b 12
13 export SIZEME='|./sizeme_store.pl --text'
14 export SIZEME='|./sizeme_store.pl --dot=sizeme.dot'
15 export SIZEME='|./sizeme_store.pl --db=sizeme.db'
16
17=head1 DESCRIPTION
18
19Reads the raw memory data from Devel::SizeMe and processes the tree
591aba53 20via a stack, propagating data such as totals, up the tree nodes
21as the data streams through. Output completed nodes in the request formats.
12ae883b 22
23The --text output is similar to the textual representation output by the module
24when the SIZEME env var is set to an empty string.
25
26The --dot output is suitable for feeding to Graphviz. (On OSX the Graphviz
27application will be started automatically.)
28
29The --db output is a SQLite database. The db schema is very subject to change.
30This output is destined to be the primary one. The other output types will
31probably become separate programs that read the db.
32
591aba53 33=head1 TODO
34
35Current implementation is all very alpha and rather hackish.
36
37Refactor to separate the core code into a module.
38
39Move the output formats into separate modules, which should probably read from
40the db so the db becomes the canonical source of data.
41
42Import constants from XS.
43
e988a695 44=cut
12ae883b 45
d3b8a135 46# Needs to be generalized to support pluggable output formats.
d81c4128 47# Actually it needs to be split so sizeme_store.pl only does the store
48# and another program drives the output with plugins.
d3b8a135 49# Making nodes into (lightweight fast) objects would be smart.
50# Tests would be even smarter!
51#
52# When working on this code it's important to have a sense of the flow.
53# Specifically the way that depth drives the completion of nodes.
54# It's a depth-first stream processing machine, which only ever holds
55# a single stack of the currently incomplete nodes, which is always the same as
56# the current depth. I.e., when a node of depth N arrives, all nodes >N are
57# popped off the stack and 'completed', each rippling data up to its parent.
58
2c631ee0 59use strict;
60use warnings;
5aa3ad8e 61use autodie;
2c631ee0 62
e8f4c506 63use DBI qw(looks_like_number);
b2fc39a5 64use DBD::SQLite;
f60f09e5 65use JSON::XS;
de73b186 66use Devel::Dwarn;
d3b8a135 67use HTML::Entities qw(encode_entities);;
5e2e22f3 68use Data::Dumper;
fc6614ee 69use Getopt::Long;
5e2e22f3 70use Carp qw(carp croak confess);
fc6614ee 71
de73b186 72# XXX import these from the XS code
73use constant NPtype_NAME => 0x01;
74use constant NPtype_LINK => 0x02;
75use constant NPtype_SV => 0x03;
76use constant NPtype_MAGIC => 0x04;
77use constant NPtype_OP => 0x05;
78
79use constant NPattr_LEAFSIZE => 0x00;
80use constant NPattr_NAME => 0x01;
81use constant NPattr_PADFAKE => 0x02;
82use constant NPattr_PADNAME => 0x03;
83use constant NPattr_PADTMP => 0x04;
84use constant NPattr_NOTE => 0x05;
68cafb30 85use constant NPattr_PRE_ATTR => 0x06;
09c6d3bb 86my @attr_type_name = (qw(size NAME PADFAKE my PADTMP NOTE PREATTR)); # XXX get from XS in some way
de73b186 87
88
fc6614ee 89GetOptions(
5aa3ad8e 90 'text!' => \my $opt_text,
91 'dot=s' => \my $opt_dot,
b2fc39a5 92 'db=s' => \my $opt_db,
e8f4c506 93 'verbose|v!' => \my $opt_verbose,
94 'debug|d!' => \my $opt_debug,
98128850 95 'showid!' => \my $opt_showid,
fc6614ee 96) or exit 1;
94fab3d1 97
5e2e22f3 98$| = 1 if $opt_debug;
0e977dbc 99my $run_size = 0;
100my $total_size = 0;
101
f60f09e5 102my $j = JSON::XS->new->ascii->pretty(0);
103
1915946b 104my ($dbh, $node_ins_sth);
105if ($opt_db) {
106 $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db","","", {
107 RaiseError => 1, PrintError => 0, AutoCommit => 0
108 });
109 $dbh->do("PRAGMA synchronous = OFF");
1915946b 110}
b2fc39a5 111
2c631ee0 112my @stack;
113my %seqn2node;
114
7020702a 115my $dotnode = sub {
116 my $name = encode_entities(shift);
117 $name =~ s/"/\\"/g;
7020702a 118 return '"'.$name.'"';
119};
ee2793c1 120
ee2793c1 121
5aa3ad8e 122my $dot_fh;
ee2793c1 123
0741448c 124sub fmt_size {
125 my $size = shift;
126 my $kb = $size / 1024;
127 return $size if $kb < 5;
128 return sprintf "%.1fKb", $kb if $kb < 1000;
129 return sprintf "%.1fMb", $kb/1024;
130}
131
ee2793c1 132
94fab3d1 133sub enter_node {
134 my $x = shift;
09c6d3bb 135 warn ">> enter_node $x->{id}\n" if $opt_debug;
de73b186 136
137 my $parent = $stack[-1];
138 if ($parent) {
139
37836f2a 140 if ($x->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)') {
09c6d3bb 141 my $index = $x->{attr}{+NPattr_NOTE}{i};
eda23e24 142 #Dwarn $x->{attr};
143 #Dwarn $index;
68cafb30 144 # If node is an AVelem of a CvPADLIST propagate pad name to AVelem
145 if (@stack >= 4 and (my $cvpl = $stack[-4])->{name} eq 'CvPADLIST') {
68cafb30 146 my $padnames = $cvpl->{_cached}{padnames} ||= do {
147 my @names = @{ $cvpl->{attr}{+NPattr_PADNAME} || []};
148 $_ = "my(".($_||'').")" for @names;
149 $names[0] = '@_';
150 \@names;
151 };
fd0de560 152 $x->{name} = (defined $index and $padnames->[$index]) || "?";
68cafb30 153 $x->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness
154 }
155 else {
fd0de560 156 $x->{name} = "[$index]" if defined $index;
de73b186 157 }
158 }
09c6d3bb 159
ee2793c1 160 }
de73b186 161
162 return $x;
94fab3d1 163}
164
de73b186 165
94fab3d1 166sub leave_node {
167 my $x = shift;
5e2e22f3 168 confess unless defined $x->{id};
09c6d3bb 169 warn "<< leave_node $x->{id}\n" if $opt_debug;
b2fc39a5 170 delete $seqn2node{$x->{id}};
ee2793c1 171
d3b8a135 172 my $self_size = 0; $self_size += $_ for values %{$x->{leaves}};
94fab3d1 173 $x->{self_size} = $self_size;
ee2793c1 174
09c6d3bb 175 if ($x->{name} eq 'AVelem') {
176 my $index = $x->{attr}{+NPattr_NOTE}{i};
177 $x->{name} = "[$index]" if defined $index;
178 }
179
ee2793c1 180 my $parent = $stack[-1];
181 if ($parent) {
2c631ee0 182 # link to parent
5a78486c 183 $x->{parent_id} = $parent->{id};
2c631ee0 184 # accumulate into parent
185 $parent->{kids_node_count} += 1 + ($x->{kids_node_count}||0);
94fab3d1 186 $parent->{kids_size} += $self_size + $x->{kids_size};
5a78486c 187 push @{$parent->{child_id}}, $x->{id};
2c631ee0 188 }
d81c4128 189 else {
190 $x->{kids_node_count} ||= 0;
191 }
de73b186 192
2c631ee0 193 # output
194 # ...
ee2793c1 195 if ($opt_dot) {
1915946b 196 printf "// n%d parent=%s(type=%s)\n", $x->{id},
0741448c 197 $parent ? $parent->{id} : "",
198 $parent ? $parent->{type} : ""
199 if 0;
09c6d3bb 200
de73b186 201 if ($x->{type} != NPtype_LINK) {
0741448c 202 my $name = $x->{title} ? "\"$x->{title}\" $x->{name}" : $x->{name};
203
204 if ($x->{kids_size}) {
205 $name .= sprintf " %s+%s=%s", fmt_size($x->{self_size}), fmt_size($x->{kids_size}), fmt_size($x->{self_size}+$x->{kids_size});
206 }
207 else {
208 $name .= sprintf " +%s", fmt_size($x->{self_size});
209 }
f91a90e9 210 $name .= " #$x->{id}" if $opt_showid;
0741448c 211
212 my @node_attr = (
213 sprintf("label=%s", $dotnode->($name)),
214 "id=$x->{id}",
215 );
09c6d3bb 216 printf $dot_fh qq{n%d [ %s ];\n}, $x->{id}, join(",", @node_attr);
217 }
218 else { # NPtype_LINK
d81c4128 219 my @kids = @{$x->{child_id}||[]};
09c6d3bb 220 die "panic: NPtype_LINK has more than one child: @kids"
221 if @kids > 1;
222 for my $child_id (@kids) { # wouldn't work right, eg id= attr
223 #die Dwarn $x;
224 my @link_attr = ("id=$x->{id}");
225 (my $link_name = $x->{name}) =~ s/->$//;
d81c4128 226 $link_name .= " #$x->{id}" if $opt_showid;
09c6d3bb 227 push @link_attr, (sprintf "label=%s", $dotnode->($link_name));
5aa3ad8e 228 printf $dot_fh qq{n%d -> n%d [%s];\n},
09c6d3bb 229 $x->{parent_id}, $child_id, join(",", @link_attr);
1915946b 230 }
1915946b 231 }
232
ee2793c1 233 }
b2fc39a5 234 if ($dbh) {
f60f09e5 235 my $attr_json = $j->encode($x->{attr});
e78b28ca 236 my $leaves_json = $j->encode($x->{leaves});
b2fc39a5 237 $node_ins_sth->execute(
98128850 238 $x->{id}, $x->{name}, $x->{title}, $x->{type}, $x->{depth}, $x->{parent_id},
b2fc39a5 239 $x->{self_size}, $x->{kids_size}, $x->{kids_node_count},
f60f09e5 240 $x->{child_id} ? join(",", @{$x->{child_id}}) : undef,
e78b28ca 241 $attr_json, $leaves_json,
b2fc39a5 242 );
243 # XXX attribs
244 }
d3b8a135 245
246 return $x;
2c631ee0 247}
248
5aa3ad8e 249my $indent = ": ";
94fab3d1 250
2c631ee0 251while (<>) {
d81c4128 252 warn "\t\t\t\t== $_" if $opt_debug;
2c631ee0 253 chomp;
de73b186 254
b2fc39a5 255 my ($type, $id, $val, $name, $extra) = split / /, $_, 5;
de73b186 256
ee2793c1 257 if ($type =~ s/^-//) { # Node type ($val is depth)
d3b8a135 258
65b2cf7d 259 printf "%s%s%s %s [#%d @%d]\n", $indent x $val, $name,
260 ($type == NPtype_LINK) ? "->" : "",
261 $extra||'', $id, $val
5aa3ad8e 262 if $opt_text;
d3b8a135 263
264 # this is the core driving logic
2c631ee0 265 while ($val < @stack) {
d3b8a135 266 my $x = leave_node(pop @stack);
e8f4c506 267 warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n"
268 if $opt_verbose;
2c631ee0 269 }
c5078bcb 270 die "panic: stack already has item at depth $val"
271 if $stack[$val];
5e2e22f3 272 die "Depth out of sync\n" if $val != @stack;
de73b186 273 my $node = enter_node({
274 id => $id, type => $type, name => $name, extra => $extra,
957d176b 275 attr => { }, leaves => {}, depth => $val, self_size=>0, kids_size=>0
de73b186 276 });
277 $stack[$val] = $node;
b2fc39a5 278 $seqn2node{$id} = $node;
2c631ee0 279 }
d3b8a135 280
de73b186 281 # --- Leaf name and memory size
282 elsif ($type eq "L") {
b2fc39a5 283 my $node = $seqn2node{$id} || die;
2c631ee0 284 $node->{leaves}{$name} += $val;
0e977dbc 285 $run_size += $val;
286 printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name
5aa3ad8e 287 if $opt_text;
2c631ee0 288 }
d3b8a135 289
290 # --- Attribute type, name and value (all rather hackish)
de73b186 291 elsif (looks_like_number($type)) {
b2fc39a5 292 my $node = $seqn2node{$id} || die;
e8f4c506 293 my $attr = $node->{attr} || die;
de73b186 294
68cafb30 295 # attributes where the string is a key (or always empty and the type is the key)
957d176b 296 if ($type == NPattr_NAME or $type == NPattr_NOTE) {
de73b186 297 printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
298 if $opt_text;
e8f4c506 299 warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n"
300 if exists $attr->{$type}{$name};
09c6d3bb 301 $attr->{$type}{$name} = $val;
eda23e24 302 #Dwarn $attr;
09c6d3bb 303 $node->{title} = $name if $type == NPattr_NAME and !$val; # XXX hack
e8f4c506 304 }
68cafb30 305 # attributes where the number is a key (or always zero)
de73b186 306 elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) {
307 printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
308 if $opt_text;
e8f4c506 309 warn "Node $id already has attribute $type:$name (value $attr->{$type}[$val])\n"
310 if defined $attr->{$type}[$val];
de73b186 311 $attr->{+NPattr_PADNAME}[$val] = $name; # store all as NPattr_PADNAME
e8f4c506 312 }
313 else {
de73b186 314 printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type
315 if $opt_text;
e8f4c506 316 warn "Invalid attribute type '$type' on line $. ($_)";
317 }
2c631ee0 318 }
5e2e22f3 319 elsif ($type eq 'S') { # start of a run
320 die "Unexpected start token" if @stack;
321 if ($opt_dot) {
322 open $dot_fh, ">$opt_dot";
323 print $dot_fh "digraph {\n"; # }
324 print $dot_fh "graph [overlap=false]\n"; # target="???", URL="???"
325 }
326 if ($dbh) {
327 # XXX add a size_run table records each run
328 # XXX pick a table name to store the run nodes in
329 #$run_ins_sth->execute(
330 my $table = "node";
331 $dbh->do("DROP TABLE IF EXISTS $table");
332 $dbh->do(qq{
333 CREATE TABLE $table (
334 id integer primary key,
335 name text,
336 title text,
337 type integer,
338 depth integer,
339 parent_id integer,
340
341 self_size integer,
342 kids_size integer,
343 kids_node_count integer,
344 child_ids text,
345 attr_json text,
346 leaves_json text
347 )
348 });
349 $node_ins_sth = $dbh->prepare(qq{
350 INSERT INTO $table VALUES (?,?,?,?,?,?, ?,?,?,?,?,?)
351 });
352 }
353 }
354 elsif ($type eq 'E') { # end of a run
355
356 my $top = $stack[0]; # grab top node before we pop all the nodes
357 leave_node(pop @stack) while @stack;
c99e1fe8 358
eda23e24 359 # if nothing output (ie size(undef))
360 $top ||= { self_size=>0, kids_size=>0, kids_node_count=>0 };
361
5e2e22f3 362 my $top_size = $top->{self_size}+$top->{kids_size};
363
eda23e24 364 printf "Stored %d nodes totalling %s [lines=%d size=%d write=%.2fs]\n",
365 1+$top->{kids_node_count}, fmt_size($top_size),
366 $., $top_size, $val;
c99e1fe8 367 # the duration here ($val) is from Devel::SizeMe perspective
368 # ie doesn't include time to read file/pipe and commit to database.
5e2e22f3 369
370 if ($opt_verbose or $run_size != $top_size) {
371 warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n";
372 warn Dumper($top);
373 }
374 die "panic: seqn2node should be empty ". Dumper(\%seqn2node)
375 if %seqn2node;
5e2e22f3 376
377 if ($dot_fh) {
378 print $dot_fh "}\n";
379 close $dot_fh;
380 system("open -a Graphviz $opt_dot") if $^O eq 'darwin'; # OSX
381 }
382
383 $dbh->commit if $dbh;
384 }
2c631ee0 385 else {
386 warn "Invalid type '$type' on line $. ($_)";
e8f4c506 387 next;
2c631ee0 388 }
d3b8a135 389
b2fc39a5 390 $dbh->commit if $dbh and $id % 10_000 == 0;
2c631ee0 391}
5e2e22f3 392die "EOF without end token" if @stack;
b2fc39a5 393
2c631ee0 394
d3b8a135 395=for This is out of date but gives you an idea of the data and stream
396
2c631ee0 397SV(PVAV) fill=1/1 [#1 @0]
398: +64 sv =64
399: +16 av_max =80
400: AVelem-> [#2 @1]
401: : SV(RV) [#3 @2]
402: : : +24 sv =104
403: : : RV-> [#4 @3]
404: : : : SV(PVAV) fill=-1/-1 [#5 @4]
405: : : : : +64 sv =168
406: AVelem-> [#6 @1]
407: : SV(IV) [#7 @2]
408: : : +24 sv =192
409192 at -e line 1.
410=cut
411__DATA__
412N 1 0 SV(PVAV) fill=1/1
413L 1 64 sv
414L 1 16 av_max
415N 2 1 AVelem->
416N 3 2 SV(RV)
417L 3 24 sv
418N 4 3 RV->
419N 5 4 SV(PVAV) fill=-1/-1
420L 5 64 sv
421N 6 1 AVelem->
422N 7 2 SV(IV)
423L 7 24 sv