More docs and other polish for release.
[p5sagit/Devel-Size.git] / bin / sizeme_store.pl
CommitLineData
5aa3ad8e 1#!/usr/bin/env perl
2c631ee0 2
12ae883b 3=head1 NAME
4
5sizeme_store.pl - process and store the raw data stream from Devel::SizeMe
6
7=head1 SYNOPSIS
8
9 sizeme_store.pl [--text] [--dot=sizeme.dot] [--db=sizeme.db]
10
11Typically used via the C<SIZEME> env var:
12
13 export SIZEME='|./sizeme_store.pl --text'
14 export SIZEME='|./sizeme_store.pl --dot=sizeme.dot'
15 export SIZEME='|./sizeme_store.pl --db=sizeme.db'
16
17=head1 DESCRIPTION
18
19Reads the raw memory data from Devel::SizeMe and processes the tree
20via a stack, propagating data such as totals, up the tree nodes.
21Output completed nodes in the request formats.
22
23The --text output is similar to the textual representation output by the module
24when the SIZEME env var is set to an empty string.
25
26The --dot output is suitable for feeding to Graphviz. (On OSX the Graphviz
27application will be started automatically.)
28
29The --db output is a SQLite database. The db schema is very subject to change.
30This output is destined to be the primary one. The other output types will
31probably become separate programs that read the db.
32
33=pod
34
d3b8a135 35# Needs to be generalized to support pluggable output formats.
d81c4128 36# Actually it needs to be split so sizeme_store.pl only does the store
37# and another program drives the output with plugins.
d3b8a135 38# Making nodes into (lightweight fast) objects would be smart.
39# Tests would be even smarter!
40#
41# When working on this code it's important to have a sense of the flow.
42# Specifically the way that depth drives the completion of nodes.
43# It's a depth-first stream processing machine, which only ever holds
44# a single stack of the currently incomplete nodes, which is always the same as
45# the current depth. I.e., when a node of depth N arrives, all nodes >N are
46# popped off the stack and 'completed', each rippling data up to its parent.
47
2c631ee0 48use strict;
49use warnings;
5aa3ad8e 50use autodie;
2c631ee0 51
e8f4c506 52use DBI qw(looks_like_number);
b2fc39a5 53use DBD::SQLite;
f60f09e5 54use JSON::XS;
de73b186 55use Devel::Dwarn;
d3b8a135 56use HTML::Entities qw(encode_entities);;
5e2e22f3 57use Data::Dumper;
fc6614ee 58use Getopt::Long;
5e2e22f3 59use Carp qw(carp croak confess);
fc6614ee 60
de73b186 61# XXX import these from the XS code
62use constant NPtype_NAME => 0x01;
63use constant NPtype_LINK => 0x02;
64use constant NPtype_SV => 0x03;
65use constant NPtype_MAGIC => 0x04;
66use constant NPtype_OP => 0x05;
67
68use constant NPattr_LEAFSIZE => 0x00;
69use constant NPattr_NAME => 0x01;
70use constant NPattr_PADFAKE => 0x02;
71use constant NPattr_PADNAME => 0x03;
72use constant NPattr_PADTMP => 0x04;
73use constant NPattr_NOTE => 0x05;
68cafb30 74use constant NPattr_PRE_ATTR => 0x06;
09c6d3bb 75my @attr_type_name = (qw(size NAME PADFAKE my PADTMP NOTE PREATTR)); # XXX get from XS in some way
de73b186 76
77
fc6614ee 78GetOptions(
5aa3ad8e 79 'text!' => \my $opt_text,
80 'dot=s' => \my $opt_dot,
b2fc39a5 81 'db=s' => \my $opt_db,
e8f4c506 82 'verbose|v!' => \my $opt_verbose,
83 'debug|d!' => \my $opt_debug,
98128850 84 'showid!' => \my $opt_showid,
fc6614ee 85) or exit 1;
94fab3d1 86
5e2e22f3 87$| = 1 if $opt_debug;
0e977dbc 88my $run_size = 0;
89my $total_size = 0;
90
f60f09e5 91my $j = JSON::XS->new->ascii->pretty(0);
92
1915946b 93my ($dbh, $node_ins_sth);
94if ($opt_db) {
95 $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db","","", {
96 RaiseError => 1, PrintError => 0, AutoCommit => 0
97 });
98 $dbh->do("PRAGMA synchronous = OFF");
1915946b 99}
b2fc39a5 100
2c631ee0 101my @stack;
102my %seqn2node;
103
7020702a 104my $dotnode = sub {
105 my $name = encode_entities(shift);
106 $name =~ s/"/\\"/g;
7020702a 107 return '"'.$name.'"';
108};
ee2793c1 109
ee2793c1 110
5aa3ad8e 111my $dot_fh;
ee2793c1 112
0741448c 113sub fmt_size {
114 my $size = shift;
115 my $kb = $size / 1024;
116 return $size if $kb < 5;
117 return sprintf "%.1fKb", $kb if $kb < 1000;
118 return sprintf "%.1fMb", $kb/1024;
119}
120
ee2793c1 121
94fab3d1 122sub enter_node {
123 my $x = shift;
09c6d3bb 124 warn ">> enter_node $x->{id}\n" if $opt_debug;
de73b186 125
126 my $parent = $stack[-1];
127 if ($parent) {
128
37836f2a 129 if ($x->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)') {
09c6d3bb 130 my $index = $x->{attr}{+NPattr_NOTE}{i};
eda23e24 131 #Dwarn $x->{attr};
132 #Dwarn $index;
68cafb30 133 # If node is an AVelem of a CvPADLIST propagate pad name to AVelem
134 if (@stack >= 4 and (my $cvpl = $stack[-4])->{name} eq 'CvPADLIST') {
68cafb30 135 my $padnames = $cvpl->{_cached}{padnames} ||= do {
136 my @names = @{ $cvpl->{attr}{+NPattr_PADNAME} || []};
137 $_ = "my(".($_||'').")" for @names;
138 $names[0] = '@_';
139 \@names;
140 };
fd0de560 141 $x->{name} = (defined $index and $padnames->[$index]) || "?";
68cafb30 142 $x->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness
143 }
144 else {
fd0de560 145 $x->{name} = "[$index]" if defined $index;
de73b186 146 }
147 }
09c6d3bb 148
ee2793c1 149 }
de73b186 150
151 return $x;
94fab3d1 152}
153
de73b186 154
94fab3d1 155sub leave_node {
156 my $x = shift;
5e2e22f3 157 confess unless defined $x->{id};
09c6d3bb 158 warn "<< leave_node $x->{id}\n" if $opt_debug;
b2fc39a5 159 delete $seqn2node{$x->{id}};
ee2793c1 160
d3b8a135 161 my $self_size = 0; $self_size += $_ for values %{$x->{leaves}};
94fab3d1 162 $x->{self_size} = $self_size;
ee2793c1 163
09c6d3bb 164 if ($x->{name} eq 'AVelem') {
165 my $index = $x->{attr}{+NPattr_NOTE}{i};
166 $x->{name} = "[$index]" if defined $index;
167 }
168
ee2793c1 169 my $parent = $stack[-1];
170 if ($parent) {
2c631ee0 171 # link to parent
5a78486c 172 $x->{parent_id} = $parent->{id};
2c631ee0 173 # accumulate into parent
174 $parent->{kids_node_count} += 1 + ($x->{kids_node_count}||0);
94fab3d1 175 $parent->{kids_size} += $self_size + $x->{kids_size};
5a78486c 176 push @{$parent->{child_id}}, $x->{id};
2c631ee0 177 }
d81c4128 178 else {
179 $x->{kids_node_count} ||= 0;
180 }
de73b186 181
2c631ee0 182 # output
183 # ...
ee2793c1 184 if ($opt_dot) {
1915946b 185 printf "// n%d parent=%s(type=%s)\n", $x->{id},
0741448c 186 $parent ? $parent->{id} : "",
187 $parent ? $parent->{type} : ""
188 if 0;
09c6d3bb 189
de73b186 190 if ($x->{type} != NPtype_LINK) {
0741448c 191 my $name = $x->{title} ? "\"$x->{title}\" $x->{name}" : $x->{name};
192
193 if ($x->{kids_size}) {
194 $name .= sprintf " %s+%s=%s", fmt_size($x->{self_size}), fmt_size($x->{kids_size}), fmt_size($x->{self_size}+$x->{kids_size});
195 }
196 else {
197 $name .= sprintf " +%s", fmt_size($x->{self_size});
198 }
f91a90e9 199 $name .= " #$x->{id}" if $opt_showid;
0741448c 200
201 my @node_attr = (
202 sprintf("label=%s", $dotnode->($name)),
203 "id=$x->{id}",
204 );
09c6d3bb 205 printf $dot_fh qq{n%d [ %s ];\n}, $x->{id}, join(",", @node_attr);
206 }
207 else { # NPtype_LINK
d81c4128 208 my @kids = @{$x->{child_id}||[]};
09c6d3bb 209 die "panic: NPtype_LINK has more than one child: @kids"
210 if @kids > 1;
211 for my $child_id (@kids) { # wouldn't work right, eg id= attr
212 #die Dwarn $x;
213 my @link_attr = ("id=$x->{id}");
214 (my $link_name = $x->{name}) =~ s/->$//;
d81c4128 215 $link_name .= " #$x->{id}" if $opt_showid;
09c6d3bb 216 push @link_attr, (sprintf "label=%s", $dotnode->($link_name));
5aa3ad8e 217 printf $dot_fh qq{n%d -> n%d [%s];\n},
09c6d3bb 218 $x->{parent_id}, $child_id, join(",", @link_attr);
1915946b 219 }
1915946b 220 }
221
ee2793c1 222 }
b2fc39a5 223 if ($dbh) {
f60f09e5 224 my $attr_json = $j->encode($x->{attr});
e78b28ca 225 my $leaves_json = $j->encode($x->{leaves});
b2fc39a5 226 $node_ins_sth->execute(
98128850 227 $x->{id}, $x->{name}, $x->{title}, $x->{type}, $x->{depth}, $x->{parent_id},
b2fc39a5 228 $x->{self_size}, $x->{kids_size}, $x->{kids_node_count},
f60f09e5 229 $x->{child_id} ? join(",", @{$x->{child_id}}) : undef,
e78b28ca 230 $attr_json, $leaves_json,
b2fc39a5 231 );
232 # XXX attribs
233 }
d3b8a135 234
235 return $x;
2c631ee0 236}
237
5aa3ad8e 238my $indent = ": ";
94fab3d1 239
2c631ee0 240while (<>) {
d81c4128 241 warn "\t\t\t\t== $_" if $opt_debug;
2c631ee0 242 chomp;
de73b186 243
b2fc39a5 244 my ($type, $id, $val, $name, $extra) = split / /, $_, 5;
de73b186 245
ee2793c1 246 if ($type =~ s/^-//) { # Node type ($val is depth)
d3b8a135 247
65b2cf7d 248 printf "%s%s%s %s [#%d @%d]\n", $indent x $val, $name,
249 ($type == NPtype_LINK) ? "->" : "",
250 $extra||'', $id, $val
5aa3ad8e 251 if $opt_text;
d3b8a135 252
253 # this is the core driving logic
2c631ee0 254 while ($val < @stack) {
d3b8a135 255 my $x = leave_node(pop @stack);
e8f4c506 256 warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n"
257 if $opt_verbose;
2c631ee0 258 }
c5078bcb 259 die "panic: stack already has item at depth $val"
260 if $stack[$val];
5e2e22f3 261 die "Depth out of sync\n" if $val != @stack;
de73b186 262 my $node = enter_node({
263 id => $id, type => $type, name => $name, extra => $extra,
957d176b 264 attr => { }, leaves => {}, depth => $val, self_size=>0, kids_size=>0
de73b186 265 });
266 $stack[$val] = $node;
b2fc39a5 267 $seqn2node{$id} = $node;
2c631ee0 268 }
d3b8a135 269
de73b186 270 # --- Leaf name and memory size
271 elsif ($type eq "L") {
b2fc39a5 272 my $node = $seqn2node{$id} || die;
2c631ee0 273 $node->{leaves}{$name} += $val;
0e977dbc 274 $run_size += $val;
275 printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name
5aa3ad8e 276 if $opt_text;
2c631ee0 277 }
d3b8a135 278
279 # --- Attribute type, name and value (all rather hackish)
de73b186 280 elsif (looks_like_number($type)) {
b2fc39a5 281 my $node = $seqn2node{$id} || die;
e8f4c506 282 my $attr = $node->{attr} || die;
de73b186 283
68cafb30 284 # attributes where the string is a key (or always empty and the type is the key)
957d176b 285 if ($type == NPattr_NAME or $type == NPattr_NOTE) {
de73b186 286 printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
287 if $opt_text;
e8f4c506 288 warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n"
289 if exists $attr->{$type}{$name};
09c6d3bb 290 $attr->{$type}{$name} = $val;
eda23e24 291 #Dwarn $attr;
09c6d3bb 292 $node->{title} = $name if $type == NPattr_NAME and !$val; # XXX hack
e8f4c506 293 }
68cafb30 294 # attributes where the number is a key (or always zero)
de73b186 295 elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) {
296 printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type
297 if $opt_text;
e8f4c506 298 warn "Node $id already has attribute $type:$name (value $attr->{$type}[$val])\n"
299 if defined $attr->{$type}[$val];
de73b186 300 $attr->{+NPattr_PADNAME}[$val] = $name; # store all as NPattr_PADNAME
e8f4c506 301 }
302 else {
de73b186 303 printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type
304 if $opt_text;
e8f4c506 305 warn "Invalid attribute type '$type' on line $. ($_)";
306 }
2c631ee0 307 }
5e2e22f3 308 elsif ($type eq 'S') { # start of a run
309 die "Unexpected start token" if @stack;
310 if ($opt_dot) {
311 open $dot_fh, ">$opt_dot";
312 print $dot_fh "digraph {\n"; # }
313 print $dot_fh "graph [overlap=false]\n"; # target="???", URL="???"
314 }
315 if ($dbh) {
316 # XXX add a size_run table records each run
317 # XXX pick a table name to store the run nodes in
318 #$run_ins_sth->execute(
319 my $table = "node";
320 $dbh->do("DROP TABLE IF EXISTS $table");
321 $dbh->do(qq{
322 CREATE TABLE $table (
323 id integer primary key,
324 name text,
325 title text,
326 type integer,
327 depth integer,
328 parent_id integer,
329
330 self_size integer,
331 kids_size integer,
332 kids_node_count integer,
333 child_ids text,
334 attr_json text,
335 leaves_json text
336 )
337 });
338 $node_ins_sth = $dbh->prepare(qq{
339 INSERT INTO $table VALUES (?,?,?,?,?,?, ?,?,?,?,?,?)
340 });
341 }
342 }
343 elsif ($type eq 'E') { # end of a run
344
345 my $top = $stack[0]; # grab top node before we pop all the nodes
346 leave_node(pop @stack) while @stack;
c99e1fe8 347
eda23e24 348 # if nothing output (ie size(undef))
349 $top ||= { self_size=>0, kids_size=>0, kids_node_count=>0 };
350
5e2e22f3 351 my $top_size = $top->{self_size}+$top->{kids_size};
352
eda23e24 353 printf "Stored %d nodes totalling %s [lines=%d size=%d write=%.2fs]\n",
354 1+$top->{kids_node_count}, fmt_size($top_size),
355 $., $top_size, $val;
c99e1fe8 356 # the duration here ($val) is from Devel::SizeMe perspective
357 # ie doesn't include time to read file/pipe and commit to database.
5e2e22f3 358
359 if ($opt_verbose or $run_size != $top_size) {
360 warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n";
361 warn Dumper($top);
362 }
363 die "panic: seqn2node should be empty ". Dumper(\%seqn2node)
364 if %seqn2node;
5e2e22f3 365
366 if ($dot_fh) {
367 print $dot_fh "}\n";
368 close $dot_fh;
369 system("open -a Graphviz $opt_dot") if $^O eq 'darwin'; # OSX
370 }
371
372 $dbh->commit if $dbh;
373 }
2c631ee0 374 else {
375 warn "Invalid type '$type' on line $. ($_)";
e8f4c506 376 next;
2c631ee0 377 }
d3b8a135 378
b2fc39a5 379 $dbh->commit if $dbh and $id % 10_000 == 0;
2c631ee0 380}
5e2e22f3 381die "EOF without end token" if @stack;
b2fc39a5 382
2c631ee0 383
d3b8a135 384=for This is out of date but gives you an idea of the data and stream
385
2c631ee0 386SV(PVAV) fill=1/1 [#1 @0]
387: +64 sv =64
388: +16 av_max =80
389: AVelem-> [#2 @1]
390: : SV(RV) [#3 @2]
391: : : +24 sv =104
392: : : RV-> [#4 @3]
393: : : : SV(PVAV) fill=-1/-1 [#5 @4]
394: : : : : +64 sv =168
395: AVelem-> [#6 @1]
396: : SV(IV) [#7 @2]
397: : : +24 sv =192
398192 at -e line 1.
399=cut
400__DATA__
401N 1 0 SV(PVAV) fill=1/1
402L 1 64 sv
403L 1 16 av_max
404N 2 1 AVelem->
405N 3 2 SV(RV)
406L 3 24 sv
407N 4 3 RV->
408N 5 4 SV(PVAV) fill=-1/-1
409L 5 64 sv
410N 6 1 AVelem->
411N 7 2 SV(IV)
412L 7 24 sv