Commit | Line | Data |
5aa3ad8e |
1 | #!/usr/bin/env perl |
2c631ee0 |
2 | |
12ae883b |
3 | =head1 NAME |
4 | |
5 | sizeme_store.pl - process and store the raw data stream from Devel::SizeMe |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | sizeme_store.pl [--text] [--dot=sizeme.dot] [--db=sizeme.db] |
10 | |
591aba53 |
11 | Typically used with Devel::SizeMe via the C<SIZEME> env var: |
12ae883b |
12 | |
13 | export SIZEME='|./sizeme_store.pl --text' |
14 | export SIZEME='|./sizeme_store.pl --dot=sizeme.dot' |
15 | export SIZEME='|./sizeme_store.pl --db=sizeme.db' |
16 | |
17 | =head1 DESCRIPTION |
18 | |
19 | Reads the raw memory data from Devel::SizeMe and processes the tree |
591aba53 |
20 | via a stack, propagating data such as totals, up the tree nodes |
21 | as the data streams through. Output completed nodes in the request formats. |
12ae883b |
22 | |
23 | The --text output is similar to the textual representation output by the module |
24 | when the SIZEME env var is set to an empty string. |
25 | |
26 | The --dot output is suitable for feeding to Graphviz. (On OSX the Graphviz |
27 | application will be started automatically.) |
28 | |
29 | The --db output is a SQLite database. The db schema is very subject to change. |
30 | This output is destined to be the primary one. The other output types will |
31 | probably become separate programs that read the db. |
32 | |
591aba53 |
33 | =head1 TODO |
34 | |
35 | Current implementation is all very alpha and rather hackish. |
36 | |
37 | Refactor to separate the core code into a module. |
38 | |
39 | Move the output formats into separate modules, which should probably read from |
40 | the db so the db becomes the canonical source of data. |
41 | |
42 | Import constants from XS. |
43 | |
e988a695 |
44 | =cut |
12ae883b |
45 | |
d3b8a135 |
46 | # Needs to be generalized to support pluggable output formats. |
d81c4128 |
47 | # Actually it needs to be split so sizeme_store.pl only does the store |
48 | # and another program drives the output with plugins. |
d3b8a135 |
49 | # Making nodes into (lightweight fast) objects would be smart. |
50 | # Tests would be even smarter! |
51 | # |
52 | # When working on this code it's important to have a sense of the flow. |
53 | # Specifically the way that depth drives the completion of nodes. |
54 | # It's a depth-first stream processing machine, which only ever holds |
55 | # a single stack of the currently incomplete nodes, which is always the same as |
56 | # the current depth. I.e., when a node of depth N arrives, all nodes >N are |
57 | # popped off the stack and 'completed', each rippling data up to its parent. |
58 | |
2c631ee0 |
59 | use strict; |
60 | use warnings; |
5aa3ad8e |
61 | use autodie; |
2c631ee0 |
62 | |
e8f4c506 |
63 | use DBI qw(looks_like_number); |
b2fc39a5 |
64 | use DBD::SQLite; |
f60f09e5 |
65 | use JSON::XS; |
de73b186 |
66 | use Devel::Dwarn; |
d3b8a135 |
67 | use HTML::Entities qw(encode_entities);; |
5e2e22f3 |
68 | use Data::Dumper; |
fc6614ee |
69 | use Getopt::Long; |
5e2e22f3 |
70 | use Carp qw(carp croak confess); |
fc6614ee |
71 | |
de73b186 |
72 | # XXX import these from the XS code |
73 | use constant NPtype_NAME => 0x01; |
74 | use constant NPtype_LINK => 0x02; |
75 | use constant NPtype_SV => 0x03; |
76 | use constant NPtype_MAGIC => 0x04; |
77 | use constant NPtype_OP => 0x05; |
78 | |
79 | use constant NPattr_LEAFSIZE => 0x00; |
80 | use constant NPattr_NAME => 0x01; |
81 | use constant NPattr_PADFAKE => 0x02; |
82 | use constant NPattr_PADNAME => 0x03; |
83 | use constant NPattr_PADTMP => 0x04; |
84 | use constant NPattr_NOTE => 0x05; |
68cafb30 |
85 | use constant NPattr_PRE_ATTR => 0x06; |
09c6d3bb |
86 | my @attr_type_name = (qw(size NAME PADFAKE my PADTMP NOTE PREATTR)); # XXX get from XS in some way |
de73b186 |
87 | |
88 | |
fc6614ee |
89 | GetOptions( |
5aa3ad8e |
90 | 'text!' => \my $opt_text, |
91 | 'dot=s' => \my $opt_dot, |
b2fc39a5 |
92 | 'db=s' => \my $opt_db, |
e8f4c506 |
93 | 'verbose|v!' => \my $opt_verbose, |
94 | 'debug|d!' => \my $opt_debug, |
98128850 |
95 | 'showid!' => \my $opt_showid, |
fc6614ee |
96 | ) or exit 1; |
94fab3d1 |
97 | |
5e2e22f3 |
98 | $| = 1 if $opt_debug; |
0e977dbc |
99 | my $run_size = 0; |
100 | my $total_size = 0; |
101 | |
f60f09e5 |
102 | my $j = JSON::XS->new->ascii->pretty(0); |
103 | |
1915946b |
104 | my ($dbh, $node_ins_sth); |
105 | if ($opt_db) { |
106 | $dbh = DBI->connect("dbi:SQLite:dbname=$opt_db","","", { |
107 | RaiseError => 1, PrintError => 0, AutoCommit => 0 |
108 | }); |
109 | $dbh->do("PRAGMA synchronous = OFF"); |
1915946b |
110 | } |
b2fc39a5 |
111 | |
2c631ee0 |
112 | my @stack; |
113 | my %seqn2node; |
114 | |
7020702a |
115 | my $dotnode = sub { |
116 | my $name = encode_entities(shift); |
117 | $name =~ s/"/\\"/g; |
7020702a |
118 | return '"'.$name.'"'; |
119 | }; |
ee2793c1 |
120 | |
ee2793c1 |
121 | |
5aa3ad8e |
122 | my $dot_fh; |
ee2793c1 |
123 | |
0741448c |
124 | sub fmt_size { |
125 | my $size = shift; |
126 | my $kb = $size / 1024; |
127 | return $size if $kb < 5; |
128 | return sprintf "%.1fKb", $kb if $kb < 1000; |
129 | return sprintf "%.1fMb", $kb/1024; |
130 | } |
131 | |
ee2793c1 |
132 | |
94fab3d1 |
133 | sub enter_node { |
134 | my $x = shift; |
09c6d3bb |
135 | warn ">> enter_node $x->{id}\n" if $opt_debug; |
de73b186 |
136 | |
137 | my $parent = $stack[-1]; |
138 | if ($parent) { |
139 | |
37836f2a |
140 | if ($x->{name} eq 'AVelem' and $parent->{name} eq 'SV(PVAV)') { |
09c6d3bb |
141 | my $index = $x->{attr}{+NPattr_NOTE}{i}; |
eda23e24 |
142 | #Dwarn $x->{attr}; |
143 | #Dwarn $index; |
68cafb30 |
144 | # If node is an AVelem of a CvPADLIST propagate pad name to AVelem |
145 | if (@stack >= 4 and (my $cvpl = $stack[-4])->{name} eq 'CvPADLIST') { |
68cafb30 |
146 | my $padnames = $cvpl->{_cached}{padnames} ||= do { |
147 | my @names = @{ $cvpl->{attr}{+NPattr_PADNAME} || []}; |
148 | $_ = "my(".($_||'').")" for @names; |
149 | $names[0] = '@_'; |
150 | \@names; |
151 | }; |
fd0de560 |
152 | $x->{name} = (defined $index and $padnames->[$index]) || "?"; |
68cafb30 |
153 | $x->{name} =~ s/my\(SVs_PADTMP\)/PADTMP/; # XXX hack for neatness |
154 | } |
155 | else { |
fd0de560 |
156 | $x->{name} = "[$index]" if defined $index; |
de73b186 |
157 | } |
158 | } |
09c6d3bb |
159 | |
ee2793c1 |
160 | } |
de73b186 |
161 | |
162 | return $x; |
94fab3d1 |
163 | } |
164 | |
de73b186 |
165 | |
94fab3d1 |
166 | sub leave_node { |
167 | my $x = shift; |
5e2e22f3 |
168 | confess unless defined $x->{id}; |
09c6d3bb |
169 | warn "<< leave_node $x->{id}\n" if $opt_debug; |
b2fc39a5 |
170 | delete $seqn2node{$x->{id}}; |
ee2793c1 |
171 | |
d3b8a135 |
172 | my $self_size = 0; $self_size += $_ for values %{$x->{leaves}}; |
94fab3d1 |
173 | $x->{self_size} = $self_size; |
ee2793c1 |
174 | |
09c6d3bb |
175 | if ($x->{name} eq 'AVelem') { |
176 | my $index = $x->{attr}{+NPattr_NOTE}{i}; |
177 | $x->{name} = "[$index]" if defined $index; |
178 | } |
179 | |
ee2793c1 |
180 | my $parent = $stack[-1]; |
181 | if ($parent) { |
2c631ee0 |
182 | # link to parent |
5a78486c |
183 | $x->{parent_id} = $parent->{id}; |
2c631ee0 |
184 | # accumulate into parent |
185 | $parent->{kids_node_count} += 1 + ($x->{kids_node_count}||0); |
94fab3d1 |
186 | $parent->{kids_size} += $self_size + $x->{kids_size}; |
5a78486c |
187 | push @{$parent->{child_id}}, $x->{id}; |
2c631ee0 |
188 | } |
d81c4128 |
189 | else { |
190 | $x->{kids_node_count} ||= 0; |
191 | } |
de73b186 |
192 | |
2c631ee0 |
193 | # output |
194 | # ... |
ee2793c1 |
195 | if ($opt_dot) { |
1915946b |
196 | printf "// n%d parent=%s(type=%s)\n", $x->{id}, |
0741448c |
197 | $parent ? $parent->{id} : "", |
198 | $parent ? $parent->{type} : "" |
199 | if 0; |
09c6d3bb |
200 | |
de73b186 |
201 | if ($x->{type} != NPtype_LINK) { |
0741448c |
202 | my $name = $x->{title} ? "\"$x->{title}\" $x->{name}" : $x->{name}; |
203 | |
204 | if ($x->{kids_size}) { |
205 | $name .= sprintf " %s+%s=%s", fmt_size($x->{self_size}), fmt_size($x->{kids_size}), fmt_size($x->{self_size}+$x->{kids_size}); |
206 | } |
207 | else { |
208 | $name .= sprintf " +%s", fmt_size($x->{self_size}); |
209 | } |
f91a90e9 |
210 | $name .= " #$x->{id}" if $opt_showid; |
0741448c |
211 | |
212 | my @node_attr = ( |
213 | sprintf("label=%s", $dotnode->($name)), |
214 | "id=$x->{id}", |
215 | ); |
09c6d3bb |
216 | printf $dot_fh qq{n%d [ %s ];\n}, $x->{id}, join(",", @node_attr); |
217 | } |
218 | else { # NPtype_LINK |
d81c4128 |
219 | my @kids = @{$x->{child_id}||[]}; |
09c6d3bb |
220 | die "panic: NPtype_LINK has more than one child: @kids" |
221 | if @kids > 1; |
222 | for my $child_id (@kids) { # wouldn't work right, eg id= attr |
223 | #die Dwarn $x; |
224 | my @link_attr = ("id=$x->{id}"); |
225 | (my $link_name = $x->{name}) =~ s/->$//; |
d81c4128 |
226 | $link_name .= " #$x->{id}" if $opt_showid; |
09c6d3bb |
227 | push @link_attr, (sprintf "label=%s", $dotnode->($link_name)); |
5aa3ad8e |
228 | printf $dot_fh qq{n%d -> n%d [%s];\n}, |
09c6d3bb |
229 | $x->{parent_id}, $child_id, join(",", @link_attr); |
1915946b |
230 | } |
1915946b |
231 | } |
232 | |
ee2793c1 |
233 | } |
b2fc39a5 |
234 | if ($dbh) { |
f60f09e5 |
235 | my $attr_json = $j->encode($x->{attr}); |
e78b28ca |
236 | my $leaves_json = $j->encode($x->{leaves}); |
b2fc39a5 |
237 | $node_ins_sth->execute( |
98128850 |
238 | $x->{id}, $x->{name}, $x->{title}, $x->{type}, $x->{depth}, $x->{parent_id}, |
b2fc39a5 |
239 | $x->{self_size}, $x->{kids_size}, $x->{kids_node_count}, |
f60f09e5 |
240 | $x->{child_id} ? join(",", @{$x->{child_id}}) : undef, |
e78b28ca |
241 | $attr_json, $leaves_json, |
b2fc39a5 |
242 | ); |
243 | # XXX attribs |
244 | } |
d3b8a135 |
245 | |
246 | return $x; |
2c631ee0 |
247 | } |
248 | |
5aa3ad8e |
249 | my $indent = ": "; |
94fab3d1 |
250 | |
2c631ee0 |
251 | while (<>) { |
d81c4128 |
252 | warn "\t\t\t\t== $_" if $opt_debug; |
2c631ee0 |
253 | chomp; |
de73b186 |
254 | |
b2fc39a5 |
255 | my ($type, $id, $val, $name, $extra) = split / /, $_, 5; |
de73b186 |
256 | |
ee2793c1 |
257 | if ($type =~ s/^-//) { # Node type ($val is depth) |
d3b8a135 |
258 | |
65b2cf7d |
259 | printf "%s%s%s %s [#%d @%d]\n", $indent x $val, $name, |
260 | ($type == NPtype_LINK) ? "->" : "", |
261 | $extra||'', $id, $val |
5aa3ad8e |
262 | if $opt_text; |
d3b8a135 |
263 | |
264 | # this is the core driving logic |
2c631ee0 |
265 | while ($val < @stack) { |
d3b8a135 |
266 | my $x = leave_node(pop @stack); |
e8f4c506 |
267 | warn "N $id d$val ends $x->{id} d$x->{depth}: size $x->{self_size}+$x->{kids_size}\n" |
268 | if $opt_verbose; |
2c631ee0 |
269 | } |
c5078bcb |
270 | die "panic: stack already has item at depth $val" |
271 | if $stack[$val]; |
5e2e22f3 |
272 | die "Depth out of sync\n" if $val != @stack; |
de73b186 |
273 | my $node = enter_node({ |
274 | id => $id, type => $type, name => $name, extra => $extra, |
957d176b |
275 | attr => { }, leaves => {}, depth => $val, self_size=>0, kids_size=>0 |
de73b186 |
276 | }); |
277 | $stack[$val] = $node; |
b2fc39a5 |
278 | $seqn2node{$id} = $node; |
2c631ee0 |
279 | } |
d3b8a135 |
280 | |
de73b186 |
281 | # --- Leaf name and memory size |
282 | elsif ($type eq "L") { |
b2fc39a5 |
283 | my $node = $seqn2node{$id} || die; |
2c631ee0 |
284 | $node->{leaves}{$name} += $val; |
0e977dbc |
285 | $run_size += $val; |
286 | printf "%s+%d=%d %s\n", $indent x ($node->{depth}+1), $val, $run_size, $name |
5aa3ad8e |
287 | if $opt_text; |
2c631ee0 |
288 | } |
d3b8a135 |
289 | |
290 | # --- Attribute type, name and value (all rather hackish) |
de73b186 |
291 | elsif (looks_like_number($type)) { |
b2fc39a5 |
292 | my $node = $seqn2node{$id} || die; |
e8f4c506 |
293 | my $attr = $node->{attr} || die; |
de73b186 |
294 | |
68cafb30 |
295 | # attributes where the string is a key (or always empty and the type is the key) |
957d176b |
296 | if ($type == NPattr_NAME or $type == NPattr_NOTE) { |
de73b186 |
297 | printf "%s~%s(%s) %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type |
298 | if $opt_text; |
e8f4c506 |
299 | warn "Node $id already has attribute $type:$name (value $attr->{$type}{$name})\n" |
300 | if exists $attr->{$type}{$name}; |
09c6d3bb |
301 | $attr->{$type}{$name} = $val; |
eda23e24 |
302 | #Dwarn $attr; |
09c6d3bb |
303 | $node->{title} = $name if $type == NPattr_NAME and !$val; # XXX hack |
e8f4c506 |
304 | } |
68cafb30 |
305 | # attributes where the number is a key (or always zero) |
de73b186 |
306 | elsif (NPattr_PADFAKE==$type or NPattr_PADTMP==$type or NPattr_PADNAME==$type) { |
307 | printf "%s~%s('%s') %d [t%d]\n", $indent x ($node->{depth}+1), $attr_type_name[$type], $name, $val, $type |
308 | if $opt_text; |
e8f4c506 |
309 | warn "Node $id already has attribute $type:$name (value $attr->{$type}[$val])\n" |
310 | if defined $attr->{$type}[$val]; |
de73b186 |
311 | $attr->{+NPattr_PADNAME}[$val] = $name; # store all as NPattr_PADNAME |
e8f4c506 |
312 | } |
313 | else { |
de73b186 |
314 | printf "%s~%s %d [t%d]\n", $indent x ($node->{depth}+1), $name, $val, $type |
315 | if $opt_text; |
e8f4c506 |
316 | warn "Invalid attribute type '$type' on line $. ($_)"; |
317 | } |
2c631ee0 |
318 | } |
5e2e22f3 |
319 | elsif ($type eq 'S') { # start of a run |
320 | die "Unexpected start token" if @stack; |
321 | if ($opt_dot) { |
322 | open $dot_fh, ">$opt_dot"; |
323 | print $dot_fh "digraph {\n"; # } |
324 | print $dot_fh "graph [overlap=false]\n"; # target="???", URL="???" |
325 | } |
326 | if ($dbh) { |
327 | # XXX add a size_run table records each run |
328 | # XXX pick a table name to store the run nodes in |
329 | #$run_ins_sth->execute( |
330 | my $table = "node"; |
331 | $dbh->do("DROP TABLE IF EXISTS $table"); |
332 | $dbh->do(qq{ |
333 | CREATE TABLE $table ( |
334 | id integer primary key, |
335 | name text, |
336 | title text, |
337 | type integer, |
338 | depth integer, |
339 | parent_id integer, |
340 | |
341 | self_size integer, |
342 | kids_size integer, |
343 | kids_node_count integer, |
344 | child_ids text, |
345 | attr_json text, |
346 | leaves_json text |
347 | ) |
348 | }); |
349 | $node_ins_sth = $dbh->prepare(qq{ |
350 | INSERT INTO $table VALUES (?,?,?,?,?,?, ?,?,?,?,?,?) |
351 | }); |
352 | } |
353 | } |
354 | elsif ($type eq 'E') { # end of a run |
355 | |
356 | my $top = $stack[0]; # grab top node before we pop all the nodes |
357 | leave_node(pop @stack) while @stack; |
c99e1fe8 |
358 | |
eda23e24 |
359 | # if nothing output (ie size(undef)) |
360 | $top ||= { self_size=>0, kids_size=>0, kids_node_count=>0 }; |
361 | |
5e2e22f3 |
362 | my $top_size = $top->{self_size}+$top->{kids_size}; |
363 | |
eda23e24 |
364 | printf "Stored %d nodes totalling %s [lines=%d size=%d write=%.2fs]\n", |
365 | 1+$top->{kids_node_count}, fmt_size($top_size), |
366 | $., $top_size, $val; |
c99e1fe8 |
367 | # the duration here ($val) is from Devel::SizeMe perspective |
368 | # ie doesn't include time to read file/pipe and commit to database. |
5e2e22f3 |
369 | |
370 | if ($opt_verbose or $run_size != $top_size) { |
371 | warn "EOF ends $top->{id} d$top->{depth}: size $top->{self_size}+$top->{kids_size}\n"; |
372 | warn Dumper($top); |
373 | } |
374 | die "panic: seqn2node should be empty ". Dumper(\%seqn2node) |
375 | if %seqn2node; |
5e2e22f3 |
376 | |
377 | if ($dot_fh) { |
378 | print $dot_fh "}\n"; |
379 | close $dot_fh; |
380 | system("open -a Graphviz $opt_dot") if $^O eq 'darwin'; # OSX |
381 | } |
382 | |
383 | $dbh->commit if $dbh; |
384 | } |
2c631ee0 |
385 | else { |
386 | warn "Invalid type '$type' on line $. ($_)"; |
e8f4c506 |
387 | next; |
2c631ee0 |
388 | } |
d3b8a135 |
389 | |
b2fc39a5 |
390 | $dbh->commit if $dbh and $id % 10_000 == 0; |
2c631ee0 |
391 | } |
5e2e22f3 |
392 | die "EOF without end token" if @stack; |
b2fc39a5 |
393 | |
2c631ee0 |
394 | |
d3b8a135 |
395 | =for This is out of date but gives you an idea of the data and stream |
396 | |
2c631ee0 |
397 | SV(PVAV) fill=1/1 [#1 @0] |
398 | : +64 sv =64 |
399 | : +16 av_max =80 |
400 | : AVelem-> [#2 @1] |
401 | : : SV(RV) [#3 @2] |
402 | : : : +24 sv =104 |
403 | : : : RV-> [#4 @3] |
404 | : : : : SV(PVAV) fill=-1/-1 [#5 @4] |
405 | : : : : : +64 sv =168 |
406 | : AVelem-> [#6 @1] |
407 | : : SV(IV) [#7 @2] |
408 | : : : +24 sv =192 |
409 | 192 at -e line 1. |
410 | =cut |
411 | __DATA__ |
412 | N 1 0 SV(PVAV) fill=1/1 |
413 | L 1 64 sv |
414 | L 1 16 av_max |
415 | N 2 1 AVelem-> |
416 | N 3 2 SV(RV) |
417 | L 3 24 sv |
418 | N 4 3 RV-> |
419 | N 5 4 SV(PVAV) fill=-1/-1 |
420 | L 5 64 sv |
421 | N 6 1 AVelem-> |
422 | N 7 2 SV(IV) |
423 | L 7 24 sv |