[p5sagit/p5-mst-13.2.git] / Porting / valgrindpp.pl

#!/usr/bin/perl
use IO::File ();
use File::Find qw(find);
use Text::Wrap qw(wrap);
use Getopt::Long qw(GetOptions);
use Pod::Usage qw(pod2usage);
use Cwd qw(cwd);
use File::Spec;
use strict;

my %opt = (
  frames  => 3,
  lines   => 0,
  tests   => 0,
  top     => 0,
  verbose => 0,
);

GetOptions(\%opt, qw(
            dir=s
            frames=i
            hide=s@
            lines!
            output-file=s
            tests!
            top=i
            verbose+
          )) or pod2usage(2);

# Setup the directory to process
if (exists $opt{dir}) {
  $opt{dir} = File::Spec->canonpath($opt{dir});
}
else {
  # Check if we're in 't'
  $opt{dir} = cwd =~ /\/t$/ ? '..' : '.';

  # Check if we're in the right directory
  -d "$opt{dir}/$_" or die "$0: must be run from the perl source directory"
                         . " when --dir is not given\n"
      for qw(t lib ext);
}

# Assemble regex for functions whose leaks should be hidden
# (no, a hash won't be significantly faster)
my $hidden = do { local $"='|'; $opt{hide} ? qr/^(?:@{$opt{hide}})$/o : '' };

# Setup our output file handle
# (do it early, as it may fail)
my $fh = \*STDOUT;
if (exists $opt{'output-file'}) {
  $fh = new IO::File ">$opt{'output-file'}"
        or die "$0: cannot open $opt{'output-file'} ($!)\n";
}

# These hashes will receive the error and leak summary data:
#
# %error = (
#   error_name => {
#                   stack_frame => {
#                                    test_script => occurences
#                                  }
#                 }
# );
#
# %leak = (
#   leak_type => {
#                  stack_frames => {
#                                    test_script => occurences
#                                  }
#                } # stack frames are separated by '<'s
# );
my(%error, %leak);

# Collect summary data
find({wanted => \&filter, no_chdir => 1}, $opt{dir});

# Format the output nicely
$Text::Wrap::columns = 80;
$Text::Wrap::unexpand = 0;

# Write summary
summary($fh, \%error, \%leak);

exit 0;

sub summary {
  my($fh, $error, $leak) = @_;
  my(%ne, %nl, %top);

  # Prepare the data

  for my $e (keys %$error) {
    for my $f (keys %{$error->{$e}}) {
      my($func, $file, $line) = split /:/, $f;
      my $nf = $opt{lines} ? "$func ($file:$line)" : "$func ($file)";
      $ne{$e}{$nf}{count}++;
      while (my($k,$v) = each %{$error->{$e}{$f}}) {
        $ne{$e}{$nf}{tests}{$k} += $v;
        $top{$k}{error}++;
      }
    }
  }

  for my $l (keys %$leak) {
    for my $s (keys %{$leak->{$l}}) {
      my $ns = join '<', map {
                 my($func, $file, $line) = split /:/;
                 /:/ ? $opt{lines}
                       ? "$func ($file:$line)" : "$func ($file)"
                     : $_
               } split /</, $s;
      $nl{$l}{$ns}{count}++;
      while (my($k,$v) = each %{$leak->{$l}{$s}}) {
        $nl{$l}{$ns}{tests}{$k} += $v;
        $top{$k}{leak}++;
      }
    }
  }

  # Print the Top N

  if ($opt{top}) {
    for my $what (qw(error leak)) {
      my @t = sort { $top{$b}{$what} <=> $top{$a}{$what} or $a cmp $b }
              grep $top{$_}{$what}, keys %top;
      @t > $opt{top} and splice @t, $opt{top};
      my $n = @t;
      my $s = $n > 1 ? 's' : '';
      my $prev = 0;
      print $fh "Top $n test scripts for ${what}s:\n\n";
      for my $i (1 .. $n) {
        $n = $top{$t[$i-1]}{$what};
        $s = $n > 1 ? 's' : '';
        printf $fh "    %3s %-40s %3d $what$s\n",
                   $n != $prev ? "$i." : '', $t[$i-1], $n;
        $prev = $n;
      }
      print $fh "\n";
    }
  }

  # Print the real summary

  print $fh "MEMORY ACCESS ERRORS\n\n";

  for my $e (sort keys %ne) {
    print $fh qq("$e"\n);
    for my $frame (sort keys %{$ne{$e}}) {
      my $data = $ne{$e}{$frame};
      my $count = $data->{count} > 1 ? " [$data->{count} paths]" : '';
      print $fh ' 'x4, "$frame$count\n",
                format_tests($data->{tests}), "\n";
    }
    print $fh "\n";
  }

  print $fh "\nMEMORY LEAKS\n\n";
 
  for my $l (sort keys %nl) {
    print $fh qq("$l"\n);
    for my $frames (sort keys %{$nl{$l}}) {
      my $data = $nl{$l}{$frames};
      my @stack = split /</, $frames;
      $data->{count} > 1 and $stack[-1] .= " [$data->{count} paths]";
      print $fh join('', map { ' 'x4 . "$_:$stack[$_]\n" } 0 .. $#stack ),
                format_tests($data->{tests}), "\n\n";
    }
  }
}

sub format_tests {
  my $tests = shift;
  my $indent = ' 'x8;

  if ($opt{tests}) {
    return wrap($indent, $indent, join ', ', sort keys %$tests);
  }
  else {
    my $count = keys %$tests;
    my $s = $count > 1 ? 's' : '';
    return $indent . "triggered by $count test$s";
  }
}

sub filter {
  debug(2, "$File::Find::name\n");

  # Only process '*.t.valgrind' files
  /(.*)\.t\.valgrind$/ or return;

  # Strip all unnecessary stuff from the test name
  my $test = $1;
  $test =~ s/^(?:(?:\Q$opt{dir}\E|[.t])\/)+//;

  debug(1, "processing $test ($_)\n");

  # Get all the valgrind output lines
  my @l = do {
    my $fh = new IO::File $_ or die "$0: cannot open $_ ($!)\n";
    # Process outputs can interrupt each other, so sort by pid first
    my %pid; local $_;
    while (<$fh>) {
      chomp;
      s/^==(\d+)==\s?// and push @{$pid{$1}}, $_;
    }
    map @$_, values %pid;
  };

  # Setup some useful regexes
  my $hexaddr  = '0x[[:xdigit:]]+';
  my $topframe = qr/^\s+at $hexaddr:\s+/;
  my $address  = qr/^\s+Address $hexaddr is \d+ bytes (?:before|inside|after) a block of size \d+/;
  my $leak     = qr/^\s*\d+ bytes in \d+ blocks are (still reachable|(?:definite|possib)ly lost)/;

  for my $i (0 .. $#l) {
    $l[$i]   =~ $topframe or next; # Match on any topmost frame...
    $l[$i-1] =~ $address and next; # ...but not if it's only address details
    my $line = $l[$i-1]; # The error / leak description line
    my $j    = $i;

    if ($line =~ $leak) {
      debug(2, "LEAK: $line\n");

      my $type   = $1;     # Type of leak (still reachable, ...)
      my $inperl = 0;      # Are we inside the perl source? (And how deep?)
      my @stack;           # Call stack

      while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(\w+)\s+\((?:([^:]+):(\d+)|[^)]+)\)/o) {
        my($func, $file, $lineno) = ($1, $2, $3);

        # If the stack frame is inside perl => increment $inperl
        # If we've already been inside perl, but are no longer => leave
        defined $file && ++$inperl or $inperl && last;

        # A function that should be hidden? => clear stack and leave
        $hidden && $func =~ $hidden and @stack = (), last;

        # Add stack frame if it's within our threshold
        if ($inperl <= $opt{frames}) {
          push @stack, $inperl ? "$func:$file:$lineno" : $func;
        }
      }

      # If there's something on the stack and we've seen perl code,
      # add this memory leak to the summary data
      @stack and $inperl and $leak{$type}{join '<', @stack}{$test}++;
    } else {
      debug(1, "ERROR: $line\n");

      # Simply find the topmost frame in the call stack within
      # the perl source code
      while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(?:(\w+)\s+\(([^:]+):(\d+)\))?/o) {
        if (defined $1) {
          $error{$line}{"$1:$2:$3"}{$test}++;
          last;
        }
      }
    }
  }
}

sub debug {
  my $level = shift;
  $opt{verbose} >= $level and print STDERR @_;
}

__END__

=head1 NAME

valgrindpp.pl - A post processor for make test.valgrind

=head1 SYNOPSIS

valgrindpp.pl [B<--dir>=I<dir>] [B<--frames>=I<number>]
[B<--hide>=I<identifier>] [B<--lines>]
[B<--output-file>=I<file>] [B<--tests>] 
[B<--top>=I<number>] [B<--verbose>]

=head1 DESCRIPTION

B<valgrindpp.pl> is a post processor for I<.valgrind> files
created during I<make test.valgrind>. It collects all these
files, extracts most of the information and produces a
significantly shorter summary of all detected memory access
errors and memory leaks.

=head1 OPTIONS

=over 4

=item B<--dir>=I<dir>

Recursively process I<.valgrind> files in I<dir>. If this
options is not given, B<valgrindpp.pl> must be run from
either the perl source or the I<t> directory and will process
all I<.valgrind> files within the distribution.

=item B<--frames>=I<number>

Number of stack frames within the perl source code to 
consider when distinguishing between memory leak sources.
Increasing this value will give you a longer backtrace,
while decreasing the number will show you fewer sources
for memory leaks. The default is 3 frames.

=item B<--hide>=I<identifier>

Hide all memory leaks that have I<identifier> in their backtrace.
Useful if you want to hide leaks from functions that are known to
have lots of memory leaks. I<identifier> can also be a regular
expression, in which case all leaks with symbols matching the
expression are hidden. Can be given multiple times.

=item B<--lines>

Show line numbers for stack frames. This is useful for further
increasing the error/leak resolution, but makes it harder to
compare different reports using I<diff>.

=item B<--output-file>=I<file>

Redirect the output into I<file>. If this option is not
given, the output goes to I<stdout>.

=item B<--tests>

List all tests that trigger memory access errors or memory
leaks explicitly instead of only printing a count.

=item B<--top>=I<number>

List the top I<number> test scripts for memory access errors
and memory leaks. Set to C<0> for no top-I<n> statistics.

=item B<--verbose>

Increase verbosity level. Can be given multiple times.

=back

=head1 COPYRIGHT

Copyright 2003 by Marcus Holland-Moritz <mhx@cpan.org>.

This program is free software; you may redistribute it
and/or modify it under the same terms as Perl itself.

=cut
Commit	Line	Data
77c22dc1	1	#!/usr/bin/perl
	2	use IO::File ();
	3	use File::Find qw(find);
	4	use Text::Wrap qw(wrap);
	5	use Getopt::Long qw(GetOptions);
	6	use Pod::Usage qw(pod2usage);
85ec34a0	7	use Cwd qw(cwd);
85ec34a0	8	use File::Spec;
77c22dc1	9	use strict;
	10
	11	my %opt = (
85ec34a0	12	frames => 3,
520dabba	13	lines => 0,
	14	tests => 0,
	15	top => 0,
85ec34a0	16	verbose => 0,
77c22dc1	17	);
77c22dc1	18
85ec34a0	19	GetOptions(\%opt, qw(
85ec34a0	20	dir=s
520dabba	21	frames=i
77c22dc1	22	hide=s@
520dabba	23	lines!
77c22dc1	24	output-file=s
520dabba	25	tests!
520dabba	26	top=i
85ec34a0	27	verbose+
85ec34a0	28	)) or pod2usage(2);
77c22dc1	29
85ec34a0	30	# Setup the directory to process
	31	if (exists $opt{dir}) {
	32	$opt{dir} = File::Spec->canonpath($opt{dir});
	33	}
	34	else {
	35	# Check if we're in 't'
	36	$opt{dir} = cwd =~ /\/t$/ ? '..' : '.';
	37
	38	# Check if we're in the right directory
	39	-d "$opt{dir}/$_" or die "$0: must be run from the perl source directory"
	40	. " when --dir is not given\n"
	41	for qw(t lib ext);
	42	}
	43
	44	# Assemble regex for functions whose leaks should be hidden
	45	# (no, a hash won't be significantly faster)
	46	my $hidden = do { local $"='\|'; $opt{hide} ? qr/^(?:@{$opt{hide}})$/o : '' };
77c22dc1	47
85ec34a0	48	# Setup our output file handle
85ec34a0	49	# (do it early, as it may fail)
77c22dc1	50	my $fh = \*STDOUT;
	51	if (exists $opt{'output-file'}) {
	52	$fh = new IO::File ">$opt{'output-file'}"
85ec34a0	53	or die "$0: cannot open $opt{'output-file'} ($!)\n";
77c22dc1	54	}
77c22dc1	55
85ec34a0	56	# These hashes will receive the error and leak summary data:
	57	#
	58	# %error = (
	59	# error_name => {
	60	# stack_frame => {
	61	# test_script => occurences
	62	# }
	63	# }
	64	# );
	65	#
	66	# %leak = (
	67	# leak_type => {
	68	# stack_frames => {
	69	# test_script => occurences
	70	# }
	71	# } # stack frames are separated by '<'s
	72	# );
77c22dc1	73	my(%error, %leak);
77c22dc1	74
85ec34a0	75	# Collect summary data
	76	find({wanted => \&filter, no_chdir => 1}, $opt{dir});
	77
520dabba	78	# Format the output nicely
	79	$Text::Wrap::columns = 80;
	80	$Text::Wrap::unexpand = 0;
	81
85ec34a0	82	# Write summary
520dabba	83	summary($fh, \%error, \%leak);
77c22dc1	84
	85	exit 0;
	86
	87	sub summary {
520dabba	88	my($fh, $error, $leak) = @_;
	89	my(%ne, %nl, %top);
	90
	91	# Prepare the data
	92
	93	for my $e (keys %$error) {
	94	for my $f (keys %{$error->{$e}}) {
	95	my($func, $file, $line) = split /:/, $f;
	96	my $nf = $opt{lines} ? "$func ($file:$line)" : "$func ($file)";
	97	$ne{$e}{$nf}{count}++;
	98	while (my($k,$v) = each %{$error->{$e}{$f}}) {
	99	$ne{$e}{$nf}{tests}{$k} += $v;
	100	$top{$k}{error}++;
	101	}
	102	}
	103	}
	104
	105	for my $l (keys %$leak) {
	106	for my $s (keys %{$leak->{$l}}) {
	107	my $ns = join '<', map {
	108	my($func, $file, $line) = split /:/;
	109	/:/ ? $opt{lines}
	110	? "$func ($file:$line)" : "$func ($file)"
	111	: $_
	112	} split /</, $s;
	113	$nl{$l}{$ns}{count}++;
	114	while (my($k,$v) = each %{$leak->{$l}{$s}}) {
	115	$nl{$l}{$ns}{tests}{$k} += $v;
	116	$top{$k}{leak}++;
	117	}
	118	}
	119	}
	120
	121	# Print the Top N
	122
	123	if ($opt{top}) {
	124	for my $what (qw(error leak)) {
	125	my @t = sort { $top{$b}{$what} <=> $top{$a}{$what} or $a cmp $b }
	126	grep $top{$_}{$what}, keys %top;
	127	@t > $opt{top} and splice @t, $opt{top};
	128	my $n = @t;
	129	my $s = $n > 1 ? 's' : '';
	130	my $prev = 0;
	131	print $fh "Top $n test scripts for ${what}s:\n\n";
	132	for my $i (1 .. $n) {
	133	$n = $top{$t[$i-1]}{$what};
	134	$s = $n > 1 ? 's' : '';
	135	printf $fh " %3s %-40s %3d $what$s\n",
	136	$n != $prev ? "$i." : '', $t[$i-1], $n;
	137	$prev = $n;
	138	}
	139	print $fh "\n";
	140	}
	141	}
	142
	143	# Print the real summary
77c22dc1	144
77c22dc1	145	print $fh "MEMORY ACCESS ERRORS\n\n";
520dabba	146
520dabba	147	for my $e (sort keys %ne) {
77c22dc1	148	print $fh qq("$e"\n);
520dabba	149	for my $frame (sort keys %{$ne{$e}}) {
	150	my $data = $ne{$e}{$frame};
	151	my $count = $data->{count} > 1 ? " [$data->{count} paths]" : '';
	152	print $fh ' 'x4, "$frame$count\n",
	153	format_tests($data->{tests}), "\n";
77c22dc1	154	}
	155	print $fh "\n";
	156	}
520dabba	157
77c22dc1	158	print $fh "\nMEMORY LEAKS\n\n";
520dabba	159
520dabba	160	for my $l (sort keys %nl) {
77c22dc1	161	print $fh qq("$l"\n);
520dabba	162	for my $frames (sort keys %{$nl{$l}}) {
520dabba	163	my $data = $nl{$l}{$frames};
77c22dc1	164	my @stack = split /</, $frames;
520dabba	165	$data->{count} > 1 and $stack[-1] .= " [$data->{count} paths]";
77c22dc1	166	print $fh join('', map { ' 'x4 . "$_:$stack[$_]\n" } 0 .. $#stack ),
520dabba	167	format_tests($data->{tests}), "\n\n";
77c22dc1	168	}
	169	}
	170	}
	171
520dabba	172	sub format_tests {
	173	my $tests = shift;
	174	my $indent = ' 'x8;
	175
	176	if ($opt{tests}) {
	177	return wrap($indent, $indent, join ', ', sort keys %$tests);
	178	}
	179	else {
	180	my $count = keys %$tests;
	181	my $s = $count > 1 ? 's' : '';
	182	return $indent . "triggered by $count test$s";
	183	}
	184	}
	185
77c22dc1	186	sub filter {
85ec34a0	187	debug(2, "$File::Find::name\n");
77c22dc1	188
85ec34a0	189	# Only process '*.t.valgrind' files
85ec34a0	190	/(.*)\.t\.valgrind$/ or return;
77c22dc1	191
85ec34a0	192	# Strip all unnecessary stuff from the test name
77c22dc1	193	my $test = $1;
85ec34a0	194	$test =~ s/^(?:(?:\Q$opt{dir}\E\|[.t])\/)+//;
	195
	196	debug(1, "processing $test ($_)\n");
77c22dc1	197
85ec34a0	198	# Get all the valgrind output lines
110e9861	199	my @l = do {
	200	my $fh = new IO::File $_ or die "$0: cannot open $_ ($!)\n";
	201	# Process outputs can interrupt each other, so sort by pid first
	202	my %pid; local $_;
	203	while (<$fh>) {
	204	chomp;
	205	s/^==(\d+)==\s?// and push @{$pid{$1}}, $_;
	206	}
	207	map @$_, values %pid;
	208	};
77c22dc1	209
85ec34a0	210	# Setup some useful regexes
77c22dc1	211	my $hexaddr = '0x[[:xdigit:]]+';
85ec34a0	212	my $topframe = qr/^\s+at $hexaddr:\s+/;
	213	my $address = qr/^\s+Address $hexaddr is \d+ bytes (?:before\|inside\|after) a block of size \d+/;
	214	my $leak = qr/^\s*\d+ bytes in \d+ blocks are (still reachable\|(?:definite\|possib)ly lost)/;
77c22dc1	215
77c22dc1	216	for my $i (0 .. $#l) {
85ec34a0	217	$l[$i] =~ $topframe or next; # Match on any topmost frame...
77c22dc1	218	$l[$i-1] =~ $address and next; # ...but not if it's only address details
85ec34a0	219	my $line = $l[$i-1]; # The error / leak description line
77c22dc1	220	my $j = $i;
	221
	222	if ($line =~ $leak) {
	223	debug(2, "LEAK: $line\n");
	224
85ec34a0	225	my $type = $1; # Type of leak (still reachable, ...)
	226	my $inperl = 0; # Are we inside the perl source? (And how deep?)
	227	my @stack; # Call stack
77c22dc1	228
520dabba	229	while ($l[$j++] =~ /^\s+(?:at\|by) $hexaddr:\s+(\w+)\s+\((?:([^:]+):(\d+)\|[^)]+)\)/o) {
520dabba	230	my($func, $file, $lineno) = ($1, $2, $3);
85ec34a0	231
	232	# If the stack frame is inside perl => increment $inperl
	233	# If we've already been inside perl, but are no longer => leave
520dabba	234	defined $file && ++$inperl or $inperl && last;
85ec34a0	235
	236	# A function that should be hidden? => clear stack and leave
	237	$hidden && $func =~ $hidden and @stack = (), last;
	238
	239	# Add stack frame if it's within our threshold
520dabba	240	if ($inperl <= $opt{frames}) {
	241	push @stack, $inperl ? "$func:$file:$lineno" : $func;
	242	}
77c22dc1	243	}
77c22dc1	244
85ec34a0	245	# If there's something on the stack and we've seen perl code,
	246	# add this memory leak to the summary data
	247	@stack and $inperl and $leak{$type}{join '<', @stack}{$test}++;
77c22dc1	248	} else {
	249	debug(1, "ERROR: $line\n");
	250
85ec34a0	251	# Simply find the topmost frame in the call stack within
85ec34a0	252	# the perl source code
520dabba	253	while ($l[$j++] =~ /^\s+(?:at\|by) $hexaddr:\s+(?:(\w+)\s+\(([^:]+):(\d+)\))?/o) {
77c22dc1	254	if (defined $1) {
520dabba	255	$error{$line}{"$1:$2:$3"}{$test}++;
77c22dc1	256	last;
	257	}
	258	}
	259	}
	260	}
	261	}
	262
	263	sub debug {
	264	my $level = shift;
85ec34a0	265	$opt{verbose} >= $level and print STDERR @_;
77c22dc1	266	}
	267
	268	__END__
	269
	270	=head1 NAME
	271
	272	valgrindpp.pl - A post processor for make test.valgrind
	273
	274	=head1 SYNOPSIS
	275
520dabba	276	valgrindpp.pl [B<--dir>=I<dir>] [B<--frames>=I<number>]
	277	[B<--hide>=I<identifier>] [B<--lines>]
	278	[B<--output-file>=I<file>] [B<--tests>]
	279	[B<--top>=I<number>] [B<--verbose>]
77c22dc1	280
	281	=head1 DESCRIPTION
	282
	283	B<valgrindpp.pl> is a post processor for I<.valgrind> files
	284	created during I<make test.valgrind>. It collects all these
	285	files, extracts most of the information and produces a
	286	significantly shorter summary of all detected memory access
	287	errors and memory leaks.
	288
	289	=head1 OPTIONS
	290
	291	=over 4
	292
85ec34a0	293	=item B<--dir>=I<dir>
	294
	295	Recursively process I<.valgrind> files in I<dir>. If this
	296	options is not given, B<valgrindpp.pl> must be run from
	297	either the perl source or the I<t> directory and will process
	298	all I<.valgrind> files within the distribution.
	299
77c22dc1	300	=item B<--frames>=I<number>
	301
	302	Number of stack frames within the perl source code to
	303	consider when distinguishing between memory leak sources.
	304	Increasing this value will give you a longer backtrace,
	305	while decreasing the number will show you fewer sources
	306	for memory leaks. The default is 3 frames.
	307
	308	=item B<--hide>=I<identifier>
	309
	310	Hide all memory leaks that have I<identifier> in their backtrace.
	311	Useful if you want to hide leaks from functions that are known to
	312	have lots of memory leaks. I<identifier> can also be a regular
	313	expression, in which case all leaks with symbols matching the
	314	expression are hidden. Can be given multiple times.
	315
520dabba	316	=item B<--lines>
	317
	318	Show line numbers for stack frames. This is useful for further
	319	increasing the error/leak resolution, but makes it harder to
	320	compare different reports using I<diff>.
	321
	322	=item B<--output-file>=I<file>
	323
	324	Redirect the output into I<file>. If this option is not
	325	given, the output goes to I<stdout>.
	326
	327	=item B<--tests>
	328
	329	List all tests that trigger memory access errors or memory
	330	leaks explicitly instead of only printing a count.
	331
	332	=item B<--top>=I<number>
	333
	334	List the top I<number> test scripts for memory access errors
	335	and memory leaks. Set to C<0> for no top-I<n> statistics.
	336
85ec34a0	337	=item B<--verbose>
77c22dc1	338
85ec34a0	339	Increase verbosity level. Can be given multiple times.
77c22dc1	340
	341	=back
	342
	343	=head1 COPYRIGHT
	344
	345	Copyright 2003 by Marcus Holland-Moritz <mhx@cpan.org>.
	346
	347	This program is free software; you may redistribute it
	348	and/or modify it under the same terms as Perl itself.
	349
	350	=cut