--- /dev/null
+#!/usr/bin/env perl
+
+# core-cpan-diff: Compare CPAN modules with their equivalent in core
+
+# Originally based on App::DualLivedDiff by Steffen Mueller.
+
+use strict;
+use warnings;
+
+use 5.010;
+
+use Getopt::Long;
+use File::Temp ();
+use File::Path ();
+use File::Spec;
+use Archive::Extract;
+use IO::Uncompress::Gunzip ();
+use File::Compare ();
+use ExtUtils::Manifest;
+
+BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
+use lib 'Porting';
+use Maintainers ();
+
+# if running from blead, we may be doing -Ilib, which means when we
+# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
+# So preload the things we need, and tell it to check %INC first:
+
+use Archive::Tar;
+use IPC::Open3;
+use IO::Select;
+$Module::Load::Conditional::CHECK_INC_HASH = 1;
+# stop Archive::Extract whinging about lack of Archive::Zip
+$Archive::Extract::WARN = 0;
+
+
+# Files, which if they exist in CPAN but not in perl, will not generate
+# an 'Only in CPAN' listing
+#
+our %IGNORABLE = map { ($_ => 1) }
+ qw(.cvsignore .dualLivedDiffConfig .gitignore
+ ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
+ CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
+ GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
+ MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
+ SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
+
+# where, under the cache dir, to untar stuff to
+
+use constant UNTAR_DIR => 'untarred';
+
+use constant DIFF_CMD => 'diff';
+use constant WGET_CMD => 'wget';
+
+sub usage {
+ print STDERR "\n@_\n\n" if @_;
+ print STDERR <<HERE;
+Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
+
+-a/--all Scan all dual-life modules.
+
+-c/--cachedir Where to save downloaded CPAN tarball files
+ (defaults to /tmp/something/ with deletion after each run).
+
+-d/--diff Display file differences using diff(1), rather than just
+ listing which files have changed.
+ The diff(1) command is assumed to be in your PATH.
+
+--diffopts Options to pass to the diff command. Defaults to '-u'.
+
+-f|force Force download from CPAN of new 02packages.details.txt file
+ (with --crosscheck only).
+
+-o/--output File name to write output to (defaults to STDOUT).
+
+-r/--reverse Reverses the diff (perl to CPAN).
+
+-v/--verbose List the fate of *all* files in the tarball, not just those
+ that differ or are missing.
+
+-x|crosscheck List the distributions whose current CPAN version differs from
+ that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
+
+By default (i.e. without the --crosscheck option), for each listed module
+(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
+from CPAN associated with that module, and compare the files in it with
+those in the perl source tree.
+
+Must be run from the root of the perl source tree.
+Module names must match the keys of %Modules in Maintainers.pl.
+HERE
+ exit(1);
+}
+
+
+sub run {
+ my $scan_all;
+ my $diff_opts;
+ my $reverse = 0;
+ my $cache_dir;
+ my $use_diff;
+ my $output_file;
+ my $verbose;
+ my $force;
+ my $do_crosscheck;
+
+ GetOptions(
+ 'a|all' => \$scan_all,
+ 'c|cachedir=s' => \$cache_dir,
+ 'd|diff' => \$use_diff,
+ 'diffopts:s' => \$diff_opts,
+ 'f|force' => \$force,
+ 'h|help' => \&usage,
+ 'o|output=s' => \$output_file,
+ 'r|reverse' => \$reverse,
+ 'v|verbose' => \$verbose,
+ 'x|crosscheck' => \$do_crosscheck,
+ ) or usage;
+
+
+ my @modules;
+
+ usage("Cannot mix -a with module list") if $scan_all && @ARGV;
+
+ if ($do_crosscheck) {
+ usage("can't use -r, -d, --diffopts, -v with --crosscheck")
+ if ($reverse || $use_diff || $diff_opts || $verbose);
+ }
+ else {
+ $diff_opts = '-u' unless defined $diff_opts;
+ usage("can't use -f without --crosscheck") if $force;
+ }
+
+ @modules = $scan_all
+ ? grep $Maintainers::Modules{$_}{CPAN},
+ (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
+ : @ARGV;
+ usage("No modules specified") unless @modules;
+
+
+ my $outfh;
+ if (defined $output_file) {
+ open $outfh, '>', $output_file
+ or die "ERROR: could not open file '$output_file' for writing: $!";
+ }
+ else {
+ open $outfh, ">&STDOUT"
+ or die "ERROR: can't dup STDOUT: $!";
+ }
+
+ if (defined $cache_dir) {
+ die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
+ }
+
+ if ($do_crosscheck) {
+ do_crosscheck($outfh, $cache_dir, $force, \@modules);
+ }
+ else {
+ do_compare(\@modules, $outfh, $cache_dir, $verbose, $use_diff,
+ $reverse, $diff_opts);
+ }
+}
+
+
+
+# compare a list of modules against their CPAN equivalents
+
+sub do_compare {
+ my ($modules, $outfh, $cache_dir, $verbose,
+ $use_diff, $reverse, $diff_opts) = @_;
+
+
+ # first, make sure we have a directory where they can all be untarred,
+ # and if its a permanent directory, clear any previous content
+ my $untar_dir;
+ if ($cache_dir) {
+ $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
+ if (-d $untar_dir) {
+ File::Path::rmtree($untar_dir)
+ or die "failed to remove $untar_dir\n";
+ }
+ mkdir $untar_dir
+ or die "mkdir $untar_dir: $!\n";
+ }
+ else {
+ $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
+ }
+
+ my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
+
+ my %seen_dist;
+ for my $module (@$modules) {
+ print $outfh "\n$module\n" unless $use_diff;
+
+ my $m = $Maintainers::Modules{$module}
+ or die "ERROR: No such module in Maintainers.pl: '$module'\n";
+
+ unless ($m->{CPAN}) {
+ print $outfh "WARNING: $module is not dual-life; skipping\n";
+ next;
+ }
+
+ my $dist = $m->{DISTRIBUTION};
+ die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
+
+ if ($seen_dist{$dist}) {
+ warn "WARNING: duplicate entry for $dist in $module\n"
+ }
+ $seen_dist{$dist}++;
+
+ my $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist);
+
+
+ my @perl_files = Maintainers::get_module_files($module);
+
+ my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
+ die "ERROR: no such file: $manifest\n" unless -f $manifest;
+
+ my $cpan_files = ExtUtils::Manifest::maniread($manifest);
+ my @cpan_files = sort keys %$cpan_files;
+
+ my ($excluded, $map) = get_map($m, $module, \@perl_files);
+
+ my %perl_unseen;
+ @perl_unseen{@perl_files} = ();
+ my %perl_files = %perl_unseen;
+
+ foreach my $cpan_file (@cpan_files) {
+ my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
+ unless (defined $mapped_file) {
+ print $outfh " Excluded: $cpan_file\n" if $verbose;
+ next;
+ }
+
+ if (exists $perl_files{$mapped_file}) {
+ delete $perl_unseen{$mapped_file};
+ }
+ else {
+ # some CPAN files foo are stored in core as foo.packed,
+ # which are then unpacked by 'make test_prep'
+ my $packed_file = "$mapped_file.packed";
+ if (exists $perl_files{$packed_file} ) {
+ if (! -f $mapped_file and -f $packed_file) {
+ print $outfh <<EOF;
+WARNING: $mapped_file not found, but .packed variant exists.
+Perhaps you need to run 'make test_prep'?
+EOF
+ next;
+ }
+ delete $perl_unseen{$packed_file};
+ }
+ else {
+ if ($ignorable{$cpan_file}) {
+ print $outfh " Ignored: $cpan_file\n" if $verbose;
+ next;
+ }
+
+ unless ($use_diff) {
+ print $outfh " CPAN only: $cpan_file",
+ ($cpan_file eq $mapped_file) ? "\n"
+ : " (expected $mapped_file)\n";
+ }
+ next;
+ }
+ }
+
+
+ my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
+
+ # should never happen
+ die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
+
+ # might happen if the FILES entry in Maintainers.pl is wrong
+ unless (-f $mapped_file) {
+ print $outfh "WARNING: perl file not found: $mapped_file\n";
+ next;
+ }
+
+
+ if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
+ if ($use_diff) {
+ file_diff($outfh, $abs_cpan_file, $mapped_file,
+ $reverse, $diff_opts);
+ }
+ else {
+ if ($cpan_file eq $mapped_file) {
+ print $outfh " Modified: $cpan_file\n";
+ }
+ else {
+ print $outfh " Modified: $cpan_file $mapped_file\n";
+ }
+ }
+ }
+ elsif ($verbose) {
+ if ($cpan_file eq $mapped_file) {
+ print $outfh " Unchanged: $cpan_file\n";
+ }
+ else {
+ print $outfh " Unchanged: $cpan_file $mapped_file\n";
+ }
+ }
+ }
+ for (sort keys %perl_unseen) {
+ print $outfh " Perl only: $_\n" unless $use_diff;
+ }
+ }
+}
+
+# given FooBar-1.23_45.tar.gz, return FooBar
+
+sub distro_base {
+ my $d = shift;
+ $d =~ s/\.tar\.gz$//;
+ $d =~ s/\.gip$//;
+ $d =~ s/[\d\-_\.]+$//;
+ return $d;
+}
+
+# process --crosscheck action:
+# ie list all distributions whose CPAN versions differ from that listed in
+# Maintainers.pl
+
+sub do_crosscheck {
+ my ($outfh, $cache_dir, $force, $modules) = @_;
+
+ my $file = '02packages.details.txt';
+ my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
+ my $path = File::Spec->catfile($download_dir, $file);
+ my $gzfile = "$path.gz";
+
+ # grab 02packages.details.txt
+
+ my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
+
+ if (! -f $gzfile or $force) {
+ unlink $gzfile;
+ my_getstore($url, $gzfile);
+ }
+ unlink $path;
+ IO::Uncompress::Gunzip::gunzip($gzfile, $path)
+ or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
+
+ # suck in the data from it
+
+ open my $fh, '<', $path
+ or die "ERROR: open: $file: $!\n";
+
+ my %distros;
+ my %modules;
+
+ while (<$fh>) {
+ next if 1../^$/;
+ chomp;
+ my @f = split ' ', $_;
+ if (@f != 3) {
+ warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
+ next;
+ }
+ $modules{$f[0]} = $f[2];
+
+ my $distro = $f[2];
+ $distro =~ s{^.*/}{};
+
+ $distros{distro_base($distro)}{$distro} = 1;
+ }
+
+ for my $module (@$modules) {
+ my $m = $Maintainers::Modules{$module}
+ or die "ERROR: No such module in Maintainers.pl: '$module'\n";
+
+ unless ($m->{CPAN}) {
+ print $outfh "\nWARNING: $module is not dual-life; skipping\n";
+ next;
+ }
+
+
+ # given an try like
+ # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
+ # first compare the module name against Foo::Bar, and failing that,
+ # against foo-bar
+
+ my $pdist = $m->{DISTRIBUTION};
+ die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
+ $pdist =~ s{^.*/}{};
+
+ my $cdist = $modules{$module};
+
+ if (defined $cdist) {
+ $cdist =~ s{^.*/}{};
+ }
+ else {
+ my $d = $distros{distro_base($pdist)};
+ unless (defined $d) {
+ print $outfh "\n$module: Can't determine current CPAN entry\n";
+ next;
+ }
+ if (keys %$d > 1) {
+ print $outfh "\n$module: (found more than one CPAN candidate):\n";
+ print $outfh " perl: $pdist\n";
+ print $outfh " CPAN: $_\n" for sort keys %$d;
+ next;
+ }
+ $cdist = (keys %$d)[0];
+ }
+
+ if ($cdist ne $pdist) {
+ print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
+ }
+ }
+}
+
+
+
+# get the EXCLUDED and MAP entries for this module, or
+# make up defauts if they don't exist
+
+sub get_map {
+ my ($m, $module_name, $perl_files) = @_;
+
+ my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
+
+ $excluded ||= [];
+
+ return $excluded, $map if $map;
+
+ # all files under ext/foo-bar (plus maybe some under t/lib)???
+
+ my $ext;
+ for (@$perl_files) {
+ if (m{^(ext/[^/]+/)}) {
+ if (defined $ext and $ext ne $1) {
+ # more than one ext/$ext/
+ undef $ext;
+ last;
+ }
+ $ext = $1;
+ }
+ elsif (m{^t/lib/}) {
+ next;
+ }
+ else {
+ undef $ext;
+ last;
+ }
+ }
+
+ if (defined $ext) {
+ $map = { '' => $ext },
+ }
+ else {
+ (my $base = $module_name) =~ s{::}{/}g;
+ $base ="lib/$base";
+ $map = {
+ 'lib/' => 'lib/',
+ '' => "$base/",
+ };
+ }
+ return $excluded, $map;
+}
+
+
+# Given an exclude list and a mapping hash, convert a CPAN filename
+# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
+# Returns an empty list for an excluded file
+
+sub cpan_to_perl {
+ my ($excluded, $map, $cpan_file) = @_;
+
+ for my $exclude (@$excluded) {
+ # may be a simple string to match exactly, or a pattern
+ if (ref $exclude) {
+ return if $cpan_file =~ $exclude;
+ }
+ else {
+ return if $cpan_file eq $exclude;
+ }
+ }
+
+ my $perl_file = $cpan_file;
+
+ # try longest prefix first, then alphabetically on tie-break
+ for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
+ {
+ last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
+ }
+ return $perl_file;
+}
+
+
+
+# do LWP::Simple::getstore, possibly without LWP::Simple being available
+
+my $lwp_simple_available;
+
+sub my_getstore {
+ my ($url, $file) = @_;
+ unless (defined $lwp_simple_available) {
+ eval { require LWP::Simple };
+ $lwp_simple_available = $@ eq '';
+ }
+ if ($lwp_simple_available) {
+ return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
+ }
+ else {
+ return system(WGET_CMD, "-O", $file, $url) == 0;
+ }
+}
+
+
+# download and unpack a distribution
+# Returns the full pathname of the extracted directory
+# (eg '/tmp/XYZ/Foo_bar-1.23')
+
+# cache_dir: where to dowenload the .tar.gz file to
+# untar_dir: where to untar or unzup the file
+# module: name of module
+# dist: name of the distribution
+
+sub get_distribution {
+ my ($cache_dir, $untar_dir, $module, $dist) = @_;
+
+ $dist =~ m{.+/([^/]+)$}
+ or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist";
+ my $filename = $1;
+
+ my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
+ my $download_file = File::Spec->catfile($download_dir, $filename);
+
+ # download distribution
+
+ if (-f $download_file and ! -s $download_file ) {
+ # wget can leave a zero-length file on failed download
+ unlink $download_file;
+ }
+
+ unless (-f $download_file) {
+ # not cached
+ $dist =~ /^([A-Z])([A-Z])/
+ or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist";
+
+ my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
+ my_getstore($url, $download_file)
+ or die "ERROR: Could not fetch '$url'";
+ }
+
+ # extract distribution
+
+ my $ae = Archive::Extract->new( archive => $download_file);
+ $ae->extract( to => $untar_dir )
+ or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error();
+
+ # get the name of the extracted distribution dir
+
+ my $path = File::Spec->catfile($untar_dir, $filename);
+
+ $path =~ s/\.tar\.gz$// or
+ $path =~ s/\.zip$// or
+ die "ERROR: downloaded file does not have a recognised suffix: $path\n";
+
+ die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
+
+ return $path;
+}
+
+
+# produce the diff of a single file
+sub file_diff {
+ my $outfh = shift;
+ my $cpan_file = shift;
+ my $perl_file = shift;
+ my $reverse = shift;
+ my $diff_opts = shift;
+
+
+ my @cmd = (DIFF_CMD, split ' ', $diff_opts);
+ if ($reverse) {
+ push @cmd, $perl_file, $cpan_file;
+ }
+ else {
+ push @cmd, $cpan_file, $perl_file;
+ }
+ my $result = `@cmd`;
+
+ $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
+
+ print $outfh $result;
+}
+
+
+run();
+