From: David Mitchell Date: Fri, 19 Jun 2009 15:00:29 +0000 (+0100) Subject: add Porting/core-cpan-diff X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=cb097e7a7eb1098f10246724baff2d8189ac41c5;p=p5sagit%2Fp5-mst-13.2.git add Porting/core-cpan-diff --- diff --git a/MANIFEST b/MANIFEST index da15315..930d640 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3704,6 +3704,7 @@ Porting/config_H Sample config.h Porting/config_h.pl Reorder config_h.SH after metaconfig Porting/config.sh Sample config.sh Porting/Contract Social contract for contributed modules in Perl core +Porting/core-cpan-diff Compare core distros with their CPAN equivalents Porting/corecpan.pl Reports outdated dual-lived modules Porting/corelist.pl Generates data for Module::CoreList Porting/curliff.pl Curliff or liff your curliffable files. diff --git a/Porting/core-cpan-diff b/Porting/core-cpan-diff new file mode 100755 index 0000000..80d6b7d --- /dev/null +++ b/Porting/core-cpan-diff @@ -0,0 +1,591 @@ +#!/usr/bin/env perl + +# core-cpan-diff: Compare CPAN modules with their equivalent in core + +# Originally based on App::DualLivedDiff by Steffen Mueller. + +use strict; +use warnings; + +use 5.010; + +use Getopt::Long; +use File::Temp (); +use File::Path (); +use File::Spec; +use Archive::Extract; +use IO::Uncompress::Gunzip (); +use File::Compare (); +use ExtUtils::Manifest; + +BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } +use lib 'Porting'; +use Maintainers (); + +# if running from blead, we may be doing -Ilib, which means when we +# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. +# So preload the things we need, and tell it to check %INC first: + +use Archive::Tar; +use IPC::Open3; +use IO::Select; +$Module::Load::Conditional::CHECK_INC_HASH = 1; +# stop Archive::Extract whinging about lack of Archive::Zip +$Archive::Extract::WARN = 0; + + +# Files, which if they exist in CPAN but not in perl, will not generate +# an 'Only in CPAN' listing +# +our %IGNORABLE = map { ($_ => 1) } + qw(.cvsignore .dualLivedDiffConfig .gitignore + ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL + CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS + GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL + MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README + SIGNATURE THANKS TODO Todo VERSION WHATSNEW); + +# where, under the cache dir, to untar stuff to + +use constant UNTAR_DIR => 'untarred'; + +use constant DIFF_CMD => 'diff'; +use constant WGET_CMD => 'wget'; + +sub usage { + print STDERR "\n@_\n\n" if @_; + print STDERR < \$scan_all, + 'c|cachedir=s' => \$cache_dir, + 'd|diff' => \$use_diff, + 'diffopts:s' => \$diff_opts, + 'f|force' => \$force, + 'h|help' => \&usage, + 'o|output=s' => \$output_file, + 'r|reverse' => \$reverse, + 'v|verbose' => \$verbose, + 'x|crosscheck' => \$do_crosscheck, + ) or usage; + + + my @modules; + + usage("Cannot mix -a with module list") if $scan_all && @ARGV; + + if ($do_crosscheck) { + usage("can't use -r, -d, --diffopts, -v with --crosscheck") + if ($reverse || $use_diff || $diff_opts || $verbose); + } + else { + $diff_opts = '-u' unless defined $diff_opts; + usage("can't use -f without --crosscheck") if $force; + } + + @modules = $scan_all + ? grep $Maintainers::Modules{$_}{CPAN}, + (sort {lc $a cmp lc $b } keys %Maintainers::Modules) + : @ARGV; + usage("No modules specified") unless @modules; + + + my $outfh; + if (defined $output_file) { + open $outfh, '>', $output_file + or die "ERROR: could not open file '$output_file' for writing: $!"; + } + else { + open $outfh, ">&STDOUT" + or die "ERROR: can't dup STDOUT: $!"; + } + + if (defined $cache_dir) { + die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; + } + + if ($do_crosscheck) { + do_crosscheck($outfh, $cache_dir, $force, \@modules); + } + else { + do_compare(\@modules, $outfh, $cache_dir, $verbose, $use_diff, + $reverse, $diff_opts); + } +} + + + +# compare a list of modules against their CPAN equivalents + +sub do_compare { + my ($modules, $outfh, $cache_dir, $verbose, + $use_diff, $reverse, $diff_opts) = @_; + + + # first, make sure we have a directory where they can all be untarred, + # and if its a permanent directory, clear any previous content + my $untar_dir; + if ($cache_dir) { + $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); + if (-d $untar_dir) { + File::Path::rmtree($untar_dir) + or die "failed to remove $untar_dir\n"; + } + mkdir $untar_dir + or die "mkdir $untar_dir: $!\n"; + } + else { + $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); + } + + my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; + + my %seen_dist; + for my $module (@$modules) { + print $outfh "\n$module\n" unless $use_diff; + + my $m = $Maintainers::Modules{$module} + or die "ERROR: No such module in Maintainers.pl: '$module'\n"; + + unless ($m->{CPAN}) { + print $outfh "WARNING: $module is not dual-life; skipping\n"; + next; + } + + my $dist = $m->{DISTRIBUTION}; + die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; + + if ($seen_dist{$dist}) { + warn "WARNING: duplicate entry for $dist in $module\n" + } + $seen_dist{$dist}++; + + my $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist); + + + my @perl_files = Maintainers::get_module_files($module); + + my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); + die "ERROR: no such file: $manifest\n" unless -f $manifest; + + my $cpan_files = ExtUtils::Manifest::maniread($manifest); + my @cpan_files = sort keys %$cpan_files; + + my ($excluded, $map) = get_map($m, $module, \@perl_files); + + my %perl_unseen; + @perl_unseen{@perl_files} = (); + my %perl_files = %perl_unseen; + + foreach my $cpan_file (@cpan_files) { + my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); + unless (defined $mapped_file) { + print $outfh " Excluded: $cpan_file\n" if $verbose; + next; + } + + if (exists $perl_files{$mapped_file}) { + delete $perl_unseen{$mapped_file}; + } + else { + # some CPAN files foo are stored in core as foo.packed, + # which are then unpacked by 'make test_prep' + my $packed_file = "$mapped_file.packed"; + if (exists $perl_files{$packed_file} ) { + if (! -f $mapped_file and -f $packed_file) { + print $outfh <catfile($cpan_dir, $cpan_file); + + # should never happen + die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; + + # might happen if the FILES entry in Maintainers.pl is wrong + unless (-f $mapped_file) { + print $outfh "WARNING: perl file not found: $mapped_file\n"; + next; + } + + + if (File::Compare::compare($abs_cpan_file, $mapped_file)) { + if ($use_diff) { + file_diff($outfh, $abs_cpan_file, $mapped_file, + $reverse, $diff_opts); + } + else { + if ($cpan_file eq $mapped_file) { + print $outfh " Modified: $cpan_file\n"; + } + else { + print $outfh " Modified: $cpan_file $mapped_file\n"; + } + } + } + elsif ($verbose) { + if ($cpan_file eq $mapped_file) { + print $outfh " Unchanged: $cpan_file\n"; + } + else { + print $outfh " Unchanged: $cpan_file $mapped_file\n"; + } + } + } + for (sort keys %perl_unseen) { + print $outfh " Perl only: $_\n" unless $use_diff; + } + } +} + +# given FooBar-1.23_45.tar.gz, return FooBar + +sub distro_base { + my $d = shift; + $d =~ s/\.tar\.gz$//; + $d =~ s/\.gip$//; + $d =~ s/[\d\-_\.]+$//; + return $d; +} + +# process --crosscheck action: +# ie list all distributions whose CPAN versions differ from that listed in +# Maintainers.pl + +sub do_crosscheck { + my ($outfh, $cache_dir, $force, $modules) = @_; + + my $file = '02packages.details.txt'; + my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); + my $path = File::Spec->catfile($download_dir, $file); + my $gzfile = "$path.gz"; + + # grab 02packages.details.txt + + my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; + + if (! -f $gzfile or $force) { + unlink $gzfile; + my_getstore($url, $gzfile); + } + unlink $path; + IO::Uncompress::Gunzip::gunzip($gzfile, $path) + or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; + + # suck in the data from it + + open my $fh, '<', $path + or die "ERROR: open: $file: $!\n"; + + my %distros; + my %modules; + + while (<$fh>) { + next if 1../^$/; + chomp; + my @f = split ' ', $_; + if (@f != 3) { + warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; + next; + } + $modules{$f[0]} = $f[2]; + + my $distro = $f[2]; + $distro =~ s{^.*/}{}; + + $distros{distro_base($distro)}{$distro} = 1; + } + + for my $module (@$modules) { + my $m = $Maintainers::Modules{$module} + or die "ERROR: No such module in Maintainers.pl: '$module'\n"; + + unless ($m->{CPAN}) { + print $outfh "\nWARNING: $module is not dual-life; skipping\n"; + next; + } + + + # given an try like + # Foo::Bar 1.23 foo-bar-1.23.tar.gz, + # first compare the module name against Foo::Bar, and failing that, + # against foo-bar + + my $pdist = $m->{DISTRIBUTION}; + die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; + $pdist =~ s{^.*/}{}; + + my $cdist = $modules{$module}; + + if (defined $cdist) { + $cdist =~ s{^.*/}{}; + } + else { + my $d = $distros{distro_base($pdist)}; + unless (defined $d) { + print $outfh "\n$module: Can't determine current CPAN entry\n"; + next; + } + if (keys %$d > 1) { + print $outfh "\n$module: (found more than one CPAN candidate):\n"; + print $outfh " perl: $pdist\n"; + print $outfh " CPAN: $_\n" for sort keys %$d; + next; + } + $cdist = (keys %$d)[0]; + } + + if ($cdist ne $pdist) { + print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; + } + } +} + + + +# get the EXCLUDED and MAP entries for this module, or +# make up defauts if they don't exist + +sub get_map { + my ($m, $module_name, $perl_files) = @_; + + my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; + + $excluded ||= []; + + return $excluded, $map if $map; + + # all files under ext/foo-bar (plus maybe some under t/lib)??? + + my $ext; + for (@$perl_files) { + if (m{^(ext/[^/]+/)}) { + if (defined $ext and $ext ne $1) { + # more than one ext/$ext/ + undef $ext; + last; + } + $ext = $1; + } + elsif (m{^t/lib/}) { + next; + } + else { + undef $ext; + last; + } + } + + if (defined $ext) { + $map = { '' => $ext }, + } + else { + (my $base = $module_name) =~ s{::}{/}g; + $base ="lib/$base"; + $map = { + 'lib/' => 'lib/', + '' => "$base/", + }; + } + return $excluded, $map; +} + + +# Given an exclude list and a mapping hash, convert a CPAN filename +# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). +# Returns an empty list for an excluded file + +sub cpan_to_perl { + my ($excluded, $map, $cpan_file) = @_; + + for my $exclude (@$excluded) { + # may be a simple string to match exactly, or a pattern + if (ref $exclude) { + return if $cpan_file =~ $exclude; + } + else { + return if $cpan_file eq $exclude; + } + } + + my $perl_file = $cpan_file; + + # try longest prefix first, then alphabetically on tie-break + for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) + { + last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; + } + return $perl_file; +} + + + +# do LWP::Simple::getstore, possibly without LWP::Simple being available + +my $lwp_simple_available; + +sub my_getstore { + my ($url, $file) = @_; + unless (defined $lwp_simple_available) { + eval { require LWP::Simple }; + $lwp_simple_available = $@ eq ''; + } + if ($lwp_simple_available) { + return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); + } + else { + return system(WGET_CMD, "-O", $file, $url) == 0; + } +} + + +# download and unpack a distribution +# Returns the full pathname of the extracted directory +# (eg '/tmp/XYZ/Foo_bar-1.23') + +# cache_dir: where to dowenload the .tar.gz file to +# untar_dir: where to untar or unzup the file +# module: name of module +# dist: name of the distribution + +sub get_distribution { + my ($cache_dir, $untar_dir, $module, $dist) = @_; + + $dist =~ m{.+/([^/]+)$} + or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist"; + my $filename = $1; + + my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); + my $download_file = File::Spec->catfile($download_dir, $filename); + + # download distribution + + if (-f $download_file and ! -s $download_file ) { + # wget can leave a zero-length file on failed download + unlink $download_file; + } + + unless (-f $download_file) { + # not cached + $dist =~ /^([A-Z])([A-Z])/ + or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist"; + + my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; + my_getstore($url, $download_file) + or die "ERROR: Could not fetch '$url'"; + } + + # extract distribution + + my $ae = Archive::Extract->new( archive => $download_file); + $ae->extract( to => $untar_dir ) + or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error(); + + # get the name of the extracted distribution dir + + my $path = File::Spec->catfile($untar_dir, $filename); + + $path =~ s/\.tar\.gz$// or + $path =~ s/\.zip$// or + die "ERROR: downloaded file does not have a recognised suffix: $path\n"; + + die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; + + return $path; +} + + +# produce the diff of a single file +sub file_diff { + my $outfh = shift; + my $cpan_file = shift; + my $perl_file = shift; + my $reverse = shift; + my $diff_opts = shift; + + + my @cmd = (DIFF_CMD, split ' ', $diff_opts); + if ($reverse) { + push @cmd, $perl_file, $cpan_file; + } + else { + push @cmd, $cpan_file, $perl_file; + } + my $result = `@cmd`; + + $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; + + print $outfh $result; +} + + +run(); +