3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
17 use IO::Uncompress::Gunzip ();
19 use ExtUtils::Manifest;
21 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
25 # if running from blead, we may be doing -Ilib, which means when we
26 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27 # So preload the things we need, and tell it to check %INC first:
32 $Module::Load::Conditional::CHECK_INC_HASH = 1;
33 # stop Archive::Extract whinging about lack of Archive::Zip
34 $Archive::Extract::WARN = 0;
37 # Files, which if they exist in CPAN but not in perl, will not generate
38 # an 'Only in CPAN' listing
40 our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
48 # where, under the cache dir, to untar stuff to
50 use constant UNTAR_DIR => 'untarred';
52 use constant DIFF_CMD => 'diff';
53 use constant WGET_CMD => 'wget';
56 print STDERR "\n@_\n\n" if @_;
58 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
60 -a/--all Scan all dual-life modules.
62 -c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
65 -d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
69 --diffopts Options to pass to the diff command. Defaults to '-u'.
71 -f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
74 -o/--output File name to write output to (defaults to STDOUT).
76 -r/--reverse Reverses the diff (perl to CPAN).
78 -u/--upstream only print modules with the given upstream (defaults to all)
80 -v/--verbose List the fate of *all* files in the tarball, not just those
81 that differ or are missing.
83 -x|crosscheck List the distributions whose current CPAN version differs from
84 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
86 By default (i.e. without the --crosscheck option), for each listed module
87 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
88 from CPAN associated with that module, and compare the files in it with
89 those in the perl source tree.
91 Must be run from the root of the perl source tree.
92 Module names must match the keys of %Modules in Maintainers.pl.
102 my @wanted_upstreams;
111 'a|all' => \$scan_all,
112 'c|cachedir=s' => \$cache_dir,
113 'd|diff' => \$use_diff,
114 'diffopts:s' => \$diff_opts,
115 'f|force' => \$force,
117 'o|output=s' => \$output_file,
118 'r|reverse' => \$reverse,
119 'u|upstream=s@'=> \@wanted_upstreams,
120 'v|verbose' => \$verbose,
121 'x|crosscheck' => \$do_crosscheck,
127 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
129 if ($do_crosscheck) {
130 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
131 if ($reverse || $use_diff || $diff_opts || $verbose);
134 $diff_opts = '-u' unless defined $diff_opts;
135 usage("can't use -f without --crosscheck") if $force;
139 ? grep $Maintainers::Modules{$_}{CPAN},
140 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
142 usage("No modules specified") unless @modules;
146 if (defined $output_file) {
147 open $outfh, '>', $output_file
148 or die "ERROR: could not open file '$output_file' for writing: $!\n";
151 open $outfh, ">&STDOUT"
152 or die "ERROR: can't dup STDOUT: $!\n";
155 if (defined $cache_dir) {
156 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
159 if ($do_crosscheck) {
160 do_crosscheck($outfh, $cache_dir, $force, \@modules);
163 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
164 $reverse, $diff_opts, \@wanted_upstreams);
170 # compare a list of modules against their CPAN equivalents
173 my ($modules, $outfh, $output_file, $cache_dir, $verbose,
174 $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_;
177 # first, make sure we have a directory where they can all be untarred,
178 # and if its a permanent directory, clear any previous content
181 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
183 File::Path::rmtree($untar_dir)
184 or die "failed to remove $untar_dir\n";
187 or die "mkdir $untar_dir: $!\n";
190 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
193 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
196 for my $module (@$modules) {
197 warn "Processing $module ...\n" if defined $output_file;
199 my $m = $Maintainers::Modules{$module}
200 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
202 unless ($m->{CPAN}) {
203 print $outfh "WARNING: $module is not dual-life; skipping\n";
207 my $dist = $m->{DISTRIBUTION};
208 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
210 if ($seen_dist{$dist}) {
211 warn "WARNING: duplicate entry for $dist in $module\n"
214 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
215 next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams);
216 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
217 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
223 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
226 print $outfh " ", $@;
227 print $outfh " (skipping)\n";
231 my @perl_files = Maintainers::get_module_files($module);
233 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
234 die "ERROR: no such file: $manifest\n" unless -f $manifest;
236 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
237 my @cpan_files = sort keys %$cpan_files;
239 my ($excluded, $map) = get_map($m, $module, \@perl_files);
242 @perl_unseen{@perl_files} = ();
243 my %perl_files = %perl_unseen;
245 foreach my $cpan_file (@cpan_files) {
246 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
247 unless (defined $mapped_file) {
248 print $outfh " Excluded: $cpan_file\n" if $verbose;
252 if (exists $perl_files{$mapped_file}) {
253 delete $perl_unseen{$mapped_file};
256 # some CPAN files foo are stored in core as foo.packed,
257 # which are then unpacked by 'make test_prep'
258 my $packed_file = "$mapped_file.packed";
259 if (exists $perl_files{$packed_file} ) {
260 if (! -f $mapped_file and -f $packed_file) {
262 WARNING: $mapped_file not found, but .packed variant exists.
263 Perhaps you need to run 'make test_prep'?
267 delete $perl_unseen{$packed_file};
270 if ($ignorable{$cpan_file}) {
271 print $outfh " Ignored: $cpan_file\n" if $verbose;
276 print $outfh " CPAN only: $cpan_file",
277 ($cpan_file eq $mapped_file) ? "\n"
278 : " (expected $mapped_file)\n";
285 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
287 # should never happen
288 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
290 # might happen if the FILES entry in Maintainers.pl is wrong
291 unless (-f $mapped_file) {
292 print $outfh "WARNING: perl file not found: $mapped_file\n";
296 my $relative_mapped_file = $mapped_file;
297 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
299 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
303 file_diff($outfh, $abs_cpan_file, $mapped_file,
304 $reverse, $diff_opts);
307 if ($cpan_file eq $relative_mapped_file) {
308 print $outfh " Modified: $relative_mapped_file\n";
311 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
316 if ($cpan_file eq $relative_mapped_file) {
317 print $outfh " Unchanged: $cpan_file\n";
320 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
324 for (sort keys %perl_unseen) {
325 print $outfh " Perl only: $_\n" unless $use_diff;
330 # given FooBar-1.23_45.tar.gz, return FooBar
334 $d =~ s/\.tar\.gz$//;
336 $d =~ s/[\d\-_\.]+$//;
340 # process --crosscheck action:
341 # ie list all distributions whose CPAN versions differ from that listed in
345 my ($outfh, $cache_dir, $force, $modules) = @_;
347 my $file = '02packages.details.txt';
348 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
349 my $path = File::Spec->catfile($download_dir, $file);
350 my $gzfile = "$path.gz";
352 # grab 02packages.details.txt
354 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
356 if (! -f $gzfile or $force) {
358 my_getstore($url, $gzfile);
361 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
362 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
364 # suck in the data from it
366 open my $fh, '<', $path
367 or die "ERROR: open: $file: $!\n";
375 my @f = split ' ', $_;
377 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
381 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
382 $modules{$f[0]} = $distro;
384 (my $short_distro = $distro) =~ s{^.*/}{};
386 $distros{distro_base($short_distro)}{$distro} = 1;
389 for my $module (@$modules) {
390 my $m = $Maintainers::Modules{$module}
391 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
393 unless ($m->{CPAN}) {
394 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
398 # given an entry like
399 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
400 # first compare the module name against Foo::Bar, and failing that,
403 my $pdist = $m->{DISTRIBUTION};
404 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
406 my $cdist = $modules{$module};
407 (my $short_pdist = $pdist) =~ s{^.*/}{};
409 unless (defined $cdist) {
410 my $d = $distros{distro_base($short_pdist)};
411 unless (defined $d) {
412 print $outfh "\n$module: Can't determine current CPAN entry\n";
416 print $outfh "\n$module: (found more than one CPAN candidate):\n";
417 print $outfh " perl: $pdist\n";
418 print $outfh " CPAN: $_\n" for sort keys %$d;
421 $cdist = (keys %$d)[0];
424 if ($cdist ne $pdist) {
425 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
432 # get the EXCLUDED and MAP entries for this module, or
433 # make up defauts if they don't exist
436 my ($m, $module_name, $perl_files) = @_;
438 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
442 return $excluded, $map if $map;
444 # all files under ext/foo-bar (plus maybe some under t/lib)???
448 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
449 if (defined $ext and $ext ne $1) {
450 # more than one ext/$ext/
466 $map = { '' => $ext },
469 (my $base = $module_name) =~ s{::}{/}g;
476 return $excluded, $map;
480 # Given an exclude list and a mapping hash, convert a CPAN filename
481 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
482 # Returns an empty list for an excluded file
485 my ($excluded, $map, $cpan_file) = @_;
487 for my $exclude (@$excluded) {
488 # may be a simple string to match exactly, or a pattern
490 return if $cpan_file =~ $exclude;
493 return if $cpan_file eq $exclude;
497 my $perl_file = $cpan_file;
499 # try longest prefix first, then alphabetically on tie-break
500 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
502 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
509 # do LWP::Simple::getstore, possibly without LWP::Simple being available
511 my $lwp_simple_available;
514 my ($url, $file) = @_;
515 unless (defined $lwp_simple_available) {
516 eval { require LWP::Simple };
517 $lwp_simple_available = $@ eq '';
519 if ($lwp_simple_available) {
520 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
523 return system(WGET_CMD, "-O", $file, $url) == 0;
528 # download and unpack a distribution
529 # Returns the full pathname of the extracted directory
530 # (eg '/tmp/XYZ/Foo_bar-1.23')
532 # cache_dir: where to dowenload the .tar.gz file to
533 # untar_dir: where to untar or unzup the file
534 # module: name of module
535 # dist: name of the distribution
537 sub get_distribution {
538 my ($cache_dir, $untar_dir, $module, $dist) = @_;
540 $dist =~ m{.+/([^/]+)$}
541 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
544 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
545 my $download_file = File::Spec->catfile($download_dir, $filename);
547 # download distribution
549 if (-f $download_file and ! -s $download_file ) {
550 # wget can leave a zero-length file on failed download
551 unlink $download_file;
554 unless (-f $download_file) {
556 $dist =~ /^([A-Z])([A-Z])/
557 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
559 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
560 my_getstore($url, $download_file)
561 or die "ERROR: Could not fetch '$url'\n";
564 # extract distribution
566 my $ae = Archive::Extract->new( archive => $download_file);
567 $ae->extract( to => $untar_dir )
568 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
570 # get the name of the extracted distribution dir
572 my $path = File::Spec->catfile($untar_dir, $filename);
574 $path =~ s/\.tar\.gz$// or
575 $path =~ s/\.zip$// or
576 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
578 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
584 # produce the diff of a single file
587 my $cpan_file = shift;
588 my $perl_file = shift;
590 my $diff_opts = shift;
593 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
595 push @cmd, $perl_file, $cpan_file;
598 push @cmd, $cpan_file, $perl_file;
602 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
604 print $outfh $result;