Update Maintainers.pl accordingly
[p5sagit/p5-mst-13.2.git] / Porting / core-cpan-diff
CommitLineData
cb097e7a 1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD => 'diff';
53use constant WGET_CMD => 'wget';
54
55sub usage {
56 print STDERR "\n@_\n\n" if @_;
57 print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
68
69--diffopts Options to pass to the diff command. Defaults to '-u'.
70
71-f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
73
74-o/--output File name to write output to (defaults to STDOUT).
75
76-r/--reverse Reverses the diff (perl to CPAN).
77
78-v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
80
81-x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83
84By default (i.e. without the --crosscheck option), for each listed module
85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86from CPAN associated with that module, and compare the files in it with
87those in the perl source tree.
88
89Must be run from the root of the perl source tree.
90Module names must match the keys of %Modules in Maintainers.pl.
91HERE
92 exit(1);
93}
94
95
96sub run {
97 my $scan_all;
98 my $diff_opts;
99 my $reverse = 0;
100 my $cache_dir;
101 my $use_diff;
102 my $output_file;
103 my $verbose;
104 my $force;
105 my $do_crosscheck;
106
107 GetOptions(
108 'a|all' => \$scan_all,
109 'c|cachedir=s' => \$cache_dir,
110 'd|diff' => \$use_diff,
111 'diffopts:s' => \$diff_opts,
112 'f|force' => \$force,
113 'h|help' => \&usage,
114 'o|output=s' => \$output_file,
115 'r|reverse' => \$reverse,
116 'v|verbose' => \$verbose,
117 'x|crosscheck' => \$do_crosscheck,
118 ) or usage;
119
120
121 my @modules;
122
123 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
124
125 if ($do_crosscheck) {
126 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
127 if ($reverse || $use_diff || $diff_opts || $verbose);
128 }
129 else {
130 $diff_opts = '-u' unless defined $diff_opts;
131 usage("can't use -f without --crosscheck") if $force;
132 }
133
134 @modules = $scan_all
135 ? grep $Maintainers::Modules{$_}{CPAN},
136 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
137 : @ARGV;
138 usage("No modules specified") unless @modules;
139
140
141 my $outfh;
142 if (defined $output_file) {
143 open $outfh, '>', $output_file
144 or die "ERROR: could not open file '$output_file' for writing: $!";
145 }
146 else {
147 open $outfh, ">&STDOUT"
148 or die "ERROR: can't dup STDOUT: $!";
149 }
150
151 if (defined $cache_dir) {
152 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
153 }
154
155 if ($do_crosscheck) {
156 do_crosscheck($outfh, $cache_dir, $force, \@modules);
157 }
158 else {
159 do_compare(\@modules, $outfh, $cache_dir, $verbose, $use_diff,
160 $reverse, $diff_opts);
161 }
162}
163
164
165
166# compare a list of modules against their CPAN equivalents
167
168sub do_compare {
169 my ($modules, $outfh, $cache_dir, $verbose,
170 $use_diff, $reverse, $diff_opts) = @_;
171
172
173 # first, make sure we have a directory where they can all be untarred,
174 # and if its a permanent directory, clear any previous content
175 my $untar_dir;
176 if ($cache_dir) {
177 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
178 if (-d $untar_dir) {
179 File::Path::rmtree($untar_dir)
180 or die "failed to remove $untar_dir\n";
181 }
182 mkdir $untar_dir
183 or die "mkdir $untar_dir: $!\n";
184 }
185 else {
186 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
187 }
188
189 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
190
191 my %seen_dist;
192 for my $module (@$modules) {
193 print $outfh "\n$module\n" unless $use_diff;
194
195 my $m = $Maintainers::Modules{$module}
196 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
197
198 unless ($m->{CPAN}) {
199 print $outfh "WARNING: $module is not dual-life; skipping\n";
200 next;
201 }
202
203 my $dist = $m->{DISTRIBUTION};
204 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
205
206 if ($seen_dist{$dist}) {
207 warn "WARNING: duplicate entry for $dist in $module\n"
208 }
209 $seen_dist{$dist}++;
210
211 my $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist);
212
213
214 my @perl_files = Maintainers::get_module_files($module);
215
216 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
217 die "ERROR: no such file: $manifest\n" unless -f $manifest;
218
219 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
220 my @cpan_files = sort keys %$cpan_files;
221
222 my ($excluded, $map) = get_map($m, $module, \@perl_files);
223
224 my %perl_unseen;
225 @perl_unseen{@perl_files} = ();
226 my %perl_files = %perl_unseen;
227
228 foreach my $cpan_file (@cpan_files) {
229 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
230 unless (defined $mapped_file) {
231 print $outfh " Excluded: $cpan_file\n" if $verbose;
232 next;
233 }
234
235 if (exists $perl_files{$mapped_file}) {
236 delete $perl_unseen{$mapped_file};
237 }
238 else {
239 # some CPAN files foo are stored in core as foo.packed,
240 # which are then unpacked by 'make test_prep'
241 my $packed_file = "$mapped_file.packed";
242 if (exists $perl_files{$packed_file} ) {
243 if (! -f $mapped_file and -f $packed_file) {
244 print $outfh <<EOF;
245WARNING: $mapped_file not found, but .packed variant exists.
246Perhaps you need to run 'make test_prep'?
247EOF
248 next;
249 }
250 delete $perl_unseen{$packed_file};
251 }
252 else {
253 if ($ignorable{$cpan_file}) {
254 print $outfh " Ignored: $cpan_file\n" if $verbose;
255 next;
256 }
257
258 unless ($use_diff) {
259 print $outfh " CPAN only: $cpan_file",
260 ($cpan_file eq $mapped_file) ? "\n"
261 : " (expected $mapped_file)\n";
262 }
263 next;
264 }
265 }
266
267
268 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
269
270 # should never happen
271 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
272
273 # might happen if the FILES entry in Maintainers.pl is wrong
274 unless (-f $mapped_file) {
275 print $outfh "WARNING: perl file not found: $mapped_file\n";
276 next;
277 }
278
279
280 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
281 if ($use_diff) {
282 file_diff($outfh, $abs_cpan_file, $mapped_file,
283 $reverse, $diff_opts);
284 }
285 else {
286 if ($cpan_file eq $mapped_file) {
287 print $outfh " Modified: $cpan_file\n";
288 }
289 else {
290 print $outfh " Modified: $cpan_file $mapped_file\n";
291 }
292 }
293 }
294 elsif ($verbose) {
295 if ($cpan_file eq $mapped_file) {
296 print $outfh " Unchanged: $cpan_file\n";
297 }
298 else {
299 print $outfh " Unchanged: $cpan_file $mapped_file\n";
300 }
301 }
302 }
303 for (sort keys %perl_unseen) {
304 print $outfh " Perl only: $_\n" unless $use_diff;
305 }
306 }
307}
308
309# given FooBar-1.23_45.tar.gz, return FooBar
310
311sub distro_base {
312 my $d = shift;
313 $d =~ s/\.tar\.gz$//;
314 $d =~ s/\.gip$//;
315 $d =~ s/[\d\-_\.]+$//;
316 return $d;
317}
318
319# process --crosscheck action:
320# ie list all distributions whose CPAN versions differ from that listed in
321# Maintainers.pl
322
323sub do_crosscheck {
324 my ($outfh, $cache_dir, $force, $modules) = @_;
325
326 my $file = '02packages.details.txt';
327 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
328 my $path = File::Spec->catfile($download_dir, $file);
329 my $gzfile = "$path.gz";
330
331 # grab 02packages.details.txt
332
333 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
334
335 if (! -f $gzfile or $force) {
336 unlink $gzfile;
337 my_getstore($url, $gzfile);
338 }
339 unlink $path;
340 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
341 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
342
343 # suck in the data from it
344
345 open my $fh, '<', $path
346 or die "ERROR: open: $file: $!\n";
347
348 my %distros;
349 my %modules;
350
351 while (<$fh>) {
352 next if 1../^$/;
353 chomp;
354 my @f = split ' ', $_;
355 if (@f != 3) {
356 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
357 next;
358 }
359 $modules{$f[0]} = $f[2];
360
361 my $distro = $f[2];
362 $distro =~ s{^.*/}{};
363
364 $distros{distro_base($distro)}{$distro} = 1;
365 }
366
367 for my $module (@$modules) {
368 my $m = $Maintainers::Modules{$module}
369 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
370
371 unless ($m->{CPAN}) {
372 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
373 next;
374 }
375
376
377 # given an try like
378 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
379 # first compare the module name against Foo::Bar, and failing that,
380 # against foo-bar
381
382 my $pdist = $m->{DISTRIBUTION};
383 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
384 $pdist =~ s{^.*/}{};
385
386 my $cdist = $modules{$module};
387
388 if (defined $cdist) {
389 $cdist =~ s{^.*/}{};
390 }
391 else {
392 my $d = $distros{distro_base($pdist)};
393 unless (defined $d) {
394 print $outfh "\n$module: Can't determine current CPAN entry\n";
395 next;
396 }
397 if (keys %$d > 1) {
398 print $outfh "\n$module: (found more than one CPAN candidate):\n";
399 print $outfh " perl: $pdist\n";
400 print $outfh " CPAN: $_\n" for sort keys %$d;
401 next;
402 }
403 $cdist = (keys %$d)[0];
404 }
405
406 if ($cdist ne $pdist) {
407 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
408 }
409 }
410}
411
412
413
414# get the EXCLUDED and MAP entries for this module, or
415# make up defauts if they don't exist
416
417sub get_map {
418 my ($m, $module_name, $perl_files) = @_;
419
420 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
421
422 $excluded ||= [];
423
424 return $excluded, $map if $map;
425
426 # all files under ext/foo-bar (plus maybe some under t/lib)???
427
428 my $ext;
429 for (@$perl_files) {
430 if (m{^(ext/[^/]+/)}) {
431 if (defined $ext and $ext ne $1) {
432 # more than one ext/$ext/
433 undef $ext;
434 last;
435 }
436 $ext = $1;
437 }
438 elsif (m{^t/lib/}) {
439 next;
440 }
441 else {
442 undef $ext;
443 last;
444 }
445 }
446
447 if (defined $ext) {
448 $map = { '' => $ext },
449 }
450 else {
451 (my $base = $module_name) =~ s{::}{/}g;
452 $base ="lib/$base";
453 $map = {
454 'lib/' => 'lib/',
455 '' => "$base/",
456 };
457 }
458 return $excluded, $map;
459}
460
461
462# Given an exclude list and a mapping hash, convert a CPAN filename
463# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
464# Returns an empty list for an excluded file
465
466sub cpan_to_perl {
467 my ($excluded, $map, $cpan_file) = @_;
468
469 for my $exclude (@$excluded) {
470 # may be a simple string to match exactly, or a pattern
471 if (ref $exclude) {
472 return if $cpan_file =~ $exclude;
473 }
474 else {
475 return if $cpan_file eq $exclude;
476 }
477 }
478
479 my $perl_file = $cpan_file;
480
481 # try longest prefix first, then alphabetically on tie-break
482 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
483 {
484 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
485 }
486 return $perl_file;
487}
488
489
490
491# do LWP::Simple::getstore, possibly without LWP::Simple being available
492
493my $lwp_simple_available;
494
495sub my_getstore {
496 my ($url, $file) = @_;
497 unless (defined $lwp_simple_available) {
498 eval { require LWP::Simple };
499 $lwp_simple_available = $@ eq '';
500 }
501 if ($lwp_simple_available) {
502 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
503 }
504 else {
505 return system(WGET_CMD, "-O", $file, $url) == 0;
506 }
507}
508
509
510# download and unpack a distribution
511# Returns the full pathname of the extracted directory
512# (eg '/tmp/XYZ/Foo_bar-1.23')
513
514# cache_dir: where to dowenload the .tar.gz file to
515# untar_dir: where to untar or unzup the file
516# module: name of module
517# dist: name of the distribution
518
519sub get_distribution {
520 my ($cache_dir, $untar_dir, $module, $dist) = @_;
521
522 $dist =~ m{.+/([^/]+)$}
523 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist";
524 my $filename = $1;
525
526 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
527 my $download_file = File::Spec->catfile($download_dir, $filename);
528
529 # download distribution
530
531 if (-f $download_file and ! -s $download_file ) {
532 # wget can leave a zero-length file on failed download
533 unlink $download_file;
534 }
535
536 unless (-f $download_file) {
537 # not cached
538 $dist =~ /^([A-Z])([A-Z])/
539 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist";
540
541 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
542 my_getstore($url, $download_file)
543 or die "ERROR: Could not fetch '$url'";
544 }
545
546 # extract distribution
547
548 my $ae = Archive::Extract->new( archive => $download_file);
549 $ae->extract( to => $untar_dir )
550 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error();
551
552 # get the name of the extracted distribution dir
553
554 my $path = File::Spec->catfile($untar_dir, $filename);
555
556 $path =~ s/\.tar\.gz$// or
557 $path =~ s/\.zip$// or
558 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
559
560 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
561
562 return $path;
563}
564
565
566# produce the diff of a single file
567sub file_diff {
568 my $outfh = shift;
569 my $cpan_file = shift;
570 my $perl_file = shift;
571 my $reverse = shift;
572 my $diff_opts = shift;
573
574
575 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
576 if ($reverse) {
577 push @cmd, $perl_file, $cpan_file;
578 }
579 else {
580 push @cmd, $cpan_file, $perl_file;
581 }
582 my $result = `@cmd`;
583
584 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
585
586 print $outfh $result;
587}
588
589
590run();
591