Commit | Line | Data |
cb097e7a |
1 | #!/usr/bin/env perl |
2 | |
3 | # core-cpan-diff: Compare CPAN modules with their equivalent in core |
4 | |
5 | # Originally based on App::DualLivedDiff by Steffen Mueller. |
6 | |
7 | use strict; |
8 | use warnings; |
9 | |
10 | use 5.010; |
11 | |
12 | use Getopt::Long; |
13 | use File::Temp (); |
14 | use File::Path (); |
15 | use File::Spec; |
16 | use Archive::Extract; |
17 | use IO::Uncompress::Gunzip (); |
18 | use File::Compare (); |
19 | use ExtUtils::Manifest; |
20 | |
21 | BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } |
22 | use lib 'Porting'; |
23 | use Maintainers (); |
24 | |
25 | # if running from blead, we may be doing -Ilib, which means when we |
26 | # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. |
27 | # So preload the things we need, and tell it to check %INC first: |
28 | |
29 | use Archive::Tar; |
30 | use IPC::Open3; |
31 | use IO::Select; |
32 | $Module::Load::Conditional::CHECK_INC_HASH = 1; |
33 | # stop Archive::Extract whinging about lack of Archive::Zip |
34 | $Archive::Extract::WARN = 0; |
35 | |
36 | |
37 | # Files, which if they exist in CPAN but not in perl, will not generate |
38 | # an 'Only in CPAN' listing |
39 | # |
40 | our %IGNORABLE = map { ($_ => 1) } |
41 | qw(.cvsignore .dualLivedDiffConfig .gitignore |
42 | ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL |
43 | CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS |
44 | GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL |
45 | MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README |
46 | SIGNATURE THANKS TODO Todo VERSION WHATSNEW); |
47 | |
48 | # where, under the cache dir, to untar stuff to |
49 | |
50 | use constant UNTAR_DIR => 'untarred'; |
51 | |
52 | use constant DIFF_CMD => 'diff'; |
53 | use constant WGET_CMD => 'wget'; |
54 | |
55 | sub usage { |
56 | print STDERR "\n@_\n\n" if @_; |
57 | print STDERR <<HERE; |
58 | Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] |
59 | |
60 | -a/--all Scan all dual-life modules. |
61 | |
62 | -c/--cachedir Where to save downloaded CPAN tarball files |
63 | (defaults to /tmp/something/ with deletion after each run). |
64 | |
65 | -d/--diff Display file differences using diff(1), rather than just |
66 | listing which files have changed. |
67 | The diff(1) command is assumed to be in your PATH. |
68 | |
69 | --diffopts Options to pass to the diff command. Defaults to '-u'. |
70 | |
71 | -f|force Force download from CPAN of new 02packages.details.txt file |
72 | (with --crosscheck only). |
73 | |
74 | -o/--output File name to write output to (defaults to STDOUT). |
75 | |
76 | -r/--reverse Reverses the diff (perl to CPAN). |
77 | |
78 | -v/--verbose List the fate of *all* files in the tarball, not just those |
79 | that differ or are missing. |
80 | |
81 | -x|crosscheck List the distributions whose current CPAN version differs from |
82 | that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). |
83 | |
84 | By default (i.e. without the --crosscheck option), for each listed module |
85 | (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball |
86 | from CPAN associated with that module, and compare the files in it with |
87 | those in the perl source tree. |
88 | |
89 | Must be run from the root of the perl source tree. |
90 | Module names must match the keys of %Modules in Maintainers.pl. |
91 | HERE |
92 | exit(1); |
93 | } |
94 | |
95 | |
96 | sub run { |
97 | my $scan_all; |
98 | my $diff_opts; |
99 | my $reverse = 0; |
100 | my $cache_dir; |
101 | my $use_diff; |
102 | my $output_file; |
103 | my $verbose; |
104 | my $force; |
105 | my $do_crosscheck; |
106 | |
107 | GetOptions( |
108 | 'a|all' => \$scan_all, |
109 | 'c|cachedir=s' => \$cache_dir, |
110 | 'd|diff' => \$use_diff, |
111 | 'diffopts:s' => \$diff_opts, |
112 | 'f|force' => \$force, |
113 | 'h|help' => \&usage, |
114 | 'o|output=s' => \$output_file, |
115 | 'r|reverse' => \$reverse, |
116 | 'v|verbose' => \$verbose, |
117 | 'x|crosscheck' => \$do_crosscheck, |
118 | ) or usage; |
119 | |
120 | |
121 | my @modules; |
122 | |
123 | usage("Cannot mix -a with module list") if $scan_all && @ARGV; |
124 | |
125 | if ($do_crosscheck) { |
126 | usage("can't use -r, -d, --diffopts, -v with --crosscheck") |
127 | if ($reverse || $use_diff || $diff_opts || $verbose); |
128 | } |
129 | else { |
130 | $diff_opts = '-u' unless defined $diff_opts; |
131 | usage("can't use -f without --crosscheck") if $force; |
132 | } |
133 | |
134 | @modules = $scan_all |
135 | ? grep $Maintainers::Modules{$_}{CPAN}, |
136 | (sort {lc $a cmp lc $b } keys %Maintainers::Modules) |
137 | : @ARGV; |
138 | usage("No modules specified") unless @modules; |
139 | |
140 | |
141 | my $outfh; |
142 | if (defined $output_file) { |
143 | open $outfh, '>', $output_file |
8c814d1a |
144 | or die "ERROR: could not open file '$output_file' for writing: $!\n"; |
cb097e7a |
145 | } |
146 | else { |
147 | open $outfh, ">&STDOUT" |
8c814d1a |
148 | or die "ERROR: can't dup STDOUT: $!\n"; |
cb097e7a |
149 | } |
150 | |
151 | if (defined $cache_dir) { |
152 | die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; |
153 | } |
154 | |
155 | if ($do_crosscheck) { |
156 | do_crosscheck($outfh, $cache_dir, $force, \@modules); |
157 | } |
158 | else { |
f0ce33d7 |
159 | do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff, |
cb097e7a |
160 | $reverse, $diff_opts); |
161 | } |
162 | } |
163 | |
164 | |
165 | |
166 | # compare a list of modules against their CPAN equivalents |
167 | |
168 | sub do_compare { |
f0ce33d7 |
169 | my ($modules, $outfh, $output_file, $cache_dir, $verbose, |
cb097e7a |
170 | $use_diff, $reverse, $diff_opts) = @_; |
171 | |
172 | |
173 | # first, make sure we have a directory where they can all be untarred, |
174 | # and if its a permanent directory, clear any previous content |
175 | my $untar_dir; |
176 | if ($cache_dir) { |
177 | $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); |
178 | if (-d $untar_dir) { |
179 | File::Path::rmtree($untar_dir) |
180 | or die "failed to remove $untar_dir\n"; |
181 | } |
182 | mkdir $untar_dir |
183 | or die "mkdir $untar_dir: $!\n"; |
184 | } |
185 | else { |
186 | $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); |
187 | } |
188 | |
189 | my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; |
190 | |
191 | my %seen_dist; |
192 | for my $module (@$modules) { |
f0ce33d7 |
193 | warn "Processing $module ...\n" if defined $output_file; |
cb097e7a |
194 | print $outfh "\n$module\n" unless $use_diff; |
195 | |
196 | my $m = $Maintainers::Modules{$module} |
197 | or die "ERROR: No such module in Maintainers.pl: '$module'\n"; |
198 | |
199 | unless ($m->{CPAN}) { |
200 | print $outfh "WARNING: $module is not dual-life; skipping\n"; |
201 | next; |
202 | } |
203 | |
204 | my $dist = $m->{DISTRIBUTION}; |
205 | die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; |
206 | |
207 | if ($seen_dist{$dist}) { |
208 | warn "WARNING: duplicate entry for $dist in $module\n" |
209 | } |
210 | $seen_dist{$dist}++; |
211 | |
8c814d1a |
212 | my $cpan_dir; |
213 | eval { |
214 | $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist) |
215 | }; |
216 | if ($@) { |
217 | print $outfh " ", $@; |
218 | print $outfh " (skipping)\n"; |
219 | next; |
220 | } |
cb097e7a |
221 | |
222 | my @perl_files = Maintainers::get_module_files($module); |
223 | |
224 | my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); |
225 | die "ERROR: no such file: $manifest\n" unless -f $manifest; |
226 | |
227 | my $cpan_files = ExtUtils::Manifest::maniread($manifest); |
228 | my @cpan_files = sort keys %$cpan_files; |
229 | |
230 | my ($excluded, $map) = get_map($m, $module, \@perl_files); |
231 | |
232 | my %perl_unseen; |
233 | @perl_unseen{@perl_files} = (); |
234 | my %perl_files = %perl_unseen; |
235 | |
236 | foreach my $cpan_file (@cpan_files) { |
237 | my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); |
238 | unless (defined $mapped_file) { |
239 | print $outfh " Excluded: $cpan_file\n" if $verbose; |
240 | next; |
241 | } |
242 | |
243 | if (exists $perl_files{$mapped_file}) { |
244 | delete $perl_unseen{$mapped_file}; |
245 | } |
246 | else { |
247 | # some CPAN files foo are stored in core as foo.packed, |
248 | # which are then unpacked by 'make test_prep' |
249 | my $packed_file = "$mapped_file.packed"; |
250 | if (exists $perl_files{$packed_file} ) { |
251 | if (! -f $mapped_file and -f $packed_file) { |
252 | print $outfh <<EOF; |
253 | WARNING: $mapped_file not found, but .packed variant exists. |
254 | Perhaps you need to run 'make test_prep'? |
255 | EOF |
256 | next; |
257 | } |
258 | delete $perl_unseen{$packed_file}; |
259 | } |
260 | else { |
261 | if ($ignorable{$cpan_file}) { |
262 | print $outfh " Ignored: $cpan_file\n" if $verbose; |
263 | next; |
264 | } |
265 | |
266 | unless ($use_diff) { |
267 | print $outfh " CPAN only: $cpan_file", |
268 | ($cpan_file eq $mapped_file) ? "\n" |
269 | : " (expected $mapped_file)\n"; |
270 | } |
271 | next; |
272 | } |
273 | } |
274 | |
275 | |
276 | my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file); |
277 | |
278 | # should never happen |
279 | die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; |
280 | |
281 | # might happen if the FILES entry in Maintainers.pl is wrong |
282 | unless (-f $mapped_file) { |
283 | print $outfh "WARNING: perl file not found: $mapped_file\n"; |
284 | next; |
285 | } |
286 | |
287 | |
288 | if (File::Compare::compare($abs_cpan_file, $mapped_file)) { |
289 | if ($use_diff) { |
290 | file_diff($outfh, $abs_cpan_file, $mapped_file, |
291 | $reverse, $diff_opts); |
292 | } |
293 | else { |
294 | if ($cpan_file eq $mapped_file) { |
295 | print $outfh " Modified: $cpan_file\n"; |
296 | } |
297 | else { |
298 | print $outfh " Modified: $cpan_file $mapped_file\n"; |
299 | } |
300 | } |
301 | } |
302 | elsif ($verbose) { |
303 | if ($cpan_file eq $mapped_file) { |
304 | print $outfh " Unchanged: $cpan_file\n"; |
305 | } |
306 | else { |
307 | print $outfh " Unchanged: $cpan_file $mapped_file\n"; |
308 | } |
309 | } |
310 | } |
311 | for (sort keys %perl_unseen) { |
312 | print $outfh " Perl only: $_\n" unless $use_diff; |
313 | } |
314 | } |
315 | } |
316 | |
317 | # given FooBar-1.23_45.tar.gz, return FooBar |
318 | |
319 | sub distro_base { |
320 | my $d = shift; |
321 | $d =~ s/\.tar\.gz$//; |
322 | $d =~ s/\.gip$//; |
323 | $d =~ s/[\d\-_\.]+$//; |
324 | return $d; |
325 | } |
326 | |
327 | # process --crosscheck action: |
328 | # ie list all distributions whose CPAN versions differ from that listed in |
329 | # Maintainers.pl |
330 | |
331 | sub do_crosscheck { |
332 | my ($outfh, $cache_dir, $force, $modules) = @_; |
333 | |
334 | my $file = '02packages.details.txt'; |
335 | my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); |
336 | my $path = File::Spec->catfile($download_dir, $file); |
337 | my $gzfile = "$path.gz"; |
338 | |
339 | # grab 02packages.details.txt |
340 | |
341 | my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; |
342 | |
343 | if (! -f $gzfile or $force) { |
344 | unlink $gzfile; |
345 | my_getstore($url, $gzfile); |
346 | } |
347 | unlink $path; |
348 | IO::Uncompress::Gunzip::gunzip($gzfile, $path) |
349 | or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; |
350 | |
351 | # suck in the data from it |
352 | |
353 | open my $fh, '<', $path |
354 | or die "ERROR: open: $file: $!\n"; |
355 | |
356 | my %distros; |
357 | my %modules; |
358 | |
359 | while (<$fh>) { |
360 | next if 1../^$/; |
361 | chomp; |
362 | my @f = split ' ', $_; |
363 | if (@f != 3) { |
364 | warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; |
365 | next; |
366 | } |
cb097e7a |
367 | my $distro = $f[2]; |
f0ce33d7 |
368 | $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ |
369 | $modules{$f[0]} = $distro; |
370 | |
371 | (my $short_distro = $distro) =~ s{^.*/}{}; |
cb097e7a |
372 | |
f0ce33d7 |
373 | $distros{distro_base($short_distro)}{$distro} = 1; |
cb097e7a |
374 | } |
375 | |
376 | for my $module (@$modules) { |
377 | my $m = $Maintainers::Modules{$module} |
378 | or die "ERROR: No such module in Maintainers.pl: '$module'\n"; |
379 | |
380 | unless ($m->{CPAN}) { |
381 | print $outfh "\nWARNING: $module is not dual-life; skipping\n"; |
382 | next; |
383 | } |
384 | |
f0ce33d7 |
385 | # given an entry like |
cb097e7a |
386 | # Foo::Bar 1.23 foo-bar-1.23.tar.gz, |
387 | # first compare the module name against Foo::Bar, and failing that, |
388 | # against foo-bar |
389 | |
390 | my $pdist = $m->{DISTRIBUTION}; |
391 | die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; |
cb097e7a |
392 | |
393 | my $cdist = $modules{$module}; |
f0ce33d7 |
394 | (my $short_pdist = $pdist) =~ s{^.*/}{}; |
cb097e7a |
395 | |
f0ce33d7 |
396 | unless (defined $cdist) { |
397 | my $d = $distros{distro_base($short_pdist)}; |
cb097e7a |
398 | unless (defined $d) { |
399 | print $outfh "\n$module: Can't determine current CPAN entry\n"; |
400 | next; |
401 | } |
402 | if (keys %$d > 1) { |
403 | print $outfh "\n$module: (found more than one CPAN candidate):\n"; |
404 | print $outfh " perl: $pdist\n"; |
405 | print $outfh " CPAN: $_\n" for sort keys %$d; |
406 | next; |
407 | } |
408 | $cdist = (keys %$d)[0]; |
409 | } |
410 | |
411 | if ($cdist ne $pdist) { |
412 | print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; |
413 | } |
414 | } |
415 | } |
416 | |
417 | |
418 | |
419 | # get the EXCLUDED and MAP entries for this module, or |
420 | # make up defauts if they don't exist |
421 | |
422 | sub get_map { |
423 | my ($m, $module_name, $perl_files) = @_; |
424 | |
425 | my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; |
426 | |
427 | $excluded ||= []; |
428 | |
429 | return $excluded, $map if $map; |
430 | |
431 | # all files under ext/foo-bar (plus maybe some under t/lib)??? |
432 | |
433 | my $ext; |
434 | for (@$perl_files) { |
435 | if (m{^(ext/[^/]+/)}) { |
436 | if (defined $ext and $ext ne $1) { |
437 | # more than one ext/$ext/ |
438 | undef $ext; |
439 | last; |
440 | } |
441 | $ext = $1; |
442 | } |
443 | elsif (m{^t/lib/}) { |
444 | next; |
445 | } |
446 | else { |
447 | undef $ext; |
448 | last; |
449 | } |
450 | } |
451 | |
452 | if (defined $ext) { |
453 | $map = { '' => $ext }, |
454 | } |
455 | else { |
456 | (my $base = $module_name) =~ s{::}{/}g; |
457 | $base ="lib/$base"; |
458 | $map = { |
459 | 'lib/' => 'lib/', |
460 | '' => "$base/", |
461 | }; |
462 | } |
463 | return $excluded, $map; |
464 | } |
465 | |
466 | |
467 | # Given an exclude list and a mapping hash, convert a CPAN filename |
468 | # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). |
469 | # Returns an empty list for an excluded file |
470 | |
471 | sub cpan_to_perl { |
472 | my ($excluded, $map, $cpan_file) = @_; |
473 | |
474 | for my $exclude (@$excluded) { |
475 | # may be a simple string to match exactly, or a pattern |
476 | if (ref $exclude) { |
477 | return if $cpan_file =~ $exclude; |
478 | } |
479 | else { |
480 | return if $cpan_file eq $exclude; |
481 | } |
482 | } |
483 | |
484 | my $perl_file = $cpan_file; |
485 | |
486 | # try longest prefix first, then alphabetically on tie-break |
487 | for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) |
488 | { |
489 | last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; |
490 | } |
491 | return $perl_file; |
492 | } |
493 | |
494 | |
495 | |
496 | # do LWP::Simple::getstore, possibly without LWP::Simple being available |
497 | |
498 | my $lwp_simple_available; |
499 | |
500 | sub my_getstore { |
501 | my ($url, $file) = @_; |
502 | unless (defined $lwp_simple_available) { |
503 | eval { require LWP::Simple }; |
504 | $lwp_simple_available = $@ eq ''; |
505 | } |
506 | if ($lwp_simple_available) { |
507 | return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); |
508 | } |
509 | else { |
510 | return system(WGET_CMD, "-O", $file, $url) == 0; |
511 | } |
512 | } |
513 | |
514 | |
515 | # download and unpack a distribution |
516 | # Returns the full pathname of the extracted directory |
517 | # (eg '/tmp/XYZ/Foo_bar-1.23') |
518 | |
519 | # cache_dir: where to dowenload the .tar.gz file to |
520 | # untar_dir: where to untar or unzup the file |
521 | # module: name of module |
522 | # dist: name of the distribution |
523 | |
524 | sub get_distribution { |
525 | my ($cache_dir, $untar_dir, $module, $dist) = @_; |
526 | |
527 | $dist =~ m{.+/([^/]+)$} |
8c814d1a |
528 | or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; |
cb097e7a |
529 | my $filename = $1; |
530 | |
531 | my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); |
532 | my $download_file = File::Spec->catfile($download_dir, $filename); |
533 | |
534 | # download distribution |
535 | |
536 | if (-f $download_file and ! -s $download_file ) { |
537 | # wget can leave a zero-length file on failed download |
538 | unlink $download_file; |
539 | } |
540 | |
541 | unless (-f $download_file) { |
542 | # not cached |
543 | $dist =~ /^([A-Z])([A-Z])/ |
8c814d1a |
544 | or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n"; |
cb097e7a |
545 | |
546 | my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; |
547 | my_getstore($url, $download_file) |
8c814d1a |
548 | or die "ERROR: Could not fetch '$url'\n"; |
cb097e7a |
549 | } |
550 | |
551 | # extract distribution |
552 | |
553 | my $ae = Archive::Extract->new( archive => $download_file); |
554 | $ae->extract( to => $untar_dir ) |
8c814d1a |
555 | or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n"; |
cb097e7a |
556 | |
557 | # get the name of the extracted distribution dir |
558 | |
559 | my $path = File::Spec->catfile($untar_dir, $filename); |
560 | |
561 | $path =~ s/\.tar\.gz$// or |
562 | $path =~ s/\.zip$// or |
563 | die "ERROR: downloaded file does not have a recognised suffix: $path\n"; |
564 | |
565 | die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; |
566 | |
567 | return $path; |
568 | } |
569 | |
570 | |
571 | # produce the diff of a single file |
572 | sub file_diff { |
573 | my $outfh = shift; |
574 | my $cpan_file = shift; |
575 | my $perl_file = shift; |
576 | my $reverse = shift; |
577 | my $diff_opts = shift; |
578 | |
579 | |
580 | my @cmd = (DIFF_CMD, split ' ', $diff_opts); |
581 | if ($reverse) { |
582 | push @cmd, $perl_file, $cpan_file; |
583 | } |
584 | else { |
585 | push @cmd, $cpan_file, $perl_file; |
586 | } |
587 | my $result = `@cmd`; |
588 | |
589 | $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; |
590 | |
591 | print $outfh $result; |
592 | } |
593 | |
594 | |
595 | run(); |
596 | |