Commit | Line | Data |
cb097e7a |
1 | #!/usr/bin/env perl |
2 | |
3 | # core-cpan-diff: Compare CPAN modules with their equivalent in core |
4 | |
5 | # Originally based on App::DualLivedDiff by Steffen Mueller. |
6 | |
7 | use strict; |
8 | use warnings; |
9 | |
10 | use 5.010; |
11 | |
12 | use Getopt::Long; |
13 | use File::Temp (); |
14 | use File::Path (); |
15 | use File::Spec; |
16 | use Archive::Extract; |
17 | use IO::Uncompress::Gunzip (); |
18 | use File::Compare (); |
19 | use ExtUtils::Manifest; |
20 | |
21 | BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } |
22 | use lib 'Porting'; |
23 | use Maintainers (); |
24 | |
25 | # if running from blead, we may be doing -Ilib, which means when we |
26 | # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. |
27 | # So preload the things we need, and tell it to check %INC first: |
28 | |
29 | use Archive::Tar; |
30 | use IPC::Open3; |
31 | use IO::Select; |
32 | $Module::Load::Conditional::CHECK_INC_HASH = 1; |
33 | # stop Archive::Extract whinging about lack of Archive::Zip |
34 | $Archive::Extract::WARN = 0; |
35 | |
36 | |
37 | # Files, which if they exist in CPAN but not in perl, will not generate |
38 | # an 'Only in CPAN' listing |
39 | # |
40 | our %IGNORABLE = map { ($_ => 1) } |
41 | qw(.cvsignore .dualLivedDiffConfig .gitignore |
42 | ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL |
43 | CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS |
44 | GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL |
45 | MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README |
46 | SIGNATURE THANKS TODO Todo VERSION WHATSNEW); |
47 | |
48 | # where, under the cache dir, to untar stuff to |
49 | |
50 | use constant UNTAR_DIR => 'untarred'; |
51 | |
52 | use constant DIFF_CMD => 'diff'; |
53 | use constant WGET_CMD => 'wget'; |
54 | |
55 | sub usage { |
56 | print STDERR "\n@_\n\n" if @_; |
57 | print STDERR <<HERE; |
58 | Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] |
59 | |
60 | -a/--all Scan all dual-life modules. |
61 | |
62 | -c/--cachedir Where to save downloaded CPAN tarball files |
63 | (defaults to /tmp/something/ with deletion after each run). |
64 | |
65 | -d/--diff Display file differences using diff(1), rather than just |
66 | listing which files have changed. |
67 | The diff(1) command is assumed to be in your PATH. |
68 | |
69 | --diffopts Options to pass to the diff command. Defaults to '-u'. |
70 | |
71 | -f|force Force download from CPAN of new 02packages.details.txt file |
72 | (with --crosscheck only). |
73 | |
74 | -o/--output File name to write output to (defaults to STDOUT). |
75 | |
76 | -r/--reverse Reverses the diff (perl to CPAN). |
77 | |
78 | -v/--verbose List the fate of *all* files in the tarball, not just those |
79 | that differ or are missing. |
80 | |
81 | -x|crosscheck List the distributions whose current CPAN version differs from |
82 | that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). |
83 | |
84 | By default (i.e. without the --crosscheck option), for each listed module |
85 | (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball |
86 | from CPAN associated with that module, and compare the files in it with |
87 | those in the perl source tree. |
88 | |
89 | Must be run from the root of the perl source tree. |
90 | Module names must match the keys of %Modules in Maintainers.pl. |
91 | HERE |
92 | exit(1); |
93 | } |
94 | |
95 | |
96 | sub run { |
97 | my $scan_all; |
98 | my $diff_opts; |
99 | my $reverse = 0; |
100 | my $cache_dir; |
101 | my $use_diff; |
102 | my $output_file; |
103 | my $verbose; |
104 | my $force; |
105 | my $do_crosscheck; |
106 | |
107 | GetOptions( |
108 | 'a|all' => \$scan_all, |
109 | 'c|cachedir=s' => \$cache_dir, |
110 | 'd|diff' => \$use_diff, |
111 | 'diffopts:s' => \$diff_opts, |
112 | 'f|force' => \$force, |
113 | 'h|help' => \&usage, |
114 | 'o|output=s' => \$output_file, |
115 | 'r|reverse' => \$reverse, |
116 | 'v|verbose' => \$verbose, |
117 | 'x|crosscheck' => \$do_crosscheck, |
118 | ) or usage; |
119 | |
120 | |
121 | my @modules; |
122 | |
123 | usage("Cannot mix -a with module list") if $scan_all && @ARGV; |
124 | |
125 | if ($do_crosscheck) { |
126 | usage("can't use -r, -d, --diffopts, -v with --crosscheck") |
127 | if ($reverse || $use_diff || $diff_opts || $verbose); |
128 | } |
129 | else { |
130 | $diff_opts = '-u' unless defined $diff_opts; |
131 | usage("can't use -f without --crosscheck") if $force; |
132 | } |
133 | |
134 | @modules = $scan_all |
135 | ? grep $Maintainers::Modules{$_}{CPAN}, |
136 | (sort {lc $a cmp lc $b } keys %Maintainers::Modules) |
137 | : @ARGV; |
138 | usage("No modules specified") unless @modules; |
139 | |
140 | |
141 | my $outfh; |
142 | if (defined $output_file) { |
143 | open $outfh, '>', $output_file |
144 | or die "ERROR: could not open file '$output_file' for writing: $!"; |
145 | } |
146 | else { |
147 | open $outfh, ">&STDOUT" |
148 | or die "ERROR: can't dup STDOUT: $!"; |
149 | } |
150 | |
151 | if (defined $cache_dir) { |
152 | die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; |
153 | } |
154 | |
155 | if ($do_crosscheck) { |
156 | do_crosscheck($outfh, $cache_dir, $force, \@modules); |
157 | } |
158 | else { |
159 | do_compare(\@modules, $outfh, $cache_dir, $verbose, $use_diff, |
160 | $reverse, $diff_opts); |
161 | } |
162 | } |
163 | |
164 | |
165 | |
166 | # compare a list of modules against their CPAN equivalents |
167 | |
168 | sub do_compare { |
169 | my ($modules, $outfh, $cache_dir, $verbose, |
170 | $use_diff, $reverse, $diff_opts) = @_; |
171 | |
172 | |
173 | # first, make sure we have a directory where they can all be untarred, |
174 | # and if its a permanent directory, clear any previous content |
175 | my $untar_dir; |
176 | if ($cache_dir) { |
177 | $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); |
178 | if (-d $untar_dir) { |
179 | File::Path::rmtree($untar_dir) |
180 | or die "failed to remove $untar_dir\n"; |
181 | } |
182 | mkdir $untar_dir |
183 | or die "mkdir $untar_dir: $!\n"; |
184 | } |
185 | else { |
186 | $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); |
187 | } |
188 | |
189 | my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; |
190 | |
191 | my %seen_dist; |
192 | for my $module (@$modules) { |
193 | print $outfh "\n$module\n" unless $use_diff; |
194 | |
195 | my $m = $Maintainers::Modules{$module} |
196 | or die "ERROR: No such module in Maintainers.pl: '$module'\n"; |
197 | |
198 | unless ($m->{CPAN}) { |
199 | print $outfh "WARNING: $module is not dual-life; skipping\n"; |
200 | next; |
201 | } |
202 | |
203 | my $dist = $m->{DISTRIBUTION}; |
204 | die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; |
205 | |
206 | if ($seen_dist{$dist}) { |
207 | warn "WARNING: duplicate entry for $dist in $module\n" |
208 | } |
209 | $seen_dist{$dist}++; |
210 | |
211 | my $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist); |
212 | |
213 | |
214 | my @perl_files = Maintainers::get_module_files($module); |
215 | |
216 | my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); |
217 | die "ERROR: no such file: $manifest\n" unless -f $manifest; |
218 | |
219 | my $cpan_files = ExtUtils::Manifest::maniread($manifest); |
220 | my @cpan_files = sort keys %$cpan_files; |
221 | |
222 | my ($excluded, $map) = get_map($m, $module, \@perl_files); |
223 | |
224 | my %perl_unseen; |
225 | @perl_unseen{@perl_files} = (); |
226 | my %perl_files = %perl_unseen; |
227 | |
228 | foreach my $cpan_file (@cpan_files) { |
229 | my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); |
230 | unless (defined $mapped_file) { |
231 | print $outfh " Excluded: $cpan_file\n" if $verbose; |
232 | next; |
233 | } |
234 | |
235 | if (exists $perl_files{$mapped_file}) { |
236 | delete $perl_unseen{$mapped_file}; |
237 | } |
238 | else { |
239 | # some CPAN files foo are stored in core as foo.packed, |
240 | # which are then unpacked by 'make test_prep' |
241 | my $packed_file = "$mapped_file.packed"; |
242 | if (exists $perl_files{$packed_file} ) { |
243 | if (! -f $mapped_file and -f $packed_file) { |
244 | print $outfh <<EOF; |
245 | WARNING: $mapped_file not found, but .packed variant exists. |
246 | Perhaps you need to run 'make test_prep'? |
247 | EOF |
248 | next; |
249 | } |
250 | delete $perl_unseen{$packed_file}; |
251 | } |
252 | else { |
253 | if ($ignorable{$cpan_file}) { |
254 | print $outfh " Ignored: $cpan_file\n" if $verbose; |
255 | next; |
256 | } |
257 | |
258 | unless ($use_diff) { |
259 | print $outfh " CPAN only: $cpan_file", |
260 | ($cpan_file eq $mapped_file) ? "\n" |
261 | : " (expected $mapped_file)\n"; |
262 | } |
263 | next; |
264 | } |
265 | } |
266 | |
267 | |
268 | my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file); |
269 | |
270 | # should never happen |
271 | die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; |
272 | |
273 | # might happen if the FILES entry in Maintainers.pl is wrong |
274 | unless (-f $mapped_file) { |
275 | print $outfh "WARNING: perl file not found: $mapped_file\n"; |
276 | next; |
277 | } |
278 | |
279 | |
280 | if (File::Compare::compare($abs_cpan_file, $mapped_file)) { |
281 | if ($use_diff) { |
282 | file_diff($outfh, $abs_cpan_file, $mapped_file, |
283 | $reverse, $diff_opts); |
284 | } |
285 | else { |
286 | if ($cpan_file eq $mapped_file) { |
287 | print $outfh " Modified: $cpan_file\n"; |
288 | } |
289 | else { |
290 | print $outfh " Modified: $cpan_file $mapped_file\n"; |
291 | } |
292 | } |
293 | } |
294 | elsif ($verbose) { |
295 | if ($cpan_file eq $mapped_file) { |
296 | print $outfh " Unchanged: $cpan_file\n"; |
297 | } |
298 | else { |
299 | print $outfh " Unchanged: $cpan_file $mapped_file\n"; |
300 | } |
301 | } |
302 | } |
303 | for (sort keys %perl_unseen) { |
304 | print $outfh " Perl only: $_\n" unless $use_diff; |
305 | } |
306 | } |
307 | } |
308 | |
309 | # given FooBar-1.23_45.tar.gz, return FooBar |
310 | |
311 | sub distro_base { |
312 | my $d = shift; |
313 | $d =~ s/\.tar\.gz$//; |
314 | $d =~ s/\.gip$//; |
315 | $d =~ s/[\d\-_\.]+$//; |
316 | return $d; |
317 | } |
318 | |
319 | # process --crosscheck action: |
320 | # ie list all distributions whose CPAN versions differ from that listed in |
321 | # Maintainers.pl |
322 | |
323 | sub do_crosscheck { |
324 | my ($outfh, $cache_dir, $force, $modules) = @_; |
325 | |
326 | my $file = '02packages.details.txt'; |
327 | my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); |
328 | my $path = File::Spec->catfile($download_dir, $file); |
329 | my $gzfile = "$path.gz"; |
330 | |
331 | # grab 02packages.details.txt |
332 | |
333 | my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; |
334 | |
335 | if (! -f $gzfile or $force) { |
336 | unlink $gzfile; |
337 | my_getstore($url, $gzfile); |
338 | } |
339 | unlink $path; |
340 | IO::Uncompress::Gunzip::gunzip($gzfile, $path) |
341 | or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; |
342 | |
343 | # suck in the data from it |
344 | |
345 | open my $fh, '<', $path |
346 | or die "ERROR: open: $file: $!\n"; |
347 | |
348 | my %distros; |
349 | my %modules; |
350 | |
351 | while (<$fh>) { |
352 | next if 1../^$/; |
353 | chomp; |
354 | my @f = split ' ', $_; |
355 | if (@f != 3) { |
356 | warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; |
357 | next; |
358 | } |
359 | $modules{$f[0]} = $f[2]; |
360 | |
361 | my $distro = $f[2]; |
362 | $distro =~ s{^.*/}{}; |
363 | |
364 | $distros{distro_base($distro)}{$distro} = 1; |
365 | } |
366 | |
367 | for my $module (@$modules) { |
368 | my $m = $Maintainers::Modules{$module} |
369 | or die "ERROR: No such module in Maintainers.pl: '$module'\n"; |
370 | |
371 | unless ($m->{CPAN}) { |
372 | print $outfh "\nWARNING: $module is not dual-life; skipping\n"; |
373 | next; |
374 | } |
375 | |
376 | |
377 | # given an try like |
378 | # Foo::Bar 1.23 foo-bar-1.23.tar.gz, |
379 | # first compare the module name against Foo::Bar, and failing that, |
380 | # against foo-bar |
381 | |
382 | my $pdist = $m->{DISTRIBUTION}; |
383 | die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; |
384 | $pdist =~ s{^.*/}{}; |
385 | |
386 | my $cdist = $modules{$module}; |
387 | |
388 | if (defined $cdist) { |
389 | $cdist =~ s{^.*/}{}; |
390 | } |
391 | else { |
392 | my $d = $distros{distro_base($pdist)}; |
393 | unless (defined $d) { |
394 | print $outfh "\n$module: Can't determine current CPAN entry\n"; |
395 | next; |
396 | } |
397 | if (keys %$d > 1) { |
398 | print $outfh "\n$module: (found more than one CPAN candidate):\n"; |
399 | print $outfh " perl: $pdist\n"; |
400 | print $outfh " CPAN: $_\n" for sort keys %$d; |
401 | next; |
402 | } |
403 | $cdist = (keys %$d)[0]; |
404 | } |
405 | |
406 | if ($cdist ne $pdist) { |
407 | print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; |
408 | } |
409 | } |
410 | } |
411 | |
412 | |
413 | |
414 | # get the EXCLUDED and MAP entries for this module, or |
415 | # make up defauts if they don't exist |
416 | |
417 | sub get_map { |
418 | my ($m, $module_name, $perl_files) = @_; |
419 | |
420 | my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; |
421 | |
422 | $excluded ||= []; |
423 | |
424 | return $excluded, $map if $map; |
425 | |
426 | # all files under ext/foo-bar (plus maybe some under t/lib)??? |
427 | |
428 | my $ext; |
429 | for (@$perl_files) { |
430 | if (m{^(ext/[^/]+/)}) { |
431 | if (defined $ext and $ext ne $1) { |
432 | # more than one ext/$ext/ |
433 | undef $ext; |
434 | last; |
435 | } |
436 | $ext = $1; |
437 | } |
438 | elsif (m{^t/lib/}) { |
439 | next; |
440 | } |
441 | else { |
442 | undef $ext; |
443 | last; |
444 | } |
445 | } |
446 | |
447 | if (defined $ext) { |
448 | $map = { '' => $ext }, |
449 | } |
450 | else { |
451 | (my $base = $module_name) =~ s{::}{/}g; |
452 | $base ="lib/$base"; |
453 | $map = { |
454 | 'lib/' => 'lib/', |
455 | '' => "$base/", |
456 | }; |
457 | } |
458 | return $excluded, $map; |
459 | } |
460 | |
461 | |
462 | # Given an exclude list and a mapping hash, convert a CPAN filename |
463 | # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). |
464 | # Returns an empty list for an excluded file |
465 | |
466 | sub cpan_to_perl { |
467 | my ($excluded, $map, $cpan_file) = @_; |
468 | |
469 | for my $exclude (@$excluded) { |
470 | # may be a simple string to match exactly, or a pattern |
471 | if (ref $exclude) { |
472 | return if $cpan_file =~ $exclude; |
473 | } |
474 | else { |
475 | return if $cpan_file eq $exclude; |
476 | } |
477 | } |
478 | |
479 | my $perl_file = $cpan_file; |
480 | |
481 | # try longest prefix first, then alphabetically on tie-break |
482 | for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) |
483 | { |
484 | last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; |
485 | } |
486 | return $perl_file; |
487 | } |
488 | |
489 | |
490 | |
491 | # do LWP::Simple::getstore, possibly without LWP::Simple being available |
492 | |
493 | my $lwp_simple_available; |
494 | |
495 | sub my_getstore { |
496 | my ($url, $file) = @_; |
497 | unless (defined $lwp_simple_available) { |
498 | eval { require LWP::Simple }; |
499 | $lwp_simple_available = $@ eq ''; |
500 | } |
501 | if ($lwp_simple_available) { |
502 | return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); |
503 | } |
504 | else { |
505 | return system(WGET_CMD, "-O", $file, $url) == 0; |
506 | } |
507 | } |
508 | |
509 | |
510 | # download and unpack a distribution |
511 | # Returns the full pathname of the extracted directory |
512 | # (eg '/tmp/XYZ/Foo_bar-1.23') |
513 | |
514 | # cache_dir: where to dowenload the .tar.gz file to |
515 | # untar_dir: where to untar or unzup the file |
516 | # module: name of module |
517 | # dist: name of the distribution |
518 | |
519 | sub get_distribution { |
520 | my ($cache_dir, $untar_dir, $module, $dist) = @_; |
521 | |
522 | $dist =~ m{.+/([^/]+)$} |
523 | or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist"; |
524 | my $filename = $1; |
525 | |
526 | my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); |
527 | my $download_file = File::Spec->catfile($download_dir, $filename); |
528 | |
529 | # download distribution |
530 | |
531 | if (-f $download_file and ! -s $download_file ) { |
532 | # wget can leave a zero-length file on failed download |
533 | unlink $download_file; |
534 | } |
535 | |
536 | unless (-f $download_file) { |
537 | # not cached |
538 | $dist =~ /^([A-Z])([A-Z])/ |
539 | or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist"; |
540 | |
541 | my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; |
542 | my_getstore($url, $download_file) |
543 | or die "ERROR: Could not fetch '$url'"; |
544 | } |
545 | |
546 | # extract distribution |
547 | |
548 | my $ae = Archive::Extract->new( archive => $download_file); |
549 | $ae->extract( to => $untar_dir ) |
550 | or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error(); |
551 | |
552 | # get the name of the extracted distribution dir |
553 | |
554 | my $path = File::Spec->catfile($untar_dir, $filename); |
555 | |
556 | $path =~ s/\.tar\.gz$// or |
557 | $path =~ s/\.zip$// or |
558 | die "ERROR: downloaded file does not have a recognised suffix: $path\n"; |
559 | |
560 | die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; |
561 | |
562 | return $path; |
563 | } |
564 | |
565 | |
566 | # produce the diff of a single file |
567 | sub file_diff { |
568 | my $outfh = shift; |
569 | my $cpan_file = shift; |
570 | my $perl_file = shift; |
571 | my $reverse = shift; |
572 | my $diff_opts = shift; |
573 | |
574 | |
575 | my @cmd = (DIFF_CMD, split ' ', $diff_opts); |
576 | if ($reverse) { |
577 | push @cmd, $perl_file, $cpan_file; |
578 | } |
579 | else { |
580 | push @cmd, $cpan_file, $perl_file; |
581 | } |
582 | my $result = `@cmd`; |
583 | |
584 | $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; |
585 | |
586 | print $outfh $result; |
587 | } |
588 | |
589 | |
590 | run(); |
591 | |