Commit | Line | Data |
cb097e7a |
1 | #!/usr/bin/env perl |
2 | |
3 | # core-cpan-diff: Compare CPAN modules with their equivalent in core |
4 | |
5 | # Originally based on App::DualLivedDiff by Steffen Mueller. |
6 | |
7 | use strict; |
8 | use warnings; |
9 | |
10 | use 5.010; |
11 | |
12 | use Getopt::Long; |
13 | use File::Temp (); |
14 | use File::Path (); |
15 | use File::Spec; |
16 | use Archive::Extract; |
17 | use IO::Uncompress::Gunzip (); |
18 | use File::Compare (); |
19 | use ExtUtils::Manifest; |
20 | |
21 | BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } |
22 | use lib 'Porting'; |
23 | use Maintainers (); |
24 | |
25 | # if running from blead, we may be doing -Ilib, which means when we |
26 | # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. |
27 | # So preload the things we need, and tell it to check %INC first: |
28 | |
29 | use Archive::Tar; |
30 | use IPC::Open3; |
31 | use IO::Select; |
32 | $Module::Load::Conditional::CHECK_INC_HASH = 1; |
33 | # stop Archive::Extract whinging about lack of Archive::Zip |
34 | $Archive::Extract::WARN = 0; |
35 | |
36 | |
37 | # Files, which if they exist in CPAN but not in perl, will not generate |
38 | # an 'Only in CPAN' listing |
39 | # |
40 | our %IGNORABLE = map { ($_ => 1) } |
41 | qw(.cvsignore .dualLivedDiffConfig .gitignore |
42 | ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL |
43 | CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS |
44 | GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL |
45 | MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README |
46 | SIGNATURE THANKS TODO Todo VERSION WHATSNEW); |
47 | |
48 | # where, under the cache dir, to untar stuff to |
49 | |
50 | use constant UNTAR_DIR => 'untarred'; |
51 | |
52 | use constant DIFF_CMD => 'diff'; |
53 | use constant WGET_CMD => 'wget'; |
54 | |
55 | sub usage { |
56 | print STDERR "\n@_\n\n" if @_; |
57 | print STDERR <<HERE; |
58 | Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] |
59 | |
60 | -a/--all Scan all dual-life modules. |
61 | |
62 | -c/--cachedir Where to save downloaded CPAN tarball files |
63 | (defaults to /tmp/something/ with deletion after each run). |
64 | |
65 | -d/--diff Display file differences using diff(1), rather than just |
66 | listing which files have changed. |
67 | The diff(1) command is assumed to be in your PATH. |
68 | |
69 | --diffopts Options to pass to the diff command. Defaults to '-u'. |
70 | |
71 | -f|force Force download from CPAN of new 02packages.details.txt file |
72 | (with --crosscheck only). |
73 | |
74 | -o/--output File name to write output to (defaults to STDOUT). |
75 | |
76 | -r/--reverse Reverses the diff (perl to CPAN). |
77 | |
78 | -v/--verbose List the fate of *all* files in the tarball, not just those |
79 | that differ or are missing. |
80 | |
81 | -x|crosscheck List the distributions whose current CPAN version differs from |
82 | that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). |
83 | |
84 | By default (i.e. without the --crosscheck option), for each listed module |
85 | (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball |
86 | from CPAN associated with that module, and compare the files in it with |
87 | those in the perl source tree. |
88 | |
89 | Must be run from the root of the perl source tree. |
90 | Module names must match the keys of %Modules in Maintainers.pl. |
91 | HERE |
92 | exit(1); |
93 | } |
94 | |
95 | |
96 | sub run { |
97 | my $scan_all; |
98 | my $diff_opts; |
99 | my $reverse = 0; |
100 | my $cache_dir; |
101 | my $use_diff; |
102 | my $output_file; |
103 | my $verbose; |
104 | my $force; |
105 | my $do_crosscheck; |
106 | |
107 | GetOptions( |
108 | 'a|all' => \$scan_all, |
109 | 'c|cachedir=s' => \$cache_dir, |
110 | 'd|diff' => \$use_diff, |
111 | 'diffopts:s' => \$diff_opts, |
112 | 'f|force' => \$force, |
113 | 'h|help' => \&usage, |
114 | 'o|output=s' => \$output_file, |
115 | 'r|reverse' => \$reverse, |
116 | 'v|verbose' => \$verbose, |
117 | 'x|crosscheck' => \$do_crosscheck, |
118 | ) or usage; |
119 | |
120 | |
121 | my @modules; |
122 | |
123 | usage("Cannot mix -a with module list") if $scan_all && @ARGV; |
124 | |
125 | if ($do_crosscheck) { |
126 | usage("can't use -r, -d, --diffopts, -v with --crosscheck") |
127 | if ($reverse || $use_diff || $diff_opts || $verbose); |
128 | } |
129 | else { |
130 | $diff_opts = '-u' unless defined $diff_opts; |
131 | usage("can't use -f without --crosscheck") if $force; |
132 | } |
133 | |
134 | @modules = $scan_all |
135 | ? grep $Maintainers::Modules{$_}{CPAN}, |
136 | (sort {lc $a cmp lc $b } keys %Maintainers::Modules) |
137 | : @ARGV; |
138 | usage("No modules specified") unless @modules; |
139 | |
140 | |
141 | my $outfh; |
142 | if (defined $output_file) { |
143 | open $outfh, '>', $output_file |
8c814d1a |
144 | or die "ERROR: could not open file '$output_file' for writing: $!\n"; |
cb097e7a |
145 | } |
146 | else { |
147 | open $outfh, ">&STDOUT" |
8c814d1a |
148 | or die "ERROR: can't dup STDOUT: $!\n"; |
cb097e7a |
149 | } |
150 | |
151 | if (defined $cache_dir) { |
152 | die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; |
153 | } |
154 | |
155 | if ($do_crosscheck) { |
156 | do_crosscheck($outfh, $cache_dir, $force, \@modules); |
157 | } |
158 | else { |
f0ce33d7 |
159 | do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff, |
cb097e7a |
160 | $reverse, $diff_opts); |
161 | } |
162 | } |
163 | |
164 | |
165 | |
166 | # compare a list of modules against their CPAN equivalents |
167 | |
168 | sub do_compare { |
f0ce33d7 |
169 | my ($modules, $outfh, $output_file, $cache_dir, $verbose, |
cb097e7a |
170 | $use_diff, $reverse, $diff_opts) = @_; |
171 | |
172 | |
173 | # first, make sure we have a directory where they can all be untarred, |
174 | # and if its a permanent directory, clear any previous content |
175 | my $untar_dir; |
176 | if ($cache_dir) { |
177 | $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); |
178 | if (-d $untar_dir) { |
179 | File::Path::rmtree($untar_dir) |
180 | or die "failed to remove $untar_dir\n"; |
181 | } |
182 | mkdir $untar_dir |
183 | or die "mkdir $untar_dir: $!\n"; |
184 | } |
185 | else { |
186 | $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); |
187 | } |
188 | |
189 | my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; |
190 | |
191 | my %seen_dist; |
192 | for my $module (@$modules) { |
f0ce33d7 |
193 | warn "Processing $module ...\n" if defined $output_file; |
cb097e7a |
194 | |
195 | my $m = $Maintainers::Modules{$module} |
196 | or die "ERROR: No such module in Maintainers.pl: '$module'\n"; |
197 | |
198 | unless ($m->{CPAN}) { |
199 | print $outfh "WARNING: $module is not dual-life; skipping\n"; |
200 | next; |
201 | } |
202 | |
203 | my $dist = $m->{DISTRIBUTION}; |
204 | die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; |
205 | |
206 | if ($seen_dist{$dist}) { |
207 | warn "WARNING: duplicate entry for $dist in $module\n" |
208 | } |
d55832d0 |
209 | |
9546b88d |
210 | print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff; |
d55832d0 |
211 | print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n"; |
212 | |
cb097e7a |
213 | $seen_dist{$dist}++; |
214 | |
8c814d1a |
215 | my $cpan_dir; |
216 | eval { |
217 | $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist) |
218 | }; |
219 | if ($@) { |
220 | print $outfh " ", $@; |
221 | print $outfh " (skipping)\n"; |
222 | next; |
223 | } |
cb097e7a |
224 | |
225 | my @perl_files = Maintainers::get_module_files($module); |
226 | |
227 | my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); |
228 | die "ERROR: no such file: $manifest\n" unless -f $manifest; |
229 | |
230 | my $cpan_files = ExtUtils::Manifest::maniread($manifest); |
231 | my @cpan_files = sort keys %$cpan_files; |
232 | |
233 | my ($excluded, $map) = get_map($m, $module, \@perl_files); |
234 | |
235 | my %perl_unseen; |
236 | @perl_unseen{@perl_files} = (); |
237 | my %perl_files = %perl_unseen; |
238 | |
239 | foreach my $cpan_file (@cpan_files) { |
240 | my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); |
241 | unless (defined $mapped_file) { |
242 | print $outfh " Excluded: $cpan_file\n" if $verbose; |
243 | next; |
244 | } |
245 | |
246 | if (exists $perl_files{$mapped_file}) { |
247 | delete $perl_unseen{$mapped_file}; |
248 | } |
249 | else { |
250 | # some CPAN files foo are stored in core as foo.packed, |
251 | # which are then unpacked by 'make test_prep' |
252 | my $packed_file = "$mapped_file.packed"; |
253 | if (exists $perl_files{$packed_file} ) { |
254 | if (! -f $mapped_file and -f $packed_file) { |
255 | print $outfh <<EOF; |
256 | WARNING: $mapped_file not found, but .packed variant exists. |
257 | Perhaps you need to run 'make test_prep'? |
258 | EOF |
259 | next; |
260 | } |
261 | delete $perl_unseen{$packed_file}; |
262 | } |
263 | else { |
264 | if ($ignorable{$cpan_file}) { |
265 | print $outfh " Ignored: $cpan_file\n" if $verbose; |
266 | next; |
267 | } |
268 | |
269 | unless ($use_diff) { |
270 | print $outfh " CPAN only: $cpan_file", |
271 | ($cpan_file eq $mapped_file) ? "\n" |
272 | : " (expected $mapped_file)\n"; |
273 | } |
274 | next; |
275 | } |
276 | } |
277 | |
278 | |
279 | my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file); |
280 | |
281 | # should never happen |
282 | die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; |
283 | |
284 | # might happen if the FILES entry in Maintainers.pl is wrong |
285 | unless (-f $mapped_file) { |
286 | print $outfh "WARNING: perl file not found: $mapped_file\n"; |
287 | next; |
288 | } |
289 | |
9546b88d |
290 | my $relative_mapped_file = $mapped_file; |
291 | $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///; |
cb097e7a |
292 | |
293 | if (File::Compare::compare($abs_cpan_file, $mapped_file)) { |
9546b88d |
294 | |
295 | |
296 | if ($use_diff) { |
cb097e7a |
297 | file_diff($outfh, $abs_cpan_file, $mapped_file, |
298 | $reverse, $diff_opts); |
299 | } |
300 | else { |
9546b88d |
301 | if ($cpan_file eq $relative_mapped_file) { |
302 | print $outfh " Modified: $relative_mapped_file\n"; |
cb097e7a |
303 | } |
304 | else { |
9546b88d |
305 | print $outfh " Modified: $cpan_file $relative_mapped_file\n"; |
cb097e7a |
306 | } |
307 | } |
308 | } |
309 | elsif ($verbose) { |
9546b88d |
310 | if ($cpan_file eq $relative_mapped_file) { |
cb097e7a |
311 | print $outfh " Unchanged: $cpan_file\n"; |
312 | } |
313 | else { |
9546b88d |
314 | print $outfh " Unchanged: $cpan_file $relative_mapped_file\n"; |
cb097e7a |
315 | } |
316 | } |
317 | } |
318 | for (sort keys %perl_unseen) { |
319 | print $outfh " Perl only: $_\n" unless $use_diff; |
320 | } |
321 | } |
322 | } |
323 | |
324 | # given FooBar-1.23_45.tar.gz, return FooBar |
325 | |
326 | sub distro_base { |
327 | my $d = shift; |
328 | $d =~ s/\.tar\.gz$//; |
329 | $d =~ s/\.gip$//; |
330 | $d =~ s/[\d\-_\.]+$//; |
331 | return $d; |
332 | } |
333 | |
334 | # process --crosscheck action: |
335 | # ie list all distributions whose CPAN versions differ from that listed in |
336 | # Maintainers.pl |
337 | |
338 | sub do_crosscheck { |
339 | my ($outfh, $cache_dir, $force, $modules) = @_; |
340 | |
341 | my $file = '02packages.details.txt'; |
342 | my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); |
343 | my $path = File::Spec->catfile($download_dir, $file); |
344 | my $gzfile = "$path.gz"; |
345 | |
346 | # grab 02packages.details.txt |
347 | |
348 | my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; |
349 | |
350 | if (! -f $gzfile or $force) { |
351 | unlink $gzfile; |
352 | my_getstore($url, $gzfile); |
353 | } |
354 | unlink $path; |
355 | IO::Uncompress::Gunzip::gunzip($gzfile, $path) |
356 | or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; |
357 | |
358 | # suck in the data from it |
359 | |
360 | open my $fh, '<', $path |
361 | or die "ERROR: open: $file: $!\n"; |
362 | |
363 | my %distros; |
364 | my %modules; |
365 | |
366 | while (<$fh>) { |
367 | next if 1../^$/; |
368 | chomp; |
369 | my @f = split ' ', $_; |
370 | if (@f != 3) { |
371 | warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; |
372 | next; |
373 | } |
cb097e7a |
374 | my $distro = $f[2]; |
f0ce33d7 |
375 | $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ |
376 | $modules{$f[0]} = $distro; |
377 | |
378 | (my $short_distro = $distro) =~ s{^.*/}{}; |
cb097e7a |
379 | |
f0ce33d7 |
380 | $distros{distro_base($short_distro)}{$distro} = 1; |
cb097e7a |
381 | } |
382 | |
383 | for my $module (@$modules) { |
384 | my $m = $Maintainers::Modules{$module} |
385 | or die "ERROR: No such module in Maintainers.pl: '$module'\n"; |
386 | |
387 | unless ($m->{CPAN}) { |
388 | print $outfh "\nWARNING: $module is not dual-life; skipping\n"; |
389 | next; |
390 | } |
391 | |
f0ce33d7 |
392 | # given an entry like |
cb097e7a |
393 | # Foo::Bar 1.23 foo-bar-1.23.tar.gz, |
394 | # first compare the module name against Foo::Bar, and failing that, |
395 | # against foo-bar |
396 | |
397 | my $pdist = $m->{DISTRIBUTION}; |
398 | die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; |
cb097e7a |
399 | |
400 | my $cdist = $modules{$module}; |
f0ce33d7 |
401 | (my $short_pdist = $pdist) =~ s{^.*/}{}; |
cb097e7a |
402 | |
f0ce33d7 |
403 | unless (defined $cdist) { |
404 | my $d = $distros{distro_base($short_pdist)}; |
cb097e7a |
405 | unless (defined $d) { |
406 | print $outfh "\n$module: Can't determine current CPAN entry\n"; |
407 | next; |
408 | } |
409 | if (keys %$d > 1) { |
410 | print $outfh "\n$module: (found more than one CPAN candidate):\n"; |
411 | print $outfh " perl: $pdist\n"; |
412 | print $outfh " CPAN: $_\n" for sort keys %$d; |
413 | next; |
414 | } |
415 | $cdist = (keys %$d)[0]; |
416 | } |
417 | |
418 | if ($cdist ne $pdist) { |
419 | print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; |
420 | } |
421 | } |
422 | } |
423 | |
424 | |
425 | |
426 | # get the EXCLUDED and MAP entries for this module, or |
427 | # make up defauts if they don't exist |
428 | |
429 | sub get_map { |
430 | my ($m, $module_name, $perl_files) = @_; |
431 | |
432 | my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; |
433 | |
434 | $excluded ||= []; |
435 | |
436 | return $excluded, $map if $map; |
437 | |
438 | # all files under ext/foo-bar (plus maybe some under t/lib)??? |
439 | |
440 | my $ext; |
441 | for (@$perl_files) { |
a193a2db |
442 | if (m{^((?:ext|dist|cpan)/[^/]+/)}) { |
cb097e7a |
443 | if (defined $ext and $ext ne $1) { |
444 | # more than one ext/$ext/ |
445 | undef $ext; |
446 | last; |
447 | } |
448 | $ext = $1; |
449 | } |
450 | elsif (m{^t/lib/}) { |
451 | next; |
452 | } |
453 | else { |
454 | undef $ext; |
455 | last; |
456 | } |
457 | } |
458 | |
459 | if (defined $ext) { |
460 | $map = { '' => $ext }, |
461 | } |
462 | else { |
463 | (my $base = $module_name) =~ s{::}{/}g; |
464 | $base ="lib/$base"; |
465 | $map = { |
466 | 'lib/' => 'lib/', |
467 | '' => "$base/", |
468 | }; |
469 | } |
470 | return $excluded, $map; |
471 | } |
472 | |
473 | |
474 | # Given an exclude list and a mapping hash, convert a CPAN filename |
475 | # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). |
476 | # Returns an empty list for an excluded file |
477 | |
478 | sub cpan_to_perl { |
479 | my ($excluded, $map, $cpan_file) = @_; |
480 | |
481 | for my $exclude (@$excluded) { |
482 | # may be a simple string to match exactly, or a pattern |
483 | if (ref $exclude) { |
484 | return if $cpan_file =~ $exclude; |
485 | } |
486 | else { |
487 | return if $cpan_file eq $exclude; |
488 | } |
489 | } |
490 | |
491 | my $perl_file = $cpan_file; |
492 | |
493 | # try longest prefix first, then alphabetically on tie-break |
494 | for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) |
495 | { |
496 | last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; |
497 | } |
498 | return $perl_file; |
499 | } |
500 | |
501 | |
502 | |
503 | # do LWP::Simple::getstore, possibly without LWP::Simple being available |
504 | |
505 | my $lwp_simple_available; |
506 | |
507 | sub my_getstore { |
508 | my ($url, $file) = @_; |
509 | unless (defined $lwp_simple_available) { |
510 | eval { require LWP::Simple }; |
511 | $lwp_simple_available = $@ eq ''; |
512 | } |
513 | if ($lwp_simple_available) { |
514 | return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); |
515 | } |
516 | else { |
517 | return system(WGET_CMD, "-O", $file, $url) == 0; |
518 | } |
519 | } |
520 | |
521 | |
522 | # download and unpack a distribution |
523 | # Returns the full pathname of the extracted directory |
524 | # (eg '/tmp/XYZ/Foo_bar-1.23') |
525 | |
526 | # cache_dir: where to dowenload the .tar.gz file to |
527 | # untar_dir: where to untar or unzup the file |
528 | # module: name of module |
529 | # dist: name of the distribution |
530 | |
531 | sub get_distribution { |
532 | my ($cache_dir, $untar_dir, $module, $dist) = @_; |
533 | |
534 | $dist =~ m{.+/([^/]+)$} |
8c814d1a |
535 | or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; |
cb097e7a |
536 | my $filename = $1; |
537 | |
538 | my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); |
539 | my $download_file = File::Spec->catfile($download_dir, $filename); |
540 | |
541 | # download distribution |
542 | |
543 | if (-f $download_file and ! -s $download_file ) { |
544 | # wget can leave a zero-length file on failed download |
545 | unlink $download_file; |
546 | } |
547 | |
548 | unless (-f $download_file) { |
549 | # not cached |
550 | $dist =~ /^([A-Z])([A-Z])/ |
8c814d1a |
551 | or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n"; |
cb097e7a |
552 | |
553 | my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; |
554 | my_getstore($url, $download_file) |
8c814d1a |
555 | or die "ERROR: Could not fetch '$url'\n"; |
cb097e7a |
556 | } |
557 | |
558 | # extract distribution |
559 | |
560 | my $ae = Archive::Extract->new( archive => $download_file); |
561 | $ae->extract( to => $untar_dir ) |
8c814d1a |
562 | or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n"; |
cb097e7a |
563 | |
564 | # get the name of the extracted distribution dir |
565 | |
566 | my $path = File::Spec->catfile($untar_dir, $filename); |
567 | |
568 | $path =~ s/\.tar\.gz$// or |
569 | $path =~ s/\.zip$// or |
570 | die "ERROR: downloaded file does not have a recognised suffix: $path\n"; |
571 | |
572 | die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; |
573 | |
574 | return $path; |
575 | } |
576 | |
577 | |
578 | # produce the diff of a single file |
579 | sub file_diff { |
580 | my $outfh = shift; |
581 | my $cpan_file = shift; |
582 | my $perl_file = shift; |
583 | my $reverse = shift; |
584 | my $diff_opts = shift; |
585 | |
586 | |
587 | my @cmd = (DIFF_CMD, split ' ', $diff_opts); |
588 | if ($reverse) { |
589 | push @cmd, $perl_file, $cpan_file; |
590 | } |
591 | else { |
592 | push @cmd, $cpan_file, $perl_file; |
593 | } |
594 | my $result = `@cmd`; |
595 | |
596 | $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; |
597 | |
598 | print $outfh $result; |
599 | } |
600 | |
601 | |
602 | run(); |
603 | |