10 use List::Util qw(sum);
13 use Parallel::Fork::BossWorkerAsync;
14 use Term::ProgressBar::Simple;
15 use URI::Find::Simple qw( list_uris );
19 while ( my $line = <main::DATA> ) {
21 next if $line =~ /^#/;
26 my $ua = LWP::UserAgent->new;
30 my @filenames = @ARGV;
31 @filenames = sort grep { $_ !~ /^\.git/ } File::Find::Rule->new->file->in('.')
34 my $total_bytes = sum map {-s} @filenames;
36 my $extract_progress = Term::ProgressBar::Simple->new(
37 { count => $total_bytes,
38 name => 'Extracting URIs',
43 foreach my $filename (@filenames) {
44 next if $filename =~ /uris\.txt/;
45 next if $filename =~ /check_uris/;
46 next if $filename =~ /\.patch$/;
47 my $contents = read_file($filename);
48 my @uris = list_uris($contents);
49 foreach my $uri (@uris) {
50 next unless $uri =~ /^(http|ftp)/;
51 next if $ignore{$uri};
53 # no need to hit rt.perl.org
55 if $uri =~ m{^http://rt.perl.org/rt3/Ticket/Display.html?id=\d+$};
57 # no need to hit rt.cpan.org
59 if $uri =~ m{^http://rt.cpan.org/Public/Bug/Display.html?id=\d+$};
60 push @{ $uris{$uri} }, $filename;
62 $extract_progress += -s $filename;
65 my $bw = Parallel::Fork::BossWorkerAsync->new(
66 work_handler => \&work_alarmed,
67 global_timeout => 120,
71 foreach my $uri ( keys %uris ) {
72 my @filenames = @{ $uris{$uri} };
73 $bw->add_work( { uri => $uri, filenames => \@filenames } );
76 undef $extract_progress;
78 my $fetch_progress = Term::ProgressBar::Simple->new(
79 { count => scalar( keys %uris ),
80 name => 'Fetching URIs',
85 while ( $bw->pending() ) {
86 my $response = $bw->get_result();
87 my $uri = $response->{uri};
88 my @filenames = @{ $response->{filenames} };
89 my $is_success = $response->{is_success};
90 my $message = $response->{message};
92 unless ($is_success) {
93 foreach my $filename (@filenames) {
94 push @{ $filenames{$filename} },
95 { uri => $uri, message => $message };
102 my $fh = IO::File->new('> uris.txt');
103 foreach my $filename ( sort keys %filenames ) {
104 $fh->say("* $filename");
105 my @bits = @{ $filenames{$filename} };
106 foreach my $bit (@bits) {
107 my $uri = $bit->{uri};
108 my $message = $bit->{message};
110 $fh->say(" $message");
115 say 'Finished, see uris.txt';
120 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
126 $conf->{is_success} = 0;
127 $conf->{message} = 'Timed out';
135 my $uri = $conf->{uri};
136 my @filenames = @{ $conf->{filenames} };
138 if ( $uri =~ /^http/ ) {
139 my $uri_without_fragment = URI->new($uri);
140 my $fragment = $uri_without_fragment->fragment(undef);
141 my $response = $ua->head($uri_without_fragment);
143 $conf->{is_success} = $response->is_success;
144 $conf->{message} = $response->status_line;
148 my $uri_object = URI->new($uri);
149 my $host = $uri_object->host;
150 my $path = $uri_object->path;
151 my ( $volume, $directories, $filename )
152 = File::Spec->splitpath($path);
154 my $ftp = Net::FTP->new( $host, Passive => 1, Timeout => 60 );
156 $conf->{is_succcess} = 0;
157 $conf->{message} = "Can not connect to $host: $@";
161 my $can_login = $ftp->login( "anonymous", '-anonymous@' );
162 unless ($can_login) {
163 $conf->{is_success} = 0;
164 $conf->{message} = "Can not login ", $ftp->message;
168 my $can_binary = $ftp->binary();
169 unless ($can_binary) {
170 $conf->{is_success} = 0;
171 $conf->{message} = "Can not binary ", $ftp->message;
175 my $can_cwd = $ftp->cwd($directories);
177 $conf->{is_success} = 0;
178 $conf->{message} = "Can not cwd to $directories ", $ftp->message;
183 my $can_size = $ftp->size($filename);
185 $conf->{is_success} = 0;
187 = "Can not size $filename in $directories",
192 my ($can_dir) = $ftp->dir;
194 my ($can_ls) = $ftp->ls;
196 $conf->{is_success} = 0;
198 = "Can not dir or ls in $directories ",
205 $conf->{is_success} = 1;
211 # these are fine but give errors
212 ftp://ftp.stratus.com/pub/vos/posix/ga/ga.html
213 ftp://ftp.stratus.com/pub/vos/utility/utility.html
215 # this is missing, sigh
216 ftp://ftp.sco.com/SLS/ptf7051e.Z
217 http://perlmonks.thepen.com/42898.html
219 # this are URI extraction bugs
220 http://www.perl.org/E
221 http://en.wikipedia.org/wiki/SREC_(file_format
222 http://somewhere.else',-type=/
226 http://search.cpan.org/src/KWILLIAMS/Module-Build-0.30/lib/Module/Build/Platform/Windows.pm:split_like_shell
227 http://www.xray.mpe.mpg.de/mailing-lists/perl5-
228 http://support.microsoft.com/support/kb/articles/Q280/3/41.ASP:
230 # these are used as an example
232 http://something.here/
233 http://users.perl5.git.perl.org/~yourlogin/
234 http://github.com/USERNAME/perl/tree/orange
235 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi
236 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi/somewhere/else?game=chess;game=checkers;weather=dull;foo=bar
237 http://somewhere.else$/
238 http://somewhere.else$/
239 http://somewhere.else/bin/foo&bar',-Type=
240 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi
241 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi/somewhere/else?game=chess;game=checkers;weather=dull;foo=bar
242 http://www.perl.org/test.cgi
244 http://search.cpan.org/perldoc?
246 http://cpan.dev.local/CPAN
250 ftp://ftp.software.ibm.com/aix/fixes/v4/other/vac.C.5.0.2.6.bff
251 http://www14.software.ibm.com/webapp/download/downloadaz.jsp
252 http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz
253 http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT
254 http://localhost/tmp/index.txt
255 http://example.com/foo/bar.html
256 http://example.com/Text-Bastardize-1.06.tar.gz
257 ftp://example.com/sources/packages.txt
258 http://example.com/sources/packages.txt
259 http://example.com/sources
260 ftp://example.com/sources
261 http://some.where.com/dir/file.txt
262 http://some.where.com/dir/a.txt
266 http://www.foo.com:8000/
267 http://mysite.com/cgi-bin/log.cgi/http://www.some.other.site/args
268 http://decoded/mirror/path
269 http://a/b/c/d/e/f/g/h/i/j
272 http://purl.org/rss/1.0/modules/taxonomy/
273 ftp://ftp.sun.ac.za/CPAN/CPAN/
274 ftp://ftp.cpan.org/pub/mirror/index.txt
275 ftp://cpan.org/pub/mirror/index.txt
276 http://example.com/~eh/
277 http://plagger.org/.../rss
278 http://umn.dl.sourceforge.net/mingw/gcc-g++-3.4.5-20060117-1.tar.gz
279 http://umn.dl.sourceforge.net/mingw/binutils-2.16.91-20060119-1.tar.gz
280 http://umn.dl.sourceforge.net/mingw/mingw-runtime-3.10.tar.gz
281 http://umn.dl.sourceforge.net/mingw/gcc-core-3.4.5-20060117-1.tar.gz
282 http://umn.dl.sourceforge.net/mingw/w32api-3.6.tar.gz
283 http://search.cpan.org/CPAN/authors/id/S/SH/SHAY/dmake-4.5-20060619-SHAY.zip
284 http://module-build.sourceforge.net/META-spec-new.html
285 http://module-build.sourceforge.net/META-spec-v1.4.html
286 http://www.cs.vu.nl/~tmgil/vi.html
287 http://perlcomposer.sourceforge.net/vperl.html
288 http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/rep
289 http://w4.lns.cornell.edu/~pvhp/ptk/ptkTOC.html
290 http://world.std.com/~aep/ptkdb/
291 http://www.castlelink.co.uk/object_system/
292 http://www.fh-wedel.de/elvis/
293 ftp://ftp.blarg.net/users/amol/zsh/
294 ftp://ftp.funet.fi/pub/languages/perl/CPAN
295 http://search.cpan.org/CPAN/authors/id/S/SH/SHAY/dmake-4.5-20060619-SHAY.zip
297 # these are used to generate or match URLs
298 http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist
299 http://www.cpantesters.org/show/%s.yaml
308 # weird redirects that LWP doesn't like
309 http://www.theperlreview.com/community_calendar
310 http://www.software.hp.com/portal/swdepot/displayProductInfo.do?productNumber=PERL
311 http://groups.google.com/
312 http://groups.google.com/group/comp.lang.perl.misc/topics
313 http://groups.google.co.uk/group/perl.perl5.porters/browse_thread/thread/b4e2b1d88280ddff/630b667a66e3509f?#630b667a66e3509f
314 http://groups.google.com/group/comp.sys.sgi.admin/msg/3ad8353bc4ce3cb0
315 http://groups.google.com/group/perl.module.build/browse_thread/thread/c8065052b2e0d741
316 http://groups.google.com/group/perl.perl5.porters/browse_thread/thread/adaffaaf939b042e/20dafc298df737f0%2320dafc298df737f0?sa=X&oi=groupsr&start=0&num=3
318 # broken webserver that doesn't like HEAD requests
319 http://docs.sun.com/app/docs/doc/816-5168/syslog-3c?a=view
321 # these have been reported upstream to CPAN authors
322 http://www.gnu.org/manual/tar/html_node/tar_139.html
323 http://www.w3.org/pub/WWW/TR/Wd-css-1.html
324 http://support.microsoft.com/support/kb/articles/Q280/3/41.ASP
325 http://msdn.microsoft.com/workshop/author/dhtml/httponly_cookies.asp
326 http://search.cpan.org/search?query=Module::Build::Convert
327 http://www.refcnt.org/papers/module-build-convert
328 http://csrc.nist.gov/cryptval/shs.html
329 http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset4.asp
330 http://www.debian.or.jp/~kubota/unicode-symbols.html.en
331 http://www.mail-archive.com/perl5-porters@perl.org/msg69766.html
332 http://www.debian.or.jp/~kubota/unicode-symbols.html.en
333 http://rfc.net/rfc2781.html
334 http://www.icu-project.org/charset/
335 http://ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html
336 http://www.rfc-editor.org/
338 http://www.oreilly.com/people/authors/lunde/cjk_inf.html
339 http://www.oreilly.com/catalog/cjkvinfo/
340 http://www.cpan.org/modules/by-author/Damian_Conway/Filter-Simple.tar.gz
341 http://www.csse.monash.edu.au/~damian/CPAN/Filter-Simple.tar.gz
342 http://www.egt.ie/standards/iso3166/iso3166-1-en.html
343 http://www.bsi-global.com/iso4217currency
344 http://www.plover.com/~mjd/perl/Memoize/
345 http://www.plover.com/~mjd/perl/MiniMemoize/
346 http://www.sysadminmag.com/tpj/issues/vol5_5/
347 ftp://ftp.tpc.int/tpc/server/UNIX/
348 http://www.nara.gov/genealogy/
349 http://home.utah-inter.net/kinsearch/Soundex.html
350 http://www.nara.gov/genealogy/soundex/soundex.html
351 http://rfc.net/rfc3461.html
352 ftp://ftp.cs.pdx.edu/pub/elvis/
353 http://www.fh-wedel.de/elvis/
359 checkURL.pl - Check that all the URLs in the Perl source are valid
363 This program checks that all the URLs in the Perl source are valid. It
364 checks HTTP and FTP links in parallel and contains a list of known
365 bad example links in its source. It takes 4 minutes to run on my
366 machine. The results are written to 'uris.txt' and list the filename,
367 the URL and the error:
369 * ext/Locale-Maketext/lib/Locale/Maketext.pod
370 http://sunsite.dk/RFC/rfc/rfc2277.html
374 It should be run every so often and links fixed and upstream authors
377 Note that the web is unstable and some websites are temporarily down.