Commit | Line | Data |
6e7dc4a9 |
1 | #!/usr/bin/perl |
2 | |
3 | use strict; |
4 | use warnings 'all'; |
5 | |
6 | use LWP::Simple qw /$ua getstore/; |
6e7dc4a9 |
7 | |
6e7dc4a9 |
8 | my %urls; |
9 | |
10 | my @dummy = qw( |
11 | http://something.here |
12 | http://www.pvhp.com |
13 | ); |
14 | my %dummy; |
15 | |
16 | @dummy{@dummy} = (); |
17 | |
058eaa42 |
18 | foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) { |
6e7dc4a9 |
19 | open my $fh => $file or die "Failed to open $file: $!\n"; |
20 | while (<$fh>) { |
21 | if (m{(?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+} && !exists $dummy{$&}) { |
22 | my $url = $&; |
23 | $url =~ s/\.$//; |
24 | $urls {$url} ||= { }; |
25 | $urls {$url} {$file} = 1; |
26 | } |
27 | } |
28 | close $fh; |
29 | } |
30 | |
0d6d7233 |
31 | sub fisher_yates_shuffle { |
32 | my $deck = shift; # $deck is a reference to an array |
33 | my $i = @$deck; |
34 | while (--$i) { |
35 | my $j = int rand ($i+1); |
36 | @$deck[$i,$j] = @$deck[$j,$i]; |
37 | } |
38 | } |
39 | |
6e7dc4a9 |
40 | my @urls = keys %urls; |
41 | |
0d6d7233 |
42 | fisher_yates_shuffle(\@urls); |
43 | |
44 | sub todo { |
45 | warn "(", scalar @urls, " URLs)\n"; |
46 | } |
47 | |
48 | my $MAXPROC = 40; |
49 | my $MAXURL = 10; |
50 | my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL; |
51 | |
52 | select(STDERR); $| = 1; |
53 | select(STDOUT); $| = 1; |
54 | |
6e7dc4a9 |
55 | while (@urls) { |
0d6d7233 |
56 | my @list; |
6e7dc4a9 |
57 | my $pid; |
0d6d7233 |
58 | my $i; |
59 | |
60 | todo(); |
61 | |
62 | for ($i = 0; $i < $MAXFORK; $i++) { |
63 | $list[$i] = [ splice @urls, 0, $MAXURL ]; |
6e7dc4a9 |
64 | $pid = fork; |
0d6d7233 |
65 | die "Failed to fork: $!\n" unless defined $pid; |
66 | last unless $pid; # Child. |
67 | } |
68 | |
69 | if ($pid) { |
70 | # Parent. |
71 | warn "(waiting)\n"; |
72 | 1 until -1 == wait; # Reap. |
73 | } else { |
6e7dc4a9 |
74 | # Child. |
0d6d7233 |
75 | foreach my $url (@{$list[$i]}) { |
6e7dc4a9 |
76 | my $code = getstore $url, "/dev/null"; |
77 | next if $code == 200; |
78 | my $f = join ", " => keys %{$urls {$url}}; |
79 | printf "%03d %s: %s\n" => $code, $url, $f; |
80 | } |
81 | |
82 | exit; |
83 | } |
84 | } |
85 | |
6e7dc4a9 |
86 | __END__ |