3 WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize
7 Plenty of people have learned WWW::Mechanize, and now, you can too!
9 Following are user-supplied samples of WWW::Mechanize in action.
10 If you have samples you'd like to contribute, please send 'em to
11 C<< <andy@petdance.com> >>.
13 You can also look at the F<t/*.t> files in the distribution.
15 Please note that these examples are not intended to do any specific task.
16 For all I know, they're no longer functional because the sites they
17 hit have changed. They're here to give examples of how people have
20 Note that the examples are in reverse order of my having received them,
21 so the freshest examples are always at the top.
23 =head2 Starbucks Density Calculator, by Nat Torkington
25 Here's a pair of programs from Nat Torkington, editor for O'Reilly Media
26 and co-author of the I<Perl Cookbook>.
30 Rael [Dornfest] discovered that you can easily find out how many Starbucks
31 there are in an area by searching for "Starbucks". So I wrote a silly
32 scraper for some old census data and came up with some Starbucks density
33 figures. There's no meaning to these numbers thanks to errors from using
34 old census data coupled with false positives in Yahoo search (e.g.,
35 "Dodie Starbuck-Your Style Desgn" in Portland OR). But it was fun to
38 Here are the top twenty cities in descending order of population,
39 with the amount of territory each Starbucks has. E.g., A New York NY
40 Starbucks covers 1.7 square miles of ground.
72 $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
73 $m = WWW::Mechanize->new();
78 $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
79 or die "Can't find the population table\n";
81 @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
84 @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
85 for ($x = 0; $x < @bits; $x++) {
87 @v = split /\s*<BR>\s*/, $b;
88 foreach (@v) { s/^\s+//; s/\s+$// }
89 push @{$data[$x]}, @v;
93 for ($y = 0; $y < @{$data[0]}; $y++) {
94 $data{$data[1][$y]} = {
97 POP => comma_free($data[2][$y]),
98 AREA => comma_free($data[3][$y]),
99 DENS => comma_free($data[4][$y]),
103 store(\%data, "cities.dat");
120 $SIG{__WARN__} = sub {} ; # ssssssh
122 my $Cities = retrieve("cities.dat");
124 my $m = WWW::Mechanize->new();
125 $m->get("http://local.yahoo.com/");
127 my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
128 foreach my $c ( @cities ) {
130 'stx' => "starbucks",
134 my $r = $m->submit_form(form_number => 2,
136 die "Couldn't submit form" unless $r->is_success;
138 my $hits = number_of_hits($r);
139 # my $ppl = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
140 # print "$c has $hits Starbucks. That's one for every $ppl people.\n";
141 my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
142 print "$c : $density\n";
148 if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
151 if ($c =~ m{Sorry, no .*? found in or near}) {
154 if ($c =~ m{Your search matched multiple cities}) {
155 warn "Your search matched multiple cities\n";
158 if ($c =~ m{Sorry we couldn.t find that location}) {
162 if ($c =~ m{Could not find.*?, showing results for}) {
166 die "Unknown response\n$c\n";
171 =head2 pb-upload, by John Beppu
173 This program takes filenames of images from the command line and
174 uploads them to a www.photobucket.com folder. John Beppu, the author, says:
178 I had 92 pictures I wanted to upload, and doing it through a browser
179 would've been torture. But thanks to mech, all I had to do was
180 `./pb.upload *.jpg` and watch it do its thing. It felt good.
181 If I had more time, I'd implement WWW::Photobucket on top of
186 #!/usr/bin/perl -w -T
191 my $login = "login_name";
192 my $password = "password";
193 my $folder = "folder";
195 my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";
197 # login to your photobucket.com account
198 my $mech = WWW::Mechanize->new();
202 fields => { password => $password },
204 die unless ($mech->success);
206 # upload image files specified on command line
209 $mech->form_number(2);
210 $mech->field('the_file[]' => $_);
214 =head2 listmod, by Ian Langworth
216 Ian Langworth contributes this little gem that will bring joy to
217 beleagured mailing list admins. It discards spam messages through
218 mailman's web interface.
221 #!/arch/unix/bin/perl
225 # listmod - fast alternative to mailman list interface
227 # usage: listmod crew XXXXXXXX
230 die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
231 my ($listname, $password) = @ARGV;
233 use CGI qw(unescape);
236 my $m = WWW::Mechanize->new( autocheck => 1 );
239 my $term = Term::ReadLine->new($0);
241 # submit the form, get the cookie, go to the list admin page
242 $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
243 $m->set_visible( $password );
246 # exit if nothing to do
247 print "There are no pending requests.\n" and exit
248 if $m->content =~ /There are no pending requests/;
250 # select the first form and examine its contents
252 my $f = $m->current_form or die "Couldn't get first form!\n";
254 # get me the base form element for each email item
255 my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
256 or die "Couldn't get items in first form!\n";
258 # iterate through items, prompt user, commit actions
259 foreach my $item (@items) {
262 my $sender = unescape($item);
263 my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
264 =~ /Subject:\s+(.+?)\s+Size:/g;
268 while ( $choice !~ /^[DAX]$/ ) {
269 print "$sender\: '$subject'\n";
270 $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
275 $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
281 =head2 ccdl, by Andy Lester
283 Steve McConnell, author of the landmark I<Code Complete> has put
284 up the chapters for the 2nd edition in PDF format on his website.
285 I needed to download them to take to Kinko's to have printed. This
286 little program did it for me.
294 my $start = "http://www.stevemcconnell.com/cc2/cc.htm";
296 my $mech = WWW::Mechanize->new( autocheck => 1 );
297 $mech->get( $start );
299 my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );
301 for my $link ( @links ) {
302 my $url = $link->url_abs;
304 $filename =~ s[^.+/][];
306 print "Fetching $url";
307 $mech->get( $url, ':content_file' => $filename );
309 print " ", -s $filename, " bytes\n";
312 =head2 quotes.pl, by Andy Lester
314 This was a program that was going to get a hack in I<Spidering Hacks>,
315 but got cut at the last minute, probably because it's against IMDB's TOS
316 to scrape from it. I present it here as an example, not a suggestion
317 that you break their TOS.
319 Last I checked, it didn't work because their HTML didn't match, but it's
320 still good as sample code.
333 "match=s" => \$match,
334 "random" => \$random,
337 my $movie = shift @ARGV or die "Must specify a movie\n";
339 my $quotes_page = get_quotes_page( $movie );
340 my @quotes = extract_quotes( $quotes_page );
343 $match = quotemeta($match);
344 @quotes = grep /$match/i, @quotes;
348 print $quotes[rand @quotes];
351 print join( "\n", @quotes );
355 sub get_quotes_page {
358 my $mech = WWW::Mechanize->new;
359 $mech->get( "http://www.imdb.com/search" );
360 $mech->success or die "Can't get the search page";
366 restrict => "Movies only",
370 my @links = $mech->find_all_links( url_regex => qr[^/Title] )
371 or die "No matches for \"$movie\" were found.\n";
374 my ( $url, $title ) = @{$links[0]};
376 warn "Checking $title...\n";
379 my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
380 or die qq{"$title" has no quotes in IMDB!\n};
382 warn "Fetching quotes...\n\n";
383 $mech->get( $link->[0] );
385 return $mech->content;
392 # Nibble away at the unwanted HTML at the beginnning...
393 $page =~ s/.+Memorable Quotes//si;
394 $page =~ s/.+?(<a name)/$1/si;
396 # ... and the end of the page
397 $page =~ s/Browse titles in the movie quotes.+$//si;
400 # Quotes separated by an <HR> tag
401 my @quotes = split( /<hr.+?>/, $page );
403 for my $quote ( @quotes ) {
404 my @lines = split( /<br>/, $quote );
406 s/<[^>]+>//g; # Strip HTML tags
407 s/\s+/ /g; # Squash whitespace
408 s/^ //; # Strip leading space
409 s/ $//; # Strip trailing space
410 s/"/"/g; # Replace HTML entity quotes
412 # Word-wrap to fit in 72 columns
413 $Text::Wrap::columns = 72;
414 $_ = wrap( '', ' ', $_ );
416 $quote = join( "\n", @lines );
422 =head2 cpansearch.pl, by Ed Silva
424 A quick little utility to search the CPAN and fire up a browser
429 # turn on perl's safety features
433 # work out the name of the module we're looking for
434 my $module_name = $ARGV[0]
435 or die "Must specify module name on command line";
437 # create a new browser
439 my $browser = WWW::Mechanize->new();
441 # tell it to get the main page
442 $browser->get("http://search.cpan.org/");
444 # okay, fill in the box with the name of the
445 # module we want to look up
446 $browser->form_number(1);
447 $browser->field("query", $module_name);
450 # click on the link that matches the module name
451 $browser->follow_link( text_regex => $module_name );
453 my $url = $browser->uri;
455 # launch a browser...
456 system('galeon', $url);
461 =head2 lj_friends.cgi, by Matt Cashner
465 # Provides an rss feed of a paid user's LiveJournal friends list
466 # Full entries, protected entries, etc.
467 # Add to your favorite rss reader as
468 # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD
476 my $cgi = CGI->new();
477 my $form = $cgi->Vars;
479 my $agent = WWW::Mechanize->new();
481 $agent->get('http://www.livejournal.com/login.bml');
482 $agent->form_number('3');
483 $agent->field('user',$form->{user});
484 $agent->field('password',$form->{password});
486 $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
487 print "Content-type: text/plain\n\n";
488 print $agent->content();
490 =head2 Hacking Movable Type, by Dan Rinzel
494 # a tool to automatically post entries to a moveable type weblog, and set arbitary creation dates
496 my $mech = WWW::Mechanize->new();
498 $entry->{title} = "Test AutoEntry Title";
499 $entry->{btext} = "Test AutoEntry Body";
500 $entry->{date} = '2002-04-15 14:18:00';
501 my $start = qq|http://my.blog.site/mt.cgi|;
504 $mech->field('username','und3f1n3d');
505 $mech->field('password','obscur3d');
506 $mech->submit(); # to get login cookie
507 $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
508 $mech->form_name('entry_form');
509 $mech->field('title',$entry->{title});
510 $mech->field('category_id',1); # adjust as needed
511 $mech->field('text',$entry->{btext});
512 $mech->field('status',2); # publish, or 1 = draft
513 $results = $mech->submit();
515 # if we're ok with this entry being datestamped "NOW" (no {date} in %entry)
516 # we're done. Otherwise, time to be tricksy
517 # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
518 # which takes the user to an editable version of the form where the create date can be edited
519 # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out
521 if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
522 # travel the redirect
523 $results = $mech->get($results->{_headers}->{location});
524 $results->{_content} =~ /<body onLoad="([^\"]+)"/is;
526 $js =~ /\'([^']+)\'/;
527 $results = $mech->get($start.$1);
528 $mech->form_name('entry_form');
529 $mech->field('created_on_manual',$entry->{date});
533 =head2 get-despair, by Randal Schwartz
535 Randal submitted this bot that walks the despair.com site sucking down
544 my $m = WWW::Mechanize->new;
546 $m->get("http://www.despair.com/indem.html");
548 my @top_links = @{$m->links};
550 for my $top_link_num (0..$#top_links) {
551 next unless $top_links[$top_link_num][0] =~ /^http:/;
553 $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";
556 for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
557 my $local = basename $image;
558 print " $image...", $m->mirror($image, $local)->message, "\n"
561 $m->back or die "can't go back";