local-lib5/lib/perl5/WWW/Mechanize/Examples.pod

   1 =head1 NAME
   2
   3 WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize
   4
   5 =head1 SYNOPSIS
   6
   7 Plenty of people have learned WWW::Mechanize, and now, you can too!
   8
   9 Following are user-supplied samples of WWW::Mechanize in action.
  10 If you have samples you'd like to contribute, please send 'em to
  11 C<< <andy@petdance.com> >>.
  12
  13 You can also look at the F<t/*.t> files in the distribution.
  14
  15 Please note that these examples are not intended to do any specific task.
  16 For all I know, they're no longer functional because the sites they
  17 hit have changed.  They're here to give examples of how people have
  18 used WWW::Mechanize.
  19
  20 Note that the examples are in reverse order of my having received them,
  21 so the freshest examples are always at the top.
  22
  23 =head2 Starbucks Density Calculator, by Nat Torkington
  24
  25 Here's a pair of programs from Nat Torkington, editor for O'Reilly Media
  26 and co-author of the I<Perl Cookbook>.
  27
  28 =over 4
  29
  30 Rael [Dornfest] discovered that you can easily find out how many Starbucks
  31 there are in an area by searching for "Starbucks".  So I wrote a silly
  32 scraper for some old census data and came up with some Starbucks density
  33 figures.  There's no meaning to these numbers thanks to errors from using
  34 old census data coupled with false positives in Yahoo search (e.g.,
  35 "Dodie Starbuck-Your Style Desgn" in Portland OR).  But it was fun to
  36 waste a night on.
  37
  38 Here are the top twenty cities in descending order of population,
  39 with the amount of territory each Starbucks has.  E.g., A New York NY
  40 Starbucks covers 1.7 square miles of ground.
  41
  42     New York, NY        1.7
  43     Los Angeles, CA     1.2
  44     Chicago, IL         1.0
  45     Houston, TX         4.6
  46     Philadelphia, PA    6.8
  47     San Diego, CA       2.7
  48     Detroit, MI        19.9
  49     Dallas, TX          2.7
  50     Phoenix, AZ         4.1
  51     San Antonio, TX    12.3
  52     San Jose, CA        1.1
  53     Baltimore, MD       3.9
  54     Indianapolis, IN   12.1
  55     San Francisco, CA   0.5
  56     Jacksonville, FL   39.9
  57     Columbus, OH        7.3
  58     Milwaukee, WI       5.1
  59     Memphis, TN        15.1
  60     Washington, DC      1.4
  61     Boston, MA          0.5
  62
  63 =back
  64
  65 C<get_pop_data>
  66
  67     #!/usr/bin/perl -w
  68
  69     use WWW::Mechanize;
  70     use Storable;
  71
  72     $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
  73     $m = WWW::Mechanize->new();
  74     $m->get($url);
  75
  76     $c = $m->content;
  77
  78     $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
  79       or die "Can't find the population table\n";
  80     $t = $1;
  81     @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
  82     shift @outer;
  83     foreach $r (@outer) {
  84       @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
  85       for ($x = 0; $x < @bits; $x++) {
  86         $b = $bits[$x];
  87         @v = split /\s*<BR>\s*/, $b;
  88         foreach (@v) { s/^\s+//; s/\s+$// }
  89         push @{$data[$x]}, @v;
  90       }
  91     }
  92
  93     for ($y = 0; $y < @{$data[0]}; $y++) {
  94         $data{$data[1][$y]} = {
  95             NAME => $data[1][$y],
  96             RANK => $data[0][$y],
  97             POP  => comma_free($data[2][$y]),
  98             AREA => comma_free($data[3][$y]),
  99             DENS => comma_free($data[4][$y]),
 100         };
 101     }
 102
 103     store(\%data, "cities.dat");
 104
 105     sub comma_free {
 106       my $n = shift;
 107       $n =~ s/,//;
 108       return $n;
 109     }
 110
 111
 112 C<plague_of_coffee>
 113
 114     #!/usr/bin/perl -w
 115
 116     use WWW::Mechanize;
 117     use strict;
 118     use Storable;
 119
 120     $SIG{__WARN__} = sub {} ;  # ssssssh
 121
 122     my $Cities = retrieve("cities.dat");
 123
 124     my $m = WWW::Mechanize->new();
 125     $m->get("http://local.yahoo.com/");
 126
 127     my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
 128     foreach my $c ( @cities ) {
 129       my $fields = {
 130         'stx' => "starbucks",
 131         'csz' => $c,
 132       };
 133
 134       my $r = $m->submit_form(form_number => 2,
 135                               fields => $fields);
 136       die "Couldn't submit form" unless $r->is_success;
 137
 138       my $hits = number_of_hits($r);
 139       #  my $ppl  = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
 140       #  print "$c has $hits Starbucks.  That's one for every $ppl people.\n";
 141       my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
 142       print "$c : $density\n";
 143     }
 144
 145     sub number_of_hits {
 146       my $r = shift;
 147       my $c = $r->content;
 148       if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
 149         return $1;
 150       }
 151       if ($c =~ m{Sorry, no .*? found in or near}) {
 152         return 0;
 153       }
 154       if ($c =~ m{Your search matched multiple cities}) {
 155         warn "Your search matched multiple cities\n";
 156         return 0;
 157       }
 158       if ($c =~ m{Sorry we couldn.t find that location}) {
 159         warn "No cities\n";
 160         return 0;
 161       }
 162       if ($c =~ m{Could not find.*?, showing results for}) {
 163         warn "No matches\n";
 164         return 0;
 165       }
 166       die "Unknown response\n$c\n";
 167     }
 168
 169
 170
 171 =head2 pb-upload, by John Beppu
 172
 173 This program takes filenames of images from the command line and
 174 uploads them to a www.photobucket.com folder.  John Beppu, the author, says:
 175
 176 =over 4
 177
 178 I had 92 pictures I wanted to upload, and doing it through a browser
 179 would've been torture.  But thanks to mech, all I had to do was
 180 `./pb.upload *.jpg` and watch it do its thing.  It felt good.
 181 If I had more time, I'd implement WWW::Photobucket on top of
 182 WWW::Mechanize.
 183
 184 =back
 185
 186     #!/usr/bin/perl -w -T
 187
 188     use strict;
 189     use WWW::Mechanize;
 190
 191     my $login    = "login_name";
 192     my $password = "password";
 193     my $folder   = "folder";
 194
 195     my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";
 196
 197     # login to your photobucket.com account
 198     my $mech = WWW::Mechanize->new();
 199     $mech->get($url);
 200     $mech->submit_form(
 201         form_number => 1,
 202         fields      => { password => $password },
 203     );
 204     die unless ($mech->success);
 205
 206     # upload image files specified on command line
 207     foreach (@ARGV) {
 208         print "$_\n";
 209         $mech->form_number(2);
 210         $mech->field('the_file[]' => $_);
 211         $mech->submit();
 212     }
 213
 214 =head2 listmod, by Ian Langworth
 215
 216 Ian Langworth contributes this little gem that will bring joy to
 217 beleagured mailing list admins.  It discards spam messages through
 218 mailman's web interface.
 219
 220
 221     #!/arch/unix/bin/perl
 222     use strict;
 223     use warnings;
 224     #
 225     # listmod - fast alternative to mailman list interface
 226     #
 227     # usage: listmod crew XXXXXXXX
 228     #
 229
 230     die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
 231     my ($listname, $password) = @ARGV;
 232
 233     use CGI qw(unescape);
 234
 235     use WWW::Mechanize;
 236     my $m = WWW::Mechanize->new( autocheck => 1 );
 237
 238     use Term::ReadLine;
 239     my $term = Term::ReadLine->new($0);
 240
 241     # submit the form, get the cookie, go to the list admin page
 242     $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
 243     $m->set_visible( $password );
 244     $m->click;
 245
 246     # exit if nothing to do
 247     print "There are no pending requests.\n" and exit
 248         if $m->content =~ /There are no pending requests/;
 249
 250     # select the first form and examine its contents
 251     $m->form_number(1);
 252     my $f = $m->current_form or die "Couldn't get first form!\n";
 253
 254     # get me the base form element for each email item
 255     my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
 256         or die "Couldn't get items in first form!\n";
 257
 258     # iterate through items, prompt user, commit actions
 259     foreach my $item (@items) {
 260
 261         # show item info
 262         my $sender = unescape($item);
 263         my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
 264             =~ /Subject:\s+(.+?)\s+Size:/g;
 265
 266         # prompt user
 267         my $choice = '';
 268         while ( $choice !~ /^[DAX]$/ ) {
 269             print "$sender\: '$subject'\n";
 270             $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
 271             print "\n\n";
 272         }
 273
 274         # set button
 275         $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
 276     }
 277
 278     # submit actions
 279     $m->click;
 280
 281 =head2 ccdl, by Andy Lester
 282
 283 Steve McConnell, author of the landmark I<Code Complete> has put
 284 up the chapters for the 2nd edition in PDF format on his website.
 285 I needed to download them to take to Kinko's to have printed.  This
 286 little program did it for me.
 287
 288
 289     #!/usr/bin/perl -w
 290
 291     use strict;
 292     use WWW::Mechanize;
 293
 294     my $start = "http://www.stevemcconnell.com/cc2/cc.htm";
 295
 296     my $mech = WWW::Mechanize->new( autocheck => 1 );
 297     $mech->get( $start );
 298
 299     my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );
 300
 301     for my $link ( @links ) {
 302         my $url = $link->url_abs;
 303         my $filename = $url;
 304         $filename =~ s[^.+/][];
 305
 306         print "Fetching $url";
 307         $mech->get( $url, ':content_file' => $filename );
 308
 309         print "   ", -s $filename, " bytes\n";
 310     }
 311
 312 =head2 quotes.pl, by Andy Lester
 313
 314 This was a program that was going to get a hack in I<Spidering Hacks>,
 315 but got cut at the last minute, probably because it's against IMDB's TOS
 316 to scrape from it.  I present it here as an example, not a suggestion
 317 that you break their TOS.
 318
 319 Last I checked, it didn't work because their HTML didn't match, but it's
 320 still good as sample code.
 321
 322     #!/usr/bin/perl -w
 323
 324     use strict;
 325
 326     use WWW::Mechanize;
 327     use Getopt::Long;
 328     use Text::Wrap;
 329
 330     my $match = undef;
 331     my $random = undef;
 332     GetOptions(
 333         "match=s" => \$match,
 334         "random" => \$random,
 335     ) or exit 1;
 336
 337     my $movie = shift @ARGV or die "Must specify a movie\n";
 338
 339     my $quotes_page = get_quotes_page( $movie );
 340     my @quotes = extract_quotes( $quotes_page );
 341
 342     if ( $match ) {
 343         $match = quotemeta($match);
 344         @quotes = grep /$match/i, @quotes;
 345     }
 346
 347     if ( $random ) {
 348         print $quotes[rand @quotes];
 349     }
 350     else {
 351         print join( "\n", @quotes );
 352     }
 353
 354
 355     sub get_quotes_page {
 356         my $movie = shift;
 357
 358         my $mech = WWW::Mechanize->new;
 359         $mech->get( "http://www.imdb.com/search" );
 360         $mech->success or die "Can't get the search page";
 361
 362         $mech->submit_form(
 363             form_number => 2,
 364             fields => {
 365                 title   => $movie,
 366                 restrict    => "Movies only",
 367             },
 368         );
 369
 370         my @links = $mech->find_all_links( url_regex => qr[^/Title] )
 371             or die "No matches for \"$movie\" were found.\n";
 372
 373         # Use the first link
 374         my ( $url, $title ) = @{$links[0]};
 375
 376         warn "Checking $title...\n";
 377
 378         $mech->get( $url );
 379         my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
 380             or die qq{"$title" has no quotes in IMDB!\n};
 381
 382         warn "Fetching quotes...\n\n";
 383         $mech->get( $link->[0] );
 384
 385         return $mech->content;
 386     }
 387
 388
 389     sub extract_quotes {
 390         my $page = shift;
 391
 392         # Nibble away at the unwanted HTML at the beginnning...
 393         $page =~ s/.+Memorable Quotes//si;
 394         $page =~ s/.+?(<a name)/$1/si;
 395
 396         # ... and the end of the page
 397         $page =~ s/Browse titles in the movie quotes.+$//si;
 398         $page =~ s/<p.+$//g;
 399
 400         # Quotes separated by an <HR> tag
 401         my @quotes = split( /<hr.+?>/, $page );
 402
 403         for my $quote ( @quotes ) {
 404             my @lines = split( /<br>/, $quote );
 405             for ( @lines ) {
 406                 s/<[^>]+>//g;   # Strip HTML tags
 407                 s/\s+/ /g;          # Squash whitespace
 408                 s/^ //;     # Strip leading space
 409                 s/ $//;     # Strip trailing space
 410                 s/&#34;/"/g;    # Replace HTML entity quotes
 411
 412                 # Word-wrap to fit in 72 columns
 413                 $Text::Wrap::columns = 72;
 414                 $_ = wrap( '', '    ', $_ );
 415             }
 416             $quote = join( "\n", @lines );
 417         }
 418
 419         return @quotes;
 420     }
 421
 422 =head2 cpansearch.pl, by Ed Silva
 423
 424 A quick little utility to search the CPAN and fire up a browser
 425 with a results page.
 426
 427     #!/usr/bin/perl
 428
 429     # turn on perl's safety features
 430     use strict;
 431     use warnings;
 432
 433     # work out the name of the module we're looking for
 434     my $module_name = $ARGV[0]
 435       or die "Must specify module name on command line";
 436
 437     # create a new browser
 438     use WWW::Mechanize;
 439     my $browser = WWW::Mechanize->new();
 440
 441     # tell it to get the main page
 442     $browser->get("http://search.cpan.org/");
 443
 444     # okay, fill in the box with the name of the
 445     # module we want to look up
 446     $browser->form_number(1);
 447     $browser->field("query", $module_name);
 448     $browser->click();
 449
 450     # click on the link that matches the module name
 451     $browser->follow_link( text_regex => $module_name );
 452
 453     my $url = $browser->uri;
 454
 455     # launch a browser...
 456     system('galeon', $url);
 457
 458     exit(0);
 459
 460
 461 =head2 lj_friends.cgi, by Matt Cashner
 462
 463     #!/usr/bin/perl
 464
 465     # Provides an rss feed of a paid user's LiveJournal friends list
 466     # Full entries, protected entries, etc.
 467     # Add to your favorite rss reader as
 468     # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD
 469
 470     use warnings;
 471     use strict;
 472
 473     use WWW::Mechanize;
 474     use CGI;
 475
 476     my $cgi = CGI->new();
 477     my $form = $cgi->Vars;
 478
 479     my $agent = WWW::Mechanize->new();
 480
 481     $agent->get('http://www.livejournal.com/login.bml');
 482     $agent->form_number('3');
 483     $agent->field('user',$form->{user});
 484     $agent->field('password',$form->{password});
 485     $agent->submit();
 486     $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
 487     print "Content-type: text/plain\n\n";
 488     print $agent->content();
 489
 490 =head2 Hacking Movable Type, by Dan Rinzel
 491
 492     use WWW::Mechanize;
 493
 494     # a tool to automatically post entries to a moveable type weblog, and set arbitary creation dates
 495
 496     my $mech = WWW::Mechanize->new();
 497     my %entry;
 498     $entry->{title} = "Test AutoEntry Title";
 499     $entry->{btext} = "Test AutoEntry Body";
 500     $entry->{date} = '2002-04-15 14:18:00';
 501     my $start = qq|http://my.blog.site/mt.cgi|;
 502
 503     $mech->get($start);
 504     $mech->field('username','und3f1n3d');
 505     $mech->field('password','obscur3d');
 506     $mech->submit(); # to get login cookie
 507     $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
 508     $mech->form_name('entry_form');
 509     $mech->field('title',$entry->{title});
 510     $mech->field('category_id',1); # adjust as needed
 511     $mech->field('text',$entry->{btext});
 512     $mech->field('status',2); # publish, or 1 = draft
 513     $results = $mech->submit();
 514
 515     # if we're ok with this entry being datestamped "NOW" (no {date} in %entry)
 516     # we're done. Otherwise, time to be tricksy
 517     # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
 518     # which takes the user to an editable version of the form where the create date can be edited
 519     # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out
 520
 521     if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
 522         # travel the redirect
 523         $results = $mech->get($results->{_headers}->{location});
 524         $results->{_content} =~ /<body onLoad="([^\"]+)"/is;
 525         my $js = $1;
 526         $js =~ /\'([^']+)\'/;
 527         $results = $mech->get($start.$1);
 528         $mech->form_name('entry_form');
 529         $mech->field('created_on_manual',$entry->{date});
 530         $mech->submit();
 531     }
 532
 533 =head2 get-despair, by Randal Schwartz
 534
 535 Randal submitted this bot that walks the despair.com site sucking down
 536 all the pictures.
 537
 538     use strict;
 539     $|++;
 540
 541     use WWW::Mechanize;
 542     use File::Basename;
 543
 544     my $m = WWW::Mechanize->new;
 545
 546     $m->get("http://www.despair.com/indem.html");
 547
 548     my @top_links = @{$m->links};
 549
 550     for my $top_link_num (0..$#top_links) {
 551         next unless $top_links[$top_link_num][0] =~ /^http:/;
 552
 553         $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";
 554
 555         print $m->uri, "\n";
 556         for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
 557             my $local = basename $image;
 558             print " $image...", $m->mirror($image, $local)->message, "\n"
 559         }
 560
 561         $m->back or die "can't go back";
 562     }