Commit | Line | Data |
3fea05b9 |
1 | .\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.10) |
2 | .\" |
3 | .\" Standard preamble: |
4 | .\" ======================================================================== |
5 | .de Sp \" Vertical space (when we can't use .PP) |
6 | .if t .sp .5v |
7 | .if n .sp |
8 | .. |
9 | .de Vb \" Begin verbatim text |
10 | .ft CW |
11 | .nf |
12 | .ne \\$1 |
13 | .. |
14 | .de Ve \" End verbatim text |
15 | .ft R |
16 | .fi |
17 | .. |
18 | .\" Set up some character translations and predefined strings. \*(-- will |
19 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
20 | .\" double quote, and \*(R" will give a right double quote. \*(C+ will |
21 | .\" give a nicer C++. Capital omega is used to do unbreakable dashes and |
22 | .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, |
23 | .\" nothing in troff, for use with C<>. |
24 | .tr \(*W- |
25 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
26 | .ie n \{\ |
27 | . ds -- \(*W- |
28 | . ds PI pi |
29 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
30 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
31 | . ds L" "" |
32 | . ds R" "" |
33 | . ds C` "" |
34 | . ds C' "" |
35 | 'br\} |
36 | .el\{\ |
37 | . ds -- \|\(em\| |
38 | . ds PI \(*p |
39 | . ds L" `` |
40 | . ds R" '' |
41 | 'br\} |
42 | .\" |
43 | .\" Escape single quotes in literal strings from groff's Unicode transform. |
44 | .ie \n(.g .ds Aq \(aq |
45 | .el .ds Aq ' |
46 | .\" |
47 | .\" If the F register is turned on, we'll generate index entries on stderr for |
48 | .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index |
49 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
50 | .\" output yourself in some meaningful fashion. |
51 | .ie \nF \{\ |
52 | . de IX |
53 | . tm Index:\\$1\t\\n%\t"\\$2" |
54 | .. |
55 | . nr % 0 |
56 | . rr F |
57 | .\} |
58 | .el \{\ |
59 | . de IX |
60 | .. |
61 | .\} |
62 | .\" |
63 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
64 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
65 | . \" fudge factors for nroff and troff |
66 | .if n \{\ |
67 | . ds #H 0 |
68 | . ds #V .8m |
69 | . ds #F .3m |
70 | . ds #[ \f1 |
71 | . ds #] \fP |
72 | .\} |
73 | .if t \{\ |
74 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
75 | . ds #V .6m |
76 | . ds #F 0 |
77 | . ds #[ \& |
78 | . ds #] \& |
79 | .\} |
80 | . \" simple accents for nroff and troff |
81 | .if n \{\ |
82 | . ds ' \& |
83 | . ds ` \& |
84 | . ds ^ \& |
85 | . ds , \& |
86 | . ds ~ ~ |
87 | . ds / |
88 | .\} |
89 | .if t \{\ |
90 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
91 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
92 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
93 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
94 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
95 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
96 | .\} |
97 | . \" troff and (daisy-wheel) nroff accents |
98 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
99 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
100 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
101 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
102 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
103 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
104 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
105 | .ds ae a\h'-(\w'a'u*4/10)'e |
106 | .ds Ae A\h'-(\w'A'u*4/10)'E |
107 | . \" corrections for vroff |
108 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
109 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
110 | . \" for low resolution devices (crt and lpr) |
111 | .if \n(.H>23 .if \n(.V>19 \ |
112 | \{\ |
113 | . ds : e |
114 | . ds 8 ss |
115 | . ds o a |
116 | . ds d- d\h'-1'\(ga |
117 | . ds D- D\h'-1'\(hy |
118 | . ds th \o'bp' |
119 | . ds Th \o'LP' |
120 | . ds ae ae |
121 | . ds Ae AE |
122 | .\} |
123 | .rm #[ #] #H #V #F C |
124 | .\" ======================================================================== |
125 | .\" |
126 | .IX Title "WWW::Mechanize::Examples 3" |
127 | .TH WWW::Mechanize::Examples 3 "2009-06-24" "perl v5.8.7" "User Contributed Perl Documentation" |
128 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
129 | .\" way too many mistakes in technical documents. |
130 | .if n .ad l |
131 | .nh |
132 | .SH "NAME" |
133 | WWW::Mechanize::Examples \- Sample programs that use WWW::Mechanize |
134 | .SH "SYNOPSIS" |
135 | .IX Header "SYNOPSIS" |
136 | Plenty of people have learned WWW::Mechanize, and now, you can too! |
137 | .PP |
138 | Following are user-supplied samples of WWW::Mechanize in action. |
139 | If you have samples you'd like to contribute, please send 'em to |
140 | \&\f(CW\*(C`<andy@petdance.com>\*(C'\fR. |
141 | .PP |
142 | You can also look at the \fIt/*.t\fR files in the distribution. |
143 | .PP |
144 | Please note that these examples are not intended to do any specific task. |
145 | For all I know, they're no longer functional because the sites they |
146 | hit have changed. They're here to give examples of how people have |
147 | used WWW::Mechanize. |
148 | .PP |
149 | Note that the examples are in reverse order of my having received them, |
150 | so the freshest examples are always at the top. |
151 | .SS "Starbucks Density Calculator, by Nat Torkington" |
152 | .IX Subsection "Starbucks Density Calculator, by Nat Torkington" |
153 | Here's a pair of programs from Nat Torkington, editor for O'Reilly Media |
154 | and co-author of the \fIPerl Cookbook\fR. |
155 | .Sp |
156 | .RS 4 |
157 | Rael [Dornfest] discovered that you can easily find out how many Starbucks |
158 | there are in an area by searching for \*(L"Starbucks\*(R". So I wrote a silly |
159 | scraper for some old census data and came up with some Starbucks density |
160 | figures. There's no meaning to these numbers thanks to errors from using |
161 | old census data coupled with false positives in Yahoo search (e.g., |
162 | \&\*(L"Dodie Starbuck-Your Style Desgn\*(R" in Portland \s-1OR\s0). But it was fun to |
163 | waste a night on. |
164 | .Sp |
165 | Here are the top twenty cities in descending order of population, |
166 | with the amount of territory each Starbucks has. E.g., A New York \s-1NY\s0 |
167 | Starbucks covers 1.7 square miles of ground. |
168 | .Sp |
169 | .Vb 10 |
170 | \& New York, NY 1.7 |
171 | \& Los Angeles, CA 1.2 |
172 | \& Chicago, IL 1.0 |
173 | \& Houston, TX 4.6 |
174 | \& Philadelphia, PA 6.8 |
175 | \& San Diego, CA 2.7 |
176 | \& Detroit, MI 19.9 |
177 | \& Dallas, TX 2.7 |
178 | \& Phoenix, AZ 4.1 |
179 | \& San Antonio, TX 12.3 |
180 | \& San Jose, CA 1.1 |
181 | \& Baltimore, MD 3.9 |
182 | \& Indianapolis, IN 12.1 |
183 | \& San Francisco, CA 0.5 |
184 | \& Jacksonville, FL 39.9 |
185 | \& Columbus, OH 7.3 |
186 | \& Milwaukee, WI 5.1 |
187 | \& Memphis, TN 15.1 |
188 | \& Washington, DC 1.4 |
189 | \& Boston, MA 0.5 |
190 | .Ve |
191 | .RE |
192 | .PP |
193 | \&\f(CW\*(C`get_pop_data\*(C'\fR |
194 | .PP |
195 | .Vb 1 |
196 | \& #!/usr/bin/perl \-w |
197 | \& |
198 | \& use WWW::Mechanize; |
199 | \& use Storable; |
200 | \& |
201 | \& $url = \*(Aqhttp://www.census.gov/population/www/documentation/twps0027.html\*(Aq; |
202 | \& $m = WWW::Mechanize\->new(); |
203 | \& $m\->get($url); |
204 | \& |
205 | \& $c = $m\->content; |
206 | \& |
207 | \& $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s |
208 | \& or die "Can\*(Aqt find the population table\en"; |
209 | \& $t = $1; |
210 | \& @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs; |
211 | \& shift @outer; |
212 | \& foreach $r (@outer) { |
213 | \& @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs; |
214 | \& for ($x = 0; $x < @bits; $x++) { |
215 | \& $b = $bits[$x]; |
216 | \& @v = split /\es*<BR>\es*/, $b; |
217 | \& foreach (@v) { s/^\es+//; s/\es+$// } |
218 | \& push @{$data[$x]}, @v; |
219 | \& } |
220 | \& } |
221 | \& |
222 | \& for ($y = 0; $y < @{$data[0]}; $y++) { |
223 | \& $data{$data[1][$y]} = { |
224 | \& NAME => $data[1][$y], |
225 | \& RANK => $data[0][$y], |
226 | \& POP => comma_free($data[2][$y]), |
227 | \& AREA => comma_free($data[3][$y]), |
228 | \& DENS => comma_free($data[4][$y]), |
229 | \& }; |
230 | \& } |
231 | \& |
232 | \& store(\e%data, "cities.dat"); |
233 | \& |
234 | \& sub comma_free { |
235 | \& my $n = shift; |
236 | \& $n =~ s/,//; |
237 | \& return $n; |
238 | \& } |
239 | .Ve |
240 | .PP |
241 | \&\f(CW\*(C`plague_of_coffee\*(C'\fR |
242 | .PP |
243 | .Vb 1 |
244 | \& #!/usr/bin/perl \-w |
245 | \& |
246 | \& use WWW::Mechanize; |
247 | \& use strict; |
248 | \& use Storable; |
249 | \& |
250 | \& $SIG{_\|_WARN_\|_} = sub {} ; # ssssssh |
251 | \& |
252 | \& my $Cities = retrieve("cities.dat"); |
253 | \& |
254 | \& my $m = WWW::Mechanize\->new(); |
255 | \& $m\->get("http://local.yahoo.com/"); |
256 | \& |
257 | \& my @cities = sort { $Cities\->{$a}{RANK} <=> $Cities\->{$b}{RANK} } keys %$Cities; |
258 | \& foreach my $c ( @cities ) { |
259 | \& my $fields = { |
260 | \& \*(Aqstx\*(Aq => "starbucks", |
261 | \& \*(Aqcsz\*(Aq => $c, |
262 | \& }; |
263 | \& |
264 | \& my $r = $m\->submit_form(form_number => 2, |
265 | \& fields => $fields); |
266 | \& die "Couldn\*(Aqt submit form" unless $r\->is_success; |
267 | \& |
268 | \& my $hits = number_of_hits($r); |
269 | \& # my $ppl = sprintf("%d", 1000 * $Cities\->{$c}{POP} / $hits); |
270 | \& # print "$c has $hits Starbucks. That\*(Aqs one for every $ppl people.\en"; |
271 | \& my $density = sprintf("%.1f", $Cities\->{$c}{AREA} / $hits); |
272 | \& print "$c : $density\en"; |
273 | \& } |
274 | \& |
275 | \& sub number_of_hits { |
276 | \& my $r = shift; |
277 | \& my $c = $r\->content; |
278 | \& if ($c =~ m{\ed+ out of <b>(\ed+)</b> total results for}) { |
279 | \& return $1; |
280 | \& } |
281 | \& if ($c =~ m{Sorry, no .*? found in or near}) { |
282 | \& return 0; |
283 | \& } |
284 | \& if ($c =~ m{Your search matched multiple cities}) { |
285 | \& warn "Your search matched multiple cities\en"; |
286 | \& return 0; |
287 | \& } |
288 | \& if ($c =~ m{Sorry we couldn.t find that location}) { |
289 | \& warn "No cities\en"; |
290 | \& return 0; |
291 | \& } |
292 | \& if ($c =~ m{Could not find.*?, showing results for}) { |
293 | \& warn "No matches\en"; |
294 | \& return 0; |
295 | \& } |
296 | \& die "Unknown response\en$c\en"; |
297 | \& } |
298 | .Ve |
299 | .SS "pb-upload, by John Beppu" |
300 | .IX Subsection "pb-upload, by John Beppu" |
301 | This program takes filenames of images from the command line and |
302 | uploads them to a www.photobucket.com folder. John Beppu, the author, says: |
303 | .Sp |
304 | .RS 4 |
305 | I had 92 pictures I wanted to upload, and doing it through a browser |
306 | would've been torture. But thanks to mech, all I had to do was |
307 | `./pb.upload *.jpg` and watch it do its thing. It felt good. |
308 | If I had more time, I'd implement WWW::Photobucket on top of |
309 | WWW::Mechanize. |
310 | .RE |
311 | .PP |
312 | .Vb 1 |
313 | \& #!/usr/bin/perl \-w \-T |
314 | \& |
315 | \& use strict; |
316 | \& use WWW::Mechanize; |
317 | \& |
318 | \& my $login = "login_name"; |
319 | \& my $password = "password"; |
320 | \& my $folder = "folder"; |
321 | \& |
322 | \& my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/"; |
323 | \& |
324 | \& # login to your photobucket.com account |
325 | \& my $mech = WWW::Mechanize\->new(); |
326 | \& $mech\->get($url); |
327 | \& $mech\->submit_form( |
328 | \& form_number => 1, |
329 | \& fields => { password => $password }, |
330 | \& ); |
331 | \& die unless ($mech\->success); |
332 | \& |
333 | \& # upload image files specified on command line |
334 | \& foreach (@ARGV) { |
335 | \& print "$_\en"; |
336 | \& $mech\->form_number(2); |
337 | \& $mech\->field(\*(Aqthe_file[]\*(Aq => $_); |
338 | \& $mech\->submit(); |
339 | \& } |
340 | .Ve |
341 | .SS "listmod, by Ian Langworth" |
342 | .IX Subsection "listmod, by Ian Langworth" |
343 | Ian Langworth contributes this little gem that will bring joy to |
344 | beleagured mailing list admins. It discards spam messages through |
345 | mailman's web interface. |
346 | .PP |
347 | .Vb 8 |
348 | \& #!/arch/unix/bin/perl |
349 | \& use strict; |
350 | \& use warnings; |
351 | \& # |
352 | \& # listmod \- fast alternative to mailman list interface |
353 | \& # |
354 | \& # usage: listmod crew XXXXXXXX |
355 | \& # |
356 | \& |
357 | \& die "usage: $0 <listname> <password>\en" unless @ARGV == 2; |
358 | \& my ($listname, $password) = @ARGV; |
359 | \& |
360 | \& use CGI qw(unescape); |
361 | \& |
362 | \& use WWW::Mechanize; |
363 | \& my $m = WWW::Mechanize\->new( autocheck => 1 ); |
364 | \& |
365 | \& use Term::ReadLine; |
366 | \& my $term = Term::ReadLine\->new($0); |
367 | \& |
368 | \& # submit the form, get the cookie, go to the list admin page |
369 | \& $m\->get("https://lists.ccs.neu.edu/bin/admindb/$listname"); |
370 | \& $m\->set_visible( $password ); |
371 | \& $m\->click; |
372 | \& |
373 | \& # exit if nothing to do |
374 | \& print "There are no pending requests.\en" and exit |
375 | \& if $m\->content =~ /There are no pending requests/; |
376 | \& |
377 | \& # select the first form and examine its contents |
378 | \& $m\->form_number(1); |
379 | \& my $f = $m\->current_form or die "Couldn\*(Aqt get first form!\en"; |
380 | \& |
381 | \& # get me the base form element for each email item |
382 | \& my @items = map {m/^.+?\-(.+)/} grep {m/senderbanp/} $f\->param |
383 | \& or die "Couldn\*(Aqt get items in first form!\en"; |
384 | \& |
385 | \& # iterate through items, prompt user, commit actions |
386 | \& foreach my $item (@items) { |
387 | \& |
388 | \& # show item info |
389 | \& my $sender = unescape($item); |
390 | \& my ($subject) = [$f\->find_input("senderbanp\-$item")\->value_names]\->[1] |
391 | \& =~ /Subject:\es+(.+?)\es+Size:/g; |
392 | \& |
393 | \& # prompt user |
394 | \& my $choice = \*(Aq\*(Aq; |
395 | \& while ( $choice !~ /^[DAX]$/ ) { |
396 | \& print "$sender\e: \*(Aq$subject\*(Aq\en"; |
397 | \& $choice = uc $term\->readline("Action: defer/accept/discard [dax]: "); |
398 | \& print "\en\en"; |
399 | \& } |
400 | \& |
401 | \& # set button |
402 | \& $m\->field("senderaction\-$item" => {D=>0,A=>1,X=>3}\->{$choice}); |
403 | \& } |
404 | \& |
405 | \& # submit actions |
406 | \& $m\->click; |
407 | .Ve |
408 | .SS "ccdl, by Andy Lester" |
409 | .IX Subsection "ccdl, by Andy Lester" |
410 | Steve McConnell, author of the landmark \fICode Complete\fR has put |
411 | up the chapters for the 2nd edition in \s-1PDF\s0 format on his website. |
412 | I needed to download them to take to Kinko's to have printed. This |
413 | little program did it for me. |
414 | .PP |
415 | .Vb 1 |
416 | \& #!/usr/bin/perl \-w |
417 | \& |
418 | \& use strict; |
419 | \& use WWW::Mechanize; |
420 | \& |
421 | \& my $start = "http://www.stevemcconnell.com/cc2/cc.htm"; |
422 | \& |
423 | \& my $mech = WWW::Mechanize\->new( autocheck => 1 ); |
424 | \& $mech\->get( $start ); |
425 | \& |
426 | \& my @links = $mech\->find_all_links( url_regex => qr/\ed+.+\e.pdf$/ ); |
427 | \& |
428 | \& for my $link ( @links ) { |
429 | \& my $url = $link\->url_abs; |
430 | \& my $filename = $url; |
431 | \& $filename =~ s[^.+/][]; |
432 | \& |
433 | \& print "Fetching $url"; |
434 | \& $mech\->get( $url, \*(Aq:content_file\*(Aq => $filename ); |
435 | \& |
436 | \& print " ", \-s $filename, " bytes\en"; |
437 | \& } |
438 | .Ve |
439 | .SS "quotes.pl, by Andy Lester" |
440 | .IX Subsection "quotes.pl, by Andy Lester" |
441 | This was a program that was going to get a hack in \fISpidering Hacks\fR, |
442 | but got cut at the last minute, probably because it's against \s-1IMDB\s0's \s-1TOS\s0 |
443 | to scrape from it. I present it here as an example, not a suggestion |
444 | that you break their \s-1TOS\s0. |
445 | .PP |
446 | Last I checked, it didn't work because their \s-1HTML\s0 didn't match, but it's |
447 | still good as sample code. |
448 | .PP |
449 | .Vb 1 |
450 | \& #!/usr/bin/perl \-w |
451 | \& |
452 | \& use strict; |
453 | \& |
454 | \& use WWW::Mechanize; |
455 | \& use Getopt::Long; |
456 | \& use Text::Wrap; |
457 | \& |
458 | \& my $match = undef; |
459 | \& my $random = undef; |
460 | \& GetOptions( |
461 | \& "match=s" => \e$match, |
462 | \& "random" => \e$random, |
463 | \& ) or exit 1; |
464 | \& |
465 | \& my $movie = shift @ARGV or die "Must specify a movie\en"; |
466 | \& |
467 | \& my $quotes_page = get_quotes_page( $movie ); |
468 | \& my @quotes = extract_quotes( $quotes_page ); |
469 | \& |
470 | \& if ( $match ) { |
471 | \& $match = quotemeta($match); |
472 | \& @quotes = grep /$match/i, @quotes; |
473 | \& } |
474 | \& |
475 | \& if ( $random ) { |
476 | \& print $quotes[rand @quotes]; |
477 | \& } |
478 | \& else { |
479 | \& print join( "\en", @quotes ); |
480 | \& } |
481 | \& |
482 | \& |
483 | \& sub get_quotes_page { |
484 | \& my $movie = shift; |
485 | \& |
486 | \& my $mech = WWW::Mechanize\->new; |
487 | \& $mech\->get( "http://www.imdb.com/search" ); |
488 | \& $mech\->success or die "Can\*(Aqt get the search page"; |
489 | \& |
490 | \& $mech\->submit_form( |
491 | \& form_number => 2, |
492 | \& fields => { |
493 | \& title => $movie, |
494 | \& restrict => "Movies only", |
495 | \& }, |
496 | \& ); |
497 | \& |
498 | \& my @links = $mech\->find_all_links( url_regex => qr[^/Title] ) |
499 | \& or die "No matches for \e"$movie\e" were found.\en"; |
500 | \& |
501 | \& # Use the first link |
502 | \& my ( $url, $title ) = @{$links[0]}; |
503 | \& |
504 | \& warn "Checking $title...\en"; |
505 | \& |
506 | \& $mech\->get( $url ); |
507 | \& my $link = $mech\->find_link( text_regex => qr/Memorable Quotes/i ) |
508 | \& or die qq{"$title" has no quotes in IMDB!\en}; |
509 | \& |
510 | \& warn "Fetching quotes...\en\en"; |
511 | \& $mech\->get( $link\->[0] ); |
512 | \& |
513 | \& return $mech\->content; |
514 | \& } |
515 | \& |
516 | \& |
517 | \& sub extract_quotes { |
518 | \& my $page = shift; |
519 | \& |
520 | \& # Nibble away at the unwanted HTML at the beginnning... |
521 | \& $page =~ s/.+Memorable Quotes//si; |
522 | \& $page =~ s/.+?(<a name)/$1/si; |
523 | \& |
524 | \& # ... and the end of the page |
525 | \& $page =~ s/Browse titles in the movie quotes.+$//si; |
526 | \& $page =~ s/<p.+$//g; |
527 | \& |
528 | \& # Quotes separated by an <HR> tag |
529 | \& my @quotes = split( /<hr.+?>/, $page ); |
530 | \& |
531 | \& for my $quote ( @quotes ) { |
532 | \& my @lines = split( /<br>/, $quote ); |
533 | \& for ( @lines ) { |
534 | \& s/<[^>]+>//g; # Strip HTML tags |
535 | \& s/\es+/ /g; # Squash whitespace |
536 | \& s/^ //; # Strip leading space |
537 | \& s/ $//; # Strip trailing space |
538 | \& s/"/"/g; # Replace HTML entity quotes |
539 | \& |
540 | \& # Word\-wrap to fit in 72 columns |
541 | \& $Text::Wrap::columns = 72; |
542 | \& $_ = wrap( \*(Aq\*(Aq, \*(Aq \*(Aq, $_ ); |
543 | \& } |
544 | \& $quote = join( "\en", @lines ); |
545 | \& } |
546 | \& |
547 | \& return @quotes; |
548 | \& } |
549 | .Ve |
550 | .SS "cpansearch.pl, by Ed Silva" |
551 | .IX Subsection "cpansearch.pl, by Ed Silva" |
552 | A quick little utility to search the \s-1CPAN\s0 and fire up a browser |
553 | with a results page. |
554 | .PP |
555 | .Vb 1 |
556 | \& #!/usr/bin/perl |
557 | \& |
558 | \& # turn on perl\*(Aqs safety features |
559 | \& use strict; |
560 | \& use warnings; |
561 | \& |
562 | \& # work out the name of the module we\*(Aqre looking for |
563 | \& my $module_name = $ARGV[0] |
564 | \& or die "Must specify module name on command line"; |
565 | \& |
566 | \& # create a new browser |
567 | \& use WWW::Mechanize; |
568 | \& my $browser = WWW::Mechanize\->new(); |
569 | \& |
570 | \& # tell it to get the main page |
571 | \& $browser\->get("http://search.cpan.org/"); |
572 | \& |
573 | \& # okay, fill in the box with the name of the |
574 | \& # module we want to look up |
575 | \& $browser\->form_number(1); |
576 | \& $browser\->field("query", $module_name); |
577 | \& $browser\->click(); |
578 | \& |
579 | \& # click on the link that matches the module name |
580 | \& $browser\->follow_link( text_regex => $module_name ); |
581 | \& |
582 | \& my $url = $browser\->uri; |
583 | \& |
584 | \& # launch a browser... |
585 | \& system(\*(Aqgaleon\*(Aq, $url); |
586 | \& |
587 | \& exit(0); |
588 | .Ve |
589 | .SS "lj_friends.cgi, by Matt Cashner" |
590 | .IX Subsection "lj_friends.cgi, by Matt Cashner" |
591 | .Vb 1 |
592 | \& #!/usr/bin/perl |
593 | \& |
594 | \& # Provides an rss feed of a paid user\*(Aqs LiveJournal friends list |
595 | \& # Full entries, protected entries, etc. |
596 | \& # Add to your favorite rss reader as |
597 | \& # http://your.site.com/cgi\-bin/lj_friends.cgi?user=USER&password=PASSWORD |
598 | \& |
599 | \& use warnings; |
600 | \& use strict; |
601 | \& |
602 | \& use WWW::Mechanize; |
603 | \& use CGI; |
604 | \& |
605 | \& my $cgi = CGI\->new(); |
606 | \& my $form = $cgi\->Vars; |
607 | \& |
608 | \& my $agent = WWW::Mechanize\->new(); |
609 | \& |
610 | \& $agent\->get(\*(Aqhttp://www.livejournal.com/login.bml\*(Aq); |
611 | \& $agent\->form_number(\*(Aq3\*(Aq); |
612 | \& $agent\->field(\*(Aquser\*(Aq,$form\->{user}); |
613 | \& $agent\->field(\*(Aqpassword\*(Aq,$form\->{password}); |
614 | \& $agent\->submit(); |
615 | \& $agent\->get(\*(Aqhttp://www.livejournal.com/customview.cgi?user=\*(Aq.$form\->{user}.\*(Aq&styleid=225596&checkcookies=1\*(Aq); |
616 | \& print "Content\-type: text/plain\en\en"; |
617 | \& print $agent\->content(); |
618 | .Ve |
619 | .SS "Hacking Movable Type, by Dan Rinzel" |
620 | .IX Subsection "Hacking Movable Type, by Dan Rinzel" |
621 | .Vb 1 |
622 | \& use WWW::Mechanize; |
623 | \& |
624 | \& # a tool to automatically post entries to a moveable type weblog, and set arbitary creation dates |
625 | \& |
626 | \& my $mech = WWW::Mechanize\->new(); |
627 | \& my %entry; |
628 | \& $entry\->{title} = "Test AutoEntry Title"; |
629 | \& $entry\->{btext} = "Test AutoEntry Body"; |
630 | \& $entry\->{date} = \*(Aq2002\-04\-15 14:18:00\*(Aq; |
631 | \& my $start = qq|http://my.blog.site/mt.cgi|; |
632 | \& |
633 | \& $mech\->get($start); |
634 | \& $mech\->field(\*(Aqusername\*(Aq,\*(Aqund3f1n3d\*(Aq); |
635 | \& $mech\->field(\*(Aqpassword\*(Aq,\*(Aqobscur3d\*(Aq); |
636 | \& $mech\->submit(); # to get login cookie |
637 | \& $mech\->get(qq|$start?_\|_mode=view&_type=entry&blog_id=1|); |
638 | \& $mech\->form_name(\*(Aqentry_form\*(Aq); |
639 | \& $mech\->field(\*(Aqtitle\*(Aq,$entry\->{title}); |
640 | \& $mech\->field(\*(Aqcategory_id\*(Aq,1); # adjust as needed |
641 | \& $mech\->field(\*(Aqtext\*(Aq,$entry\->{btext}); |
642 | \& $mech\->field(\*(Aqstatus\*(Aq,2); # publish, or 1 = draft |
643 | \& $results = $mech\->submit(); |
644 | \& |
645 | \& # if we\*(Aqre ok with this entry being datestamped "NOW" (no {date} in %entry) |
646 | \& # we\*(Aqre done. Otherwise, time to be tricksy |
647 | \& # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler |
648 | \& # which takes the user to an editable version of the form where the create date can be edited |
649 | \& # MT date format of YYYY\-MM\-DD HH:MI:SS is the only one that won\*(Aqt error out |
650 | \& |
651 | \& if ($entry\->{date} && $entry\->{date} =~ /^\ed{4}\-\ed{2}\-\ed{2}\es+\ed{2}:\ed{2}:\ed{2}/) { |
652 | \& # travel the redirect |
653 | \& $results = $mech\->get($results\->{_headers}\->{location}); |
654 | \& $results\->{_content} =~ /<body onLoad="([^\e"]+)"/is; |
655 | \& my $js = $1; |
656 | \& $js =~ /\e\*(Aq([^\*(Aq]+)\e\*(Aq/; |
657 | \& $results = $mech\->get($start.$1); |
658 | \& $mech\->form_name(\*(Aqentry_form\*(Aq); |
659 | \& $mech\->field(\*(Aqcreated_on_manual\*(Aq,$entry\->{date}); |
660 | \& $mech\->submit(); |
661 | \& } |
662 | .Ve |
663 | .SS "get-despair, by Randal Schwartz" |
664 | .IX Subsection "get-despair, by Randal Schwartz" |
665 | Randal submitted this bot that walks the despair.com site sucking down |
666 | all the pictures. |
667 | .PP |
668 | .Vb 2 |
669 | \& use strict; |
670 | \& $|++; |
671 | \& |
672 | \& use WWW::Mechanize; |
673 | \& use File::Basename; |
674 | \& |
675 | \& my $m = WWW::Mechanize\->new; |
676 | \& |
677 | \& $m\->get("http://www.despair.com/indem.html"); |
678 | \& |
679 | \& my @top_links = @{$m\->links}; |
680 | \& |
681 | \& for my $top_link_num (0..$#top_links) { |
682 | \& next unless $top_links[$top_link_num][0] =~ /^http:/; |
683 | \& |
684 | \& $m\->follow_link( n=>$top_link_num ) or die "can\*(Aqt follow $top_link_num"; |
685 | \& |
686 | \& print $m\->uri, "\en"; |
687 | \& for my $image (grep m{^http://store4}, map $_\->[0], @{$m\->links}) { |
688 | \& my $local = basename $image; |
689 | \& print " $image...", $m\->mirror($image, $local)\->message, "\en" |
690 | \& } |
691 | \& |
692 | \& $m\->back or die "can\*(Aqt go back"; |
693 | \& } |
694 | .Ve |