1 .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3
4 .\" ========================================================================
5 .de Sh \" Subsection heading
13 .de Sp \" Vertical space (when we can't use .PP)
17 .de Vb \" Begin verbatim text
22 .de Ve \" End verbatim text
26 .\" Set up some character translations and predefined strings. \*(-- will
27 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28 .\" double quote, and \*(R" will give a right double quote. | will give a
29 .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30 .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31 .\" expand to `' in nroff, nothing in troff, for use with C<>.
33 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
37 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
51 .\" If the F register is turned on, we'll generate index entries on stderr for
52 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53 .\" entries marked with X<> in POD. Of course, you'll have to process the
54 .\" output yourself in some meaningful fashion.
57 . tm Index:\\$1\t\\n%\t"\\$2"
63 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
64 .\" way too many mistakes in technical documents.
68 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69 .\" Fear. Run. Save yourself. No user-serviceable parts.
70 . \" fudge factors for nroff and troff
79 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
85 . \" simple accents for nroff and troff
95 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
102 . \" troff and (daisy-wheel) nroff accents
103 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110 .ds ae a\h'-(\w'a'u*4/10)'e
111 .ds Ae A\h'-(\w'A'u*4/10)'E
112 . \" corrections for vroff
113 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115 . \" for low resolution devices (crt and lpr)
116 .if \n(.H>23 .if \n(.V>19 \
129 .\" ========================================================================
131 .IX Title "LWP::RobotUA 3"
132 .TH LWP::RobotUA 3 "2009-06-15" "perl v5.8.7" "User Contributed Perl Documentation"
134 LWP::RobotUA \- a class for well\-behaved Web robots
136 .IX Header "SYNOPSIS"
139 \& my $ua = LWP::RobotUA\->new('my\-robot/0.1', 'me@foo.com');
140 \& $ua\->delay(10); # be very nice \-\- max one hit every ten minutes!
145 \& # Then use it just like a normal LWP::UserAgent:
146 \& my $response = $ua\->get('http://whatever.int/...');
150 .IX Header "DESCRIPTION"
151 This class implements a user agent that is suitable for robot
152 applications. Robots should be nice to the servers they visit. They
153 should consult the \fI/robots.txt\fR file to ensure that they are welcomed
154 and they should not make requests too frequently.
156 But before you consider writing a robot, take a look at
157 <URL:http://www.robotstxt.org/>.
159 When you use a \fILWP::RobotUA\fR object as your user agent, then you do not
160 really have to think about these things yourself; \f(CW\*(C`robots.txt\*(C'\fR files
161 are automatically consulted and obeyed, the server isn't queried
162 too rapidly, and so on. Just send requests
163 as you do when you are using a normal \fILWP::UserAgent\fR
164 object (using \f(CW\*(C`$ua\->get(...)\*(C'\fR, \f(CW\*(C`$ua\->head(...)\*(C'\fR,
165 \&\f(CW\*(C`$ua\->request(...)\*(C'\fR, etc.), and this
166 special agent will make sure you are nice.
169 The LWP::RobotUA is a sub-class of LWP::UserAgent and implements the
170 same methods. In addition the following methods are provided:
171 .ie n .IP "$ua = LWP::RobotUA\->new( %options )" 4
172 .el .IP "$ua = LWP::RobotUA\->new( \f(CW%options\fR )" 4
173 .IX Item "$ua = LWP::RobotUA->new( %options )"
175 .ie n .IP "$ua = LWP::RobotUA\->new( $agent, $from )" 4
176 .el .IP "$ua = LWP::RobotUA\->new( \f(CW$agent\fR, \f(CW$from\fR )" 4
177 .IX Item "$ua = LWP::RobotUA->new( $agent, $from )"
178 .ie n .IP "$ua = LWP::RobotUA\->new( $agent, $from, $rules )" 4
179 .el .IP "$ua = LWP::RobotUA\->new( \f(CW$agent\fR, \f(CW$from\fR, \f(CW$rules\fR )" 4
180 .IX Item "$ua = LWP::RobotUA->new( $agent, $from, $rules )"
182 The LWP::UserAgent options \f(CW\*(C`agent\*(C'\fR and \f(CW\*(C`from\*(C'\fR are mandatory. The
183 options \f(CW\*(C`delay\*(C'\fR, \f(CW\*(C`use_sleep\*(C'\fR and \f(CW\*(C`rules\*(C'\fR initialize attributes
184 private to the RobotUA. If \f(CW\*(C`rules\*(C'\fR are not provided, then
185 \&\f(CW\*(C`WWW::RobotRules\*(C'\fR is instantiated providing an internal database of
188 It is also possible to just pass the value of \f(CW\*(C`agent\*(C'\fR, \f(CW\*(C`from\*(C'\fR and
189 optionally \f(CW\*(C`rules\*(C'\fR as plain positional arguments.
191 .IX Item "$ua->delay"
193 .ie n .IP "$ua\->delay( $minutes )" 4
194 .el .IP "$ua\->delay( \f(CW$minutes\fR )" 4
195 .IX Item "$ua->delay( $minutes )"
197 Get/set the minimum delay between requests to the same server, in
198 \&\fIminutes\fR. The default is 1 minute. Note that this number doesn't
199 have to be an integer; for example, this sets the delay to 10 seconds:
202 \& $ua\->delay(10/60);
204 .IP "$ua\->use_sleep" 4
205 .IX Item "$ua->use_sleep"
207 .ie n .IP "$ua\->use_sleep( $boolean )" 4
208 .el .IP "$ua\->use_sleep( \f(CW$boolean\fR )" 4
209 .IX Item "$ua->use_sleep( $boolean )"
211 Get/set a value indicating whether the \s-1UA\s0 should \fIsleep()\fR if requests
212 arrive too fast, defined as \f(CW$ua\fR\->delay minutes not having passed since the
213 last request to the given server. The default is \s-1TRUE\s0. If this value is
214 \&\s-1FALSE\s0 then an internal \s-1SERVICE_UNAVAILABLE\s0 response will be generated.
215 It will have a Retry-After header that indicates when it is \s-1OK\s0 to
216 send another request to this server.
218 .IX Item "$ua->rules"
220 .ie n .IP "$ua\->rules( $rules )" 4
221 .el .IP "$ua\->rules( \f(CW$rules\fR )" 4
222 .IX Item "$ua->rules( $rules )"
224 Set/get which \fIWWW::RobotRules\fR object to use.
225 .ie n .IP "$ua\->no_visits( $netloc )" 4
226 .el .IP "$ua\->no_visits( \f(CW$netloc\fR )" 4
227 .IX Item "$ua->no_visits( $netloc )"
228 Returns the number of documents fetched from this server host. Yeah I
229 know, this method should probably have been named \fInum_visits()\fR or
230 something like that. :\-(
231 .ie n .IP "$ua\->host_wait( $netloc )" 4
232 .el .IP "$ua\->host_wait( \f(CW$netloc\fR )" 4
233 .IX Item "$ua->host_wait( $netloc )"
234 Returns the number of \fIseconds\fR (from now) you must wait before you can
235 make a new request to this host.
236 .IP "$ua\->as_string" 4
237 .IX Item "$ua->as_string"
238 Returns a string that describes the state of the \s-1UA\s0.
239 Mainly useful for debugging.
241 .IX Header "SEE ALSO"
242 LWP::UserAgent, WWW::RobotRules
244 .IX Header "COPYRIGHT"
245 Copyright 1996\-2004 Gisle Aas.
247 This library is free software; you can redistribute it and/or
248 modify it under the same terms as Perl itself.