Commit | Line | Data |
3fea05b9 |
1 | package LWP::Protocol::ftp; |
2 | |
3 | # Implementation of the ftp protocol (RFC 959). We let the Net::FTP |
4 | # package do all the dirty work. |
5 | |
6 | use Carp (); |
7 | |
8 | use HTTP::Status (); |
9 | use HTTP::Negotiate (); |
10 | use HTTP::Response (); |
11 | use LWP::MediaTypes (); |
12 | use File::Listing (); |
13 | |
14 | require LWP::Protocol; |
15 | @ISA = qw(LWP::Protocol); |
16 | |
17 | use strict; |
18 | eval { |
19 | package LWP::Protocol::MyFTP; |
20 | |
21 | require Net::FTP; |
22 | Net::FTP->require_version(2.00); |
23 | |
24 | use vars qw(@ISA); |
25 | @ISA=qw(Net::FTP); |
26 | |
27 | sub new { |
28 | my $class = shift; |
29 | |
30 | my $self = $class->SUPER::new(@_) || return undef; |
31 | |
32 | my $mess = $self->message; # welcome message |
33 | $mess =~ s|\n.*||s; # only first line left |
34 | $mess =~ s|\s*ready\.?$||; |
35 | # Make the version number more HTTP like |
36 | $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||; |
37 | ${*$self}{myftp_server} = $mess; |
38 | #$response->header("Server", $mess); |
39 | |
40 | $self; |
41 | } |
42 | |
43 | sub http_server { |
44 | my $self = shift; |
45 | ${*$self}{myftp_server}; |
46 | } |
47 | |
48 | sub home { |
49 | my $self = shift; |
50 | my $old = ${*$self}{myftp_home}; |
51 | if (@_) { |
52 | ${*$self}{myftp_home} = shift; |
53 | } |
54 | $old; |
55 | } |
56 | |
57 | sub go_home { |
58 | my $self = shift; |
59 | $self->cwd(${*$self}{myftp_home}); |
60 | } |
61 | |
62 | sub request_count { |
63 | my $self = shift; |
64 | ++${*$self}{myftp_reqcount}; |
65 | } |
66 | |
67 | sub ping { |
68 | my $self = shift; |
69 | return $self->go_home; |
70 | } |
71 | |
72 | }; |
73 | my $init_failed = $@; |
74 | |
75 | |
76 | sub _connect { |
77 | my($self, $host, $port, $user, $account, $password, $timeout) = @_; |
78 | |
79 | my $key; |
80 | my $conn_cache = $self->{ua}{conn_cache}; |
81 | if ($conn_cache) { |
82 | $key = "$host:$port:$user"; |
83 | $key .= ":$account" if defined($account); |
84 | if (my $ftp = $conn_cache->withdraw("ftp", $key)) { |
85 | if ($ftp->ping) { |
86 | # save it again |
87 | $conn_cache->deposit("ftp", $key, $ftp); |
88 | return $ftp; |
89 | } |
90 | } |
91 | } |
92 | |
93 | # try to make a connection |
94 | my $ftp = LWP::Protocol::MyFTP->new($host, |
95 | Port => $port, |
96 | Timeout => $timeout, |
97 | LocalAddr => $self->{ua}{local_address}, |
98 | ); |
99 | # XXX Should be some what to pass on 'Passive' (header??) |
100 | unless ($ftp) { |
101 | $@ =~ s/^Net::FTP: //; |
102 | return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@); |
103 | } |
104 | |
105 | unless ($ftp->login($user, $password, $account)) { |
106 | # Unauthorized. Let's fake a RC_UNAUTHORIZED response |
107 | my $mess = scalar($ftp->message); |
108 | $mess =~ s/\n$//; |
109 | my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess); |
110 | $res->header("Server", $ftp->http_server); |
111 | $res->header("WWW-Authenticate", qq(Basic Realm="FTP login")); |
112 | return $res; |
113 | } |
114 | |
115 | my $home = $ftp->pwd; |
116 | $ftp->home($home); |
117 | |
118 | $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache; |
119 | |
120 | return $ftp; |
121 | } |
122 | |
123 | |
124 | sub request |
125 | { |
126 | my($self, $request, $proxy, $arg, $size, $timeout) = @_; |
127 | |
128 | $size = 4096 unless $size; |
129 | |
130 | # check proxy |
131 | if (defined $proxy) |
132 | { |
133 | return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
134 | 'You can not proxy through the ftp'); |
135 | } |
136 | |
137 | my $url = $request->uri; |
138 | if ($url->scheme ne 'ftp') { |
139 | my $scheme = $url->scheme; |
140 | return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, |
141 | "LWP::Protocol::ftp::request called for '$scheme'"); |
142 | } |
143 | |
144 | # check method |
145 | my $method = $request->method; |
146 | |
147 | unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') { |
148 | return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
149 | 'Library does not allow method ' . |
150 | "$method for 'ftp:' URLs"); |
151 | } |
152 | |
153 | if ($init_failed) { |
154 | return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, |
155 | $init_failed); |
156 | } |
157 | |
158 | my $host = $url->host; |
159 | my $port = $url->port; |
160 | my $user = $url->user; |
161 | my $password = $url->password; |
162 | |
163 | # If a basic autorization header is present than we prefer these over |
164 | # the username/password specified in the URL. |
165 | { |
166 | my($u,$p) = $request->authorization_basic; |
167 | if (defined $u) { |
168 | $user = $u; |
169 | $password = $p; |
170 | } |
171 | } |
172 | |
173 | # We allow the account to be specified in the "Account" header |
174 | my $account = $request->header('Account'); |
175 | |
176 | my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout); |
177 | return $ftp if ref($ftp) eq "HTTP::Response"; # ugh! |
178 | |
179 | # Create an initial response object |
180 | my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); |
181 | $response->header(Server => $ftp->http_server); |
182 | $response->header('Client-Request-Num' => $ftp->request_count); |
183 | $response->request($request); |
184 | |
185 | # Get & fix the path |
186 | my @path = grep { length } $url->path_segments; |
187 | my $remote_file = pop(@path); |
188 | $remote_file = '' unless defined $remote_file; |
189 | |
190 | my $type; |
191 | if (ref $remote_file) { |
192 | my @params; |
193 | ($remote_file, @params) = @$remote_file; |
194 | for (@params) { |
195 | $type = $_ if s/^type=//; |
196 | } |
197 | } |
198 | |
199 | if ($type && $type eq 'a') { |
200 | $ftp->ascii; |
201 | } |
202 | else { |
203 | $ftp->binary; |
204 | } |
205 | |
206 | for (@path) { |
207 | unless ($ftp->cwd($_)) { |
208 | return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, |
209 | "Can't chdir to $_"); |
210 | } |
211 | } |
212 | |
213 | if ($method eq 'GET' || $method eq 'HEAD') { |
214 | if (my $mod_time = $ftp->mdtm($remote_file)) { |
215 | $response->last_modified($mod_time); |
216 | if (my $ims = $request->if_modified_since) { |
217 | if ($mod_time <= $ims) { |
218 | $response->code(&HTTP::Status::RC_NOT_MODIFIED); |
219 | $response->message("Not modified"); |
220 | return $response; |
221 | } |
222 | } |
223 | } |
224 | |
225 | # We'll use this later to abort the transfer if necessary. |
226 | # if $max_size is defined, we need to abort early. Otherwise, it's |
227 | # a normal transfer |
228 | my $max_size = undef; |
229 | |
230 | # Set resume location, if the client requested it |
231 | if ($request->header('Range') && $ftp->supported('REST')) |
232 | { |
233 | my $range_info = $request->header('Range'); |
234 | |
235 | # Change bytes=2772992-6781209 to just 2772992 |
236 | my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/; |
237 | if ( defined $start_byte && !defined $end_byte ) { |
238 | |
239 | # open range -- only the start is specified |
240 | |
241 | $ftp->restart( $start_byte ); |
242 | # don't define $max_size, we don't want to abort early |
243 | } |
244 | elsif ( defined $start_byte && defined $end_byte && |
245 | $start_byte >= 0 && $end_byte >= $start_byte ) { |
246 | |
247 | $ftp->restart( $start_byte ); |
248 | $max_size = $end_byte - $start_byte; |
249 | } |
250 | else { |
251 | |
252 | return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
253 | 'Incorrect syntax for Range request'); |
254 | } |
255 | } |
256 | elsif ($request->header('Range') && !$ftp->supported('REST')) |
257 | { |
258 | return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, |
259 | "Server does not support resume."); |
260 | } |
261 | |
262 | my $data; # the data handle |
263 | if (length($remote_file) and $data = $ftp->retr($remote_file)) { |
264 | my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file); |
265 | $response->header('Content-Type', $type) if $type; |
266 | for (@enc) { |
267 | $response->push_header('Content-Encoding', $_); |
268 | } |
269 | my $mess = $ftp->message; |
270 | if ($mess =~ /\((\d+)\s+bytes\)/) { |
271 | $response->header('Content-Length', "$1"); |
272 | } |
273 | |
274 | if ($method ne 'HEAD') { |
275 | # Read data from server |
276 | $response = $self->collect($arg, $response, sub { |
277 | my $content = ''; |
278 | my $result = $data->read($content, $size); |
279 | |
280 | # Stop early if we need to. |
281 | if (defined $max_size) |
282 | { |
283 | # We need an interface to Net::FTP::dataconn for getting |
284 | # the number of bytes already read |
285 | my $bytes_received = $data->bytes_read(); |
286 | |
287 | # We were already over the limit. (Should only happen |
288 | # once at the end.) |
289 | if ($bytes_received - length($content) > $max_size) |
290 | { |
291 | $content = ''; |
292 | } |
293 | # We just went over the limit |
294 | elsif ($bytes_received > $max_size) |
295 | { |
296 | # Trim content |
297 | $content = substr($content, 0, |
298 | $max_size - ($bytes_received - length($content)) ); |
299 | } |
300 | # We're under the limit |
301 | else |
302 | { |
303 | } |
304 | } |
305 | |
306 | return \$content; |
307 | } ); |
308 | } |
309 | # abort is needed for HEAD, it's == close if the transfer has |
310 | # already completed. |
311 | unless ($data->abort) { |
312 | # Something did not work too well. Note that we treat |
313 | # responses to abort() with code 0 in case of HEAD as ok |
314 | # (at least wu-ftpd 2.6.1(1) does that). |
315 | if ($method ne 'HEAD' || $ftp->code != 0) { |
316 | $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR); |
317 | $response->message("FTP close response: " . $ftp->code . |
318 | " " . $ftp->message); |
319 | } |
320 | } |
321 | } |
322 | elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) { |
323 | # not a plain file, try to list instead |
324 | if (length($remote_file) && !$ftp->cwd($remote_file)) { |
325 | return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, |
326 | "File '$remote_file' not found"); |
327 | } |
328 | |
329 | # It should now be safe to try to list the directory |
330 | my @lsl = $ftp->dir; |
331 | |
332 | # Try to figure out if the user want us to convert the |
333 | # directory listing to HTML. |
334 | my @variants = |
335 | ( |
336 | ['html', 0.60, 'text/html' ], |
337 | ['dir', 1.00, 'text/ftp-dir-listing' ] |
338 | ); |
339 | #$HTTP::Negotiate::DEBUG=1; |
340 | my $prefer = HTTP::Negotiate::choose(\@variants, $request); |
341 | |
342 | my $content = ''; |
343 | |
344 | if (!defined($prefer)) { |
345 | return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE, |
346 | "Neither HTML nor directory listing wanted"); |
347 | } |
348 | elsif ($prefer eq 'html') { |
349 | $response->header('Content-Type' => 'text/html'); |
350 | $content = "<HEAD><TITLE>File Listing</TITLE>\n"; |
351 | my $base = $request->uri->clone; |
352 | my $path = $base->path; |
353 | $base->path("$path/") unless $path =~ m|/$|; |
354 | $content .= qq(<BASE HREF="$base">\n</HEAD>\n); |
355 | $content .= "<BODY>\n<UL>\n"; |
356 | for (File::Listing::parse_dir(\@lsl, 'GMT')) { |
357 | my($name, $type, $size, $mtime, $mode) = @$_; |
358 | $content .= qq( <LI> <a href="$name">$name</a>); |
359 | $content .= " $size bytes" if $type eq 'f'; |
360 | $content .= "\n"; |
361 | } |
362 | $content .= "</UL></body>\n"; |
363 | } |
364 | else { |
365 | $response->header('Content-Type', 'text/ftp-dir-listing'); |
366 | $content = join("\n", @lsl, ''); |
367 | } |
368 | |
369 | $response->header('Content-Length', length($content)); |
370 | |
371 | if ($method ne 'HEAD') { |
372 | $response = $self->collect_once($arg, $response, $content); |
373 | } |
374 | } |
375 | else { |
376 | my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
377 | "FTP return code " . $ftp->code); |
378 | $res->content_type("text/plain"); |
379 | $res->content($ftp->message); |
380 | return $res; |
381 | } |
382 | } |
383 | elsif ($method eq 'PUT') { |
384 | # method must be PUT |
385 | unless (length($remote_file)) { |
386 | return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
387 | "Must have a file name to PUT to"); |
388 | } |
389 | my $data; |
390 | if ($data = $ftp->stor($remote_file)) { |
391 | my $content = $request->content; |
392 | my $bytes = 0; |
393 | if (defined $content) { |
394 | if (ref($content) eq 'SCALAR') { |
395 | $bytes = $data->write($$content, length($$content)); |
396 | } |
397 | elsif (ref($content) eq 'CODE') { |
398 | my($buf, $n); |
399 | while (length($buf = &$content)) { |
400 | $n = $data->write($buf, length($buf)); |
401 | last unless $n; |
402 | $bytes += $n; |
403 | } |
404 | } |
405 | elsif (!ref($content)) { |
406 | if (defined $content && length($content)) { |
407 | $bytes = $data->write($content, length($content)); |
408 | } |
409 | } |
410 | else { |
411 | die "Bad content"; |
412 | } |
413 | } |
414 | $data->close; |
415 | |
416 | $response->code(&HTTP::Status::RC_CREATED); |
417 | $response->header('Content-Type', 'text/plain'); |
418 | $response->content("$bytes bytes stored as $remote_file on $host\n") |
419 | |
420 | } |
421 | else { |
422 | my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
423 | "FTP return code " . $ftp->code); |
424 | $res->content_type("text/plain"); |
425 | $res->content($ftp->message); |
426 | return $res; |
427 | } |
428 | } |
429 | else { |
430 | return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, |
431 | "Illegal method $method"); |
432 | } |
433 | |
434 | $response; |
435 | } |
436 | |
437 | 1; |
438 | |
439 | __END__ |
440 | |
441 | # This is what RFC 1738 has to say about FTP access: |
442 | # -------------------------------------------------- |
443 | # |
444 | # 3.2. FTP |
445 | # |
446 | # The FTP URL scheme is used to designate files and directories on |
447 | # Internet hosts accessible using the FTP protocol (RFC959). |
448 | # |
449 | # A FTP URL follow the syntax described in Section 3.1. If :<port> is |
450 | # omitted, the port defaults to 21. |
451 | # |
452 | # 3.2.1. FTP Name and Password |
453 | # |
454 | # A user name and password may be supplied; they are used in the ftp |
455 | # "USER" and "PASS" commands after first making the connection to the |
456 | # FTP server. If no user name or password is supplied and one is |
457 | # requested by the FTP server, the conventions for "anonymous" FTP are |
458 | # to be used, as follows: |
459 | # |
460 | # The user name "anonymous" is supplied. |
461 | # |
462 | # The password is supplied as the Internet e-mail address |
463 | # of the end user accessing the resource. |
464 | # |
465 | # If the URL supplies a user name but no password, and the remote |
466 | # server requests a password, the program interpreting the FTP URL |
467 | # should request one from the user. |
468 | # |
469 | # 3.2.2. FTP url-path |
470 | # |
471 | # The url-path of a FTP URL has the following syntax: |
472 | # |
473 | # <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode> |
474 | # |
475 | # Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings |
476 | # and <typecode> is one of the characters "a", "i", or "d". The part |
477 | # ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be |
478 | # empty. The whole url-path may be omitted, including the "/" |
479 | # delimiting it from the prefix containing user, password, host, and |
480 | # port. |
481 | # |
482 | # The url-path is interpreted as a series of FTP commands as follows: |
483 | # |
484 | # Each of the <cwd> elements is to be supplied, sequentially, as the |
485 | # argument to a CWD (change working directory) command. |
486 | # |
487 | # If the typecode is "d", perform a NLST (name list) command with |
488 | # <name> as the argument, and interpret the results as a file |
489 | # directory listing. |
490 | # |
491 | # Otherwise, perform a TYPE command with <typecode> as the argument, |
492 | # and then access the file whose name is <name> (for example, using |
493 | # the RETR command.) |
494 | # |
495 | # Within a name or CWD component, the characters "/" and ";" are |
496 | # reserved and must be encoded. The components are decoded prior to |
497 | # their use in the FTP protocol. In particular, if the appropriate FTP |
498 | # sequence to access a particular file requires supplying a string |
499 | # containing a "/" as an argument to a CWD or RETR command, it is |
500 | # necessary to encode each "/". |
501 | # |
502 | # For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is |
503 | # interpreted by FTP-ing to "host.dom", logging in as "myname" |
504 | # (prompting for a password if it is asked for), and then executing |
505 | # "CWD /etc" and then "RETR motd". This has a different meaning from |
506 | # <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then |
507 | # "RETR motd"; the initial "CWD" might be executed relative to the |
508 | # default directory for "myname". On the other hand, |
509 | # <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null |
510 | # argument, then "CWD etc", and then "RETR motd". |
511 | # |
512 | # FTP URLs may also be used for other operations; for example, it is |
513 | # possible to update a file on a remote file server, or infer |
514 | # information about it from the directory listings. The mechanism for |
515 | # doing so is not spelled out here. |
516 | # |
517 | # 3.2.3. FTP Typecode is Optional |
518 | # |
519 | # The entire ;type=<typecode> part of a FTP URL is optional. If it is |
520 | # omitted, the client program interpreting the URL must guess the |
521 | # appropriate mode to use. In general, the data content type of a file |
522 | # can only be guessed from the name, e.g., from the suffix of the name; |
523 | # the appropriate type code to be used for transfer of the file can |
524 | # then be deduced from the data content of the file. |
525 | # |
526 | # 3.2.4 Hierarchy |
527 | # |
528 | # For some file systems, the "/" used to denote the hierarchical |
529 | # structure of the URL corresponds to the delimiter used to construct a |
530 | # file name hierarchy, and thus, the filename will look similar to the |
531 | # URL path. This does NOT mean that the URL is a Unix filename. |
532 | # |
533 | # 3.2.5. Optimization |
534 | # |
535 | # Clients accessing resources via FTP may employ additional heuristics |
536 | # to optimize the interaction. For some FTP servers, for example, it |
537 | # may be reasonable to keep the control connection open while accessing |
538 | # multiple URLs from the same server. However, there is no common |
539 | # hierarchical model to the FTP protocol, so if a directory change |
540 | # command has been given, it is impossible in general to deduce what |
541 | # sequence should be given to navigate to another directory for a |
542 | # second retrieval, if the paths are different. The only reliable |
543 | # algorithm is to disconnect and reestablish the control connection. |