Commit | Line | Data |
3fea05b9 |
1 | package LWP::Protocol::http10; |
2 | |
3 | use strict; |
4 | |
5 | require HTTP::Response; |
6 | require HTTP::Status; |
7 | require IO::Socket; |
8 | require IO::Select; |
9 | |
10 | use vars qw(@ISA @EXTRA_SOCK_OPTS); |
11 | |
12 | require LWP::Protocol; |
13 | @ISA = qw(LWP::Protocol); |
14 | |
15 | my $CRLF = "\015\012"; # how lines should be terminated; |
16 | # "\r\n" is not correct on all systems, for |
17 | # instance MacPerl defines it to "\012\015" |
18 | |
19 | sub _new_socket |
20 | { |
21 | my($self, $host, $port, $timeout) = @_; |
22 | |
23 | local($^W) = 0; # IO::Socket::INET can be noisy |
24 | my $sock = IO::Socket::INET->new(PeerAddr => $host, |
25 | PeerPort => $port, |
26 | Proto => 'tcp', |
27 | Timeout => $timeout, |
28 | $self->_extra_sock_opts($host, $port), |
29 | ); |
30 | unless ($sock) { |
31 | # IO::Socket::INET leaves additional error messages in $@ |
32 | $@ =~ s/^.*?: //; |
33 | die "Can't connect to $host:$port ($@)"; |
34 | } |
35 | $sock; |
36 | } |
37 | |
38 | sub _extra_sock_opts # to be overridden by subclass |
39 | { |
40 | return @EXTRA_SOCK_OPTS; |
41 | } |
42 | |
43 | |
44 | sub _check_sock |
45 | { |
46 | #my($self, $req, $sock) = @_; |
47 | } |
48 | |
49 | sub _get_sock_info |
50 | { |
51 | my($self, $res, $sock) = @_; |
52 | if (defined(my $peerhost = $sock->peerhost)) { |
53 | $res->header("Client-Peer" => "$peerhost:" . $sock->peerport); |
54 | } |
55 | } |
56 | |
57 | sub _fixup_header |
58 | { |
59 | my($self, $h, $url, $proxy) = @_; |
60 | |
61 | $h->remove_header('Connection'); # need support here to be useful |
62 | |
63 | # HTTP/1.1 will require us to send the 'Host' header, so we might |
64 | # as well start now. |
65 | my $hhost = $url->authority; |
66 | if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@" |
67 | # add authorization header if we need them. HTTP URLs do |
68 | # not really support specification of user and password, but |
69 | # we allow it. |
70 | if (defined($1) && not $h->header('Authorization')) { |
71 | require URI::Escape; |
72 | $h->authorization_basic(map URI::Escape::uri_unescape($_), |
73 | split(":", $1, 2)); |
74 | } |
75 | } |
76 | $h->init_header('Host' => $hhost); |
77 | |
78 | if ($proxy) { |
79 | # Check the proxy URI's userinfo() for proxy credentials |
80 | # export http_proxy="http://proxyuser:proxypass@proxyhost:port" |
81 | my $p_auth = $proxy->userinfo(); |
82 | if(defined $p_auth) { |
83 | require URI::Escape; |
84 | $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_), |
85 | split(":", $p_auth, 2)) |
86 | } |
87 | } |
88 | } |
89 | |
90 | |
91 | sub request |
92 | { |
93 | my($self, $request, $proxy, $arg, $size, $timeout) = @_; |
94 | |
95 | $size ||= 4096; |
96 | |
97 | # check method |
98 | my $method = $request->method; |
99 | unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token |
100 | return new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, |
101 | 'Library does not allow method ' . |
102 | "$method for 'http:' URLs"; |
103 | } |
104 | |
105 | my $url = $request->uri; |
106 | my($host, $port, $fullpath); |
107 | |
108 | # Check if we're proxy'ing |
109 | if (defined $proxy) { |
110 | # $proxy is an URL to an HTTP server which will proxy this request |
111 | $host = $proxy->host; |
112 | $port = $proxy->port; |
113 | $fullpath = $method eq "CONNECT" ? |
114 | ($url->host . ":" . $url->port) : |
115 | $url->as_string; |
116 | } |
117 | else { |
118 | $host = $url->host; |
119 | $port = $url->port; |
120 | $fullpath = $url->path_query; |
121 | $fullpath = "/" unless length $fullpath; |
122 | } |
123 | |
124 | # connect to remote site |
125 | my $socket = $self->_new_socket($host, $port, $timeout); |
126 | $self->_check_sock($request, $socket); |
127 | |
128 | my $sel = IO::Select->new($socket) if $timeout; |
129 | |
130 | my $request_line = "$method $fullpath HTTP/1.0$CRLF"; |
131 | |
132 | my $h = $request->headers->clone; |
133 | my $cont_ref = $request->content_ref; |
134 | $cont_ref = $$cont_ref if ref($$cont_ref); |
135 | my $ctype = ref($cont_ref); |
136 | |
137 | # If we're sending content we *have* to specify a content length |
138 | # otherwise the server won't know a messagebody is coming. |
139 | if ($ctype eq 'CODE') { |
140 | die 'No Content-Length header for request with dynamic content' |
141 | unless defined($h->header('Content-Length')) || |
142 | $h->content_type =~ /^multipart\//; |
143 | # For HTTP/1.1 we could have used chunked transfer encoding... |
144 | } |
145 | else { |
146 | $h->header('Content-Length' => length $$cont_ref) |
147 | if defined($$cont_ref) && length($$cont_ref); |
148 | } |
149 | |
150 | $self->_fixup_header($h, $url, $proxy); |
151 | |
152 | my $buf = $request_line . $h->as_string($CRLF) . $CRLF; |
153 | my $n; # used for return value from syswrite/sysread |
154 | my $length; |
155 | my $offset; |
156 | |
157 | # syswrite $buf |
158 | $length = length($buf); |
159 | $offset = 0; |
160 | while ( $offset < $length ) { |
161 | die "write timeout" if $timeout && !$sel->can_write($timeout); |
162 | $n = $socket->syswrite($buf, $length-$offset, $offset ); |
163 | die $! unless defined($n); |
164 | $offset += $n; |
165 | } |
166 | |
167 | if ($ctype eq 'CODE') { |
168 | while ( ($buf = &$cont_ref()), defined($buf) && length($buf)) { |
169 | # syswrite $buf |
170 | $length = length($buf); |
171 | $offset = 0; |
172 | while ( $offset < $length ) { |
173 | die "write timeout" if $timeout && !$sel->can_write($timeout); |
174 | $n = $socket->syswrite($buf, $length-$offset, $offset ); |
175 | die $! unless defined($n); |
176 | $offset += $n; |
177 | } |
178 | } |
179 | } |
180 | elsif (defined($$cont_ref) && length($$cont_ref)) { |
181 | # syswrite $$cont_ref |
182 | $length = length($$cont_ref); |
183 | $offset = 0; |
184 | while ( $offset < $length ) { |
185 | die "write timeout" if $timeout && !$sel->can_write($timeout); |
186 | $n = $socket->syswrite($$cont_ref, $length-$offset, $offset ); |
187 | die $! unless defined($n); |
188 | $offset += $n; |
189 | } |
190 | } |
191 | |
192 | # read response line from server |
193 | my $response; |
194 | $buf = ''; |
195 | |
196 | # Inside this loop we will read the response line and all headers |
197 | # found in the response. |
198 | while (1) { |
199 | die "read timeout" if $timeout && !$sel->can_read($timeout); |
200 | $n = $socket->sysread($buf, $size, length($buf)); |
201 | die $! unless defined($n); |
202 | die "unexpected EOF before status line seen" unless $n; |
203 | |
204 | if ($buf =~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) { |
205 | # HTTP/1.0 response or better |
206 | my($ver,$code,$msg) = ($1, $2, $3); |
207 | $msg =~ s/\015$//; |
208 | $response = HTTP::Response->new($code, $msg); |
209 | $response->protocol($ver); |
210 | |
211 | # ensure that we have read all headers. The headers will be |
212 | # terminated by two blank lines |
213 | until ($buf =~ /^\015?\012/ || $buf =~ /\015?\012\015?\012/) { |
214 | # must read more if we can... |
215 | die "read timeout" if $timeout && !$sel->can_read($timeout); |
216 | my $old_len = length($buf); |
217 | $n = $socket->sysread($buf, $size, $old_len); |
218 | die $! unless defined($n); |
219 | die "unexpected EOF before all headers seen" unless $n; |
220 | } |
221 | |
222 | # now we start parsing the headers. The strategy is to |
223 | # remove one line at a time from the beginning of the header |
224 | # buffer ($res). |
225 | my($key, $val); |
226 | while ($buf =~ s/([^\012]*)\012//) { |
227 | my $line = $1; |
228 | |
229 | # if we need to restore as content when illegal headers |
230 | # are found. |
231 | my $save = "$line\012"; |
232 | |
233 | $line =~ s/\015$//; |
234 | last unless length $line; |
235 | |
236 | if ($line =~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) { |
237 | $response->push_header($key, $val) if $key; |
238 | ($key, $val) = ($1, $2); |
239 | } |
240 | elsif ($line =~ /^\s+(.*)/ && $key) { |
241 | $val .= " $1"; |
242 | } |
243 | else { |
244 | $response->push_header("Client-Bad-Header-Line" => $line); |
245 | } |
246 | } |
247 | $response->push_header($key, $val) if $key; |
248 | last; |
249 | |
250 | } |
251 | elsif ((length($buf) >= 5 and $buf !~ /^HTTP\//) or |
252 | $buf =~ /\012/ ) { |
253 | # HTTP/0.9 or worse |
254 | $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); |
255 | $response->protocol('HTTP/0.9'); |
256 | last; |
257 | |
258 | } |
259 | else { |
260 | # need more data |
261 | } |
262 | }; |
263 | $response->request($request); |
264 | $self->_get_sock_info($response, $socket); |
265 | |
266 | if ($method eq "CONNECT") { |
267 | $response->{client_socket} = $socket; # so it can be picked up |
268 | $response->content($buf); # in case we read more than the headers |
269 | return $response; |
270 | } |
271 | |
272 | my $usebuf = length($buf) > 0; |
273 | $response = $self->collect($arg, $response, sub { |
274 | if ($usebuf) { |
275 | $usebuf = 0; |
276 | return \$buf; |
277 | } |
278 | die "read timeout" if $timeout && !$sel->can_read($timeout); |
279 | my $n = $socket->sysread($buf, $size); |
280 | die $! unless defined($n); |
281 | return \$buf; |
282 | } ); |
283 | |
284 | #$socket->close; |
285 | |
286 | $response; |
287 | } |
288 | |
289 | 1; |