6 use vars qw($VERSION $TRANSLATE_UNDERSCORE);
9 # The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used
10 # as a replacement for '-' in header field names.
11 $TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE;
13 # "Good Practice" order of HTTP message headers:
19 my @general_headers = qw(
20 Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade
24 my @request_headers = qw(
25 Accept Accept-Charset Accept-Encoding Accept-Language
26 Authorization Expect From Host
27 If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since
28 Max-Forwards Proxy-Authorization Range Referer TE User-Agent
31 my @response_headers = qw(
32 Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server
36 my @entity_headers = qw(
37 Allow Content-Encoding Content-Language Content-Length Content-Location
38 Content-MD5 Content-Range Content-Type Expires Last-Modified
41 my %entity_header = map { lc($_) => 1 } @entity_headers;
50 # Make alternative representations of @header_order. This is used
51 # for sorting and case matching.
59 $header_order{$lc} = ++$i;
60 $standard_case{$lc} = $_;
69 my $self = bless {}, $class;
70 $self->header(@_) if @_; # set up initial headers
78 Carp::croak('Usage: $h->header($field, ...)') unless @_;
83 my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET';
84 @old = $self->_header($field, shift, $op);
86 return @old if wantarray;
87 return $old[0] if @old <= 1;
101 return $self->_header(@_, 'PUSH_H') if @_ == 2;
103 $self->_header(splice(@_, 0, 2), 'PUSH_H');
110 Carp::croak('Usage: $h->init_header($field, $val)') if @_ != 3;
111 shift->_header(@_, 'INIT');
117 my($self, @fields) = @_;
120 foreach $field (@fields) {
121 $field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE;
122 my $v = delete $self->{lc $field};
123 push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v;
128 sub remove_content_headers
131 unless (defined(wantarray)) {
132 # fast branch that does not create return object
133 delete @$self{grep $entity_header{$_} || /^content-/, keys %$self};
137 my $c = ref($self)->new;
138 for my $f (grep $entity_header{$_} || /^content-/, keys %$self) {
139 $c->{$f} = delete $self->{$f};
147 my($self, $field, $val, $op) = @_;
149 unless ($field =~ /^:/) {
150 $field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE;
153 unless(defined $standard_case{$field}) {
154 # generate a %standard_case entry for this field
155 $old =~ s/\b(\w)/\u$1/g;
156 $standard_case{$field} = $old;
160 $op ||= defined($val) ? 'SET' : 'GET';
161 if ($op eq 'PUSH_H') {
162 # Like PUSH but where we don't care about the return value
163 if (exists $self->{$field}) {
164 my $h = $self->{$field};
165 if (ref($h) eq 'ARRAY') {
166 push(@$h, ref($val) eq "ARRAY" ? @$val : $val);
169 $self->{$field} = [$h, ref($val) eq "ARRAY" ? @$val : $val]
173 $self->{$field} = $val;
177 my $h = $self->{$field};
178 my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ());
180 unless ($op eq 'GET' || ($op eq 'INIT' && @old)) {
182 my @new = ($op eq 'PUSH') ? @old : ();
183 if (ref($val) ne 'ARRAY') {
189 $self->{$field} = @new > 1 ? \@new : $new[0];
191 elsif ($op ne 'PUSH') {
192 delete $self->{$field};
199 sub _sorted_field_names
203 ($header_order{$a} || 999) <=> ($header_order{$b} || 999) ||
209 sub header_field_names {
211 return map $standard_case{$_} || $_, $self->_sorted_field_names
219 my($self, $sub) = @_;
221 foreach $key ($self->_sorted_field_names) {
222 next if $key =~ /^_/;
223 my $vals = $self->{$key};
224 if (ref($vals) eq 'ARRAY') {
227 &$sub($standard_case{$key} || $key, $val);
231 &$sub($standard_case{$key} || $key, $vals);
239 my($self, $endl) = @_;
240 $endl = "\n" unless defined $endl;
244 my($field, $val) = @_;
247 # must handle header values with embedded newlines with care
248 $val =~ s/\s+$//; # trailing newlines and space must go
249 $val =~ s/\n\n+/\n/g; # no empty lines
250 $val =~ s/\n([^\040\t])/\n $1/g; # intial space for continuation
251 $val =~ s/\n/$endl/g; # substitute with requested line ending
253 push(@result, "$field: $val");
256 join($endl, @result, '');
260 if (eval { require Storable; 1 }) {
261 *clone = \&Storable::dclone;
265 my $clone = new HTTP::Headers;
266 $self->scan(sub { $clone->push_header(@_);} );
275 my($self, $header, $time) = @_;
276 my($old) = $self->_header($header);
278 $self->_header($header, HTTP::Date::time2str($time));
280 $old =~ s/;.*// if defined($old);
281 HTTP::Date::str2time($old);
# Accessors for date-valued header fields.  Each one takes the invocant off
# the argument list and forwards the remaining arguments to _date_header(),
# passing the name of the header field it manages as the first argument.
285 sub date { shift->_date_header('Date', @_); }
286 sub expires { shift->_date_header('Expires', @_); }
287 sub if_modified_since { shift->_date_header('If-Modified-Since', @_); }
288 sub if_unmodified_since { shift->_date_header('If-Unmodified-Since', @_); }
289 sub last_modified { shift->_date_header('Last-Modified', @_); }
291 # This is used as a private LWP extension. The Client-Date header is
292 # added as a timestamp to a response when it has been received.
# Accessor for the Client-Date field; forwards to _date_header() in the same
# way as the other date accessors.
293 sub client_date { shift->_date_header('Client-Date', @_); }
295 # The retry_after field is dual format (it can also be expressed as a
296 # number of seconds from now), so we don't provide an easy way to
297 # access it until we know how both of these interfaces can be
298 # addressed. One possibility is to return a negative value for
299 # relative seconds and a positive value for epoch based time values.
300 #sub retry_after { shift->_date_header('Retry-After', @_); }
304 my $ct = $self->{'content-type'};
305 $self->{'content-type'} = shift if @_;
306 $ct = $ct->[0] if ref($ct) eq 'ARRAY';
307 return '' unless defined($ct) && length($ct);
308 my @ct = split(/;\s*/, $ct, 2);
313 wantarray ? @ct : $ct[0];
316 sub content_type_charset {
318 require HTTP::Headers::Util;
319 my $h = $self->{'content-type'};
320 $h = $h->[0] if ref($h);
321 $h = "" unless defined $h;
322 my @v = HTTP::Headers::Util::split_header_words($h);
324 my($ct, undef, %ct_param) = @{$v[0]};
325 my $charset = $ct_param{charset};
331 $charset = uc($charset);
332 $charset =~ s/^\s+//; $charset =~ s/\s+\z//;
333 undef($charset) if $charset eq "";
335 return $ct, $charset if wantarray;
338 return undef, undef if wantarray;
342 sub content_is_text {
344 return $self->content_type =~ m,^text/,;
347 sub content_is_html {
349 return $self->content_type eq 'text/html' || $self->content_is_xhtml;
352 sub content_is_xhtml {
353 my $ct = shift->content_type;
354 return $ct eq "application/xhtml+xml" ||
355 $ct eq "application/vnd.wap.xhtml+xml";
359 my $ct = shift->content_type;
360 return 1 if $ct eq "text/xml";
361 return 1 if $ct eq "application/xml";
362 return 1 if $ct =~ /\+xml$/;
368 if (@_ && $_[0] =~ /#/) {
369 # Strip fragment per RFC 2616, section 14.36.
373 $uri->fragment(undef);
380 ($self->_header('Referer', @_))[0];
382 *referrer = \&referer; # on tchrist's request
# Convenience accessors for the Title and Content-* fields.  Each one calls
# _header() with the fixed field name plus any caller-supplied arguments and
# returns only the first element of the list that _header() gives back.
384 sub title { (shift->_header('Title', @_))[0] }
385 sub content_encoding { (shift->_header('Content-Encoding', @_))[0] }
386 sub content_language { (shift->_header('Content-Language', @_))[0] }
387 sub content_length { (shift->_header('Content-Length', @_))[0] }
# Convenience accessors for User-Agent, Server, From and Warning.  Each one
# calls _header() with the fixed field name plus any caller-supplied
# arguments and returns the first element of the resulting list.
389 sub user_agent { (shift->_header('User-Agent', @_))[0] }
390 sub server { (shift->_header('Server', @_))[0] }
392 sub from { (shift->_header('From', @_))[0] }
393 sub warning { (shift->_header('Warning', @_))[0] }
# Convenience accessors for the authentication-related fields.  Each one
# calls _header() with the fixed field name plus any caller-supplied
# arguments and returns the first element of the resulting list.
395 sub www_authenticate { (shift->_header('WWW-Authenticate', @_))[0] }
396 sub authorization { (shift->_header('Authorization', @_))[0] }
398 sub proxy_authenticate { (shift->_header('Proxy-Authenticate', @_))[0] }
399 sub proxy_authorization { (shift->_header('Proxy-Authorization', @_))[0] }
# Basic-authentication helpers.  Both forward to _basic_auth(), passing the
# name of the header field to operate on followed by the caller's arguments.
401 sub authorization_basic { shift->_basic_auth("Authorization", @_) }
402 sub proxy_authorization_basic { shift->_basic_auth("Proxy-Authorization", @_) }
405 require MIME::Base64;
406 my($self, $h, $user, $passwd) = @_;
407 my($old) = $self->_header($h);
409 Carp::croak("Basic authorization user name can't contain ':'")
411 $passwd = '' unless defined $passwd;
412 $self->_header($h => 'Basic ' .
413 MIME::Base64::encode("$user:$passwd", ''));
415 if (defined $old && $old =~ s/^\s*Basic\s+//) {
416 my $val = MIME::Base64::decode($old);
417 return $val unless wantarray;
418 return split(/:/, $val, 2);
430 HTTP::Headers - Class encapsulating HTTP Message headers
434 require HTTP::Headers;
435 $h = HTTP::Headers->new;
437 $h->header('Content-Type' => 'text/plain'); # set
438 $ct = $h->header('Content-Type'); # get
439 $h->remove_header('Content-Type'); # delete
443 The C<HTTP::Headers> class encapsulates HTTP-style message headers.
444 The headers consist of attribute-value pairs also called fields, which
445 may be repeated, and which are printed in a particular order. The
446 field names are case insensitive.
448 Instances of this class are usually created as member variables of the
449 C<HTTP::Request> and C<HTTP::Response> classes, internal to the
452 The following methods are available:
456 =item $h = HTTP::Headers->new
458 Constructs a new C<HTTP::Headers> object. You might pass some initial
459 attribute-value pairs as parameters to the constructor. I<E.g.>:
461 $h = HTTP::Headers->new(
462 Date => 'Thu, 03 Feb 1994 00:00:00 GMT',
463 Content_Type => 'text/html; version=3.2',
464 Content_Base => 'http://www.perl.org/');
466 The constructor arguments are passed to the C<header> method which is
471 Returns a copy of this C<HTTP::Headers> object.
473 =item $h->header( $field )
475 =item $h->header( $field => $value )
477 =item $h->header( $f1 => $v1, $f2 => $v2, ... )
479 Get or set the value of one or more header fields. The header field
480 name ($field) is not case sensitive. To make life easier for Perl
481 users who want to avoid quoting before the => operator, you can use
482 '_' as a replacement for '-' in header names.
484 The header() method accepts multiple ($field => $value) pairs, which
485 means that you can update several fields with a single invocation.
487 The $value argument may be a plain string or a reference to an array
488 of strings for a multi-valued field. If the $value is provided as
489 C<undef> then the field is removed. If the $value is not given, then
490 that header field will remain unchanged.
492 The old value (or values) of the last of the header fields is returned.
493 If no such field exists C<undef> will be returned.
495 A multi-valued field will be returned as separate values in list
496 context and will be concatenated with ", " as separator in scalar
497 context. The HTTP spec (RFC 2616) promises that joining multiple
498 values in this way will not change the semantics of a header field, but
499 in practice there are cases like old-style Netscape cookies (see
500 L<HTTP::Cookies>) where "," is used as part of the syntax of a single
505 $header->header(MIME_Version => '1.0',
506 User_Agent => 'My-Web-Client/0.01');
507 $header->header(Accept => "text/html, text/plain, image/*");
508 $header->header(Accept => [qw(text/html text/plain image/*)]);
509 @accepts = $header->header('Accept'); # get multiple values
510 $accepts = $header->header('Accept'); # get values as a single string
512 =item $h->push_header( $field => $value )
514 =item $h->push_header( $f1 => $v1, $f2 => $v2, ... )
516 Add a new field value for the specified header field. Previous values
517 for the same field are retained.
519 As for the header() method, the field name ($field) is not case
520 sensitive and '_' can be used as a replacement for '-'.
522 The $value argument may be a scalar or a reference to a list of
525 $header->push_header(Accept => 'image/jpeg');
526 $header->push_header(Accept => [map "image/$_", qw(gif png tiff)]);
528 =item $h->init_header( $field => $value )
530 Set the specified header to the given value, but only if no previous
531 value for that field is set.
533 The header field name ($field) is not case sensitive and '_'
534 can be used as a replacement for '-'.
536 The $value argument may be a scalar or a reference to a list of
539 =item $h->remove_header( $field, ... )
541 This function removes the header fields with the specified names.
543 The header field names ($field) are not case sensitive and '_'
544 can be used as a replacement for '-'.
546 The return value is the values of the fields removed. In scalar
547 context the number of fields removed is returned.
549 Note that if you pass in multiple field names then it is generally not
550 possible to tell which of the returned values belonged to which field.
552 =item $h->remove_content_headers
554 This will remove all the header fields used to describe the content of
555 a message. All header field names prefixed with C<Content-> fall
556 into this category, as well as C<Allow>, C<Expires> and
557 C<Last-Modified>. RFC 2616 denotes these fields as I<Entity Header
560 The return value is a new C<HTTP::Headers> object that contains the
561 removed headers only.
565 This will remove all header fields.
567 =item $h->header_field_names
569 Returns the list of distinct names for the fields present in the
570 header. The field names have case as suggested by HTTP spec, and the
571 names are returned in the recommended "Good Practice" order.
573 In scalar context return the number of distinct field names.
575 =item $h->scan( \&process_header_field )
577 Apply a subroutine to each header field in turn. The callback routine
578 is called with two parameters; the name of the field and a single
579 value (a string). If a header field is multi-valued, then the
580 routine is called once for each value. The field name passed to the
581 callback routine has case as suggested by HTTP spec, and the headers
582 will be visited in the recommended "Good Practice" order.
584 Any return values of the callback routine are ignored. The loop can
585 be broken by raising an exception (C<die>), but the caller of scan()
586 would have to trap the exception itself.
590 =item $h->as_string( $eol )
592 Return the header fields as a formatted MIME header. Since it
593 internally uses the C<scan> method to build the string, the result
594 will use case as suggested by HTTP spec, and it will follow
595 recommended "Good Practice" of ordering the header fields. Long header
596 values are not folded.
598 The optional $eol parameter specifies the line ending sequence to
599 use. The default is "\n". Embedded "\n" characters in header field
600 values will be substituted with this line ending sequence.
604 =head1 CONVENIENCE METHODS
606 The most frequently used headers can also be accessed through the
607 following convenience methods. Most of these methods can be used both to read
608 and to set the value of a header. The header value is set if you pass
609 an argument to the method. The old header value is always returned.
610 If the given header did not exist then C<undef> is returned.
612 Methods that deal with dates/times always convert their value to system
613 time (seconds since Jan 1, 1970) and they also expect this kind of
614 value when the header value is set.
620 This header represents the date and time at which the message was
623 $h->date(time); # set current date
627 This header gives the date and time after which the entity should be
630 =item $h->if_modified_since
632 =item $h->if_unmodified_since
634 These header fields are used to make a request conditional. If the requested
635 resource has (or has not) been modified since the time specified in this field,
636 then the server will return a C<304 Not Modified> response instead of
639 =item $h->last_modified
641 This header indicates the date and time at which the resource was last
644 # check if document is more than 1 hour old
645 if (my $last_mod = $h->last_modified) {
646 if ($last_mod < time - 60*60) {
651 =item $h->content_type
653 The Content-Type header field indicates the media type of the message
656 $h->content_type('text/html');
658 The value returned will be converted to lower case, and potential
659 parameters will be chopped off and returned as a separate value if in
660 an array context. If there is no such header field, then the empty
661 string is returned. This makes it safe to do the following:
663 if ($h->content_type eq 'text/html') {
664 # we enter this place even if the real header value happens to
665 # be 'TEXT/HTML; version=3.0'
669 =item $h->content_type_charset
671 Returns the upper-cased charset specified in the Content-Type header. In list
672 context return the lower-cased bare content type followed by the upper-cased
673 charset. Both values will be C<undef> if not specified in the header.
675 =item $h->content_is_text
677 Returns TRUE if the Content-Type header field indicates that the
680 =item $h->content_is_html
682 Returns TRUE if the Content-Type header field indicates that the
683 content is some kind of HTML (including XHTML). This method can't be
684 used to set Content-Type.
686 =item $h->content_is_xhtml
688 Returns TRUE if the Content-Type header field indicates that the
689 content is XHTML. This method can't be used to set Content-Type.
691 =item $h->content_is_xml
693 Returns TRUE if the Content-Type header field indicates that the
694 content is XML. This method can't be used to set Content-Type.
696 =item $h->content_encoding
698 The Content-Encoding header field is used as a modifier to the
699 media type. When present, its value indicates what additional
700 encoding mechanism has been applied to the resource.
702 =item $h->content_length
704 A decimal number indicating the size in bytes of the message content.
706 =item $h->content_language
708 The natural language(s) of the intended audience for the message
709 content. The value is one or more language tags as defined by RFC
710 1766. E.g. "no" for some kind of Norwegian and "en-US" for English the
711 way it is written in the US.
715 The title of the document. In libwww-perl this header will be
716 initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element
717 of HTML documents. I<This header is no longer part of the HTTP
722 This header field is used in request messages and contains information
723 about the user agent originating the request. I<E.g.>:
725 $h->user_agent('Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)');
729 The server header field contains information about the software being
730 used by the originating server program handling the request.
734 This header should contain an Internet e-mail address for the human
735 user who controls the requesting user agent. The address should be
736 machine-usable, as defined by RFC822. E.g.:
738 $h->from('King Kong <king@kong.com>');
740 I<This header is no longer part of the HTTP standard.>
744 Used to specify the address (URI) of the document from which the
745 requested resource address was obtained.
747 The "Free On-line Dictionary of Computing" has this to say about the
750 <World-Wide Web> A misspelling of "referrer" which
751 somehow made it into the {HTTP} standard. A given {web
752 page}'s referer (sic) is the {URL} of whatever web page
753 contains the link that the user followed to the current
754 page. Most browsers pass this information as part of a
759 By popular demand C<referrer> exists as an alias for this method so you
760 can avoid this misspelling in your programs and still send the right
763 When setting the referrer, this method removes the fragment from the
764 given URI if it is present, as mandated by RFC2616. Note that
765 the removal does I<not> happen automatically if using the header(),
766 push_header() or init_header() methods to set the referrer.
768 =item $h->www_authenticate
770 This header must be included as part of a C<401 Unauthorized> response.
771 The field value consists of a challenge that indicates the
772 authentication scheme and parameters applicable to the requested URI.
774 =item $h->proxy_authenticate
776 This header must be included in a C<407 Proxy Authentication Required>
779 =item $h->authorization
781 =item $h->proxy_authorization
783 A user agent that wishes to authenticate itself with a server or a
784 proxy, may do so by including these headers.
786 =item $h->authorization_basic
788 This method is used to get or set an authorization header that uses the
789 "Basic Authentication Scheme". In array context it will return two
790 values; the user name and the password. In scalar context it will
791 return I<"uname:password"> as a single string value.
793 When used to set the header value, it expects two arguments. I<E.g.>:
795 $h->authorization_basic($uname, $password);
797 The method will croak if the $uname contains a colon ':'.
799 =item $h->proxy_authorization_basic
801 Same as authorization_basic() but will set the "Proxy-Authorization"
806 =head1 NON-CANONICALIZED FIELD NAMES
808 The header field name spelling is normally canonicalized including the
809 '_' to '-' translation. There are some applications where this is not
810 appropriate. Prefixing field names with ':' allows you to force a
811 specific spelling. For example if you really want a header field name
812 to show up as C<foo_bar> instead of "Foo-Bar", you might set it like
815 $h->header(":foo_bar" => 1);
817 These field names are returned with the ':' intact for
818 $h->header_field_names and the $h->scan callback, but the colons do
819 not show in $h->as_string.
823 Copyright 1995-2005 Gisle Aas.
825 This library is free software; you can redistribute it and/or
826 modify it under the same terms as Perl itself.