local-lib5/lib/perl5/HTTP/Headers/Util.pm

   1 package HTTP::Headers::Util;
   2
   3 use strict;
   4 use vars qw($VERSION @ISA @EXPORT_OK);
   5
   6 $VERSION = "5.817";
   7
   8 require Exporter;
   9 @ISA=qw(Exporter);
  10
  11 @EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
  12
  13
  14
  15 sub split_header_words {
  16     my @res = &_split_header_words;
  17     for my $arr (@res) {
  18         for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
  19             $arr->[$i] = lc($arr->[$i]);
  20         }
  21     }
  22     return @res;
  23 }
  24
  25 sub _split_header_words
  26 {
  27     my(@val) = @_;
  28     my @res;
  29     for (@val) {
  30         my @cur;
  31         while (length) {
  32             if (s/^\s*(=*[^\s=;,]+)//) {  # 'token' or parameter 'attribute'
  33                 push(@cur, $1);
  34                 # a quoted value
  35                 if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
  36                     my $val = $1;
  37                     $val =~ s/\\(.)/$1/g;
  38                     push(@cur, $val);
  39                 # some unquoted value
  40                 }
  41                 elsif (s/^\s*=\s*([^;,\s]*)//) {
  42                     my $val = $1;
  43                     $val =~ s/\s+$//;
  44                     push(@cur, $val);
  45                 # no value, a lone token
  46                 }
  47                 else {
  48                     push(@cur, undef);
  49                 }
  50             }
  51             elsif (s/^\s*,//) {
  52                 push(@res, [@cur]) if @cur;
  53                 @cur = ();
  54             }
  55             elsif (s/^\s*;// || s/^\s+//) {
  56                 # continue
  57             }
  58             else {
  59                 die "This should not happen: '$_'";
  60             }
  61         }
  62         push(@res, \@cur) if @cur;
  63     }
  64     @res;
  65 }
  66
  67
  68 sub join_header_words
  69 {
  70     @_ = ([@_]) if @_ && !ref($_[0]);
  71     my @res;
  72     for (@_) {
  73         my @cur = @$_;
  74         my @attr;
  75         while (@cur) {
  76             my $k = shift @cur;
  77             my $v = shift @cur;
  78             if (defined $v) {
  79                 if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
  80                     $v =~ s/([\"\\])/\\$1/g;  # escape " and \
  81                     $k .= qq(="$v");
  82                 }
  83                 else {
  84                     # token
  85                     $k .= "=$v";
  86                 }
  87             }
  88             push(@attr, $k);
  89         }
  90         push(@res, join("; ", @attr)) if @attr;
  91     }
  92     join(", ", @res);
  93 }
  94
  95
  96 1;
  97
  98 __END__
  99
 100 =head1 NAME
 101
 102 HTTP::Headers::Util - Header value parsing utility functions
 103
 104 =head1 SYNOPSIS
 105
 106   use HTTP::Headers::Util qw(split_header_words);
 107   @values = split_header_words($h->header("Content-Type"));
 108
 109 =head1 DESCRIPTION
 110
 111 This module provides a few functions that helps parsing and
 112 construction of valid HTTP header values.  None of the functions are
 113 exported by default.
 114
 115 The following functions are available:
 116
 117 =over 4
 118
 119
 120 =item split_header_words( @header_values )
 121
 122 This function will parse the header values given as argument into a
 123 list of anonymous arrays containing key/value pairs.  The function
 124 knows how to deal with ",", ";" and "=" as well as quoted values after
 125 "=".  A list of space separated tokens are parsed as if they were
 126 separated by ";".
 127
 128 If the @header_values passed as argument contains multiple values,
 129 then they are treated as if they were a single value separated by
 130 comma ",".
 131
 132 This means that this function is useful for parsing header fields that
 133 follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
 134 the requirement for tokens).
 135
 136   headers           = #header
 137   header            = (token | parameter) *( [";"] (token | parameter))
 138
 139   token             = 1*<any CHAR except CTLs or separators>
 140   separators        = "(" | ")" | "<" | ">" | "@"
 141                     | "," | ";" | ":" | "\" | <">
 142                     | "/" | "[" | "]" | "?" | "="
 143                     | "{" | "}" | SP | HT
 144
 145   quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
 146   qdtext            = <any TEXT except <">>
 147   quoted-pair       = "\" CHAR
 148
 149   parameter         = attribute "=" value
 150   attribute         = token
 151   value             = token | quoted-string
 152
 153 Each I<header> is represented by an anonymous array of key/value
 154 pairs.  The keys will be all be forced to lower case.
 155 The value for a simple token (not part of a parameter) is C<undef>.
 156 Syntactically incorrect headers will not necessary be parsed as you
 157 would want.
 158
 159 This is easier to describe with some examples:
 160
 161    split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
 162    split_header_words('text/html; charset="iso-8859-1"');
 163    split_header_words('Basic realm="\\"foo\\\\bar\\""');
 164
 165 will return
 166
 167    [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
 168    ['text/html' => undef, charset => 'iso-8859-1']
 169    [basic => undef, realm => "\"foo\\bar\""]
 170
 171 If you don't want the function to convert tokens and attribute keys to
 172 lower case you can call it as C<_split_header_words> instead (with a
 173 leading underscore).
 174
 175 =item join_header_words( @arrays )
 176
 177 This will do the opposite of the conversion done by split_header_words().
 178 It takes a list of anonymous arrays as arguments (or a list of
 179 key/value pairs) and produces a single header value.  Attribute values
 180 are quoted if needed.
 181
 182 Example:
 183
 184    join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
 185    join_header_words("text/plain" => undef, charset => "iso-8859/1");
 186
 187 will both return the string:
 188
 189    text/plain; charset="iso-8859/1"
 190
 191 =back
 192
 193 =head1 COPYRIGHT
 194
 195 Copyright 1997-1998, Gisle Aas
 196
 197 This library is free software; you can redistribute it and/or
 198 modify it under the same terms as Perl itself.
 199