Add built local::lib
[catagits/Gitalist.git] / local-lib5 / lib / perl5 / URI / Escape.pm
CommitLineData
3fea05b9 1package URI::Escape;
2use strict;
3
4=head1 NAME
5
6URI::Escape - Escape and unescape unsafe characters
7
8=head1 SYNOPSIS
9
10 use URI::Escape;
11 $safe = uri_escape("10% is enough\n");
12 $verysafe = uri_escape("foo", "\0-\377");
13 $str = uri_unescape($safe);
14
15=head1 DESCRIPTION
16
17This module provides functions to escape and unescape URI strings as
18defined by RFC 2396 (and updated by RFC 2732).
19A URI consists of a restricted set of characters,
20denoted as C<uric> in RFC 2396. The restricted set of characters
21consists of digits, letters, and a few graphic symbols chosen from
22those common to most of the character encodings and input facilities
23available to Internet users:
24
25 "A" .. "Z", "a" .. "z", "0" .. "9",
26 ";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "[", "]", # reserved
27 "-", "_", ".", "!", "~", "*", "'", "(", ")"
28
29In addition, any byte (octet) can be represented in a URI by an escape
30sequence: a triplet consisting of the character "%" followed by two
31hexadecimal digits. A byte can also be represented directly by a
32character, using the US-ASCII character for that octet (iff the
33character is part of C<uric>).
34
35Some of the C<uric> characters are I<reserved> for use as delimiters
36or as part of certain URI components. These must be escaped if they are
37to be treated as ordinary data. Read RFC 2396 for further details.
38
39The functions provided (and exported by default) from this module are:
40
41=over 4
42
43=item uri_escape( $string )
44
45=item uri_escape( $string, $unsafe )
46
47Replaces each unsafe character in the $string with the corresponding
48escape sequence and returns the result. The $string argument should
49be a string of bytes. The uri_escape() function will croak if given a
50characters with code above 255. Use uri_escape_utf8() if you know you
51have such chars or/and want chars in the 128 .. 255 range treated as
52UTF-8.
53
54The uri_escape() function takes an optional second argument that
55overrides the set of characters that are to be escaped. The set is
56specified as a string that can be used in a regular expression
57character class (between [ ]). E.g.:
58
59 "\x00-\x1f\x7f-\xff" # all control and hi-bit characters
60 "a-z" # all lower case characters
61 "^A-Za-z" # everything not a letter
62
63The default set of characters to be escaped is all those which are
64I<not> part of the C<uric> character class shown above as well as the
65reserved characters. I.e. the default is:
66
67 "^A-Za-z0-9\-_.!~*'()"
68
69=item uri_escape_utf8( $string )
70
71=item uri_escape_utf8( $string, $unsafe )
72
73Works like uri_escape(), but will encode chars as UTF-8 before
74escaping them. This makes this function able do deal with characters
75with code above 255 in $string. Note that chars in the 128 .. 255
76range will be escaped differently by this function compared to what
77uri_escape() would. For chars in the 0 .. 127 range there is no
78difference.
79
80The call:
81
82 $uri = uri_escape_utf8($string);
83
84will be the same as:
85
86 use Encode qw(encode);
87 $uri = uri_escape(encode("UTF-8", $string));
88
89but will even work for perl-5.6 for chars in the 128 .. 255 range.
90
91Note: Javascript has a function called escape() that produces the
92sequence "%uXXXX" for chars in the 256 .. 65535 range. This function
93has really nothing to do with URI escaping but some folks got confused
94since it "does the right thing" in the 0 .. 255 range. Because of
95this you sometimes see "URIs" with these kind of escapes. The
96JavaScript encodeURIComponent() function is similar to uri_escape_utf8().
97
98=item uri_unescape($string,...)
99
100Returns a string with each %XX sequence replaced with the actual byte
101(octet).
102
103This does the same as:
104
105 $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
106
107but does not modify the string in-place as this RE would. Using the
108uri_unescape() function instead of the RE might make the code look
109cleaner and is a few characters less to type.
110
111In a simple benchmark test I did,
112calling the function (instead of the inline RE above) if a few chars
113were unescaped was something like 40% slower, and something like 700% slower if none were. If
114you are going to unescape a lot of times it might be a good idea to
115inline the RE.
116
117If the uri_unescape() function is passed multiple strings, then each
118one is returned unescaped.
119
120=back
121
122The module can also export the C<%escapes> hash, which contains the
123mapping from all 256 bytes to the corresponding escape codes. Lookup
124in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))>
125each time.
126
127=head1 SEE ALSO
128
129L<URI>
130
131
132=head1 COPYRIGHT
133
134Copyright 1995-2004 Gisle Aas.
135
136This program is free software; you can redistribute it and/or modify
137it under the same terms as Perl itself.
138
139=cut
140
141use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
142use vars qw(%escapes);
143
144require Exporter;
145@ISA = qw(Exporter);
146@EXPORT = qw(uri_escape uri_unescape uri_escape_utf8);
147@EXPORT_OK = qw(%escapes);
148$VERSION = "3.29";
149
150use Carp ();
151
152# Build a char->hex map
153for (0..255) {
154 $escapes{chr($_)} = sprintf("%%%02X", $_);
155}
156
157my %subst; # compiled patternes
158
159sub uri_escape
160{
161 my($text, $patn) = @_;
162 return undef unless defined $text;
163 if (defined $patn){
164 unless (exists $subst{$patn}) {
165 # Because we can't compile the regex we fake it with a cached sub
166 (my $tmp = $patn) =~ s,/,\\/,g;
167 eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }";
168 Carp::croak("uri_escape: $@") if $@;
169 }
170 &{$subst{$patn}}($text);
171 } else {
172 # Default unsafe characters. RFC 2732 ^(uric - reserved)
173 $text =~ s/([^A-Za-z0-9\-_.!~*'()])/$escapes{$1} || _fail_hi($1)/ge;
174 }
175 $text;
176}
177
178sub _fail_hi {
179 my $chr = shift;
180 Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr));
181}
182
183sub uri_escape_utf8
184{
185 my $text = shift;
186 if ($] < 5.008) {
187 $text =~ s/([^\0-\x7F])/do {my $o = ord($1); sprintf("%c%c", 0xc0 | ($o >> 6), 0x80 | ($o & 0x3f)) }/ge;
188 }
189 else {
190 utf8::encode($text);
191 }
192
193 return uri_escape($text, @_);
194}
195
196sub uri_unescape
197{
198 # Note from RFC1630: "Sequences which start with a percent sign
199 # but are not followed by two hexadecimal characters are reserved
200 # for future extension"
201 my $str = shift;
202 if (@_ && wantarray) {
203 # not executed for the common case of a single argument
204 my @str = ($str, @_); # need to copy
205 foreach (@str) {
206 s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
207 }
208 return @str;
209 }
210 $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str;
211 $str;
212}
213
214sub escape_char {
215 return join '', @URI::Escape::escapes{$_[0] =~ /(\C)/g};
216}
217
2181;