7 use vars qw($ABS_REMOTE_LEADING_DOTS $ABS_ALLOW_RELATIVE_SCHEME $DEFAULT_QUERY_FORM_DELIMITER);
9 my %implements; # mapping from scheme to implementor class
11 # Some "official" character classes
13 use vars qw($reserved $mark $unreserved $uric $scheme_re);
14 $reserved = q(;/?:@&=+$,[]);
15 $mark = q(-_.!~*'()); #'; emacs
16 $unreserved = "A-Za-z0-9\Q$mark\E";
17 $uric = quotemeta($reserved) . $unreserved . "%";
19 $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*';
24 use overload ('""' => sub { ${$_[0]} },
25 '==' => sub { _obj_eq(@_) },
26 '!=' => sub { !_obj_eq(@_) },
30 # Check if two objects are the same object
32 return overload::StrVal($_[0]) eq overload::StrVal($_[1]);
37 my($class, $uri, $scheme) = @_;
39 $uri = defined ($uri) ? "$uri" : ""; # stringify
40 # Get rid of potential wrapping
41 $uri =~ s/^<(?:URL:)?(.*)>$/$1/; #
42 $uri =~ s/^"(.*)"$/$1/;
47 if ($uri =~ m/^($scheme_re):/so) {
51 if (($impclass = ref($scheme))) {
52 $scheme = $scheme->scheme;
54 elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) {
58 $impclass ||= implementor($scheme) ||
60 require URI::_foreign;
61 $impclass = 'URI::_foreign';
64 return $impclass->_init($uri, $scheme);
70 my($class, $uri, $base) = @_;
71 $uri = $class->new($uri, $base);
79 my($str, $scheme) = @_;
80 # find all funny characters and encode the bytes.
81 $str = $class->_uric_escape($str);
82 $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o ||
83 $class->_no_scheme_ok;
84 my $self = bless \$str, $class;
91 my($class, $str) = @_;
92 $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego;
99 my($scheme, $impclass) = @_;
100 if (!$scheme || $scheme !~ /\A$scheme_re\z/o) {
101 require URI::_generic;
102 return "URI::_generic";
105 $scheme = lc($scheme);
108 # Set the implementor class for a given scheme
109 my $old = $implements{$scheme};
110 $impclass->_init_implementor($scheme);
111 $implements{$scheme} = $impclass;
115 my $ic = $implements{$scheme};
118 # scheme not yet known, look for internal or
119 # preloaded (with 'use') implementation
120 $ic = "URI::$scheme"; # default location
122 # turn scheme into a valid perl identifier by a simple transformation...
128 # check we actually have one for the scheme:
129 unless (@{"${ic}::ISA"}) {
132 die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/;
133 return unless @{"${ic}::ISA"};
136 $ic->_init_implementor($scheme);
137 $implements{$scheme} = $ic;
142 sub _init_implementor
144 my($class, $scheme) = @_;
145 # Remember that one implementor class may actually
146 # serve to implement several URI schemes.
154 bless \$other, ref $self;
158 sub _no_scheme_ok { 0 }
165 return unless $$self =~ /^($scheme_re):/o;
171 if (defined($new) && length($new)) {
172 Carp::croak("Bad scheme '$new'") unless $new =~ /^$scheme_re$/o;
173 $old = $1 if $$self =~ s/^($scheme_re)://o;
174 my $newself = URI->new("$new:$$self");
176 bless $self, ref($newself);
179 if ($self->_no_scheme_ok) {
180 $old = $1 if $$self =~ s/^($scheme_re)://o;
181 Carp::carp("Oops, opaque part now look like scheme")
182 if $^W && $$self =~ m/^$scheme_re:/o
185 $old = $1 if $$self =~ m/^($scheme_re):/o;
194 my $scheme = shift->_scheme(@_);
195 return unless defined $scheme;
205 $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die;
209 $$self =~ /^($scheme_re:)? # optional scheme
211 (\#.*)? # optional fragment
218 my $new_opaque = shift;
219 $new_opaque = "" unless defined $new_opaque;
220 $new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego;
222 $$self = defined($old_scheme) ? $old_scheme : "";
223 $$self .= $new_opaque;
224 $$self .= $old_frag if defined $old_frag;
229 *path = \&opaque; # alias
236 return unless $$self =~ /\#(.*)/s;
241 $old = $1 if $$self =~ s/\#(.*)//s;
243 my $new_frag = shift;
244 if (defined $new_frag) {
245 $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego;
246 $$self .= "#$new_frag";
263 if ($str =~ /\bxn--/ && $self->can("ihost")) {
264 my $ihost = $self->ihost;
266 my $u = $self->clone;
267 $u->host("%%host%%");
268 $str = $u->as_string;
269 $str =~ s/%%host%%/$ihost/;
272 if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) {
273 # All this crap because the more obvious:
275 # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift })
277 # doesn't work. Apparently passing a sub as CHECK only works
278 # for 'ascii' and similar direct encodings.
281 my $enc = Encode::find_encoding("UTF-8");
283 while (length $str) {
284 $u .= $enc->decode($str, Encode::FB_QUIET());
287 $u .= URI::Escape::escape_char(substr($str, 0, 1, ""));
298 # Make sure scheme is lowercased, that we don't escape unreserved chars,
299 # and that we use upcase escape sequences.
302 my $scheme = $self->_scheme || "";
303 my $uc_scheme = $scheme =~ /[A-Z]/;
304 my $esc = $$self =~ /%[a-fA-F0-9]{2}/;
305 return $self unless $uc_scheme || $esc;
307 my $other = $self->clone;
309 $other->_scheme(lc $scheme);
312 $$other =~ s{%([0-9a-fA-F]{2})}
313 { my $a = chr(hex($1));
314 $a =~ /^[$unreserved]\z/o ? $a : "%\U$1"
320 # Compare two URIs, subclasses will provide a more correct implementation
322 my($self, $other) = @_;
323 $self = URI->new($self, $other) unless ref $self;
324 $other = URI->new($other, $self) unless ref $other;
325 ref($self) eq ref($other) && # same class
326 $self->canonical->as_string eq $other->canonical->as_string;
329 # generic-URI transformation methods
334 sub STORABLE_freeze {
335 my($self, $cloning) = @_;
340 my($self, $cloning, $str) = @_;
350 URI - Uniform Resource Identifiers (absolute and relative)
354 $u1 = URI->new("http://www.perl.com");
355 $u2 = URI->new("foo", "http");
358 $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical;
360 $str = $u->as_string;
363 $scheme = $u->scheme;
364 $opaque = $u->opaque;
366 $frag = $u->fragment;
369 $u->host("ftp.perl.com");
374 This module implements the C<URI> class. Objects of this class
375 represent "Uniform Resource Identifier references" as specified in RFC
376 2396 (and updated by RFC 2732).
378 A Uniform Resource Identifier is a compact string of characters that
379 identifies an abstract or physical resource. A Uniform Resource
380 Identifier can be further classified as either a Uniform Resource Locator
381 (URL) or a Uniform Resource Name (URN). The distinction between URL
382 and URN does not matter to the C<URI> class interface. A
383 "URI-reference" is a URI that may have additional information attached
384 in the form of a fragment identifier.
386 An absolute URI reference consists of three parts: a I<scheme>, a
387 I<scheme-specific part> and a I<fragment> identifier. A subset of URI
388 references share a common syntax for hierarchical namespaces. For
389 these, the scheme-specific part is further broken down into
390 I<authority>, I<path> and I<query> components. These URIs can also
391 take the form of relative URI references, where the scheme (and
392 usually also the authority) component is missing, but implied by the
393 context of the URI reference. The three forms of URI reference
394 syntax are summarized as follows:
396 <scheme>:<scheme-specific-part>#<fragment>
397 <scheme>://<authority><path>?<query>#<fragment>
398 <path>?<query>#<fragment>
400 The components into which a URI reference can be divided depend on the
401 I<scheme>. The C<URI> class provides methods to get and set the
402 individual components. The methods available for a specific
403 C<URI> object depend on the scheme.
407 The following methods construct new C<URI> objects:
411 =item $uri = URI->new( $str )
413 =item $uri = URI->new( $str, $scheme )
415 Constructs a new URI object. The string
416 representation of a URI is given as argument, together with an optional
417 scheme specification. Common URI wrappers like "" and <>, as well as
418 leading and trailing white space, are automatically removed from
419 the $str argument before it is processed further.
421 The constructor determines the scheme, maps this to an appropriate
422 URI subclass, constructs a new object of that class and returns it.
424 The $scheme argument is only used when $str is a
425 relative URI. It can be either a simple string that
426 denotes the scheme, a string containing an absolute URI reference, or
427 an absolute C<URI> object. If no $scheme is specified for a relative
428 URI $str, then $str is simply treated as a generic URI (no scheme-specific
431 The set of characters available for building URI references is
432 restricted (see L<URI::Escape>). Characters outside this set are
433 automatically escaped by the URI constructor.
435 =item $uri = URI->new_abs( $str, $base_uri )
437 Constructs a new absolute URI object. The $str argument can
438 denote a relative or absolute URI. If relative, then it is
439 absolutized using $base_uri as base. The $base_uri must be an absolute
442 =item $uri = URI::file->new( $filename )
444 =item $uri = URI::file->new( $filename, $os )
446 Constructs a new I<file> URI from a file name. See L<URI::file>.
448 =item $uri = URI::file->new_abs( $filename )
450 =item $uri = URI::file->new_abs( $filename, $os )
452 Constructs a new absolute I<file> URI from a file name. See
455 =item $uri = URI::file->cwd
457 Returns the current working directory as a I<file> URI. See
462 Returns a copy of the $uri.
466 =head1 COMMON METHODS
468 The methods described in this section are available for all C<URI>
471 Methods that give access to components of a URI always return the
472 old value of the component. The value returned is C<undef> if the
473 component was not present. There is generally a difference between a
474 component that is empty (represented as C<"">) and a component that is
475 missing (represented as C<undef>). If an accessor method is given an
476 argument, it updates the corresponding component in addition to
477 returning the old value of the component. Passing an undefined
478 argument removes the component (if possible). The description of
479 each accessor method indicates whether the component is passed as
480 an escaped or an unescaped string. A component that can be further
481 divided into sub-parts is usually passed escaped, as unescaping might
482 change its semantics.
484 The common methods available for all URI are:
490 =item $uri->scheme( $new_scheme )
492 Sets and returns the scheme part of the $uri. If the $uri is
493 relative, then $uri->scheme returns C<undef>. If called with an
494 argument, it updates the scheme of $uri, possibly changing the
495 class of $uri, and returns the old scheme value. The method croaks
496 if the new scheme name is illegal; a scheme name must begin with a
497 letter and must consist of only US-ASCII letters, numbers, and a few
498 special marks: ".", "+", "-". This restriction effectively means
499 that the scheme must be passed unescaped. Passing an undefined
500 argument to the scheme method makes the URI relative (if possible).
502 Letter case does not matter for scheme names. The string
503 returned by $uri->scheme is always lowercase. If you want the scheme
504 just as it was written in the URI in its original case,
505 you can use the $uri->_scheme method instead.
509 =item $uri->opaque( $new_opaque )
511 Sets and returns the scheme-specific part of the $uri
512 (everything between the scheme and the fragment)
513 as an escaped string.
517 =item $uri->path( $new_path )
519 Sets and returns the same value as $uri->opaque unless the URI
520 supports the generic syntax for hierarchical namespaces.
521 In that case the generic method is overridden to set and return
522 the part of the URI between the I<host name> and the I<fragment>.
526 =item $uri->fragment( $new_frag )
528 Returns the fragment identifier of a URI reference
529 as an escaped string.
531 =item $uri->as_string
533 Returns the URI object as a plain ASCII string. URI objects are
534 also converted to plain strings automatically by overloading. This
535 means that $uri objects can be used as plain strings in most Perl
540 Returns a Unicode string representing the URI. Escaped UTF-8 sequences
541 representing non-ASCII characters are turned into their corresponding Unicode
544 =item $uri->canonical
546 Returns a normalized version of the URI. The rules
547 for normalization are scheme-dependent. They usually involve
548 lowercasing the scheme and Internet host name components,
549 removing the explicit port specification if it matches the default port,
550 uppercasing all escape sequences, and unescaping octets that can be
551 better represented as plain characters.
553 For efficiency reasons, if the $uri is already in normalized form,
554 then a reference to it is returned instead of a copy.
556 =item $uri->eq( $other_uri )
558 =item URI::eq( $first_uri, $other_uri )
560 Tests whether two URI references are equal. URI references
561 that normalize to the same string are considered equal. The method
562 can also be used as a plain function which can also test two string
565 If you need to test whether two C<URI> object references denote the
566 same object, use the '==' operator.
568 =item $uri->abs( $base_uri )
570 Returns an absolute URI reference. If $uri is already
571 absolute, then a reference to it is simply returned. If the $uri
572 is relative, then a new absolute URI is constructed by combining the
573 $uri and the $base_uri, and returned.
575 =item $uri->rel( $base_uri )
577 Returns a relative URI reference if it is possible to
578 make one that denotes the same resource relative to $base_uri.
579 If not, then $uri is simply returned.
583 =head1 GENERIC METHODS
585 The following methods are available to schemes that use the
586 common/generic syntax for hierarchical namespaces. The descriptions of
587 schemes below indicate which these are. Unknown schemes are
588 assumed to support the generic syntax, and therefore the following
593 =item $uri->authority
595 =item $uri->authority( $new_authority )
597 Sets and returns the escaped authority component
602 =item $uri->path( $new_path )
604 Sets and returns the escaped path component of
605 the $uri (the part between the host name and the query or fragment).
606 The path can never be undefined, but it can be the empty string.
608 =item $uri->path_query
610 =item $uri->path_query( $new_path_query )
612 Sets and returns the escaped path and query
613 components as a single entity. The path and the query are
614 separated by a "?" character, but the query can itself contain "?".
616 =item $uri->path_segments
618 =item $uri->path_segments( $segment, ... )
620 Sets and returns the path. In a scalar context, it returns
621 the same value as $uri->path. In a list context, it returns the
622 unescaped path segments that make up the path. Path segments that
623 have parameters are returned as an anonymous array. The first element
624 is the unescaped path segment proper; subsequent elements are escaped
625 parameter strings. Such an anonymous array uses overloading so it can
626 be treated as a string too, but this string does not include the
629 Note that absolute paths have the empty string as their first
630 I<path_segment>, i.e. the I<path> C</foo/bar> has 3
631 I<path_segments>; "", "foo" and "bar".
635 =item $uri->query( $new_query )
637 Sets and returns the escaped query component of
640 =item $uri->query_form
642 =item $uri->query_form( $key1 => $val1, $key2 => $val2, ... )
644 =item $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim )
646 =item $uri->query_form( \@key_value_pairs )
648 =item $uri->query_form( \@key_value_pairs, $delim )
650 =item $uri->query_form( \%hash )
652 =item $uri->query_form( \%hash, $delim )
654 Sets and returns query components that use the
655 I<application/x-www-form-urlencoded> format. Key/value pairs are
656 separated by "&", and the key is separated from the value by a "="
659 The form can be set either by passing separate key/value pairs, or via
660 an array or hash reference. Passing an empty array or an empty hash
661 removes the query component, whereas passing no arguments at all leaves
662 the component unchanged. The order of keys is undefined if a hash
663 reference is passed. The old value is always returned as a list of
664 separate key/value pairs. Assigning this list to a hash is unwise as
665 the keys returned might repeat.
667 The values passed when setting the form can be plain strings or
668 references to arrays of strings. Passing an array of values has the
669 same effect as passing the key repeatedly with one value at a time.
670 All the following statements have the same effect:
672 $uri->query_form(foo => 1, foo => 2);
673 $uri->query_form(foo => [1, 2]);
674 $uri->query_form([ foo => 1, foo => 2 ]);
675 $uri->query_form([ foo => [1, 2] ]);
676 $uri->query_form({ foo => [1, 2] });
678 The $delim parameter can be passed as ";" to force the key/value pairs
679 to be delimited by ";" instead of "&" in the query string. This
680 practice is often recommended for URLs embedded in HTML or XML
681 documents as this avoids the trouble of escaping the "&" character.
682 You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable to
683 ";" for the same global effect.
685 The C<URI::QueryParam> module can be loaded to add further methods to
686 manipulate the form of a URI. See L<URI::QueryParam> for details.
688 =item $uri->query_keywords
690 =item $uri->query_keywords( $keywords, ... )
692 =item $uri->query_keywords( \@keywords )
694 Sets and returns query components that use the
695 keywords separated by "+" format.
697 The keywords can be set either by passing separate keywords directly
698 or by passing a reference to an array of keywords. Passing an empty
699 array removes the query component, whereas passing no arguments at
700 all leaves the component unchanged. The old value is always returned
701 as a list of separate words.
705 =head1 SERVER METHODS
707 For schemes where the I<authority> component denotes an Internet host,
708 the following methods are available in addition to the generic
715 =item $uri->userinfo( $new_userinfo )
717 Sets and returns the escaped userinfo part of the
720 For some schemes this is a user name and a password separated by
721 a colon. This practice is not recommended. Embedding passwords in
722 clear text (such as URI) has proven to be a security risk in almost
723 every case where it has been used.
727 =item $uri->host( $new_host )
729 Sets and returns the unescaped hostname.
731 If the $new_host string ends with a colon and a number, then this
732 number also sets the port.
734 For IPv6 addresses the brackets around the raw address are removed in the return
735 value from $uri->host. When setting the host attribute to an IPv6 address you
736 can use a raw address or one enclosed in brackets. The address needs to be
737 enclosed in brackets if you want to pass in a new port value as well.
741 Returns the host in Unicode form. Any IDNA A-labels are turned into U-labels.
745 =item $uri->port( $new_port )
747 Sets and returns the port. The port is a simple integer
748 that should be greater than 0.
750 If a port is not specified explicitly in the URI, then the URI scheme's default port
751 is returned. If you don't want the default port
752 substituted, then you can use the $uri->_port method instead.
754 =item $uri->host_port
756 =item $uri->host_port( $new_host_port )
758 Sets and returns the host and port as a single
759 unit. The returned value includes a port, even if it matches the
760 default port. The host part and the port part are separated by a
763 For IPv6 addresses the bracketing is preserved; thus
764 URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast this with
765 $uri->host which will remove the brackets.
767 =item $uri->default_port
769 Returns the default port of the URI scheme to which $uri
770 belongs. For I<http> this is the number 80, for I<ftp> this
771 is the number 21, etc. The default port for a scheme can not be
776 =head1 SCHEME-SPECIFIC SUPPORT
778 Scheme-specific support is provided for the following URI schemes. For C<URI>
779 objects that do not belong to one of these, you can only use the common and
786 The I<data> URI scheme is specified in RFC 2397. It allows inclusion
787 of small data items as "immediate" data, as if it had been included
790 C<URI> objects belonging to the data scheme support the common methods
791 and two new methods to access their scheme-specific components:
792 $uri->media_type and $uri->data. See L<URI::data> for details.
796 An old specification of the I<file> URI scheme is found in RFC 1738.
797 A new RFC 2396 based specification is not available yet, but file URI
798 references are in common use.
800 C<URI> objects belonging to the file scheme support the common and
801 generic methods. In addition, they provide two methods for mapping file URIs
802 back to local file names; $uri->file and $uri->dir. See L<URI::file>
807 An old specification of the I<ftp> URI scheme is found in RFC 1738. A
808 new RFC 2396 based specification is not available yet, but ftp URI
809 references are in common use.
811 C<URI> objects belonging to the ftp scheme support the common,
812 generic and server methods. In addition, they provide two methods for
813 accessing the userinfo sub-components: $uri->user and $uri->password.
817 The I<gopher> URI scheme is specified in
818 <draft-murali-url-gopher-1996-12-04> and will hopefully be available
819 as an RFC 2396 based specification.
821 C<URI> objects belonging to the gopher scheme support the common,
822 generic and server methods. In addition, they support some methods for
823 accessing gopher-specific path components: $uri->gopher_type,
824 $uri->selector, $uri->search, $uri->string.
828 The I<http> URI scheme is specified in RFC 2616.
829 The scheme is used to reference resources hosted by HTTP servers.
831 C<URI> objects belonging to the http scheme support the common,
832 generic and server methods.
836 The I<https> URI scheme is a Netscape invention which is commonly
837 implemented. The scheme is used to reference HTTP servers through SSL
838 connections. Its syntax is the same as http, but the default
843 The I<ldap> URI scheme is specified in RFC 2255. LDAP is the
844 Lightweight Directory Access Protocol. An ldap URI describes an LDAP
845 search operation to perform to retrieve information from an LDAP
848 C<URI> objects belonging to the ldap scheme support the common,
849 generic and server methods as well as ldap-specific methods: $uri->dn,
850 $uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See
851 L<URI::ldap> for details.
855 Like the I<ldap> URI scheme, but uses a UNIX domain socket. The
856 server methods are not supported, and the local socket path is
857 available as $uri->un_path. The I<ldapi> scheme is used by the
858 OpenLDAP package. There is no real specification for it, but it is
859 mentioned in various OpenLDAP manual pages.
863 Like the I<ldap> URI scheme, but uses an SSL connection. This
864 scheme is deprecated, as the preferred way is to use the I<start_tls>
869 The I<mailto> URI scheme is specified in RFC 2368. The scheme was
870 originally used to designate the Internet mailing address of an
871 individual or service. It has (in RFC 2368) been extended to allow
872 setting of other mail header fields and the message body.
874 C<URI> objects belonging to the mailto scheme support the common
875 methods and the generic query methods. In addition, they support the
876 following mailto-specific methods: $uri->to, $uri->headers.
878 Note that the "foo@example.com" part of a mailto is I<not> the
879 C<userinfo> and C<host> but instead the C<path>. This allows a
880 mailto to contain multiple comma-separated email addresses.
884 The I<mms> URL specification can be found at L<http://sdp.ppona.com/>.
885 C<URI> objects belonging to the mms scheme support the common,
886 generic, and server methods, with the exception of userinfo and
887 query-related sub-components.
891 The I<news>, I<nntp> and I<snews> URI schemes are specified in
892 <draft-gilman-news-url-01> and will hopefully be available as an RFC
893 2396 based specification soon.
895 C<URI> objects belonging to the news scheme support the common,
896 generic and server methods. In addition, they provide some methods to
897 access the path: $uri->group and $uri->message.
905 The I<pop> URI scheme is specified in RFC 2384. The scheme is used to
906 reference a POP3 mailbox.
908 C<URI> objects belonging to the pop scheme support the common, generic
909 and server methods. In addition, they provide two methods to access the
910 userinfo components: $uri->user and $uri->auth.
914 An old specification of the I<rlogin> URI scheme is found in RFC
915 1738. C<URI> objects belonging to the rlogin scheme support the
916 common, generic and server methods.
920 The I<rtsp> URL specification can be found in section 3.2 of RFC 2326.
921 C<URI> objects belonging to the rtsp scheme support the common,
922 generic, and server methods, with the exception of userinfo and
923 query-related sub-components.
927 The I<rtspu> URI scheme is used to talk to RTSP servers over UDP
928 instead of TCP. The syntax is the same as rtsp.
932 Information about rsync is available from http://rsync.samba.org.
933 C<URI> objects belonging to the rsync scheme support the common,
934 generic and server methods. In addition, they provide methods to
935 access the userinfo sub-components: $uri->user and $uri->password.
939 The I<sip> URI specification is described in sections 19.1 and 25
940 of RFC 3261. C<URI> objects belonging to the sip scheme support the
941 common, generic, and server methods with the exception of path related
942 sub-components. In addition, they provide two methods to get and set
943 I<sip> parameters: $uri->params_form and $uri->params.
947 See I<sip> scheme. Its syntax is the same as sip, but the default
952 See I<news> scheme. Its syntax is the same as news, but the default
957 An old specification of the I<telnet> URI scheme is found in RFC
958 1738. C<URI> objects belonging to the telnet scheme support the
959 common, generic and server methods.
963 These URIs are used like I<telnet> URIs but for connections to IBM
964 mainframes. C<URI> objects belonging to the tn3270 scheme support the
965 common, generic and server methods.
969 Information about ssh is available at http://www.openssh.com/.
970 C<URI> objects belonging to the ssh scheme support the common,
971 generic and server methods. In addition, they provide methods to
972 access the userinfo sub-components: $uri->user and $uri->password.
976 The syntax of Uniform Resource Names is specified in RFC 2141. C<URI>
977 objects belonging to the urn scheme provide the common methods, and also the
978 methods $uri->nid and $uri->nss, which return the Namespace Identifier
979 and the Namespace-Specific String respectively.
981 The Namespace Identifier basically works like the Scheme identifier of
982 URIs, and further divides the URN namespace. Namespace Identifier
983 assignments are maintained at
984 <http://www.iana.org/assignments/urn-namespaces>.
986 Letter case is not significant for the Namespace Identifier. It is
987 always returned in lower case by the $uri->nid method. The $uri->_nid
988 method can be used if you want it in its original case.
990 =item B<urn>:B<isbn>:
992 The C<urn:isbn:> namespace contains International Standard Book
993 Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging
994 to this namespace has the following extra methods (if the
995 Business::ISBN module is available): $uri->isbn,
996 $uri->isbn_publisher_code, $uri->isbn_group_code (formerly isbn_country_code,
997 which is still supported but issues a deprecation warning), $uri->isbn_as_ean.
1001 The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is
1002 described in RFC 3061. An object identifier consists of sequences of digits
1003 separated by dots. A C<URI> object belonging to this namespace has an
1004 additional method called $uri->oid that can be used to get/set the oid
1005 value. In a list context, oid numbers are returned as separate elements.
1009 =head1 CONFIGURATION VARIABLES
1011 The following configuration variables influence how the class and its
1016 =item $URI::ABS_ALLOW_RELATIVE_SCHEME
1018 Some older parsers used to allow the scheme name to be present in the
1019 relative URL if it was the same as the base URL scheme. RFC 2396 says
1020 that this should be avoided, but you can enable this old behaviour by
1021 setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value.
1022 The difference is demonstrated by the following examples:
1024 URI->new("http:foo")->abs("http://host/a/b")
1027 local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
1028 URI->new("http:foo")->abs("http://host/a/b")
1029 ==> "http://host/a/foo"
1032 =item $URI::ABS_REMOTE_LEADING_DOTS
1034 You can also have the abs() method ignore excess ".."
1035 segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS
1036 to a TRUE value. The difference is demonstrated by the following
1039 URI->new("../../../foo")->abs("http://host/a/b")
1040 ==> "http://host/../../foo"
1042 local $URI::ABS_REMOTE_LEADING_DOTS = 1;
1043 URI->new("../../../foo")->abs("http://host/a/b")
1044 ==> "http://host/foo"
1046 =item $URI::DEFAULT_QUERY_FORM_DELIMITER
1048 This value can be set to ";" to have the query form C<key=value> pairs
1049 delimited by ";" instead of "&" which is the default.
1055 Using regexp variables like $1 directly as arguments to the URI methods
1056 does not work too well with current perl implementations. I would argue
1057 that this is actually a bug in perl. The workaround is to quote
1063 =head1 PARSING URIs WITH REGEXP
1065 As an alternative to this module, the following (official) regular
1066 expression can be used to decode a URI:
1068 my($scheme, $authority, $path, $query, $fragment) =
1069 $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
1071 The C<URI::Split> module provides the function uri_split() as a
1072 readable alternative.
1076 L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>,
1077 L<URI::Split>, L<URI::Heuristic>
1079 RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax",
1080 Berners-Lee, Fielding, Masinter, August 1998.
1082 http://www.iana.org/assignments/uri-schemes
1084 http://www.iana.org/assignments/urn-namespaces
1086 http://www.w3.org/Addressing/
1090 Copyright 1995-2009 Gisle Aas.
1092 Copyright 1995 Martijn Koster.
1094 This program is free software; you can redistribute it and/or modify
1095 it under the same terms as Perl itself.
1097 =head1 AUTHORS / ACKNOWLEDGMENTS
1099 This module is based on the C<URI::URL> module, which in turn was
1100 (distantly) based on the C<wwwurl.pl> code in the libwww-perl for
1101 perl4 developed by Roy Fielding, as part of the Arcadia project at the
1102 University of California, Irvine, with contributions from Brooks
1105 C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and
1106 Martijn Koster with input from other people on the libwww-perl mailing
1109 C<URI> and related subclasses were developed by Gisle Aas.