Add built local::lib
[catagits/Gitalist.git] / local-lib5 / lib / perl5 / URI / Heuristic.pm
CommitLineData
3fea05b9 1package URI::Heuristic;
2
3=head1 NAME
4
5URI::Heuristic - Expand URI using heuristics
6
7=head1 SYNOPSIS
8
9 use URI::Heuristic qw(uf_uristr);
10 $u = uf_uristr("perl"); # http://www.perl.com
11 $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol
12 $u = uf_uristr("aas"); # http://www.aas.no
13 $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi
14 $u = uf_uristr("/etc/passwd"); # file:/etc/passwd
15
16=head1 DESCRIPTION
17
18This module provides functions that expand strings into real absolute
19URIs using some built-in heuristics. Strings that already represent
20absolute URIs (i.e. that start with a C<scheme:> part) are never modified
21and are returned unchanged. The main use of these functions is to
22allow abbreviated URIs similar to what many web browsers allow for URIs
23typed in by the user.
24
25The following functions are provided:
26
27=over 4
28
29=item uf_uristr($str)
30
31Tries to make the argument string
32into a proper absolute URI string. The "uf_" prefix stands for "User
33Friendly". Under MacOS, it assumes that any string with a common URL
34scheme (http, ftp, etc.) is a URL rather than a local path. So don't name
35your volumes after common URL schemes and expect uf_uristr() to construct
36valid file: URL's on those volumes for you, because it won't.
37
38=item uf_uri($str)
39
40Works the same way as uf_uristr() but
41returns a C<URI> object.
42
43=back
44
45=head1 ENVIRONMENT
46
47If the hostname portion of a URI does not contain any dots, then
48certain qualified guesses are made. These guesses are governed by
49the following two environment variables:
50
51=over 10
52
53=item COUNTRY
54
55The two-letter country code (ISO 3166) for your location. If
56the domain name of your host ends with two letters, then it is taken
57to be the default country. See also L<Locale::Country>.
58
59=item URL_GUESS_PATTERN
60
61Contains a space-separated list of URL patterns to try. The string
62"ACME" is for some reason used as a placeholder for the host name in
63the URL provided. Example:
64
65 URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
66 export URL_GUESS_PATTERN
67
68Specifying URL_GUESS_PATTERN disables any guessing rules based on
69country. An empty URL_GUESS_PATTERN disables any guessing that
70involves host name lookups.
71
72=back
73
74=head1 COPYRIGHT
75
76Copyright 1997-1998, Gisle Aas
77
78This library is free software; you can redistribute it and/or
79modify it under the same terms as Perl itself.
80
81=cut
82
83use strict;
84
85use vars qw(@EXPORT_OK $VERSION $MY_COUNTRY %LOCAL_GUESSING $DEBUG);
86
87require Exporter;
88*import = \&Exporter::import;
89@EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
90$VERSION = "4.18";
91
92sub MY_COUNTRY() {
93 for ($MY_COUNTRY) {
94 return $_ if defined;
95
96 # First try the environment.
97 $_ = $ENV{COUNTRY};
98 return $_ if defined;
99
100 # Could use LANG, LC_ALL, etc at this point, but probably too
101 # much of a wild guess. (Catalan != Canada, etc.)
102 #
103
104 # Last bit of domain name. This may access the network.
105 require Net::Domain;
106 my $fqdn = Net::Domain::hostfqdn();
107 $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
108 return $_ if defined;
109
110 # Give up. Defined but false.
111 return ($_ = 0);
112 }
113}
114
115%LOCAL_GUESSING =
116(
117 'us' => [qw(www.ACME.gov www.ACME.mil)],
118 'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
119 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
120 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
121 # send corrections and new entries to <gisle@aas.no>
122);
123
124
125sub uf_uristr ($)
126{
127 local($_) = @_;
128 print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
129 return unless defined;
130
131 s/^\s+//;
132 s/\s+$//;
133
134 if (/^(www|web|home)\./) {
135 $_ = "http://$_";
136
137 } elsif (/^(ftp|gopher|news|wais|http|https)\./) {
138 $_ = "$1://$_";
139
140 } elsif ($^O ne "MacOS" &&
141 (m,^/, || # absolute file name
142 m,^\.\.?/, || # relative file name
143 m,^[a-zA-Z]:[/\\],) # dosish file name
144 )
145 {
146 $_ = "file:$_";
147
148 } elsif ($^O eq "MacOS" && m/:/) {
149 # potential MacOS file name
150 unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
151 require URI::file;
152 my $a = URI::file->new($_)->as_string;
153 $_ = ($a =~ m/^file:/) ? $a : "file:$a";
154 }
155 } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
156 $_ = "mailto:$_";
157
158 } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified
159 if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
160 my $host = $1;
161
162 if ($host !~ /\./ && $host ne "localhost") {
163 my @guess;
164 if (exists $ENV{URL_GUESS_PATTERN}) {
165 @guess = map { s/\bACME\b/$host/; $_ }
166 split(' ', $ENV{URL_GUESS_PATTERN});
167 } else {
168 if (MY_COUNTRY()) {
169 my $special = $LOCAL_GUESSING{MY_COUNTRY()};
170 if ($special) {
171 my @special = @$special;
172 push(@guess, map { s/\bACME\b/$host/; $_ }
173 @special);
174 } else {
175 push(@guess, "www.$host." . MY_COUNTRY());
176 }
177 }
178 push(@guess, map "www.$host.$_",
179 "com", "org", "net", "edu", "int");
180 }
181
182
183 my $guess;
184 for $guess (@guess) {
185 print STDERR "uf_uristr: gethostbyname('$guess.')..."
186 if $DEBUG;
187 if (gethostbyname("$guess.")) {
188 print STDERR "yes\n" if $DEBUG;
189 $host = $guess;
190 last;
191 }
192 print STDERR "no\n" if $DEBUG;
193 }
194 }
195 $_ = "http://$host$_";
196
197 } else {
198 # pure junk, just return it unchanged...
199
200 }
201 }
202 print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
203
204 $_;
205}
206
207sub uf_uri ($)
208{
209 require URI;
210 URI->new(uf_uristr($_[0]));
211}
212
213# legacy
214*uf_urlstr = \*uf_uristr;
215
216sub uf_url ($)
217{
218 require URI::URL;
219 URI::URL->new(uf_uristr($_[0]));
220}
221
2221;