1 package URI::Heuristic;
5 URI::Heuristic - Expand URI using heuristics
9 use URI::Heuristic qw(uf_uristr);
10 $u = uf_uristr("perl"); # http://www.perl.com
11 $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol
12 $u = uf_uristr("aas"); # http://www.aas.no
13 $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi
14 $u = uf_uristr("/etc/passwd"); # file:/etc/passwd
18 This module provides functions that expand strings into real absolute
19 URIs using some built-in heuristics. Strings that already represent
20 absolute URIs (i.e. that start with a C<scheme:> part) are never modified
21 and are returned unchanged. The main use of these functions is to
22 allow abbreviated URIs similar to what many web browsers allow for URIs
25 The following functions are provided:
31 Tries to make the argument string
32 into a proper absolute URI string. The "uf_" prefix stands for "User
33 Friendly". Under MacOS, it assumes that any string with a common URL
34 scheme (http, ftp, etc.) is a URL rather than a local path. So don't name
35 your volumes after common URL schemes and expect uf_uristr() to construct
36 valid file: URL's on those volumes for you, because it won't.
40 Works the same way as uf_uristr() but
41 returns a C<URI> object.
47 If the hostname portion of a URI does not contain any dots, then
48 certain qualified guesses are made. These guesses are governed by
49 the following two environment variables:
55 The two-letter country code (ISO 3166) for your location. If
56 the domain name of your host ends with two letters, then it is taken
57 to be the default country. See also L<Locale::Country>.
59 =item URL_GUESS_PATTERN
61 Contains a space-separated list of URL patterns to try. The string
62 "ACME" is for some reason used as a placeholder for the host name in
63 the URL provided. Example:
65 URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
66 export URL_GUESS_PATTERN
68 Specifying URL_GUESS_PATTERN disables any guessing rules based on
69 country. An empty URL_GUESS_PATTERN disables any guessing that
70 involves host name lookups.
76 Copyright 1997-1998, Gisle Aas
78 This library is free software; you can redistribute it and/or
79 modify it under the same terms as Perl itself.
85 use vars qw(@EXPORT_OK $VERSION $MY_COUNTRY %LOCAL_GUESSING $DEBUG);
88 *import = \&Exporter::import;
89 @EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
96 # First try the environment.
100 # Could use LANG, LC_ALL, etc at this point, but probably too
101 # much of a wild guess. (Catalan != Canada, etc.)
104 # Last bit of domain name. This may access the network.
106 my $fqdn = Net::Domain::hostfqdn();
107 $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
108 return $_ if defined;
110 # Give up. Defined but false.
117 'us' => [qw(www.ACME.gov www.ACME.mil)],
118 'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
119 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
120 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
121 # send corrections and new entries to <gisle@aas.no>
128 print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
129 return unless defined;
134 if (/^(www|web|home)\./) {
137 } elsif (/^(ftp|gopher|news|wais|http|https)\./) {
140 } elsif ($^O ne "MacOS" &&
141 (m,^/, || # absolute file name
142 m,^\.\.?/, || # relative file name
143 m,^[a-zA-Z]:[/\\],) # dosish file name
148 } elsif ($^O eq "MacOS" && m/:/) {
149 # potential MacOS file name
150 unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
152 my $a = URI::file->new($_)->as_string;
153 $_ = ($a =~ m/^file:/) ? $a : "file:$a";
155 } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
158 } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified
159 if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
162 if ($host !~ /\./ && $host ne "localhost") {
164 if (exists $ENV{URL_GUESS_PATTERN}) {
165 @guess = map { s/\bACME\b/$host/; $_ }
166 split(' ', $ENV{URL_GUESS_PATTERN});
169 my $special = $LOCAL_GUESSING{MY_COUNTRY()};
171 my @special = @$special;
172 push(@guess, map { s/\bACME\b/$host/; $_ }
175 push(@guess, "www.$host." . MY_COUNTRY());
178 push(@guess, map "www.$host.$_",
179 "com", "org", "net", "edu", "int");
184 for $guess (@guess) {
185 print STDERR "uf_uristr: gethostbyname('$guess.')..."
187 if (gethostbyname("$guess.")) {
188 print STDERR "yes\n" if $DEBUG;
192 print STDERR "no\n" if $DEBUG;
195 $_ = "http://$host$_";
198 # pure junk, just return it unchanged...
202 print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
210 URI->new(uf_uristr($_[0]));
214 *uf_urlstr = \*uf_uristr;
219 URI::URL->new(uf_uristr($_[0]));