Further lies in the File::Basename docs
[p5sagit/p5-mst-13.2.git] / lib / File / Basename.pm
CommitLineData
f06db76b 1=head1 NAME
2
767010ca 3File::Basename - Parse file paths into directory, filename and suffix.
f06db76b 4
5=head1 SYNOPSIS
6
7 use File::Basename;
8
1c33a35c 9 ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
10 $name = fileparse($fullname,@suffixlist);
767010ca 11
f06db76b 12 $basename = basename($fullname,@suffixlist);
767010ca 13 $dirname = dirname($fullname);
f06db76b 14
f06db76b 15
16=head1 DESCRIPTION
17
767010ca 18These routines allow you to parse file paths into their directory, filename
19and suffix.
f06db76b 20
767010ca 21B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and quirks, of
22the shell and C functions of the same name. See each function's documention
23for details.
2ae324a7 24
f06db76b 25=cut
26
b3eb6a9b 27
767010ca 28package File::Basename;
29
1f47e8e2 30# A bit of juggling to insure that C<use re 'taint';> always works, since
918c0b2d 31# File::Basename is used during the Perl build, when the re extension may
32# not be available.
33BEGIN {
34 unless (eval { require re; })
9cfe5470 35 { eval ' sub re::import { $^H |= 0x00100000; } ' } # HINT_RE_TAINT
918c0b2d 36 import re 'taint';
37}
38
39
767010ca 40use strict;
3b825e41 41use 5.006;
b395063c 42use warnings;
17f410f9 43our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
a0d0e21e 44require Exporter;
45@ISA = qw(Exporter);
748a9306 46@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
1c33a35c 47$VERSION = "2.73";
7e2183d3 48
767010ca 49fileparse_set_fstype($^O);
a0d0e21e 50
a0d0e21e 51
767010ca 52=over 4
53
54=item C<fileparse>
55
56 my($filename, $directories, $suffix) = fileparse($path);
57 my($filename, $directories, $suffix) = fileparse($path, @suffixes);
58 my $filename = fileparse($path, @suffixes);
59
60The C<fileparse()> routine divides a file path into its $directories, $filename
61and (optionally) the filename $suffix.
62
63$directories contains everything up to and including the last
64directory separator in the $path including the volume (if applicable).
65The remainder of the $path is the $filename.
66
67 # On Unix returns ("baz", "/foo/bar/", "")
68 fileparse("/foo/bar/baz");
69
70 # On Windows returns ("baz", "C:\foo\bar\", "")
71 fileparse("C:\foo\bar\baz");
72
73 # On Unix returns ("", "/foo/bar/baz/", "")
74 fileparse("/foo/bar/baz/");
75
76If @suffixes are given each element is a pattern (either a string or a
77C<qr//>) matched against the end of the $filename. The matching
78portion is removed and becomes the $suffix.
79
80 # On Unix returns ("baz", "/foo/bar", ".txt")
81 fileparse("/foo/bar/baz", qr/\.[^.]*/);
82
3291253b 83If type is non-Unix (see C<fileparse_set_fstype()>) then the pattern
84matching for suffix removal is performed case-insensitively, since
85those systems are not case-sensitive when opening existing files.
767010ca 86
87You are guaranteed that C<$directories . $filename . $suffix> will
88denote the same location as the original $path.
a0d0e21e 89
767010ca 90=cut
a0d0e21e 91
92
93sub fileparse {
94 my($fullname,@suffices) = @_;
3291253b 95
978ae421 96 unless (defined $fullname) {
97 require Carp;
6286f723 98 Carp::croak("fileparse(): need a valid pathname");
978ae421 99 }
3291253b 100
101 my $orig_type = '';
102 my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
103
12cbd720 104 my($taint) = substr($fullname,0,0); # Is $fullname tainted?
a0d0e21e 105
3291253b 106 if ($type eq "VMS" and $fullname =~ m{/} ) {
107 # We're doing Unix emulation
108 $orig_type = $type;
109 $type = 'Unix';
a0d0e21e 110 }
3291253b 111
112 my($dirpath, $basename);
113
114 if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
c7b9dd21 115 ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
116 $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
a0d0e21e 117 }
3291253b 118 elsif ($type eq "OS2") {
f1e20921 119 ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
120 $dirpath = './' unless $dirpath; # Can't be 0
121 $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
122 }
3291253b 123 elsif ($type eq "MacOS") {
c7b9dd21 124 ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
95e8664e 125 $dirpath = ':' unless $dirpath;
a0d0e21e 126 }
3291253b 127 elsif ($type eq "AmigaOS") {
c7b9dd21 128 ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
a3156fc3 129 $dirpath = './' unless $dirpath;
55497cff 130 }
3291253b 131 elsif ($type eq 'VMS' ) {
132 ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
133 $dirpath ||= ''; # should always be defined
134 }
135 else { # Default to Unix semantics.
c7b9dd21 136 ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s);
3291253b 137 if ($orig_type eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) {
491527d0 138 # dev:[000000] is top of VMS tree, similar to Unix '/'
e3830a4e 139 # so strip it off and treat the rest as "normal"
140 my $devspec = $1;
141 my $remainder = $3;
142 ($dirpath,$basename) = ($remainder =~ m#^(.*/)?(.*)#s);
5fa137f1 143 $dirpath ||= ''; # should always be defined
e3830a4e 144 $dirpath = $devspec.$dirpath;
491527d0 145 }
f0c6ccdf 146 $dirpath = './' unless $dirpath;
a0d0e21e 147 }
3291253b 148
a0d0e21e 149
3291253b 150 my($tail, $suffix);
a0d0e21e 151 if (@suffices) {
f06db76b 152 $tail = '';
a0d0e21e 153 foreach $suffix (@suffices) {
ee2ff9ea 154 my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
c7b9dd21 155 if ($basename =~ s/$pat//s) {
12cbd720 156 $taint .= substr($suffix,0,0);
44a8e56a 157 $tail = $1 . $tail;
a0d0e21e 158 }
159 }
160 }
161
767010ca 162 # Ensure taint is propgated from the path to its pieces.
12cbd720 163 $tail .= $taint if defined $tail; # avoid warning if $tail == undef
8d6d96c1 164 wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
d2ccd3cb 165 : ($basename .= $taint);
a0d0e21e 166}
167
168
767010ca 169
170=item C<basename>
171
172 my $filename = basename($path);
173 my $filename = basename($path, @suffixes);
174
175C<basename()> works just like C<fileparse()> in scalar context - you only get
176the $filename - except that it always quotes metacharacters in the @suffixes.
177
178 # These two function calls are equivalent.
179 my $filename = basename("/foo/bar/baz.txt", ".txt");
180 my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
181
182This function is provided for compatibility with the Unix shell command
183C<basename(1)>.
184
185=cut
186
a0d0e21e 187
188sub basename {
748a9306 189 my($name) = shift;
190 (fileparse($name, map("\Q$_\E",@_)))[0];
a0d0e21e 191}
7e2183d3 192
a0d0e21e 193
767010ca 194
195=item C<dirname>
196
197This function is provided for compatibility with the Unix shell
198command C<dirname(1)> and has inherited some of its quirks. In spite of
199its name it does B<NOT> always return the directory name as you might
200expect. To be safe, if you want the directory name of a path use
201C<fileparse()>.
202
3291253b 203Only on VMS (where there is no ambiguity between the file and directory
204portions of a path) and AmigaOS (possibly due to an implementation quirk in
205this module) does C<dirname()> work like C<fileparse($path)>, returning just the
206$directories.
767010ca 207
3291253b 208 # On VMS and AmigaOS
209 my $directories = dirname($path);
767010ca 210
211When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
212which is subtly different from how C<fileparse()> works. It returns all but
213the last level of a file path even if the last level is clearly a directory.
214In effect, it is not returning the directory portion but simply the path one
215level up acting like C<chop()> for file paths.
216
217Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
218its returned path.
219
220 # returns /foo/bar. fileparse() would return /foo/bar/
221 dirname("/foo/bar/baz");
222
223 # also returns /foo/bar despite the fact that baz is clearly a
224 # directory. fileparse() would return /foo/bar/baz/
225 dirname("/foo/bar/baz/");
226
227 # returns '.'. fileparse() would return 'foo/'
228 dirname("foo/");
229
230Under VMS, if there is no directory information in the $path, then the
231current default device and directory is used.
232
233=cut
234
a0d0e21e 235
236sub dirname {
3291253b 237 my $path = shift;
a0d0e21e 238
3291253b 239 my($type) = $Fileparse_fstype;
240
241 if( $type eq 'VMS' and $path =~ m{/} ) {
767010ca 242 # Parse as Unix
243 local($File::Basename::Fileparse_fstype) = '';
3291253b 244 return dirname($path);
767010ca 245 }
246
3291253b 247 my($basename, $dirname) = fileparse($path);
767010ca 248
3291253b 249 if ($type eq 'VMS') {
767010ca 250 $dirname ||= $ENV{DEFAULT};
a0d0e21e 251 }
3291253b 252 elsif ($type eq 'MacOS') {
084592ab 253 if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
254 $dirname =~ s/([^:]):\z/$1/s;
255 ($basename,$dirname) = fileparse $dirname;
256 }
257 $dirname .= ":" unless $dirname =~ /:\z/;
258 }
3291253b 259 elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
c7b9dd21 260 $dirname =~ s/([^:])[\\\/]*\z/$1/;
68dc0745 261 unless( length($basename) ) {
262 ($basename,$dirname) = fileparse $dirname;
c7b9dd21 263 $dirname =~ s/([^:])[\\\/]*\z/$1/;
68dc0745 264 }
265 }
3291253b 266 elsif ($type eq 'AmigaOS') {
c7b9dd21 267 if ( $dirname =~ /:\z/) { return $dirname }
55497cff 268 chop $dirname;
c7b9dd21 269 $dirname =~ s#[^:/]+\z## unless length($basename);
55497cff 270 }
084592ab 271 else {
767010ca 272 $dirname =~ s{(.)/*\z}{$1}s;
42568e28 273 unless( length($basename) ) {
42568e28 274 ($basename,$dirname) = fileparse $dirname;
767010ca 275 $dirname =~ s{(.)/*\z}{$1}s;
42568e28 276 }
a0d0e21e 277 }
278
279 $dirname;
280}
281
767010ca 282
283=item C<fileparse_set_fstype>
284
3291253b 285 my $type = fileparse_set_fstype();
286 my $previous_type = fileparse_set_fstype($type);
767010ca 287
288Normally File::Basename will assume a file path type native to your current
289operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
290With this function you can override that assumption.
291
3291253b 292Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
293"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
294"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
295given "Unix" will be assumed.
767010ca 296
297If you've selected VMS syntax, and the file specification you pass to
298one of these routines contains a "/", they assume you are using Unix
299emulation and apply the Unix syntax rules instead, for that function
300call only.
301
302=back
303
304=cut
305
306
3291253b 307BEGIN {
308
309my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
310my @Types = (@Ignore_Case, qw(Unix));
311
767010ca 312sub fileparse_set_fstype {
3291253b 313 my $old = $Fileparse_fstype;
314
315 if (@_) {
316 my $new_type = shift;
317
318 $Fileparse_fstype = 'Unix'; # default
319 foreach my $type (@Types) {
320 $Fileparse_fstype = $type if $new_type =~ /^$type/i;
321 }
322
323 $Fileparse_igncase =
324 (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
325 }
326
327 return $old;
328}
329
767010ca 330}
331
a0d0e21e 332
3331;