[perl #22236] File::Basename behavior is misleading
[p5sagit/p5-mst-13.2.git] / lib / File / Basename.pm
CommitLineData
f06db76b 1=head1 NAME
2
767010ca 3File::Basename - Parse file paths into directory, filename and suffix.
f06db76b 4
5=head1 SYNOPSIS
6
7 use File::Basename;
8
1c33a35c 9 ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
10 $name = fileparse($fullname,@suffixlist);
767010ca 11
f06db76b 12 $basename = basename($fullname,@suffixlist);
767010ca 13 $dirname = dirname($fullname);
f06db76b 14
f06db76b 15
16=head1 DESCRIPTION
17
767010ca 18These routines allow you to parse file paths into their directory, filename
19and suffix.
f06db76b 20
767010ca 21B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and quirks, of
22the shell and C functions of the same name. See each function's documention
23for details.
2ae324a7 24
e586b3eb 25It is guaranteed that
26
27 # Where $path_separator is / for Unix, \ for Windows, etc...
28 dirname($path) . $path_separator . basename($path);
29
30is equivalent to the original path for all systems but VMS.
31
f06db76b 32=cut
33
b3eb6a9b 34
767010ca 35package File::Basename;
36
1f47e8e2 37# A bit of juggling to insure that C<use re 'taint';> always works, since
918c0b2d 38# File::Basename is used during the Perl build, when the re extension may
39# not be available.
40BEGIN {
41 unless (eval { require re; })
9cfe5470 42 { eval ' sub re::import { $^H |= 0x00100000; } ' } # HINT_RE_TAINT
918c0b2d 43 import re 'taint';
44}
45
46
767010ca 47use strict;
3b825e41 48use 5.006;
b395063c 49use warnings;
17f410f9 50our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
a0d0e21e 51require Exporter;
52@ISA = qw(Exporter);
748a9306 53@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
1c33a35c 54$VERSION = "2.73";
7e2183d3 55
767010ca 56fileparse_set_fstype($^O);
a0d0e21e 57
a0d0e21e 58
767010ca 59=over 4
60
61=item C<fileparse>
62
63 my($filename, $directories, $suffix) = fileparse($path);
64 my($filename, $directories, $suffix) = fileparse($path, @suffixes);
65 my $filename = fileparse($path, @suffixes);
66
67The C<fileparse()> routine divides a file path into its $directories, $filename
68and (optionally) the filename $suffix.
69
70$directories contains everything up to and including the last
71directory separator in the $path including the volume (if applicable).
72The remainder of the $path is the $filename.
73
74 # On Unix returns ("baz", "/foo/bar/", "")
75 fileparse("/foo/bar/baz");
76
77 # On Windows returns ("baz", "C:\foo\bar\", "")
78 fileparse("C:\foo\bar\baz");
79
80 # On Unix returns ("", "/foo/bar/baz/", "")
81 fileparse("/foo/bar/baz/");
82
83If @suffixes are given each element is a pattern (either a string or a
84C<qr//>) matched against the end of the $filename. The matching
85portion is removed and becomes the $suffix.
86
87 # On Unix returns ("baz", "/foo/bar", ".txt")
88 fileparse("/foo/bar/baz", qr/\.[^.]*/);
89
3291253b 90If type is non-Unix (see C<fileparse_set_fstype()>) then the pattern
91matching for suffix removal is performed case-insensitively, since
92those systems are not case-sensitive when opening existing files.
767010ca 93
94You are guaranteed that C<$directories . $filename . $suffix> will
95denote the same location as the original $path.
a0d0e21e 96
767010ca 97=cut
a0d0e21e 98
99
100sub fileparse {
101 my($fullname,@suffices) = @_;
3291253b 102
978ae421 103 unless (defined $fullname) {
104 require Carp;
6286f723 105 Carp::croak("fileparse(): need a valid pathname");
978ae421 106 }
3291253b 107
108 my $orig_type = '';
109 my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
110
12cbd720 111 my($taint) = substr($fullname,0,0); # Is $fullname tainted?
a0d0e21e 112
3291253b 113 if ($type eq "VMS" and $fullname =~ m{/} ) {
114 # We're doing Unix emulation
115 $orig_type = $type;
116 $type = 'Unix';
a0d0e21e 117 }
3291253b 118
119 my($dirpath, $basename);
120
121 if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
c7b9dd21 122 ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
123 $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
a0d0e21e 124 }
3291253b 125 elsif ($type eq "OS2") {
f1e20921 126 ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
127 $dirpath = './' unless $dirpath; # Can't be 0
128 $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
129 }
3291253b 130 elsif ($type eq "MacOS") {
c7b9dd21 131 ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
95e8664e 132 $dirpath = ':' unless $dirpath;
a0d0e21e 133 }
3291253b 134 elsif ($type eq "AmigaOS") {
c7b9dd21 135 ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
a3156fc3 136 $dirpath = './' unless $dirpath;
55497cff 137 }
3291253b 138 elsif ($type eq 'VMS' ) {
139 ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
140 $dirpath ||= ''; # should always be defined
141 }
142 else { # Default to Unix semantics.
c7b9dd21 143 ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s);
3291253b 144 if ($orig_type eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) {
491527d0 145 # dev:[000000] is top of VMS tree, similar to Unix '/'
e3830a4e 146 # so strip it off and treat the rest as "normal"
147 my $devspec = $1;
148 my $remainder = $3;
149 ($dirpath,$basename) = ($remainder =~ m#^(.*/)?(.*)#s);
5fa137f1 150 $dirpath ||= ''; # should always be defined
e3830a4e 151 $dirpath = $devspec.$dirpath;
491527d0 152 }
f0c6ccdf 153 $dirpath = './' unless $dirpath;
a0d0e21e 154 }
3291253b 155
a0d0e21e 156
3291253b 157 my($tail, $suffix);
a0d0e21e 158 if (@suffices) {
f06db76b 159 $tail = '';
a0d0e21e 160 foreach $suffix (@suffices) {
ee2ff9ea 161 my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
c7b9dd21 162 if ($basename =~ s/$pat//s) {
12cbd720 163 $taint .= substr($suffix,0,0);
44a8e56a 164 $tail = $1 . $tail;
a0d0e21e 165 }
166 }
167 }
168
767010ca 169 # Ensure taint is propgated from the path to its pieces.
12cbd720 170 $tail .= $taint if defined $tail; # avoid warning if $tail == undef
8d6d96c1 171 wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
d2ccd3cb 172 : ($basename .= $taint);
a0d0e21e 173}
174
175
767010ca 176
177=item C<basename>
178
179 my $filename = basename($path);
180 my $filename = basename($path, @suffixes);
181
e586b3eb 182This function is provided for compatibility with the Unix shell command
183C<basename(1)>. It does B<NOT> always return the file name portion of a
184path as you might expect. To be safe, if you want the file name portion of
185a path use C<fileparse()>.
186
187C<basename()> returns the last level of a filepath even if the last
188level is clearly directory. In effect, it is acting like C<pop()> for
189paths. This differs from C<fileparse()>'s behaviour.
190
191 # Both return "bar"
192 basename("/foo/bar");
193 basename("/foo/bar/");
194
195@suffixes work as in C<fileparse()> except all regex metacharacters are
196quoted.
767010ca 197
198 # These two function calls are equivalent.
199 my $filename = basename("/foo/bar/baz.txt", ".txt");
200 my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
201
767010ca 202=cut
203
a0d0e21e 204
205sub basename {
748a9306 206 my($name) = shift;
e586b3eb 207 _strip_trailing_sep($name);
748a9306 208 (fileparse($name, map("\Q$_\E",@_)))[0];
a0d0e21e 209}
7e2183d3 210
a0d0e21e 211
767010ca 212
213=item C<dirname>
214
215This function is provided for compatibility with the Unix shell
216command C<dirname(1)> and has inherited some of its quirks. In spite of
217its name it does B<NOT> always return the directory name as you might
218expect. To be safe, if you want the directory name of a path use
219C<fileparse()>.
220
3291253b 221Only on VMS (where there is no ambiguity between the file and directory
222portions of a path) and AmigaOS (possibly due to an implementation quirk in
223this module) does C<dirname()> work like C<fileparse($path)>, returning just the
224$directories.
767010ca 225
3291253b 226 # On VMS and AmigaOS
227 my $directories = dirname($path);
767010ca 228
229When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
230which is subtly different from how C<fileparse()> works. It returns all but
231the last level of a file path even if the last level is clearly a directory.
232In effect, it is not returning the directory portion but simply the path one
233level up acting like C<chop()> for file paths.
234
235Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
236its returned path.
237
238 # returns /foo/bar. fileparse() would return /foo/bar/
239 dirname("/foo/bar/baz");
240
241 # also returns /foo/bar despite the fact that baz is clearly a
242 # directory. fileparse() would return /foo/bar/baz/
243 dirname("/foo/bar/baz/");
244
245 # returns '.'. fileparse() would return 'foo/'
246 dirname("foo/");
247
248Under VMS, if there is no directory information in the $path, then the
249current default device and directory is used.
250
251=cut
252
a0d0e21e 253
254sub dirname {
3291253b 255 my $path = shift;
a0d0e21e 256
3291253b 257 my($type) = $Fileparse_fstype;
258
259 if( $type eq 'VMS' and $path =~ m{/} ) {
767010ca 260 # Parse as Unix
261 local($File::Basename::Fileparse_fstype) = '';
3291253b 262 return dirname($path);
767010ca 263 }
264
3291253b 265 my($basename, $dirname) = fileparse($path);
767010ca 266
3291253b 267 if ($type eq 'VMS') {
767010ca 268 $dirname ||= $ENV{DEFAULT};
a0d0e21e 269 }
3291253b 270 elsif ($type eq 'MacOS') {
084592ab 271 if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
e586b3eb 272 _strip_trailing_sep($dirname);
084592ab 273 ($basename,$dirname) = fileparse $dirname;
274 }
275 $dirname .= ":" unless $dirname =~ /:\z/;
276 }
3291253b 277 elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
e586b3eb 278 _strip_trailing_sep($dirname);
68dc0745 279 unless( length($basename) ) {
280 ($basename,$dirname) = fileparse $dirname;
e586b3eb 281 _strip_trailing_sep($dirname);
68dc0745 282 }
283 }
3291253b 284 elsif ($type eq 'AmigaOS') {
c7b9dd21 285 if ( $dirname =~ /:\z/) { return $dirname }
55497cff 286 chop $dirname;
c7b9dd21 287 $dirname =~ s#[^:/]+\z## unless length($basename);
55497cff 288 }
084592ab 289 else {
e586b3eb 290 _strip_trailing_sep($dirname);
42568e28 291 unless( length($basename) ) {
42568e28 292 ($basename,$dirname) = fileparse $dirname;
e586b3eb 293 _strip_trailing_sep($dirname);
42568e28 294 }
a0d0e21e 295 }
296
297 $dirname;
298}
299
767010ca 300
e586b3eb 301# Strip the trailing path separator.
302sub _strip_trailing_sep {
303 my $type = $Fileparse_fstype;
304
305 if ($type eq 'MacOS') {
306 $_[0] =~ s/([^:]):\z/$1/s;
307 }
308 elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
309 $_[0] =~ s/([^:])[\\\/]*\z/$1/;
310 }
311 else {
312 $_[0] =~ s{(.)/*\z}{$1}s;
313 }
314}
315
316
767010ca 317=item C<fileparse_set_fstype>
318
3291253b 319 my $type = fileparse_set_fstype();
320 my $previous_type = fileparse_set_fstype($type);
767010ca 321
322Normally File::Basename will assume a file path type native to your current
323operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
324With this function you can override that assumption.
325
3291253b 326Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
327"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
328"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
329given "Unix" will be assumed.
767010ca 330
331If you've selected VMS syntax, and the file specification you pass to
332one of these routines contains a "/", they assume you are using Unix
333emulation and apply the Unix syntax rules instead, for that function
334call only.
335
336=back
337
338=cut
339
340
3291253b 341BEGIN {
342
343my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
344my @Types = (@Ignore_Case, qw(Unix));
345
767010ca 346sub fileparse_set_fstype {
3291253b 347 my $old = $Fileparse_fstype;
348
349 if (@_) {
350 my $new_type = shift;
351
352 $Fileparse_fstype = 'Unix'; # default
353 foreach my $type (@Types) {
354 $Fileparse_fstype = $type if $new_type =~ /^$type/i;
355 }
356
357 $Fileparse_igncase =
358 (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
359 }
360
361 return $old;
362}
363
767010ca 364}
365
a0d0e21e 366
3671;