These routines allow you to parse file paths into their directory, filename
and suffix.
-B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and quirks, of
-the shell and C functions of the same name. See each function's documention
-for details.
+B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and
+quirks, of the shell and C functions of the same name. See each
+function's documentation for details. If your concern is just parsing
+paths it is safer to use L<File::Spec>'s C<splitpath()> and
+C<splitdir()> methods.
+
+It is guaranteed that
+
+ # Where $path_separator is / for Unix, \ for Windows, etc...
+ dirname($path) . $path_separator . basename($path);
+
+is equivalent to the original path for all systems but VMS.
+
=cut
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
-$VERSION = "2.73";
+$VERSION = "2.74";
fileparse_set_fstype($^O);
# On Unix returns ("baz", "/foo/bar", ".txt")
fileparse("/foo/bar/baz", qr/\.[^.]*/);
-If type is one of "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "MSWin32"
-or "RISCOS" (see C<fileparse_set_fstype()>) then the pattern matching
-for suffix removal is performed case-insensitively, since those
-systems are not case-sensitive when opening existing files.
+If type is non-Unix (see C<fileparse_set_fstype()>) then the pattern
+matching for suffix removal is performed case-insensitively, since
+those systems are not case-sensitive when opening existing files.
You are guaranteed that C<$directories . $filename . $suffix> will
denote the same location as the original $path.
sub fileparse {
my($fullname,@suffices) = @_;
+
unless (defined $fullname) {
require Carp;
Carp::croak("fileparse(): need a valid pathname");
}
- my($fstype,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
- my($dirpath,$tail,$suffix,$basename);
+
+ my $orig_type = '';
+ my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
+
my($taint) = substr($fullname,0,0); # Is $fullname tainted?
- if ($fstype =~ /^VMS/i) {
- if ($fullname =~ m#/#) { $fstype = '' } # We're doing Unix emulation
- else {
- ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
- $dirpath ||= ''; # should always be defined
- }
+ if ($type eq "VMS" and $fullname =~ m{/} ) {
+ # We're doing Unix emulation
+ $orig_type = $type;
+ $type = 'Unix';
}
- if ($fstype =~ /^MS(DOS|Win32)|epoc/i) {
+
+ my($dirpath, $basename);
+
+ if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
$dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
}
- elsif ($fstype =~ /^os2/i) {
+ elsif ($type eq "OS2") {
($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
$dirpath = './' unless $dirpath; # Can't be 0
$dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
}
- elsif ($fstype =~ /^MacOS/si) {
+ elsif ($type eq "MacOS") {
($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
$dirpath = ':' unless $dirpath;
}
- elsif ($fstype =~ /^AmigaOS/i) {
+ elsif ($type eq "AmigaOS") {
($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
$dirpath = './' unless $dirpath;
}
- elsif ($fstype !~ /^VMS/i) { # default to Unix
+ elsif ($type eq 'VMS' ) {
+ ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
+ $dirpath ||= ''; # should always be defined
+ }
+ else { # Default to Unix semantics.
($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s);
- if ($^O eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) {
+ if ($orig_type eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) {
# dev:[000000] is top of VMS tree, similar to Unix '/'
# so strip it off and treat the rest as "normal"
my $devspec = $1;
}
$dirpath = './' unless $dirpath;
}
+
+ my $tail = '';
+ my $suffix = '';
if (@suffices) {
- $tail = '';
foreach $suffix (@suffices) {
my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
if ($basename =~ s/$pat//s) {
}
# Ensure taint is propagated from the path to its pieces.
- $tail .= $taint if defined $tail; # avoid warning if $tail == undef
+ $tail .= $taint;
wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
: ($basename .= $taint);
}
my $filename = basename($path);
my $filename = basename($path, @suffixes);
-C<basename()> works just like C<fileparse()> in scalar context - you only get
-the $filename - except that it always quotes metacharacters in the @suffixes.
+This function is provided for compatibility with the Unix shell command
+C<basename(1)>. It does B<NOT> always return the file name portion of a
+path as you might expect. To be safe, if you want the file name portion of
+a path use C<fileparse()>.
+
+C<basename()> returns the last level of a filepath even if the last
+level is clearly a directory. In effect, it is acting like C<pop()> for
+paths. This differs from C<fileparse()>'s behaviour.
+
+ # Both return "bar"
+ basename("/foo/bar");
+ basename("/foo/bar/");
+
+@suffixes work as in C<fileparse()> except all regex metacharacters are
+quoted.
# These two function calls are equivalent.
my $filename = basename("/foo/bar/baz.txt", ".txt");
my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
-This function is provided for compatibility with the Unix shell command
-C<basename(1)>.
+Also note that in order to be compatible with the shell command,
+C<basename()> does not strip off a suffix if it is identical to the
+remaining characters in the filename.
=cut
sub basename { # Emulate basename(1): return the last level of the path in $_[0], minus a matching suffix.
- my($name) = shift;
- (fileparse($name, map("\Q$_\E",@_)))[0];
+ my($path) = shift; # whatever remains in @_ is the list of suffixes
+
+ # From BSD basename(1)
+ # The basename utility deletes any prefix ending with the last slash `/'
+ # character present in string (after first stripping trailing slashes)
+ _strip_trailing_sep($path); # edits the local copy $path in place
+
+ my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) ); # suffixes are quoted so they match literally
+
+ # From BSD basename(1)
+ # The suffix is not stripped if it is identical to the remaining
+ # characters in string.
+ if( length $suffix and !length $basename ) {
+ $basename = $suffix; # the whole name matched as a suffix, so put it back
+ }
+
+ # Ensure that basename '/' == '/'
+ if( !length $basename ) {
+ $basename = $dirname; # nothing but a directory part is left, so return that
+ }
+
+ return $basename;
}
expect. To be safe, if you want the directory name of a path use
C<fileparse()>.
- # On all but Unix and MSDOS
- my $directories = dirname($path);
-
-On all system types but Unix and MSDOS this works just like
-C<fileparse($path)> but returning just the $directories.
+Only on VMS (where there is no ambiguity between the file and directory
+portions of a path) and AmigaOS (possibly due to an implementation quirk in
+this module) does C<dirname()> work like C<fileparse($path)>, returning just the
+$directories.
- # On Unix and MSDOS
- my $path_one_level_up = dirname($path);
+ # On VMS and AmigaOS
+ my $directories = dirname($path);
When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
which is subtly different from how C<fileparse()> works. It returns all but
sub dirname {
- my($fstype) = $Fileparse_fstype;
+ my $path = shift;
- if( $fstype =~ /VMS/i and $_[0] =~ m{/} ) {
+ my($type) = $Fileparse_fstype;
+
+ if( $type eq 'VMS' and $path =~ m{/} ) {
# Parse as Unix
local($File::Basename::Fileparse_fstype) = '';
- return dirname(@_);
+ return dirname($path);
}
- my($basename,$dirname) = fileparse($_[0]);
+ my($basename, $dirname) = fileparse($path);
- if ($fstype =~ /VMS/i) {
+ if ($type eq 'VMS') {
$dirname ||= $ENV{DEFAULT};
}
- elsif ($fstype =~ /MacOS/i) {
+ elsif ($type eq 'MacOS') {
if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
- $dirname =~ s/([^:]):\z/$1/s;
+ _strip_trailing_sep($dirname);
($basename,$dirname) = fileparse $dirname;
}
$dirname .= ":" unless $dirname =~ /:\z/;
}
- elsif ($fstype =~ /MS(DOS|Win32)|os2/i) {
- $dirname =~ s/([^:])[\\\/]*\z/$1/;
+ elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
+ _strip_trailing_sep($dirname);
unless( length($basename) ) {
($basename,$dirname) = fileparse $dirname;
- $dirname =~ s/([^:])[\\\/]*\z/$1/;
+ _strip_trailing_sep($dirname);
}
}
- elsif ($fstype =~ /AmigaOS/i) {
+ elsif ($type eq 'AmigaOS') {
if ( $dirname =~ /:\z/) { return $dirname }
chop $dirname;
$dirname =~ s#[^:/]+\z## unless length($basename);
}
else {
- $dirname =~ s{(.)/*\z}{$1}s;
+ _strip_trailing_sep($dirname);
unless( length($basename) ) {
($basename,$dirname) = fileparse $dirname;
- $dirname =~ s{(.)/*\z}{$1}s;
+ _strip_trailing_sep($dirname);
}
}
}
+# Strip the trailing path separator from $_[0], modifying the caller's variable in place.
+sub _strip_trailing_sep {
+ my $type = $Fileparse_fstype; # the separator rules depend on the currently selected path type
+
+ if ($type eq 'MacOS') {
+ $_[0] =~ s/([^:]):\z/$1/s; # drop a single trailing colon that follows a non-colon character
+ }
+ elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
+ $_[0] =~ s/([^:])[\\\/]*\z/$1/; # drop trailing backslashes and slashes after a non-colon character
+ }
+ else {
+ $_[0] =~ s{(.)/*\z}{$1}s; # all other types: drop trailing slashes but always keep one leading character
+ }
+}
+
+
=item C<fileparse_set_fstype>
- my $previous_fstype = fileparse_set_fstype($type);
+ my $type = fileparse_set_fstype();
+ my $previous_type = fileparse_set_fstype($type);
Normally File::Basename will assume a file path type native to your current
operating system (i.e. /foo/bar style on Unix, \foo\bar on Windows, etc...).
With this function you can override that assumption.
-Valid $types are "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "RISCOS",
-"MSWin32" and "Unix" (case-insensitive). If an unrecognized $type is
-given Unix semantics will be assumed.
+Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
+"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
+"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
+given "Unix" will be assumed.
If you've selected VMS syntax, and the file specification you pass to
one of these routines contains a "/", they assume you are using Unix
=cut
+BEGIN { # BEGIN runs at compile time, so the lexicals below are filled before any run-time call to the sub
+
+my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc); # types that switch on $Fileparse_igncase
+my @Types = (@Ignore_Case, qw(Unix)); # every recognised type name
+
sub fileparse_set_fstype {
- my @old = ($Fileparse_fstype, $Fileparse_igncase);
- if (@_) {
- $Fileparse_fstype = $_[0];
- $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i);
- }
- wantarray ? @old : $old[0];
+ my $old = $Fileparse_fstype; # previous type, returned to the caller
+
+ if (@_) { # with no argument the sub only reports the current type
+ my $new_type = shift;
+
+ $Fileparse_fstype = 'Unix'; # default when no known type matches
+ foreach my $type (@Types) {
+ $Fileparse_fstype = $type if $new_type =~ /^$type/i; # case-insensitive prefix match; the last matching entry wins
+ }
+
+ $Fileparse_igncase =
+ (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0; # 1 for every type listed in @Ignore_Case, 0 for Unix
+ }
+
+ return $old;
+}
+
}
1;
+
+
+=head1 SEE ALSO
+
+L<dirname(1)>, L<basename(1)>, L<File::Spec>