-package File::Basename;
-
=head1 NAME
-fileparse - split a pathname into pieces
-
-basename - extract just the filename from a path
-
-dirname - extract just the directory from a path
+File::Basename - Parse file paths into directory, filename and suffix.
=head1 SYNOPSIS
use File::Basename;
- ($name,$path,$suffix) = fileparse($fullname,@suffixlist)
- fileparse_set_fstype($os_string);
+ ($name,$path,$suffix) = fileparse($fullname,@suffixlist);
+ $name = fileparse($fullname,@suffixlist);
+
$basename = basename($fullname,@suffixlist);
- $dirname = dirname($fullname);
+ $dirname = dirname($fullname);
- ($name,$path,$suffix) = fileparse("lib/File/Basename.pm","\.pm");
- fileparse_set_fstype("VMS");
- $basename = basename("lib/File/Basename.pm",".pm");
- $dirname = dirname("lib/File/Basename.pm");
=head1 DESCRIPTION
-These routines allow you to parse file specifications into useful
-pieces using the syntax of different operating systems.
-
-=over 4
-
-=item fileparse_set_fstype
-
-You select the syntax via the routine fileparse_set_fstype().
-
-If the argument passed to it contains one of the substrings
-"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification
-syntax of that operating system is used in future calls to
-fileparse(), basename(), and dirname(). If it contains none of
-these substrings, UNIX syntax is used. This pattern matching is
-case-insensitive. If you've selected VMS syntax, and the file
-specification you pass to one of these routines contains a "/",
-they assume you are using UNIX emulation and apply the UNIX syntax
-rules instead, for that function call only.
-
-If the argument passed to it contains one of the substrings "VMS",
-"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern
-matching for suffix removal is performed without regard for case,
-since those systems are not case-sensitive when opening existing files
-(though some of them preserve case on file creation).
-
-If you haven't called fileparse_set_fstype(), the syntax is chosen
-by examining the builtin variable C<$^O> according to these rules.
-
-=item fileparse
-
-The fileparse() routine divides a file specification into three
-parts: a leading B<path>, a file B<name>, and a B<suffix>. The
-B<path> contains everything up to and including the last directory
-separator in the input file specification. The remainder of the input
-file specification is then divided into B<name> and B<suffix> based on
-the optional patterns you specify in C<@suffixlist>. Each element of
-this list is interpreted as a regular expression, and is matched
-against the end of B<name>. If this succeeds, the matching portion of
-B<name> is removed and prepended to B<suffix>. By proper use of
-C<@suffixlist>, you can remove file types or versions for examination.
-
-You are guaranteed that if you concatenate B<path>, B<name>, and
-B<suffix> together in that order, the result will denote the same
-file as the input file specification.
-
-=back
-
-=head1 EXAMPLES
-
-Using UNIX file syntax:
-
- ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7',
- '\.book\d+');
-
-would yield
-
- $base eq 'draft'
- $path eq '/virgil/aeneid/',
- $type eq '.book7'
-
-Similarly, using VMS syntax:
+These routines allow you to parse file paths into their directory, filename
+and suffix.
- ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh',
- '\..*');
+B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and
+quirks, of the shell and C functions of the same name. See each
+function's documentation for details. If your concern is just parsing
+paths it is safer to use L<File::Spec>'s C<splitpath()> and
+C<splitdir()> methods.
-would yield
+It is guaranteed that
- $name eq 'Rhetoric'
- $dir eq 'Doc_Root:[Help]'
- $type eq '.Rnh'
+ # Where $path_separator is / for Unix, \ for Windows, etc...
+ dirname($path) . $path_separator . basename($path);
-=over
+is equivalent to the original path for all systems but VMS.
-=item C<basename>
-
-The basename() routine returns the first element of the list produced
-by calling fileparse() with the same arguments, except that it always
-quotes metacharacters in the given suffixes. It is provided for
-programmer compatibility with the UNIX shell command basename(1).
-
-=item C<dirname>
-
-The dirname() routine returns the directory portion of the input file
-specification. When using VMS or MacOS syntax, this is identical to the
-second element of the list produced by calling fileparse() with the same
-input file specification. (Under VMS, if there is no directory information
-in the input file specification, then the current default device and
-directory are returned.) When using UNIX or MSDOS syntax, the return
-value conforms to the behavior of the UNIX shell command dirname(1). This
-is usually the same as the behavior of fileparse(), but differs in some
-cases. For example, for the input file specification F<lib/>, fileparse()
-considers the directory name to be F<lib/>, while dirname() considers the
-directory name to be F<.>).
-
-=back
=cut
-## use strict;
+package File::Basename;
+
# A bit of juggling to insure that C<use re 'taint';> always works, since
# File::Basename is used during the Perl build, when the re extension may
# not be available.
BEGIN {
unless (eval { require re; })
- { eval ' sub re::import { $^H |= 0x00100000; } ' }
+ { eval ' sub re::import { $^H |= 0x00100000; } ' } # HINT_RE_TAINT
import re 'taint';
}
-
-use 5.005_64;
+use strict;
+use 5.006;
+use warnings;
our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
-$VERSION = "2.6";
+$VERSION = "2.74";
+fileparse_set_fstype($^O);
-# fileparse_set_fstype() - specify OS-based rules used in future
-# calls to routines in this package
-#
-# Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS
-# Any other name uses Unix-style rules and is case-sensitive
-sub fileparse_set_fstype {
- my @old = ($Fileparse_fstype, $Fileparse_igncase);
- if (@_) {
- $Fileparse_fstype = $_[0];
- $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i);
- }
- wantarray ? @old : $old[0];
-}
+=over 4
+
+=item C<fileparse>
+
+ my($filename, $directories, $suffix) = fileparse($path);
+ my($filename, $directories, $suffix) = fileparse($path, @suffixes);
+ my $filename = fileparse($path, @suffixes);
-# fileparse() - parse file specification
-#
-# Version 2.4 27-Sep-1996 Charles Bailey bailey@genetics.upenn.edu
+The C<fileparse()> routine divides a file path into its $directories, $filename
+and (optionally) the filename $suffix.
+
+$directories contains everything up to and including the last
+directory separator in the $path including the volume (if applicable).
+The remainder of the $path is the $filename.
+
+ # On Unix returns ("baz", "/foo/bar/", "")
+ fileparse("/foo/bar/baz");
+
+ # On Windows returns ("baz", "C:\foo\bar\", "")
+ fileparse("C:\foo\bar\baz");
+
+ # On Unix returns ("", "/foo/bar/baz/", "")
+ fileparse("/foo/bar/baz/");
+
+If @suffixes are given each element is a pattern (either a string or a
+C<qr//>) matched against the end of the $filename. The matching
+portion is removed and becomes the $suffix.
+
+ # On Unix returns ("baz", "/foo/bar", ".txt")
+ fileparse("/foo/bar/baz", qr/\.[^.]*/);
+
+If type is non-Unix (see C<fileparse_set_fstype()>) then the pattern
+matching for suffix removal is performed case-insensitively, since
+those systems are not case-sensitive when opening existing files.
+
+You are guaranteed that C<$directories . $filename . $suffix> will
+denote the same location as the original $path.
+
+=cut
sub fileparse {
my($fullname,@suffices) = @_;
- my($fstype,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
- my($dirpath,$tail,$suffix,$basename);
+
+ unless (defined $fullname) {
+ require Carp;
+ Carp::croak("fileparse(): need a valid pathname");
+ }
+
+ my $orig_type = '';
+ my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
+
my($taint) = substr($fullname,0,0); # Is $fullname tainted?
- if ($fstype =~ /^VMS/i) {
- if ($fullname =~ m#/#) { $fstype = '' } # We're doing Unix emulation
- else {
- ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/);
- $dirpath ||= ''; # should always be defined
- }
+ if ($type eq "VMS" and $fullname =~ m{/} ) {
+ # We're doing Unix emulation
+ $orig_type = $type;
+ $type = 'Unix';
+ }
+
+ my($dirpath, $basename);
+
+ if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
+ ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
+ $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
}
- if ($fstype =~ /^MS(DOS|Win32)/i) {
- ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/);
- $dirpath .= '.\\' unless $dirpath =~ /[\\\/]$/;
+ elsif ($type eq "OS2") {
+ ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
+ $dirpath = './' unless $dirpath; # Can't be 0
+ $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
}
- elsif ($fstype =~ /^MacOS/i) {
- ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/);
+ elsif ($type eq "MacOS") {
+ ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
+ $dirpath = ':' unless $dirpath;
}
- elsif ($fstype =~ /^AmigaOS/i) {
- ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/);
+ elsif ($type eq "AmigaOS") {
+ ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
$dirpath = './' unless $dirpath;
}
- elsif ($fstype !~ /^VMS/i) { # default to Unix
- ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#);
- if ($^O eq 'VMS' and $fullname =~ m:/[^/]+/000000/?:) {
+ elsif ($type eq 'VMS' ) {
+ ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
+ $dirpath ||= ''; # should always be defined
+ }
+ else { # Default to Unix semantics.
+ ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s);
+ if ($orig_type eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) {
# dev:[000000] is top of VMS tree, similar to Unix '/'
- ($basename,$dirpath) = ('',$fullname);
+ # so strip it off and treat the rest as "normal"
+ my $devspec = $1;
+ my $remainder = $3;
+ ($dirpath,$basename) = ($remainder =~ m#^(.*/)?(.*)#s);
+ $dirpath ||= ''; # should always be defined
+ $dirpath = $devspec.$dirpath;
}
$dirpath = './' unless $dirpath;
}
+
+ my $tail = '';
+ my $suffix = '';
if (@suffices) {
- $tail = '';
foreach $suffix (@suffices) {
my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
- if ($basename =~ s/$pat//) {
+ if ($basename =~ s/$pat//s) {
$taint .= substr($suffix,0,0);
$tail = $1 . $tail;
}
}
}
- $tail .= $taint if defined $tail; # avoid warning if $tail == undef
- wantarray ? ($basename . $taint, $dirpath . $taint, $tail)
- : $basename . $taint;
+ # Ensure taint is propgated from the path to its pieces.
+ $tail .= $taint;
+ wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
+ : ($basename .= $taint);
}
-# basename() - returns first element of list returned by fileparse()
+
+=item C<basename>
+
+ my $filename = basename($path);
+ my $filename = basename($path, @suffixes);
+
+This function is provided for compatibility with the Unix shell command
+C<basename(1)>. It does B<NOT> always return the file name portion of a
+path as you might expect. To be safe, if you want the file name portion of
+a path use C<fileparse()>.
+
+C<basename()> returns the last level of a filepath even if the last
+level is clearly directory. In effect, it is acting like C<pop()> for
+paths. This differs from C<fileparse()>'s behaviour.
+
+ # Both return "bar"
+ basename("/foo/bar");
+ basename("/foo/bar/");
+
+@suffixes work as in C<fileparse()> except all regex metacharacters are
+quoted.
+
+ # These two function calls are equivalent.
+ my $filename = basename("/foo/bar/baz.txt", ".txt");
+ my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
+
+Also note that in order to be compatible with the shell command,
+C<basename()> does not strip off a suffix if it is identical to the
+remaining characters in the filename.
+
+=cut
+
sub basename {
- my($name) = shift;
- (fileparse($name, map("\Q$_\E",@_)))[0];
+ my($path) = shift;
+
+ # From BSD basename(1)
+ # The basename utility deletes any prefix ending with the last slash `/'
+ # character present in string (after first stripping trailing slashes)
+ _strip_trailing_sep($path);
+
+ my($basename, $dirname, $suffix) = fileparse( $path, map("\Q$_\E",@_) );
+
+ # From BSD basename(1)
+ # The suffix is not stripped if it is identical to the remaining
+ # characters in string.
+ if( length $suffix and !length $basename ) {
+ $basename = $suffix;
+ }
+
+ # Ensure that basename '/' == '/'
+ if( !length $basename ) {
+ $basename = $dirname;
+ }
+
+ return $basename;
}
-# dirname() - returns device and directory portion of file specification
-# Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS
-# filespecs except for names ending with a separator, e.g., "/xx/yy/".
-# This differs from the second element of the list returned
-# by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and
-# the last directory name if the filespec ends in a '/' or '\'), is lost.
+
+=item C<dirname>
+
+This function is provided for compatibility with the Unix shell
+command C<dirname(1)> and has inherited some of its quirks. In spite of
+its name it does B<NOT> always return the directory name as you might
+expect. To be safe, if you want the directory name of a path use
+C<fileparse()>.
+
+Only on VMS (where there is no ambiguity between the file and directory
+portions of a path) and AmigaOS (possibly due to an implementation quirk in
+this module) does C<dirname()> work like C<fileparse($path)>, returning just the
+$directories.
+
+ # On VMS and AmigaOS
+ my $directories = dirname($path);
+
+When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
+which is subtly different from how C<fileparse()> works. It returns all but
+the last level of a file path even if the last level is clearly a directory.
+In effect, it is not returning the directory portion but simply the path one
+level up acting like C<chop()> for file paths.
+
+Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
+its returned path.
+
+ # returns /foo/bar. fileparse() would return /foo/bar/
+ dirname("/foo/bar/baz");
+
+ # also returns /foo/bar despite the fact that baz is clearly a
+ # directory. fileparse() would return /foo/bar/baz/
+ dirname("/foo/bar/baz/");
+
+ # returns '.'. fileparse() would return 'foo/'
+ dirname("foo/");
+
+Under VMS, if there is no directory information in the $path, then the
+current default device and directory is used.
+
+=cut
+
sub dirname {
- my($basename,$dirname) = fileparse($_[0]);
- my($fstype) = $Fileparse_fstype;
+ my $path = shift;
- if ($fstype =~ /VMS/i) {
- if ($_[0] =~ m#/#) { $fstype = '' }
- else { return $dirname || $ENV{DEFAULT} }
+ my($type) = $Fileparse_fstype;
+
+ if( $type eq 'VMS' and $path =~ m{/} ) {
+ # Parse as Unix
+ local($File::Basename::Fileparse_fstype) = '';
+ return dirname($path);
}
- if ($fstype =~ /MacOS/i) { return $dirname }
- elsif ($fstype =~ /MSDOS/i) {
- $dirname =~ s/([^:])[\\\/]*$/$1/;
- unless( length($basename) ) {
+
+ my($basename, $dirname) = fileparse($path);
+
+ if ($type eq 'VMS') {
+ $dirname ||= $ENV{DEFAULT};
+ }
+ elsif ($type eq 'MacOS') {
+ if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
+ _strip_trailing_sep($dirname);
($basename,$dirname) = fileparse $dirname;
- $dirname =~ s/([^:])[\\\/]*$/$1/;
}
+ $dirname .= ":" unless $dirname =~ /:\z/;
}
- elsif ($fstype =~ /MSWin32/i) {
- $dirname =~ s/([^:])[\\\/]*$/$1/;
+ elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
+ _strip_trailing_sep($dirname);
unless( length($basename) ) {
($basename,$dirname) = fileparse $dirname;
- $dirname =~ s/([^:])[\\\/]*$/$1/;
+ _strip_trailing_sep($dirname);
}
}
- elsif ($fstype =~ /AmigaOS/i) {
- if ( $dirname =~ /:$/) { return $dirname }
+ elsif ($type eq 'AmigaOS') {
+ if ( $dirname =~ /:\z/) { return $dirname }
chop $dirname;
- $dirname =~ s#[^:/]+$## unless length($basename);
+ $dirname =~ s#[^:/]+\z## unless length($basename);
}
- else {
- $dirname =~ s:(.)/*$:$1:;
+ else {
+ _strip_trailing_sep($dirname);
unless( length($basename) ) {
- local($File::Basename::Fileparse_fstype) = $fstype;
($basename,$dirname) = fileparse $dirname;
- $dirname =~ s:(.)/*$:$1:;
+ _strip_trailing_sep($dirname);
}
}
$dirname;
}
-fileparse_set_fstype $^O;
+
+# Strip the trailing path separator.
+sub _strip_trailing_sep {
+ my $type = $Fileparse_fstype;
+
+ if ($type eq 'MacOS') {
+ $_[0] =~ s/([^:]):\z/$1/s;
+ }
+ elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) {
+ $_[0] =~ s/([^:])[\\\/]*\z/$1/;
+ }
+ else {
+ $_[0] =~ s{(.)/*\z}{$1}s;
+ }
+}
+
+
+=item C<fileparse_set_fstype>
+
+ my $type = fileparse_set_fstype();
+ my $previous_type = fileparse_set_fstype($type);
+
+Normally File::Basename will assume a file path type native to your current
+operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
+With this function you can override that assumption.
+
+Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
+"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
+"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is
+given "Unix" will be assumed.
+
+If you've selected VMS syntax, and the file specification you pass to
+one of these routines contains a "/", they assume you are using Unix
+emulation and apply the Unix syntax rules instead, for that function
+call only.
+
+=back
+
+=cut
+
+
+BEGIN {
+
+my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
+my @Types = (@Ignore_Case, qw(Unix));
+
+sub fileparse_set_fstype {
+ my $old = $Fileparse_fstype;
+
+ if (@_) {
+ my $new_type = shift;
+
+ $Fileparse_fstype = 'Unix'; # default
+ foreach my $type (@Types) {
+ $Fileparse_fstype = $type if $new_type =~ /^$type/i;
+ }
+
+ $Fileparse_igncase =
+ (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
+ }
+
+ return $old;
+}
+
+}
+
1;
+
+
+=head1 SEE ALSO
+
+L<dirname(1)>, L<basename(1)>, L<File::Spec>