From: Michael G. Schwern Date: Tue, 5 Jul 2005 20:11:27 +0000 (-0700) Subject: File::Basename doc overhaul X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=767010ca49e6a0dff07d97842a8341decbed33d9;p=p5sagit%2Fp5-mst-13.2.git File::Basename doc overhaul Message-ID: <20050706031127.GL9227@windhund.schwern.org> (with some minor changes) p4raw-id: //depot/perl@25086 --- diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm index 887c7ba..e6b16e7 100644 --- a/lib/File/Basename.pm +++ b/lib/File/Basename.pm @@ -1,12 +1,6 @@ -package File::Basename; - =head1 NAME -fileparse - split a pathname into pieces - -basename - extract just the filename from a path - -dirname - extract just the directory from a path +File::Basename - Parse file paths into directory, filename and suffix. =head1 SYNOPSIS @@ -14,120 +8,25 @@ dirname - extract just the directory from a path ($name,$path,$suffix) = fileparse($fullname,@suffixlist); $name = fileparse($fullname,@suffixlist); - fileparse_set_fstype($os_string); + $basename = basename($fullname,@suffixlist); - $dirname = dirname($fullname); + $dirname = dirname($fullname); - ($name,$path,$suffix) = fileparse("lib/File/Basename.pm",qr{\.pm}); - fileparse_set_fstype("VMS"); - $basename = basename("lib/File/Basename.pm",".pm"); - $dirname = dirname("lib/File/Basename.pm"); =head1 DESCRIPTION -These routines allow you to parse file specifications into useful -pieces using the syntax of different operating systems. - -=over 4 - -=item fileparse_set_fstype - -You select the syntax via the routine fileparse_set_fstype(). - -If the argument passed to it contains one of the substrings -"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification -syntax of that operating system is used in future calls to -fileparse(), basename(), and dirname(). If it contains none of -these substrings, Unix syntax is used. This pattern matching is -case-insensitive. 
If you've selected VMS syntax, and the file -specification you pass to one of these routines contains a "/", -they assume you are using Unix emulation and apply the Unix syntax -rules instead, for that function call only. - -If the argument passed to it contains one of the substrings "VMS", -"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern -matching for suffix removal is performed without regard for case, -since those systems are not case-sensitive when opening existing files -(though some of them preserve case on file creation). - -If you haven't called fileparse_set_fstype(), the syntax is chosen -by examining the builtin variable C<$^O> according to these rules. - -=item fileparse - -The fileparse() routine divides a file specification into three -parts: a leading B<path>, a file B<name>, and a B<suffix>. The -B<path> contains everything up to and including the last directory -separator in the input file specification. The remainder of the input -file specification is then divided into B<name> and B<suffix> based on -the optional patterns you specify in C<@suffixlist>. Each element of -this list can be a qr-quoted pattern (or a string which is interpreted -as a regular expression), and is matched -against the end of B<name>. If this succeeds, the matching portion of -B<name> is removed and prepended to B<suffix>. By proper use of -C<@suffixlist>, you can remove file types or versions for examination. - -You are guaranteed that if you concatenate B<path>, B<name>, and -B<suffix> together in that order, the result will denote the same -file as the input file specification. - -In scalar context, fileparse() returns only the B<name> part of the filename. 
- -=back - -=head1 EXAMPLES - -Using Unix file syntax: - - ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7', - qr{\.book\d+}); - -would yield - - $base eq 'draft' - $path eq '/virgil/aeneid/', - $type eq '.book7' - -Similarly, using VMS syntax: - - ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh', - qr{\..*}); - -would yield - - $name eq 'Rhetoric' - $dir eq 'Doc_Root:[Help]' - $type eq '.Rnh' - -=over - -=item C<basename> - -The basename() routine returns the first element of the list produced -by calling fileparse() with the same arguments, except that it always -quotes metacharacters in the given suffixes. It is provided for -programmer compatibility with the Unix shell command basename(1). - -=item C<dirname> - -The dirname() routine returns the directory portion of the input file -specification. When using VMS or MacOS syntax, this is identical to the -second element of the list produced by calling fileparse() with the same -input file specification. (Under VMS, if there is no directory information -in the input file specification, then the current default device and -directory are returned.) When using Unix or MSDOS syntax, the return -value conforms to the behavior of the Unix shell command dirname(1). This -is usually the same as the behavior of fileparse(), but differs in some -cases. For example, for the input file specification F<lib/>, fileparse() -considers the directory name to be F<lib/>, while dirname() considers the -directory name to be F<.>). +These routines allow you to parse file paths into their directory, filename +and suffix. -=back +B<NOTE>: C<basename> and C<dirname> emulate the behaviours, and quirks, of +the shell and C functions of the same name. See each function's documentation +for details. =cut -## use strict; +package File::Basename; + # A bit of juggling to insure that C<use re 'taint';> always works, since # File::Basename is used during the Perl build, when the re extension may # not be available. 
@@ -138,7 +37,7 @@ BEGIN { } - +use strict; use 5.006; use warnings; our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase); @@ -147,25 +46,49 @@ require Exporter; @EXPORT = qw(fileparse fileparse_set_fstype basename dirname); $VERSION = "2.73"; +fileparse_set_fstype($^O); -# fileparse_set_fstype() - specify OS-based rules used in future -# calls to routines in this package -# -# Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS -# Any other name uses Unix-style rules and is case-sensitive -sub fileparse_set_fstype { - my @old = ($Fileparse_fstype, $Fileparse_igncase); - if (@_) { - $Fileparse_fstype = $_[0]; - $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); - } - wantarray ? @old : $old[0]; -} +=over 4 + +=item C<fileparse> + + my($filename, $directories, $suffix) = fileparse($path); + my($filename, $directories, $suffix) = fileparse($path, @suffixes); + my $filename = fileparse($path, @suffixes); + +The C<fileparse()> routine divides a file path into its $directories, $filename +and (optionally) the filename $suffix. + +$directories contains everything up to and including the last +directory separator in the $path including the volume (if applicable). +The remainder of the $path is the $filename. + + # On Unix returns ("baz", "/foo/bar/", "") + fileparse("/foo/bar/baz"); + + # On Windows returns ("baz", "C:\foo\bar\", "") + fileparse("C:\foo\bar\baz"); + + # On Unix returns ("", "/foo/bar/baz/", "") + fileparse("/foo/bar/baz/"); + +If @suffixes are given each element is a pattern (either a string or a +C<qr//>) matched against the end of the $filename. The matching +portion is removed and becomes the $suffix. 
+ + # On Unix returns ("baz", "/foo/bar/", ".txt") + fileparse("/foo/bar/baz.txt", qr/\.[^.]*/); + +If type is one of "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "MSWin32" +or "RISCOS" (see C<fileparse_set_fstype()>) then the pattern matching +for suffix removal is performed case-insensitively, since those +systems are not case-sensitive when opening existing files. + +You are guaranteed that C<$directories . $filename . $suffix> will +denote the same location as the original $path. -# fileparse() - parse file specification -# -# Version 2.4 27-Sep-1996 Charles Bailey bailey@genetics.upenn.edu +=cut sub fileparse { @@ -227,13 +150,31 @@ sub fileparse { } } + # Ensure taint is propagated from the path to its pieces. $tail .= $taint if defined $tail; # avoid warning if $tail == undef wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail) : ($basename .= $taint); } -# basename() - returns first element of list returned by fileparse() + +=item C<basename> + + my $filename = basename($path); + my $filename = basename($path, @suffixes); + +C<basename()> works just like C<fileparse()> in scalar context - you only get +the $filename - except that it always quotes metacharacters in the @suffixes. + + # These two function calls are equivalent. + my $filename = basename("/foo/bar/baz.txt", ".txt"); + my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/); + +This function is provided for compatibility with the Unix shell command +C<basename(1)>. + +=cut + sub basename { my($name) = shift; @@ -241,22 +182,64 @@ sub basename { } -# dirname() - returns device and directory portion of file specification -# Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS -# filespecs except for names ending with a separator, e.g., "/xx/yy/". -# This differs from the second element of the list returned -# by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and -# the last directory name if the filespec ends in a '/' or '\'), is lost. 
+ +=item C<dirname> + +This function is provided for compatibility with the Unix shell +command C<dirname(1)> and has inherited some of its quirks. In spite of +its name it does B<NOT> always return the directory name as you might +expect. To be safe, if you want the directory name of a path use +C<fileparse()>. + + # On all but Unix and MSDOS + my $directories = dirname($path); + +On all system types but Unix and MSDOS this works just like +C<fileparse($path)> but returning just the $directories. + + # On Unix and MSDOS + my $path_one_level_up = dirname($path); + +When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function +which is subtly different from how C<fileparse()> works. It returns all but +the last level of a file path even if the last level is clearly a directory. +In effect, it is not returning the directory portion but simply the path one +level up acting like C<chdir('..')> for file paths. + +Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on +its returned path. + + # returns /foo/bar. fileparse() would return /foo/bar/ + dirname("/foo/bar/baz"); + + # also returns /foo/bar despite the fact that baz is clearly a + # directory. fileparse() would return /foo/bar/baz/ + dirname("/foo/bar/baz/"); + + # returns '.'. fileparse() would return 'foo/' + dirname("foo/"); + +Under VMS, if there is no directory information in the $path, then the +current default device and directory are used. 
+ +=cut + sub dirname { - my($basename,$dirname) = fileparse($_[0]); my($fstype) = $Fileparse_fstype; + if( $fstype =~ /VMS/i and $_[0] =~ m{/} ) { + # Parse as Unix + local($File::Basename::Fileparse_fstype) = ''; + return dirname(@_); + } + + my($basename,$dirname) = fileparse($_[0]); + if ($fstype =~ /VMS/i) { - if ($_[0] =~ m#/#) { $fstype = '' } - else { return $dirname || $ENV{DEFAULT} } + $dirname ||= $ENV{DEFAULT}; } - if ($fstype =~ /MacOS/i) { + elsif ($fstype =~ /MacOS/i) { if( !length($basename) && $dirname !~ /^[^:]+:\z/) { $dirname =~ s/([^:]):\z/$1/s; ($basename,$dirname) = fileparse $dirname; @@ -276,17 +259,47 @@ sub dirname { $dirname =~ s#[^:/]+\z## unless length($basename); } else { - $dirname =~ s:(.)/*\z:$1:s; + $dirname =~ s{(.)/*\z}{$1}s; unless( length($basename) ) { - local($File::Basename::Fileparse_fstype) = $fstype; ($basename,$dirname) = fileparse $dirname; - $dirname =~ s:(.)/*\z:$1:s; + $dirname =~ s{(.)/*\z}{$1}s; } } $dirname; } -fileparse_set_fstype $^O; + +=item C<fileparse_set_fstype> + + my $previous_fstype = fileparse_set_fstype($type); + +Normally File::Basename will assume a file path type native to your current +operating system (i.e. /foo/bar style on Unix, \foo\bar on Windows, etc...). +With this function you can override that assumption. + +Valid $types are "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "RISCOS", +"MSWin32" and "Unix" (case-insensitive). If an unrecognized $type is +given Unix semantics will be assumed. + +If you've selected VMS syntax, and the file specification you pass to +one of these routines contains a "/", they assume you are using Unix +emulation and apply the Unix syntax rules instead, for that function +call only. + +=back + +=cut + + +sub fileparse_set_fstype { + my @old = ($Fileparse_fstype, $Fileparse_igncase); + if (@_) { + $Fileparse_fstype = $_[0]; + $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); + } + wantarray ? @old : $old[0]; +} + 1;