X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FFile%2FFind.pm;h=9b1549429902368d7a1842c16c7eee50d867a87d;hb=3bb6d3e5122054efefe647ff1c834b844fec2fa2;hp=42905dec80c9d8c4177571370e52ee53d236165e;hpb=cb50131aab68ac6dda048612c6e853b8cb08701e;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/File/Find.pm b/lib/File/Find.pm index 42905de..9b15494 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -1,31 +1,65 @@ package File::Find; -use 5.005_64; +use 5.006; +use strict; +use warnings; +use warnings::register; +our $VERSION = '1.04'; require Exporter; require Cwd; =head1 NAME -find - traverse a file tree - -finddepth - traverse a directory structure depth-first +File::Find - Traverse a directory tree. =head1 SYNOPSIS use File::Find; - find(\&wanted, '/foo', '/bar'); + find(\&wanted, @directories_to_seach); sub wanted { ... } use File::Find; - finddepth(\&wanted, '/foo', '/bar'); + finddepth(\&wanted, @directories_to_search); sub wanted { ... } - + use File::Find; find({ wanted => \&process, follow => 1 }, '.'); =head1 DESCRIPTION +These are functions for searching through directory trees doing work +on each file found similar to the Unix I command. File::Find +exports two functions, C and C. They work similarly +but have subtle differences. + +=over 4 + +=item B + + find(\&wanted, @directories); + find(\%options, @directories); + +find() does a breadth-first search over the given @directories in the +order they are given. In essense, it works from the top down. + +For each file or directory found the &wanted subroutine is called (see +below for details). Additionally, for each directory found it will go +into that directory and continue the search. + +=item B + + finddepth(\&wanted, @directories); + finddepth(\%options, @directories); + +finddepth() works just like find() except it does a depth-first search. +It works from the bottom of the directory tree up. + +=back + +=head2 %options + The first argument to find() is either a hash reference describing the -operations to be performed for each file, or a code reference. +operations to be performed for each file, or a code reference. The +code reference is described in L below. Here are the possible keys for the hash: @@ -33,14 +67,35 @@ Here are the possible keys for the hash: =item C -The value should be a code reference. This code reference is called -I below. +The value should be a code reference. This code reference is +described in L below. =item C Reports the name of a directory only AFTER all its entries have been reported. Entry point finddepth() is a shortcut for -specifying C<{ bydepth => 1 }> in the first argument of find(). +specifying C<{ bydepth =E 1 }> in the first argument of find(). + +=item C + +The value should be a code reference. This code reference is used to +preprocess the current directory. The name of currently processed +directory is in $File::Find::dir. Your preprocessing function is +called after readdir() but before the loop that calls the wanted() +function. It is called with a list of strings (actually file/directory +names) and is expected to return a list of strings. The code can be +used to sort the file/directory names alphabetically, numerically, +or to filter out directory entries based on their name alone. When +I or I are in effect, C is a no-op. + +=item C + +The value should be a code reference. It is invoked just before leaving +the currently processed directory. It is called in void context with no +arguments. The name of the current directory is in $File::Find::dir. This +hook is handy for summarizing a directory, such as calculating its disk +usage. When I or I are in effect, C is a +no-op. =item C @@ -55,7 +110,7 @@ If either I or I is in effect: =item * -It is guarantueed that an I has been called before the user's +It is guaranteed that an I has been called before the user's I function is called. This enables fast file checks involving S< _>. =item * @@ -67,11 +122,10 @@ pathname of the file with all symbolic links resolved =item C -This is similar to I except that it may report some files -more than once. It does detect cycles however. -Since only symbolic links have to be hashed, this is -much cheaper both in space and time. -If processing a file more than once (by the user's I function) +This is similar to I except that it may report some files more +than once. It does detect cycles, however. Since only symbolic links +have to be hashed, this is much cheaper both in space and time. If +processing a file more than once (by the user's I function) is worse than just taking time, the option I should be used. =item C @@ -83,8 +137,15 @@ are about to be processed a second time, File::Find dies. C causes File::Find to die if any file is about to be processed a second time. C causes File::Find to ignore any duplicate files and -dirctories but to proceed normally otherwise. +directories but to proceed normally otherwise. + +=item C +If true and a code reference, will be called with the symbolic link +name and the directory it lives in as arguments. Otherwise, if true +and warnings are on, warning "symbolic_link_name is a dangling +symbolic link\n" will be issued. If false, the dangling symbolic link +will be silently ignored. =item C @@ -96,35 +157,72 @@ C<$_> will be the same as C<$File::Find::name>. If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted -before they can be cd'ed to. Therefore they are checked against a regular -expression I. Note, that all names passed to the -user's I function are still tainted. +before they can be chdir'ed to. Therefore they are checked against a regular +expression I. Note that all names passed to the user's +I function are still tainted. If this option is used while +not in taint-mode, C is a no-op. =item C See above. This should be set using the C quoting operator. The default is set to C. -Note that the paranthesis which are vital. +Note that the parentheses are vital. =item C -If set, directories (subtrees) which fail the I -are skipped. The default is to 'die' in such a case. +If set, a directory which fails the I is skipped, +including all its sub-directories. The default is to 'die' in such a case. + +=back + +=head2 The wanted function + +The wanted() function does whatever verifications you want on each +file and directory. It takes no arguments but rather does its work +through a collection of variables. + +=over 4 + +=item C<$File::Find::dir> is the current directory name, + +=item C<$_> is the current filename within that directory + +=item C<$File::Find::name> is the complete pathname to the file. =back -The wanted() function does whatever verifications you want. -C<$File::Find::dir> contains the current directory name, and C<$_> the -current filename within that directory. C<$File::Find::name> contains -the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when -the function is called, unless C was specified. -When or are in effect there is also a -C<$File::Find::fullname>. -The function may set C<$File::Find::prune> to prune the tree -unless C was specified. -Unless C or C is specified, for compatibility -reasons (find.pl, find2perl) there are in addition the following globals -available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>, +Don't modify these variables. + +For example, when examining the file /some/path/foo.ext you will have: + + $File::Find::dir = /some/path/ + $_ = foo.ext + $File::Find::name = /some/path/foo.ext + +You are chdir()'d toC<$File::Find::dir> when the function is called, +unless C was specified. Note that when changing to +directories is in effect the root directory (F) is a somewhat +special case inasmuch as the concatenation of C<$File::Find::dir>, +C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The +table below summarizes all variants: + + $File::Find::name $File::Find::dir $_ + default / / . + no_chdir=>0 /etc / etc + /etc/x /etc x + + no_chdir=>1 / / / + /etc / /etc + /etc/x /etc /etc/x + + +When or are in effect, there is +also a C<$File::Find::fullname>. The function may set +C<$File::Find::prune> to prune the tree unless C was +specified. Unless C or C is specified, for +compatibility reasons (find.pl, find2perl) there are in addition the +following globals available: C<$File::Find::topdir>, +C<$File::Find::topdev>, C<$File::Find::topino>, C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C tool, which when fed, @@ -135,7 +233,7 @@ This library is useful for the C tool, which when fed, produces something like: sub wanted { - /^\.nfs.*$/ && + /^\.nfs.*\z/s && (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && int(-M _) > 7 && unlink($_) @@ -145,12 +243,12 @@ produces something like: ($File::Find::prune = 1); } -Set the variable C<$File::Find::dont_use_nlink> if you're using AFS, -since AFS cheats. - +Notice the C<_> in the above C: the C<_> is a magical +filehandle that caches the information from the preceding +stat(), lstat(), or filetest. -Here's another interesting wanted function. It will find all symlinks -that don't resolve: +Here's another interesting wanted function. It will find all symbolic +links that don't resolve: sub wanted { -l && !-e && print "bogus link: $File::Find::name\n"; @@ -159,9 +257,38 @@ that don't resolve: See also the script C on CPAN for a nice application of this module. +=head1 WARNINGS + +If you run your program with the C<-w> switch, or if you use the +C pragma, File::Find will report warnings for several weird +situations. You can disable these warnings by putting the statement + + no warnings 'File::Find'; + +in the appropriate scope. See L for more info about lexical +warnings. + =head1 CAVEAT -Be aware that the option to follow symblic links can be dangerous. +=over 2 + +=item $dont_use_nlink + +You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to +force File::Find to always stat directories. This was used for file systems +that do not have an C count matching the number of sub-directories. +Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file +system) and a couple of others. + +You shouldn't need to set this variable, since File::Find should now detect +such file systems on-the-fly and switch itself to using stat. This works even +for parts of your file system, like a mounted CD-ROM. + +If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs. + +=item symlinks + +Be aware that the option to follow symbolic links can be dangerous. Depending on the structure of the directory tree (including symbolic links to directories) you might traverse a given (physical) directory more than once (only if C is in effect). @@ -169,26 +296,114 @@ Furthermore, deleting or changing files in a symbolically linked directory might cause very unpleasant surprises, since you delete or change files in an unknown directory. +=back + +=head1 NOTES + +=over 4 + +=item * + +Mac OS (Classic) users should note a few differences: + +=over 4 + +=item * + +The path separator is ':', not '/', and the current directory is denoted +as ':', not '.'. You should be careful about specifying relative pathnames. +While a full path always begins with a volume name, a relative pathname +should always begin with a ':'. If specifying a volume name only, a +trailing ':' is required. + +=item * + +C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_> +contains the name of a directory, that name may or may not end with a +':'. Likewise, C<$File::Find::name>, which contains the complete +pathname to that directory, and C<$File::Find::fullname>, which holds +the absolute pathname of that directory with all symbolic links resolved, +may or may not end with a ':'. + +=item * + +The default C (see above) on Mac OS is set to +C. Note that the parentheses are vital. + +=item * + +The invisible system file "Icon\015" is ignored. While this file may +appear in every directory, there are some more invisible system files +on every volume, which are all located at the volume root level (i.e. +"MacintoshHD:"). These system files are B excluded automatically. +Your filter may use the following code to recognize invisible files or +directories (requires Mac::Files): + + use Mac::Files; + + # invisible() -- returns 1 if file/directory is invisible, + # 0 if it's visible or undef if an error occurred + + sub invisible($) { + my $file = shift; + my ($fileCat, $fileInfo); + my $invisible_flag = 1 << 14; + + if ( $fileCat = FSpGetCatInfo($file) ) { + if ($fileInfo = $fileCat->ioFlFndrInfo() ) { + return (($fileInfo->fdFlags & $invisible_flag) && 1); + } + } + return undef; + } + +Generally, invisible files are system files, unless an odd application +decides to use invisible files for its own purposes. To distinguish +such files from system files, you have to look at the B and B +file attributes. The MacPerl built-in functions C and +C offer access to these attributes +(see MacPerl.pm for details). + +Files that appear on the desktop actually reside in an (hidden) directory +named "Desktop Folder" on the particular disk volume. Note that, although +all desktop files appear to be on the same "virtual" desktop, each disk +volume actually maintains its own "Desktop Folder" directory. + +=back + +=back + +=head1 HISTORY + +File::Find used to produce incorrect results if called recursively. +During the development of perl 5.8 this bug was fixed. +The first fixed version of File::Find was 1.01. =cut -@ISA = qw(Exporter); -@EXPORT = qw(find finddepth); +our @ISA = qw(Exporter); +our @EXPORT = qw(find finddepth); use strict; my $Is_VMS; +my $Is_MacOS; require File::Basename; +require File::Spec; -my %SLnkSeen; -my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, - $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat); +# Should ideally be my() not our() but local() currently +# refuses to operate on lexicals + +our %SLnkSeen; +our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process, $dangling_symlinks); sub contract_name { my ($cdir,$fn) = @_; - return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.'; + return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; $cdir = substr($cdir,0,rindex($cdir,'/')+1); @@ -197,31 +412,89 @@ sub contract_name { my $abs_name= $cdir . $fn; if (substr($fn,0,3) eq '../') { - do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|); + 1 while $abs_name =~ s!/[^/]*/\.\./!/!; } return $abs_name; } +# return the absolute name of a directory or file +sub contract_name_Mac { + my ($cdir,$fn) = @_; + my $abs_name; + + if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':' + + my $colon_count = length ($1); + if ($colon_count == 1) { + $abs_name = $cdir . $2; + return $abs_name; + } + else { + # need to move up the tree, but + # only if it's not a volume name + for (my $i=1; $i<$colon_count; $i++) { + unless ($cdir =~ /^[^:]+:$/) { # volume name + $cdir =~ s/[^:]+:$//; + } + else { + return undef; + } + } + $abs_name = $cdir . $2; + return $abs_name; + } + + } + else { + + # $fn may be a valid path to a directory or file or (dangling) + # symlink, without a leading ':' + if ( (-e $fn) || (-l $fn) ) { + if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:* + return $fn; # $fn is already an absolute path + } + else { + $abs_name = $cdir . $fn; + return $abs_name; + } + } + else { # argh!, $fn is not a valid directory/file + return undef; + } + } +} sub PathCombine($$) { my ($Base,$Name) = @_; my $AbsName; - if (substr($Name,0,1) eq '/') { - $AbsName= $Name; + if ($Is_MacOS) { + # $Name is the resolved symlink (always a full path on MacOS), + # i.e. there's no need to call contract_name_Mac() + $AbsName = $Name; + + # (simple) check for recursion + if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion + return undef; + } } else { - $AbsName= contract_name($Base,$Name); - } + if (substr($Name,0,1) eq '/') { + $AbsName= $Name; + } + else { + $AbsName= contract_name($Base,$Name); + } - # (simple) check for recursion - my $newlen= length($AbsName); - if ($newlen <= length($Base)) { - if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') - && $AbsName eq substr($Base,0,$newlen)) - { - return undef; + # (simple) check for recursion + my $newlen= length($AbsName); + if ($newlen <= length($Base)) { + if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') + && $AbsName eq substr($Base,0,$newlen)) + { + return undef; + } } } return $AbsName; @@ -258,8 +531,8 @@ sub Follow_SymLink($) { return undef unless defined $DEV; # dangling symbolic link } - if ($full_check && $SLnkSeen{$DEV, $INO}++) { - if ($follow_skip < 1) { + if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { + if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { die "$AbsName encountered a second time"; } else { @@ -274,110 +547,177 @@ our($dir, $name, $fullname, $prune); sub _find_dir_symlnk($$$); sub _find_dir($$$); +# check whether or not a scalar variable is tainted +# (code straight from the Camel, 3rd ed., page 561) +sub is_tainted_pp { + my $arg = shift; + my $nada = substr($arg, 0, 0); # zero-length + local $@; + eval { eval "# $nada" }; + return length($@) != 0; +} + sub _find_opt { my $wanted = shift; die "invalid top directory" unless defined $_[0]; - my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd(); - my $cwd_untainted = $cwd; - $wanted_callback = $wanted->{wanted}; - $bydepth = $wanted->{bydepth}; - $no_chdir = $wanted->{no_chdir}; - $full_check = $wanted->{follow}; - $follow = $full_check || $wanted->{follow_fast}; - $follow_skip = $wanted->{follow_skip}; - $untaint = $wanted->{untaint}; - $untaint_pat = $wanted->{untaint_pattern}; - $untaint_skip = $wanted->{untaint_skip}; - - # for compatability reasons (find.pl, find2perl) - our ($topdir, $topdev, $topino, $topmode, $topnlink); + # This function must local()ize everything because callbacks may + # call find() or finddepth() + + local %SLnkSeen; + local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process, $dangling_symlinks); + local($dir, $name, $fullname, $prune, $_); + + my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); + my $cwd_untainted = $cwd; + my $check_t_cwd = 1; + $wanted_callback = $wanted->{wanted}; + $bydepth = $wanted->{bydepth}; + $pre_process = $wanted->{preprocess}; + $post_process = $wanted->{postprocess}; + $no_chdir = $wanted->{no_chdir}; + $full_check = $wanted->{follow}; + $follow = $full_check || $wanted->{follow_fast}; + $follow_skip = $wanted->{follow_skip}; + $untaint = $wanted->{untaint}; + $untaint_pat = $wanted->{untaint_pattern}; + $untaint_skip = $wanted->{untaint_skip}; + $dangling_symlinks = $wanted->{dangling_symlinks}; + + # for compatibility reasons (find.pl, find2perl) + local our ($topdir, $topdev, $topino, $topmode, $topnlink); # a symbolic link to a directory doesn't increase the link count $avoid_nlink = $follow || $File::Find::dont_use_nlink; - if ( $untaint ) { - $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|; - die "insecure cwd in find(depth)" unless defined($cwd_untainted); - } - my ($abs_dir, $Is_Dir); Proc_Top_Item: foreach my $TOP (@_) { - my $top_item = $TOP; - $top_item =~ s|/$|| unless $top_item eq '/'; - $Is_Dir= 0; - - ($topdev,$topino,$topmode,$topnlink) = stat $top_item; - - if ($follow) { - if (substr($top_item,0,1) eq '/') { - $abs_dir = $top_item; - } - elsif ($top_item eq '.') { - $abs_dir = $cwd; - } - else { # care about any ../ - $abs_dir = contract_name("$cwd/",$top_item); - } - $abs_dir= Follow_SymLink($abs_dir); - unless (defined $abs_dir) { - warn "$top_item is a dangling symbolic link\n"; + my $top_item = $TOP; + + if ($Is_MacOS) { + ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; + $top_item = ":$top_item" + if ( (-d _) && ( $top_item !~ /:/ ) ); + } + else { + $top_item =~ s|/\z|| unless $top_item eq '/'; + ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; + } + + $Is_Dir= 0; + + if ($follow) { + + if ($Is_MacOS) { + $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety + + if ($top_item eq $File::Find::current_dir) { + $abs_dir = $cwd; + } + else { + $abs_dir = contract_name_Mac($cwd, $top_item); + unless (defined $abs_dir) { + warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n"; + next Proc_Top_Item; + } + } + + } + else { + if (substr($top_item,0,1) eq '/') { + $abs_dir = $top_item; + } + elsif ($top_item eq $File::Find::current_dir) { + $abs_dir = $cwd; + } + else { # care about any ../ + $abs_dir = contract_name("$cwd/",$top_item); + } + } + $abs_dir= Follow_SymLink($abs_dir); + unless (defined $abs_dir) { + if ($dangling_symlinks) { + if (ref $dangling_symlinks eq 'CODE') { + $dangling_symlinks->($top_item, $cwd); + } else { + warnings::warnif "$top_item is a dangling symbolic link\n"; + } + } next Proc_Top_Item; - } - if (-d _) { + } + + if (-d _) { _find_dir_symlnk($wanted, $abs_dir, $top_item); $Is_Dir= 1; - } - } + } + } else { # no follow - $topdir = $top_item; - unless (defined $topnlink) { - warn "Can't stat $top_item: $!\n"; - next Proc_Top_Item; - } - if (-d _) { - $top_item =~ s/\.dir$// if $Is_VMS; + $topdir = $top_item; + unless (defined $topnlink) { + warnings::warnif "Can't stat $top_item: $!\n"; + next Proc_Top_Item; + } + if (-d _) { + $top_item =~ s/\.dir\z// if $Is_VMS; _find_dir($wanted, $top_item, $topnlink); $Is_Dir= 1; - } + } else { $abs_dir= $top_item; - } - } + } + } - unless ($Is_Dir) { + unless ($Is_Dir) { unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { - ($dir,$_) = ('.', $top_item); + if ($Is_MacOS) { + ($dir,$_) = (':', $top_item); # $File::Find::dir, $_ + } + else { + ($dir,$_) = ('./', $top_item); + } } - $abs_dir = $dir; - if ($untaint) { - my $abs_dir_save = $abs_dir; - $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|; + $abs_dir = $dir; + if (( $untaint ) && (is_tainted($dir) )) { + ( $abs_dir ) = $dir =~ m|$untaint_pat|; unless (defined $abs_dir) { if ($untaint_skip == 0) { - die "directory $abs_dir_save is still tainted"; + die "directory $dir is still tainted"; } else { next Proc_Top_Item; } } - } + } + + unless ($no_chdir || chdir $abs_dir) { + warnings::warnif "Couldn't chdir $abs_dir: $!\n"; + next Proc_Top_Item; + } + + $name = $abs_dir . $_; # $File::Find::name + $_ = $name if $no_chdir; - unless ($no_chdir or chdir $abs_dir) { - warn "Couldn't chdir $abs_dir: $!\n"; - next Proc_Top_Item; - } - - $name = $abs_dir; - - &$wanted_callback; + { $wanted_callback->() }; # protect against wild "next" - } + } - $no_chdir or chdir $cwd_untainted; + unless ( $no_chdir ) { + if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { + ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; + unless (defined $cwd_untainted) { + die "insecure cwd in find(depth)"; + } + $check_t_cwd = 0; + } + unless (chdir $cwd_untainted) { + die "Can't cd to $cwd: $!\n"; + } + } } } @@ -396,15 +736,24 @@ sub _find_dir($$$) { my ($subcount,$sub_nlink); my $SE= []; my $dir_name= $p_dir; - my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); - my $dir_rel= '.'; # directory name relative to current directory + my $dir_pref; + my $dir_rel = $File::Find::current_dir; + my $tainted = 0; + my $no_nlink; + + if ($Is_MacOS) { + $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface + } + else { + $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); + } local ($dir, $name, $prune, *DIR); - - unless ($no_chdir or $p_dir eq '.') { + + unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { my $udir = $p_dir; - if ($untaint) { - $udir = $1 if $p_dir =~ m|$untaint_pat|; + if (( $untaint ) && (is_tainted($p_dir) )) { + ( $udir ) = $p_dir =~ m|$untaint_pat|; unless (defined $udir) { if ($untaint_skip == 0) { die "directory $p_dir is still tainted"; @@ -415,62 +764,90 @@ sub _find_dir($$$) { } } unless (chdir $udir) { - warn "Can't cd to $udir: $!\n"; + warnings::warnif "Can't cd to $udir: $!\n"; return; } } + # push the starting directory + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; + + if ($Is_MacOS) { + $p_dir = $dir_pref; # ensure trailing ':' + } + while (defined $SE) { unless ($bydepth) { - $dir= $p_dir; - $name= $dir_name; - $_= ($no_chdir ? $dir_name : $dir_rel ); + $dir= $p_dir; # $File::Find::dir + $name= $dir_name; # $File::Find::name + $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ # prune may happen here - $prune= 0; - &$wanted_callback; - next if $prune; + $prune= 0; + { $wanted_callback->() }; # protect against wild "next" + next if $prune; } - + # change to that directory - unless ($no_chdir or $dir_rel eq '.') { + unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { my $udir= $dir_rel; - if ($untaint) { - $udir = $1 if $dir_rel =~ m|$untaint_pat|; + if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { + ( $udir ) = $dir_rel =~ m|$untaint_pat|; unless (defined $udir) { if ($untaint_skip == 0) { - die "directory (" - . ($p_dir ne '/' ? $p_dir : '') - . "/) $dir_rel is still tainted"; + if ($Is_MacOS) { + die "directory ($p_dir) $dir_rel is still tainted"; + } + else { + die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; + } + } else { # $untaint_skip == 1 + next; } } } unless (chdir $udir) { - warn "Can't cd to (" - . ($p_dir ne '/' ? $p_dir : '') - . "/) $udir : $!\n"; + if ($Is_MacOS) { + warnings::warnif "Can't cd to ($p_dir) $udir: $!\n"; + } + else { + warnings::warnif "Can't cd to (" . + ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; + } next; } $CdLvl++; } - $dir= $dir_name; + if ($Is_MacOS) { + $dir_name = "$dir_name:" unless ($dir_name =~ /:$/); + } + + $dir= $dir_name; # $File::Find::dir # Get the list of files in the current directory. - unless (opendir DIR, ($no_chdir ? $dir_name : '.')) { - warn "Can't opendir($dir_name): $!\n"; + unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) { + warnings::warnif "Can't opendir($dir_name): $!\n"; next; } @filenames = readdir DIR; closedir(DIR); + @filenames = $pre_process->(@filenames) if $pre_process; + push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; + + # default: use whatever was specifid + # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) + $no_nlink = $avoid_nlink; + # if dir has wrong nlink count, force switch to slower stat method + $no_nlink = 1 if ($nlink < 2); - if ($nlink == 2 && !$avoid_nlink) { + if ($nlink == 2 && !$no_nlink) { # This dir has no subdirectories. for my $FN (@filenames) { - next if $FN =~ /^\.{1,2}$/; + next if $FN =~ $File::Find::skip_pattern; - $name = $dir_pref . $FN; - $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + $name = $dir_pref . $FN; # $File::Find::name + $_ = ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" } } @@ -479,8 +856,8 @@ sub _find_dir($$$) { $subcount = $nlink - 2; for my $FN (@filenames) { - next if $FN =~ /^\.{1,2}$/; - if ($subcount > 0 || $avoid_nlink) { + next if $FN =~ $File::Find::skip_pattern; + if ($subcount > 0 || $no_nlink) { # Seen all the subdirs? # check for directoriness. # stat is faster for a file in the current directory @@ -488,39 +865,81 @@ sub _find_dir($$$) { if (-d _) { --$subcount; - $FN =~ s/\.dir$// if $Is_VMS; + $FN =~ s/\.dir\z// if $Is_VMS; push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; } else { - $name = $dir_pref . $FN; - $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + $name = $dir_pref . $FN; # $File::Find::name + $_= ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" } } else { - $name = $dir_pref . $FN; - $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + $name = $dir_pref . $FN; # $File::Find::name + $_= ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" } } } - if ($bydepth) { - $name = $dir_name; - $dir = $p_dir; - $_ = ($no_chdir ? $dir_name : $dir_rel ); - &$wanted_callback; - } } continue { - if ( defined ($SE = pop @Stack) ) { + while ( defined ($SE = pop @Stack) ) { ($Level, $p_dir, $dir_rel, $nlink) = @$SE; if ($CdLvl > $Level && !$no_chdir) { - die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level) - unless chdir '../' x ($CdLvl-$Level); + my $tmp; + if ($Is_MacOS) { + $tmp = (':' x ($CdLvl-$Level)) . ':'; + } + else { + $tmp = join('/',('..') x ($CdLvl-$Level)); + } + die "Can't cd to $dir_name" . $tmp + unless chdir ($tmp); $CdLvl = $Level; } - $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); - $dir_pref = "$dir_name/"; + + if ($Is_MacOS) { + # $pdir always has a trailing ':', except for the starting dir, + # where $dir_rel eq ':' + $dir_name = "$p_dir$dir_rel"; + $dir_pref = "$dir_name:"; + } + else { + $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); + $dir_pref = "$dir_name/"; + } + + if ( $nlink == -2 ) { + $name = $dir = $p_dir; # $File::Find::name / dir + $_ = $File::Find::current_dir; + $post_process->(); # End-of-directory processing + } + elsif ( $nlink < 0 ) { # must be finddepth, report dirname now + $name = $dir_name; + if ($Is_MacOS) { + if ($dir_rel eq ':') { # must be the top dir, where we started + $name =~ s|:$||; # $File::Find::name + $p_dir = "$p_dir:" unless ($p_dir =~ /:$/); + } + $dir = $p_dir; # $File::Find::dir + $_ = ($no_chdir ? $name : $dir_rel); # $_ + } + else { + if ( substr($name,-2) eq '/.' ) { + substr($name, length($name) == 2 ? -1 : -2) = ''; + } + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel ); + if ( substr($_,-2) eq '/.' ) { + substr($_, length($_) == 2 ? -1 : -2) = ''; + } + } + { $wanted_callback->() }; # protect against wild "next" + } + else { + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; + last; + } } } } @@ -534,23 +953,38 @@ sub _find_dir($$$) { # chdir (if not no_chdir) to dir sub _find_dir_symlnk($$$) { - my ($wanted, $dir_loc, $p_dir) = @_; + my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory my @Stack; my @filenames; my $new_loc; + my $updir_loc = $dir_loc; # untainted parent directory my $SE = []; my $dir_name = $p_dir; - my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); - my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); - my $dir_rel = '.'; # directory name relative to current directory + my $dir_pref; + my $loc_pref; + my $dir_rel = $File::Find::current_dir; + my $byd_flag; # flag for pending stack entry if $bydepth + my $tainted = 0; + my $ok = 1; + + if ($Is_MacOS) { + $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:"; + $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:"; + } else { + $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); + $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); + } local ($dir, $name, $fullname, $prune, *DIR); - - unless ($no_chdir or $p_dir eq '.') { - my $udir = $dir_loc; - if ($untaint) { - $udir = $1 if $dir_loc =~ m|$untaint_pat|; - unless (defined $udir) { + + unless ($no_chdir) { + # untaint the topdir + if (( $untaint ) && (is_tainted($dir_loc) )) { + ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted + # once untainted, $updir_loc is pushed on the stack (as parent directory); + # hence, we don't need to untaint the parent directory every time we chdir + # to it later + unless (defined $updir_loc) { if ($untaint_skip == 0) { die "directory $dir_loc is still tainted"; } @@ -559,31 +993,47 @@ sub _find_dir_symlnk($$$) { } } } - unless (chdir $udir) { - warn "Can't cd to $udir: $!\n"; + $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); + unless ($ok) { + warnings::warnif "Can't cd to $updir_loc: $!\n"; return; } } + push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; + + if ($Is_MacOS) { + $p_dir = $dir_pref; # ensure trailing ':' + } + while (defined $SE) { unless ($bydepth) { - $dir= $p_dir; - $name= $dir_name; - $_= ($no_chdir ? $dir_name : $dir_rel ); - $fullname= $dir_loc; + # change (back) to parent directory (always untainted) + unless ($no_chdir) { + unless (chdir $updir_loc) { + warnings::warnif "Can't cd to $updir_loc: $!\n"; + next; + } + } + $dir= $p_dir; # $File::Find::dir + $name= $dir_name; # $File::Find::name + $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ + $fullname= $dir_loc; # $File::Find::fullname # prune may happen here - $prune= 0; - &$wanted_callback; - next if $prune; + $prune= 0; + lstat($_); # make sure file tests with '_' work + { $wanted_callback->() }; # protect against wild "next" + next if $prune; } # change to that directory - unless ($no_chdir or $dir_rel eq '.') { - my $udir = $dir_loc; - if ($untaint) { - $udir = $1 if $dir_loc =~ m|$untaint_pat|; - unless (defined $udir ) { + unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { + $updir_loc = $dir_loc; + if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { + # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir + ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; + unless (defined $updir_loc) { if ($untaint_skip == 0) { die "directory $dir_loc is still tainted"; } @@ -592,55 +1042,97 @@ sub _find_dir_symlnk($$$) { } } } - unless (chdir $udir) { - warn "Can't cd to $udir: $!\n"; + unless (chdir $updir_loc) { + warnings::warnif "Can't cd to $updir_loc: $!\n"; next; } } - $dir = $dir_name; + if ($Is_MacOS) { + $dir_name = "$dir_name:" unless ($dir_name =~ /:$/); + } + + $dir = $dir_name; # $File::Find::dir # Get the list of files in the current directory. - unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) { - warn "Can't opendir($dir_loc): $!\n"; + unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { + warnings::warnif "Can't opendir($dir_loc): $!\n"; next; } @filenames = readdir DIR; closedir(DIR); for my $FN (@filenames) { - next if $FN =~ /^\.{1,2}$/; + next if $FN =~ $File::Find::skip_pattern; # follow symbolic links / do an lstat $new_loc = Follow_SymLink($loc_pref.$FN); # ignore if invalid symlink next unless defined $new_loc; - + if (-d _) { - push @Stack,[$new_loc,$dir_name,$FN]; + push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; } else { - $fullname = $new_loc; - $name = $dir_pref . $FN; - $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + $fullname = $new_loc; # $File::Find::fullname + $name = $dir_pref . $FN; # $File::Find::name + $_ = ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" } } - if ($bydepth) { - $fullname = $dir_loc; - $name = $dir_name; - $_ = ($no_chdir ? $dir_name : $dir_rel); - &$wanted_callback; - } } continue { - if (defined($SE = pop @Stack)) { - ($dir_loc, $p_dir, $dir_rel) = @$SE; - $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); - $dir_pref = "$dir_name/"; - $loc_pref = "$dir_loc/"; + while (defined($SE = pop @Stack)) { + ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; + if ($Is_MacOS) { + # $p_dir always has a trailing ':', except for the starting dir, + # where $dir_rel eq ':' + $dir_name = "$p_dir$dir_rel"; + $dir_pref = "$dir_name:"; + $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:"; + } + else { + $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); + $dir_pref = "$dir_name/"; + $loc_pref = "$dir_loc/"; + } + if ( $byd_flag < 0 ) { # must be finddepth, report dirname now + unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { + unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted + warnings::warnif "Can't cd to $updir_loc: $!\n"; + next; + } + } + $fullname = $dir_loc; # $File::Find::fullname + $name = $dir_name; # $File::Find::name + if ($Is_MacOS) { + if ($dir_rel eq ':') { # must be the top dir, where we started + $name =~ s|:$||; # $File::Find::name + $p_dir = "$p_dir:" unless ($p_dir =~ /:$/); + } + $dir = $p_dir; # $File::Find::dir + $_ = ($no_chdir ? $name : $dir_rel); # $_ + } + else { + if ( substr($name,-2) eq '/.' ) { + substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name + } + $dir = $p_dir; # $File::Find::dir + $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ + if ( substr($_,-2) eq '/.' ) { + substr($_, length($_) == 2 ? -1 : -2) = ''; + } + } + + lstat($_); # make sure file tests with '_' work + { $wanted_callback->() }; # protect against wild "next" + } + else { + push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; + last; + } } } } @@ -653,7 +1145,7 @@ sub wrap_wanted { $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; } if ( $wanted->{untaint} ) { - $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$| + $wanted->{untaint_pattern} = $File::Find::untaint_pattern unless defined $wanted->{untaint_pattern}; $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; } @@ -667,24 +1159,38 @@ sub wrap_wanted { sub find { my $wanted = shift; _find_opt(wrap_wanted($wanted), @_); - %SLnkSeen= (); # free memory } sub finddepth { my $wanted = wrap_wanted(shift); $wanted->{bydepth} = 1; _find_opt($wanted, @_); - %SLnkSeen= (); # free memory } +# default +$File::Find::skip_pattern = qr/^\.{1,2}\z/; +$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; + # These are hard-coded for now, but may move to hint files. if ($^O eq 'VMS') { $Is_VMS = 1; - $File::Find::dont_use_nlink = 1; + $File::Find::dont_use_nlink = 1; +} +elsif ($^O eq 'MacOS') { + $Is_MacOS = 1; + $File::Find::dont_use_nlink = 1; + $File::Find::skip_pattern = qr/^Icon\015\z/; + $File::Find::untaint_pattern = qr|^(.+)$|; } +# this _should_ work properly on all platforms +# where File::Find can be expected to work +$File::Find::current_dir = File::Spec->curdir || '.'; + $File::Find::dont_use_nlink = 1 - if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32'; + if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' || + $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' || + $^O eq 'nto'; # Set dont_use_nlink in your hint file if your system's stat doesn't # report the number of links in a directory as an indication @@ -695,4 +1201,13 @@ unless ($File::Find::dont_use_nlink) { $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); } +# We need a function that checks if a scalar is tainted. Either use the +# Scalar::Util module's tainted() function or our (slower) pure Perl +# fallback is_tainted_pp() +{ + local $@; + eval { require Scalar::Util }; + *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; +} + 1;