X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FFile%2FFind.pm;h=9b1549429902368d7a1842c16c7eee50d867a87d;hb=3bb6d3e5122054efefe647ff1c834b844fec2fa2;hp=11835067ff1f841d5fcf714ce1f629b7a3657db9;hpb=423d68ab98265bbd73b2ade2438378d2784c6e8c;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/File/Find.pm b/lib/File/Find.pm index 1183506..9b15494 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -1,276 +1,1213 @@ package File::Find; -require 5.000; +use 5.006; +use strict; +use warnings; +use warnings::register; +our $VERSION = '1.04'; require Exporter; -use Config; require Cwd; -require File::Basename; - =head1 NAME -find - traverse a file tree - -finddepth - traverse a directory structure depth-first +File::Find - Traverse a directory tree. =head1 SYNOPSIS use File::Find; - find(\&wanted, '/foo','/bar'); + find(\&wanted, @directories_to_seach); sub wanted { ... } - + use File::Find; - finddepth(\&wanted, '/foo','/bar'); + finddepth(\&wanted, @directories_to_search); sub wanted { ... } + use File::Find; + find({ wanted => \&process, follow => 1 }, '.'); + =head1 DESCRIPTION -The wanted() function does whatever verifications you want. -$File::Find::dir contains the current directory name, and $_ the -current filename within that directory. $File::Find::name contains -C<"$File::Find::dir/$_">. You are chdir()'d to $File::Find::dir when -the function is called. The function may set $File::Find::prune to -prune the tree. +These are functions for searching through directory trees doing work +on each file found similar to the Unix I command. File::Find +exports two functions, C and C. They work similarly +but have subtle differences. + +=over 4 + +=item B + + find(\&wanted, @directories); + find(\%options, @directories); + +find() does a breadth-first search over the given @directories in the +order they are given. In essense, it works from the top down. + +For each file or directory found the &wanted subroutine is called (see +below for details). Additionally, for each directory found it will go +into that directory and continue the search. + +=item B + + finddepth(\&wanted, @directories); + finddepth(\%options, @directories); + +finddepth() works just like find() except it does a depth-first search. +It works from the bottom of the directory tree up. + +=back + +=head2 %options + +The first argument to find() is either a hash reference describing the +operations to be performed for each file, or a code reference. The +code reference is described in L below. + +Here are the possible keys for the hash: + +=over 3 + +=item C + +The value should be a code reference. This code reference is +described in L below. + +=item C + +Reports the name of a directory only AFTER all its entries +have been reported. Entry point finddepth() is a shortcut for +specifying C<{ bydepth =E 1 }> in the first argument of find(). + +=item C + +The value should be a code reference. This code reference is used to +preprocess the current directory. The name of currently processed +directory is in $File::Find::dir. Your preprocessing function is +called after readdir() but before the loop that calls the wanted() +function. It is called with a list of strings (actually file/directory +names) and is expected to return a list of strings. The code can be +used to sort the file/directory names alphabetically, numerically, +or to filter out directory entries based on their name alone. When +I or I are in effect, C is a no-op. + +=item C + +The value should be a code reference. It is invoked just before leaving +the currently processed directory. It is called in void context with no +arguments. The name of the current directory is in $File::Find::dir. This +hook is handy for summarizing a directory, such as calculating its disk +usage. When I or I are in effect, C is a +no-op. + +=item C + +Causes symbolic links to be followed. Since directory trees with symbolic +links (followed) may contain files more than once and may even have +cycles, a hash has to be built up with an entry for each file. +This might be expensive both in space and time for a large +directory tree. See I and I below. +If either I or I is in effect: + +=over 6 + +=item * + +It is guaranteed that an I has been called before the user's +I function is called. This enables fast file checks involving S< _>. + +=item * + +There is a variable C<$File::Find::fullname> which holds the absolute +pathname of the file with all symbolic links resolved + +=back + +=item C + +This is similar to I except that it may report some files more +than once. It does detect cycles, however. Since only symbolic links +have to be hashed, this is much cheaper both in space and time. If +processing a file more than once (by the user's I function) +is worse than just taking time, the option I should be used. + +=item C + +C, which is the default, causes all files which are +neither directories nor symbolic links to be ignored if they are about +to be processed a second time. If a directory or a symbolic link +are about to be processed a second time, File::Find dies. +C causes File::Find to die if any file is about to be +processed a second time. +C causes File::Find to ignore any duplicate files and +directories but to proceed normally otherwise. + +=item C + +If true and a code reference, will be called with the symbolic link +name and the directory it lives in as arguments. Otherwise, if true +and warnings are on, warning "symbolic_link_name is a dangling +symbolic link\n" will be issued. If false, the dangling symbolic link +will be silently ignored. + +=item C -File::Find assumes that you don't alter the $_ variable. If you do then -make sure you return it to its original value before exiting your function. +Does not C to each directory as it recurses. The wanted() +function will need to be aware of this, of course. In this case, +C<$_> will be the same as C<$File::Find::name>. -This library is primarily for the C tool, which when fed, +=item C + +If find is used in taint-mode (-T command line switch or if EUID != UID +or if EGID != GID) then internally directory names have to be untainted +before they can be chdir'ed to. Therefore they are checked against a regular +expression I. Note that all names passed to the user's +I function are still tainted. If this option is used while +not in taint-mode, C is a no-op. + +=item C + +See above. This should be set using the C quoting operator. +The default is set to C. +Note that the parentheses are vital. + +=item C + +If set, a directory which fails the I is skipped, +including all its sub-directories. The default is to 'die' in such a case. + +=back + +=head2 The wanted function + +The wanted() function does whatever verifications you want on each +file and directory. It takes no arguments but rather does its work +through a collection of variables. + +=over 4 + +=item C<$File::Find::dir> is the current directory name, + +=item C<$_> is the current filename within that directory + +=item C<$File::Find::name> is the complete pathname to the file. + +=back + +Don't modify these variables. + +For example, when examining the file /some/path/foo.ext you will have: + + $File::Find::dir = /some/path/ + $_ = foo.ext + $File::Find::name = /some/path/foo.ext + +You are chdir()'d toC<$File::Find::dir> when the function is called, +unless C was specified. Note that when changing to +directories is in effect the root directory (F) is a somewhat +special case inasmuch as the concatenation of C<$File::Find::dir>, +C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The +table below summarizes all variants: + + $File::Find::name $File::Find::dir $_ + default / / . + no_chdir=>0 /etc / etc + /etc/x /etc x + + no_chdir=>1 / / / + /etc / /etc + /etc/x /etc /etc/x + + +When or are in effect, there is +also a C<$File::Find::fullname>. The function may set +C<$File::Find::prune> to prune the tree unless C was +specified. Unless C or C is specified, for +compatibility reasons (find.pl, find2perl) there are in addition the +following globals available: C<$File::Find::topdir>, +C<$File::Find::topdev>, C<$File::Find::topino>, +C<$File::Find::topmode> and C<$File::Find::topnlink>. + +This library is useful for the C tool, which when fed, find2perl / -name .nfs\* -mtime +7 \ - -exec rm -f {} \; -o -fstype nfs -prune + -exec rm -f {} \; -o -fstype nfs -prune produces something like: sub wanted { - /^\.nfs.*$/ && - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && + /^\.nfs.*\z/s && + (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && int(-M _) > 7 && unlink($_) || - ($nlink || (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_))) && + ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && $dev < 0 && ($File::Find::prune = 1); } -Set the variable $File::Find::dont_use_nlink if you're using AFS, -since AFS cheats. +Notice the C<_> in the above C: the C<_> is a magical +filehandle that caches the information from the preceding +stat(), lstat(), or filetest. -C is just like C, except that it does a depth-first -search. - -Here's another interesting wanted function. It will find all symlinks -that don't resolve: +Here's another interesting wanted function. It will find all symbolic +links that don't resolve: sub wanted { - -l && !-e && print "bogus link: $File::Find::name\n"; - } + -l && !-e && print "bogus link: $File::Find::name\n"; + } + +See also the script C on CPAN for a nice application of this +module. + +=head1 WARNINGS + +If you run your program with the C<-w> switch, or if you use the +C pragma, File::Find will report warnings for several weird +situations. You can disable these warnings by putting the statement + + no warnings 'File::Find'; + +in the appropriate scope. See L for more info about lexical +warnings. + +=head1 CAVEAT + +=over 2 + +=item $dont_use_nlink + +You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to +force File::Find to always stat directories. This was used for file systems +that do not have an C count matching the number of sub-directories. +Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file +system) and a couple of others. + +You shouldn't need to set this variable, since File::Find should now detect +such file systems on-the-fly and switch itself to using stat. This works even +for parts of your file system, like a mounted CD-ROM. + +If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs. + +=item symlinks + +Be aware that the option to follow symbolic links can be dangerous. +Depending on the structure of the directory tree (including symbolic +links to directories) you might traverse a given (physical) directory +more than once (only if C is in effect). +Furthermore, deleting or changing files in a symbolically linked directory +might cause very unpleasant surprises, since you delete or change files +in an unknown directory. + +=back + +=head1 NOTES + +=over 4 + +=item * + +Mac OS (Classic) users should note a few differences: + +=over 4 -=head1 BUGS +=item * -There is no way to make find or finddepth follow symlinks. +The path separator is ':', not '/', and the current directory is denoted +as ':', not '.'. You should be careful about specifying relative pathnames. +While a full path always begins with a volume name, a relative pathname +should always begin with a ':'. If specifying a volume name only, a +trailing ':' is required. + +=item * + +C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_> +contains the name of a directory, that name may or may not end with a +':'. Likewise, C<$File::Find::name>, which contains the complete +pathname to that directory, and C<$File::Find::fullname>, which holds +the absolute pathname of that directory with all symbolic links resolved, +may or may not end with a ':'. + +=item * + +The default C (see above) on Mac OS is set to +C. Note that the parentheses are vital. + +=item * + +The invisible system file "Icon\015" is ignored. While this file may +appear in every directory, there are some more invisible system files +on every volume, which are all located at the volume root level (i.e. +"MacintoshHD:"). These system files are B excluded automatically. +Your filter may use the following code to recognize invisible files or +directories (requires Mac::Files): + + use Mac::Files; + + # invisible() -- returns 1 if file/directory is invisible, + # 0 if it's visible or undef if an error occurred + + sub invisible($) { + my $file = shift; + my ($fileCat, $fileInfo); + my $invisible_flag = 1 << 14; + + if ( $fileCat = FSpGetCatInfo($file) ) { + if ($fileInfo = $fileCat->ioFlFndrInfo() ) { + return (($fileInfo->fdFlags & $invisible_flag) && 1); + } + } + return undef; + } + +Generally, invisible files are system files, unless an odd application +decides to use invisible files for its own purposes. To distinguish +such files from system files, you have to look at the B and B +file attributes. The MacPerl built-in functions C and +C offer access to these attributes +(see MacPerl.pm for details). + +Files that appear on the desktop actually reside in an (hidden) directory +named "Desktop Folder" on the particular disk volume. Note that, although +all desktop files appear to be on the same "virtual" desktop, each disk +volume actually maintains its own "Desktop Folder" directory. + +=back + +=back + +=head1 HISTORY + +File::Find used to produce incorrect results if called recursively. +During the development of perl 5.8 this bug was fixed. +The first fixed version of File::Find was 1.01. =cut -@ISA = qw(Exporter); -@EXPORT = qw(find finddepth); +our @ISA = qw(Exporter); +our @EXPORT = qw(find finddepth); -sub find { - my $wanted = shift; - my $cwd = Cwd::cwd(); - # Localize these rather than lexicalizing them for backwards - # compatibility. - local($topdir,$topdev,$topino,$topmode,$topnlink); - foreach $topdir (@_) { - (($topdev,$topino,$topmode,$topnlink) = - ($Is_VMS ? stat($topdir) : lstat($topdir))) - || (warn("Can't stat $topdir: $!\n"), next); - if (-d _) { - if (chdir($topdir)) { - ($dir,$_) = ($topdir,'.'); - $name = $topdir; - $prune = 0; - &$wanted; - if (!$prune) { - my $fixtopdir = $topdir; - $fixtopdir =~ s,/$,, ; - $fixtopdir =~ s/\.dir$// if $Is_VMS; - &finddir($wanted,$fixtopdir,$topnlink); +use strict; +my $Is_VMS; +my $Is_MacOS; + +require File::Basename; +require File::Spec; + +# Should ideally be my() not our() but local() currently +# refuses to operate on lexicals + +our %SLnkSeen; +our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process, $dangling_symlinks); + +sub contract_name { + my ($cdir,$fn) = @_; + + return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; + + $cdir = substr($cdir,0,rindex($cdir,'/')+1); + + $fn =~ s|^\./||; + + my $abs_name= $cdir . $fn; + + if (substr($fn,0,3) eq '../') { + 1 while $abs_name =~ s!/[^/]*/\.\./!/!; + } + + return $abs_name; +} + +# return the absolute name of a directory or file +sub contract_name_Mac { + my ($cdir,$fn) = @_; + my $abs_name; + + if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':' + + my $colon_count = length ($1); + if ($colon_count == 1) { + $abs_name = $cdir . $2; + return $abs_name; + } + else { + # need to move up the tree, but + # only if it's not a volume name + for (my $i=1; $i<$colon_count; $i++) { + unless ($cdir =~ /^[^:]+:$/) { # volume name + $cdir =~ s/[^:]+:$//; + } + else { + return undef; } } + $abs_name = $cdir . $2; + return $abs_name; + } + + } + else { + + # $fn may be a valid path to a directory or file or (dangling) + # symlink, without a leading ':' + if ( (-e $fn) || (-l $fn) ) { + if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:* + return $fn; # $fn is already an absolute path + } else { - warn "Can't cd to $topdir: $!\n"; + $abs_name = $cdir . $fn; + return $abs_name; } } + else { # argh!, $fn is not a valid directory/file + return undef; + } + } +} + +sub PathCombine($$) { + my ($Base,$Name) = @_; + my $AbsName; + + if ($Is_MacOS) { + # $Name is the resolved symlink (always a full path on MacOS), + # i.e. there's no need to call contract_name_Mac() + $AbsName = $Name; + + # (simple) check for recursion + if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion + return undef; + } + } + else { + if (substr($Name,0,1) eq '/') { + $AbsName= $Name; + } else { - unless (($_,$dir) = File::Basename::fileparse($topdir)) { - ($dir,$_) = ('.', $topdir); + $AbsName= contract_name($Base,$Name); + } + + # (simple) check for recursion + my $newlen= length($AbsName); + if ($newlen <= length($Base)) { + if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') + && $AbsName eq substr($Base,0,$newlen)) + { + return undef; } - $name = $topdir; - chdir $dir && &$wanted; } - chdir $cwd; } + return $AbsName; } -sub finddir { - my($wanted, $nlink); - local($dir, $name); - ($wanted, $dir, $nlink) = @_; +sub Follow_SymLink($) { + my ($AbsName) = @_; - my($dev, $ino, $mode, $subcount); + my ($NewName,$DEV, $INO); + ($DEV, $INO)= lstat $AbsName; - # Get the list of files in the current directory. - opendir(DIR,'.') || (warn "Can't open $dir: $!\n", return); - my(@filenames) = readdir(DIR); - closedir(DIR); + while (-l _) { + if ($SLnkSeen{$DEV, $INO}++) { + if ($follow_skip < 2) { + die "$AbsName is encountered a second time"; + } + else { + return undef; + } + } + $NewName= PathCombine($AbsName, readlink($AbsName)); + unless(defined $NewName) { + if ($follow_skip < 2) { + die "$AbsName is a recursive symbolic link"; + } + else { + return undef; + } + } + else { + $AbsName= $NewName; + } + ($DEV, $INO) = lstat($AbsName); + return undef unless defined $DEV; # dangling symbolic link + } - if ($nlink == 2 && !$dont_use_nlink) { # This dir has no subdirectories. - for (@filenames) { - next if $_ eq '.'; - next if $_ eq '..'; - $name = "$dir/$_"; - $nlink = 0; - &$wanted; + if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { + if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { + die "$AbsName encountered a second time"; + } + else { + return undef; } } - else { # This dir has subdirectories. - $subcount = $nlink - 2; - for (@filenames) { - next if $_ eq '.'; - next if $_ eq '..'; - $nlink = $prune = 0; - $name = "$dir/$_"; - &$wanted; - if ($subcount > 0 || $dont_use_nlink) { # Seen all the subdirs? - - # Get link count and check for directoriness. - - ($dev,$ino,$mode,$nlink) = ($Is_VMS ? stat($_) : lstat($_)); - # unless ($nlink || $dont_use_nlink); - - if (-d _) { - # It really is a directory, so do it recursively. + return $AbsName; +} + +our($dir, $name, $fullname, $prune); +sub _find_dir_symlnk($$$); +sub _find_dir($$$); + +# check whether or not a scalar variable is tainted +# (code straight from the Camel, 3rd ed., page 561) +sub is_tainted_pp { + my $arg = shift; + my $nada = substr($arg, 0, 0); # zero-length + local $@; + eval { eval "# $nada" }; + return length($@) != 0; +} + +sub _find_opt { + my $wanted = shift; + die "invalid top directory" unless defined $_[0]; + + # This function must local()ize everything because callbacks may + # call find() or finddepth() + + local %SLnkSeen; + local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process, $dangling_symlinks); + local($dir, $name, $fullname, $prune, $_); + + my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); + my $cwd_untainted = $cwd; + my $check_t_cwd = 1; + $wanted_callback = $wanted->{wanted}; + $bydepth = $wanted->{bydepth}; + $pre_process = $wanted->{preprocess}; + $post_process = $wanted->{postprocess}; + $no_chdir = $wanted->{no_chdir}; + $full_check = $wanted->{follow}; + $follow = $full_check || $wanted->{follow_fast}; + $follow_skip = $wanted->{follow_skip}; + $untaint = $wanted->{untaint}; + $untaint_pat = $wanted->{untaint_pattern}; + $untaint_skip = $wanted->{untaint_skip}; + $dangling_symlinks = $wanted->{dangling_symlinks}; + + # for compatibility reasons (find.pl, find2perl) + local our ($topdir, $topdev, $topino, $topmode, $topnlink); + + # a symbolic link to a directory doesn't increase the link count + $avoid_nlink = $follow || $File::Find::dont_use_nlink; + + my ($abs_dir, $Is_Dir); + + Proc_Top_Item: + foreach my $TOP (@_) { + my $top_item = $TOP; + + if ($Is_MacOS) { + ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; + $top_item = ":$top_item" + if ( (-d _) && ( $top_item !~ /:/ ) ); + } + else { + $top_item =~ s|/\z|| unless $top_item eq '/'; + ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; + } + + $Is_Dir= 0; + + if ($follow) { + + if ($Is_MacOS) { + $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety + + if ($top_item eq $File::Find::current_dir) { + $abs_dir = $cwd; + } + else { + $abs_dir = contract_name_Mac($cwd, $top_item); + unless (defined $abs_dir) { + warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n"; + next Proc_Top_Item; + } + } - if (!$prune && chdir $_) { - $name =~ s/\.dir$// if $Is_VMS; - &finddir($wanted,$name,$nlink); - chdir '..'; + } + else { + if (substr($top_item,0,1) eq '/') { + $abs_dir = $top_item; + } + elsif ($top_item eq $File::Find::current_dir) { + $abs_dir = $cwd; + } + else { # care about any ../ + $abs_dir = contract_name("$cwd/",$top_item); + } + } + $abs_dir= Follow_SymLink($abs_dir); + unless (defined $abs_dir) { + if ($dangling_symlinks) { + if (ref $dangling_symlinks eq 'CODE') { + $dangling_symlinks->($top_item, $cwd); + } else { + warnings::warnif "$top_item is a dangling symbolic link\n"; } - --$subcount; } + next Proc_Top_Item; + } + + if (-d _) { + _find_dir_symlnk($wanted, $abs_dir, $top_item); + $Is_Dir= 1; + } + } + else { # no follow + $topdir = $top_item; + unless (defined $topnlink) { + warnings::warnif "Can't stat $top_item: $!\n"; + next Proc_Top_Item; + } + if (-d _) { + $top_item =~ s/\.dir\z// if $Is_VMS; + _find_dir($wanted, $top_item, $topnlink); + $Is_Dir= 1; + } + else { + $abs_dir= $top_item; + } + } + + unless ($Is_Dir) { + unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { + if ($Is_MacOS) { + ($dir,$_) = (':', $top_item); # $File::Find::dir, $_ + } + else { + ($dir,$_) = ('./', $top_item); + } + } + + $abs_dir = $dir; + if (( $untaint ) && (is_tainted($dir) )) { + ( $abs_dir ) = $dir =~ m|$untaint_pat|; + unless (defined $abs_dir) { + if ($untaint_skip == 0) { + die "directory $dir is still tainted"; + } + else { + next Proc_Top_Item; + } + } + } + + unless ($no_chdir || chdir $abs_dir) { + warnings::warnif "Couldn't chdir $abs_dir: $!\n"; + next Proc_Top_Item; + } + + $name = $abs_dir . $_; # $File::Find::name + $_ = $name if $no_chdir; + + { $wanted_callback->() }; # protect against wild "next" + + } + + unless ( $no_chdir ) { + if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { + ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; + unless (defined $cwd_untainted) { + die "insecure cwd in find(depth)"; + } + $check_t_cwd = 0; + } + unless (chdir $cwd_untainted) { + die "Can't cd to $cwd: $!\n"; } } } } +# API: +# $wanted +# $p_dir : "parent directory" +# $nlink : what came back from the stat +# preconditions: +# chdir (if not no_chdir) to dir -sub finddepth { - my $wanted = shift; +sub _find_dir($$$) { + my ($wanted, $p_dir, $nlink) = @_; + my ($CdLvl,$Level) = (0,0); + my @Stack; + my @filenames; + my ($subcount,$sub_nlink); + my $SE= []; + my $dir_name= $p_dir; + my $dir_pref; + my $dir_rel = $File::Find::current_dir; + my $tainted = 0; + my $no_nlink; + + if ($Is_MacOS) { + $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface + } + else { + $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); + } + + local ($dir, $name, $prune, *DIR); + + unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { + my $udir = $p_dir; + if (( $untaint ) && (is_tainted($p_dir) )) { + ( $udir ) = $p_dir =~ m|$untaint_pat|; + unless (defined $udir) { + if ($untaint_skip == 0) { + die "directory $p_dir is still tainted"; + } + else { + return; + } + } + } + unless (chdir $udir) { + warnings::warnif "Can't cd to $udir: $!\n"; + return; + } + } + + # push the starting directory + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; + + if ($Is_MacOS) { + $p_dir = $dir_pref; # ensure trailing ':' + } + + while (defined $SE) { + unless ($bydepth) { + $dir= $p_dir; # $File::Find::dir + $name= $dir_name; # $File::Find::name + $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ + # prune may happen here + $prune= 0; + { $wanted_callback->() }; # protect against wild "next" + next if $prune; + } - $cwd = Cwd::fastcwd();; - - # Localize these rather than lexicalizing them for backwards - # compatibility. - local($topdir, $topdev, $topino, $topmode, $topnlink); - foreach $topdir (@_) { - (($topdev,$topino,$topmode,$topnlink) = - ($Is_VMS ? stat($topdir) : lstat($topdir))) - || (warn("Can't stat $topdir: $!\n"), next); - if (-d _) { - if (chdir($topdir)) { - my $fixtopdir = $topdir; - $fixtopdir =~ s,/$,, ; - $fixtopdir =~ s/\.dir$// if $Is_VMS; - &finddepthdir($wanted,$fixtopdir,$topnlink); - ($dir,$_) = ($fixtopdir,'.'); - $name = $fixtopdir; - &$wanted; + # change to that directory + unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { + my $udir= $dir_rel; + if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { + ( $udir ) = $dir_rel =~ m|$untaint_pat|; + unless (defined $udir) { + if ($untaint_skip == 0) { + if ($Is_MacOS) { + die "directory ($p_dir) $dir_rel is still tainted"; + } + else { + die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; + } + } else { # $untaint_skip == 1 + next; + } + } } - else { - warn "Can't cd to $topdir: $!\n"; + unless (chdir $udir) { + if ($Is_MacOS) { + warnings::warnif "Can't cd to ($p_dir) $udir: $!\n"; + } + else { + warnings::warnif "Can't cd to (" . + ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; + } + next; } + $CdLvl++; + } + + if ($Is_MacOS) { + $dir_name = "$dir_name:" unless ($dir_name =~ /:$/); + } + + $dir= $dir_name; # $File::Find::dir + + # Get the list of files in the current directory. + unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) { + warnings::warnif "Can't opendir($dir_name): $!\n"; + next; + } + @filenames = readdir DIR; + closedir(DIR); + @filenames = $pre_process->(@filenames) if $pre_process; + push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; + + # default: use whatever was specifid + # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) + $no_nlink = $avoid_nlink; + # if dir has wrong nlink count, force switch to slower stat method + $no_nlink = 1 if ($nlink < 2); + + if ($nlink == 2 && !$no_nlink) { + # This dir has no subdirectories. + for my $FN (@filenames) { + next if $FN =~ $File::Find::skip_pattern; + + $name = $dir_pref . $FN; # $File::Find::name + $_ = ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" + } + } else { - unless (($_,$dir) = File::Basename::fileparse($topdir)) { - ($dir,$_) = ('.', $topdir); + # This dir has subdirectories. + $subcount = $nlink - 2; + + for my $FN (@filenames) { + next if $FN =~ $File::Find::skip_pattern; + if ($subcount > 0 || $no_nlink) { + # Seen all the subdirs? + # check for directoriness. + # stat is faster for a file in the current directory + $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; + + if (-d _) { + --$subcount; + $FN =~ s/\.dir\z// if $Is_VMS; + push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; + } + else { + $name = $dir_pref . $FN; # $File::Find::name + $_= ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" + } + } + else { + $name = $dir_pref . $FN; # $File::Find::name + $_= ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" + } + } + } + } + continue { + while ( defined ($SE = pop @Stack) ) { + ($Level, $p_dir, $dir_rel, $nlink) = @$SE; + if ($CdLvl > $Level && !$no_chdir) { + my $tmp; + if ($Is_MacOS) { + $tmp = (':' x ($CdLvl-$Level)) . ':'; + } + else { + $tmp = join('/',('..') x ($CdLvl-$Level)); + } + die "Can't cd to $dir_name" . $tmp + unless chdir ($tmp); + $CdLvl = $Level; + } + + if ($Is_MacOS) { + # $pdir always has a trailing ':', except for the starting dir, + # where $dir_rel eq ':' + $dir_name = "$p_dir$dir_rel"; + $dir_pref = "$dir_name:"; + } + else { + $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); + $dir_pref = "$dir_name/"; + } + + if ( $nlink == -2 ) { + $name = $dir = $p_dir; # $File::Find::name / dir + $_ = $File::Find::current_dir; + $post_process->(); # End-of-directory processing + } + elsif ( $nlink < 0 ) { # must be finddepth, report dirname now + $name = $dir_name; + if ($Is_MacOS) { + if ($dir_rel eq ':') { # must be the top dir, where we started + $name =~ s|:$||; # $File::Find::name + $p_dir = "$p_dir:" unless ($p_dir =~ /:$/); + } + $dir = $p_dir; # $File::Find::dir + $_ = ($no_chdir ? $name : $dir_rel); # $_ + } + else { + if ( substr($name,-2) eq '/.' ) { + substr($name, length($name) == 2 ? -1 : -2) = ''; + } + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel ); + if ( substr($_,-2) eq '/.' ) { + substr($_, length($_) == 2 ? -1 : -2) = ''; + } + } + { $wanted_callback->() }; # protect against wild "next" + } + else { + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; + last; } - $name = $topdir; - chdir $dir && &$wanted; } - chdir $cwd; } } -sub finddepthdir { - my($wanted, $nlink); - local($dir, $name); - ($wanted,$dir,$nlink) = @_; - my($dev, $ino, $mode, $subcount); - - # Get the list of files in the current directory. - opendir(DIR,'.') || warn "Can't open $dir: $!\n"; - my(@filenames) = readdir(DIR); - closedir(DIR); - - if ($nlink == 2 && !$dont_use_nlink) { # This dir has no subdirectories. - for (@filenames) { - next if $_ eq '.'; - next if $_ eq '..'; - $name = "$dir/$_"; - $nlink = 0; - &$wanted; + +# API: +# $wanted +# $dir_loc : absolute location of a dir +# $p_dir : "parent directory" +# preconditions: +# chdir (if not no_chdir) to dir + +sub _find_dir_symlnk($$$) { + my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory + my @Stack; + my @filenames; + my $new_loc; + my $updir_loc = $dir_loc; # untainted parent directory + my $SE = []; + my $dir_name = $p_dir; + my $dir_pref; + my $loc_pref; + my $dir_rel = $File::Find::current_dir; + my $byd_flag; # flag for pending stack entry if $bydepth + my $tainted = 0; + my $ok = 1; + + if ($Is_MacOS) { + $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:"; + $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:"; + } else { + $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); + $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); + } + + local ($dir, $name, $fullname, $prune, *DIR); + + unless ($no_chdir) { + # untaint the topdir + if (( $untaint ) && (is_tainted($dir_loc) )) { + ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted + # once untainted, $updir_loc is pushed on the stack (as parent directory); + # hence, we don't need to untaint the parent directory every time we chdir + # to it later + unless (defined $updir_loc) { + if ($untaint_skip == 0) { + die "directory $dir_loc is still tainted"; + } + else { + return; + } + } + } + $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); + unless ($ok) { + warnings::warnif "Can't cd to $updir_loc: $!\n"; + return; } } - else { # This dir has subdirectories. - $subcount = $nlink - 2; - for (@filenames) { - next if $_ eq '.'; - next if $_ eq '..'; - $nlink = 0; - $name = "$dir/$_"; - if ($subcount > 0 || $dont_use_nlink) { # Seen all the subdirs? - - # Get link count and check for directoriness. - - ($dev,$ino,$mode,$nlink) = ($Is_VMS ? stat($_) : lstat($_)); - - if (-d _) { - # It really is a directory, so do it recursively. + push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; + + if ($Is_MacOS) { + $p_dir = $dir_pref; # ensure trailing ':' + } + + while (defined $SE) { + + unless ($bydepth) { + # change (back) to parent directory (always untainted) + unless ($no_chdir) { + unless (chdir $updir_loc) { + warnings::warnif "Can't cd to $updir_loc: $!\n"; + next; + } + } + $dir= $p_dir; # $File::Find::dir + $name= $dir_name; # $File::Find::name + $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ + $fullname= $dir_loc; # $File::Find::fullname + # prune may happen here + $prune= 0; + lstat($_); # make sure file tests with '_' work + { $wanted_callback->() }; # protect against wild "next" + next if $prune; + } + + # change to that directory + unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { + $updir_loc = $dir_loc; + if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { + # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir + ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; + unless (defined $updir_loc) { + if ($untaint_skip == 0) { + die "directory $dir_loc is still tainted"; + } + else { + next; + } + } + } + unless (chdir $updir_loc) { + warnings::warnif "Can't cd to $updir_loc: $!\n"; + next; + } + } + + if ($Is_MacOS) { + $dir_name = "$dir_name:" unless ($dir_name =~ /:$/); + } + + $dir = $dir_name; # $File::Find::dir + + # Get the list of files in the current directory. + unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { + warnings::warnif "Can't opendir($dir_loc): $!\n"; + next; + } + @filenames = readdir DIR; + closedir(DIR); + + for my $FN (@filenames) { + next if $FN =~ $File::Find::skip_pattern; + + # follow symbolic links / do an lstat + $new_loc = Follow_SymLink($loc_pref.$FN); + + # ignore if invalid symlink + next unless defined $new_loc; + + if (-d _) { + push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; + } + else { + $fullname = $new_loc; # $File::Find::fullname + $name = $dir_pref . $FN; # $File::Find::name + $_ = ($no_chdir ? $name : $FN); # $_ + { $wanted_callback->() }; # protect against wild "next" + } + } - if (chdir $_) { - $name =~ s/\.dir$// if $Is_VMS; - &finddepthdir($wanted,$name,$nlink); - chdir '..'; + } + continue { + while (defined($SE = pop @Stack)) { + ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; + if ($Is_MacOS) { + # $p_dir always has a trailing ':', except for the starting dir, + # where $dir_rel eq ':' + $dir_name = "$p_dir$dir_rel"; + $dir_pref = "$dir_name:"; + $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:"; + } + else { + $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); + $dir_pref = "$dir_name/"; + $loc_pref = "$dir_loc/"; + } + if ( $byd_flag < 0 ) { # must be finddepth, report dirname now + unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { + unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted + warnings::warnif "Can't cd to $updir_loc: $!\n"; + next; + } + } + $fullname = $dir_loc; # $File::Find::fullname + $name = $dir_name; # $File::Find::name + if ($Is_MacOS) { + if ($dir_rel eq ':') { # must be the top dir, where we started + $name =~ s|:$||; # $File::Find::name + $p_dir = "$p_dir:" unless ($p_dir =~ /:$/); + } + $dir = $p_dir; # $File::Find::dir + $_ = ($no_chdir ? $name : $dir_rel); # $_ + } + else { + if ( substr($name,-2) eq '/.' ) { + substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name + } + $dir = $p_dir; # $File::Find::dir + $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ + if ( substr($_,-2) eq '/.' ) { + substr($_, length($_) == 2 ? -1 : -2) = ''; } - --$subcount; } + + lstat($_); # make sure file tests with '_' work + { $wanted_callback->() }; # protect against wild "next" + } + else { + push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; + last; } - &$wanted; } } } -# Set dont_use_nlink in your hint file if your system's stat doesn't -# report the number of links in a directory as an indication -# of the number of files. -# See, e.g. hints/machten.sh for MachTen 2.2. -$dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); + +sub wrap_wanted { + my $wanted = shift; + if ( ref($wanted) eq 'HASH' ) { + if ( $wanted->{follow} || $wanted->{follow_fast}) { + $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; + } + if ( $wanted->{untaint} ) { + $wanted->{untaint_pattern} = $File::Find::untaint_pattern + unless defined $wanted->{untaint_pattern}; + $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; + } + return $wanted; + } + else { + return { wanted => $wanted }; + } +} + +sub find { + my $wanted = shift; + _find_opt(wrap_wanted($wanted), @_); +} + +sub finddepth { + my $wanted = wrap_wanted(shift); + $wanted->{bydepth} = 1; + _find_opt($wanted, @_); +} + +# default +$File::Find::skip_pattern = qr/^\.{1,2}\z/; +$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; # These are hard-coded for now, but may move to hint files. if ($^O eq 'VMS') { - $Is_VMS = 1; - $dont_use_nlink = 1; + $Is_VMS = 1; + $File::Find::dont_use_nlink = 1; } -if ($^O =~ m:^mswin32:i) { - $Is_NT = 1; - $dont_use_nlink = 1; +elsif ($^O eq 'MacOS') { + $Is_MacOS = 1; + $File::Find::dont_use_nlink = 1; + $File::Find::skip_pattern = qr/^Icon\015\z/; + $File::Find::untaint_pattern = qr|^(.+)$|; } -$dont_use_nlink = 1 - if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos'; +# this _should_ work properly on all platforms +# where File::Find can be expected to work +$File::Find::current_dir = File::Spec->curdir || '.'; -1; +$File::Find::dont_use_nlink = 1 + if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' || + $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' || + $^O eq 'nto'; + +# Set dont_use_nlink in your hint file if your system's stat doesn't +# report the number of links in a directory as an indication +# of the number of files. +# See, e.g. hints/machten.sh for MachTen 2.2. +unless ($File::Find::dont_use_nlink) { + require Config; + $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); +} +# We need a function that checks if a scalar is tainted. Either use the +# Scalar::Util module's tainted() function or our (slower) pure Perl +# fallback is_tainted_pp() +{ + local $@; + eval { require Scalar::Util }; + *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; +} + +1;