X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FFile%2FFind.pm;h=6e6e4627678e439b0727fdd08d33b8766e136d4c;hb=ac06ab1de2fadeeb1962be6804923320d378ac8b;hp=56ab7980ec9cc9746951af1beb6e18939e34339f;hpb=1d7c184104c076988718a01b77c8706aae05b092;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/File/Find.pm b/lib/File/Find.pm index 56ab798..6e6e462 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -1,5 +1,5 @@ package File::Find; -require 5.000; +use 5.005_64; require Exporter; require Cwd; @@ -18,7 +18,7 @@ finddepth - traverse a directory structure depth-first use File::Find; finddepth(\&wanted, '/foo', '/bar'); sub wanted { ... } - + use File::Find; find({ wanted => \&process, follow => 1 }, '.'); @@ -42,6 +42,22 @@ Reports the name of a directory only AFTER all its entries have been reported. Entry point finddepth() is a shortcut for specifying C<{ bydepth => 1 }> in the first argument of find(). +=item C + +The value should be a code reference. This code reference is used to +preprocess a directory; it is called after readdir() but before the loop that +calls the wanted() function. It is called with a list of strings and is +expected to return a list of strings. The code can be used to sort the +strings alphabetically, numerically, or to filter out directory entries based +on their name alone. + +=item C + +The value should be a code reference. It is invoked just before leaving the +current directory. It is called in void context with no arguments. The name +of the current directory is in $File::Find::dir. This hook is handy for +summarizing a directory, such as calculating its disk usage. + =item C Causes symbolic links to be followed. Since directory trees with symbolic @@ -53,12 +69,12 @@ If either I or I is in effect: =over 6 -=item +=item * -It is guarantueed that an I has been called before the user's +It is guaranteed that an I has been called before the user's I function is called. This enables fast file checks involving S< _>. -=item +=item * There is a variable C<$File::Find::fullname> which holds the absolute pathname of the file with all symbolic links resolved @@ -67,11 +83,10 @@ pathname of the file with all symbolic links resolved =item C -This is similar to I except that it may report some files -more than once. It does detect cycles however. -Since only symbolic links have to be hashed, this is -much cheaper both in space and time. -If processing a file more than once (by the user's I function) +This is similar to I except that it may report some files more +than once. It does detect cycles, however. Since only symbolic links +have to be hashed, this is much cheaper both in space and time. If +processing a file more than once (by the user's I function) is worse than just taking time, the option I should be used. =item C @@ -97,14 +112,14 @@ C<$_> will be the same as C<$File::Find::name>. If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted before they can be cd'ed to. Therefore they are checked against a regular -expression I. Note, that all names passed to the +expression I. Note that all names passed to the user's I function are still tainted. =item C See above. This should be set using the C quoting operator. The default is set to C. -Note that the paranthesis which are vital. +Note that the parantheses are vital. =item C @@ -116,12 +131,16 @@ are skipped. The default is to 'die' in such a case. The wanted() function does whatever verifications you want. C<$File::Find::dir> contains the current directory name, and C<$_> the current filename within that directory. C<$File::Find::name> contains -the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when -the function is called, unless C was specified. -When or are in effect there is also a -C<$File::Find::fullname>. -The function may set C<$File::Find::prune> to prune the tree -unless C was specified. +the complete pathname to the file. You are chdir()'d to +C<$File::Find::dir> when the function is called, unless C +was specified. When or are in effect, there is +also a C<$File::Find::fullname>. The function may set +C<$File::Find::prune> to prune the tree unless C was +specified. Unless C or C is specified, for +compatibility reasons (find.pl, find2perl) there are in addition the +following globals available: C<$File::Find::topdir>, +C<$File::Find::topdev>, C<$File::Find::topino>, +C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C tool, which when fed, @@ -131,7 +150,7 @@ This library is useful for the C tool, which when fed, produces something like: sub wanted { - /^\.nfs.*$/ && + /^\.nfs.*\z/s && (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && int(-M _) > 7 && unlink($_) @@ -157,7 +176,7 @@ module. =head1 CAVEAT -Be aware that the option to follow symblic links can be dangerous. +Be aware that the option to follow symbolic links can be dangerous. Depending on the structure of the directory tree (including symbolic links to directories) you might traverse a given (physical) directory more than once (only if C is in effect). @@ -179,7 +198,8 @@ require File::Basename; my %SLnkSeen; my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, - $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat); + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process); sub contract_name { my ($cdir,$fn) = @_; @@ -266,7 +286,7 @@ sub Follow_SymLink($) { return $AbsName; } -use vars qw/ $dir $name $fullname $prune /; +our($dir, $name, $fullname, $prune); sub _find_dir_symlnk($$$); sub _find_dir($$$); @@ -278,6 +298,8 @@ sub _find_opt { my $cwd_untainted = $cwd; $wanted_callback = $wanted->{wanted}; $bydepth = $wanted->{bydepth}; + $pre_process = $wanted->{preprocess}; + $post_process = $wanted->{postprocess}; $no_chdir = $wanted->{no_chdir}; $full_check = $wanted->{follow}; $follow = $full_check || $wanted->{follow_fast}; @@ -286,6 +308,8 @@ sub _find_opt { $untaint_pat = $wanted->{untaint_pattern}; $untaint_skip = $wanted->{untaint_skip}; + # for compatability reasons (find.pl, find2perl) + our ($topdir, $topdev, $topino, $topmode, $topnlink); # a symbolic link to a directory doesn't increase the link count $avoid_nlink = $follow || $File::Find::dont_use_nlink; @@ -295,14 +319,16 @@ sub _find_opt { die "insecure cwd in find(depth)" unless defined($cwd_untainted); } - my ($abs_dir, $nlink, $Is_Dir); + my ($abs_dir, $Is_Dir); Proc_Top_Item: foreach my $TOP (@_) { my $top_item = $TOP; - $top_item =~ s|/$||; + $top_item =~ s|/\z|| unless $top_item eq '/'; $Is_Dir= 0; + ($topdev,$topino,$topmode,$topnlink) = stat $top_item; + if ($follow) { if (substr($top_item,0,1) eq '/') { $abs_dir = $top_item; @@ -324,14 +350,14 @@ sub _find_opt { } } else { # no follow - $nlink = (lstat $top_item)[3]; - unless (defined $nlink) { + $topdir = $top_item; + unless (defined $topnlink) { warn "Can't stat $top_item: $!\n"; next Proc_Top_Item; } if (-d _) { - $top_item =~ s/\.dir$// if $Is_VMS; - _find_dir($wanted, $top_item, $nlink); + $top_item =~ s/\.dir\z// if $Is_VMS; + _find_dir($wanted, $top_item, $topnlink); $Is_Dir= 1; } else { @@ -341,7 +367,7 @@ sub _find_opt { unless ($Is_Dir) { unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { - ($dir,$_) = ('.', $top_item); + ($dir,$_) = ('./', $top_item); } $abs_dir = $dir; @@ -362,10 +388,10 @@ sub _find_opt { warn "Couldn't chdir $abs_dir: $!\n"; next Proc_Top_Item; } - - $name = $abs_dir; - - &$wanted_callback; + + $name = $abs_dir . $_; + + { &$wanted_callback }; # protect against wild "next" } @@ -388,6 +414,7 @@ sub _find_dir($$$) { my ($subcount,$sub_nlink); my $SE= []; my $dir_name= $p_dir; + my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); my $dir_rel= '.'; # directory name relative to current directory local ($dir, $name, $prune, *DIR); @@ -410,6 +437,8 @@ sub _find_dir($$$) { return; } } + + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; while (defined $SE) { unless ($bydepth) { @@ -418,7 +447,7 @@ sub _find_dir($$$) { $_= ($no_chdir ? $dir_name : $dir_rel ); # prune may happen here $prune= 0; - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" next if $prune; } @@ -429,12 +458,16 @@ sub _find_dir($$$) { $udir = $1 if $dir_rel =~ m|$untaint_pat|; unless (defined $udir) { if ($untaint_skip == 0) { - die "directory ($p_dir/) $dir_rel is still tainted"; + die "directory (" + . ($p_dir ne '/' ? $p_dir : '') + . "/) $dir_rel is still tainted"; } } } unless (chdir $udir) { - warn "Can't cd to ($p_dir/) $udir : $!\n"; + warn "Can't cd to (" + . ($p_dir ne '/' ? $p_dir : '') + . "/) $udir : $!\n"; next; } $CdLvl++; @@ -449,15 +482,17 @@ sub _find_dir($$$) { } @filenames = readdir DIR; closedir(DIR); + @filenames = &$pre_process(@filenames) if $pre_process; + push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; if ($nlink == 2 && !$avoid_nlink) { # This dir has no subdirectories. for my $FN (@filenames) { - next if $FN =~ /^\.{1,2}$/; + next if $FN =~ /^\.{1,2}\z/; - $name = "$dir_name/$FN"; + $name = $dir_pref . $FN; $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } @@ -466,46 +501,62 @@ sub _find_dir($$$) { $subcount = $nlink - 2; for my $FN (@filenames) { - next if $FN =~ /^\.{1,2}$/; + next if $FN =~ /^\.{1,2}\z/; if ($subcount > 0 || $avoid_nlink) { # Seen all the subdirs? # check for directoriness. # stat is faster for a file in the current directory - $sub_nlink = (lstat ($no_chdir ? "$dir_name/$FN" : $FN))[3]; + $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; if (-d _) { --$subcount; - $FN =~ s/\.dir$// if $Is_VMS; + $FN =~ s/\.dir\z// if $Is_VMS; push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; } else { - $name = "$dir_name/$FN"; + $name = $dir_pref . $FN; $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } - else { $name = "$dir_name/$FN"; + else { + $name = $dir_pref . $FN; $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } } - if ($bydepth) { - $name = $dir_name; - $dir = $p_dir; - $_ = ($no_chdir ? $dir_name : $dir_rel ); - &$wanted_callback; - } } continue { - if ( defined ($SE = pop @Stack) ) { + while ( defined ($SE = pop @Stack) ) { ($Level, $p_dir, $dir_rel, $nlink) = @$SE; if ($CdLvl > $Level && !$no_chdir) { - die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level) - unless chdir '../' x ($CdLvl-$Level); + my $tmp = join('/',('..') x ($CdLvl-$Level)); + die "Can't cd to $dir_name" . $tmp + unless chdir ($tmp); $CdLvl = $Level; } - $dir_name = "$p_dir/$dir_rel"; + $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); + $dir_pref = "$dir_name/"; + if ( $nlink == -2 ) { + $name = $dir = $p_dir; + $_ = "."; + &$post_process; # End-of-directory processing + } elsif ( $nlink < 0 ) { # must be finddepth, report dirname now + $name = $dir_name; + if ( substr($name,-2) eq '/.' ) { + $name =~ s|/\.$||; + } + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel ); + if ( substr($_,-2) eq '/.' ) { + s|/\.$||; + } + { &$wanted_callback }; # protect against wild "next" + } else { + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; + last; + } } } } @@ -523,9 +574,13 @@ sub _find_dir_symlnk($$$) { my @Stack; my @filenames; my $new_loc; + my $pdir_loc = $dir_loc; my $SE = []; my $dir_name = $p_dir; + my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); + my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); my $dir_rel = '.'; # directory name relative to current directory + my $byd_flag; # flag for pending stack entry if $bydepth local ($dir, $name, $fullname, $prune, *DIR); @@ -548,16 +603,30 @@ sub _find_dir_symlnk($$$) { } } + push @Stack,[$dir_loc,$pdir_loc,$p_dir,$dir_rel,-1] if $bydepth; + while (defined $SE) { unless ($bydepth) { + # change to parent directory + unless ($no_chdir) { + my $udir = $pdir_loc; + if ($untaint) { + $udir = $1 if $pdir_loc =~ m|$untaint_pat|; + } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + next; + } + } $dir= $p_dir; $name= $dir_name; $_= ($no_chdir ? $dir_name : $dir_rel ); $fullname= $dir_loc; # prune may happen here $prune= 0; - &$wanted_callback; + lstat($_); # make sure file tests with '_' work + { &$wanted_callback }; # protect against wild "next" next if $prune; } @@ -592,36 +661,60 @@ sub _find_dir_symlnk($$$) { closedir(DIR); for my $FN (@filenames) { - next if $FN =~ /^\.{1,2}$/; + next if $FN =~ /^\.{1,2}\z/; # follow symbolic links / do an lstat - $new_loc= Follow_SymLink("$dir_loc/$FN"); + $new_loc = Follow_SymLink($loc_pref.$FN); # ignore if invalid symlink next unless defined $new_loc; if (-d _) { - push @Stack,[$new_loc,$dir_name,$FN]; + push @Stack,[$new_loc,$dir_loc,$dir_name,$FN,1]; } else { $fullname = $new_loc; - $name = "$dir_name/$FN"; + $name = $dir_pref . $FN; $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } - if ($bydepth) { - $fullname = $dir_loc; - $name = $dir_name; - $_ = ($no_chdir ? $dir_name : $dir_rel); - &$wanted_callback; - } } continue { - if (defined($SE = pop @Stack)) { - ($dir_loc, $p_dir, $dir_rel) = @$SE; - $dir_name = "$p_dir/$dir_rel"; + while (defined($SE = pop @Stack)) { + ($dir_loc, $pdir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; + $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); + $dir_pref = "$dir_name/"; + $loc_pref = "$dir_loc/"; + if ( $byd_flag < 0 ) { # must be finddepth, report dirname now + unless ($no_chdir or $dir_rel eq '.') { + my $udir = $pdir_loc; + if ($untaint) { + $udir = $1 if $dir_loc =~ m|$untaint_pat|; + } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + next; + } + } + $fullname = $dir_loc; + $name = $dir_name; + if ( substr($name,-2) eq '/.' ) { + $name =~ s|/\.$||; + } + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel); + if ( substr($_,-2) eq '/.' ) { + s|/\.$||; + } + + lstat($_); # make sure file tests with '_' work + { &$wanted_callback }; # protect against wild "next" + } else { + push @Stack,[$dir_loc, $pdir_loc, $p_dir, $dir_rel,-1] if $bydepth; + last; + } } } } @@ -665,7 +758,8 @@ if ($^O eq 'VMS') { } $File::Find::dont_use_nlink = 1 - if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32'; + if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' || + $^O eq 'cygwin'; # Set dont_use_nlink in your hint file if your system's stat doesn't # report the number of links in a directory as an indication