X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FFile%2FFind.pm;h=6e6e4627678e439b0727fdd08d33b8766e136d4c;hb=ac06ab1de2fadeeb1962be6804923320d378ac8b;hp=074cff3c719ef0fd3788f57325d1b66e01ed3e49;hpb=c7b9dd210bc8835ea8e4750a4d97a670da01ea70;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/File/Find.pm b/lib/File/Find.pm index 074cff3..6e6e462 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -18,7 +18,7 @@ finddepth - traverse a directory structure depth-first use File::Find; finddepth(\&wanted, '/foo', '/bar'); sub wanted { ... } - + use File::Find; find({ wanted => \&process, follow => 1 }, '.'); @@ -42,6 +42,22 @@ Reports the name of a directory only AFTER all its entries have been reported. Entry point finddepth() is a shortcut for specifying C<{ bydepth => 1 }> in the first argument of find(). +=item C + +The value should be a code reference. This code reference is used to +preprocess a directory; it is called after readdir() but before the loop that +calls the wanted() function. It is called with a list of strings and is +expected to return a list of strings. The code can be used to sort the +strings alphabetically, numerically, or to filter out directory entries based +on their name alone. + +=item C + +The value should be a code reference. It is invoked just before leaving the +current directory. It is called in void context with no arguments. The name +of the current directory is in $File::Find::dir. This hook is handy for +summarizing a directory, such as calculating its disk usage. + =item C Causes symbolic links to be followed. Since directory trees with symbolic @@ -55,7 +71,7 @@ If either I or I is in effect: =item * -It is guarantueed that an I has been called before the user's +It is guaranteed that an I has been called before the user's I function is called. This enables fast file checks involving S< _>. =item * @@ -67,11 +83,10 @@ pathname of the file with all symbolic links resolved =item C -This is similar to I except that it may report some files -more than once. It does detect cycles however. -Since only symbolic links have to be hashed, this is -much cheaper both in space and time. -If processing a file more than once (by the user's I function) +This is similar to I except that it may report some files more +than once. It does detect cycles, however. Since only symbolic links +have to be hashed, this is much cheaper both in space and time. If +processing a file more than once (by the user's I function) is worse than just taking time, the option I should be used. =item C @@ -97,14 +112,14 @@ C<$_> will be the same as C<$File::Find::name>. If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted before they can be cd'ed to. Therefore they are checked against a regular -expression I. Note, that all names passed to the +expression I. Note that all names passed to the user's I function are still tainted. =item C See above. This should be set using the C quoting operator. The default is set to C. -Note that the paranthesis which are vital. +Note that the parantheses are vital. =item C @@ -116,15 +131,15 @@ are skipped. The default is to 'die' in such a case. The wanted() function does whatever verifications you want. C<$File::Find::dir> contains the current directory name, and C<$_> the current filename within that directory. C<$File::Find::name> contains -the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when -the function is called, unless C was specified. -When or are in effect there is also a -C<$File::Find::fullname>. -The function may set C<$File::Find::prune> to prune the tree -unless C was specified. -Unless C or C is specified, for compatibility -reasons (find.pl, find2perl) there are in addition the following globals -available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>, +the complete pathname to the file. You are chdir()'d to +C<$File::Find::dir> when the function is called, unless C +was specified. When or are in effect, there is +also a C<$File::Find::fullname>. The function may set +C<$File::Find::prune> to prune the tree unless C was +specified. Unless C or C is specified, for +compatibility reasons (find.pl, find2perl) there are in addition the +following globals available: C<$File::Find::topdir>, +C<$File::Find::topdev>, C<$File::Find::topino>, C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C tool, which when fed, @@ -161,7 +176,7 @@ module. =head1 CAVEAT -Be aware that the option to follow symblic links can be dangerous. +Be aware that the option to follow symbolic links can be dangerous. Depending on the structure of the directory tree (including symbolic links to directories) you might traverse a given (physical) directory more than once (only if C is in effect). @@ -183,7 +198,8 @@ require File::Basename; my %SLnkSeen; my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, - $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat); + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process); sub contract_name { my ($cdir,$fn) = @_; @@ -282,6 +298,8 @@ sub _find_opt { my $cwd_untainted = $cwd; $wanted_callback = $wanted->{wanted}; $bydepth = $wanted->{bydepth}; + $pre_process = $wanted->{preprocess}; + $post_process = $wanted->{postprocess}; $no_chdir = $wanted->{no_chdir}; $full_check = $wanted->{follow}; $follow = $full_check || $wanted->{follow_fast}; @@ -349,7 +367,7 @@ sub _find_opt { unless ($Is_Dir) { unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { - ($dir,$_) = ('.', $top_item); + ($dir,$_) = ('./', $top_item); } $abs_dir = $dir; @@ -370,10 +388,10 @@ sub _find_opt { warn "Couldn't chdir $abs_dir: $!\n"; next Proc_Top_Item; } - - $name = $abs_dir; - - &$wanted_callback; + + $name = $abs_dir . $_; + + { &$wanted_callback }; # protect against wild "next" } @@ -419,6 +437,8 @@ sub _find_dir($$$) { return; } } + + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; while (defined $SE) { unless ($bydepth) { @@ -427,7 +447,7 @@ sub _find_dir($$$) { $_= ($no_chdir ? $dir_name : $dir_rel ); # prune may happen here $prune= 0; - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" next if $prune; } @@ -462,6 +482,8 @@ sub _find_dir($$$) { } @filenames = readdir DIR; closedir(DIR); + @filenames = &$pre_process(@filenames) if $pre_process; + push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; if ($nlink == 2 && !$avoid_nlink) { # This dir has no subdirectories. @@ -470,7 +492,7 @@ sub _find_dir($$$) { $name = $dir_pref . $FN; $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } @@ -494,33 +516,47 @@ sub _find_dir($$$) { else { $name = $dir_pref . $FN; $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } else { $name = $dir_pref . $FN; $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } } - if ($bydepth) { - $name = $dir_name; - $dir = $p_dir; - $_ = ($no_chdir ? $dir_name : $dir_rel ); - &$wanted_callback; - } } continue { - if ( defined ($SE = pop @Stack) ) { + while ( defined ($SE = pop @Stack) ) { ($Level, $p_dir, $dir_rel, $nlink) = @$SE; if ($CdLvl > $Level && !$no_chdir) { - die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level) - unless chdir '../' x ($CdLvl-$Level); + my $tmp = join('/',('..') x ($CdLvl-$Level)); + die "Can't cd to $dir_name" . $tmp + unless chdir ($tmp); $CdLvl = $Level; } $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); $dir_pref = "$dir_name/"; + if ( $nlink == -2 ) { + $name = $dir = $p_dir; + $_ = "."; + &$post_process; # End-of-directory processing + } elsif ( $nlink < 0 ) { # must be finddepth, report dirname now + $name = $dir_name; + if ( substr($name,-2) eq '/.' ) { + $name =~ s|/\.$||; + } + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel ); + if ( substr($_,-2) eq '/.' ) { + s|/\.$||; + } + { &$wanted_callback }; # protect against wild "next" + } else { + push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; + last; + } } } } @@ -538,11 +574,13 @@ sub _find_dir_symlnk($$$) { my @Stack; my @filenames; my $new_loc; + my $pdir_loc = $dir_loc; my $SE = []; my $dir_name = $p_dir; my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); my $dir_rel = '.'; # directory name relative to current directory + my $byd_flag; # flag for pending stack entry if $bydepth local ($dir, $name, $fullname, $prune, *DIR); @@ -565,16 +603,30 @@ sub _find_dir_symlnk($$$) { } } + push @Stack,[$dir_loc,$pdir_loc,$p_dir,$dir_rel,-1] if $bydepth; + while (defined $SE) { unless ($bydepth) { + # change to parent directory + unless ($no_chdir) { + my $udir = $pdir_loc; + if ($untaint) { + $udir = $1 if $pdir_loc =~ m|$untaint_pat|; + } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + next; + } + } $dir= $p_dir; $name= $dir_name; $_= ($no_chdir ? $dir_name : $dir_rel ); $fullname= $dir_loc; # prune may happen here $prune= 0; - &$wanted_callback; + lstat($_); # make sure file tests with '_' work + { &$wanted_callback }; # protect against wild "next" next if $prune; } @@ -618,29 +670,51 @@ sub _find_dir_symlnk($$$) { next unless defined $new_loc; if (-d _) { - push @Stack,[$new_loc,$dir_name,$FN]; + push @Stack,[$new_loc,$dir_loc,$dir_name,$FN,1]; } else { $fullname = $new_loc; $name = $dir_pref . $FN; $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } - if ($bydepth) { - $fullname = $dir_loc; - $name = $dir_name; - $_ = ($no_chdir ? $dir_name : $dir_rel); - &$wanted_callback; - } } continue { - if (defined($SE = pop @Stack)) { - ($dir_loc, $p_dir, $dir_rel) = @$SE; + while (defined($SE = pop @Stack)) { + ($dir_loc, $pdir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); $dir_pref = "$dir_name/"; $loc_pref = "$dir_loc/"; + if ( $byd_flag < 0 ) { # must be finddepth, report dirname now + unless ($no_chdir or $dir_rel eq '.') { + my $udir = $pdir_loc; + if ($untaint) { + $udir = $1 if $dir_loc =~ m|$untaint_pat|; + } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + next; + } + } + $fullname = $dir_loc; + $name = $dir_name; + if ( substr($name,-2) eq '/.' ) { + $name =~ s|/\.$||; + } + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel); + if ( substr($_,-2) eq '/.' ) { + s|/\.$||; + } + + lstat($_); # make sure file tests with '_' work + { &$wanted_callback }; # protect against wild "next" + } else { + push @Stack,[$dir_loc, $pdir_loc, $p_dir, $dir_rel,-1] if $bydepth; + last; + } } } } @@ -684,7 +758,8 @@ if ($^O eq 'VMS') { } $File::Find::dont_use_nlink = 1 - if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32'; + if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' || + $^O eq 'cygwin'; # Set dont_use_nlink in your hint file if your system's stat doesn't # report the number of links in a directory as an indication