X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FFile%2FFind.pm;h=6e6e4627678e439b0727fdd08d33b8766e136d4c;hb=ac06ab1de2fadeeb1962be6804923320d378ac8b;hp=0fa0032206ef49ab0d94650f6863a1c2632394cc;hpb=c739111c01bcc76523dcfc2986f6b0a93da0d494;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/File/Find.pm b/lib/File/Find.pm index 0fa0032..6e6e462 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -18,7 +18,7 @@ finddepth - traverse a directory structure depth-first use File::Find; finddepth(\&wanted, '/foo', '/bar'); sub wanted { ... } - + use File::Find; find({ wanted => \&process, follow => 1 }, '.'); @@ -42,6 +42,22 @@ Reports the name of a directory only AFTER all its entries have been reported. Entry point finddepth() is a shortcut for specifying C<{ bydepth => 1 }> in the first argument of find(). +=item C + +The value should be a code reference. This code reference is used to +preprocess a directory; it is called after readdir() but before the loop that +calls the wanted() function. It is called with a list of strings and is +expected to return a list of strings. The code can be used to sort the +strings alphabetically, numerically, or to filter out directory entries based +on their name alone. + +=item C + +The value should be a code reference. It is invoked just before leaving the +current directory. It is called in void context with no arguments. The name +of the current directory is in $File::Find::dir. This hook is handy for +summarizing a directory, such as calculating its disk usage. + =item C Causes symbolic links to be followed. Since directory trees with symbolic @@ -55,7 +71,7 @@ If either I or I is in effect: =item * -It is guarantueed that an I has been called before the user's +It is guaranteed that an I has been called before the user's I function is called. This enables fast file checks involving S< _>. =item * @@ -67,11 +83,10 @@ pathname of the file with all symbolic links resolved =item C -This is similar to I except that it may report some files -more than once. It does detect cycles however. -Since only symbolic links have to be hashed, this is -much cheaper both in space and time. -If processing a file more than once (by the user's I function) +This is similar to I except that it may report some files more +than once. It does detect cycles, however. Since only symbolic links +have to be hashed, this is much cheaper both in space and time. If +processing a file more than once (by the user's I function) is worse than just taking time, the option I should be used. =item C @@ -97,14 +112,14 @@ C<$_> will be the same as C<$File::Find::name>. If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted before they can be cd'ed to. Therefore they are checked against a regular -expression I. Note, that all names passed to the +expression I. Note that all names passed to the user's I function are still tainted. =item C See above. This should be set using the C quoting operator. The default is set to C. -Note that the paranthesis which are vital. +Note that the parantheses are vital. =item C @@ -116,15 +131,15 @@ are skipped. The default is to 'die' in such a case. The wanted() function does whatever verifications you want. C<$File::Find::dir> contains the current directory name, and C<$_> the current filename within that directory. C<$File::Find::name> contains -the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when -the function is called, unless C was specified. -When or are in effect there is also a -C<$File::Find::fullname>. -The function may set C<$File::Find::prune> to prune the tree -unless C was specified. -Unless C or C is specified, for compatibility -reasons (find.pl, find2perl) there are in addition the following globals -available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>, +the complete pathname to the file. You are chdir()'d to +C<$File::Find::dir> when the function is called, unless C +was specified. When or are in effect, there is +also a C<$File::Find::fullname>. The function may set +C<$File::Find::prune> to prune the tree unless C was +specified. Unless C or C is specified, for +compatibility reasons (find.pl, find2perl) there are in addition the +following globals available: C<$File::Find::topdir>, +C<$File::Find::topdev>, C<$File::Find::topino>, C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C tool, which when fed, @@ -161,7 +176,7 @@ module. =head1 CAVEAT -Be aware that the option to follow symblic links can be dangerous. +Be aware that the option to follow symbolic links can be dangerous. Depending on the structure of the directory tree (including symbolic links to directories) you might traverse a given (physical) directory more than once (only if C is in effect). @@ -183,7 +198,8 @@ require File::Basename; my %SLnkSeen; my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, - $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat); + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, + $pre_process, $post_process); sub contract_name { my ($cdir,$fn) = @_; @@ -282,6 +298,8 @@ sub _find_opt { my $cwd_untainted = $cwd; $wanted_callback = $wanted->{wanted}; $bydepth = $wanted->{bydepth}; + $pre_process = $wanted->{preprocess}; + $post_process = $wanted->{postprocess}; $no_chdir = $wanted->{no_chdir}; $full_check = $wanted->{follow}; $follow = $full_check || $wanted->{follow_fast}; @@ -349,7 +367,7 @@ sub _find_opt { unless ($Is_Dir) { unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { - ($dir,$_) = ('.', $top_item); + ($dir,$_) = ('./', $top_item); } $abs_dir = $dir; @@ -370,10 +388,10 @@ sub _find_opt { warn "Couldn't chdir $abs_dir: $!\n"; next Proc_Top_Item; } - - $name = $abs_dir; - - &$wanted_callback; + + $name = $abs_dir . $_; + + { &$wanted_callback }; # protect against wild "next" } @@ -429,7 +447,7 @@ sub _find_dir($$$) { $_= ($no_chdir ? $dir_name : $dir_rel ); # prune may happen here $prune= 0; - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" next if $prune; } @@ -464,6 +482,8 @@ sub _find_dir($$$) { } @filenames = readdir DIR; closedir(DIR); + @filenames = &$pre_process(@filenames) if $pre_process; + push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; if ($nlink == 2 && !$avoid_nlink) { # This dir has no subdirectories. @@ -472,7 +492,7 @@ sub _find_dir($$$) { $name = $dir_pref . $FN; $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } @@ -496,13 +516,13 @@ sub _find_dir($$$) { else { $name = $dir_pref . $FN; $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } else { $name = $dir_pref . $FN; $_= ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } } @@ -518,7 +538,11 @@ sub _find_dir($$$) { } $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); $dir_pref = "$dir_name/"; - if ( $nlink < 0 ) { # must be finddepth, report dirname now + if ( $nlink == -2 ) { + $name = $dir = $p_dir; + $_ = "."; + &$post_process; # End-of-directory processing + } elsif ( $nlink < 0 ) { # must be finddepth, report dirname now $name = $dir_name; if ( substr($name,-2) eq '/.' ) { $name =~ s|/\.$||; @@ -528,7 +552,7 @@ sub _find_dir($$$) { if ( substr($_,-2) eq '/.' ) { s|/\.$||; } - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } else { push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; last; @@ -584,13 +608,25 @@ sub _find_dir_symlnk($$$) { while (defined $SE) { unless ($bydepth) { + # change to parent directory + unless ($no_chdir) { + my $udir = $pdir_loc; + if ($untaint) { + $udir = $1 if $pdir_loc =~ m|$untaint_pat|; + } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + next; + } + } $dir= $p_dir; $name= $dir_name; $_= ($no_chdir ? $dir_name : $dir_rel ); $fullname= $dir_loc; # prune may happen here $prune= 0; - &$wanted_callback; + lstat($_); # make sure file tests with '_' work + { &$wanted_callback }; # protect against wild "next" next if $prune; } @@ -640,7 +676,7 @@ sub _find_dir_symlnk($$$) { $fullname = $new_loc; $name = $dir_pref . $FN; $_ = ($no_chdir ? $name : $FN); - &$wanted_callback; + { &$wanted_callback }; # protect against wild "next" } } @@ -673,7 +709,8 @@ sub _find_dir_symlnk($$$) { s|/\.$||; } - &$wanted_callback; + lstat($_); # make sure file tests with '_' work + { &$wanted_callback }; # protect against wild "next" } else { push @Stack,[$dir_loc, $pdir_loc, $p_dir, $dir_rel,-1] if $bydepth; last; @@ -721,7 +758,8 @@ if ($^O eq 'VMS') { } $File::Find::dont_use_nlink = 1 - if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32'; + if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' || + $^O eq 'cygwin'; # Set dont_use_nlink in your hint file if your system's stat doesn't # report the number of links in a directory as an indication