use File::Find;
finddepth(\&wanted, '/foo', '/bar');
sub wanted { ... }
-
+
use File::Find;
find({ wanted => \&process, follow => 1 }, '.');
have been reported. Entry point finddepth() is a shortcut for
specifying C<{ bydepth => 1 }> in the first argument of find().
+=item C<preprocess>
+
+The value should be a code reference. This code reference is used to
+preprocess a directory; it is called after readdir() but before the loop that
+calls the wanted() function. It is called with a list of strings and is
+expected to return a list of strings. The code can be used to sort the
+strings alphabetically, numerically, or to filter out directory entries based
+on their name alone.
+
+=item C<postprocess>
+
+The value should be a code reference. It is invoked just before leaving the
+current directory. It is called in void context with no arguments. The name
+of the current directory is in $File::Find::dir. This hook is handy for
+summarizing a directory, such as calculating its disk usage.
+
=item C<follow>
Causes symbolic links to be followed. Since directory trees with symbolic
=item *
-It is guarantueed that an I<lstat> has been called before the user's
+It is guaranteed that an I<lstat> has been called before the user's
I<wanted()> function is called. This enables fast file checks involving S< _>.
=item *
=item C<follow_fast>
-This is similar to I<follow> except that it may report some files
-more than once. It does detect cycles however.
-Since only symbolic links have to be hashed, this is
-much cheaper both in space and time.
-If processing a file more than once (by the user's I<wanted()> function)
+This is similar to I<follow> except that it may report some files more
+than once. It does detect cycles, however. Since only symbolic links
+have to be hashed, this is much cheaper both in space and time. If
+processing a file more than once (by the user's I<wanted()> function)
is worse than just taking time, the option I<follow> should be used.
=item C<follow_skip>
If find is used in taint-mode (-T command line switch or if EUID != UID
or if EGID != GID) then internally directory names have to be untainted
before they can be cd'ed to. Therefore they are checked against a regular
-expression I<untaint_pattern>. Note, that all names passed to the
+expression I<untaint_pattern>. Note that all names passed to the
user's I<wanted()> function are still tainted.
=item C<untaint_pattern>
See above. This should be set using the C<qr> quoting operator.
The default is set to C<qr|^([-+@\w./]+)$|>.
-Note that the paranthesis which are vital.
+Note that the parantheses are vital.
=item C<untaint_skip>
The wanted() function does whatever verifications you want.
C<$File::Find::dir> contains the current directory name, and C<$_> the
current filename within that directory. C<$File::Find::name> contains
-the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when
-the function is called, unless C<no_chdir> was specified.
-When <follow> or <follow_fast> are in effect there is also a
-C<$File::Find::fullname>.
-The function may set C<$File::Find::prune> to prune the tree
-unless C<bydepth> was specified.
-Unless C<follow> or C<follow_fast> is specified, for compatibility
-reasons (find.pl, find2perl) there are in addition the following globals
-available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>,
+the complete pathname to the file. You are chdir()'d to
+C<$File::Find::dir> when the function is called, unless C<no_chdir>
+was specified. When <follow> or <follow_fast> are in effect, there is
+also a C<$File::Find::fullname>. The function may set
+C<$File::Find::prune> to prune the tree unless C<bydepth> was
+specified. Unless C<follow> or C<follow_fast> is specified, for
+compatibility reasons (find.pl, find2perl) there are in addition the
+following globals available: C<$File::Find::topdir>,
+C<$File::Find::topdev>, C<$File::Find::topino>,
C<$File::Find::topmode> and C<$File::Find::topnlink>.
This library is useful for the C<find2perl> tool, which when fed,
produces something like:
sub wanted {
- /^\.nfs.*$/ &&
+ /^\.nfs.*\z/s &&
(($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
int(-M _) > 7 &&
unlink($_)
=head1 CAVEAT
-Be aware that the option to follow symblic links can be dangerous.
+Be aware that the option to follow symbolic links can be dangerous.
Depending on the structure of the directory tree (including symbolic
links to directories) you might traverse a given (physical) directory
more than once (only if C<follow_fast> is in effect).
my %SLnkSeen;
my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
- $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat);
+ $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
+ $pre_process, $post_process);
sub contract_name {
my ($cdir,$fn) = @_;
my $cwd_untainted = $cwd;
$wanted_callback = $wanted->{wanted};
$bydepth = $wanted->{bydepth};
+ $pre_process = $wanted->{preprocess};
+ $post_process = $wanted->{postprocess};
$no_chdir = $wanted->{no_chdir};
$full_check = $wanted->{follow};
$follow = $full_check || $wanted->{follow_fast};
Proc_Top_Item:
foreach my $TOP (@_) {
my $top_item = $TOP;
- $top_item =~ s|/$|| unless $top_item eq '/';
+ $top_item =~ s|/\z|| unless $top_item eq '/';
$Is_Dir= 0;
($topdev,$topino,$topmode,$topnlink) = stat $top_item;
next Proc_Top_Item;
}
if (-d _) {
- $top_item =~ s/\.dir$// if $Is_VMS;
+ $top_item =~ s/\.dir\z// if $Is_VMS;
_find_dir($wanted, $top_item, $topnlink);
$Is_Dir= 1;
}
unless ($Is_Dir) {
unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
- ($dir,$_) = ('.', $top_item);
+ ($dir,$_) = ('./', $top_item);
}
$abs_dir = $dir;
warn "Couldn't chdir $abs_dir: $!\n";
next Proc_Top_Item;
}
-
- $name = $abs_dir;
-
- &$wanted_callback;
+
+ $name = $abs_dir . $_;
+
+ { &$wanted_callback }; # protect against wild "next"
}
return;
}
}
+
+ push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
while (defined $SE) {
unless ($bydepth) {
$_= ($no_chdir ? $dir_name : $dir_rel );
# prune may happen here
$prune= 0;
- &$wanted_callback;
+ { &$wanted_callback }; # protect against wild "next"
next if $prune;
}
}
@filenames = readdir DIR;
closedir(DIR);
+ @filenames = &$pre_process(@filenames) if $pre_process;
+ push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
if ($nlink == 2 && !$avoid_nlink) {
# This dir has no subdirectories.
for my $FN (@filenames) {
- next if $FN =~ /^\.{1,2}$/;
+ next if $FN =~ /^\.{1,2}\z/;
$name = $dir_pref . $FN;
$_ = ($no_chdir ? $name : $FN);
- &$wanted_callback;
+ { &$wanted_callback }; # protect against wild "next"
}
}
$subcount = $nlink - 2;
for my $FN (@filenames) {
- next if $FN =~ /^\.{1,2}$/;
+ next if $FN =~ /^\.{1,2}\z/;
if ($subcount > 0 || $avoid_nlink) {
# Seen all the subdirs?
# check for directoriness.
if (-d _) {
--$subcount;
- $FN =~ s/\.dir$// if $Is_VMS;
+ $FN =~ s/\.dir\z// if $Is_VMS;
push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
}
else {
$name = $dir_pref . $FN;
$_= ($no_chdir ? $name : $FN);
- &$wanted_callback;
+ { &$wanted_callback }; # protect against wild "next"
}
}
else {
$name = $dir_pref . $FN;
$_= ($no_chdir ? $name : $FN);
- &$wanted_callback;
+ { &$wanted_callback }; # protect against wild "next"
}
}
}
- if ($bydepth) {
- $name = $dir_name;
- $dir = $p_dir;
- $_ = ($no_chdir ? $dir_name : $dir_rel );
- &$wanted_callback;
- }
}
continue {
- if ( defined ($SE = pop @Stack) ) {
+ while ( defined ($SE = pop @Stack) ) {
($Level, $p_dir, $dir_rel, $nlink) = @$SE;
if ($CdLvl > $Level && !$no_chdir) {
- die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level)
- unless chdir '../' x ($CdLvl-$Level);
+ my $tmp = join('/',('..') x ($CdLvl-$Level));
+ die "Can't cd to $dir_name" . $tmp
+ unless chdir ($tmp);
$CdLvl = $Level;
}
$dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
$dir_pref = "$dir_name/";
+ if ( $nlink == -2 ) {
+ $name = $dir = $p_dir;
+ $_ = ".";
+ &$post_process; # End-of-directory processing
+ } elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
+ $name = $dir_name;
+ if ( substr($name,-2) eq '/.' ) {
+ $name =~ s|/\.$||;
+ }
+ $dir = $p_dir;
+ $_ = ($no_chdir ? $dir_name : $dir_rel );
+ if ( substr($_,-2) eq '/.' ) {
+ s|/\.$||;
+ }
+ { &$wanted_callback }; # protect against wild "next"
+ } else {
+ push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
+ last;
+ }
}
}
}
my @Stack;
my @filenames;
my $new_loc;
+ my $pdir_loc = $dir_loc;
my $SE = [];
my $dir_name = $p_dir;
my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
my $dir_rel = '.'; # directory name relative to current directory
+ my $byd_flag; # flag for pending stack entry if $bydepth
local ($dir, $name, $fullname, $prune, *DIR);
}
}
+ push @Stack,[$dir_loc,$pdir_loc,$p_dir,$dir_rel,-1] if $bydepth;
+
while (defined $SE) {
unless ($bydepth) {
+ # change to parent directory
+ unless ($no_chdir) {
+ my $udir = $pdir_loc;
+ if ($untaint) {
+ $udir = $1 if $pdir_loc =~ m|$untaint_pat|;
+ }
+ unless (chdir $udir) {
+ warn "Can't cd to $udir: $!\n";
+ next;
+ }
+ }
$dir= $p_dir;
$name= $dir_name;
$_= ($no_chdir ? $dir_name : $dir_rel );
$fullname= $dir_loc;
# prune may happen here
$prune= 0;
- &$wanted_callback;
+ lstat($_); # make sure file tests with '_' work
+ { &$wanted_callback }; # protect against wild "next"
next if $prune;
}
closedir(DIR);
for my $FN (@filenames) {
- next if $FN =~ /^\.{1,2}$/;
+ next if $FN =~ /^\.{1,2}\z/;
# follow symbolic links / do an lstat
$new_loc = Follow_SymLink($loc_pref.$FN);
next unless defined $new_loc;
if (-d _) {
- push @Stack,[$new_loc,$dir_name,$FN];
+ push @Stack,[$new_loc,$dir_loc,$dir_name,$FN,1];
}
else {
$fullname = $new_loc;
$name = $dir_pref . $FN;
$_ = ($no_chdir ? $name : $FN);
- &$wanted_callback;
+ { &$wanted_callback }; # protect against wild "next"
}
}
- if ($bydepth) {
- $fullname = $dir_loc;
- $name = $dir_name;
- $_ = ($no_chdir ? $dir_name : $dir_rel);
- &$wanted_callback;
- }
}
continue {
- if (defined($SE = pop @Stack)) {
- ($dir_loc, $p_dir, $dir_rel) = @$SE;
+ while (defined($SE = pop @Stack)) {
+ ($dir_loc, $pdir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
$dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
$dir_pref = "$dir_name/";
$loc_pref = "$dir_loc/";
+ if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
+ unless ($no_chdir or $dir_rel eq '.') {
+ my $udir = $pdir_loc;
+ if ($untaint) {
+ $udir = $1 if $dir_loc =~ m|$untaint_pat|;
+ }
+ unless (chdir $udir) {
+ warn "Can't cd to $udir: $!\n";
+ next;
+ }
+ }
+ $fullname = $dir_loc;
+ $name = $dir_name;
+ if ( substr($name,-2) eq '/.' ) {
+ $name =~ s|/\.$||;
+ }
+ $dir = $p_dir;
+ $_ = ($no_chdir ? $dir_name : $dir_rel);
+ if ( substr($_,-2) eq '/.' ) {
+ s|/\.$||;
+ }
+
+ lstat($_); # make sure file tests with '_' work
+ { &$wanted_callback }; # protect against wild "next"
+ } else {
+ push @Stack,[$dir_loc, $pdir_loc, $p_dir, $dir_rel,-1] if $bydepth;
+ last;
+ }
}
}
}
}
$File::Find::dont_use_nlink = 1
- if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32';
+ if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
+ $^O eq 'cygwin';
# Set dont_use_nlink in your hint file if your system's stat doesn't
# report the number of links in a directory as an indication