document what chdir() without an argument does (from Mark-Jason
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
17f410f9 2use 5.005_64;
a0d0e21e 3require Exporter;
6280b799 4require Cwd;
a0d0e21e 5
f06db76b 6=head1 NAME
7
8find - traverse a file tree
9
10finddepth - traverse a directory structure depth-first
11
12=head1 SYNOPSIS
13
14 use File::Find;
81793b90 15 find(\&wanted, '/foo', '/bar');
f06db76b 16 sub wanted { ... }
237437d0 17
f06db76b 18 use File::Find;
81793b90 19 finddepth(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
81793b90 21
22 use File::Find;
23 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 24
25=head1 DESCRIPTION
26
20408e3c 27The first argument to find() is either a hash reference describing the
81793b90 28operations to be performed for each file, or a code reference.
20408e3c 29
81793b90 30Here are the possible keys for the hash:
31
32=over 3
33
34=item C<wanted>
35
36The value should be a code reference. This code reference is called
37I<the wanted() function> below.
38
39=item C<bydepth>
40
41Reports the name of a directory only AFTER all its entries
42have been reported. Entry point finddepth() is a shortcut for
43specifying C<{ bydepth => 1 }> in the first argument of find().
44
45=item C<follow>
46
47Causes symbolic links to be followed. Since directory trees with symbolic
48links (followed) may contain files more than once and may even have
49cycles, a hash has to be built up with an entry for each file.
50This might be expensive both in space and time for a large
51directory tree. See I<follow_fast> and I<follow_skip> below.
52If either I<follow> or I<follow_fast> is in effect:
53
54=over 6
55
a45bd81d 56=item *
81793b90 57
58It is guarantueed that an I<lstat> has been called before the user's
59I<wanted()> function is called. This enables fast file checks involving S< _>.
60
a45bd81d 61=item *
81793b90 62
63There is a variable C<$File::Find::fullname> which holds the absolute
64pathname of the file with all symbolic links resolved
65
66=back
67
68=item C<follow_fast>
69
70This is similar to I<follow> except that it may report some files
71more than once. It does detect cycles however.
72Since only symbolic links have to be hashed, this is
73much cheaper both in space and time.
74If processing a file more than once (by the user's I<wanted()> function)
75is worse than just taking time, the option I<follow> should be used.
76
77=item C<follow_skip>
78
79C<follow_skip==1>, which is the default, causes all files which are
80neither directories nor symbolic links to be ignored if they are about
81to be processed a second time. If a directory or a symbolic link
82are about to be processed a second time, File::Find dies.
83C<follow_skip==0> causes File::Find to die if any file is about to be
84processed a second time.
85C<follow_skip==2> causes File::Find to ignore any duplicate files and
86dirctories but to proceed normally otherwise.
20408e3c 87
f06db76b 88
81793b90 89=item C<no_chdir>
90
91Does not C<chdir()> to each directory as it recurses. The wanted()
92function will need to be aware of this, of course. In this case,
93C<$_> will be the same as C<$File::Find::name>.
94
95=item C<untaint>
96
97If find is used in taint-mode (-T command line switch or if EUID != UID
98or if EGID != GID) then internally directory names have to be untainted
99before they can be cd'ed to. Therefore they are checked against a regular
100expression I<untaint_pattern>. Note, that all names passed to the
101user's I<wanted()> function are still tainted.
102
103=item C<untaint_pattern>
104
105See above. This should be set using the C<qr> quoting operator.
106The default is set to C<qr|^([-+@\w./]+)$|>.
107Note that the paranthesis which are vital.
108
109=item C<untaint_skip>
110
111If set, directories (subtrees) which fail the I<untaint_pattern>
112are skipped. The default is to 'die' in such a case.
113
114=back
115
116The wanted() function does whatever verifications you want.
117C<$File::Find::dir> contains the current directory name, and C<$_> the
118current filename within that directory. C<$File::Find::name> contains
119the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when
120the function is called, unless C<no_chdir> was specified.
121When <follow> or <follow_fast> are in effect there is also a
122C<$File::Find::fullname>.
123The function may set C<$File::Find::prune> to prune the tree
124unless C<bydepth> was specified.
e7b91b67 125Unless C<follow> or C<follow_fast> is specified, for compatibility
126reasons (find.pl, find2perl) there are in addition the following globals
127available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>,
128C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 129
20408e3c 130This library is useful for the C<find2perl> tool, which when fed,
f06db76b 131
132 find2perl / -name .nfs\* -mtime +7 \
81793b90 133 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 134
135produces something like:
136
137 sub wanted {
138 /^\.nfs.*$/ &&
81793b90 139 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 140 int(-M _) > 7 &&
141 unlink($_)
142 ||
81793b90 143 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 144 $dev < 0 &&
6280b799 145 ($File::Find::prune = 1);
f06db76b 146 }
147
81793b90 148Set the variable C<$File::Find::dont_use_nlink> if you're using AFS,
6280b799 149since AFS cheats.
f06db76b 150
f06db76b 151
152Here's another interesting wanted function. It will find all symlinks
153that don't resolve:
154
155 sub wanted {
81793b90 156 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 157 }
f06db76b 158
81793b90 159See also the script C<pfind> on CPAN for a nice application of this
160module.
161
162=head1 CAVEAT
163
164Be aware that the option to follow symblic links can be dangerous.
165Depending on the structure of the directory tree (including symbolic
166links to directories) you might traverse a given (physical) directory
167more than once (only if C<follow_fast> is in effect).
168Furthermore, deleting or changing files in a symbolically linked directory
169might cause very unpleasant surprises, since you delete or change files
170in an unknown directory.
0530a6c4 171
0530a6c4 172
f06db76b 173=cut
174
a0d0e21e 175@ISA = qw(Exporter);
6280b799 176@EXPORT = qw(find finddepth);
177
a0d0e21e 178
81793b90 179use strict;
180my $Is_VMS;
181
182require File::Basename;
183
184my %SLnkSeen;
185my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
186 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat);
187
188sub contract_name {
189 my ($cdir,$fn) = @_;
190
191 return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.';
192
193 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
194
195 $fn =~ s|^\./||;
196
197 my $abs_name= $cdir . $fn;
198
199 if (substr($fn,0,3) eq '../') {
200 do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|);
201 }
202
203 return $abs_name;
204}
205
206
207sub PathCombine($$) {
208 my ($Base,$Name) = @_;
209 my $AbsName;
210
211 if (substr($Name,0,1) eq '/') {
212 $AbsName= $Name;
213 }
214 else {
215 $AbsName= contract_name($Base,$Name);
216 }
217
218 # (simple) check for recursion
219 my $newlen= length($AbsName);
220 if ($newlen <= length($Base)) {
221 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
222 && $AbsName eq substr($Base,0,$newlen))
223 {
224 return undef;
225 }
226 }
227 return $AbsName;
228}
229
230sub Follow_SymLink($) {
231 my ($AbsName) = @_;
232
233 my ($NewName,$DEV, $INO);
234 ($DEV, $INO)= lstat $AbsName;
235
236 while (-l _) {
237 if ($SLnkSeen{$DEV, $INO}++) {
238 if ($follow_skip < 2) {
239 die "$AbsName is encountered a second time";
a0d0e21e 240 }
241 else {
81793b90 242 return undef;
a0d0e21e 243 }
244 }
81793b90 245 $NewName= PathCombine($AbsName, readlink($AbsName));
246 unless(defined $NewName) {
247 if ($follow_skip < 2) {
248 die "$AbsName is a recursive symbolic link";
249 }
250 else {
251 return undef;
a0d0e21e 252 }
81793b90 253 }
254 else {
255 $AbsName= $NewName;
256 }
257 ($DEV, $INO) = lstat($AbsName);
258 return undef unless defined $DEV; # dangling symbolic link
259 }
260
261 if ($full_check && $SLnkSeen{$DEV, $INO}++) {
262 if ($follow_skip < 1) {
263 die "$AbsName encountered a second time";
264 }
265 else {
266 return undef;
267 }
268 }
269
270 return $AbsName;
271}
272
17f410f9 273our($dir, $name, $fullname, $prune);
81793b90 274sub _find_dir_symlnk($$$);
275sub _find_dir($$$);
276
277sub _find_opt {
278 my $wanted = shift;
279 die "invalid top directory" unless defined $_[0];
280
281 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd();
282 my $cwd_untainted = $cwd;
283 $wanted_callback = $wanted->{wanted};
284 $bydepth = $wanted->{bydepth};
285 $no_chdir = $wanted->{no_chdir};
286 $full_check = $wanted->{follow};
287 $follow = $full_check || $wanted->{follow_fast};
288 $follow_skip = $wanted->{follow_skip};
289 $untaint = $wanted->{untaint};
290 $untaint_pat = $wanted->{untaint_pattern};
291 $untaint_skip = $wanted->{untaint_skip};
292
e7b91b67 293 # for compatability reasons (find.pl, find2perl)
294 our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 295
296 # a symbolic link to a directory doesn't increase the link count
297 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
298
299 if ( $untaint ) {
300 $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|;
301 die "insecure cwd in find(depth)" unless defined($cwd_untainted);
302 }
303
e7b91b67 304 my ($abs_dir, $Is_Dir);
81793b90 305
306 Proc_Top_Item:
307 foreach my $TOP (@_) {
308 my $top_item = $TOP;
07867069 309 $top_item =~ s|/$|| unless $top_item eq '/';
81793b90 310 $Is_Dir= 0;
311
312 if ($follow) {
313 if (substr($top_item,0,1) eq '/') {
314 $abs_dir = $top_item;
315 }
316 elsif ($top_item eq '.') {
317 $abs_dir = $cwd;
237437d0 318 }
81793b90 319 else { # care about any ../
320 $abs_dir = contract_name("$cwd/",$top_item);
321 }
322 $abs_dir= Follow_SymLink($abs_dir);
323 unless (defined $abs_dir) {
324 warn "$top_item is a dangling symbolic link\n";
325 next Proc_Top_Item;
326 }
327 if (-d _) {
328 _find_dir_symlnk($wanted, $abs_dir, $top_item);
329 $Is_Dir= 1;
330 }
331 }
332 else { # no follow
e7b91b67 333 $topdir = $top_item;
334 ($topdev,$topino,$topmode,$topnlink) = lstat $top_item;
335 unless (defined $topnlink) {
81793b90 336 warn "Can't stat $top_item: $!\n";
337 next Proc_Top_Item;
338 }
339 if (-d _) {
340 $top_item =~ s/\.dir$// if $Is_VMS;
e7b91b67 341 _find_dir($wanted, $top_item, $topnlink);
81793b90 342 $Is_Dir= 1;
343 }
237437d0 344 else {
81793b90 345 $abs_dir= $top_item;
346 }
347 }
348
349 unless ($Is_Dir) {
350 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
351 ($dir,$_) = ('.', $top_item);
352 }
353
354 $abs_dir = $dir;
355 if ($untaint) {
356 my $abs_dir_save = $abs_dir;
357 $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|;
358 unless (defined $abs_dir) {
359 if ($untaint_skip == 0) {
360 die "directory $abs_dir_save is still tainted";
361 }
362 else {
363 next Proc_Top_Item;
364 }
365 }
366 }
367
368 unless ($no_chdir or chdir $abs_dir) {
369 warn "Couldn't chdir $abs_dir: $!\n";
370 next Proc_Top_Item;
371 }
372
373 $name = $abs_dir;
374
375 &$wanted_callback;
376
377 }
378
379 $no_chdir or chdir $cwd_untainted;
380 }
381}
382
383# API:
384# $wanted
385# $p_dir : "parent directory"
386# $nlink : what came back from the stat
387# preconditions:
388# chdir (if not no_chdir) to dir
389
390sub _find_dir($$$) {
391 my ($wanted, $p_dir, $nlink) = @_;
392 my ($CdLvl,$Level) = (0,0);
393 my @Stack;
394 my @filenames;
395 my ($subcount,$sub_nlink);
396 my $SE= [];
397 my $dir_name= $p_dir;
07867069 398 my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
81793b90 399 my $dir_rel= '.'; # directory name relative to current directory
400
401 local ($dir, $name, $prune, *DIR);
402
403 unless ($no_chdir or $p_dir eq '.') {
404 my $udir = $p_dir;
405 if ($untaint) {
406 $udir = $1 if $p_dir =~ m|$untaint_pat|;
407 unless (defined $udir) {
408 if ($untaint_skip == 0) {
409 die "directory $p_dir is still tainted";
410 }
411 else {
412 return;
413 }
237437d0 414 }
a0d0e21e 415 }
81793b90 416 unless (chdir $udir) {
417 warn "Can't cd to $udir: $!\n";
418 return;
419 }
420 }
421
422 while (defined $SE) {
423 unless ($bydepth) {
424 $dir= $p_dir;
425 $name= $dir_name;
426 $_= ($no_chdir ? $dir_name : $dir_rel );
427 # prune may happen here
428 $prune= 0;
429 &$wanted_callback;
430 next if $prune;
431 }
432
433 # change to that directory
434 unless ($no_chdir or $dir_rel eq '.') {
435 my $udir= $dir_rel;
436 if ($untaint) {
437 $udir = $1 if $dir_rel =~ m|$untaint_pat|;
438 unless (defined $udir) {
439 if ($untaint_skip == 0) {
07867069 440 die "directory ("
441 . ($p_dir ne '/' ? $p_dir : '')
442 . "/) $dir_rel is still tainted";
81793b90 443 }
444 }
445 }
446 unless (chdir $udir) {
07867069 447 warn "Can't cd to ("
448 . ($p_dir ne '/' ? $p_dir : '')
449 . "/) $udir : $!\n";
81793b90 450 next;
451 }
452 $CdLvl++;
453 }
454
455 $dir= $dir_name;
456
457 # Get the list of files in the current directory.
458 unless (opendir DIR, ($no_chdir ? $dir_name : '.')) {
459 warn "Can't opendir($dir_name): $!\n";
460 next;
461 }
462 @filenames = readdir DIR;
463 closedir(DIR);
464
465 if ($nlink == 2 && !$avoid_nlink) {
466 # This dir has no subdirectories.
467 for my $FN (@filenames) {
468 next if $FN =~ /^\.{1,2}$/;
469
07867069 470 $name = $dir_pref . $FN;
81793b90 471 $_ = ($no_chdir ? $name : $FN);
472 &$wanted_callback;
473 }
474
475 }
476 else {
477 # This dir has subdirectories.
478 $subcount = $nlink - 2;
479
480 for my $FN (@filenames) {
481 next if $FN =~ /^\.{1,2}$/;
482 if ($subcount > 0 || $avoid_nlink) {
483 # Seen all the subdirs?
484 # check for directoriness.
485 # stat is faster for a file in the current directory
07867069 486 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 487
488 if (-d _) {
489 --$subcount;
490 $FN =~ s/\.dir$// if $Is_VMS;
491 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
492 }
493 else {
07867069 494 $name = $dir_pref . $FN;
81793b90 495 $_= ($no_chdir ? $name : $FN);
496 &$wanted_callback;
497 }
498 }
07867069 499 else {
500 $name = $dir_pref . $FN;
81793b90 501 $_= ($no_chdir ? $name : $FN);
502 &$wanted_callback;
503 }
504 }
505 }
506 if ($bydepth) {
507 $name = $dir_name;
508 $dir = $p_dir;
509 $_ = ($no_chdir ? $dir_name : $dir_rel );
510 &$wanted_callback;
511 }
17b275ff 512 }
513 continue {
81793b90 514 if ( defined ($SE = pop @Stack) ) {
515 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
516 if ($CdLvl > $Level && !$no_chdir) {
517 die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level)
518 unless chdir '../' x ($CdLvl-$Level);
519 $CdLvl = $Level;
520 }
07867069 521 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
522 $dir_pref = "$dir_name/";
81793b90 523 }
a0d0e21e 524 }
525}
526
81793b90 527
528# API:
529# $wanted
530# $dir_loc : absolute location of a dir
531# $p_dir : "parent directory"
532# preconditions:
533# chdir (if not no_chdir) to dir
534
535sub _find_dir_symlnk($$$) {
536 my ($wanted, $dir_loc, $p_dir) = @_;
537 my @Stack;
538 my @filenames;
539 my $new_loc;
540 my $SE = [];
541 my $dir_name = $p_dir;
07867069 542 my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
543 my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
81793b90 544 my $dir_rel = '.'; # directory name relative to current directory
545
546 local ($dir, $name, $fullname, $prune, *DIR);
547
548 unless ($no_chdir or $p_dir eq '.') {
549 my $udir = $dir_loc;
550 if ($untaint) {
551 $udir = $1 if $dir_loc =~ m|$untaint_pat|;
552 unless (defined $udir) {
553 if ($untaint_skip == 0) {
554 die "directory $dir_loc is still tainted";
555 }
556 else {
557 return;
558 }
559 }
560 }
561 unless (chdir $udir) {
562 warn "Can't cd to $udir: $!\n";
563 return;
564 }
565 }
566
567 while (defined $SE) {
568
569 unless ($bydepth) {
570 $dir= $p_dir;
571 $name= $dir_name;
572 $_= ($no_chdir ? $dir_name : $dir_rel );
573 $fullname= $dir_loc;
574 # prune may happen here
575 $prune= 0;
576 &$wanted_callback;
577 next if $prune;
578 }
579
580 # change to that directory
581 unless ($no_chdir or $dir_rel eq '.') {
582 my $udir = $dir_loc;
583 if ($untaint) {
584 $udir = $1 if $dir_loc =~ m|$untaint_pat|;
585 unless (defined $udir ) {
586 if ($untaint_skip == 0) {
587 die "directory $dir_loc is still tainted";
a0d0e21e 588 }
237437d0 589 else {
81793b90 590 next;
237437d0 591 }
a0d0e21e 592 }
593 }
81793b90 594 unless (chdir $udir) {
595 warn "Can't cd to $udir: $!\n";
596 next;
597 }
598 }
599
600 $dir = $dir_name;
601
602 # Get the list of files in the current directory.
603 unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) {
604 warn "Can't opendir($dir_loc): $!\n";
605 next;
606 }
607 @filenames = readdir DIR;
608 closedir(DIR);
609
610 for my $FN (@filenames) {
611 next if $FN =~ /^\.{1,2}$/;
612
613 # follow symbolic links / do an lstat
07867069 614 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 615
616 # ignore if invalid symlink
617 next unless defined $new_loc;
618
619 if (-d _) {
620 push @Stack,[$new_loc,$dir_name,$FN];
621 }
622 else {
623 $fullname = $new_loc;
07867069 624 $name = $dir_pref . $FN;
81793b90 625 $_ = ($no_chdir ? $name : $FN);
626 &$wanted_callback;
627 }
628 }
629
630 if ($bydepth) {
631 $fullname = $dir_loc;
632 $name = $dir_name;
633 $_ = ($no_chdir ? $dir_name : $dir_rel);
634 &$wanted_callback;
635 }
636 }
637 continue {
638 if (defined($SE = pop @Stack)) {
639 ($dir_loc, $p_dir, $dir_rel) = @$SE;
07867069 640 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
641 $dir_pref = "$dir_name/";
642 $loc_pref = "$dir_loc/";
a0d0e21e 643 }
644 }
645}
646
81793b90 647
20408e3c 648sub wrap_wanted {
81793b90 649 my $wanted = shift;
650 if ( ref($wanted) eq 'HASH' ) {
651 if ( $wanted->{follow} || $wanted->{follow_fast}) {
652 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
653 }
654 if ( $wanted->{untaint} ) {
655 $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$|
656 unless defined $wanted->{untaint_pattern};
657 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
658 }
659 return $wanted;
660 }
661 else {
662 return { wanted => $wanted };
663 }
a0d0e21e 664}
665
20408e3c 666sub find {
81793b90 667 my $wanted = shift;
668 _find_opt(wrap_wanted($wanted), @_);
669 %SLnkSeen= (); # free memory
a0d0e21e 670}
671
55d729e4 672sub finddepth {
81793b90 673 my $wanted = wrap_wanted(shift);
674 $wanted->{bydepth} = 1;
675 _find_opt($wanted, @_);
676 %SLnkSeen= (); # free memory
20408e3c 677}
6280b799 678
679# These are hard-coded for now, but may move to hint files.
10eba763 680if ($^O eq 'VMS') {
81793b90 681 $Is_VMS = 1;
682 $File::Find::dont_use_nlink = 1;
748a9306 683}
684
81793b90 685$File::Find::dont_use_nlink = 1
3e8584ad 686 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32';
6280b799 687
20408e3c 688# Set dont_use_nlink in your hint file if your system's stat doesn't
689# report the number of links in a directory as an indication
690# of the number of files.
691# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 692unless ($File::Find::dont_use_nlink) {
693 require Config;
694 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 695}
696
a0d0e21e 6971;