fix Pod::Html to recognize C<< > >> etc., and convert some pods
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
17f410f9 2use 5.005_64;
a0d0e21e 3require Exporter;
6280b799 4require Cwd;
a0d0e21e 5
f06db76b 6=head1 NAME
7
8find - traverse a file tree
9
10finddepth - traverse a directory structure depth-first
11
12=head1 SYNOPSIS
13
14 use File::Find;
81793b90 15 find(\&wanted, '/foo', '/bar');
f06db76b 16 sub wanted { ... }
237437d0 17
f06db76b 18 use File::Find;
81793b90 19 finddepth(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
81793b90 21
22 use File::Find;
23 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 24
25=head1 DESCRIPTION
26
20408e3c 27The first argument to find() is either a hash reference describing the
81793b90 28operations to be performed for each file, or a code reference.
20408e3c 29
81793b90 30Here are the possible keys for the hash:
31
32=over 3
33
34=item C<wanted>
35
36The value should be a code reference. This code reference is called
37I<the wanted() function> below.
38
39=item C<bydepth>
40
41Reports the name of a directory only AFTER all its entries
42have been reported. Entry point finddepth() is a shortcut for
43specifying C<{ bydepth => 1 }> in the first argument of find().
44
45=item C<follow>
46
47Causes symbolic links to be followed. Since directory trees with symbolic
48links (followed) may contain files more than once and may even have
49cycles, a hash has to be built up with an entry for each file.
50This might be expensive both in space and time for a large
51directory tree. See I<follow_fast> and I<follow_skip> below.
52If either I<follow> or I<follow_fast> is in effect:
53
54=over 6
55
a45bd81d 56=item *
81793b90 57
58It is guarantueed that an I<lstat> has been called before the user's
59I<wanted()> function is called. This enables fast file checks involving S< _>.
60
a45bd81d 61=item *
81793b90 62
63There is a variable C<$File::Find::fullname> which holds the absolute
64pathname of the file with all symbolic links resolved
65
66=back
67
68=item C<follow_fast>
69
70This is similar to I<follow> except that it may report some files
71more than once. It does detect cycles however.
72Since only symbolic links have to be hashed, this is
73much cheaper both in space and time.
74If processing a file more than once (by the user's I<wanted()> function)
75is worse than just taking time, the option I<follow> should be used.
76
77=item C<follow_skip>
78
79C<follow_skip==1>, which is the default, causes all files which are
80neither directories nor symbolic links to be ignored if they are about
81to be processed a second time. If a directory or a symbolic link
82are about to be processed a second time, File::Find dies.
83C<follow_skip==0> causes File::Find to die if any file is about to be
84processed a second time.
85C<follow_skip==2> causes File::Find to ignore any duplicate files and
86dirctories but to proceed normally otherwise.
20408e3c 87
f06db76b 88
81793b90 89=item C<no_chdir>
90
91Does not C<chdir()> to each directory as it recurses. The wanted()
92function will need to be aware of this, of course. In this case,
93C<$_> will be the same as C<$File::Find::name>.
94
95=item C<untaint>
96
97If find is used in taint-mode (-T command line switch or if EUID != UID
98or if EGID != GID) then internally directory names have to be untainted
99before they can be cd'ed to. Therefore they are checked against a regular
100expression I<untaint_pattern>. Note, that all names passed to the
101user's I<wanted()> function are still tainted.
102
103=item C<untaint_pattern>
104
105See above. This should be set using the C<qr> quoting operator.
106The default is set to C<qr|^([-+@\w./]+)$|>.
107Note that the paranthesis which are vital.
108
109=item C<untaint_skip>
110
111If set, directories (subtrees) which fail the I<untaint_pattern>
112are skipped. The default is to 'die' in such a case.
113
114=back
115
116The wanted() function does whatever verifications you want.
117C<$File::Find::dir> contains the current directory name, and C<$_> the
118current filename within that directory. C<$File::Find::name> contains
119the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when
120the function is called, unless C<no_chdir> was specified.
121When <follow> or <follow_fast> are in effect there is also a
122C<$File::Find::fullname>.
123The function may set C<$File::Find::prune> to prune the tree
124unless C<bydepth> was specified.
e7b91b67 125Unless C<follow> or C<follow_fast> is specified, for compatibility
126reasons (find.pl, find2perl) there are in addition the following globals
127available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>,
128C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 129
20408e3c 130This library is useful for the C<find2perl> tool, which when fed,
f06db76b 131
132 find2perl / -name .nfs\* -mtime +7 \
81793b90 133 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 134
135produces something like:
136
137 sub wanted {
c7b9dd21 138 /^\.nfs.*\z/s &&
81793b90 139 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 140 int(-M _) > 7 &&
141 unlink($_)
142 ||
81793b90 143 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 144 $dev < 0 &&
6280b799 145 ($File::Find::prune = 1);
f06db76b 146 }
147
81793b90 148Set the variable C<$File::Find::dont_use_nlink> if you're using AFS,
6280b799 149since AFS cheats.
f06db76b 150
f06db76b 151
152Here's another interesting wanted function. It will find all symlinks
153that don't resolve:
154
155 sub wanted {
81793b90 156 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 157 }
f06db76b 158
81793b90 159See also the script C<pfind> on CPAN for a nice application of this
160module.
161
162=head1 CAVEAT
163
164Be aware that the option to follow symblic links can be dangerous.
165Depending on the structure of the directory tree (including symbolic
166links to directories) you might traverse a given (physical) directory
167more than once (only if C<follow_fast> is in effect).
168Furthermore, deleting or changing files in a symbolically linked directory
169might cause very unpleasant surprises, since you delete or change files
170in an unknown directory.
0530a6c4 171
0530a6c4 172
f06db76b 173=cut
174
a0d0e21e 175@ISA = qw(Exporter);
6280b799 176@EXPORT = qw(find finddepth);
177
a0d0e21e 178
81793b90 179use strict;
180my $Is_VMS;
181
182require File::Basename;
183
184my %SLnkSeen;
185my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
186 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat);
187
188sub contract_name {
189 my ($cdir,$fn) = @_;
190
191 return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.';
192
193 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
194
195 $fn =~ s|^\./||;
196
197 my $abs_name= $cdir . $fn;
198
199 if (substr($fn,0,3) eq '../') {
200 do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|);
201 }
202
203 return $abs_name;
204}
205
206
207sub PathCombine($$) {
208 my ($Base,$Name) = @_;
209 my $AbsName;
210
211 if (substr($Name,0,1) eq '/') {
212 $AbsName= $Name;
213 }
214 else {
215 $AbsName= contract_name($Base,$Name);
216 }
217
218 # (simple) check for recursion
219 my $newlen= length($AbsName);
220 if ($newlen <= length($Base)) {
221 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
222 && $AbsName eq substr($Base,0,$newlen))
223 {
224 return undef;
225 }
226 }
227 return $AbsName;
228}
229
230sub Follow_SymLink($) {
231 my ($AbsName) = @_;
232
233 my ($NewName,$DEV, $INO);
234 ($DEV, $INO)= lstat $AbsName;
235
236 while (-l _) {
237 if ($SLnkSeen{$DEV, $INO}++) {
238 if ($follow_skip < 2) {
239 die "$AbsName is encountered a second time";
a0d0e21e 240 }
241 else {
81793b90 242 return undef;
a0d0e21e 243 }
244 }
81793b90 245 $NewName= PathCombine($AbsName, readlink($AbsName));
246 unless(defined $NewName) {
247 if ($follow_skip < 2) {
248 die "$AbsName is a recursive symbolic link";
249 }
250 else {
251 return undef;
a0d0e21e 252 }
81793b90 253 }
254 else {
255 $AbsName= $NewName;
256 }
257 ($DEV, $INO) = lstat($AbsName);
258 return undef unless defined $DEV; # dangling symbolic link
259 }
260
261 if ($full_check && $SLnkSeen{$DEV, $INO}++) {
262 if ($follow_skip < 1) {
263 die "$AbsName encountered a second time";
264 }
265 else {
266 return undef;
267 }
268 }
269
270 return $AbsName;
271}
272
17f410f9 273our($dir, $name, $fullname, $prune);
81793b90 274sub _find_dir_symlnk($$$);
275sub _find_dir($$$);
276
277sub _find_opt {
278 my $wanted = shift;
279 die "invalid top directory" unless defined $_[0];
280
281 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd();
282 my $cwd_untainted = $cwd;
283 $wanted_callback = $wanted->{wanted};
284 $bydepth = $wanted->{bydepth};
285 $no_chdir = $wanted->{no_chdir};
286 $full_check = $wanted->{follow};
287 $follow = $full_check || $wanted->{follow_fast};
288 $follow_skip = $wanted->{follow_skip};
289 $untaint = $wanted->{untaint};
290 $untaint_pat = $wanted->{untaint_pattern};
291 $untaint_skip = $wanted->{untaint_skip};
292
e7b91b67 293 # for compatability reasons (find.pl, find2perl)
294 our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 295
296 # a symbolic link to a directory doesn't increase the link count
297 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
298
299 if ( $untaint ) {
300 $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|;
301 die "insecure cwd in find(depth)" unless defined($cwd_untainted);
302 }
303
e7b91b67 304 my ($abs_dir, $Is_Dir);
81793b90 305
306 Proc_Top_Item:
307 foreach my $TOP (@_) {
308 my $top_item = $TOP;
c7b9dd21 309 $top_item =~ s|/\z|| unless $top_item eq '/';
81793b90 310 $Is_Dir= 0;
311
3d1e7443 312 ($topdev,$topino,$topmode,$topnlink) = stat $top_item;
313
81793b90 314 if ($follow) {
315 if (substr($top_item,0,1) eq '/') {
316 $abs_dir = $top_item;
317 }
318 elsif ($top_item eq '.') {
319 $abs_dir = $cwd;
237437d0 320 }
81793b90 321 else { # care about any ../
322 $abs_dir = contract_name("$cwd/",$top_item);
323 }
324 $abs_dir= Follow_SymLink($abs_dir);
325 unless (defined $abs_dir) {
326 warn "$top_item is a dangling symbolic link\n";
327 next Proc_Top_Item;
328 }
329 if (-d _) {
330 _find_dir_symlnk($wanted, $abs_dir, $top_item);
331 $Is_Dir= 1;
332 }
333 }
334 else { # no follow
e7b91b67 335 $topdir = $top_item;
e7b91b67 336 unless (defined $topnlink) {
81793b90 337 warn "Can't stat $top_item: $!\n";
338 next Proc_Top_Item;
339 }
340 if (-d _) {
c7b9dd21 341 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 342 _find_dir($wanted, $top_item, $topnlink);
81793b90 343 $Is_Dir= 1;
344 }
237437d0 345 else {
81793b90 346 $abs_dir= $top_item;
347 }
348 }
349
350 unless ($Is_Dir) {
351 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
352 ($dir,$_) = ('.', $top_item);
353 }
354
355 $abs_dir = $dir;
356 if ($untaint) {
357 my $abs_dir_save = $abs_dir;
358 $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|;
359 unless (defined $abs_dir) {
360 if ($untaint_skip == 0) {
361 die "directory $abs_dir_save is still tainted";
362 }
363 else {
364 next Proc_Top_Item;
365 }
366 }
367 }
368
369 unless ($no_chdir or chdir $abs_dir) {
370 warn "Couldn't chdir $abs_dir: $!\n";
371 next Proc_Top_Item;
372 }
373
374 $name = $abs_dir;
375
376 &$wanted_callback;
377
378 }
379
380 $no_chdir or chdir $cwd_untainted;
381 }
382}
383
384# API:
385# $wanted
386# $p_dir : "parent directory"
387# $nlink : what came back from the stat
388# preconditions:
389# chdir (if not no_chdir) to dir
390
391sub _find_dir($$$) {
392 my ($wanted, $p_dir, $nlink) = @_;
393 my ($CdLvl,$Level) = (0,0);
394 my @Stack;
395 my @filenames;
396 my ($subcount,$sub_nlink);
397 my $SE= [];
398 my $dir_name= $p_dir;
07867069 399 my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
81793b90 400 my $dir_rel= '.'; # directory name relative to current directory
401
402 local ($dir, $name, $prune, *DIR);
403
404 unless ($no_chdir or $p_dir eq '.') {
405 my $udir = $p_dir;
406 if ($untaint) {
407 $udir = $1 if $p_dir =~ m|$untaint_pat|;
408 unless (defined $udir) {
409 if ($untaint_skip == 0) {
410 die "directory $p_dir is still tainted";
411 }
412 else {
413 return;
414 }
237437d0 415 }
a0d0e21e 416 }
81793b90 417 unless (chdir $udir) {
418 warn "Can't cd to $udir: $!\n";
419 return;
420 }
421 }
422
423 while (defined $SE) {
424 unless ($bydepth) {
425 $dir= $p_dir;
426 $name= $dir_name;
427 $_= ($no_chdir ? $dir_name : $dir_rel );
428 # prune may happen here
429 $prune= 0;
430 &$wanted_callback;
431 next if $prune;
432 }
433
434 # change to that directory
435 unless ($no_chdir or $dir_rel eq '.') {
436 my $udir= $dir_rel;
437 if ($untaint) {
438 $udir = $1 if $dir_rel =~ m|$untaint_pat|;
439 unless (defined $udir) {
440 if ($untaint_skip == 0) {
07867069 441 die "directory ("
442 . ($p_dir ne '/' ? $p_dir : '')
443 . "/) $dir_rel is still tainted";
81793b90 444 }
445 }
446 }
447 unless (chdir $udir) {
07867069 448 warn "Can't cd to ("
449 . ($p_dir ne '/' ? $p_dir : '')
450 . "/) $udir : $!\n";
81793b90 451 next;
452 }
453 $CdLvl++;
454 }
455
456 $dir= $dir_name;
457
458 # Get the list of files in the current directory.
459 unless (opendir DIR, ($no_chdir ? $dir_name : '.')) {
460 warn "Can't opendir($dir_name): $!\n";
461 next;
462 }
463 @filenames = readdir DIR;
464 closedir(DIR);
465
466 if ($nlink == 2 && !$avoid_nlink) {
467 # This dir has no subdirectories.
468 for my $FN (@filenames) {
c7b9dd21 469 next if $FN =~ /^\.{1,2}\z/;
81793b90 470
07867069 471 $name = $dir_pref . $FN;
81793b90 472 $_ = ($no_chdir ? $name : $FN);
473 &$wanted_callback;
474 }
475
476 }
477 else {
478 # This dir has subdirectories.
479 $subcount = $nlink - 2;
480
481 for my $FN (@filenames) {
c7b9dd21 482 next if $FN =~ /^\.{1,2}\z/;
81793b90 483 if ($subcount > 0 || $avoid_nlink) {
484 # Seen all the subdirs?
485 # check for directoriness.
486 # stat is faster for a file in the current directory
07867069 487 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 488
489 if (-d _) {
490 --$subcount;
c7b9dd21 491 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 492 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
493 }
494 else {
07867069 495 $name = $dir_pref . $FN;
81793b90 496 $_= ($no_chdir ? $name : $FN);
497 &$wanted_callback;
498 }
499 }
07867069 500 else {
501 $name = $dir_pref . $FN;
81793b90 502 $_= ($no_chdir ? $name : $FN);
503 &$wanted_callback;
504 }
505 }
506 }
507 if ($bydepth) {
508 $name = $dir_name;
509 $dir = $p_dir;
510 $_ = ($no_chdir ? $dir_name : $dir_rel );
511 &$wanted_callback;
512 }
17b275ff 513 }
514 continue {
81793b90 515 if ( defined ($SE = pop @Stack) ) {
516 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
517 if ($CdLvl > $Level && !$no_chdir) {
518 die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level)
519 unless chdir '../' x ($CdLvl-$Level);
520 $CdLvl = $Level;
521 }
07867069 522 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
523 $dir_pref = "$dir_name/";
81793b90 524 }
a0d0e21e 525 }
526}
527
81793b90 528
529# API:
530# $wanted
531# $dir_loc : absolute location of a dir
532# $p_dir : "parent directory"
533# preconditions:
534# chdir (if not no_chdir) to dir
535
536sub _find_dir_symlnk($$$) {
537 my ($wanted, $dir_loc, $p_dir) = @_;
538 my @Stack;
539 my @filenames;
540 my $new_loc;
541 my $SE = [];
542 my $dir_name = $p_dir;
07867069 543 my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
544 my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
81793b90 545 my $dir_rel = '.'; # directory name relative to current directory
546
547 local ($dir, $name, $fullname, $prune, *DIR);
548
549 unless ($no_chdir or $p_dir eq '.') {
550 my $udir = $dir_loc;
551 if ($untaint) {
552 $udir = $1 if $dir_loc =~ m|$untaint_pat|;
553 unless (defined $udir) {
554 if ($untaint_skip == 0) {
555 die "directory $dir_loc is still tainted";
556 }
557 else {
558 return;
559 }
560 }
561 }
562 unless (chdir $udir) {
563 warn "Can't cd to $udir: $!\n";
564 return;
565 }
566 }
567
568 while (defined $SE) {
569
570 unless ($bydepth) {
571 $dir= $p_dir;
572 $name= $dir_name;
573 $_= ($no_chdir ? $dir_name : $dir_rel );
574 $fullname= $dir_loc;
575 # prune may happen here
576 $prune= 0;
577 &$wanted_callback;
578 next if $prune;
579 }
580
581 # change to that directory
582 unless ($no_chdir or $dir_rel eq '.') {
583 my $udir = $dir_loc;
584 if ($untaint) {
585 $udir = $1 if $dir_loc =~ m|$untaint_pat|;
586 unless (defined $udir ) {
587 if ($untaint_skip == 0) {
588 die "directory $dir_loc is still tainted";
a0d0e21e 589 }
237437d0 590 else {
81793b90 591 next;
237437d0 592 }
a0d0e21e 593 }
594 }
81793b90 595 unless (chdir $udir) {
596 warn "Can't cd to $udir: $!\n";
597 next;
598 }
599 }
600
601 $dir = $dir_name;
602
603 # Get the list of files in the current directory.
604 unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) {
605 warn "Can't opendir($dir_loc): $!\n";
606 next;
607 }
608 @filenames = readdir DIR;
609 closedir(DIR);
610
611 for my $FN (@filenames) {
c7b9dd21 612 next if $FN =~ /^\.{1,2}\z/;
81793b90 613
614 # follow symbolic links / do an lstat
07867069 615 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 616
617 # ignore if invalid symlink
618 next unless defined $new_loc;
619
620 if (-d _) {
621 push @Stack,[$new_loc,$dir_name,$FN];
622 }
623 else {
624 $fullname = $new_loc;
07867069 625 $name = $dir_pref . $FN;
81793b90 626 $_ = ($no_chdir ? $name : $FN);
627 &$wanted_callback;
628 }
629 }
630
631 if ($bydepth) {
632 $fullname = $dir_loc;
633 $name = $dir_name;
634 $_ = ($no_chdir ? $dir_name : $dir_rel);
635 &$wanted_callback;
636 }
637 }
638 continue {
639 if (defined($SE = pop @Stack)) {
640 ($dir_loc, $p_dir, $dir_rel) = @$SE;
07867069 641 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
642 $dir_pref = "$dir_name/";
643 $loc_pref = "$dir_loc/";
a0d0e21e 644 }
645 }
646}
647
81793b90 648
20408e3c 649sub wrap_wanted {
81793b90 650 my $wanted = shift;
651 if ( ref($wanted) eq 'HASH' ) {
652 if ( $wanted->{follow} || $wanted->{follow_fast}) {
653 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
654 }
655 if ( $wanted->{untaint} ) {
656 $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$|
657 unless defined $wanted->{untaint_pattern};
658 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
659 }
660 return $wanted;
661 }
662 else {
663 return { wanted => $wanted };
664 }
a0d0e21e 665}
666
20408e3c 667sub find {
81793b90 668 my $wanted = shift;
669 _find_opt(wrap_wanted($wanted), @_);
670 %SLnkSeen= (); # free memory
a0d0e21e 671}
672
55d729e4 673sub finddepth {
81793b90 674 my $wanted = wrap_wanted(shift);
675 $wanted->{bydepth} = 1;
676 _find_opt($wanted, @_);
677 %SLnkSeen= (); # free memory
20408e3c 678}
6280b799 679
680# These are hard-coded for now, but may move to hint files.
10eba763 681if ($^O eq 'VMS') {
81793b90 682 $Is_VMS = 1;
683 $File::Find::dont_use_nlink = 1;
748a9306 684}
685
81793b90 686$File::Find::dont_use_nlink = 1
3e8584ad 687 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32';
6280b799 688
20408e3c 689# Set dont_use_nlink in your hint file if your system's stat doesn't
690# report the number of links in a directory as an indication
691# of the number of files.
692# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 693unless ($File::Find::dont_use_nlink) {
694 require Config;
695 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 696}
697
a0d0e21e 6981;