handle tri graphs in h2ph.PL -> h2ph*
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
b75c8c73 2use strict;
b395063c 3use warnings;
4use 5.6.0;
b75c8c73 5our $VERSION = '1.00';
a0d0e21e 6require Exporter;
6280b799 7require Cwd;
a0d0e21e 8
f06db76b 9=head1 NAME
10
11find - traverse a file tree
12
13finddepth - traverse a directory structure depth-first
14
15=head1 SYNOPSIS
16
17 use File::Find;
81793b90 18 find(\&wanted, '/foo', '/bar');
f06db76b 19 sub wanted { ... }
237437d0 20
f06db76b 21 use File::Find;
81793b90 22 finddepth(\&wanted, '/foo', '/bar');
f06db76b 23 sub wanted { ... }
3cb6de81 24
81793b90 25 use File::Find;
26 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 27
28=head1 DESCRIPTION
29
20408e3c 30The first argument to find() is either a hash reference describing the
81793b90 31operations to be performed for each file, or a code reference.
20408e3c 32
81793b90 33Here are the possible keys for the hash:
34
35=over 3
36
37=item C<wanted>
38
39The value should be a code reference. This code reference is called
40I<the wanted() function> below.
41
42=item C<bydepth>
43
44Reports the name of a directory only AFTER all its entries
45have been reported. Entry point finddepth() is a shortcut for
46specifying C<{ bydepth => 1 }> in the first argument of find().
47
719c805e 48=item C<preprocess>
49
7e47e6ff 50The value should be a code reference. This code reference is used to
51preprocess the current directory. The name of currently processed
52directory is in $File::Find::dir. Your preprocessing function is
53called after readdir() but before the loop that calls the wanted()
54function. It is called with a list of strings (actually file/directory
55names) and is expected to return a list of strings. The code can be
56used to sort the file/directory names alphabetically, numerically,
57or to filter out directory entries based on their name alone. When
58I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 59
60=item C<postprocess>
61
7e47e6ff 62The value should be a code reference. It is invoked just before leaving
63the currently processed directory. It is called in void context with no
64arguments. The name of the current directory is in $File::Find::dir. This
65hook is handy for summarizing a directory, such as calculating its disk
66usage. When I<follow> or I<follow_fast> are in effect, C<preprocess> is a
67no-op.
719c805e 68
81793b90 69=item C<follow>
70
71Causes symbolic links to be followed. Since directory trees with symbolic
72links (followed) may contain files more than once and may even have
73cycles, a hash has to be built up with an entry for each file.
74This might be expensive both in space and time for a large
75directory tree. See I<follow_fast> and I<follow_skip> below.
76If either I<follow> or I<follow_fast> is in effect:
77
78=over 6
79
a45bd81d 80=item *
81793b90 81
f10e1564 82It is guaranteed that an I<lstat> has been called before the user's
81793b90 83I<wanted()> function is called. This enables fast file checks involving S< _>.
84
a45bd81d 85=item *
81793b90 86
87There is a variable C<$File::Find::fullname> which holds the absolute
88pathname of the file with all symbolic links resolved
89
90=back
91
92=item C<follow_fast>
93
f10e1564 94This is similar to I<follow> except that it may report some files more
95than once. It does detect cycles, however. Since only symbolic links
96have to be hashed, this is much cheaper both in space and time. If
97processing a file more than once (by the user's I<wanted()> function)
81793b90 98is worse than just taking time, the option I<follow> should be used.
99
100=item C<follow_skip>
101
102C<follow_skip==1>, which is the default, causes all files which are
103neither directories nor symbolic links to be ignored if they are about
104to be processed a second time. If a directory or a symbolic link
105are about to be processed a second time, File::Find dies.
106C<follow_skip==0> causes File::Find to die if any file is about to be
107processed a second time.
108C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 109directories but to proceed normally otherwise.
20408e3c 110
f06db76b 111
81793b90 112=item C<no_chdir>
113
114Does not C<chdir()> to each directory as it recurses. The wanted()
115function will need to be aware of this, of course. In this case,
116C<$_> will be the same as C<$File::Find::name>.
117
118=item C<untaint>
119
120If find is used in taint-mode (-T command line switch or if EUID != UID
121or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 122before they can be chdir'ed to. Therefore they are checked against a regular
123expression I<untaint_pattern>. Note that all names passed to the user's
124I<wanted()> function are still tainted. If this option is used while
125not in taint-mode, C<untaint> is a no-op.
81793b90 126
127=item C<untaint_pattern>
128
129See above. This should be set using the C<qr> quoting operator.
130The default is set to C<qr|^([-+@\w./]+)$|>.
ac06ab1d 131Note that the parantheses are vital.
81793b90 132
133=item C<untaint_skip>
134
7e47e6ff 135If set, a directory which fails the I<untaint_pattern> is skipped,
136including all its sub-directories. The default is to 'die' in such a case.
81793b90 137
138=back
139
140The wanted() function does whatever verifications you want.
141C<$File::Find::dir> contains the current directory name, and C<$_> the
142current filename within that directory. C<$File::Find::name> contains
f10e1564 143the complete pathname to the file. You are chdir()'d to
144C<$File::Find::dir> when the function is called, unless C<no_chdir>
7e47e6ff 145was specified. When C<follow> or C<follow_fast> are in effect, there is
f10e1564 146also a C<$File::Find::fullname>. The function may set
147C<$File::Find::prune> to prune the tree unless C<bydepth> was
148specified. Unless C<follow> or C<follow_fast> is specified, for
149compatibility reasons (find.pl, find2perl) there are in addition the
150following globals available: C<$File::Find::topdir>,
151C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 152C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 153
20408e3c 154This library is useful for the C<find2perl> tool, which when fed,
f06db76b 155
156 find2perl / -name .nfs\* -mtime +7 \
81793b90 157 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 158
159produces something like:
160
161 sub wanted {
c7b9dd21 162 /^\.nfs.*\z/s &&
81793b90 163 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 164 int(-M _) > 7 &&
165 unlink($_)
166 ||
81793b90 167 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 168 $dev < 0 &&
6280b799 169 ($File::Find::prune = 1);
f06db76b 170 }
171
81793b90 172Set the variable C<$File::Find::dont_use_nlink> if you're using AFS,
6280b799 173since AFS cheats.
f06db76b 174
f06db76b 175
176Here's another interesting wanted function. It will find all symlinks
177that don't resolve:
178
179 sub wanted {
81793b90 180 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 181 }
f06db76b 182
81793b90 183See also the script C<pfind> on CPAN for a nice application of this
184module.
185
186=head1 CAVEAT
187
f10e1564 188Be aware that the option to follow symbolic links can be dangerous.
81793b90 189Depending on the structure of the directory tree (including symbolic
190links to directories) you might traverse a given (physical) directory
191more than once (only if C<follow_fast> is in effect).
192Furthermore, deleting or changing files in a symbolically linked directory
193might cause very unpleasant surprises, since you delete or change files
194in an unknown directory.
0530a6c4 195
7e47e6ff 196=head1 NOTES
197
198=over 4
199
200=item *
201
202Mac OS (Classic) users should note a few differences:
203
204=over 4
205
206=item *
207
208The path separator is ':', not '/', and the current directory is denoted
209as ':', not '.'. You should be careful about specifying relative pathnames.
210While a full path always begins with a volume name, a relative pathname
211should always begin with a ':'. If specifying a volume name only, a
212trailing ':' is required.
213
214=item *
215
216C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
217contains the name of a directory, that name may or may not end with a
218':'. Likewise, C<$File::Find::name>, which contains the complete
219pathname to that directory, and C<$File::Find::fullname>, which holds
220the absolute pathname of that directory with all symbolic links resolved,
221may or may not end with a ':'.
222
223=item *
224
225The default C<untaint_pattern> (see above) on Mac OS is set to
226C<qr|^(.+)$|>. Note that the parentheses are vital.
227
228=item *
229
230The invisible system file "Icon\015" is ignored. While this file may
231appear in every directory, there are some more invisible system files
232on every volume, which are all located at the volume root level (i.e.
233"MacintoshHD:"). These system files are B<not> excluded automatically.
234Your filter may use the following code to recognize invisible files or
235directories (requires Mac::Files):
236
237 use Mac::Files;
238
239 # invisible() -- returns 1 if file/directory is invisible,
240 # 0 if it's visible or undef if an error occured
241
242 sub invisible($) {
243 my $file = shift;
244 my ($fileCat, $fileInfo);
245 my $invisible_flag = 1 << 14;
246
247 if ( $fileCat = FSpGetCatInfo($file) ) {
248 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
249 return (($fileInfo->fdFlags & $invisible_flag) && 1);
250 }
251 }
252 return undef;
253 }
254
255Generally, invisible files are system files, unless an odd application
256decides to use invisible files for its own purposes. To distinguish
257such files from system files, you have to look at the B<type> and B<creator>
258file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
259C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
260(see MacPerl.pm for details).
261
262Files that appear on the desktop actually reside in an (hidden) directory
263named "Desktop Folder" on the particular disk volume. Note that, although
264all desktop files appear to be on the same "virtual" desktop, each disk
265volume actually maintains its own "Desktop Folder" directory.
266
267=back
268
269=back
0530a6c4 270
f06db76b 271=cut
272
b75c8c73 273our @ISA = qw(Exporter);
274our @EXPORT = qw(find finddepth);
6280b799 275
a0d0e21e 276
81793b90 277use strict;
278my $Is_VMS;
7e47e6ff 279my $Is_MacOS;
81793b90 280
281require File::Basename;
7e47e6ff 282require File::Spec;
81793b90 283
284my %SLnkSeen;
285my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 286 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
287 $pre_process, $post_process);
81793b90 288
289sub contract_name {
290 my ($cdir,$fn) = @_;
291
7e47e6ff 292 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 293
294 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
295
296 $fn =~ s|^\./||;
297
298 my $abs_name= $cdir . $fn;
299
300 if (substr($fn,0,3) eq '../') {
fecbda2b 301 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 302 }
303
304 return $abs_name;
305}
306
7e47e6ff 307# return the absolute name of a directory or file
308sub contract_name_Mac {
309 my ($cdir,$fn) = @_;
310 my $abs_name;
311
312 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
313
314 my $colon_count = length ($1);
315 if ($colon_count == 1) {
316 $abs_name = $cdir . $2;
317 return $abs_name;
318 }
319 else {
320 # need to move up the tree, but
321 # only if it's not a volume name
322 for (my $i=1; $i<$colon_count; $i++) {
323 unless ($cdir =~ /^[^:]+:$/) { # volume name
324 $cdir =~ s/[^:]+:$//;
325 }
326 else {
327 return undef;
328 }
329 }
330 $abs_name = $cdir . $2;
331 return $abs_name;
332 }
333
334 }
335 else {
336
337 # $fn may be a valid path to a directory or file or (dangling)
338 # symlink, without a leading ':'
339 if ( (-e $fn) || (-l $fn) ) {
340 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
341 return $fn; # $fn is already an absolute path
342 }
343 else {
344 $abs_name = $cdir . $fn;
345 return $abs_name;
346 }
347 }
348 else { # argh!, $fn is not a valid directory/file
349 return undef;
350 }
351 }
352}
81793b90 353
354sub PathCombine($$) {
355 my ($Base,$Name) = @_;
356 my $AbsName;
357
7e47e6ff 358 if ($Is_MacOS) {
359 # $Name is the resolved symlink (always a full path on MacOS),
360 # i.e. there's no need to call contract_name_Mac()
361 $AbsName = $Name;
362
363 # (simple) check for recursion
364 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
365 return undef;
366 }
81793b90 367 }
368 else {
7e47e6ff 369 if (substr($Name,0,1) eq '/') {
370 $AbsName= $Name;
371 }
372 else {
373 $AbsName= contract_name($Base,$Name);
374 }
81793b90 375
7e47e6ff 376 # (simple) check for recursion
377 my $newlen= length($AbsName);
378 if ($newlen <= length($Base)) {
379 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
380 && $AbsName eq substr($Base,0,$newlen))
381 {
382 return undef;
383 }
81793b90 384 }
385 }
386 return $AbsName;
387}
388
389sub Follow_SymLink($) {
390 my ($AbsName) = @_;
391
392 my ($NewName,$DEV, $INO);
393 ($DEV, $INO)= lstat $AbsName;
394
395 while (-l _) {
396 if ($SLnkSeen{$DEV, $INO}++) {
397 if ($follow_skip < 2) {
398 die "$AbsName is encountered a second time";
a0d0e21e 399 }
400 else {
81793b90 401 return undef;
a0d0e21e 402 }
403 }
81793b90 404 $NewName= PathCombine($AbsName, readlink($AbsName));
405 unless(defined $NewName) {
406 if ($follow_skip < 2) {
407 die "$AbsName is a recursive symbolic link";
408 }
409 else {
410 return undef;
a0d0e21e 411 }
81793b90 412 }
413 else {
414 $AbsName= $NewName;
415 }
416 ($DEV, $INO) = lstat($AbsName);
417 return undef unless defined $DEV; # dangling symbolic link
418 }
419
420 if ($full_check && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 421 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 422 die "$AbsName encountered a second time";
423 }
424 else {
425 return undef;
426 }
427 }
428
429 return $AbsName;
430}
431
17f410f9 432our($dir, $name, $fullname, $prune);
81793b90 433sub _find_dir_symlnk($$$);
434sub _find_dir($$$);
435
7e47e6ff 436# check whether or not a scalar variable is tainted
437# (code straight from the Camel, 3rd ed., page 561)
438sub is_tainted_pp {
439 my $arg = shift;
440 my $nada = substr($arg, 0, 0); # zero-length
441 local $@;
442 eval { eval "# $nada" };
443 return length($@) != 0;
444}
445
81793b90 446sub _find_opt {
447 my $wanted = shift;
448 die "invalid top directory" unless defined $_[0];
449
450 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd();
451 my $cwd_untainted = $cwd;
7e47e6ff 452 my $check_t_cwd = 1;
81793b90 453 $wanted_callback = $wanted->{wanted};
454 $bydepth = $wanted->{bydepth};
719c805e 455 $pre_process = $wanted->{preprocess};
456 $post_process = $wanted->{postprocess};
81793b90 457 $no_chdir = $wanted->{no_chdir};
458 $full_check = $wanted->{follow};
459 $follow = $full_check || $wanted->{follow_fast};
460 $follow_skip = $wanted->{follow_skip};
461 $untaint = $wanted->{untaint};
462 $untaint_pat = $wanted->{untaint_pattern};
463 $untaint_skip = $wanted->{untaint_skip};
464
e7b91b67 465 # for compatability reasons (find.pl, find2perl)
466 our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 467
468 # a symbolic link to a directory doesn't increase the link count
469 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
470
e7b91b67 471 my ($abs_dir, $Is_Dir);
81793b90 472
473 Proc_Top_Item:
474 foreach my $TOP (@_) {
7e47e6ff 475 my $top_item = $TOP;
476
477 if ($Is_MacOS) {
478 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
479 $top_item = ":$top_item"
480 if ( (-d _) && ($top_item =~ /^[^:]+\z/) );
481 }
482 else {
483 $top_item =~ s|/\z|| unless $top_item eq '/';
484 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
485 }
486
487 $Is_Dir= 0;
488
489 if ($follow) {
490
491 if ($Is_MacOS) {
492 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
493
494 if ($top_item eq $File::Find::current_dir) {
495 $abs_dir = $cwd;
496 }
497 else {
498 $abs_dir = contract_name_Mac($cwd, $top_item);
499 unless (defined $abs_dir) {
500 warn "Can't determine absolute path for $top_item (No such file or directory)\n";
501 next Proc_Top_Item;
502 }
503 }
504
505 }
506 else {
507 if (substr($top_item,0,1) eq '/') {
508 $abs_dir = $top_item;
509 }
510 elsif ($top_item eq $File::Find::current_dir) {
511 $abs_dir = $cwd;
512 }
513 else { # care about any ../
514 $abs_dir = contract_name("$cwd/",$top_item);
515 }
516 }
517 $abs_dir= Follow_SymLink($abs_dir);
518 unless (defined $abs_dir) {
81793b90 519 warn "$top_item is a dangling symbolic link\n";
520 next Proc_Top_Item;
7e47e6ff 521 }
522
523 if (-d _) {
81793b90 524 _find_dir_symlnk($wanted, $abs_dir, $top_item);
525 $Is_Dir= 1;
7e47e6ff 526 }
527 }
81793b90 528 else { # no follow
7e47e6ff 529 $topdir = $top_item;
530 unless (defined $topnlink) {
531 warn "Can't stat $top_item: $!\n";
532 next Proc_Top_Item;
533 }
534 if (-d _) {
c7b9dd21 535 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 536 _find_dir($wanted, $top_item, $topnlink);
81793b90 537 $Is_Dir= 1;
7e47e6ff 538 }
237437d0 539 else {
81793b90 540 $abs_dir= $top_item;
7e47e6ff 541 }
542 }
81793b90 543
7e47e6ff 544 unless ($Is_Dir) {
81793b90 545 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 546 if ($Is_MacOS) {
547 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
548 }
549 else {
550 ($dir,$_) = ('./', $top_item);
551 }
81793b90 552 }
553
7e47e6ff 554 $abs_dir = $dir;
555 if (( $untaint ) && (is_tainted($dir) )) {
556 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 557 unless (defined $abs_dir) {
558 if ($untaint_skip == 0) {
7e47e6ff 559 die "directory $dir is still tainted";
81793b90 560 }
561 else {
562 next Proc_Top_Item;
563 }
564 }
7e47e6ff 565 }
81793b90 566
7e47e6ff 567 unless ($no_chdir || chdir $abs_dir) {
568 warn "Couldn't chdir $abs_dir: $!\n";
569 next Proc_Top_Item;
570 }
719911cc 571
7e47e6ff 572 $name = $abs_dir . $_; # $File::Find::name
719911cc 573
7e47e6ff 574 { &$wanted_callback }; # protect against wild "next"
81793b90 575
7e47e6ff 576 }
81793b90 577
7e47e6ff 578 unless ( $no_chdir ) {
579 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
580 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
581 unless (defined $cwd_untainted) {
582 die "insecure cwd in find(depth)";
583 }
584 $check_t_cwd = 0;
585 }
586 unless (chdir $cwd_untainted) {
587 die "Can't cd to $cwd: $!\n";
588 }
589 }
81793b90 590 }
591}
592
593# API:
594# $wanted
595# $p_dir : "parent directory"
596# $nlink : what came back from the stat
597# preconditions:
598# chdir (if not no_chdir) to dir
599
600sub _find_dir($$$) {
601 my ($wanted, $p_dir, $nlink) = @_;
602 my ($CdLvl,$Level) = (0,0);
603 my @Stack;
604 my @filenames;
605 my ($subcount,$sub_nlink);
606 my $SE= [];
607 my $dir_name= $p_dir;
7e47e6ff 608 my $dir_pref;
609 my $dir_rel;
610 my $tainted = 0;
611
612 if ($Is_MacOS) {
613 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
614 $dir_rel= ':'; # directory name relative to current directory
615 }
616 else {
617 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
618 $dir_rel= '.'; # directory name relative to current directory
619 }
81793b90 620
621 local ($dir, $name, $prune, *DIR);
7e47e6ff 622
623 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 624 my $udir = $p_dir;
7e47e6ff 625 if (( $untaint ) && (is_tainted($p_dir) )) {
626 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 627 unless (defined $udir) {
628 if ($untaint_skip == 0) {
629 die "directory $p_dir is still tainted";
630 }
631 else {
632 return;
633 }
237437d0 634 }
a0d0e21e 635 }
81793b90 636 unless (chdir $udir) {
637 warn "Can't cd to $udir: $!\n";
638 return;
639 }
640 }
7e47e6ff 641
642 # push the starting directory
57e73c4b 643 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 644
7e47e6ff 645 if ($Is_MacOS) {
646 $p_dir = $dir_pref; # ensure trailing ':'
647 }
648
81793b90 649 while (defined $SE) {
650 unless ($bydepth) {
7e47e6ff 651 $dir= $p_dir; # $File::Find::dir
652 $name= $dir_name; # $File::Find::name
653 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 654 # prune may happen here
7e47e6ff 655 $prune= 0;
656 { &$wanted_callback }; # protect against wild "next"
657 next if $prune;
81793b90 658 }
7e47e6ff 659
81793b90 660 # change to that directory
7e47e6ff 661 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 662 my $udir= $dir_rel;
7e47e6ff 663 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
664 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 665 unless (defined $udir) {
666 if ($untaint_skip == 0) {
7e47e6ff 667 if ($Is_MacOS) {
668 die "directory ($p_dir) $dir_rel is still tainted";
669 }
670 else {
671 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
672 }
673 } else { # $untaint_skip == 1
674 next;
81793b90 675 }
676 }
677 }
678 unless (chdir $udir) {
7e47e6ff 679 if ($Is_MacOS) {
680 warn "Can't cd to ($p_dir) $udir: $!\n";
681 }
682 else {
683 warn "Can't cd to (" . ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
684 }
81793b90 685 next;
686 }
687 $CdLvl++;
688 }
689
7e47e6ff 690 if ($Is_MacOS) {
691 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
692 }
693
694 $dir= $dir_name; # $File::Find::dir
81793b90 695
696 # Get the list of files in the current directory.
7e47e6ff 697 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
81793b90 698 warn "Can't opendir($dir_name): $!\n";
699 next;
700 }
701 @filenames = readdir DIR;
702 closedir(DIR);
719c805e 703 @filenames = &$pre_process(@filenames) if $pre_process;
704 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 705
706 if ($nlink == 2 && !$avoid_nlink) {
707 # This dir has no subdirectories.
708 for my $FN (@filenames) {
7e47e6ff 709 next if $FN =~ $File::Find::skip_pattern;
81793b90 710
7e47e6ff 711 $name = $dir_pref . $FN; # $File::Find::name
712 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 713 { &$wanted_callback }; # protect against wild "next"
81793b90 714 }
715
716 }
717 else {
718 # This dir has subdirectories.
719 $subcount = $nlink - 2;
720
721 for my $FN (@filenames) {
7e47e6ff 722 next if $FN =~ $File::Find::skip_pattern;
81793b90 723 if ($subcount > 0 || $avoid_nlink) {
724 # Seen all the subdirs?
725 # check for directoriness.
726 # stat is faster for a file in the current directory
07867069 727 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 728
729 if (-d _) {
730 --$subcount;
c7b9dd21 731 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 732 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
733 }
734 else {
7e47e6ff 735 $name = $dir_pref . $FN; # $File::Find::name
736 $_= ($no_chdir ? $name : $FN); # $_
73396e07 737 { &$wanted_callback }; # protect against wild "next"
81793b90 738 }
739 }
07867069 740 else {
7e47e6ff 741 $name = $dir_pref . $FN; # $File::Find::name
742 $_= ($no_chdir ? $name : $FN); # $_
73396e07 743 { &$wanted_callback }; # protect against wild "next"
81793b90 744 }
745 }
746 }
17b275ff 747 }
748 continue {
57e73c4b 749 while ( defined ($SE = pop @Stack) ) {
81793b90 750 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
751 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 752 my $tmp;
753 if ($Is_MacOS) {
754 $tmp = (':' x ($CdLvl-$Level)) . ':';
755 }
756 else {
757 $tmp = join('/',('..') x ($CdLvl-$Level));
758 }
759 die "Can't cd to $dir_name" . $tmp
760 unless chdir ($tmp);
81793b90 761 $CdLvl = $Level;
762 }
7e47e6ff 763
764 if ($Is_MacOS) {
765 # $pdir always has a trailing ':', except for the starting dir,
766 # where $dir_rel eq ':'
767 $dir_name = "$p_dir$dir_rel";
768 $dir_pref = "$dir_name:";
769 }
770 else {
771 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
772 $dir_pref = "$dir_name/";
773 }
774
719c805e 775 if ( $nlink == -2 ) {
7e47e6ff 776 $name = $dir = $p_dir; # $File::Find::name / dir
777 if ($Is_MacOS) {
778 $_ = ':'; # $_
779 }
780 else {
781 $_ = '.';
782 }
719c805e 783 &$post_process; # End-of-directory processing
7e47e6ff 784 }
785 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
786 $name = $dir_name;
787 if ($Is_MacOS) {
788 if ($dir_rel eq ':') { # must be the top dir, where we started
789 $name =~ s|:$||; # $File::Find::name
790 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
791 }
792 $dir = $p_dir; # $File::Find::dir
793 $_ = ($no_chdir ? $name : $dir_rel); # $_
794 }
795 else {
796 if ( substr($name,-2) eq '/.' ) {
797 $name =~ s|/\.$||;
798 }
799 $dir = $p_dir;
800 $_ = ($no_chdir ? $dir_name : $dir_rel );
801 if ( substr($_,-2) eq '/.' ) {
802 s|/\.$||;
803 }
804 }
805 { &$wanted_callback }; # protect against wild "next"
806 }
807 else {
808 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
809 last;
810 }
81793b90 811 }
a0d0e21e 812 }
813}
814
81793b90 815
816# API:
817# $wanted
818# $dir_loc : absolute location of a dir
819# $p_dir : "parent directory"
820# preconditions:
821# chdir (if not no_chdir) to dir
822
823sub _find_dir_symlnk($$$) {
7e47e6ff 824 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 825 my @Stack;
826 my @filenames;
827 my $new_loc;
7e47e6ff 828 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 829 my $SE = [];
830 my $dir_name = $p_dir;
7e47e6ff 831 my $dir_pref;
832 my $loc_pref;
833 my $dir_rel;
834 my $byd_flag; # flag for pending stack entry if $bydepth
835 my $tainted = 0;
836 my $ok = 1;
837
838 if ($Is_MacOS) {
839 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
840 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
841 $dir_rel = ':'; # directory name relative to current directory
842 } else {
843 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
844 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
845 $dir_rel = '.'; # directory name relative to current directory
846 }
81793b90 847
848 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 849
850 unless ($no_chdir) {
851 # untaint the topdir
852 if (( $untaint ) && (is_tainted($dir_loc) )) {
853 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
854 # once untainted, $updir_loc is pushed on the stack (as parent directory);
855 # hence, we don't need to untaint the parent directory every time we chdir
856 # to it later
857 unless (defined $updir_loc) {
81793b90 858 if ($untaint_skip == 0) {
859 die "directory $dir_loc is still tainted";
860 }
861 else {
862 return;
863 }
864 }
865 }
7e47e6ff 866 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
867 unless ($ok) {
868 warn "Can't cd to $updir_loc: $!\n";
81793b90 869 return;
870 }
871 }
872
7e47e6ff 873 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
874
875 if ($Is_MacOS) {
876 $p_dir = $dir_pref; # ensure trailing ':'
877 }
57e73c4b 878
81793b90 879 while (defined $SE) {
880
881 unless ($bydepth) {
7e47e6ff 882 # change (back) to parent directory (always untainted)
704ea872 883 unless ($no_chdir) {
7e47e6ff 884 unless (chdir $updir_loc) {
885 warn "Can't cd to $updir_loc: $!\n";
704ea872 886 next;
887 }
888 }
7e47e6ff 889 $dir= $p_dir; # $File::Find::dir
890 $name= $dir_name; # $File::Find::name
891 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
892 $fullname= $dir_loc; # $File::Find::fullname
81793b90 893 # prune may happen here
7e47e6ff 894 $prune= 0;
704ea872 895 lstat($_); # make sure file tests with '_' work
7e47e6ff 896 { &$wanted_callback }; # protect against wild "next"
897 next if $prune;
81793b90 898 }
899
900 # change to that directory
7e47e6ff 901 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
902 $updir_loc = $dir_loc;
903 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
904 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
905 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
906 unless (defined $updir_loc) {
81793b90 907 if ($untaint_skip == 0) {
908 die "directory $dir_loc is still tainted";
a0d0e21e 909 }
237437d0 910 else {
81793b90 911 next;
237437d0 912 }
a0d0e21e 913 }
914 }
7e47e6ff 915 unless (chdir $updir_loc) {
916 warn "Can't cd to $updir_loc: $!\n";
81793b90 917 next;
918 }
919 }
920
7e47e6ff 921 if ($Is_MacOS) {
922 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
923 }
924
925 $dir = $dir_name; # $File::Find::dir
81793b90 926
927 # Get the list of files in the current directory.
7e47e6ff 928 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
81793b90 929 warn "Can't opendir($dir_loc): $!\n";
930 next;
931 }
932 @filenames = readdir DIR;
933 closedir(DIR);
934
935 for my $FN (@filenames) {
7e47e6ff 936 next if $FN =~ $File::Find::skip_pattern;
81793b90 937
938 # follow symbolic links / do an lstat
07867069 939 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 940
941 # ignore if invalid symlink
942 next unless defined $new_loc;
7e47e6ff 943
81793b90 944 if (-d _) {
7e47e6ff 945 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 946 }
947 else {
7e47e6ff 948 $fullname = $new_loc; # $File::Find::fullname
949 $name = $dir_pref . $FN; # $File::Find::name
950 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 951 { &$wanted_callback }; # protect against wild "next"
81793b90 952 }
953 }
954
81793b90 955 }
956 continue {
57e73c4b 957 while (defined($SE = pop @Stack)) {
7e47e6ff 958 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
959 if ($Is_MacOS) {
960 # $p_dir always has a trailing ':', except for the starting dir,
961 # where $dir_rel eq ':'
962 $dir_name = "$p_dir$dir_rel";
963 $dir_pref = "$dir_name:";
964 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
965 }
966 else {
967 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
968 $dir_pref = "$dir_name/";
969 $loc_pref = "$dir_loc/";
970 }
971 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
972 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
973 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
974 warn "Can't cd to $updir_loc: $!\n";
975 next;
976 }
977 }
978 $fullname = $dir_loc; # $File::Find::fullname
979 $name = $dir_name; # $File::Find::name
980 if ($Is_MacOS) {
981 if ($dir_rel eq ':') { # must be the top dir, where we started
982 $name =~ s|:$||; # $File::Find::name
983 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
984 }
985 $dir = $p_dir; # $File::Find::dir
986 $_ = ($no_chdir ? $name : $dir_rel); # $_
987 }
988 else {
989 if ( substr($name,-2) eq '/.' ) {
990 $name =~ s|/\.$||; # $File::Find::name
991 }
992 $dir = $p_dir; # $File::Find::dir
993 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
994 if ( substr($_,-2) eq '/.' ) {
995 s|/\.$||;
996 }
997 }
998
999 lstat($_); # make sure file tests with '_' work
1000 { &$wanted_callback }; # protect against wild "next"
1001 }
1002 else {
1003 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1004 last;
1005 }
a0d0e21e 1006 }
1007 }
1008}
1009
81793b90 1010
20408e3c 1011sub wrap_wanted {
81793b90 1012 my $wanted = shift;
1013 if ( ref($wanted) eq 'HASH' ) {
1014 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1015 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1016 }
1017 if ( $wanted->{untaint} ) {
7e47e6ff 1018 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1019 unless defined $wanted->{untaint_pattern};
1020 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1021 }
1022 return $wanted;
1023 }
1024 else {
1025 return { wanted => $wanted };
1026 }
a0d0e21e 1027}
1028
20408e3c 1029sub find {
81793b90 1030 my $wanted = shift;
7e47e6ff 1031 %SLnkSeen= (); # clear hash first
81793b90 1032 _find_opt(wrap_wanted($wanted), @_);
1033 %SLnkSeen= (); # free memory
a0d0e21e 1034}
1035
55d729e4 1036sub finddepth {
81793b90 1037 my $wanted = wrap_wanted(shift);
7e47e6ff 1038 %SLnkSeen= (); # clear hash first
81793b90 1039 $wanted->{bydepth} = 1;
1040 _find_opt($wanted, @_);
1041 %SLnkSeen= (); # free memory
20408e3c 1042}
6280b799 1043
7e47e6ff 1044# default
1045$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1046$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1047
6280b799 1048# These are hard-coded for now, but may move to hint files.
10eba763 1049if ($^O eq 'VMS') {
81793b90 1050 $Is_VMS = 1;
7e47e6ff 1051 $File::Find::dont_use_nlink = 1;
1052}
1053elsif ($^O eq 'MacOS') {
1054 $Is_MacOS = 1;
1055 $File::Find::dont_use_nlink = 1;
1056 $File::Find::skip_pattern = qr/^Icon\015\z/;
1057 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1058}
1059
7e47e6ff 1060# this _should_ work properly on all platforms
1061# where File::Find can be expected to work
1062$File::Find::current_dir = File::Spec->curdir || '.';
1063
81793b90 1064$File::Find::dont_use_nlink = 1
497711e7 1065 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
fa6a1c44 1066 $^O eq 'cygwin' || $^O eq 'epoc';
6280b799 1067
20408e3c 1068# Set dont_use_nlink in your hint file if your system's stat doesn't
1069# report the number of links in a directory as an indication
1070# of the number of files.
1071# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1072unless ($File::Find::dont_use_nlink) {
1073 require Config;
1074 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1075}
1076
7e47e6ff 1077# We need a function that checks if a scalar is tainted. Either use the
1078# Scalar::Util module's tainted() function or our (slower) pure Perl
1079# fallback is_tainted_pp()
1080{
1081 local $@;
1082 eval { require Scalar::Util };
1083 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1084}
1085
a0d0e21e 10861;