(retracted by #14334)
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b 10=head1 NAME
11
abfdd623 12File::Find - Traverse a directory tree.
f06db76b 13
14=head1 SYNOPSIS
15
16 use File::Find;
abfdd623 17 find(\&wanted, @directories_to_seach);
f06db76b 18 sub wanted { ... }
237437d0 19
f06db76b 20 use File::Find;
abfdd623 21 finddepth(\&wanted, @directories_to_search);
f06db76b 22 sub wanted { ... }
3cb6de81 23
81793b90 24 use File::Find;
25 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 26
27=head1 DESCRIPTION
28
abfdd623 29These are functions for searching through directory trees doing work
30on each file found similar to the Unix I<find> command. File::Find
31exports two functions, C<find> and C<finddepth>. They work similarly
32but have subtle differences.
33
34=over 4
35
36=item B<find>
37
38 find(\&wanted, @directories);
39 find(\%options, @directories);
40
41find() does a breadth-first search over the given @directories in the
42order they are given. In essense, it works from the top down.
43
44For each file or directory found the &wanted subroutine is called (see
45below for details). Additionally, for each directory found it will go
46into that directory and continue the search.
47
48=item B<finddepth>
49
50 finddepth(\&wanted, @directories);
51 finddepth(\%options, @directories);
52
53finddepth() works just like find() except it does a depth-first search.
54It works from the bottom of the directory tree up.
55
56=back
57
58=head2 %options
59
20408e3c 60The first argument to find() is either a hash reference describing the
abfdd623 61operations to be performed for each file, or a code reference. The
62code reference is described in L<The wanted function> below.
20408e3c 63
81793b90 64Here are the possible keys for the hash:
65
66=over 3
67
68=item C<wanted>
69
abfdd623 70The value should be a code reference. This code reference is
71described in L<The wanted function> below.
81793b90 72
73=item C<bydepth>
74
75Reports the name of a directory only AFTER all its entries
76have been reported. Entry point finddepth() is a shortcut for
f801979b 77specifying C<{ bydepth =E<gt> 1 }> in the first argument of find().
81793b90 78
719c805e 79=item C<preprocess>
80
7e47e6ff 81The value should be a code reference. This code reference is used to
82preprocess the current directory. The name of currently processed
83directory is in $File::Find::dir. Your preprocessing function is
84called after readdir() but before the loop that calls the wanted()
85function. It is called with a list of strings (actually file/directory
86names) and is expected to return a list of strings. The code can be
87used to sort the file/directory names alphabetically, numerically,
88or to filter out directory entries based on their name alone. When
89I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 90
91=item C<postprocess>
92
7e47e6ff 93The value should be a code reference. It is invoked just before leaving
94the currently processed directory. It is called in void context with no
95arguments. The name of the current directory is in $File::Find::dir. This
96hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 97usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 98no-op.
719c805e 99
81793b90 100=item C<follow>
101
102Causes symbolic links to be followed. Since directory trees with symbolic
103links (followed) may contain files more than once and may even have
104cycles, a hash has to be built up with an entry for each file.
105This might be expensive both in space and time for a large
106directory tree. See I<follow_fast> and I<follow_skip> below.
107If either I<follow> or I<follow_fast> is in effect:
108
109=over 6
110
a45bd81d 111=item *
81793b90 112
f10e1564 113It is guaranteed that an I<lstat> has been called before the user's
81793b90 114I<wanted()> function is called. This enables fast file checks involving S< _>.
115
a45bd81d 116=item *
81793b90 117
118There is a variable C<$File::Find::fullname> which holds the absolute
119pathname of the file with all symbolic links resolved
120
121=back
122
123=item C<follow_fast>
124
f10e1564 125This is similar to I<follow> except that it may report some files more
126than once. It does detect cycles, however. Since only symbolic links
127have to be hashed, this is much cheaper both in space and time. If
128processing a file more than once (by the user's I<wanted()> function)
81793b90 129is worse than just taking time, the option I<follow> should be used.
130
131=item C<follow_skip>
132
133C<follow_skip==1>, which is the default, causes all files which are
134neither directories nor symbolic links to be ignored if they are about
135to be processed a second time. If a directory or a symbolic link
136are about to be processed a second time, File::Find dies.
137C<follow_skip==0> causes File::Find to die if any file is about to be
138processed a second time.
139C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 140directories but to proceed normally otherwise.
20408e3c 141
80e52b73 142=item C<dangling_symlinks>
143
144If true and a code reference, will be called with the symbolic link
145name and the directory it lives in as arguments. Otherwise, if true
146and warnings are on, warning "symbolic_link_name is a dangling
147symbolic link\n" will be issued. If false, the dangling symbolic link
148will be silently ignored.
f06db76b 149
81793b90 150=item C<no_chdir>
151
152Does not C<chdir()> to each directory as it recurses. The wanted()
153function will need to be aware of this, of course. In this case,
154C<$_> will be the same as C<$File::Find::name>.
155
156=item C<untaint>
157
158If find is used in taint-mode (-T command line switch or if EUID != UID
159or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 160before they can be chdir'ed to. Therefore they are checked against a regular
161expression I<untaint_pattern>. Note that all names passed to the user's
162I<wanted()> function are still tainted. If this option is used while
163not in taint-mode, C<untaint> is a no-op.
81793b90 164
165=item C<untaint_pattern>
166
167See above. This should be set using the C<qr> quoting operator.
168The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 169Note that the parentheses are vital.
81793b90 170
171=item C<untaint_skip>
172
7e47e6ff 173If set, a directory which fails the I<untaint_pattern> is skipped,
174including all its sub-directories. The default is to 'die' in such a case.
81793b90 175
176=back
177
abfdd623 178=head2 The wanted function
179
180The wanted() function does whatever verifications you want on each
181file and directory. It takes no arguments but rather does its work
182through a collection of variables.
183
184=over 4
185
186=item C<$File::Find::dir> is the current directory name,
187
188=item C<$_> is the current filename within that directory
189
190=item C<$File::Find::name> is the complete pathname to the file.
191
192=back
193
194For example, when examining the file /some/path/foo.ext you will have:
195
196 $File::Find::dir = /some/path/
197 $_ = foo.ext
198 $File::Find::name = /some/path/foo.ext
199
200You are chdir()'d toC<$File::Find::dir> when the function is called,
201unless C<no_chdir> was specified. Note that when changing to
202directories is in effect the root directory (F</>) is a somewhat
203special case inasmuch as the concatenation of C<$File::Find::dir>,
204C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
205table below summarizes all variants:
5cf0a2f2 206
207 $File::Find::name $File::Find::dir $_
208 default / / .
209 no_chdir=>0 /etc / etc
210 /etc/x /etc x
abfdd623 211
5cf0a2f2 212 no_chdir=>1 / / /
213 /etc / /etc
214 /etc/x /etc /etc/x
215
216
217When <follow> or <follow_fast> are in effect, there is
f10e1564 218also a C<$File::Find::fullname>. The function may set
219C<$File::Find::prune> to prune the tree unless C<bydepth> was
220specified. Unless C<follow> or C<follow_fast> is specified, for
221compatibility reasons (find.pl, find2perl) there are in addition the
222following globals available: C<$File::Find::topdir>,
223C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 224C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 225
20408e3c 226This library is useful for the C<find2perl> tool, which when fed,
f06db76b 227
228 find2perl / -name .nfs\* -mtime +7 \
81793b90 229 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 230
231produces something like:
232
233 sub wanted {
c7b9dd21 234 /^\.nfs.*\z/s &&
81793b90 235 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 236 int(-M _) > 7 &&
237 unlink($_)
238 ||
81793b90 239 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 240 $dev < 0 &&
6280b799 241 ($File::Find::prune = 1);
f06db76b 242 }
243
43dece2a 244Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
245filehandle that caches the information from the preceding
246stat(), lstat(), or filetest.
247
1cffc1dd 248Here's another interesting wanted function. It will find all symbolic
249links that don't resolve:
f06db76b 250
251 sub wanted {
81793b90 252 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 253 }
f06db76b 254
81793b90 255See also the script C<pfind> on CPAN for a nice application of this
256module.
257
cd68ec93 258=head1 WARNINGS
259
260If you run your program with the C<-w> switch, or if you use the
261C<warnings> pragma, File::Find will report warnings for several weird
262situations. You can disable these warnings by putting the statement
263
264 no warnings 'File::Find';
265
266in the appropriate scope. See L<perllexwarn> for more info about lexical
267warnings.
268
81793b90 269=head1 CAVEAT
270
5fa2bf2b 271=over 2
272
273=item $dont_use_nlink
274
275You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 276force File::Find to always stat directories. This was used for file systems
277that do not have an C<nlink> count matching the number of sub-directories.
278Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
279system) and a couple of others.
5fa2bf2b 280
6cf3b067 281You shouldn't need to set this variable, since File::Find should now detect
282such file systems on-the-fly and switch itself to using stat. This works even
283for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 284
6cf3b067 285If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 286
287=item symlinks
288
f10e1564 289Be aware that the option to follow symbolic links can be dangerous.
81793b90 290Depending on the structure of the directory tree (including symbolic
291links to directories) you might traverse a given (physical) directory
292more than once (only if C<follow_fast> is in effect).
293Furthermore, deleting or changing files in a symbolically linked directory
294might cause very unpleasant surprises, since you delete or change files
295in an unknown directory.
0530a6c4 296
5fa2bf2b 297=back
298
7e47e6ff 299=head1 NOTES
300
301=over 4
302
303=item *
304
305Mac OS (Classic) users should note a few differences:
306
307=over 4
308
309=item *
310
311The path separator is ':', not '/', and the current directory is denoted
312as ':', not '.'. You should be careful about specifying relative pathnames.
313While a full path always begins with a volume name, a relative pathname
314should always begin with a ':'. If specifying a volume name only, a
315trailing ':' is required.
316
317=item *
318
319C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
320contains the name of a directory, that name may or may not end with a
321':'. Likewise, C<$File::Find::name>, which contains the complete
322pathname to that directory, and C<$File::Find::fullname>, which holds
323the absolute pathname of that directory with all symbolic links resolved,
324may or may not end with a ':'.
325
326=item *
327
328The default C<untaint_pattern> (see above) on Mac OS is set to
329C<qr|^(.+)$|>. Note that the parentheses are vital.
330
331=item *
332
333The invisible system file "Icon\015" is ignored. While this file may
334appear in every directory, there are some more invisible system files
335on every volume, which are all located at the volume root level (i.e.
336"MacintoshHD:"). These system files are B<not> excluded automatically.
337Your filter may use the following code to recognize invisible files or
338directories (requires Mac::Files):
339
340 use Mac::Files;
341
342 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 343 # 0 if it's visible or undef if an error occurred
7e47e6ff 344
345 sub invisible($) {
346 my $file = shift;
347 my ($fileCat, $fileInfo);
348 my $invisible_flag = 1 << 14;
349
350 if ( $fileCat = FSpGetCatInfo($file) ) {
351 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
352 return (($fileInfo->fdFlags & $invisible_flag) && 1);
353 }
354 }
355 return undef;
356 }
357
358Generally, invisible files are system files, unless an odd application
359decides to use invisible files for its own purposes. To distinguish
360such files from system files, you have to look at the B<type> and B<creator>
361file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
362C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
363(see MacPerl.pm for details).
364
365Files that appear on the desktop actually reside in an (hidden) directory
366named "Desktop Folder" on the particular disk volume. Note that, although
367all desktop files appear to be on the same "virtual" desktop, each disk
368volume actually maintains its own "Desktop Folder" directory.
369
370=back
371
372=back
0530a6c4 373
a85af077 374=head1 HISTORY
375
376File::Find used to produce incorrect results if called recursively.
377During the development of perl 5.8 this bug was fixed.
378The first fixed version of File::Find was 1.01.
379
f06db76b 380=cut
381
b75c8c73 382our @ISA = qw(Exporter);
383our @EXPORT = qw(find finddepth);
6280b799 384
a0d0e21e 385
81793b90 386use strict;
387my $Is_VMS;
7e47e6ff 388my $Is_MacOS;
81793b90 389
390require File::Basename;
7e47e6ff 391require File::Spec;
81793b90 392
9f826d6a 393# Should ideally be my() not our() but local() currently
394# refuses to operate on lexicals
395
396our %SLnkSeen;
397our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 398 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 399 $pre_process, $post_process, $dangling_symlinks);
81793b90 400
401sub contract_name {
402 my ($cdir,$fn) = @_;
403
7e47e6ff 404 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 405
406 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
407
408 $fn =~ s|^\./||;
409
410 my $abs_name= $cdir . $fn;
411
412 if (substr($fn,0,3) eq '../') {
fecbda2b 413 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 414 }
415
416 return $abs_name;
417}
418
7e47e6ff 419# return the absolute name of a directory or file
420sub contract_name_Mac {
421 my ($cdir,$fn) = @_;
422 my $abs_name;
423
424 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
425
426 my $colon_count = length ($1);
427 if ($colon_count == 1) {
428 $abs_name = $cdir . $2;
429 return $abs_name;
430 }
431 else {
432 # need to move up the tree, but
433 # only if it's not a volume name
434 for (my $i=1; $i<$colon_count; $i++) {
435 unless ($cdir =~ /^[^:]+:$/) { # volume name
436 $cdir =~ s/[^:]+:$//;
437 }
438 else {
439 return undef;
440 }
441 }
442 $abs_name = $cdir . $2;
443 return $abs_name;
444 }
445
446 }
447 else {
448
449 # $fn may be a valid path to a directory or file or (dangling)
450 # symlink, without a leading ':'
451 if ( (-e $fn) || (-l $fn) ) {
452 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
453 return $fn; # $fn is already an absolute path
454 }
455 else {
456 $abs_name = $cdir . $fn;
457 return $abs_name;
458 }
459 }
460 else { # argh!, $fn is not a valid directory/file
461 return undef;
462 }
463 }
464}
81793b90 465
466sub PathCombine($$) {
467 my ($Base,$Name) = @_;
468 my $AbsName;
469
7e47e6ff 470 if ($Is_MacOS) {
471 # $Name is the resolved symlink (always a full path on MacOS),
472 # i.e. there's no need to call contract_name_Mac()
473 $AbsName = $Name;
474
475 # (simple) check for recursion
476 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
477 return undef;
478 }
81793b90 479 }
480 else {
7e47e6ff 481 if (substr($Name,0,1) eq '/') {
482 $AbsName= $Name;
483 }
484 else {
485 $AbsName= contract_name($Base,$Name);
486 }
81793b90 487
7e47e6ff 488 # (simple) check for recursion
489 my $newlen= length($AbsName);
490 if ($newlen <= length($Base)) {
491 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
492 && $AbsName eq substr($Base,0,$newlen))
493 {
494 return undef;
495 }
81793b90 496 }
497 }
498 return $AbsName;
499}
500
501sub Follow_SymLink($) {
502 my ($AbsName) = @_;
503
504 my ($NewName,$DEV, $INO);
505 ($DEV, $INO)= lstat $AbsName;
506
507 while (-l _) {
508 if ($SLnkSeen{$DEV, $INO}++) {
509 if ($follow_skip < 2) {
510 die "$AbsName is encountered a second time";
a0d0e21e 511 }
512 else {
81793b90 513 return undef;
a0d0e21e 514 }
515 }
81793b90 516 $NewName= PathCombine($AbsName, readlink($AbsName));
517 unless(defined $NewName) {
518 if ($follow_skip < 2) {
519 die "$AbsName is a recursive symbolic link";
520 }
521 else {
522 return undef;
a0d0e21e 523 }
81793b90 524 }
525 else {
526 $AbsName= $NewName;
527 }
528 ($DEV, $INO) = lstat($AbsName);
529 return undef unless defined $DEV; # dangling symbolic link
530 }
531
cd68ec93 532 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 533 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 534 die "$AbsName encountered a second time";
535 }
536 else {
537 return undef;
538 }
539 }
540
541 return $AbsName;
542}
543
17f410f9 544our($dir, $name, $fullname, $prune);
81793b90 545sub _find_dir_symlnk($$$);
546sub _find_dir($$$);
547
7e47e6ff 548# check whether or not a scalar variable is tainted
549# (code straight from the Camel, 3rd ed., page 561)
550sub is_tainted_pp {
551 my $arg = shift;
552 my $nada = substr($arg, 0, 0); # zero-length
553 local $@;
554 eval { eval "# $nada" };
555 return length($@) != 0;
556}
557
81793b90 558sub _find_opt {
559 my $wanted = shift;
560 die "invalid top directory" unless defined $_[0];
561
9f826d6a 562 # This function must local()ize everything because callbacks may
563 # call find() or finddepth()
564
565 local %SLnkSeen;
566 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
567 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 568 $pre_process, $post_process, $dangling_symlinks);
9f826d6a 569 local($dir, $name, $fullname, $prune);
570
a0c9c202 571 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 572 my $cwd_untainted = $cwd;
573 my $check_t_cwd = 1;
574 $wanted_callback = $wanted->{wanted};
575 $bydepth = $wanted->{bydepth};
576 $pre_process = $wanted->{preprocess};
577 $post_process = $wanted->{postprocess};
578 $no_chdir = $wanted->{no_chdir};
579 $full_check = $wanted->{follow};
580 $follow = $full_check || $wanted->{follow_fast};
581 $follow_skip = $wanted->{follow_skip};
582 $untaint = $wanted->{untaint};
583 $untaint_pat = $wanted->{untaint_pattern};
584 $untaint_skip = $wanted->{untaint_skip};
585 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 586
1cffc1dd 587 # for compatibility reasons (find.pl, find2perl)
9f826d6a 588 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 589
590 # a symbolic link to a directory doesn't increase the link count
591 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
592
e7b91b67 593 my ($abs_dir, $Is_Dir);
81793b90 594
595 Proc_Top_Item:
596 foreach my $TOP (@_) {
7e47e6ff 597 my $top_item = $TOP;
598
599 if ($Is_MacOS) {
600 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
601 $top_item = ":$top_item"
3fa6e24b 602 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 603 }
604 else {
605 $top_item =~ s|/\z|| unless $top_item eq '/';
606 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
607 }
608
609 $Is_Dir= 0;
610
611 if ($follow) {
612
613 if ($Is_MacOS) {
614 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
615
616 if ($top_item eq $File::Find::current_dir) {
617 $abs_dir = $cwd;
618 }
619 else {
620 $abs_dir = contract_name_Mac($cwd, $top_item);
621 unless (defined $abs_dir) {
cd68ec93 622 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 623 next Proc_Top_Item;
624 }
625 }
626
627 }
628 else {
629 if (substr($top_item,0,1) eq '/') {
630 $abs_dir = $top_item;
631 }
632 elsif ($top_item eq $File::Find::current_dir) {
633 $abs_dir = $cwd;
634 }
635 else { # care about any ../
636 $abs_dir = contract_name("$cwd/",$top_item);
637 }
638 }
639 $abs_dir= Follow_SymLink($abs_dir);
640 unless (defined $abs_dir) {
80e52b73 641 if ($dangling_symlinks) {
642 if (ref $dangling_symlinks eq 'CODE') {
643 $dangling_symlinks->($top_item, $cwd);
644 } else {
cd68ec93 645 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 646 }
647 }
81793b90 648 next Proc_Top_Item;
7e47e6ff 649 }
650
651 if (-d _) {
81793b90 652 _find_dir_symlnk($wanted, $abs_dir, $top_item);
653 $Is_Dir= 1;
7e47e6ff 654 }
655 }
81793b90 656 else { # no follow
7e47e6ff 657 $topdir = $top_item;
658 unless (defined $topnlink) {
cd68ec93 659 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 660 next Proc_Top_Item;
661 }
662 if (-d _) {
c7b9dd21 663 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 664 _find_dir($wanted, $top_item, $topnlink);
81793b90 665 $Is_Dir= 1;
7e47e6ff 666 }
237437d0 667 else {
81793b90 668 $abs_dir= $top_item;
7e47e6ff 669 }
670 }
81793b90 671
7e47e6ff 672 unless ($Is_Dir) {
81793b90 673 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 674 if ($Is_MacOS) {
675 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
676 }
677 else {
678 ($dir,$_) = ('./', $top_item);
679 }
81793b90 680 }
681
7e47e6ff 682 $abs_dir = $dir;
683 if (( $untaint ) && (is_tainted($dir) )) {
684 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 685 unless (defined $abs_dir) {
686 if ($untaint_skip == 0) {
7e47e6ff 687 die "directory $dir is still tainted";
81793b90 688 }
689 else {
690 next Proc_Top_Item;
691 }
692 }
7e47e6ff 693 }
81793b90 694
7e47e6ff 695 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 696 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 697 next Proc_Top_Item;
698 }
719911cc 699
7e47e6ff 700 $name = $abs_dir . $_; # $File::Find::name
719911cc 701
abfdd623 702 { $wanted_callback->() }; # protect against wild "next"
81793b90 703
7e47e6ff 704 }
81793b90 705
7e47e6ff 706 unless ( $no_chdir ) {
707 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
708 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
709 unless (defined $cwd_untainted) {
710 die "insecure cwd in find(depth)";
711 }
712 $check_t_cwd = 0;
713 }
714 unless (chdir $cwd_untainted) {
715 die "Can't cd to $cwd: $!\n";
716 }
717 }
81793b90 718 }
719}
720
721# API:
722# $wanted
723# $p_dir : "parent directory"
724# $nlink : what came back from the stat
725# preconditions:
726# chdir (if not no_chdir) to dir
727
728sub _find_dir($$$) {
729 my ($wanted, $p_dir, $nlink) = @_;
730 my ($CdLvl,$Level) = (0,0);
731 my @Stack;
732 my @filenames;
733 my ($subcount,$sub_nlink);
734 my $SE= [];
735 my $dir_name= $p_dir;
7e47e6ff 736 my $dir_pref;
39e79f6b 737 my $dir_rel = $File::Find::current_dir;
7e47e6ff 738 my $tainted = 0;
5fa2bf2b 739 my $no_nlink;
7e47e6ff 740
741 if ($Is_MacOS) {
742 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 743 }
744 else {
745 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 746 }
81793b90 747
748 local ($dir, $name, $prune, *DIR);
7e47e6ff 749
750 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 751 my $udir = $p_dir;
7e47e6ff 752 if (( $untaint ) && (is_tainted($p_dir) )) {
753 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 754 unless (defined $udir) {
755 if ($untaint_skip == 0) {
756 die "directory $p_dir is still tainted";
757 }
758 else {
759 return;
760 }
237437d0 761 }
a0d0e21e 762 }
81793b90 763 unless (chdir $udir) {
cd68ec93 764 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 765 return;
766 }
767 }
7e47e6ff 768
769 # push the starting directory
57e73c4b 770 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 771
7e47e6ff 772 if ($Is_MacOS) {
773 $p_dir = $dir_pref; # ensure trailing ':'
774 }
775
81793b90 776 while (defined $SE) {
777 unless ($bydepth) {
7e47e6ff 778 $dir= $p_dir; # $File::Find::dir
779 $name= $dir_name; # $File::Find::name
780 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 781 # prune may happen here
7e47e6ff 782 $prune= 0;
abfdd623 783 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 784 next if $prune;
81793b90 785 }
7e47e6ff 786
81793b90 787 # change to that directory
7e47e6ff 788 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 789 my $udir= $dir_rel;
7e47e6ff 790 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
791 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 792 unless (defined $udir) {
793 if ($untaint_skip == 0) {
7e47e6ff 794 if ($Is_MacOS) {
795 die "directory ($p_dir) $dir_rel is still tainted";
796 }
797 else {
798 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
799 }
800 } else { # $untaint_skip == 1
801 next;
81793b90 802 }
803 }
804 }
805 unless (chdir $udir) {
7e47e6ff 806 if ($Is_MacOS) {
cd68ec93 807 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 808 }
809 else {
cd68ec93 810 warnings::warnif "Can't cd to (" .
811 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 812 }
81793b90 813 next;
814 }
815 $CdLvl++;
816 }
817
7e47e6ff 818 if ($Is_MacOS) {
819 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
820 }
821
822 $dir= $dir_name; # $File::Find::dir
81793b90 823
824 # Get the list of files in the current directory.
7e47e6ff 825 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 826 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 827 next;
828 }
829 @filenames = readdir DIR;
830 closedir(DIR);
abfdd623 831 @filenames = $pre_process->(@filenames) if $pre_process;
719c805e 832 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 833
5fa2bf2b 834 # default: use whatever was specifid
835 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
836 $no_nlink = $avoid_nlink;
837 # if dir has wrong nlink count, force switch to slower stat method
838 $no_nlink = 1 if ($nlink < 2);
839
840 if ($nlink == 2 && !$no_nlink) {
81793b90 841 # This dir has no subdirectories.
842 for my $FN (@filenames) {
7e47e6ff 843 next if $FN =~ $File::Find::skip_pattern;
81793b90 844
7e47e6ff 845 $name = $dir_pref . $FN; # $File::Find::name
846 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 847 { $wanted_callback->() }; # protect against wild "next"
81793b90 848 }
849
850 }
851 else {
852 # This dir has subdirectories.
853 $subcount = $nlink - 2;
854
855 for my $FN (@filenames) {
7e47e6ff 856 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 857 if ($subcount > 0 || $no_nlink) {
81793b90 858 # Seen all the subdirs?
859 # check for directoriness.
860 # stat is faster for a file in the current directory
07867069 861 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 862
863 if (-d _) {
864 --$subcount;
c7b9dd21 865 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 866 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
867 }
868 else {
7e47e6ff 869 $name = $dir_pref . $FN; # $File::Find::name
870 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 871 { $wanted_callback->() }; # protect against wild "next"
81793b90 872 }
873 }
07867069 874 else {
7e47e6ff 875 $name = $dir_pref . $FN; # $File::Find::name
876 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 877 { $wanted_callback->() }; # protect against wild "next"
81793b90 878 }
879 }
880 }
17b275ff 881 }
882 continue {
57e73c4b 883 while ( defined ($SE = pop @Stack) ) {
81793b90 884 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
885 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 886 my $tmp;
887 if ($Is_MacOS) {
888 $tmp = (':' x ($CdLvl-$Level)) . ':';
889 }
890 else {
891 $tmp = join('/',('..') x ($CdLvl-$Level));
892 }
893 die "Can't cd to $dir_name" . $tmp
894 unless chdir ($tmp);
81793b90 895 $CdLvl = $Level;
896 }
7e47e6ff 897
898 if ($Is_MacOS) {
899 # $pdir always has a trailing ':', except for the starting dir,
900 # where $dir_rel eq ':'
901 $dir_name = "$p_dir$dir_rel";
902 $dir_pref = "$dir_name:";
903 }
904 else {
905 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
906 $dir_pref = "$dir_name/";
907 }
908
719c805e 909 if ( $nlink == -2 ) {
7e47e6ff 910 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 911 $_ = $File::Find::current_dir;
abfdd623 912 $post_process->(); # End-of-directory processing
7e47e6ff 913 }
914 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
915 $name = $dir_name;
916 if ($Is_MacOS) {
917 if ($dir_rel eq ':') { # must be the top dir, where we started
918 $name =~ s|:$||; # $File::Find::name
919 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
920 }
921 $dir = $p_dir; # $File::Find::dir
922 $_ = ($no_chdir ? $name : $dir_rel); # $_
923 }
924 else {
925 if ( substr($name,-2) eq '/.' ) {
5cf0a2f2 926 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff 927 }
928 $dir = $p_dir;
929 $_ = ($no_chdir ? $dir_name : $dir_rel );
930 if ( substr($_,-2) eq '/.' ) {
5cf0a2f2 931 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 932 }
933 }
abfdd623 934 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 935 }
936 else {
937 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
938 last;
939 }
81793b90 940 }
a0d0e21e 941 }
942}
943
81793b90 944
945# API:
946# $wanted
947# $dir_loc : absolute location of a dir
948# $p_dir : "parent directory"
949# preconditions:
950# chdir (if not no_chdir) to dir
951
952sub _find_dir_symlnk($$$) {
7e47e6ff 953 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 954 my @Stack;
955 my @filenames;
956 my $new_loc;
7e47e6ff 957 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 958 my $SE = [];
959 my $dir_name = $p_dir;
7e47e6ff 960 my $dir_pref;
961 my $loc_pref;
39e79f6b 962 my $dir_rel = $File::Find::current_dir;
7e47e6ff 963 my $byd_flag; # flag for pending stack entry if $bydepth
964 my $tainted = 0;
965 my $ok = 1;
966
967 if ($Is_MacOS) {
968 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
969 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 970 } else {
971 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
972 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 973 }
81793b90 974
975 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 976
977 unless ($no_chdir) {
978 # untaint the topdir
979 if (( $untaint ) && (is_tainted($dir_loc) )) {
980 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
981 # once untainted, $updir_loc is pushed on the stack (as parent directory);
982 # hence, we don't need to untaint the parent directory every time we chdir
983 # to it later
984 unless (defined $updir_loc) {
81793b90 985 if ($untaint_skip == 0) {
986 die "directory $dir_loc is still tainted";
987 }
988 else {
989 return;
990 }
991 }
992 }
7e47e6ff 993 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
994 unless ($ok) {
cd68ec93 995 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 996 return;
997 }
998 }
999
7e47e6ff 1000 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
1001
1002 if ($Is_MacOS) {
1003 $p_dir = $dir_pref; # ensure trailing ':'
1004 }
57e73c4b 1005
81793b90 1006 while (defined $SE) {
1007
1008 unless ($bydepth) {
7e47e6ff 1009 # change (back) to parent directory (always untainted)
704ea872 1010 unless ($no_chdir) {
7e47e6ff 1011 unless (chdir $updir_loc) {
cd68ec93 1012 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 1013 next;
1014 }
1015 }
7e47e6ff 1016 $dir= $p_dir; # $File::Find::dir
1017 $name= $dir_name; # $File::Find::name
1018 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
1019 $fullname= $dir_loc; # $File::Find::fullname
81793b90 1020 # prune may happen here
7e47e6ff 1021 $prune= 0;
704ea872 1022 lstat($_); # make sure file tests with '_' work
abfdd623 1023 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1024 next if $prune;
81793b90 1025 }
1026
1027 # change to that directory
7e47e6ff 1028 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1029 $updir_loc = $dir_loc;
1030 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
1031 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
1032 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
1033 unless (defined $updir_loc) {
81793b90 1034 if ($untaint_skip == 0) {
1035 die "directory $dir_loc is still tainted";
a0d0e21e 1036 }
237437d0 1037 else {
81793b90 1038 next;
237437d0 1039 }
a0d0e21e 1040 }
1041 }
7e47e6ff 1042 unless (chdir $updir_loc) {
cd68ec93 1043 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 1044 next;
1045 }
1046 }
1047
7e47e6ff 1048 if ($Is_MacOS) {
1049 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1050 }
1051
1052 $dir = $dir_name; # $File::Find::dir
81793b90 1053
1054 # Get the list of files in the current directory.
7e47e6ff 1055 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 1056 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 1057 next;
1058 }
1059 @filenames = readdir DIR;
1060 closedir(DIR);
1061
1062 for my $FN (@filenames) {
7e47e6ff 1063 next if $FN =~ $File::Find::skip_pattern;
81793b90 1064
1065 # follow symbolic links / do an lstat
07867069 1066 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1067
1068 # ignore if invalid symlink
1069 next unless defined $new_loc;
7e47e6ff 1070
81793b90 1071 if (-d _) {
7e47e6ff 1072 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1073 }
1074 else {
7e47e6ff 1075 $fullname = $new_loc; # $File::Find::fullname
1076 $name = $dir_pref . $FN; # $File::Find::name
1077 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 1078 { $wanted_callback->() }; # protect against wild "next"
81793b90 1079 }
1080 }
1081
81793b90 1082 }
1083 continue {
57e73c4b 1084 while (defined($SE = pop @Stack)) {
7e47e6ff 1085 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1086 if ($Is_MacOS) {
1087 # $p_dir always has a trailing ':', except for the starting dir,
1088 # where $dir_rel eq ':'
1089 $dir_name = "$p_dir$dir_rel";
1090 $dir_pref = "$dir_name:";
1091 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1092 }
1093 else {
1094 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1095 $dir_pref = "$dir_name/";
1096 $loc_pref = "$dir_loc/";
1097 }
1098 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1099 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1100 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1101 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1102 next;
1103 }
1104 }
1105 $fullname = $dir_loc; # $File::Find::fullname
1106 $name = $dir_name; # $File::Find::name
1107 if ($Is_MacOS) {
1108 if ($dir_rel eq ':') { # must be the top dir, where we started
1109 $name =~ s|:$||; # $File::Find::name
1110 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1111 }
1112 $dir = $p_dir; # $File::Find::dir
1113 $_ = ($no_chdir ? $name : $dir_rel); # $_
1114 }
1115 else {
1116 if ( substr($name,-2) eq '/.' ) {
f801979b 1117 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
7e47e6ff 1118 }
1119 $dir = $p_dir; # $File::Find::dir
1120 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1121 if ( substr($_,-2) eq '/.' ) {
f801979b 1122 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 1123 }
1124 }
1125
1126 lstat($_); # make sure file tests with '_' work
abfdd623 1127 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1128 }
1129 else {
1130 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1131 last;
1132 }
a0d0e21e 1133 }
1134 }
1135}
1136
81793b90 1137
20408e3c 1138sub wrap_wanted {
81793b90 1139 my $wanted = shift;
1140 if ( ref($wanted) eq 'HASH' ) {
1141 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1142 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1143 }
1144 if ( $wanted->{untaint} ) {
7e47e6ff 1145 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1146 unless defined $wanted->{untaint_pattern};
1147 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1148 }
1149 return $wanted;
1150 }
1151 else {
1152 return { wanted => $wanted };
1153 }
a0d0e21e 1154}
1155
20408e3c 1156sub find {
81793b90 1157 my $wanted = shift;
1158 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1159}
1160
55d729e4 1161sub finddepth {
81793b90 1162 my $wanted = wrap_wanted(shift);
1163 $wanted->{bydepth} = 1;
1164 _find_opt($wanted, @_);
20408e3c 1165}
6280b799 1166
7e47e6ff 1167# default
1168$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1169$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1170
6280b799 1171# These are hard-coded for now, but may move to hint files.
10eba763 1172if ($^O eq 'VMS') {
81793b90 1173 $Is_VMS = 1;
7e47e6ff 1174 $File::Find::dont_use_nlink = 1;
1175}
1176elsif ($^O eq 'MacOS') {
1177 $Is_MacOS = 1;
1178 $File::Find::dont_use_nlink = 1;
1179 $File::Find::skip_pattern = qr/^Icon\015\z/;
1180 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1181}
1182
7e47e6ff 1183# this _should_ work properly on all platforms
1184# where File::Find can be expected to work
1185$File::Find::current_dir = File::Spec->curdir || '.';
1186
81793b90 1187$File::Find::dont_use_nlink = 1
497711e7 1188 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
94c67634 1189 $^O eq 'cygwin' || $^O eq 'epoc';
6280b799 1190
20408e3c 1191# Set dont_use_nlink in your hint file if your system's stat doesn't
1192# report the number of links in a directory as an indication
1193# of the number of files.
1194# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1195unless ($File::Find::dont_use_nlink) {
1196 require Config;
1197 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1198}
1199
7e47e6ff 1200# We need a function that checks if a scalar is tainted. Either use the
1201# Scalar::Util module's tainted() function or our (slower) pure Perl
1202# fallback is_tainted_pp()
1203{
1204 local $@;
1205 eval { require Scalar::Util };
1206 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1207}
1208
a0d0e21e 12091;