RE: maint snapshot @ 19525
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b 10=head1 NAME
11
abfdd623 12File::Find - Traverse a directory tree.
f06db76b 13
14=head1 SYNOPSIS
15
16 use File::Find;
6d355c6e 17 find(\&wanted, @directories_to_search);
f06db76b 18 sub wanted { ... }
237437d0 19
f06db76b 20 use File::Find;
abfdd623 21 finddepth(\&wanted, @directories_to_search);
f06db76b 22 sub wanted { ... }
3cb6de81 23
81793b90 24 use File::Find;
25 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 26
27=head1 DESCRIPTION
28
abfdd623 29These are functions for searching through directory trees doing work
30on each file found similar to the Unix I<find> command. File::Find
31exports two functions, C<find> and C<finddepth>. They work similarly
32but have subtle differences.
33
34=over 4
35
36=item B<find>
37
38 find(\&wanted, @directories);
39 find(\%options, @directories);
40
41find() does a breadth-first search over the given @directories in the
6d355c6e 42order they are given. In essence, it works from the top down.
abfdd623 43
44For each file or directory found the &wanted subroutine is called (see
45below for details). Additionally, for each directory found it will go
46into that directory and continue the search.
47
48=item B<finddepth>
49
50 finddepth(\&wanted, @directories);
51 finddepth(\%options, @directories);
52
53finddepth() works just like find() except it does a depth-first search.
54It works from the bottom of the directory tree up.
55
56=back
57
58=head2 %options
59
20408e3c 60The first argument to find() is either a hash reference describing the
abfdd623 61operations to be performed for each file, or a code reference. The
62code reference is described in L<The wanted function> below.
20408e3c 63
81793b90 64Here are the possible keys for the hash:
65
66=over 3
67
68=item C<wanted>
69
abfdd623 70The value should be a code reference. This code reference is
71described in L<The wanted function> below.
81793b90 72
73=item C<bydepth>
74
75Reports the name of a directory only AFTER all its entries
76have been reported. Entry point finddepth() is a shortcut for
f801979b 77specifying C<{ bydepth =E<gt> 1 }> in the first argument of find().
81793b90 78
719c805e 79=item C<preprocess>
80
7e47e6ff 81The value should be a code reference. This code reference is used to
82preprocess the current directory. The name of currently processed
83directory is in $File::Find::dir. Your preprocessing function is
84called after readdir() but before the loop that calls the wanted()
85function. It is called with a list of strings (actually file/directory
86names) and is expected to return a list of strings. The code can be
87used to sort the file/directory names alphabetically, numerically,
88or to filter out directory entries based on their name alone. When
89I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 90
91=item C<postprocess>
92
7e47e6ff 93The value should be a code reference. It is invoked just before leaving
94the currently processed directory. It is called in void context with no
95arguments. The name of the current directory is in $File::Find::dir. This
96hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 97usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 98no-op.
719c805e 99
81793b90 100=item C<follow>
101
102Causes symbolic links to be followed. Since directory trees with symbolic
103links (followed) may contain files more than once and may even have
104cycles, a hash has to be built up with an entry for each file.
105This might be expensive both in space and time for a large
106directory tree. See I<follow_fast> and I<follow_skip> below.
107If either I<follow> or I<follow_fast> is in effect:
108
109=over 6
110
a45bd81d 111=item *
81793b90 112
f10e1564 113It is guaranteed that an I<lstat> has been called before the user's
81793b90 114I<wanted()> function is called. This enables fast file checks involving S< _>.
115
a45bd81d 116=item *
81793b90 117
118There is a variable C<$File::Find::fullname> which holds the absolute
119pathname of the file with all symbolic links resolved
120
121=back
122
123=item C<follow_fast>
124
f10e1564 125This is similar to I<follow> except that it may report some files more
126than once. It does detect cycles, however. Since only symbolic links
127have to be hashed, this is much cheaper both in space and time. If
128processing a file more than once (by the user's I<wanted()> function)
81793b90 129is worse than just taking time, the option I<follow> should be used.
130
131=item C<follow_skip>
132
133C<follow_skip==1>, which is the default, causes all files which are
134neither directories nor symbolic links to be ignored if they are about
135to be processed a second time. If a directory or a symbolic link
136are about to be processed a second time, File::Find dies.
137C<follow_skip==0> causes File::Find to die if any file is about to be
138processed a second time.
139C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 140directories but to proceed normally otherwise.
20408e3c 141
80e52b73 142=item C<dangling_symlinks>
143
144If true and a code reference, will be called with the symbolic link
145name and the directory it lives in as arguments. Otherwise, if true
146and warnings are on, warning "symbolic_link_name is a dangling
147symbolic link\n" will be issued. If false, the dangling symbolic link
148will be silently ignored.
f06db76b 149
81793b90 150=item C<no_chdir>
151
152Does not C<chdir()> to each directory as it recurses. The wanted()
153function will need to be aware of this, of course. In this case,
154C<$_> will be the same as C<$File::Find::name>.
155
156=item C<untaint>
157
158If find is used in taint-mode (-T command line switch or if EUID != UID
159or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 160before they can be chdir'ed to. Therefore they are checked against a regular
161expression I<untaint_pattern>. Note that all names passed to the user's
162I<wanted()> function are still tainted. If this option is used while
163not in taint-mode, C<untaint> is a no-op.
81793b90 164
165=item C<untaint_pattern>
166
167See above. This should be set using the C<qr> quoting operator.
168The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 169Note that the parentheses are vital.
81793b90 170
171=item C<untaint_skip>
172
7e47e6ff 173If set, a directory which fails the I<untaint_pattern> is skipped,
174including all its sub-directories. The default is to 'die' in such a case.
81793b90 175
176=back
177
abfdd623 178=head2 The wanted function
179
180The wanted() function does whatever verifications you want on each
181file and directory. It takes no arguments but rather does its work
182through a collection of variables.
183
184=over 4
185
f837ebe2 186=item C<$File::Find::dir> is the current directory name,
abfdd623 187
188=item C<$_> is the current filename within that directory
189
f837ebe2 190=item C<$File::Find::name> is the complete pathname to the file.
abfdd623 191
192=back
193
f837ebe2 194Don't modify these variables.
195
abfdd623 196For example, when examining the file /some/path/foo.ext you will have:
197
198 $File::Find::dir = /some/path/
199 $_ = foo.ext
200 $File::Find::name = /some/path/foo.ext
201
202You are chdir()'d toC<$File::Find::dir> when the function is called,
203unless C<no_chdir> was specified. Note that when changing to
204directories is in effect the root directory (F</>) is a somewhat
205special case inasmuch as the concatenation of C<$File::Find::dir>,
206C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
207table below summarizes all variants:
5cf0a2f2 208
209 $File::Find::name $File::Find::dir $_
210 default / / .
211 no_chdir=>0 /etc / etc
212 /etc/x /etc x
abfdd623 213
5cf0a2f2 214 no_chdir=>1 / / /
215 /etc / /etc
216 /etc/x /etc /etc/x
217
218
219When <follow> or <follow_fast> are in effect, there is
f10e1564 220also a C<$File::Find::fullname>. The function may set
221C<$File::Find::prune> to prune the tree unless C<bydepth> was
222specified. Unless C<follow> or C<follow_fast> is specified, for
223compatibility reasons (find.pl, find2perl) there are in addition the
224following globals available: C<$File::Find::topdir>,
225C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 226C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 227
20408e3c 228This library is useful for the C<find2perl> tool, which when fed,
f06db76b 229
230 find2perl / -name .nfs\* -mtime +7 \
81793b90 231 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 232
233produces something like:
234
235 sub wanted {
c7b9dd21 236 /^\.nfs.*\z/s &&
81793b90 237 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 238 int(-M _) > 7 &&
239 unlink($_)
240 ||
81793b90 241 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 242 $dev < 0 &&
6280b799 243 ($File::Find::prune = 1);
f06db76b 244 }
245
43dece2a 246Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
247filehandle that caches the information from the preceding
248stat(), lstat(), or filetest.
249
1cffc1dd 250Here's another interesting wanted function. It will find all symbolic
251links that don't resolve:
f06db76b 252
253 sub wanted {
81793b90 254 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 255 }
f06db76b 256
81793b90 257See also the script C<pfind> on CPAN for a nice application of this
258module.
259
cd68ec93 260=head1 WARNINGS
261
262If you run your program with the C<-w> switch, or if you use the
263C<warnings> pragma, File::Find will report warnings for several weird
264situations. You can disable these warnings by putting the statement
265
266 no warnings 'File::Find';
267
268in the appropriate scope. See L<perllexwarn> for more info about lexical
269warnings.
270
81793b90 271=head1 CAVEAT
272
5fa2bf2b 273=over 2
274
275=item $dont_use_nlink
276
277You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 278force File::Find to always stat directories. This was used for file systems
279that do not have an C<nlink> count matching the number of sub-directories.
280Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
281system) and a couple of others.
5fa2bf2b 282
6cf3b067 283You shouldn't need to set this variable, since File::Find should now detect
284such file systems on-the-fly and switch itself to using stat. This works even
285for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 286
6cf3b067 287If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 288
289=item symlinks
290
f10e1564 291Be aware that the option to follow symbolic links can be dangerous.
81793b90 292Depending on the structure of the directory tree (including symbolic
293links to directories) you might traverse a given (physical) directory
294more than once (only if C<follow_fast> is in effect).
295Furthermore, deleting or changing files in a symbolically linked directory
296might cause very unpleasant surprises, since you delete or change files
297in an unknown directory.
0530a6c4 298
5fa2bf2b 299=back
300
7e47e6ff 301=head1 NOTES
302
303=over 4
304
305=item *
306
307Mac OS (Classic) users should note a few differences:
308
309=over 4
310
311=item *
312
313The path separator is ':', not '/', and the current directory is denoted
314as ':', not '.'. You should be careful about specifying relative pathnames.
315While a full path always begins with a volume name, a relative pathname
316should always begin with a ':'. If specifying a volume name only, a
317trailing ':' is required.
318
319=item *
320
321C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
322contains the name of a directory, that name may or may not end with a
323':'. Likewise, C<$File::Find::name>, which contains the complete
324pathname to that directory, and C<$File::Find::fullname>, which holds
325the absolute pathname of that directory with all symbolic links resolved,
326may or may not end with a ':'.
327
328=item *
329
330The default C<untaint_pattern> (see above) on Mac OS is set to
331C<qr|^(.+)$|>. Note that the parentheses are vital.
332
333=item *
334
335The invisible system file "Icon\015" is ignored. While this file may
336appear in every directory, there are some more invisible system files
337on every volume, which are all located at the volume root level (i.e.
338"MacintoshHD:"). These system files are B<not> excluded automatically.
339Your filter may use the following code to recognize invisible files or
340directories (requires Mac::Files):
341
342 use Mac::Files;
343
344 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 345 # 0 if it's visible or undef if an error occurred
7e47e6ff 346
347 sub invisible($) {
348 my $file = shift;
349 my ($fileCat, $fileInfo);
350 my $invisible_flag = 1 << 14;
351
352 if ( $fileCat = FSpGetCatInfo($file) ) {
353 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
354 return (($fileInfo->fdFlags & $invisible_flag) && 1);
355 }
356 }
357 return undef;
358 }
359
360Generally, invisible files are system files, unless an odd application
361decides to use invisible files for its own purposes. To distinguish
362such files from system files, you have to look at the B<type> and B<creator>
363file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
364C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
365(see MacPerl.pm for details).
366
367Files that appear on the desktop actually reside in an (hidden) directory
368named "Desktop Folder" on the particular disk volume. Note that, although
369all desktop files appear to be on the same "virtual" desktop, each disk
370volume actually maintains its own "Desktop Folder" directory.
371
372=back
373
374=back
0530a6c4 375
a85af077 376=head1 HISTORY
377
378File::Find used to produce incorrect results if called recursively.
379During the development of perl 5.8 this bug was fixed.
380The first fixed version of File::Find was 1.01.
381
f06db76b 382=cut
383
b75c8c73 384our @ISA = qw(Exporter);
385our @EXPORT = qw(find finddepth);
6280b799 386
a0d0e21e 387
81793b90 388use strict;
389my $Is_VMS;
7e47e6ff 390my $Is_MacOS;
81793b90 391
392require File::Basename;
7e47e6ff 393require File::Spec;
81793b90 394
9f826d6a 395# Should ideally be my() not our() but local() currently
396# refuses to operate on lexicals
397
398our %SLnkSeen;
399our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 400 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 401 $pre_process, $post_process, $dangling_symlinks);
81793b90 402
403sub contract_name {
404 my ($cdir,$fn) = @_;
405
7e47e6ff 406 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 407
408 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
409
410 $fn =~ s|^\./||;
411
412 my $abs_name= $cdir . $fn;
413
414 if (substr($fn,0,3) eq '../') {
fecbda2b 415 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 416 }
417
418 return $abs_name;
419}
420
7e47e6ff 421# return the absolute name of a directory or file
422sub contract_name_Mac {
423 my ($cdir,$fn) = @_;
424 my $abs_name;
425
426 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
427
428 my $colon_count = length ($1);
429 if ($colon_count == 1) {
430 $abs_name = $cdir . $2;
431 return $abs_name;
432 }
433 else {
434 # need to move up the tree, but
435 # only if it's not a volume name
436 for (my $i=1; $i<$colon_count; $i++) {
437 unless ($cdir =~ /^[^:]+:$/) { # volume name
438 $cdir =~ s/[^:]+:$//;
439 }
440 else {
441 return undef;
442 }
443 }
444 $abs_name = $cdir . $2;
445 return $abs_name;
446 }
447
448 }
449 else {
450
451 # $fn may be a valid path to a directory or file or (dangling)
452 # symlink, without a leading ':'
453 if ( (-e $fn) || (-l $fn) ) {
454 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
455 return $fn; # $fn is already an absolute path
456 }
457 else {
458 $abs_name = $cdir . $fn;
459 return $abs_name;
460 }
461 }
462 else { # argh!, $fn is not a valid directory/file
463 return undef;
464 }
465 }
466}
81793b90 467
468sub PathCombine($$) {
469 my ($Base,$Name) = @_;
470 my $AbsName;
471
7e47e6ff 472 if ($Is_MacOS) {
473 # $Name is the resolved symlink (always a full path on MacOS),
474 # i.e. there's no need to call contract_name_Mac()
475 $AbsName = $Name;
476
477 # (simple) check for recursion
478 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
479 return undef;
480 }
81793b90 481 }
482 else {
7e47e6ff 483 if (substr($Name,0,1) eq '/') {
484 $AbsName= $Name;
485 }
486 else {
487 $AbsName= contract_name($Base,$Name);
488 }
81793b90 489
7e47e6ff 490 # (simple) check for recursion
491 my $newlen= length($AbsName);
492 if ($newlen <= length($Base)) {
493 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
494 && $AbsName eq substr($Base,0,$newlen))
495 {
496 return undef;
497 }
81793b90 498 }
499 }
500 return $AbsName;
501}
502
503sub Follow_SymLink($) {
504 my ($AbsName) = @_;
505
506 my ($NewName,$DEV, $INO);
507 ($DEV, $INO)= lstat $AbsName;
508
509 while (-l _) {
510 if ($SLnkSeen{$DEV, $INO}++) {
511 if ($follow_skip < 2) {
512 die "$AbsName is encountered a second time";
a0d0e21e 513 }
514 else {
81793b90 515 return undef;
a0d0e21e 516 }
517 }
81793b90 518 $NewName= PathCombine($AbsName, readlink($AbsName));
519 unless(defined $NewName) {
520 if ($follow_skip < 2) {
521 die "$AbsName is a recursive symbolic link";
522 }
523 else {
524 return undef;
a0d0e21e 525 }
81793b90 526 }
527 else {
528 $AbsName= $NewName;
529 }
530 ($DEV, $INO) = lstat($AbsName);
531 return undef unless defined $DEV; # dangling symbolic link
532 }
533
cd68ec93 534 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 535 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 536 die "$AbsName encountered a second time";
537 }
538 else {
539 return undef;
540 }
541 }
542
543 return $AbsName;
544}
545
17f410f9 546our($dir, $name, $fullname, $prune);
81793b90 547sub _find_dir_symlnk($$$);
548sub _find_dir($$$);
549
7e47e6ff 550# check whether or not a scalar variable is tainted
551# (code straight from the Camel, 3rd ed., page 561)
552sub is_tainted_pp {
553 my $arg = shift;
554 my $nada = substr($arg, 0, 0); # zero-length
555 local $@;
556 eval { eval "# $nada" };
557 return length($@) != 0;
558}
559
81793b90 560sub _find_opt {
561 my $wanted = shift;
562 die "invalid top directory" unless defined $_[0];
563
9f826d6a 564 # This function must local()ize everything because callbacks may
565 # call find() or finddepth()
566
567 local %SLnkSeen;
568 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
569 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 570 $pre_process, $post_process, $dangling_symlinks);
17ab9c14 571 local($dir, $name, $fullname, $prune, $_);
9f826d6a 572
a0c9c202 573 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 574 my $cwd_untainted = $cwd;
575 my $check_t_cwd = 1;
576 $wanted_callback = $wanted->{wanted};
577 $bydepth = $wanted->{bydepth};
578 $pre_process = $wanted->{preprocess};
579 $post_process = $wanted->{postprocess};
580 $no_chdir = $wanted->{no_chdir};
581 $full_check = $wanted->{follow};
582 $follow = $full_check || $wanted->{follow_fast};
583 $follow_skip = $wanted->{follow_skip};
584 $untaint = $wanted->{untaint};
585 $untaint_pat = $wanted->{untaint_pattern};
586 $untaint_skip = $wanted->{untaint_skip};
587 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 588
1cffc1dd 589 # for compatibility reasons (find.pl, find2perl)
9f826d6a 590 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 591
592 # a symbolic link to a directory doesn't increase the link count
593 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
594
e7b91b67 595 my ($abs_dir, $Is_Dir);
81793b90 596
597 Proc_Top_Item:
598 foreach my $TOP (@_) {
7e47e6ff 599 my $top_item = $TOP;
600
601 if ($Is_MacOS) {
602 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
603 $top_item = ":$top_item"
3fa6e24b 604 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 605 }
606 else {
607 $top_item =~ s|/\z|| unless $top_item eq '/';
608 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
609 }
610
611 $Is_Dir= 0;
612
613 if ($follow) {
614
615 if ($Is_MacOS) {
616 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
617
618 if ($top_item eq $File::Find::current_dir) {
619 $abs_dir = $cwd;
620 }
621 else {
622 $abs_dir = contract_name_Mac($cwd, $top_item);
623 unless (defined $abs_dir) {
cd68ec93 624 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 625 next Proc_Top_Item;
626 }
627 }
628
629 }
630 else {
631 if (substr($top_item,0,1) eq '/') {
632 $abs_dir = $top_item;
633 }
634 elsif ($top_item eq $File::Find::current_dir) {
635 $abs_dir = $cwd;
636 }
637 else { # care about any ../
638 $abs_dir = contract_name("$cwd/",$top_item);
639 }
640 }
641 $abs_dir= Follow_SymLink($abs_dir);
642 unless (defined $abs_dir) {
80e52b73 643 if ($dangling_symlinks) {
644 if (ref $dangling_symlinks eq 'CODE') {
645 $dangling_symlinks->($top_item, $cwd);
646 } else {
cd68ec93 647 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 648 }
649 }
81793b90 650 next Proc_Top_Item;
7e47e6ff 651 }
652
653 if (-d _) {
81793b90 654 _find_dir_symlnk($wanted, $abs_dir, $top_item);
655 $Is_Dir= 1;
7e47e6ff 656 }
657 }
81793b90 658 else { # no follow
7e47e6ff 659 $topdir = $top_item;
660 unless (defined $topnlink) {
cd68ec93 661 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 662 next Proc_Top_Item;
663 }
664 if (-d _) {
c7b9dd21 665 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 666 _find_dir($wanted, $top_item, $topnlink);
81793b90 667 $Is_Dir= 1;
7e47e6ff 668 }
237437d0 669 else {
81793b90 670 $abs_dir= $top_item;
7e47e6ff 671 }
672 }
81793b90 673
7e47e6ff 674 unless ($Is_Dir) {
81793b90 675 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 676 if ($Is_MacOS) {
677 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
678 }
679 else {
680 ($dir,$_) = ('./', $top_item);
681 }
81793b90 682 }
683
7e47e6ff 684 $abs_dir = $dir;
685 if (( $untaint ) && (is_tainted($dir) )) {
686 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 687 unless (defined $abs_dir) {
688 if ($untaint_skip == 0) {
7e47e6ff 689 die "directory $dir is still tainted";
81793b90 690 }
691 else {
692 next Proc_Top_Item;
693 }
694 }
7e47e6ff 695 }
81793b90 696
7e47e6ff 697 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 698 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 699 next Proc_Top_Item;
700 }
719911cc 701
7e47e6ff 702 $name = $abs_dir . $_; # $File::Find::name
3bb6d3e5 703 $_ = $name if $no_chdir;
719911cc 704
abfdd623 705 { $wanted_callback->() }; # protect against wild "next"
81793b90 706
7e47e6ff 707 }
81793b90 708
7e47e6ff 709 unless ( $no_chdir ) {
710 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
711 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
712 unless (defined $cwd_untainted) {
713 die "insecure cwd in find(depth)";
714 }
715 $check_t_cwd = 0;
716 }
717 unless (chdir $cwd_untainted) {
718 die "Can't cd to $cwd: $!\n";
719 }
720 }
81793b90 721 }
722}
723
724# API:
725# $wanted
726# $p_dir : "parent directory"
727# $nlink : what came back from the stat
728# preconditions:
729# chdir (if not no_chdir) to dir
730
731sub _find_dir($$$) {
732 my ($wanted, $p_dir, $nlink) = @_;
733 my ($CdLvl,$Level) = (0,0);
734 my @Stack;
735 my @filenames;
736 my ($subcount,$sub_nlink);
737 my $SE= [];
738 my $dir_name= $p_dir;
7e47e6ff 739 my $dir_pref;
39e79f6b 740 my $dir_rel = $File::Find::current_dir;
7e47e6ff 741 my $tainted = 0;
5fa2bf2b 742 my $no_nlink;
7e47e6ff 743
744 if ($Is_MacOS) {
745 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 746 }
747 else {
748 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 749 }
81793b90 750
751 local ($dir, $name, $prune, *DIR);
7e47e6ff 752
753 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 754 my $udir = $p_dir;
7e47e6ff 755 if (( $untaint ) && (is_tainted($p_dir) )) {
756 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 757 unless (defined $udir) {
758 if ($untaint_skip == 0) {
759 die "directory $p_dir is still tainted";
760 }
761 else {
762 return;
763 }
237437d0 764 }
a0d0e21e 765 }
81793b90 766 unless (chdir $udir) {
cd68ec93 767 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 768 return;
769 }
770 }
7e47e6ff 771
772 # push the starting directory
57e73c4b 773 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 774
7e47e6ff 775 if ($Is_MacOS) {
776 $p_dir = $dir_pref; # ensure trailing ':'
777 }
778
81793b90 779 while (defined $SE) {
780 unless ($bydepth) {
7e47e6ff 781 $dir= $p_dir; # $File::Find::dir
782 $name= $dir_name; # $File::Find::name
783 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 784 # prune may happen here
7e47e6ff 785 $prune= 0;
abfdd623 786 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 787 next if $prune;
81793b90 788 }
7e47e6ff 789
81793b90 790 # change to that directory
7e47e6ff 791 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 792 my $udir= $dir_rel;
7e47e6ff 793 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
794 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 795 unless (defined $udir) {
796 if ($untaint_skip == 0) {
7e47e6ff 797 if ($Is_MacOS) {
798 die "directory ($p_dir) $dir_rel is still tainted";
799 }
800 else {
801 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
802 }
803 } else { # $untaint_skip == 1
804 next;
81793b90 805 }
806 }
807 }
808 unless (chdir $udir) {
7e47e6ff 809 if ($Is_MacOS) {
cd68ec93 810 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 811 }
812 else {
cd68ec93 813 warnings::warnif "Can't cd to (" .
814 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 815 }
81793b90 816 next;
817 }
818 $CdLvl++;
819 }
820
7e47e6ff 821 if ($Is_MacOS) {
822 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
823 }
824
825 $dir= $dir_name; # $File::Find::dir
81793b90 826
827 # Get the list of files in the current directory.
7e47e6ff 828 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 829 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 830 next;
831 }
832 @filenames = readdir DIR;
833 closedir(DIR);
abfdd623 834 @filenames = $pre_process->(@filenames) if $pre_process;
719c805e 835 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 836
5fa2bf2b 837 # default: use whatever was specifid
838 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
839 $no_nlink = $avoid_nlink;
840 # if dir has wrong nlink count, force switch to slower stat method
841 $no_nlink = 1 if ($nlink < 2);
842
843 if ($nlink == 2 && !$no_nlink) {
81793b90 844 # This dir has no subdirectories.
845 for my $FN (@filenames) {
7e47e6ff 846 next if $FN =~ $File::Find::skip_pattern;
81793b90 847
7e47e6ff 848 $name = $dir_pref . $FN; # $File::Find::name
849 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 850 { $wanted_callback->() }; # protect against wild "next"
81793b90 851 }
852
853 }
854 else {
855 # This dir has subdirectories.
856 $subcount = $nlink - 2;
857
858 for my $FN (@filenames) {
7e47e6ff 859 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 860 if ($subcount > 0 || $no_nlink) {
81793b90 861 # Seen all the subdirs?
862 # check for directoriness.
863 # stat is faster for a file in the current directory
07867069 864 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 865
866 if (-d _) {
867 --$subcount;
c7b9dd21 868 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 869 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
870 }
871 else {
7e47e6ff 872 $name = $dir_pref . $FN; # $File::Find::name
873 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 874 { $wanted_callback->() }; # protect against wild "next"
81793b90 875 }
876 }
07867069 877 else {
7e47e6ff 878 $name = $dir_pref . $FN; # $File::Find::name
879 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 880 { $wanted_callback->() }; # protect against wild "next"
81793b90 881 }
882 }
883 }
17b275ff 884 }
885 continue {
57e73c4b 886 while ( defined ($SE = pop @Stack) ) {
81793b90 887 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
888 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 889 my $tmp;
890 if ($Is_MacOS) {
891 $tmp = (':' x ($CdLvl-$Level)) . ':';
892 }
893 else {
894 $tmp = join('/',('..') x ($CdLvl-$Level));
895 }
896 die "Can't cd to $dir_name" . $tmp
897 unless chdir ($tmp);
81793b90 898 $CdLvl = $Level;
899 }
7e47e6ff 900
901 if ($Is_MacOS) {
902 # $pdir always has a trailing ':', except for the starting dir,
903 # where $dir_rel eq ':'
904 $dir_name = "$p_dir$dir_rel";
905 $dir_pref = "$dir_name:";
906 }
907 else {
908 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
909 $dir_pref = "$dir_name/";
910 }
911
719c805e 912 if ( $nlink == -2 ) {
7e47e6ff 913 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 914 $_ = $File::Find::current_dir;
abfdd623 915 $post_process->(); # End-of-directory processing
7e47e6ff 916 }
917 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
918 $name = $dir_name;
919 if ($Is_MacOS) {
920 if ($dir_rel eq ':') { # must be the top dir, where we started
921 $name =~ s|:$||; # $File::Find::name
922 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
923 }
924 $dir = $p_dir; # $File::Find::dir
925 $_ = ($no_chdir ? $name : $dir_rel); # $_
926 }
927 else {
928 if ( substr($name,-2) eq '/.' ) {
5cf0a2f2 929 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff 930 }
931 $dir = $p_dir;
932 $_ = ($no_chdir ? $dir_name : $dir_rel );
933 if ( substr($_,-2) eq '/.' ) {
5cf0a2f2 934 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 935 }
936 }
abfdd623 937 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 938 }
939 else {
940 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
941 last;
942 }
81793b90 943 }
a0d0e21e 944 }
945}
946
81793b90 947
948# API:
949# $wanted
950# $dir_loc : absolute location of a dir
951# $p_dir : "parent directory"
952# preconditions:
953# chdir (if not no_chdir) to dir
954
955sub _find_dir_symlnk($$$) {
7e47e6ff 956 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 957 my @Stack;
958 my @filenames;
959 my $new_loc;
7e47e6ff 960 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 961 my $SE = [];
962 my $dir_name = $p_dir;
7e47e6ff 963 my $dir_pref;
964 my $loc_pref;
39e79f6b 965 my $dir_rel = $File::Find::current_dir;
7e47e6ff 966 my $byd_flag; # flag for pending stack entry if $bydepth
967 my $tainted = 0;
968 my $ok = 1;
969
970 if ($Is_MacOS) {
971 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
972 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 973 } else {
974 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
975 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 976 }
81793b90 977
978 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 979
980 unless ($no_chdir) {
981 # untaint the topdir
982 if (( $untaint ) && (is_tainted($dir_loc) )) {
983 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
984 # once untainted, $updir_loc is pushed on the stack (as parent directory);
985 # hence, we don't need to untaint the parent directory every time we chdir
986 # to it later
987 unless (defined $updir_loc) {
81793b90 988 if ($untaint_skip == 0) {
989 die "directory $dir_loc is still tainted";
990 }
991 else {
992 return;
993 }
994 }
995 }
7e47e6ff 996 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
997 unless ($ok) {
cd68ec93 998 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 999 return;
1000 }
1001 }
1002
7e47e6ff 1003 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
1004
1005 if ($Is_MacOS) {
1006 $p_dir = $dir_pref; # ensure trailing ':'
1007 }
57e73c4b 1008
81793b90 1009 while (defined $SE) {
1010
1011 unless ($bydepth) {
7e47e6ff 1012 # change (back) to parent directory (always untainted)
704ea872 1013 unless ($no_chdir) {
7e47e6ff 1014 unless (chdir $updir_loc) {
cd68ec93 1015 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 1016 next;
1017 }
1018 }
7e47e6ff 1019 $dir= $p_dir; # $File::Find::dir
1020 $name= $dir_name; # $File::Find::name
1021 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
1022 $fullname= $dir_loc; # $File::Find::fullname
81793b90 1023 # prune may happen here
7e47e6ff 1024 $prune= 0;
704ea872 1025 lstat($_); # make sure file tests with '_' work
abfdd623 1026 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1027 next if $prune;
81793b90 1028 }
1029
1030 # change to that directory
7e47e6ff 1031 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1032 $updir_loc = $dir_loc;
1033 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
1034 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
1035 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
1036 unless (defined $updir_loc) {
81793b90 1037 if ($untaint_skip == 0) {
1038 die "directory $dir_loc is still tainted";
a0d0e21e 1039 }
237437d0 1040 else {
81793b90 1041 next;
237437d0 1042 }
a0d0e21e 1043 }
1044 }
7e47e6ff 1045 unless (chdir $updir_loc) {
cd68ec93 1046 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 1047 next;
1048 }
1049 }
1050
7e47e6ff 1051 if ($Is_MacOS) {
1052 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1053 }
1054
1055 $dir = $dir_name; # $File::Find::dir
81793b90 1056
1057 # Get the list of files in the current directory.
7e47e6ff 1058 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 1059 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 1060 next;
1061 }
1062 @filenames = readdir DIR;
1063 closedir(DIR);
1064
1065 for my $FN (@filenames) {
7e47e6ff 1066 next if $FN =~ $File::Find::skip_pattern;
81793b90 1067
1068 # follow symbolic links / do an lstat
07867069 1069 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1070
1071 # ignore if invalid symlink
1072 next unless defined $new_loc;
7e47e6ff 1073
81793b90 1074 if (-d _) {
7e47e6ff 1075 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1076 }
1077 else {
7e47e6ff 1078 $fullname = $new_loc; # $File::Find::fullname
1079 $name = $dir_pref . $FN; # $File::Find::name
1080 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 1081 { $wanted_callback->() }; # protect against wild "next"
81793b90 1082 }
1083 }
1084
81793b90 1085 }
1086 continue {
57e73c4b 1087 while (defined($SE = pop @Stack)) {
7e47e6ff 1088 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1089 if ($Is_MacOS) {
1090 # $p_dir always has a trailing ':', except for the starting dir,
1091 # where $dir_rel eq ':'
1092 $dir_name = "$p_dir$dir_rel";
1093 $dir_pref = "$dir_name:";
1094 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1095 }
1096 else {
1097 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1098 $dir_pref = "$dir_name/";
1099 $loc_pref = "$dir_loc/";
1100 }
1101 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1102 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1103 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1104 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1105 next;
1106 }
1107 }
1108 $fullname = $dir_loc; # $File::Find::fullname
1109 $name = $dir_name; # $File::Find::name
1110 if ($Is_MacOS) {
1111 if ($dir_rel eq ':') { # must be the top dir, where we started
1112 $name =~ s|:$||; # $File::Find::name
1113 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1114 }
1115 $dir = $p_dir; # $File::Find::dir
1116 $_ = ($no_chdir ? $name : $dir_rel); # $_
1117 }
1118 else {
1119 if ( substr($name,-2) eq '/.' ) {
f801979b 1120 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
7e47e6ff 1121 }
1122 $dir = $p_dir; # $File::Find::dir
1123 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1124 if ( substr($_,-2) eq '/.' ) {
f801979b 1125 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 1126 }
1127 }
1128
1129 lstat($_); # make sure file tests with '_' work
abfdd623 1130 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1131 }
1132 else {
1133 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1134 last;
1135 }
a0d0e21e 1136 }
1137 }
1138}
1139
81793b90 1140
20408e3c 1141sub wrap_wanted {
81793b90 1142 my $wanted = shift;
1143 if ( ref($wanted) eq 'HASH' ) {
1144 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1145 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1146 }
1147 if ( $wanted->{untaint} ) {
7e47e6ff 1148 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1149 unless defined $wanted->{untaint_pattern};
1150 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1151 }
1152 return $wanted;
1153 }
1154 else {
1155 return { wanted => $wanted };
1156 }
a0d0e21e 1157}
1158
20408e3c 1159sub find {
81793b90 1160 my $wanted = shift;
1161 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1162}
1163
55d729e4 1164sub finddepth {
81793b90 1165 my $wanted = wrap_wanted(shift);
1166 $wanted->{bydepth} = 1;
1167 _find_opt($wanted, @_);
20408e3c 1168}
6280b799 1169
7e47e6ff 1170# default
1171$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1172$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1173
6280b799 1174# These are hard-coded for now, but may move to hint files.
10eba763 1175if ($^O eq 'VMS') {
81793b90 1176 $Is_VMS = 1;
7e47e6ff 1177 $File::Find::dont_use_nlink = 1;
1178}
1179elsif ($^O eq 'MacOS') {
1180 $Is_MacOS = 1;
1181 $File::Find::dont_use_nlink = 1;
1182 $File::Find::skip_pattern = qr/^Icon\015\z/;
1183 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1184}
1185
7e47e6ff 1186# this _should_ work properly on all platforms
1187# where File::Find can be expected to work
1188$File::Find::current_dir = File::Spec->curdir || '.';
1189
81793b90 1190$File::Find::dont_use_nlink = 1
497711e7 1191 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
1119cb72 1192 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' ||
1193 $^O eq 'nto';
6280b799 1194
20408e3c 1195# Set dont_use_nlink in your hint file if your system's stat doesn't
1196# report the number of links in a directory as an indication
1197# of the number of files.
1198# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1199unless ($File::Find::dont_use_nlink) {
1200 require Config;
1201 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1202}
1203
7e47e6ff 1204# We need a function that checks if a scalar is tainted. Either use the
1205# Scalar::Util module's tainted() function or our (slower) pure Perl
1206# fallback is_tainted_pp()
1207{
1208 local $@;
1209 eval { require Scalar::Util };
1210 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1211}
1212
a0d0e21e 12131;