doio fatal on cygwin for 13651
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b 10=head1 NAME
11
12find - traverse a file tree
13
14finddepth - traverse a directory structure depth-first
15
16=head1 SYNOPSIS
17
18 use File::Find;
81793b90 19 find(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
237437d0 21
f06db76b 22 use File::Find;
81793b90 23 finddepth(\&wanted, '/foo', '/bar');
f06db76b 24 sub wanted { ... }
3cb6de81 25
81793b90 26 use File::Find;
27 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 28
29=head1 DESCRIPTION
30
20408e3c 31The first argument to find() is either a hash reference describing the
81793b90 32operations to be performed for each file, or a code reference.
20408e3c 33
81793b90 34Here are the possible keys for the hash:
35
36=over 3
37
38=item C<wanted>
39
40The value should be a code reference. This code reference is called
41I<the wanted() function> below.
42
43=item C<bydepth>
44
45Reports the name of a directory only AFTER all its entries
46have been reported. Entry point finddepth() is a shortcut for
47specifying C<{ bydepth => 1 }> in the first argument of find().
48
719c805e 49=item C<preprocess>
50
7e47e6ff 51The value should be a code reference. This code reference is used to
52preprocess the current directory. The name of currently processed
53directory is in $File::Find::dir. Your preprocessing function is
54called after readdir() but before the loop that calls the wanted()
55function. It is called with a list of strings (actually file/directory
56names) and is expected to return a list of strings. The code can be
57used to sort the file/directory names alphabetically, numerically,
58or to filter out directory entries based on their name alone. When
59I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 60
61=item C<postprocess>
62
7e47e6ff 63The value should be a code reference. It is invoked just before leaving
64the currently processed directory. It is called in void context with no
65arguments. The name of the current directory is in $File::Find::dir. This
66hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 67usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 68no-op.
719c805e 69
81793b90 70=item C<follow>
71
72Causes symbolic links to be followed. Since directory trees with symbolic
73links (followed) may contain files more than once and may even have
74cycles, a hash has to be built up with an entry for each file.
75This might be expensive both in space and time for a large
76directory tree. See I<follow_fast> and I<follow_skip> below.
77If either I<follow> or I<follow_fast> is in effect:
78
79=over 6
80
a45bd81d 81=item *
81793b90 82
f10e1564 83It is guaranteed that an I<lstat> has been called before the user's
81793b90 84I<wanted()> function is called. This enables fast file checks involving S< _>.
85
a45bd81d 86=item *
81793b90 87
88There is a variable C<$File::Find::fullname> which holds the absolute
89pathname of the file with all symbolic links resolved
90
91=back
92
93=item C<follow_fast>
94
f10e1564 95This is similar to I<follow> except that it may report some files more
96than once. It does detect cycles, however. Since only symbolic links
97have to be hashed, this is much cheaper both in space and time. If
98processing a file more than once (by the user's I<wanted()> function)
81793b90 99is worse than just taking time, the option I<follow> should be used.
100
101=item C<follow_skip>
102
103C<follow_skip==1>, which is the default, causes all files which are
104neither directories nor symbolic links to be ignored if they are about
105to be processed a second time. If a directory or a symbolic link
106are about to be processed a second time, File::Find dies.
107C<follow_skip==0> causes File::Find to die if any file is about to be
108processed a second time.
109C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 110directories but to proceed normally otherwise.
20408e3c 111
80e52b73 112=item C<dangling_symlinks>
113
114If true and a code reference, will be called with the symbolic link
115name and the directory it lives in as arguments. Otherwise, if true
116and warnings are on, warning "symbolic_link_name is a dangling
117symbolic link\n" will be issued. If false, the dangling symbolic link
118will be silently ignored.
f06db76b 119
81793b90 120=item C<no_chdir>
121
122Does not C<chdir()> to each directory as it recurses. The wanted()
123function will need to be aware of this, of course. In this case,
124C<$_> will be the same as C<$File::Find::name>.
125
126=item C<untaint>
127
128If find is used in taint-mode (-T command line switch or if EUID != UID
129or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 130before they can be chdir'ed to. Therefore they are checked against a regular
131expression I<untaint_pattern>. Note that all names passed to the user's
132I<wanted()> function are still tainted. If this option is used while
133not in taint-mode, C<untaint> is a no-op.
81793b90 134
135=item C<untaint_pattern>
136
137See above. This should be set using the C<qr> quoting operator.
138The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 139Note that the parentheses are vital.
81793b90 140
141=item C<untaint_skip>
142
7e47e6ff 143If set, a directory which fails the I<untaint_pattern> is skipped,
144including all its sub-directories. The default is to 'die' in such a case.
81793b90 145
146=back
147
148The wanted() function does whatever verifications you want.
149C<$File::Find::dir> contains the current directory name, and C<$_> the
150current filename within that directory. C<$File::Find::name> contains
f10e1564 151the complete pathname to the file. You are chdir()'d to
152C<$File::Find::dir> when the function is called, unless C<no_chdir>
7e47e6ff 153was specified. When C<follow> or C<follow_fast> are in effect, there is
f10e1564 154also a C<$File::Find::fullname>. The function may set
155C<$File::Find::prune> to prune the tree unless C<bydepth> was
156specified. Unless C<follow> or C<follow_fast> is specified, for
157compatibility reasons (find.pl, find2perl) there are in addition the
158following globals available: C<$File::Find::topdir>,
159C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 160C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 161
20408e3c 162This library is useful for the C<find2perl> tool, which when fed,
f06db76b 163
164 find2perl / -name .nfs\* -mtime +7 \
81793b90 165 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 166
167produces something like:
168
169 sub wanted {
c7b9dd21 170 /^\.nfs.*\z/s &&
81793b90 171 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 172 int(-M _) > 7 &&
173 unlink($_)
174 ||
81793b90 175 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 176 $dev < 0 &&
6280b799 177 ($File::Find::prune = 1);
f06db76b 178 }
179
43dece2a 180Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
181filehandle that caches the information from the preceding
182stat(), lstat(), or filetest.
183
1cffc1dd 184Here's another interesting wanted function. It will find all symbolic
185links that don't resolve:
f06db76b 186
187 sub wanted {
81793b90 188 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 189 }
f06db76b 190
81793b90 191See also the script C<pfind> on CPAN for a nice application of this
192module.
193
cd68ec93 194=head1 WARNINGS
195
196If you run your program with the C<-w> switch, or if you use the
197C<warnings> pragma, File::Find will report warnings for several weird
198situations. You can disable these warnings by putting the statement
199
200 no warnings 'File::Find';
201
202in the appropriate scope. See L<perllexwarn> for more info about lexical
203warnings.
204
81793b90 205=head1 CAVEAT
206
5fa2bf2b 207=over 2
208
209=item $dont_use_nlink
210
211You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 212force File::Find to always stat directories. This was used for file systems
213that do not have an C<nlink> count matching the number of sub-directories.
214Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
215system) and a couple of others.
5fa2bf2b 216
6cf3b067 217You shouldn't need to set this variable, since File::Find should now detect
218such file systems on-the-fly and switch itself to using stat. This works even
219for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 220
6cf3b067 221If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 222
223=item symlinks
224
f10e1564 225Be aware that the option to follow symbolic links can be dangerous.
81793b90 226Depending on the structure of the directory tree (including symbolic
227links to directories) you might traverse a given (physical) directory
228more than once (only if C<follow_fast> is in effect).
229Furthermore, deleting or changing files in a symbolically linked directory
230might cause very unpleasant surprises, since you delete or change files
231in an unknown directory.
0530a6c4 232
5fa2bf2b 233=back
234
7e47e6ff 235=head1 NOTES
236
237=over 4
238
239=item *
240
241Mac OS (Classic) users should note a few differences:
242
243=over 4
244
245=item *
246
247The path separator is ':', not '/', and the current directory is denoted
248as ':', not '.'. You should be careful about specifying relative pathnames.
249While a full path always begins with a volume name, a relative pathname
250should always begin with a ':'. If specifying a volume name only, a
251trailing ':' is required.
252
253=item *
254
255C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
256contains the name of a directory, that name may or may not end with a
257':'. Likewise, C<$File::Find::name>, which contains the complete
258pathname to that directory, and C<$File::Find::fullname>, which holds
259the absolute pathname of that directory with all symbolic links resolved,
260may or may not end with a ':'.
261
262=item *
263
264The default C<untaint_pattern> (see above) on Mac OS is set to
265C<qr|^(.+)$|>. Note that the parentheses are vital.
266
267=item *
268
269The invisible system file "Icon\015" is ignored. While this file may
270appear in every directory, there are some more invisible system files
271on every volume, which are all located at the volume root level (i.e.
272"MacintoshHD:"). These system files are B<not> excluded automatically.
273Your filter may use the following code to recognize invisible files or
274directories (requires Mac::Files):
275
276 use Mac::Files;
277
278 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 279 # 0 if it's visible or undef if an error occurred
7e47e6ff 280
281 sub invisible($) {
282 my $file = shift;
283 my ($fileCat, $fileInfo);
284 my $invisible_flag = 1 << 14;
285
286 if ( $fileCat = FSpGetCatInfo($file) ) {
287 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
288 return (($fileInfo->fdFlags & $invisible_flag) && 1);
289 }
290 }
291 return undef;
292 }
293
294Generally, invisible files are system files, unless an odd application
295decides to use invisible files for its own purposes. To distinguish
296such files from system files, you have to look at the B<type> and B<creator>
297file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
298C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
299(see MacPerl.pm for details).
300
301Files that appear on the desktop actually reside in an (hidden) directory
302named "Desktop Folder" on the particular disk volume. Note that, although
303all desktop files appear to be on the same "virtual" desktop, each disk
304volume actually maintains its own "Desktop Folder" directory.
305
306=back
307
308=back
0530a6c4 309
a85af077 310=head1 HISTORY
311
312File::Find used to produce incorrect results if called recursively.
313During the development of perl 5.8 this bug was fixed.
314The first fixed version of File::Find was 1.01.
315
f06db76b 316=cut
317
b75c8c73 318our @ISA = qw(Exporter);
319our @EXPORT = qw(find finddepth);
6280b799 320
a0d0e21e 321
81793b90 322use strict;
323my $Is_VMS;
7e47e6ff 324my $Is_MacOS;
81793b90 325
326require File::Basename;
7e47e6ff 327require File::Spec;
81793b90 328
9f826d6a 329# Should ideally be my() not our() but local() currently
330# refuses to operate on lexicals
331
332our %SLnkSeen;
333our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 334 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 335 $pre_process, $post_process, $dangling_symlinks);
81793b90 336
337sub contract_name {
338 my ($cdir,$fn) = @_;
339
7e47e6ff 340 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 341
342 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
343
344 $fn =~ s|^\./||;
345
346 my $abs_name= $cdir . $fn;
347
348 if (substr($fn,0,3) eq '../') {
fecbda2b 349 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 350 }
351
352 return $abs_name;
353}
354
7e47e6ff 355# return the absolute name of a directory or file
356sub contract_name_Mac {
357 my ($cdir,$fn) = @_;
358 my $abs_name;
359
360 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
361
362 my $colon_count = length ($1);
363 if ($colon_count == 1) {
364 $abs_name = $cdir . $2;
365 return $abs_name;
366 }
367 else {
368 # need to move up the tree, but
369 # only if it's not a volume name
370 for (my $i=1; $i<$colon_count; $i++) {
371 unless ($cdir =~ /^[^:]+:$/) { # volume name
372 $cdir =~ s/[^:]+:$//;
373 }
374 else {
375 return undef;
376 }
377 }
378 $abs_name = $cdir . $2;
379 return $abs_name;
380 }
381
382 }
383 else {
384
385 # $fn may be a valid path to a directory or file or (dangling)
386 # symlink, without a leading ':'
387 if ( (-e $fn) || (-l $fn) ) {
388 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
389 return $fn; # $fn is already an absolute path
390 }
391 else {
392 $abs_name = $cdir . $fn;
393 return $abs_name;
394 }
395 }
396 else { # argh!, $fn is not a valid directory/file
397 return undef;
398 }
399 }
400}
81793b90 401
402sub PathCombine($$) {
403 my ($Base,$Name) = @_;
404 my $AbsName;
405
7e47e6ff 406 if ($Is_MacOS) {
407 # $Name is the resolved symlink (always a full path on MacOS),
408 # i.e. there's no need to call contract_name_Mac()
409 $AbsName = $Name;
410
411 # (simple) check for recursion
412 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
413 return undef;
414 }
81793b90 415 }
416 else {
7e47e6ff 417 if (substr($Name,0,1) eq '/') {
418 $AbsName= $Name;
419 }
420 else {
421 $AbsName= contract_name($Base,$Name);
422 }
81793b90 423
7e47e6ff 424 # (simple) check for recursion
425 my $newlen= length($AbsName);
426 if ($newlen <= length($Base)) {
427 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
428 && $AbsName eq substr($Base,0,$newlen))
429 {
430 return undef;
431 }
81793b90 432 }
433 }
434 return $AbsName;
435}
436
437sub Follow_SymLink($) {
438 my ($AbsName) = @_;
439
440 my ($NewName,$DEV, $INO);
441 ($DEV, $INO)= lstat $AbsName;
442
443 while (-l _) {
444 if ($SLnkSeen{$DEV, $INO}++) {
445 if ($follow_skip < 2) {
446 die "$AbsName is encountered a second time";
a0d0e21e 447 }
448 else {
81793b90 449 return undef;
a0d0e21e 450 }
451 }
81793b90 452 $NewName= PathCombine($AbsName, readlink($AbsName));
453 unless(defined $NewName) {
454 if ($follow_skip < 2) {
455 die "$AbsName is a recursive symbolic link";
456 }
457 else {
458 return undef;
a0d0e21e 459 }
81793b90 460 }
461 else {
462 $AbsName= $NewName;
463 }
464 ($DEV, $INO) = lstat($AbsName);
465 return undef unless defined $DEV; # dangling symbolic link
466 }
467
cd68ec93 468 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 469 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 470 die "$AbsName encountered a second time";
471 }
472 else {
473 return undef;
474 }
475 }
476
477 return $AbsName;
478}
479
17f410f9 480our($dir, $name, $fullname, $prune);
81793b90 481sub _find_dir_symlnk($$$);
482sub _find_dir($$$);
483
7e47e6ff 484# check whether or not a scalar variable is tainted
485# (code straight from the Camel, 3rd ed., page 561)
486sub is_tainted_pp {
487 my $arg = shift;
488 my $nada = substr($arg, 0, 0); # zero-length
489 local $@;
490 eval { eval "# $nada" };
491 return length($@) != 0;
492}
493
81793b90 494sub _find_opt {
495 my $wanted = shift;
496 die "invalid top directory" unless defined $_[0];
497
9f826d6a 498 # This function must local()ize everything because callbacks may
499 # call find() or finddepth()
500
501 local %SLnkSeen;
502 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
503 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 504 $pre_process, $post_process, $dangling_symlinks);
9f826d6a 505 local($dir, $name, $fullname, $prune);
506
a0c9c202 507 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 508 my $cwd_untainted = $cwd;
509 my $check_t_cwd = 1;
510 $wanted_callback = $wanted->{wanted};
511 $bydepth = $wanted->{bydepth};
512 $pre_process = $wanted->{preprocess};
513 $post_process = $wanted->{postprocess};
514 $no_chdir = $wanted->{no_chdir};
515 $full_check = $wanted->{follow};
516 $follow = $full_check || $wanted->{follow_fast};
517 $follow_skip = $wanted->{follow_skip};
518 $untaint = $wanted->{untaint};
519 $untaint_pat = $wanted->{untaint_pattern};
520 $untaint_skip = $wanted->{untaint_skip};
521 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 522
1cffc1dd 523 # for compatibility reasons (find.pl, find2perl)
9f826d6a 524 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 525
526 # a symbolic link to a directory doesn't increase the link count
527 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
528
e7b91b67 529 my ($abs_dir, $Is_Dir);
81793b90 530
531 Proc_Top_Item:
532 foreach my $TOP (@_) {
7e47e6ff 533 my $top_item = $TOP;
534
535 if ($Is_MacOS) {
536 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
537 $top_item = ":$top_item"
3fa6e24b 538 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 539 }
540 else {
541 $top_item =~ s|/\z|| unless $top_item eq '/';
542 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
543 }
544
545 $Is_Dir= 0;
546
547 if ($follow) {
548
549 if ($Is_MacOS) {
550 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
551
552 if ($top_item eq $File::Find::current_dir) {
00f4e5f0 553 # avoid empty name after return to '/'
554 $name = '/' unless length( $name );
7e47e6ff 555 $abs_dir = $cwd;
556 }
557 else {
558 $abs_dir = contract_name_Mac($cwd, $top_item);
559 unless (defined $abs_dir) {
cd68ec93 560 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 561 next Proc_Top_Item;
562 }
563 }
564
565 }
566 else {
567 if (substr($top_item,0,1) eq '/') {
568 $abs_dir = $top_item;
569 }
570 elsif ($top_item eq $File::Find::current_dir) {
571 $abs_dir = $cwd;
572 }
573 else { # care about any ../
574 $abs_dir = contract_name("$cwd/",$top_item);
575 }
576 }
577 $abs_dir= Follow_SymLink($abs_dir);
578 unless (defined $abs_dir) {
80e52b73 579 if ($dangling_symlinks) {
580 if (ref $dangling_symlinks eq 'CODE') {
581 $dangling_symlinks->($top_item, $cwd);
582 } else {
cd68ec93 583 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 584 }
585 }
81793b90 586 next Proc_Top_Item;
7e47e6ff 587 }
588
589 if (-d _) {
81793b90 590 _find_dir_symlnk($wanted, $abs_dir, $top_item);
591 $Is_Dir= 1;
7e47e6ff 592 }
593 }
81793b90 594 else { # no follow
7e47e6ff 595 $topdir = $top_item;
596 unless (defined $topnlink) {
cd68ec93 597 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 598 next Proc_Top_Item;
599 }
600 if (-d _) {
c7b9dd21 601 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 602 _find_dir($wanted, $top_item, $topnlink);
81793b90 603 $Is_Dir= 1;
7e47e6ff 604 }
237437d0 605 else {
81793b90 606 $abs_dir= $top_item;
7e47e6ff 607 }
608 }
81793b90 609
7e47e6ff 610 unless ($Is_Dir) {
81793b90 611 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 612 if ($Is_MacOS) {
613 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
614 }
615 else {
616 ($dir,$_) = ('./', $top_item);
617 }
81793b90 618 }
619
7e47e6ff 620 $abs_dir = $dir;
621 if (( $untaint ) && (is_tainted($dir) )) {
622 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 623 unless (defined $abs_dir) {
624 if ($untaint_skip == 0) {
7e47e6ff 625 die "directory $dir is still tainted";
81793b90 626 }
627 else {
628 next Proc_Top_Item;
629 }
630 }
7e47e6ff 631 }
81793b90 632
7e47e6ff 633 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 634 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 635 next Proc_Top_Item;
636 }
719911cc 637
7e47e6ff 638 $name = $abs_dir . $_; # $File::Find::name
719911cc 639
7e47e6ff 640 { &$wanted_callback }; # protect against wild "next"
81793b90 641
7e47e6ff 642 }
81793b90 643
7e47e6ff 644 unless ( $no_chdir ) {
645 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
646 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
647 unless (defined $cwd_untainted) {
648 die "insecure cwd in find(depth)";
649 }
650 $check_t_cwd = 0;
651 }
652 unless (chdir $cwd_untainted) {
653 die "Can't cd to $cwd: $!\n";
654 }
655 }
81793b90 656 }
657}
658
659# API:
660# $wanted
661# $p_dir : "parent directory"
662# $nlink : what came back from the stat
663# preconditions:
664# chdir (if not no_chdir) to dir
665
666sub _find_dir($$$) {
667 my ($wanted, $p_dir, $nlink) = @_;
668 my ($CdLvl,$Level) = (0,0);
669 my @Stack;
670 my @filenames;
671 my ($subcount,$sub_nlink);
672 my $SE= [];
673 my $dir_name= $p_dir;
7e47e6ff 674 my $dir_pref;
39e79f6b 675 my $dir_rel = $File::Find::current_dir;
7e47e6ff 676 my $tainted = 0;
5fa2bf2b 677 my $no_nlink;
7e47e6ff 678
679 if ($Is_MacOS) {
680 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 681 }
682 else {
683 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 684 }
81793b90 685
686 local ($dir, $name, $prune, *DIR);
7e47e6ff 687
688 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 689 my $udir = $p_dir;
7e47e6ff 690 if (( $untaint ) && (is_tainted($p_dir) )) {
691 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 692 unless (defined $udir) {
693 if ($untaint_skip == 0) {
694 die "directory $p_dir is still tainted";
695 }
696 else {
697 return;
698 }
237437d0 699 }
a0d0e21e 700 }
81793b90 701 unless (chdir $udir) {
cd68ec93 702 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 703 return;
704 }
705 }
7e47e6ff 706
707 # push the starting directory
57e73c4b 708 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 709
7e47e6ff 710 if ($Is_MacOS) {
711 $p_dir = $dir_pref; # ensure trailing ':'
712 }
713
81793b90 714 while (defined $SE) {
715 unless ($bydepth) {
7e47e6ff 716 $dir= $p_dir; # $File::Find::dir
717 $name= $dir_name; # $File::Find::name
718 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 719 # prune may happen here
7e47e6ff 720 $prune= 0;
9111eb4b 721 # guarantee lstat for directory
722 lstat( $dir_name );
7e47e6ff 723 { &$wanted_callback }; # protect against wild "next"
724 next if $prune;
81793b90 725 }
7e47e6ff 726
81793b90 727 # change to that directory
7e47e6ff 728 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 729 my $udir= $dir_rel;
7e47e6ff 730 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
731 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 732 unless (defined $udir) {
733 if ($untaint_skip == 0) {
7e47e6ff 734 if ($Is_MacOS) {
735 die "directory ($p_dir) $dir_rel is still tainted";
736 }
737 else {
738 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
739 }
740 } else { # $untaint_skip == 1
741 next;
81793b90 742 }
743 }
744 }
745 unless (chdir $udir) {
7e47e6ff 746 if ($Is_MacOS) {
cd68ec93 747 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 748 }
749 else {
cd68ec93 750 warnings::warnif "Can't cd to (" .
751 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 752 }
81793b90 753 next;
754 }
755 $CdLvl++;
756 }
757
7e47e6ff 758 if ($Is_MacOS) {
759 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
760 }
761
762 $dir= $dir_name; # $File::Find::dir
81793b90 763
764 # Get the list of files in the current directory.
7e47e6ff 765 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 766 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 767 next;
768 }
769 @filenames = readdir DIR;
770 closedir(DIR);
719c805e 771 @filenames = &$pre_process(@filenames) if $pre_process;
772 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 773
5fa2bf2b 774 # default: use whatever was specifid
775 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
776 $no_nlink = $avoid_nlink;
777 # if dir has wrong nlink count, force switch to slower stat method
778 $no_nlink = 1 if ($nlink < 2);
779
780 if ($nlink == 2 && !$no_nlink) {
81793b90 781 # This dir has no subdirectories.
782 for my $FN (@filenames) {
7e47e6ff 783 next if $FN =~ $File::Find::skip_pattern;
81793b90 784
7e47e6ff 785 $name = $dir_pref . $FN; # $File::Find::name
786 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 787 { &$wanted_callback }; # protect against wild "next"
81793b90 788 }
789
790 }
791 else {
792 # This dir has subdirectories.
793 $subcount = $nlink - 2;
794
795 for my $FN (@filenames) {
7e47e6ff 796 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 797 if ($subcount > 0 || $no_nlink) {
81793b90 798 # Seen all the subdirs?
799 # check for directoriness.
800 # stat is faster for a file in the current directory
07867069 801 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 802
803 if (-d _) {
804 --$subcount;
c7b9dd21 805 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 806 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
807 }
808 else {
7e47e6ff 809 $name = $dir_pref . $FN; # $File::Find::name
810 $_= ($no_chdir ? $name : $FN); # $_
73396e07 811 { &$wanted_callback }; # protect against wild "next"
81793b90 812 }
813 }
07867069 814 else {
7e47e6ff 815 $name = $dir_pref . $FN; # $File::Find::name
816 $_= ($no_chdir ? $name : $FN); # $_
73396e07 817 { &$wanted_callback }; # protect against wild "next"
81793b90 818 }
819 }
820 }
17b275ff 821 }
822 continue {
57e73c4b 823 while ( defined ($SE = pop @Stack) ) {
81793b90 824 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
825 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 826 my $tmp;
827 if ($Is_MacOS) {
828 $tmp = (':' x ($CdLvl-$Level)) . ':';
829 }
830 else {
831 $tmp = join('/',('..') x ($CdLvl-$Level));
832 }
833 die "Can't cd to $dir_name" . $tmp
834 unless chdir ($tmp);
81793b90 835 $CdLvl = $Level;
836 }
7e47e6ff 837
838 if ($Is_MacOS) {
839 # $pdir always has a trailing ':', except for the starting dir,
840 # where $dir_rel eq ':'
841 $dir_name = "$p_dir$dir_rel";
842 $dir_pref = "$dir_name:";
843 }
844 else {
845 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
846 $dir_pref = "$dir_name/";
847 }
848
719c805e 849 if ( $nlink == -2 ) {
7e47e6ff 850 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 851 $_ = $File::Find::current_dir;
719c805e 852 &$post_process; # End-of-directory processing
7e47e6ff 853 }
854 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
855 $name = $dir_name;
856 if ($Is_MacOS) {
857 if ($dir_rel eq ':') { # must be the top dir, where we started
858 $name =~ s|:$||; # $File::Find::name
859 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
860 }
861 $dir = $p_dir; # $File::Find::dir
862 $_ = ($no_chdir ? $name : $dir_rel); # $_
863 }
864 else {
865 if ( substr($name,-2) eq '/.' ) {
866 $name =~ s|/\.$||;
867 }
868 $dir = $p_dir;
869 $_ = ($no_chdir ? $dir_name : $dir_rel );
870 if ( substr($_,-2) eq '/.' ) {
871 s|/\.$||;
872 }
873 }
9111eb4b 874 # guarantee lstat at return to directory
875 lstat( $dir_name );
7e47e6ff 876 { &$wanted_callback }; # protect against wild "next"
877 }
878 else {
879 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
880 last;
881 }
81793b90 882 }
a0d0e21e 883 }
884}
885
81793b90 886
887# API:
888# $wanted
889# $dir_loc : absolute location of a dir
890# $p_dir : "parent directory"
891# preconditions:
892# chdir (if not no_chdir) to dir
893
894sub _find_dir_symlnk($$$) {
7e47e6ff 895 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 896 my @Stack;
897 my @filenames;
898 my $new_loc;
7e47e6ff 899 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 900 my $SE = [];
901 my $dir_name = $p_dir;
7e47e6ff 902 my $dir_pref;
903 my $loc_pref;
39e79f6b 904 my $dir_rel = $File::Find::current_dir;
7e47e6ff 905 my $byd_flag; # flag for pending stack entry if $bydepth
906 my $tainted = 0;
907 my $ok = 1;
908
909 if ($Is_MacOS) {
910 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
911 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 912 } else {
913 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
914 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 915 }
81793b90 916
917 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 918
919 unless ($no_chdir) {
920 # untaint the topdir
921 if (( $untaint ) && (is_tainted($dir_loc) )) {
922 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
923 # once untainted, $updir_loc is pushed on the stack (as parent directory);
924 # hence, we don't need to untaint the parent directory every time we chdir
925 # to it later
926 unless (defined $updir_loc) {
81793b90 927 if ($untaint_skip == 0) {
928 die "directory $dir_loc is still tainted";
929 }
930 else {
931 return;
932 }
933 }
934 }
7e47e6ff 935 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
936 unless ($ok) {
cd68ec93 937 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 938 return;
939 }
940 }
941
7e47e6ff 942 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
943
944 if ($Is_MacOS) {
945 $p_dir = $dir_pref; # ensure trailing ':'
946 }
57e73c4b 947
81793b90 948 while (defined $SE) {
949
950 unless ($bydepth) {
7e47e6ff 951 # change (back) to parent directory (always untainted)
704ea872 952 unless ($no_chdir) {
7e47e6ff 953 unless (chdir $updir_loc) {
cd68ec93 954 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 955 next;
956 }
957 }
7e47e6ff 958 $dir= $p_dir; # $File::Find::dir
959 $name= $dir_name; # $File::Find::name
960 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
961 $fullname= $dir_loc; # $File::Find::fullname
81793b90 962 # prune may happen here
7e47e6ff 963 $prune= 0;
704ea872 964 lstat($_); # make sure file tests with '_' work
7e47e6ff 965 { &$wanted_callback }; # protect against wild "next"
966 next if $prune;
81793b90 967 }
968
969 # change to that directory
7e47e6ff 970 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
971 $updir_loc = $dir_loc;
972 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
973 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
974 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
975 unless (defined $updir_loc) {
81793b90 976 if ($untaint_skip == 0) {
977 die "directory $dir_loc is still tainted";
a0d0e21e 978 }
237437d0 979 else {
81793b90 980 next;
237437d0 981 }
a0d0e21e 982 }
983 }
7e47e6ff 984 unless (chdir $updir_loc) {
cd68ec93 985 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 986 next;
987 }
988 }
989
7e47e6ff 990 if ($Is_MacOS) {
991 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
992 }
993
994 $dir = $dir_name; # $File::Find::dir
81793b90 995
996 # Get the list of files in the current directory.
7e47e6ff 997 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 998 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 999 next;
1000 }
1001 @filenames = readdir DIR;
1002 closedir(DIR);
1003
1004 for my $FN (@filenames) {
7e47e6ff 1005 next if $FN =~ $File::Find::skip_pattern;
81793b90 1006
1007 # follow symbolic links / do an lstat
07867069 1008 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1009
1010 # ignore if invalid symlink
1011 next unless defined $new_loc;
7e47e6ff 1012
81793b90 1013 if (-d _) {
7e47e6ff 1014 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1015 }
1016 else {
7e47e6ff 1017 $fullname = $new_loc; # $File::Find::fullname
1018 $name = $dir_pref . $FN; # $File::Find::name
1019 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 1020 { &$wanted_callback }; # protect against wild "next"
81793b90 1021 }
1022 }
1023
81793b90 1024 }
1025 continue {
57e73c4b 1026 while (defined($SE = pop @Stack)) {
7e47e6ff 1027 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1028 if ($Is_MacOS) {
1029 # $p_dir always has a trailing ':', except for the starting dir,
1030 # where $dir_rel eq ':'
1031 $dir_name = "$p_dir$dir_rel";
1032 $dir_pref = "$dir_name:";
1033 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1034 }
1035 else {
1036 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1037 $dir_pref = "$dir_name/";
1038 $loc_pref = "$dir_loc/";
1039 }
1040 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1041 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1042 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1043 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1044 next;
1045 }
1046 }
1047 $fullname = $dir_loc; # $File::Find::fullname
1048 $name = $dir_name; # $File::Find::name
1049 if ($Is_MacOS) {
1050 if ($dir_rel eq ':') { # must be the top dir, where we started
1051 $name =~ s|:$||; # $File::Find::name
1052 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1053 }
1054 $dir = $p_dir; # $File::Find::dir
1055 $_ = ($no_chdir ? $name : $dir_rel); # $_
1056 }
1057 else {
1058 if ( substr($name,-2) eq '/.' ) {
1059 $name =~ s|/\.$||; # $File::Find::name
1060 }
1061 $dir = $p_dir; # $File::Find::dir
1062 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1063 if ( substr($_,-2) eq '/.' ) {
1064 s|/\.$||;
1065 }
1066 }
1067
1068 lstat($_); # make sure file tests with '_' work
1069 { &$wanted_callback }; # protect against wild "next"
1070 }
1071 else {
1072 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1073 last;
1074 }
a0d0e21e 1075 }
1076 }
1077}
1078
81793b90 1079
20408e3c 1080sub wrap_wanted {
81793b90 1081 my $wanted = shift;
1082 if ( ref($wanted) eq 'HASH' ) {
1083 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1084 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1085 }
1086 if ( $wanted->{untaint} ) {
7e47e6ff 1087 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1088 unless defined $wanted->{untaint_pattern};
1089 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1090 }
1091 return $wanted;
1092 }
1093 else {
1094 return { wanted => $wanted };
1095 }
a0d0e21e 1096}
1097
20408e3c 1098sub find {
81793b90 1099 my $wanted = shift;
1100 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1101}
1102
55d729e4 1103sub finddepth {
81793b90 1104 my $wanted = wrap_wanted(shift);
1105 $wanted->{bydepth} = 1;
1106 _find_opt($wanted, @_);
20408e3c 1107}
6280b799 1108
7e47e6ff 1109# default
1110$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1111$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1112
6280b799 1113# These are hard-coded for now, but may move to hint files.
10eba763 1114if ($^O eq 'VMS') {
81793b90 1115 $Is_VMS = 1;
7e47e6ff 1116 $File::Find::dont_use_nlink = 1;
1117}
1118elsif ($^O eq 'MacOS') {
1119 $Is_MacOS = 1;
1120 $File::Find::dont_use_nlink = 1;
1121 $File::Find::skip_pattern = qr/^Icon\015\z/;
1122 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1123}
1124
7e47e6ff 1125# this _should_ work properly on all platforms
1126# where File::Find can be expected to work
1127$File::Find::current_dir = File::Spec->curdir || '.';
1128
81793b90 1129$File::Find::dont_use_nlink = 1
497711e7 1130 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
2986a63f 1131 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'NetWare';
6280b799 1132
20408e3c 1133# Set dont_use_nlink in your hint file if your system's stat doesn't
1134# report the number of links in a directory as an indication
1135# of the number of files.
1136# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1137unless ($File::Find::dont_use_nlink) {
1138 require Config;
1139 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1140}
1141
7e47e6ff 1142# We need a function that checks if a scalar is tainted. Either use the
1143# Scalar::Util module's tainted() function or our (slower) pure Perl
1144# fallback is_tainted_pp()
1145{
1146 local $@;
1147 eval { require Scalar::Util };
1148 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1149}
1150
a0d0e21e 11511;