Re: File/Spec/t/rel2abs2rel2whatever broken again
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b 10=head1 NAME
11
abfdd623 12File::Find - Traverse a directory tree.
f06db76b 13
14=head1 SYNOPSIS
15
16 use File::Find;
abfdd623 17 find(\&wanted, @directories_to_seach);
f06db76b 18 sub wanted { ... }
237437d0 19
f06db76b 20 use File::Find;
abfdd623 21 finddepth(\&wanted, @directories_to_search);
f06db76b 22 sub wanted { ... }
3cb6de81 23
81793b90 24 use File::Find;
25 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 26
27=head1 DESCRIPTION
28
abfdd623 29These are functions for searching through directory trees doing work
30on each file found similar to the Unix I<find> command. File::Find
31exports two functions, C<find> and C<finddepth>. They work similarly
32but have subtle differences.
33
34=over 4
35
36=item B<find>
37
38 find(\&wanted, @directories);
39 find(\%options, @directories);
40
41find() does a breadth-first search over the given @directories in the
42order they are given. In essense, it works from the top down.
43
44For each file or directory found the &wanted subroutine is called (see
45below for details). Additionally, for each directory found it will go
46into that directory and continue the search.
47
48=item B<finddepth>
49
50 finddepth(\&wanted, @directories);
51 finddepth(\%options, @directories);
52
53finddepth() works just like find() except it does a depth-first search.
54It works from the bottom of the directory tree up.
55
56=back
57
58=head2 %options
59
20408e3c 60The first argument to find() is either a hash reference describing the
abfdd623 61operations to be performed for each file, or a code reference. The
62code reference is described in L<The wanted function> below.
20408e3c 63
81793b90 64Here are the possible keys for the hash:
65
66=over 3
67
68=item C<wanted>
69
abfdd623 70The value should be a code reference. This code reference is
71described in L<The wanted function> below.
81793b90 72
73=item C<bydepth>
74
75Reports the name of a directory only AFTER all its entries
76have been reported. Entry point finddepth() is a shortcut for
f801979b 77specifying C<{ bydepth =E<gt> 1 }> in the first argument of find().
81793b90 78
719c805e 79=item C<preprocess>
80
7e47e6ff 81The value should be a code reference. This code reference is used to
82preprocess the current directory. The name of currently processed
83directory is in $File::Find::dir. Your preprocessing function is
84called after readdir() but before the loop that calls the wanted()
85function. It is called with a list of strings (actually file/directory
86names) and is expected to return a list of strings. The code can be
87used to sort the file/directory names alphabetically, numerically,
88or to filter out directory entries based on their name alone. When
89I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 90
91=item C<postprocess>
92
7e47e6ff 93The value should be a code reference. It is invoked just before leaving
94the currently processed directory. It is called in void context with no
95arguments. The name of the current directory is in $File::Find::dir. This
96hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 97usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 98no-op.
719c805e 99
81793b90 100=item C<follow>
101
102Causes symbolic links to be followed. Since directory trees with symbolic
103links (followed) may contain files more than once and may even have
104cycles, a hash has to be built up with an entry for each file.
105This might be expensive both in space and time for a large
106directory tree. See I<follow_fast> and I<follow_skip> below.
107If either I<follow> or I<follow_fast> is in effect:
108
109=over 6
110
a45bd81d 111=item *
81793b90 112
f10e1564 113It is guaranteed that an I<lstat> has been called before the user's
81793b90 114I<wanted()> function is called. This enables fast file checks involving S< _>.
115
a45bd81d 116=item *
81793b90 117
118There is a variable C<$File::Find::fullname> which holds the absolute
119pathname of the file with all symbolic links resolved
120
121=back
122
123=item C<follow_fast>
124
f10e1564 125This is similar to I<follow> except that it may report some files more
126than once. It does detect cycles, however. Since only symbolic links
127have to be hashed, this is much cheaper both in space and time. If
128processing a file more than once (by the user's I<wanted()> function)
81793b90 129is worse than just taking time, the option I<follow> should be used.
130
131=item C<follow_skip>
132
133C<follow_skip==1>, which is the default, causes all files which are
134neither directories nor symbolic links to be ignored if they are about
135to be processed a second time. If a directory or a symbolic link
136are about to be processed a second time, File::Find dies.
137C<follow_skip==0> causes File::Find to die if any file is about to be
138processed a second time.
139C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 140directories but to proceed normally otherwise.
20408e3c 141
80e52b73 142=item C<dangling_symlinks>
143
144If true and a code reference, will be called with the symbolic link
145name and the directory it lives in as arguments. Otherwise, if true
146and warnings are on, warning "symbolic_link_name is a dangling
147symbolic link\n" will be issued. If false, the dangling symbolic link
148will be silently ignored.
f06db76b 149
81793b90 150=item C<no_chdir>
151
152Does not C<chdir()> to each directory as it recurses. The wanted()
153function will need to be aware of this, of course. In this case,
154C<$_> will be the same as C<$File::Find::name>.
155
156=item C<untaint>
157
158If find is used in taint-mode (-T command line switch or if EUID != UID
159or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 160before they can be chdir'ed to. Therefore they are checked against a regular
161expression I<untaint_pattern>. Note that all names passed to the user's
162I<wanted()> function are still tainted. If this option is used while
163not in taint-mode, C<untaint> is a no-op.
81793b90 164
165=item C<untaint_pattern>
166
167See above. This should be set using the C<qr> quoting operator.
168The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 169Note that the parentheses are vital.
81793b90 170
171=item C<untaint_skip>
172
7e47e6ff 173If set, a directory which fails the I<untaint_pattern> is skipped,
174including all its sub-directories. The default is to 'die' in such a case.
81793b90 175
176=back
177
abfdd623 178=head2 The wanted function
179
180The wanted() function does whatever verifications you want on each
181file and directory. It takes no arguments but rather does its work
182through a collection of variables.
183
184=over 4
185
f837ebe2 186=item C<$File::Find::dir> is the current directory name,
abfdd623 187
188=item C<$_> is the current filename within that directory
189
f837ebe2 190=item C<$File::Find::name> is the complete pathname to the file.
abfdd623 191
192=back
193
f837ebe2 194Don't modify these variables.
195
abfdd623 196For example, when examining the file /some/path/foo.ext you will have:
197
198 $File::Find::dir = /some/path/
199 $_ = foo.ext
200 $File::Find::name = /some/path/foo.ext
201
202You are chdir()'d toC<$File::Find::dir> when the function is called,
203unless C<no_chdir> was specified. Note that when changing to
204directories is in effect the root directory (F</>) is a somewhat
205special case inasmuch as the concatenation of C<$File::Find::dir>,
206C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
207table below summarizes all variants:
5cf0a2f2 208
209 $File::Find::name $File::Find::dir $_
210 default / / .
211 no_chdir=>0 /etc / etc
212 /etc/x /etc x
abfdd623 213
5cf0a2f2 214 no_chdir=>1 / / /
215 /etc / /etc
216 /etc/x /etc /etc/x
217
218
219When <follow> or <follow_fast> are in effect, there is
f10e1564 220also a C<$File::Find::fullname>. The function may set
221C<$File::Find::prune> to prune the tree unless C<bydepth> was
222specified. Unless C<follow> or C<follow_fast> is specified, for
223compatibility reasons (find.pl, find2perl) there are in addition the
224following globals available: C<$File::Find::topdir>,
225C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 226C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 227
20408e3c 228This library is useful for the C<find2perl> tool, which when fed,
f06db76b 229
230 find2perl / -name .nfs\* -mtime +7 \
81793b90 231 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 232
233produces something like:
234
235 sub wanted {
c7b9dd21 236 /^\.nfs.*\z/s &&
81793b90 237 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 238 int(-M _) > 7 &&
239 unlink($_)
240 ||
81793b90 241 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 242 $dev < 0 &&
6280b799 243 ($File::Find::prune = 1);
f06db76b 244 }
245
43dece2a 246Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
247filehandle that caches the information from the preceding
248stat(), lstat(), or filetest.
249
1cffc1dd 250Here's another interesting wanted function. It will find all symbolic
251links that don't resolve:
f06db76b 252
253 sub wanted {
81793b90 254 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 255 }
f06db76b 256
81793b90 257See also the script C<pfind> on CPAN for a nice application of this
258module.
259
cd68ec93 260=head1 WARNINGS
261
262If you run your program with the C<-w> switch, or if you use the
263C<warnings> pragma, File::Find will report warnings for several weird
264situations. You can disable these warnings by putting the statement
265
266 no warnings 'File::Find';
267
268in the appropriate scope. See L<perllexwarn> for more info about lexical
269warnings.
270
81793b90 271=head1 CAVEAT
272
5fa2bf2b 273=over 2
274
275=item $dont_use_nlink
276
277You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 278force File::Find to always stat directories. This was used for file systems
279that do not have an C<nlink> count matching the number of sub-directories.
280Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
281system) and a couple of others.
5fa2bf2b 282
6cf3b067 283You shouldn't need to set this variable, since File::Find should now detect
284such file systems on-the-fly and switch itself to using stat. This works even
285for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 286
6cf3b067 287If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 288
289=item symlinks
290
f10e1564 291Be aware that the option to follow symbolic links can be dangerous.
81793b90 292Depending on the structure of the directory tree (including symbolic
293links to directories) you might traverse a given (physical) directory
294more than once (only if C<follow_fast> is in effect).
295Furthermore, deleting or changing files in a symbolically linked directory
296might cause very unpleasant surprises, since you delete or change files
297in an unknown directory.
0530a6c4 298
5fa2bf2b 299=back
300
7e47e6ff 301=head1 NOTES
302
303=over 4
304
305=item *
306
307Mac OS (Classic) users should note a few differences:
308
309=over 4
310
311=item *
312
313The path separator is ':', not '/', and the current directory is denoted
314as ':', not '.'. You should be careful about specifying relative pathnames.
315While a full path always begins with a volume name, a relative pathname
316should always begin with a ':'. If specifying a volume name only, a
317trailing ':' is required.
318
319=item *
320
321C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
322contains the name of a directory, that name may or may not end with a
323':'. Likewise, C<$File::Find::name>, which contains the complete
324pathname to that directory, and C<$File::Find::fullname>, which holds
325the absolute pathname of that directory with all symbolic links resolved,
326may or may not end with a ':'.
327
328=item *
329
330The default C<untaint_pattern> (see above) on Mac OS is set to
331C<qr|^(.+)$|>. Note that the parentheses are vital.
332
333=item *
334
335The invisible system file "Icon\015" is ignored. While this file may
336appear in every directory, there are some more invisible system files
337on every volume, which are all located at the volume root level (i.e.
338"MacintoshHD:"). These system files are B<not> excluded automatically.
339Your filter may use the following code to recognize invisible files or
340directories (requires Mac::Files):
341
342 use Mac::Files;
343
344 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 345 # 0 if it's visible or undef if an error occurred
7e47e6ff 346
347 sub invisible($) {
348 my $file = shift;
349 my ($fileCat, $fileInfo);
350 my $invisible_flag = 1 << 14;
351
352 if ( $fileCat = FSpGetCatInfo($file) ) {
353 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
354 return (($fileInfo->fdFlags & $invisible_flag) && 1);
355 }
356 }
357 return undef;
358 }
359
360Generally, invisible files are system files, unless an odd application
361decides to use invisible files for its own purposes. To distinguish
362such files from system files, you have to look at the B<type> and B<creator>
363file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
364C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
365(see MacPerl.pm for details).
366
367Files that appear on the desktop actually reside in an (hidden) directory
368named "Desktop Folder" on the particular disk volume. Note that, although
369all desktop files appear to be on the same "virtual" desktop, each disk
370volume actually maintains its own "Desktop Folder" directory.
371
372=back
373
374=back
0530a6c4 375
a85af077 376=head1 HISTORY
377
378File::Find used to produce incorrect results if called recursively.
379During the development of perl 5.8 this bug was fixed.
380The first fixed version of File::Find was 1.01.
381
f06db76b 382=cut
383
b75c8c73 384our @ISA = qw(Exporter);
385our @EXPORT = qw(find finddepth);
6280b799 386
a0d0e21e 387
81793b90 388use strict;
389my $Is_VMS;
7e47e6ff 390my $Is_MacOS;
81793b90 391
392require File::Basename;
7e47e6ff 393require File::Spec;
81793b90 394
9f826d6a 395# Should ideally be my() not our() but local() currently
396# refuses to operate on lexicals
397
398our %SLnkSeen;
399our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 400 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 401 $pre_process, $post_process, $dangling_symlinks);
81793b90 402
403sub contract_name {
404 my ($cdir,$fn) = @_;
405
7e47e6ff 406 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 407
408 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
409
410 $fn =~ s|^\./||;
411
412 my $abs_name= $cdir . $fn;
413
414 if (substr($fn,0,3) eq '../') {
fecbda2b 415 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 416 }
417
418 return $abs_name;
419}
420
7e47e6ff 421# return the absolute name of a directory or file
422sub contract_name_Mac {
423 my ($cdir,$fn) = @_;
424 my $abs_name;
425
426 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
427
428 my $colon_count = length ($1);
429 if ($colon_count == 1) {
430 $abs_name = $cdir . $2;
431 return $abs_name;
432 }
433 else {
434 # need to move up the tree, but
435 # only if it's not a volume name
436 for (my $i=1; $i<$colon_count; $i++) {
437 unless ($cdir =~ /^[^:]+:$/) { # volume name
438 $cdir =~ s/[^:]+:$//;
439 }
440 else {
441 return undef;
442 }
443 }
444 $abs_name = $cdir . $2;
445 return $abs_name;
446 }
447
448 }
449 else {
450
451 # $fn may be a valid path to a directory or file or (dangling)
452 # symlink, without a leading ':'
453 if ( (-e $fn) || (-l $fn) ) {
454 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
455 return $fn; # $fn is already an absolute path
456 }
457 else {
458 $abs_name = $cdir . $fn;
459 return $abs_name;
460 }
461 }
462 else { # argh!, $fn is not a valid directory/file
463 return undef;
464 }
465 }
466}
81793b90 467
468sub PathCombine($$) {
469 my ($Base,$Name) = @_;
470 my $AbsName;
471
7e47e6ff 472 if ($Is_MacOS) {
473 # $Name is the resolved symlink (always a full path on MacOS),
474 # i.e. there's no need to call contract_name_Mac()
475 $AbsName = $Name;
476
477 # (simple) check for recursion
478 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
479 return undef;
480 }
81793b90 481 }
482 else {
7e47e6ff 483 if (substr($Name,0,1) eq '/') {
484 $AbsName= $Name;
485 }
486 else {
487 $AbsName= contract_name($Base,$Name);
488 }
81793b90 489
7e47e6ff 490 # (simple) check for recursion
491 my $newlen= length($AbsName);
492 if ($newlen <= length($Base)) {
493 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
494 && $AbsName eq substr($Base,0,$newlen))
495 {
496 return undef;
497 }
81793b90 498 }
499 }
500 return $AbsName;
501}
502
503sub Follow_SymLink($) {
504 my ($AbsName) = @_;
505
506 my ($NewName,$DEV, $INO);
507 ($DEV, $INO)= lstat $AbsName;
508
509 while (-l _) {
510 if ($SLnkSeen{$DEV, $INO}++) {
511 if ($follow_skip < 2) {
512 die "$AbsName is encountered a second time";
a0d0e21e 513 }
514 else {
81793b90 515 return undef;
a0d0e21e 516 }
517 }
81793b90 518 $NewName= PathCombine($AbsName, readlink($AbsName));
519 unless(defined $NewName) {
520 if ($follow_skip < 2) {
521 die "$AbsName is a recursive symbolic link";
522 }
523 else {
524 return undef;
a0d0e21e 525 }
81793b90 526 }
527 else {
528 $AbsName= $NewName;
529 }
530 ($DEV, $INO) = lstat($AbsName);
531 return undef unless defined $DEV; # dangling symbolic link
532 }
533
cd68ec93 534 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 535 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 536 die "$AbsName encountered a second time";
537 }
538 else {
539 return undef;
540 }
541 }
542
543 return $AbsName;
544}
545
17f410f9 546our($dir, $name, $fullname, $prune);
81793b90 547sub _find_dir_symlnk($$$);
548sub _find_dir($$$);
549
7e47e6ff 550# check whether or not a scalar variable is tainted
551# (code straight from the Camel, 3rd ed., page 561)
552sub is_tainted_pp {
553 my $arg = shift;
554 my $nada = substr($arg, 0, 0); # zero-length
555 local $@;
556 eval { eval "# $nada" };
557 return length($@) != 0;
558}
559
81793b90 560sub _find_opt {
561 my $wanted = shift;
562 die "invalid top directory" unless defined $_[0];
563
9f826d6a 564 # This function must local()ize everything because callbacks may
565 # call find() or finddepth()
566
567 local %SLnkSeen;
568 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
569 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 570 $pre_process, $post_process, $dangling_symlinks);
9f826d6a 571 local($dir, $name, $fullname, $prune);
572
a0c9c202 573 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 574 my $cwd_untainted = $cwd;
575 my $check_t_cwd = 1;
576 $wanted_callback = $wanted->{wanted};
577 $bydepth = $wanted->{bydepth};
578 $pre_process = $wanted->{preprocess};
579 $post_process = $wanted->{postprocess};
580 $no_chdir = $wanted->{no_chdir};
581 $full_check = $wanted->{follow};
582 $follow = $full_check || $wanted->{follow_fast};
583 $follow_skip = $wanted->{follow_skip};
584 $untaint = $wanted->{untaint};
585 $untaint_pat = $wanted->{untaint_pattern};
586 $untaint_skip = $wanted->{untaint_skip};
587 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 588
1cffc1dd 589 # for compatibility reasons (find.pl, find2perl)
9f826d6a 590 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 591
592 # a symbolic link to a directory doesn't increase the link count
593 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
594
e7b91b67 595 my ($abs_dir, $Is_Dir);
81793b90 596
597 Proc_Top_Item:
598 foreach my $TOP (@_) {
7e47e6ff 599 my $top_item = $TOP;
600
601 if ($Is_MacOS) {
602 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
603 $top_item = ":$top_item"
3fa6e24b 604 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 605 }
606 else {
607 $top_item =~ s|/\z|| unless $top_item eq '/';
608 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
609 }
610
611 $Is_Dir= 0;
612
613 if ($follow) {
614
615 if ($Is_MacOS) {
616 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
617
618 if ($top_item eq $File::Find::current_dir) {
619 $abs_dir = $cwd;
620 }
621 else {
622 $abs_dir = contract_name_Mac($cwd, $top_item);
623 unless (defined $abs_dir) {
cd68ec93 624 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 625 next Proc_Top_Item;
626 }
627 }
628
629 }
630 else {
631 if (substr($top_item,0,1) eq '/') {
632 $abs_dir = $top_item;
633 }
634 elsif ($top_item eq $File::Find::current_dir) {
635 $abs_dir = $cwd;
636 }
637 else { # care about any ../
638 $abs_dir = contract_name("$cwd/",$top_item);
639 }
640 }
641 $abs_dir= Follow_SymLink($abs_dir);
642 unless (defined $abs_dir) {
80e52b73 643 if ($dangling_symlinks) {
644 if (ref $dangling_symlinks eq 'CODE') {
645 $dangling_symlinks->($top_item, $cwd);
646 } else {
cd68ec93 647 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 648 }
649 }
81793b90 650 next Proc_Top_Item;
7e47e6ff 651 }
652
653 if (-d _) {
81793b90 654 _find_dir_symlnk($wanted, $abs_dir, $top_item);
655 $Is_Dir= 1;
7e47e6ff 656 }
657 }
81793b90 658 else { # no follow
7e47e6ff 659 $topdir = $top_item;
660 unless (defined $topnlink) {
cd68ec93 661 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 662 next Proc_Top_Item;
663 }
664 if (-d _) {
c7b9dd21 665 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 666 _find_dir($wanted, $top_item, $topnlink);
81793b90 667 $Is_Dir= 1;
7e47e6ff 668 }
237437d0 669 else {
81793b90 670 $abs_dir= $top_item;
7e47e6ff 671 }
672 }
81793b90 673
7e47e6ff 674 unless ($Is_Dir) {
81793b90 675 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 676 if ($Is_MacOS) {
677 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
678 }
679 else {
680 ($dir,$_) = ('./', $top_item);
681 }
81793b90 682 }
683
7e47e6ff 684 $abs_dir = $dir;
685 if (( $untaint ) && (is_tainted($dir) )) {
686 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 687 unless (defined $abs_dir) {
688 if ($untaint_skip == 0) {
7e47e6ff 689 die "directory $dir is still tainted";
81793b90 690 }
691 else {
692 next Proc_Top_Item;
693 }
694 }
7e47e6ff 695 }
81793b90 696
7e47e6ff 697 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 698 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 699 next Proc_Top_Item;
700 }
719911cc 701
7e47e6ff 702 $name = $abs_dir . $_; # $File::Find::name
719911cc 703
abfdd623 704 { $wanted_callback->() }; # protect against wild "next"
81793b90 705
7e47e6ff 706 }
81793b90 707
7e47e6ff 708 unless ( $no_chdir ) {
709 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
710 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
711 unless (defined $cwd_untainted) {
712 die "insecure cwd in find(depth)";
713 }
714 $check_t_cwd = 0;
715 }
716 unless (chdir $cwd_untainted) {
717 die "Can't cd to $cwd: $!\n";
718 }
719 }
81793b90 720 }
721}
722
723# API:
724# $wanted
725# $p_dir : "parent directory"
726# $nlink : what came back from the stat
727# preconditions:
728# chdir (if not no_chdir) to dir
729
730sub _find_dir($$$) {
731 my ($wanted, $p_dir, $nlink) = @_;
732 my ($CdLvl,$Level) = (0,0);
733 my @Stack;
734 my @filenames;
735 my ($subcount,$sub_nlink);
736 my $SE= [];
737 my $dir_name= $p_dir;
7e47e6ff 738 my $dir_pref;
39e79f6b 739 my $dir_rel = $File::Find::current_dir;
7e47e6ff 740 my $tainted = 0;
5fa2bf2b 741 my $no_nlink;
7e47e6ff 742
743 if ($Is_MacOS) {
744 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 745 }
746 else {
747 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 748 }
81793b90 749
750 local ($dir, $name, $prune, *DIR);
7e47e6ff 751
752 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 753 my $udir = $p_dir;
7e47e6ff 754 if (( $untaint ) && (is_tainted($p_dir) )) {
755 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 756 unless (defined $udir) {
757 if ($untaint_skip == 0) {
758 die "directory $p_dir is still tainted";
759 }
760 else {
761 return;
762 }
237437d0 763 }
a0d0e21e 764 }
81793b90 765 unless (chdir $udir) {
cd68ec93 766 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 767 return;
768 }
769 }
7e47e6ff 770
771 # push the starting directory
57e73c4b 772 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 773
7e47e6ff 774 if ($Is_MacOS) {
775 $p_dir = $dir_pref; # ensure trailing ':'
776 }
777
81793b90 778 while (defined $SE) {
779 unless ($bydepth) {
7e47e6ff 780 $dir= $p_dir; # $File::Find::dir
781 $name= $dir_name; # $File::Find::name
782 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 783 # prune may happen here
7e47e6ff 784 $prune= 0;
abfdd623 785 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 786 next if $prune;
81793b90 787 }
7e47e6ff 788
81793b90 789 # change to that directory
7e47e6ff 790 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 791 my $udir= $dir_rel;
7e47e6ff 792 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
793 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 794 unless (defined $udir) {
795 if ($untaint_skip == 0) {
7e47e6ff 796 if ($Is_MacOS) {
797 die "directory ($p_dir) $dir_rel is still tainted";
798 }
799 else {
800 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
801 }
802 } else { # $untaint_skip == 1
803 next;
81793b90 804 }
805 }
806 }
807 unless (chdir $udir) {
7e47e6ff 808 if ($Is_MacOS) {
cd68ec93 809 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 810 }
811 else {
cd68ec93 812 warnings::warnif "Can't cd to (" .
813 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 814 }
81793b90 815 next;
816 }
817 $CdLvl++;
818 }
819
7e47e6ff 820 if ($Is_MacOS) {
821 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
822 }
823
824 $dir= $dir_name; # $File::Find::dir
81793b90 825
826 # Get the list of files in the current directory.
7e47e6ff 827 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 828 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 829 next;
830 }
831 @filenames = readdir DIR;
832 closedir(DIR);
abfdd623 833 @filenames = $pre_process->(@filenames) if $pre_process;
719c805e 834 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 835
5fa2bf2b 836 # default: use whatever was specifid
837 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
838 $no_nlink = $avoid_nlink;
839 # if dir has wrong nlink count, force switch to slower stat method
840 $no_nlink = 1 if ($nlink < 2);
841
842 if ($nlink == 2 && !$no_nlink) {
81793b90 843 # This dir has no subdirectories.
844 for my $FN (@filenames) {
7e47e6ff 845 next if $FN =~ $File::Find::skip_pattern;
81793b90 846
7e47e6ff 847 $name = $dir_pref . $FN; # $File::Find::name
848 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 849 { $wanted_callback->() }; # protect against wild "next"
81793b90 850 }
851
852 }
853 else {
854 # This dir has subdirectories.
855 $subcount = $nlink - 2;
856
857 for my $FN (@filenames) {
7e47e6ff 858 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 859 if ($subcount > 0 || $no_nlink) {
81793b90 860 # Seen all the subdirs?
861 # check for directoriness.
862 # stat is faster for a file in the current directory
07867069 863 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 864
865 if (-d _) {
866 --$subcount;
c7b9dd21 867 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 868 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
869 }
870 else {
7e47e6ff 871 $name = $dir_pref . $FN; # $File::Find::name
872 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 873 { $wanted_callback->() }; # protect against wild "next"
81793b90 874 }
875 }
07867069 876 else {
7e47e6ff 877 $name = $dir_pref . $FN; # $File::Find::name
878 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 879 { $wanted_callback->() }; # protect against wild "next"
81793b90 880 }
881 }
882 }
17b275ff 883 }
884 continue {
57e73c4b 885 while ( defined ($SE = pop @Stack) ) {
81793b90 886 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
887 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 888 my $tmp;
889 if ($Is_MacOS) {
890 $tmp = (':' x ($CdLvl-$Level)) . ':';
891 }
892 else {
893 $tmp = join('/',('..') x ($CdLvl-$Level));
894 }
895 die "Can't cd to $dir_name" . $tmp
896 unless chdir ($tmp);
81793b90 897 $CdLvl = $Level;
898 }
7e47e6ff 899
900 if ($Is_MacOS) {
901 # $pdir always has a trailing ':', except for the starting dir,
902 # where $dir_rel eq ':'
903 $dir_name = "$p_dir$dir_rel";
904 $dir_pref = "$dir_name:";
905 }
906 else {
907 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
908 $dir_pref = "$dir_name/";
909 }
910
719c805e 911 if ( $nlink == -2 ) {
7e47e6ff 912 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 913 $_ = $File::Find::current_dir;
abfdd623 914 $post_process->(); # End-of-directory processing
7e47e6ff 915 }
916 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
917 $name = $dir_name;
918 if ($Is_MacOS) {
919 if ($dir_rel eq ':') { # must be the top dir, where we started
920 $name =~ s|:$||; # $File::Find::name
921 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
922 }
923 $dir = $p_dir; # $File::Find::dir
924 $_ = ($no_chdir ? $name : $dir_rel); # $_
925 }
926 else {
927 if ( substr($name,-2) eq '/.' ) {
5cf0a2f2 928 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff 929 }
930 $dir = $p_dir;
931 $_ = ($no_chdir ? $dir_name : $dir_rel );
932 if ( substr($_,-2) eq '/.' ) {
5cf0a2f2 933 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 934 }
935 }
abfdd623 936 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 937 }
938 else {
939 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
940 last;
941 }
81793b90 942 }
a0d0e21e 943 }
944}
945
81793b90 946
947# API:
948# $wanted
949# $dir_loc : absolute location of a dir
950# $p_dir : "parent directory"
951# preconditions:
952# chdir (if not no_chdir) to dir
953
954sub _find_dir_symlnk($$$) {
7e47e6ff 955 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 956 my @Stack;
957 my @filenames;
958 my $new_loc;
7e47e6ff 959 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 960 my $SE = [];
961 my $dir_name = $p_dir;
7e47e6ff 962 my $dir_pref;
963 my $loc_pref;
39e79f6b 964 my $dir_rel = $File::Find::current_dir;
7e47e6ff 965 my $byd_flag; # flag for pending stack entry if $bydepth
966 my $tainted = 0;
967 my $ok = 1;
968
969 if ($Is_MacOS) {
970 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
971 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 972 } else {
973 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
974 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 975 }
81793b90 976
977 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 978
979 unless ($no_chdir) {
980 # untaint the topdir
981 if (( $untaint ) && (is_tainted($dir_loc) )) {
982 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
983 # once untainted, $updir_loc is pushed on the stack (as parent directory);
984 # hence, we don't need to untaint the parent directory every time we chdir
985 # to it later
986 unless (defined $updir_loc) {
81793b90 987 if ($untaint_skip == 0) {
988 die "directory $dir_loc is still tainted";
989 }
990 else {
991 return;
992 }
993 }
994 }
7e47e6ff 995 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
996 unless ($ok) {
cd68ec93 997 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 998 return;
999 }
1000 }
1001
7e47e6ff 1002 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
1003
1004 if ($Is_MacOS) {
1005 $p_dir = $dir_pref; # ensure trailing ':'
1006 }
57e73c4b 1007
81793b90 1008 while (defined $SE) {
1009
1010 unless ($bydepth) {
7e47e6ff 1011 # change (back) to parent directory (always untainted)
704ea872 1012 unless ($no_chdir) {
7e47e6ff 1013 unless (chdir $updir_loc) {
cd68ec93 1014 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 1015 next;
1016 }
1017 }
7e47e6ff 1018 $dir= $p_dir; # $File::Find::dir
1019 $name= $dir_name; # $File::Find::name
1020 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
1021 $fullname= $dir_loc; # $File::Find::fullname
81793b90 1022 # prune may happen here
7e47e6ff 1023 $prune= 0;
704ea872 1024 lstat($_); # make sure file tests with '_' work
abfdd623 1025 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1026 next if $prune;
81793b90 1027 }
1028
1029 # change to that directory
7e47e6ff 1030 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1031 $updir_loc = $dir_loc;
1032 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
1033 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
1034 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
1035 unless (defined $updir_loc) {
81793b90 1036 if ($untaint_skip == 0) {
1037 die "directory $dir_loc is still tainted";
a0d0e21e 1038 }
237437d0 1039 else {
81793b90 1040 next;
237437d0 1041 }
a0d0e21e 1042 }
1043 }
7e47e6ff 1044 unless (chdir $updir_loc) {
cd68ec93 1045 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 1046 next;
1047 }
1048 }
1049
7e47e6ff 1050 if ($Is_MacOS) {
1051 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1052 }
1053
1054 $dir = $dir_name; # $File::Find::dir
81793b90 1055
1056 # Get the list of files in the current directory.
7e47e6ff 1057 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 1058 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 1059 next;
1060 }
1061 @filenames = readdir DIR;
1062 closedir(DIR);
1063
1064 for my $FN (@filenames) {
7e47e6ff 1065 next if $FN =~ $File::Find::skip_pattern;
81793b90 1066
1067 # follow symbolic links / do an lstat
07867069 1068 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1069
1070 # ignore if invalid symlink
1071 next unless defined $new_loc;
7e47e6ff 1072
81793b90 1073 if (-d _) {
7e47e6ff 1074 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1075 }
1076 else {
7e47e6ff 1077 $fullname = $new_loc; # $File::Find::fullname
1078 $name = $dir_pref . $FN; # $File::Find::name
1079 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 1080 { $wanted_callback->() }; # protect against wild "next"
81793b90 1081 }
1082 }
1083
81793b90 1084 }
1085 continue {
57e73c4b 1086 while (defined($SE = pop @Stack)) {
7e47e6ff 1087 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1088 if ($Is_MacOS) {
1089 # $p_dir always has a trailing ':', except for the starting dir,
1090 # where $dir_rel eq ':'
1091 $dir_name = "$p_dir$dir_rel";
1092 $dir_pref = "$dir_name:";
1093 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1094 }
1095 else {
1096 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1097 $dir_pref = "$dir_name/";
1098 $loc_pref = "$dir_loc/";
1099 }
1100 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1101 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1102 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1103 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1104 next;
1105 }
1106 }
1107 $fullname = $dir_loc; # $File::Find::fullname
1108 $name = $dir_name; # $File::Find::name
1109 if ($Is_MacOS) {
1110 if ($dir_rel eq ':') { # must be the top dir, where we started
1111 $name =~ s|:$||; # $File::Find::name
1112 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1113 }
1114 $dir = $p_dir; # $File::Find::dir
1115 $_ = ($no_chdir ? $name : $dir_rel); # $_
1116 }
1117 else {
1118 if ( substr($name,-2) eq '/.' ) {
f801979b 1119 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
7e47e6ff 1120 }
1121 $dir = $p_dir; # $File::Find::dir
1122 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1123 if ( substr($_,-2) eq '/.' ) {
f801979b 1124 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 1125 }
1126 }
1127
1128 lstat($_); # make sure file tests with '_' work
abfdd623 1129 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1130 }
1131 else {
1132 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1133 last;
1134 }
a0d0e21e 1135 }
1136 }
1137}
1138
81793b90 1139
20408e3c 1140sub wrap_wanted {
81793b90 1141 my $wanted = shift;
1142 if ( ref($wanted) eq 'HASH' ) {
1143 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1144 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1145 }
1146 if ( $wanted->{untaint} ) {
7e47e6ff 1147 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1148 unless defined $wanted->{untaint_pattern};
1149 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1150 }
1151 return $wanted;
1152 }
1153 else {
1154 return { wanted => $wanted };
1155 }
a0d0e21e 1156}
1157
20408e3c 1158sub find {
81793b90 1159 my $wanted = shift;
1160 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1161}
1162
55d729e4 1163sub finddepth {
81793b90 1164 my $wanted = wrap_wanted(shift);
1165 $wanted->{bydepth} = 1;
1166 _find_opt($wanted, @_);
20408e3c 1167}
6280b799 1168
7e47e6ff 1169# default
1170$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1171$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1172
6280b799 1173# These are hard-coded for now, but may move to hint files.
10eba763 1174if ($^O eq 'VMS') {
81793b90 1175 $Is_VMS = 1;
7e47e6ff 1176 $File::Find::dont_use_nlink = 1;
1177}
1178elsif ($^O eq 'MacOS') {
1179 $Is_MacOS = 1;
1180 $File::Find::dont_use_nlink = 1;
1181 $File::Find::skip_pattern = qr/^Icon\015\z/;
1182 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1183}
1184
7e47e6ff 1185# this _should_ work properly on all platforms
1186# where File::Find can be expected to work
1187$File::Find::current_dir = File::Spec->curdir || '.';
1188
81793b90 1189$File::Find::dont_use_nlink = 1
497711e7 1190 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
94c67634 1191 $^O eq 'cygwin' || $^O eq 'epoc';
6280b799 1192
20408e3c 1193# Set dont_use_nlink in your hint file if your system's stat doesn't
1194# report the number of links in a directory as an indication
1195# of the number of files.
1196# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1197unless ($File::Find::dont_use_nlink) {
1198 require Config;
1199 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1200}
1201
7e47e6ff 1202# We need a function that checks if a scalar is tainted. Either use the
1203# Scalar::Util module's tainted() function or our (slower) pure Perl
1204# fallback is_tainted_pp()
1205{
1206 local $@;
1207 eval { require Scalar::Util };
1208 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1209}
1210
a0d0e21e 12111;