Re: [PATCH] ID 20011206.007 File::Find:name set to empty string for "/"
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b 10=head1 NAME
11
12find - traverse a file tree
13
14finddepth - traverse a directory structure depth-first
15
16=head1 SYNOPSIS
17
18 use File::Find;
81793b90 19 find(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
237437d0 21
f06db76b 22 use File::Find;
81793b90 23 finddepth(\&wanted, '/foo', '/bar');
f06db76b 24 sub wanted { ... }
3cb6de81 25
81793b90 26 use File::Find;
27 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 28
29=head1 DESCRIPTION
30
20408e3c 31The first argument to find() is either a hash reference describing the
81793b90 32operations to be performed for each file, or a code reference.
20408e3c 33
81793b90 34Here are the possible keys for the hash:
35
36=over 3
37
38=item C<wanted>
39
40The value should be a code reference. This code reference is called
41I<the wanted() function> below.
42
43=item C<bydepth>
44
45Reports the name of a directory only AFTER all its entries
46have been reported. Entry point finddepth() is a shortcut for
47specifying C<{ bydepth => 1 }> in the first argument of find().
48
719c805e 49=item C<preprocess>
50
7e47e6ff 51The value should be a code reference. This code reference is used to
52preprocess the current directory. The name of currently processed
53directory is in $File::Find::dir. Your preprocessing function is
54called after readdir() but before the loop that calls the wanted()
55function. It is called with a list of strings (actually file/directory
56names) and is expected to return a list of strings. The code can be
57used to sort the file/directory names alphabetically, numerically,
58or to filter out directory entries based on their name alone. When
59I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 60
61=item C<postprocess>
62
7e47e6ff 63The value should be a code reference. It is invoked just before leaving
64the currently processed directory. It is called in void context with no
65arguments. The name of the current directory is in $File::Find::dir. This
66hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 67usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 68no-op.
719c805e 69
81793b90 70=item C<follow>
71
72Causes symbolic links to be followed. Since directory trees with symbolic
73links (followed) may contain files more than once and may even have
74cycles, a hash has to be built up with an entry for each file.
75This might be expensive both in space and time for a large
76directory tree. See I<follow_fast> and I<follow_skip> below.
77If either I<follow> or I<follow_fast> is in effect:
78
79=over 6
80
a45bd81d 81=item *
81793b90 82
f10e1564 83It is guaranteed that an I<lstat> has been called before the user's
81793b90 84I<wanted()> function is called. This enables fast file checks involving S< _>.
85
a45bd81d 86=item *
81793b90 87
88There is a variable C<$File::Find::fullname> which holds the absolute
89pathname of the file with all symbolic links resolved
90
91=back
92
93=item C<follow_fast>
94
f10e1564 95This is similar to I<follow> except that it may report some files more
96than once. It does detect cycles, however. Since only symbolic links
97have to be hashed, this is much cheaper both in space and time. If
98processing a file more than once (by the user's I<wanted()> function)
81793b90 99is worse than just taking time, the option I<follow> should be used.
100
101=item C<follow_skip>
102
103C<follow_skip==1>, which is the default, causes all files which are
104neither directories nor symbolic links to be ignored if they are about
105to be processed a second time. If a directory or a symbolic link
106are about to be processed a second time, File::Find dies.
107C<follow_skip==0> causes File::Find to die if any file is about to be
108processed a second time.
109C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 110directories but to proceed normally otherwise.
20408e3c 111
80e52b73 112=item C<dangling_symlinks>
113
114If true and a code reference, will be called with the symbolic link
115name and the directory it lives in as arguments. Otherwise, if true
116and warnings are on, warning "symbolic_link_name is a dangling
117symbolic link\n" will be issued. If false, the dangling symbolic link
118will be silently ignored.
f06db76b 119
81793b90 120=item C<no_chdir>
121
122Does not C<chdir()> to each directory as it recurses. The wanted()
123function will need to be aware of this, of course. In this case,
124C<$_> will be the same as C<$File::Find::name>.
125
126=item C<untaint>
127
128If find is used in taint-mode (-T command line switch or if EUID != UID
129or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 130before they can be chdir'ed to. Therefore they are checked against a regular
131expression I<untaint_pattern>. Note that all names passed to the user's
132I<wanted()> function are still tainted. If this option is used while
133not in taint-mode, C<untaint> is a no-op.
81793b90 134
135=item C<untaint_pattern>
136
137See above. This should be set using the C<qr> quoting operator.
138The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 139Note that the parentheses are vital.
81793b90 140
141=item C<untaint_skip>
142
7e47e6ff 143If set, a directory which fails the I<untaint_pattern> is skipped,
144including all its sub-directories. The default is to 'die' in such a case.
81793b90 145
146=back
147
148The wanted() function does whatever verifications you want.
149C<$File::Find::dir> contains the current directory name, and C<$_> the
150current filename within that directory. C<$File::Find::name> contains
f10e1564 151the complete pathname to the file. You are chdir()'d to
152C<$File::Find::dir> when the function is called, unless C<no_chdir>
5cf0a2f2 153was specified. Note that when changing to directories is in effect
154the root directory (F</>) is a somewhat special case inasmuch as the
155concatenation of C<$File::Find::dir>, C<'/'> and C<$_> is not literally
156equal to C<$File::Find::name>. The table below summarizes all variants:
157
158 $File::Find::name $File::Find::dir $_
159 default / / .
160 no_chdir=>0 /etc / etc
161 /etc/x /etc x
162
163 no_chdir=>1 / / /
164 /etc / /etc
165 /etc/x /etc /etc/x
166
167
168When <follow> or <follow_fast> are in effect, there is
f10e1564 169also a C<$File::Find::fullname>. The function may set
170C<$File::Find::prune> to prune the tree unless C<bydepth> was
171specified. Unless C<follow> or C<follow_fast> is specified, for
172compatibility reasons (find.pl, find2perl) there are in addition the
173following globals available: C<$File::Find::topdir>,
174C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 175C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 176
20408e3c 177This library is useful for the C<find2perl> tool, which when fed,
f06db76b 178
179 find2perl / -name .nfs\* -mtime +7 \
81793b90 180 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 181
182produces something like:
183
184 sub wanted {
c7b9dd21 185 /^\.nfs.*\z/s &&
81793b90 186 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 187 int(-M _) > 7 &&
188 unlink($_)
189 ||
81793b90 190 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 191 $dev < 0 &&
6280b799 192 ($File::Find::prune = 1);
f06db76b 193 }
194
43dece2a 195Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
196filehandle that caches the information from the preceding
197stat(), lstat(), or filetest.
198
1cffc1dd 199Here's another interesting wanted function. It will find all symbolic
200links that don't resolve:
f06db76b 201
202 sub wanted {
81793b90 203 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 204 }
f06db76b 205
81793b90 206See also the script C<pfind> on CPAN for a nice application of this
207module.
208
cd68ec93 209=head1 WARNINGS
210
211If you run your program with the C<-w> switch, or if you use the
212C<warnings> pragma, File::Find will report warnings for several weird
213situations. You can disable these warnings by putting the statement
214
215 no warnings 'File::Find';
216
217in the appropriate scope. See L<perllexwarn> for more info about lexical
218warnings.
219
81793b90 220=head1 CAVEAT
221
5fa2bf2b 222=over 2
223
224=item $dont_use_nlink
225
226You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 227force File::Find to always stat directories. This was used for file systems
228that do not have an C<nlink> count matching the number of sub-directories.
229Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
230system) and a couple of others.
5fa2bf2b 231
6cf3b067 232You shouldn't need to set this variable, since File::Find should now detect
233such file systems on-the-fly and switch itself to using stat. This works even
234for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 235
6cf3b067 236If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 237
238=item symlinks
239
f10e1564 240Be aware that the option to follow symbolic links can be dangerous.
81793b90 241Depending on the structure of the directory tree (including symbolic
242links to directories) you might traverse a given (physical) directory
243more than once (only if C<follow_fast> is in effect).
244Furthermore, deleting or changing files in a symbolically linked directory
245might cause very unpleasant surprises, since you delete or change files
246in an unknown directory.
0530a6c4 247
5fa2bf2b 248=back
249
7e47e6ff 250=head1 NOTES
251
252=over 4
253
254=item *
255
256Mac OS (Classic) users should note a few differences:
257
258=over 4
259
260=item *
261
262The path separator is ':', not '/', and the current directory is denoted
263as ':', not '.'. You should be careful about specifying relative pathnames.
264While a full path always begins with a volume name, a relative pathname
265should always begin with a ':'. If specifying a volume name only, a
266trailing ':' is required.
267
268=item *
269
270C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
271contains the name of a directory, that name may or may not end with a
272':'. Likewise, C<$File::Find::name>, which contains the complete
273pathname to that directory, and C<$File::Find::fullname>, which holds
274the absolute pathname of that directory with all symbolic links resolved,
275may or may not end with a ':'.
276
277=item *
278
279The default C<untaint_pattern> (see above) on Mac OS is set to
280C<qr|^(.+)$|>. Note that the parentheses are vital.
281
282=item *
283
284The invisible system file "Icon\015" is ignored. While this file may
285appear in every directory, there are some more invisible system files
286on every volume, which are all located at the volume root level (i.e.
287"MacintoshHD:"). These system files are B<not> excluded automatically.
288Your filter may use the following code to recognize invisible files or
289directories (requires Mac::Files):
290
291 use Mac::Files;
292
293 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 294 # 0 if it's visible or undef if an error occurred
7e47e6ff 295
296 sub invisible($) {
297 my $file = shift;
298 my ($fileCat, $fileInfo);
299 my $invisible_flag = 1 << 14;
300
301 if ( $fileCat = FSpGetCatInfo($file) ) {
302 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
303 return (($fileInfo->fdFlags & $invisible_flag) && 1);
304 }
305 }
306 return undef;
307 }
308
309Generally, invisible files are system files, unless an odd application
310decides to use invisible files for its own purposes. To distinguish
311such files from system files, you have to look at the B<type> and B<creator>
312file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
313C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
314(see MacPerl.pm for details).
315
316Files that appear on the desktop actually reside in an (hidden) directory
317named "Desktop Folder" on the particular disk volume. Note that, although
318all desktop files appear to be on the same "virtual" desktop, each disk
319volume actually maintains its own "Desktop Folder" directory.
320
321=back
322
323=back
0530a6c4 324
a85af077 325=head1 HISTORY
326
327File::Find used to produce incorrect results if called recursively.
328During the development of perl 5.8 this bug was fixed.
329The first fixed version of File::Find was 1.01.
330
f06db76b 331=cut
332
b75c8c73 333our @ISA = qw(Exporter);
334our @EXPORT = qw(find finddepth);
6280b799 335
a0d0e21e 336
81793b90 337use strict;
338my $Is_VMS;
7e47e6ff 339my $Is_MacOS;
81793b90 340
341require File::Basename;
7e47e6ff 342require File::Spec;
81793b90 343
9f826d6a 344# Should ideally be my() not our() but local() currently
345# refuses to operate on lexicals
346
347our %SLnkSeen;
348our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 349 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 350 $pre_process, $post_process, $dangling_symlinks);
81793b90 351
352sub contract_name {
353 my ($cdir,$fn) = @_;
354
7e47e6ff 355 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 356
357 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
358
359 $fn =~ s|^\./||;
360
361 my $abs_name= $cdir . $fn;
362
363 if (substr($fn,0,3) eq '../') {
fecbda2b 364 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 365 }
366
367 return $abs_name;
368}
369
7e47e6ff 370# return the absolute name of a directory or file
371sub contract_name_Mac {
372 my ($cdir,$fn) = @_;
373 my $abs_name;
374
375 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
376
377 my $colon_count = length ($1);
378 if ($colon_count == 1) {
379 $abs_name = $cdir . $2;
380 return $abs_name;
381 }
382 else {
383 # need to move up the tree, but
384 # only if it's not a volume name
385 for (my $i=1; $i<$colon_count; $i++) {
386 unless ($cdir =~ /^[^:]+:$/) { # volume name
387 $cdir =~ s/[^:]+:$//;
388 }
389 else {
390 return undef;
391 }
392 }
393 $abs_name = $cdir . $2;
394 return $abs_name;
395 }
396
397 }
398 else {
399
400 # $fn may be a valid path to a directory or file or (dangling)
401 # symlink, without a leading ':'
402 if ( (-e $fn) || (-l $fn) ) {
403 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
404 return $fn; # $fn is already an absolute path
405 }
406 else {
407 $abs_name = $cdir . $fn;
408 return $abs_name;
409 }
410 }
411 else { # argh!, $fn is not a valid directory/file
412 return undef;
413 }
414 }
415}
81793b90 416
417sub PathCombine($$) {
418 my ($Base,$Name) = @_;
419 my $AbsName;
420
7e47e6ff 421 if ($Is_MacOS) {
422 # $Name is the resolved symlink (always a full path on MacOS),
423 # i.e. there's no need to call contract_name_Mac()
424 $AbsName = $Name;
425
426 # (simple) check for recursion
427 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
428 return undef;
429 }
81793b90 430 }
431 else {
7e47e6ff 432 if (substr($Name,0,1) eq '/') {
433 $AbsName= $Name;
434 }
435 else {
436 $AbsName= contract_name($Base,$Name);
437 }
81793b90 438
7e47e6ff 439 # (simple) check for recursion
440 my $newlen= length($AbsName);
441 if ($newlen <= length($Base)) {
442 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
443 && $AbsName eq substr($Base,0,$newlen))
444 {
445 return undef;
446 }
81793b90 447 }
448 }
449 return $AbsName;
450}
451
452sub Follow_SymLink($) {
453 my ($AbsName) = @_;
454
455 my ($NewName,$DEV, $INO);
456 ($DEV, $INO)= lstat $AbsName;
457
458 while (-l _) {
459 if ($SLnkSeen{$DEV, $INO}++) {
460 if ($follow_skip < 2) {
461 die "$AbsName is encountered a second time";
a0d0e21e 462 }
463 else {
81793b90 464 return undef;
a0d0e21e 465 }
466 }
81793b90 467 $NewName= PathCombine($AbsName, readlink($AbsName));
468 unless(defined $NewName) {
469 if ($follow_skip < 2) {
470 die "$AbsName is a recursive symbolic link";
471 }
472 else {
473 return undef;
a0d0e21e 474 }
81793b90 475 }
476 else {
477 $AbsName= $NewName;
478 }
479 ($DEV, $INO) = lstat($AbsName);
480 return undef unless defined $DEV; # dangling symbolic link
481 }
482
cd68ec93 483 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 484 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 485 die "$AbsName encountered a second time";
486 }
487 else {
488 return undef;
489 }
490 }
491
492 return $AbsName;
493}
494
17f410f9 495our($dir, $name, $fullname, $prune);
81793b90 496sub _find_dir_symlnk($$$);
497sub _find_dir($$$);
498
7e47e6ff 499# check whether or not a scalar variable is tainted
500# (code straight from the Camel, 3rd ed., page 561)
501sub is_tainted_pp {
502 my $arg = shift;
503 my $nada = substr($arg, 0, 0); # zero-length
504 local $@;
505 eval { eval "# $nada" };
506 return length($@) != 0;
507}
508
81793b90 509sub _find_opt {
510 my $wanted = shift;
511 die "invalid top directory" unless defined $_[0];
512
9f826d6a 513 # This function must local()ize everything because callbacks may
514 # call find() or finddepth()
515
516 local %SLnkSeen;
517 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
518 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 519 $pre_process, $post_process, $dangling_symlinks);
9f826d6a 520 local($dir, $name, $fullname, $prune);
521
a0c9c202 522 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 523 my $cwd_untainted = $cwd;
524 my $check_t_cwd = 1;
525 $wanted_callback = $wanted->{wanted};
526 $bydepth = $wanted->{bydepth};
527 $pre_process = $wanted->{preprocess};
528 $post_process = $wanted->{postprocess};
529 $no_chdir = $wanted->{no_chdir};
530 $full_check = $wanted->{follow};
531 $follow = $full_check || $wanted->{follow_fast};
532 $follow_skip = $wanted->{follow_skip};
533 $untaint = $wanted->{untaint};
534 $untaint_pat = $wanted->{untaint_pattern};
535 $untaint_skip = $wanted->{untaint_skip};
536 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 537
1cffc1dd 538 # for compatibility reasons (find.pl, find2perl)
9f826d6a 539 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 540
541 # a symbolic link to a directory doesn't increase the link count
542 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
543
e7b91b67 544 my ($abs_dir, $Is_Dir);
81793b90 545
546 Proc_Top_Item:
547 foreach my $TOP (@_) {
7e47e6ff 548 my $top_item = $TOP;
549
550 if ($Is_MacOS) {
551 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
552 $top_item = ":$top_item"
3fa6e24b 553 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 554 }
555 else {
556 $top_item =~ s|/\z|| unless $top_item eq '/';
557 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
558 }
559
560 $Is_Dir= 0;
561
562 if ($follow) {
563
564 if ($Is_MacOS) {
565 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
566
567 if ($top_item eq $File::Find::current_dir) {
00f4e5f0 568 # avoid empty name after return to '/'
569 $name = '/' unless length( $name );
7e47e6ff 570 $abs_dir = $cwd;
571 }
572 else {
573 $abs_dir = contract_name_Mac($cwd, $top_item);
574 unless (defined $abs_dir) {
cd68ec93 575 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 576 next Proc_Top_Item;
577 }
578 }
579
580 }
581 else {
582 if (substr($top_item,0,1) eq '/') {
583 $abs_dir = $top_item;
584 }
585 elsif ($top_item eq $File::Find::current_dir) {
586 $abs_dir = $cwd;
587 }
588 else { # care about any ../
589 $abs_dir = contract_name("$cwd/",$top_item);
590 }
591 }
592 $abs_dir= Follow_SymLink($abs_dir);
593 unless (defined $abs_dir) {
80e52b73 594 if ($dangling_symlinks) {
595 if (ref $dangling_symlinks eq 'CODE') {
596 $dangling_symlinks->($top_item, $cwd);
597 } else {
cd68ec93 598 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 599 }
600 }
81793b90 601 next Proc_Top_Item;
7e47e6ff 602 }
603
604 if (-d _) {
81793b90 605 _find_dir_symlnk($wanted, $abs_dir, $top_item);
606 $Is_Dir= 1;
7e47e6ff 607 }
608 }
81793b90 609 else { # no follow
7e47e6ff 610 $topdir = $top_item;
611 unless (defined $topnlink) {
cd68ec93 612 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 613 next Proc_Top_Item;
614 }
615 if (-d _) {
c7b9dd21 616 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 617 _find_dir($wanted, $top_item, $topnlink);
81793b90 618 $Is_Dir= 1;
7e47e6ff 619 }
237437d0 620 else {
81793b90 621 $abs_dir= $top_item;
7e47e6ff 622 }
623 }
81793b90 624
7e47e6ff 625 unless ($Is_Dir) {
81793b90 626 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 627 if ($Is_MacOS) {
628 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
629 }
630 else {
631 ($dir,$_) = ('./', $top_item);
632 }
81793b90 633 }
634
7e47e6ff 635 $abs_dir = $dir;
636 if (( $untaint ) && (is_tainted($dir) )) {
637 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 638 unless (defined $abs_dir) {
639 if ($untaint_skip == 0) {
7e47e6ff 640 die "directory $dir is still tainted";
81793b90 641 }
642 else {
643 next Proc_Top_Item;
644 }
645 }
7e47e6ff 646 }
81793b90 647
7e47e6ff 648 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 649 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 650 next Proc_Top_Item;
651 }
719911cc 652
7e47e6ff 653 $name = $abs_dir . $_; # $File::Find::name
719911cc 654
7e47e6ff 655 { &$wanted_callback }; # protect against wild "next"
81793b90 656
7e47e6ff 657 }
81793b90 658
7e47e6ff 659 unless ( $no_chdir ) {
660 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
661 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
662 unless (defined $cwd_untainted) {
663 die "insecure cwd in find(depth)";
664 }
665 $check_t_cwd = 0;
666 }
667 unless (chdir $cwd_untainted) {
668 die "Can't cd to $cwd: $!\n";
669 }
670 }
81793b90 671 }
672}
673
674# API:
675# $wanted
676# $p_dir : "parent directory"
677# $nlink : what came back from the stat
678# preconditions:
679# chdir (if not no_chdir) to dir
680
681sub _find_dir($$$) {
682 my ($wanted, $p_dir, $nlink) = @_;
683 my ($CdLvl,$Level) = (0,0);
684 my @Stack;
685 my @filenames;
686 my ($subcount,$sub_nlink);
687 my $SE= [];
688 my $dir_name= $p_dir;
7e47e6ff 689 my $dir_pref;
39e79f6b 690 my $dir_rel = $File::Find::current_dir;
7e47e6ff 691 my $tainted = 0;
5fa2bf2b 692 my $no_nlink;
7e47e6ff 693
694 if ($Is_MacOS) {
695 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 696 }
697 else {
698 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 699 }
81793b90 700
701 local ($dir, $name, $prune, *DIR);
7e47e6ff 702
703 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 704 my $udir = $p_dir;
7e47e6ff 705 if (( $untaint ) && (is_tainted($p_dir) )) {
706 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 707 unless (defined $udir) {
708 if ($untaint_skip == 0) {
709 die "directory $p_dir is still tainted";
710 }
711 else {
712 return;
713 }
237437d0 714 }
a0d0e21e 715 }
81793b90 716 unless (chdir $udir) {
cd68ec93 717 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 718 return;
719 }
720 }
7e47e6ff 721
722 # push the starting directory
57e73c4b 723 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 724
7e47e6ff 725 if ($Is_MacOS) {
726 $p_dir = $dir_pref; # ensure trailing ':'
727 }
728
81793b90 729 while (defined $SE) {
730 unless ($bydepth) {
7e47e6ff 731 $dir= $p_dir; # $File::Find::dir
732 $name= $dir_name; # $File::Find::name
733 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 734 # prune may happen here
7e47e6ff 735 $prune= 0;
9111eb4b 736 # guarantee lstat for directory
737 lstat( $dir_name );
7e47e6ff 738 { &$wanted_callback }; # protect against wild "next"
739 next if $prune;
81793b90 740 }
7e47e6ff 741
81793b90 742 # change to that directory
7e47e6ff 743 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 744 my $udir= $dir_rel;
7e47e6ff 745 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
746 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 747 unless (defined $udir) {
748 if ($untaint_skip == 0) {
7e47e6ff 749 if ($Is_MacOS) {
750 die "directory ($p_dir) $dir_rel is still tainted";
751 }
752 else {
753 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
754 }
755 } else { # $untaint_skip == 1
756 next;
81793b90 757 }
758 }
759 }
760 unless (chdir $udir) {
7e47e6ff 761 if ($Is_MacOS) {
cd68ec93 762 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 763 }
764 else {
cd68ec93 765 warnings::warnif "Can't cd to (" .
766 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 767 }
81793b90 768 next;
769 }
770 $CdLvl++;
771 }
772
7e47e6ff 773 if ($Is_MacOS) {
774 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
775 }
776
777 $dir= $dir_name; # $File::Find::dir
81793b90 778
779 # Get the list of files in the current directory.
7e47e6ff 780 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 781 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 782 next;
783 }
784 @filenames = readdir DIR;
785 closedir(DIR);
719c805e 786 @filenames = &$pre_process(@filenames) if $pre_process;
787 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 788
5fa2bf2b 789 # default: use whatever was specifid
790 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
791 $no_nlink = $avoid_nlink;
792 # if dir has wrong nlink count, force switch to slower stat method
793 $no_nlink = 1 if ($nlink < 2);
794
795 if ($nlink == 2 && !$no_nlink) {
81793b90 796 # This dir has no subdirectories.
797 for my $FN (@filenames) {
7e47e6ff 798 next if $FN =~ $File::Find::skip_pattern;
81793b90 799
7e47e6ff 800 $name = $dir_pref . $FN; # $File::Find::name
801 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 802 { &$wanted_callback }; # protect against wild "next"
81793b90 803 }
804
805 }
806 else {
807 # This dir has subdirectories.
808 $subcount = $nlink - 2;
809
810 for my $FN (@filenames) {
7e47e6ff 811 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 812 if ($subcount > 0 || $no_nlink) {
81793b90 813 # Seen all the subdirs?
814 # check for directoriness.
815 # stat is faster for a file in the current directory
07867069 816 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 817
818 if (-d _) {
819 --$subcount;
c7b9dd21 820 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 821 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
822 }
823 else {
7e47e6ff 824 $name = $dir_pref . $FN; # $File::Find::name
825 $_= ($no_chdir ? $name : $FN); # $_
73396e07 826 { &$wanted_callback }; # protect against wild "next"
81793b90 827 }
828 }
07867069 829 else {
7e47e6ff 830 $name = $dir_pref . $FN; # $File::Find::name
831 $_= ($no_chdir ? $name : $FN); # $_
73396e07 832 { &$wanted_callback }; # protect against wild "next"
81793b90 833 }
834 }
835 }
17b275ff 836 }
837 continue {
57e73c4b 838 while ( defined ($SE = pop @Stack) ) {
81793b90 839 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
840 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 841 my $tmp;
842 if ($Is_MacOS) {
843 $tmp = (':' x ($CdLvl-$Level)) . ':';
844 }
845 else {
846 $tmp = join('/',('..') x ($CdLvl-$Level));
847 }
848 die "Can't cd to $dir_name" . $tmp
849 unless chdir ($tmp);
81793b90 850 $CdLvl = $Level;
851 }
7e47e6ff 852
853 if ($Is_MacOS) {
854 # $pdir always has a trailing ':', except for the starting dir,
855 # where $dir_rel eq ':'
856 $dir_name = "$p_dir$dir_rel";
857 $dir_pref = "$dir_name:";
858 }
859 else {
860 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
861 $dir_pref = "$dir_name/";
862 }
863
719c805e 864 if ( $nlink == -2 ) {
7e47e6ff 865 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 866 $_ = $File::Find::current_dir;
719c805e 867 &$post_process; # End-of-directory processing
7e47e6ff 868 }
869 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
870 $name = $dir_name;
871 if ($Is_MacOS) {
872 if ($dir_rel eq ':') { # must be the top dir, where we started
873 $name =~ s|:$||; # $File::Find::name
874 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
875 }
876 $dir = $p_dir; # $File::Find::dir
877 $_ = ($no_chdir ? $name : $dir_rel); # $_
878 }
879 else {
880 if ( substr($name,-2) eq '/.' ) {
5cf0a2f2 881 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff 882 }
883 $dir = $p_dir;
884 $_ = ($no_chdir ? $dir_name : $dir_rel );
885 if ( substr($_,-2) eq '/.' ) {
5cf0a2f2 886 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 887 }
888 }
9111eb4b 889 # guarantee lstat at return to directory
890 lstat( $dir_name );
7e47e6ff 891 { &$wanted_callback }; # protect against wild "next"
892 }
893 else {
894 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
895 last;
896 }
81793b90 897 }
a0d0e21e 898 }
899}
900
81793b90 901
902# API:
903# $wanted
904# $dir_loc : absolute location of a dir
905# $p_dir : "parent directory"
906# preconditions:
907# chdir (if not no_chdir) to dir
908
909sub _find_dir_symlnk($$$) {
7e47e6ff 910 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 911 my @Stack;
912 my @filenames;
913 my $new_loc;
7e47e6ff 914 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 915 my $SE = [];
916 my $dir_name = $p_dir;
7e47e6ff 917 my $dir_pref;
918 my $loc_pref;
39e79f6b 919 my $dir_rel = $File::Find::current_dir;
7e47e6ff 920 my $byd_flag; # flag for pending stack entry if $bydepth
921 my $tainted = 0;
922 my $ok = 1;
923
924 if ($Is_MacOS) {
925 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
926 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 927 } else {
928 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
929 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 930 }
81793b90 931
932 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 933
934 unless ($no_chdir) {
935 # untaint the topdir
936 if (( $untaint ) && (is_tainted($dir_loc) )) {
937 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
938 # once untainted, $updir_loc is pushed on the stack (as parent directory);
939 # hence, we don't need to untaint the parent directory every time we chdir
940 # to it later
941 unless (defined $updir_loc) {
81793b90 942 if ($untaint_skip == 0) {
943 die "directory $dir_loc is still tainted";
944 }
945 else {
946 return;
947 }
948 }
949 }
7e47e6ff 950 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
951 unless ($ok) {
cd68ec93 952 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 953 return;
954 }
955 }
956
7e47e6ff 957 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
958
959 if ($Is_MacOS) {
960 $p_dir = $dir_pref; # ensure trailing ':'
961 }
57e73c4b 962
81793b90 963 while (defined $SE) {
964
965 unless ($bydepth) {
7e47e6ff 966 # change (back) to parent directory (always untainted)
704ea872 967 unless ($no_chdir) {
7e47e6ff 968 unless (chdir $updir_loc) {
cd68ec93 969 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 970 next;
971 }
972 }
7e47e6ff 973 $dir= $p_dir; # $File::Find::dir
974 $name= $dir_name; # $File::Find::name
975 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
976 $fullname= $dir_loc; # $File::Find::fullname
81793b90 977 # prune may happen here
7e47e6ff 978 $prune= 0;
704ea872 979 lstat($_); # make sure file tests with '_' work
7e47e6ff 980 { &$wanted_callback }; # protect against wild "next"
981 next if $prune;
81793b90 982 }
983
984 # change to that directory
7e47e6ff 985 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
986 $updir_loc = $dir_loc;
987 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
988 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
989 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
990 unless (defined $updir_loc) {
81793b90 991 if ($untaint_skip == 0) {
992 die "directory $dir_loc is still tainted";
a0d0e21e 993 }
237437d0 994 else {
81793b90 995 next;
237437d0 996 }
a0d0e21e 997 }
998 }
7e47e6ff 999 unless (chdir $updir_loc) {
cd68ec93 1000 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 1001 next;
1002 }
1003 }
1004
7e47e6ff 1005 if ($Is_MacOS) {
1006 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1007 }
1008
1009 $dir = $dir_name; # $File::Find::dir
81793b90 1010
1011 # Get the list of files in the current directory.
7e47e6ff 1012 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 1013 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 1014 next;
1015 }
1016 @filenames = readdir DIR;
1017 closedir(DIR);
1018
1019 for my $FN (@filenames) {
7e47e6ff 1020 next if $FN =~ $File::Find::skip_pattern;
81793b90 1021
1022 # follow symbolic links / do an lstat
07867069 1023 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1024
1025 # ignore if invalid symlink
1026 next unless defined $new_loc;
7e47e6ff 1027
81793b90 1028 if (-d _) {
7e47e6ff 1029 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1030 }
1031 else {
7e47e6ff 1032 $fullname = $new_loc; # $File::Find::fullname
1033 $name = $dir_pref . $FN; # $File::Find::name
1034 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 1035 { &$wanted_callback }; # protect against wild "next"
81793b90 1036 }
1037 }
1038
81793b90 1039 }
1040 continue {
57e73c4b 1041 while (defined($SE = pop @Stack)) {
7e47e6ff 1042 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1043 if ($Is_MacOS) {
1044 # $p_dir always has a trailing ':', except for the starting dir,
1045 # where $dir_rel eq ':'
1046 $dir_name = "$p_dir$dir_rel";
1047 $dir_pref = "$dir_name:";
1048 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1049 }
1050 else {
1051 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1052 $dir_pref = "$dir_name/";
1053 $loc_pref = "$dir_loc/";
1054 }
1055 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1056 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1057 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1058 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1059 next;
1060 }
1061 }
1062 $fullname = $dir_loc; # $File::Find::fullname
1063 $name = $dir_name; # $File::Find::name
1064 if ($Is_MacOS) {
1065 if ($dir_rel eq ':') { # must be the top dir, where we started
1066 $name =~ s|:$||; # $File::Find::name
1067 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1068 }
1069 $dir = $p_dir; # $File::Find::dir
1070 $_ = ($no_chdir ? $name : $dir_rel); # $_
1071 }
1072 else {
1073 if ( substr($name,-2) eq '/.' ) {
1074 $name =~ s|/\.$||; # $File::Find::name
1075 }
1076 $dir = $p_dir; # $File::Find::dir
1077 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1078 if ( substr($_,-2) eq '/.' ) {
1079 s|/\.$||;
1080 }
1081 }
1082
1083 lstat($_); # make sure file tests with '_' work
1084 { &$wanted_callback }; # protect against wild "next"
1085 }
1086 else {
1087 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1088 last;
1089 }
a0d0e21e 1090 }
1091 }
1092}
1093
81793b90 1094
20408e3c 1095sub wrap_wanted {
81793b90 1096 my $wanted = shift;
1097 if ( ref($wanted) eq 'HASH' ) {
1098 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1099 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1100 }
1101 if ( $wanted->{untaint} ) {
7e47e6ff 1102 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1103 unless defined $wanted->{untaint_pattern};
1104 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1105 }
1106 return $wanted;
1107 }
1108 else {
1109 return { wanted => $wanted };
1110 }
a0d0e21e 1111}
1112
20408e3c 1113sub find {
81793b90 1114 my $wanted = shift;
1115 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1116}
1117
55d729e4 1118sub finddepth {
81793b90 1119 my $wanted = wrap_wanted(shift);
1120 $wanted->{bydepth} = 1;
1121 _find_opt($wanted, @_);
20408e3c 1122}
6280b799 1123
7e47e6ff 1124# default
1125$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1126$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1127
6280b799 1128# These are hard-coded for now, but may move to hint files.
10eba763 1129if ($^O eq 'VMS') {
81793b90 1130 $Is_VMS = 1;
7e47e6ff 1131 $File::Find::dont_use_nlink = 1;
1132}
1133elsif ($^O eq 'MacOS') {
1134 $Is_MacOS = 1;
1135 $File::Find::dont_use_nlink = 1;
1136 $File::Find::skip_pattern = qr/^Icon\015\z/;
1137 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1138}
1139
7e47e6ff 1140# this _should_ work properly on all platforms
1141# where File::Find can be expected to work
1142$File::Find::current_dir = File::Spec->curdir || '.';
1143
81793b90 1144$File::Find::dont_use_nlink = 1
497711e7 1145 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
2986a63f 1146 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'NetWare';
6280b799 1147
20408e3c 1148# Set dont_use_nlink in your hint file if your system's stat doesn't
1149# report the number of links in a directory as an indication
1150# of the number of files.
1151# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1152unless ($File::Find::dont_use_nlink) {
1153 require Config;
1154 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1155}
1156
7e47e6ff 1157# We need a function that checks if a scalar is tainted. Either use the
1158# Scalar::Util module's tainted() function or our (slower) pure Perl
1159# fallback is_tainted_pp()
1160{
1161 local $@;
1162 eval { require Scalar::Util };
1163 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1164}
1165
a0d0e21e 11661;