ID 20011206.007 File::Find:name set to empty string for "/"
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b 10=head1 NAME
11
12find - traverse a file tree
13
14finddepth - traverse a directory structure depth-first
15
16=head1 SYNOPSIS
17
18 use File::Find;
81793b90 19 find(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
237437d0 21
f06db76b 22 use File::Find;
81793b90 23 finddepth(\&wanted, '/foo', '/bar');
f06db76b 24 sub wanted { ... }
3cb6de81 25
81793b90 26 use File::Find;
27 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 28
29=head1 DESCRIPTION
30
20408e3c 31The first argument to find() is either a hash reference describing the
81793b90 32operations to be performed for each file, or a code reference.
20408e3c 33
81793b90 34Here are the possible keys for the hash:
35
36=over 3
37
38=item C<wanted>
39
40The value should be a code reference. This code reference is called
41I<the wanted() function> below.
42
43=item C<bydepth>
44
45Reports the name of a directory only AFTER all its entries
46have been reported. Entry point finddepth() is a shortcut for
47specifying C<{ bydepth => 1 }> in the first argument of find().
48
719c805e 49=item C<preprocess>
50
7e47e6ff 51The value should be a code reference. This code reference is used to
52preprocess the current directory. The name of currently processed
53directory is in $File::Find::dir. Your preprocessing function is
54called after readdir() but before the loop that calls the wanted()
55function. It is called with a list of strings (actually file/directory
56names) and is expected to return a list of strings. The code can be
57used to sort the file/directory names alphabetically, numerically,
58or to filter out directory entries based on their name alone. When
59I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 60
61=item C<postprocess>
62
7e47e6ff 63The value should be a code reference. It is invoked just before leaving
64the currently processed directory. It is called in void context with no
65arguments. The name of the current directory is in $File::Find::dir. This
66hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 67usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 68no-op.
719c805e 69
81793b90 70=item C<follow>
71
72Causes symbolic links to be followed. Since directory trees with symbolic
73links (followed) may contain files more than once and may even have
74cycles, a hash has to be built up with an entry for each file.
75This might be expensive both in space and time for a large
76directory tree. See I<follow_fast> and I<follow_skip> below.
77If either I<follow> or I<follow_fast> is in effect:
78
79=over 6
80
a45bd81d 81=item *
81793b90 82
f10e1564 83It is guaranteed that an I<lstat> has been called before the user's
81793b90 84I<wanted()> function is called. This enables fast file checks involving S< _>.
85
a45bd81d 86=item *
81793b90 87
88There is a variable C<$File::Find::fullname> which holds the absolute
89pathname of the file with all symbolic links resolved
90
91=back
92
93=item C<follow_fast>
94
f10e1564 95This is similar to I<follow> except that it may report some files more
96than once. It does detect cycles, however. Since only symbolic links
97have to be hashed, this is much cheaper both in space and time. If
98processing a file more than once (by the user's I<wanted()> function)
81793b90 99is worse than just taking time, the option I<follow> should be used.
100
101=item C<follow_skip>
102
103C<follow_skip==1>, which is the default, causes all files which are
104neither directories nor symbolic links to be ignored if they are about
105to be processed a second time. If a directory or a symbolic link
106are about to be processed a second time, File::Find dies.
107C<follow_skip==0> causes File::Find to die if any file is about to be
108processed a second time.
109C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 110directories but to proceed normally otherwise.
20408e3c 111
80e52b73 112=item C<dangling_symlinks>
113
114If true and a code reference, will be called with the symbolic link
115name and the directory it lives in as arguments. Otherwise, if true
116and warnings are on, warning "symbolic_link_name is a dangling
117symbolic link\n" will be issued. If false, the dangling symbolic link
118will be silently ignored.
f06db76b 119
81793b90 120=item C<no_chdir>
121
122Does not C<chdir()> to each directory as it recurses. The wanted()
123function will need to be aware of this, of course. In this case,
124C<$_> will be the same as C<$File::Find::name>.
125
126=item C<untaint>
127
128If find is used in taint-mode (-T command line switch or if EUID != UID
129or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 130before they can be chdir'ed to. Therefore they are checked against a regular
131expression I<untaint_pattern>. Note that all names passed to the user's
132I<wanted()> function are still tainted. If this option is used while
133not in taint-mode, C<untaint> is a no-op.
81793b90 134
135=item C<untaint_pattern>
136
137See above. This should be set using the C<qr> quoting operator.
138The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 139Note that the parentheses are vital.
81793b90 140
141=item C<untaint_skip>
142
7e47e6ff 143If set, a directory which fails the I<untaint_pattern> is skipped,
144including all its sub-directories. The default is to 'die' in such a case.
81793b90 145
146=back
147
148The wanted() function does whatever verifications you want.
149C<$File::Find::dir> contains the current directory name, and C<$_> the
150current filename within that directory. C<$File::Find::name> contains
f10e1564 151the complete pathname to the file. You are chdir()'d to
152C<$File::Find::dir> when the function is called, unless C<no_chdir>
7e47e6ff 153was specified. When C<follow> or C<follow_fast> are in effect, there is
f10e1564 154also a C<$File::Find::fullname>. The function may set
155C<$File::Find::prune> to prune the tree unless C<bydepth> was
156specified. Unless C<follow> or C<follow_fast> is specified, for
157compatibility reasons (find.pl, find2perl) there are in addition the
158following globals available: C<$File::Find::topdir>,
159C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 160C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 161
20408e3c 162This library is useful for the C<find2perl> tool, which when fed,
f06db76b 163
164 find2perl / -name .nfs\* -mtime +7 \
81793b90 165 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 166
167produces something like:
168
169 sub wanted {
c7b9dd21 170 /^\.nfs.*\z/s &&
81793b90 171 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 172 int(-M _) > 7 &&
173 unlink($_)
174 ||
81793b90 175 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 176 $dev < 0 &&
6280b799 177 ($File::Find::prune = 1);
f06db76b 178 }
179
43dece2a 180Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
181filehandle that caches the information from the preceding
182stat(), lstat(), or filetest.
183
1cffc1dd 184Here's another interesting wanted function. It will find all symbolic
185links that don't resolve:
f06db76b 186
187 sub wanted {
81793b90 188 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 189 }
f06db76b 190
81793b90 191See also the script C<pfind> on CPAN for a nice application of this
192module.
193
cd68ec93 194=head1 WARNINGS
195
196If you run your program with the C<-w> switch, or if you use the
197C<warnings> pragma, File::Find will report warnings for several weird
198situations. You can disable these warnings by putting the statement
199
200 no warnings 'File::Find';
201
202in the appropriate scope. See L<perllexwarn> for more info about lexical
203warnings.
204
81793b90 205=head1 CAVEAT
206
5fa2bf2b 207=over 2
208
209=item $dont_use_nlink
210
211You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 212force File::Find to always stat directories. This was used for file systems
213that do not have an C<nlink> count matching the number of sub-directories.
214Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
215system) and a couple of others.
5fa2bf2b 216
6cf3b067 217You shouldn't need to set this variable, since File::Find should now detect
218such file systems on-the-fly and switch itself to using stat. This works even
219for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 220
6cf3b067 221If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 222
223=item symlinks
224
f10e1564 225Be aware that the option to follow symbolic links can be dangerous.
81793b90 226Depending on the structure of the directory tree (including symbolic
227links to directories) you might traverse a given (physical) directory
228more than once (only if C<follow_fast> is in effect).
229Furthermore, deleting or changing files in a symbolically linked directory
230might cause very unpleasant surprises, since you delete or change files
231in an unknown directory.
0530a6c4 232
5fa2bf2b 233=back
234
7e47e6ff 235=head1 NOTES
236
237=over 4
238
239=item *
240
241Mac OS (Classic) users should note a few differences:
242
243=over 4
244
245=item *
246
247The path separator is ':', not '/', and the current directory is denoted
248as ':', not '.'. You should be careful about specifying relative pathnames.
249While a full path always begins with a volume name, a relative pathname
250should always begin with a ':'. If specifying a volume name only, a
251trailing ':' is required.
252
253=item *
254
255C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
256contains the name of a directory, that name may or may not end with a
257':'. Likewise, C<$File::Find::name>, which contains the complete
258pathname to that directory, and C<$File::Find::fullname>, which holds
259the absolute pathname of that directory with all symbolic links resolved,
260may or may not end with a ':'.
261
262=item *
263
264The default C<untaint_pattern> (see above) on Mac OS is set to
265C<qr|^(.+)$|>. Note that the parentheses are vital.
266
267=item *
268
269The invisible system file "Icon\015" is ignored. While this file may
270appear in every directory, there are some more invisible system files
271on every volume, which are all located at the volume root level (i.e.
272"MacintoshHD:"). These system files are B<not> excluded automatically.
273Your filter may use the following code to recognize invisible files or
274directories (requires Mac::Files):
275
276 use Mac::Files;
277
278 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 279 # 0 if it's visible or undef if an error occurred
7e47e6ff 280
281 sub invisible($) {
282 my $file = shift;
283 my ($fileCat, $fileInfo);
284 my $invisible_flag = 1 << 14;
285
286 if ( $fileCat = FSpGetCatInfo($file) ) {
287 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
288 return (($fileInfo->fdFlags & $invisible_flag) && 1);
289 }
290 }
291 return undef;
292 }
293
294Generally, invisible files are system files, unless an odd application
295decides to use invisible files for its own purposes. To distinguish
296such files from system files, you have to look at the B<type> and B<creator>
297file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
298C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
299(see MacPerl.pm for details).
300
301Files that appear on the desktop actually reside in an (hidden) directory
302named "Desktop Folder" on the particular disk volume. Note that, although
303all desktop files appear to be on the same "virtual" desktop, each disk
304volume actually maintains its own "Desktop Folder" directory.
305
306=back
307
308=back
0530a6c4 309
a85af077 310=head1 HISTORY
311
312File::Find used to produce incorrect results if called recursively.
313During the development of perl 5.8 this bug was fixed.
314The first fixed version of File::Find was 1.01.
315
f06db76b 316=cut
317
b75c8c73 318our @ISA = qw(Exporter);
319our @EXPORT = qw(find finddepth);
6280b799 320
a0d0e21e 321
81793b90 322use strict;
323my $Is_VMS;
7e47e6ff 324my $Is_MacOS;
81793b90 325
326require File::Basename;
7e47e6ff 327require File::Spec;
81793b90 328
9f826d6a 329# Should ideally be my() not our() but local() currently
330# refuses to operate on lexicals
331
332our %SLnkSeen;
333our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 334 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 335 $pre_process, $post_process, $dangling_symlinks);
81793b90 336
337sub contract_name {
338 my ($cdir,$fn) = @_;
339
7e47e6ff 340 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 341
342 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
343
344 $fn =~ s|^\./||;
345
346 my $abs_name= $cdir . $fn;
347
348 if (substr($fn,0,3) eq '../') {
fecbda2b 349 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 350 }
351
352 return $abs_name;
353}
354
7e47e6ff 355# return the absolute name of a directory or file
356sub contract_name_Mac {
357 my ($cdir,$fn) = @_;
358 my $abs_name;
359
360 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
361
362 my $colon_count = length ($1);
363 if ($colon_count == 1) {
364 $abs_name = $cdir . $2;
365 return $abs_name;
366 }
367 else {
368 # need to move up the tree, but
369 # only if it's not a volume name
370 for (my $i=1; $i<$colon_count; $i++) {
371 unless ($cdir =~ /^[^:]+:$/) { # volume name
372 $cdir =~ s/[^:]+:$//;
373 }
374 else {
375 return undef;
376 }
377 }
378 $abs_name = $cdir . $2;
379 return $abs_name;
380 }
381
382 }
383 else {
384
385 # $fn may be a valid path to a directory or file or (dangling)
386 # symlink, without a leading ':'
387 if ( (-e $fn) || (-l $fn) ) {
388 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
389 return $fn; # $fn is already an absolute path
390 }
391 else {
392 $abs_name = $cdir . $fn;
393 return $abs_name;
394 }
395 }
396 else { # argh!, $fn is not a valid directory/file
397 return undef;
398 }
399 }
400}
81793b90 401
402sub PathCombine($$) {
403 my ($Base,$Name) = @_;
404 my $AbsName;
405
7e47e6ff 406 if ($Is_MacOS) {
407 # $Name is the resolved symlink (always a full path on MacOS),
408 # i.e. there's no need to call contract_name_Mac()
409 $AbsName = $Name;
410
411 # (simple) check for recursion
412 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
413 return undef;
414 }
81793b90 415 }
416 else {
7e47e6ff 417 if (substr($Name,0,1) eq '/') {
418 $AbsName= $Name;
419 }
420 else {
421 $AbsName= contract_name($Base,$Name);
422 }
81793b90 423
7e47e6ff 424 # (simple) check for recursion
425 my $newlen= length($AbsName);
426 if ($newlen <= length($Base)) {
427 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
428 && $AbsName eq substr($Base,0,$newlen))
429 {
430 return undef;
431 }
81793b90 432 }
433 }
434 return $AbsName;
435}
436
437sub Follow_SymLink($) {
438 my ($AbsName) = @_;
439
440 my ($NewName,$DEV, $INO);
441 ($DEV, $INO)= lstat $AbsName;
442
443 while (-l _) {
444 if ($SLnkSeen{$DEV, $INO}++) {
445 if ($follow_skip < 2) {
446 die "$AbsName is encountered a second time";
a0d0e21e 447 }
448 else {
81793b90 449 return undef;
a0d0e21e 450 }
451 }
81793b90 452 $NewName= PathCombine($AbsName, readlink($AbsName));
453 unless(defined $NewName) {
454 if ($follow_skip < 2) {
455 die "$AbsName is a recursive symbolic link";
456 }
457 else {
458 return undef;
a0d0e21e 459 }
81793b90 460 }
461 else {
462 $AbsName= $NewName;
463 }
464 ($DEV, $INO) = lstat($AbsName);
465 return undef unless defined $DEV; # dangling symbolic link
466 }
467
cd68ec93 468 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 469 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 470 die "$AbsName encountered a second time";
471 }
472 else {
473 return undef;
474 }
475 }
476
477 return $AbsName;
478}
479
17f410f9 480our($dir, $name, $fullname, $prune);
81793b90 481sub _find_dir_symlnk($$$);
482sub _find_dir($$$);
483
7e47e6ff 484# check whether or not a scalar variable is tainted
485# (code straight from the Camel, 3rd ed., page 561)
486sub is_tainted_pp {
487 my $arg = shift;
488 my $nada = substr($arg, 0, 0); # zero-length
489 local $@;
490 eval { eval "# $nada" };
491 return length($@) != 0;
492}
493
81793b90 494sub _find_opt {
495 my $wanted = shift;
496 die "invalid top directory" unless defined $_[0];
497
9f826d6a 498 # This function must local()ize everything because callbacks may
499 # call find() or finddepth()
500
501 local %SLnkSeen;
502 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
503 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 504 $pre_process, $post_process, $dangling_symlinks);
9f826d6a 505 local($dir, $name, $fullname, $prune);
506
a0c9c202 507 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 508 my $cwd_untainted = $cwd;
509 my $check_t_cwd = 1;
510 $wanted_callback = $wanted->{wanted};
511 $bydepth = $wanted->{bydepth};
512 $pre_process = $wanted->{preprocess};
513 $post_process = $wanted->{postprocess};
514 $no_chdir = $wanted->{no_chdir};
515 $full_check = $wanted->{follow};
516 $follow = $full_check || $wanted->{follow_fast};
517 $follow_skip = $wanted->{follow_skip};
518 $untaint = $wanted->{untaint};
519 $untaint_pat = $wanted->{untaint_pattern};
520 $untaint_skip = $wanted->{untaint_skip};
521 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 522
1cffc1dd 523 # for compatibility reasons (find.pl, find2perl)
9f826d6a 524 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 525
526 # a symbolic link to a directory doesn't increase the link count
527 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
528
e7b91b67 529 my ($abs_dir, $Is_Dir);
81793b90 530
531 Proc_Top_Item:
532 foreach my $TOP (@_) {
7e47e6ff 533 my $top_item = $TOP;
534
535 if ($Is_MacOS) {
536 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
537 $top_item = ":$top_item"
3fa6e24b 538 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 539 }
540 else {
541 $top_item =~ s|/\z|| unless $top_item eq '/';
542 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
543 }
544
545 $Is_Dir= 0;
546
547 if ($follow) {
548
549 if ($Is_MacOS) {
550 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
551
552 if ($top_item eq $File::Find::current_dir) {
00f4e5f0 553 # avoid empty name after return to '/'
554 $name = '/' unless length( $name );
7e47e6ff 555 $abs_dir = $cwd;
556 }
557 else {
558 $abs_dir = contract_name_Mac($cwd, $top_item);
559 unless (defined $abs_dir) {
cd68ec93 560 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 561 next Proc_Top_Item;
562 }
563 }
564
565 }
566 else {
567 if (substr($top_item,0,1) eq '/') {
568 $abs_dir = $top_item;
569 }
570 elsif ($top_item eq $File::Find::current_dir) {
571 $abs_dir = $cwd;
572 }
573 else { # care about any ../
574 $abs_dir = contract_name("$cwd/",$top_item);
575 }
576 }
577 $abs_dir= Follow_SymLink($abs_dir);
578 unless (defined $abs_dir) {
80e52b73 579 if ($dangling_symlinks) {
580 if (ref $dangling_symlinks eq 'CODE') {
581 $dangling_symlinks->($top_item, $cwd);
582 } else {
cd68ec93 583 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 584 }
585 }
81793b90 586 next Proc_Top_Item;
7e47e6ff 587 }
588
589 if (-d _) {
81793b90 590 _find_dir_symlnk($wanted, $abs_dir, $top_item);
591 $Is_Dir= 1;
7e47e6ff 592 }
593 }
81793b90 594 else { # no follow
7e47e6ff 595 $topdir = $top_item;
596 unless (defined $topnlink) {
cd68ec93 597 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 598 next Proc_Top_Item;
599 }
600 if (-d _) {
c7b9dd21 601 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 602 _find_dir($wanted, $top_item, $topnlink);
81793b90 603 $Is_Dir= 1;
7e47e6ff 604 }
237437d0 605 else {
81793b90 606 $abs_dir= $top_item;
7e47e6ff 607 }
608 }
81793b90 609
7e47e6ff 610 unless ($Is_Dir) {
81793b90 611 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 612 if ($Is_MacOS) {
613 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
614 }
615 else {
616 ($dir,$_) = ('./', $top_item);
617 }
81793b90 618 }
619
7e47e6ff 620 $abs_dir = $dir;
621 if (( $untaint ) && (is_tainted($dir) )) {
622 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 623 unless (defined $abs_dir) {
624 if ($untaint_skip == 0) {
7e47e6ff 625 die "directory $dir is still tainted";
81793b90 626 }
627 else {
628 next Proc_Top_Item;
629 }
630 }
7e47e6ff 631 }
81793b90 632
7e47e6ff 633 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 634 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 635 next Proc_Top_Item;
636 }
719911cc 637
7e47e6ff 638 $name = $abs_dir . $_; # $File::Find::name
719911cc 639
7e47e6ff 640 { &$wanted_callback }; # protect against wild "next"
81793b90 641
7e47e6ff 642 }
81793b90 643
7e47e6ff 644 unless ( $no_chdir ) {
645 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
646 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
647 unless (defined $cwd_untainted) {
648 die "insecure cwd in find(depth)";
649 }
650 $check_t_cwd = 0;
651 }
652 unless (chdir $cwd_untainted) {
653 die "Can't cd to $cwd: $!\n";
654 }
655 }
81793b90 656 }
657}
658
659# API:
660# $wanted
661# $p_dir : "parent directory"
662# $nlink : what came back from the stat
663# preconditions:
664# chdir (if not no_chdir) to dir
665
666sub _find_dir($$$) {
667 my ($wanted, $p_dir, $nlink) = @_;
668 my ($CdLvl,$Level) = (0,0);
669 my @Stack;
670 my @filenames;
671 my ($subcount,$sub_nlink);
672 my $SE= [];
673 my $dir_name= $p_dir;
7e47e6ff 674 my $dir_pref;
39e79f6b 675 my $dir_rel = $File::Find::current_dir;
7e47e6ff 676 my $tainted = 0;
5fa2bf2b 677 my $no_nlink;
7e47e6ff 678
679 if ($Is_MacOS) {
680 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 681 }
682 else {
683 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 684 }
81793b90 685
686 local ($dir, $name, $prune, *DIR);
7e47e6ff 687
688 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 689 my $udir = $p_dir;
7e47e6ff 690 if (( $untaint ) && (is_tainted($p_dir) )) {
691 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 692 unless (defined $udir) {
693 if ($untaint_skip == 0) {
694 die "directory $p_dir is still tainted";
695 }
696 else {
697 return;
698 }
237437d0 699 }
a0d0e21e 700 }
81793b90 701 unless (chdir $udir) {
cd68ec93 702 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 703 return;
704 }
705 }
7e47e6ff 706
707 # push the starting directory
57e73c4b 708 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 709
7e47e6ff 710 if ($Is_MacOS) {
711 $p_dir = $dir_pref; # ensure trailing ':'
712 }
713
81793b90 714 while (defined $SE) {
715 unless ($bydepth) {
7e47e6ff 716 $dir= $p_dir; # $File::Find::dir
717 $name= $dir_name; # $File::Find::name
718 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 719 # prune may happen here
7e47e6ff 720 $prune= 0;
721 { &$wanted_callback }; # protect against wild "next"
722 next if $prune;
81793b90 723 }
7e47e6ff 724
81793b90 725 # change to that directory
7e47e6ff 726 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 727 my $udir= $dir_rel;
7e47e6ff 728 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
729 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 730 unless (defined $udir) {
731 if ($untaint_skip == 0) {
7e47e6ff 732 if ($Is_MacOS) {
733 die "directory ($p_dir) $dir_rel is still tainted";
734 }
735 else {
736 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
737 }
738 } else { # $untaint_skip == 1
739 next;
81793b90 740 }
741 }
742 }
743 unless (chdir $udir) {
7e47e6ff 744 if ($Is_MacOS) {
cd68ec93 745 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 746 }
747 else {
cd68ec93 748 warnings::warnif "Can't cd to (" .
749 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 750 }
81793b90 751 next;
752 }
753 $CdLvl++;
754 }
755
7e47e6ff 756 if ($Is_MacOS) {
757 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
758 }
759
760 $dir= $dir_name; # $File::Find::dir
81793b90 761
762 # Get the list of files in the current directory.
7e47e6ff 763 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 764 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 765 next;
766 }
767 @filenames = readdir DIR;
768 closedir(DIR);
719c805e 769 @filenames = &$pre_process(@filenames) if $pre_process;
770 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 771
5fa2bf2b 772 # default: use whatever was specifid
773 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
774 $no_nlink = $avoid_nlink;
775 # if dir has wrong nlink count, force switch to slower stat method
776 $no_nlink = 1 if ($nlink < 2);
777
778 if ($nlink == 2 && !$no_nlink) {
81793b90 779 # This dir has no subdirectories.
780 for my $FN (@filenames) {
7e47e6ff 781 next if $FN =~ $File::Find::skip_pattern;
81793b90 782
7e47e6ff 783 $name = $dir_pref . $FN; # $File::Find::name
784 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 785 { &$wanted_callback }; # protect against wild "next"
81793b90 786 }
787
788 }
789 else {
790 # This dir has subdirectories.
791 $subcount = $nlink - 2;
792
793 for my $FN (@filenames) {
7e47e6ff 794 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 795 if ($subcount > 0 || $no_nlink) {
81793b90 796 # Seen all the subdirs?
797 # check for directoriness.
798 # stat is faster for a file in the current directory
07867069 799 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 800
801 if (-d _) {
802 --$subcount;
c7b9dd21 803 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90 804 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
805 }
806 else {
7e47e6ff 807 $name = $dir_pref . $FN; # $File::Find::name
808 $_= ($no_chdir ? $name : $FN); # $_
73396e07 809 { &$wanted_callback }; # protect against wild "next"
81793b90 810 }
811 }
07867069 812 else {
7e47e6ff 813 $name = $dir_pref . $FN; # $File::Find::name
814 $_= ($no_chdir ? $name : $FN); # $_
73396e07 815 { &$wanted_callback }; # protect against wild "next"
81793b90 816 }
817 }
818 }
17b275ff 819 }
820 continue {
57e73c4b 821 while ( defined ($SE = pop @Stack) ) {
81793b90 822 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
823 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 824 my $tmp;
825 if ($Is_MacOS) {
826 $tmp = (':' x ($CdLvl-$Level)) . ':';
827 }
828 else {
829 $tmp = join('/',('..') x ($CdLvl-$Level));
830 }
831 die "Can't cd to $dir_name" . $tmp
832 unless chdir ($tmp);
81793b90 833 $CdLvl = $Level;
834 }
7e47e6ff 835
836 if ($Is_MacOS) {
837 # $pdir always has a trailing ':', except for the starting dir,
838 # where $dir_rel eq ':'
839 $dir_name = "$p_dir$dir_rel";
840 $dir_pref = "$dir_name:";
841 }
842 else {
843 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
844 $dir_pref = "$dir_name/";
845 }
846
719c805e 847 if ( $nlink == -2 ) {
7e47e6ff 848 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 849 $_ = $File::Find::current_dir;
719c805e 850 &$post_process; # End-of-directory processing
7e47e6ff 851 }
852 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
853 $name = $dir_name;
854 if ($Is_MacOS) {
855 if ($dir_rel eq ':') { # must be the top dir, where we started
856 $name =~ s|:$||; # $File::Find::name
857 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
858 }
859 $dir = $p_dir; # $File::Find::dir
860 $_ = ($no_chdir ? $name : $dir_rel); # $_
861 }
862 else {
863 if ( substr($name,-2) eq '/.' ) {
864 $name =~ s|/\.$||;
865 }
866 $dir = $p_dir;
867 $_ = ($no_chdir ? $dir_name : $dir_rel );
868 if ( substr($_,-2) eq '/.' ) {
869 s|/\.$||;
870 }
871 }
872 { &$wanted_callback }; # protect against wild "next"
873 }
874 else {
875 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
876 last;
877 }
81793b90 878 }
a0d0e21e 879 }
880}
881
81793b90 882
883# API:
884# $wanted
885# $dir_loc : absolute location of a dir
886# $p_dir : "parent directory"
887# preconditions:
888# chdir (if not no_chdir) to dir
889
890sub _find_dir_symlnk($$$) {
7e47e6ff 891 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 892 my @Stack;
893 my @filenames;
894 my $new_loc;
7e47e6ff 895 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 896 my $SE = [];
897 my $dir_name = $p_dir;
7e47e6ff 898 my $dir_pref;
899 my $loc_pref;
39e79f6b 900 my $dir_rel = $File::Find::current_dir;
7e47e6ff 901 my $byd_flag; # flag for pending stack entry if $bydepth
902 my $tainted = 0;
903 my $ok = 1;
904
905 if ($Is_MacOS) {
906 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
907 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 908 } else {
909 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
910 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 911 }
81793b90 912
913 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 914
915 unless ($no_chdir) {
916 # untaint the topdir
917 if (( $untaint ) && (is_tainted($dir_loc) )) {
918 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
919 # once untainted, $updir_loc is pushed on the stack (as parent directory);
920 # hence, we don't need to untaint the parent directory every time we chdir
921 # to it later
922 unless (defined $updir_loc) {
81793b90 923 if ($untaint_skip == 0) {
924 die "directory $dir_loc is still tainted";
925 }
926 else {
927 return;
928 }
929 }
930 }
7e47e6ff 931 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
932 unless ($ok) {
cd68ec93 933 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 934 return;
935 }
936 }
937
7e47e6ff 938 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
939
940 if ($Is_MacOS) {
941 $p_dir = $dir_pref; # ensure trailing ':'
942 }
57e73c4b 943
81793b90 944 while (defined $SE) {
945
946 unless ($bydepth) {
7e47e6ff 947 # change (back) to parent directory (always untainted)
704ea872 948 unless ($no_chdir) {
7e47e6ff 949 unless (chdir $updir_loc) {
cd68ec93 950 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 951 next;
952 }
953 }
7e47e6ff 954 $dir= $p_dir; # $File::Find::dir
955 $name= $dir_name; # $File::Find::name
956 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
957 $fullname= $dir_loc; # $File::Find::fullname
81793b90 958 # prune may happen here
7e47e6ff 959 $prune= 0;
704ea872 960 lstat($_); # make sure file tests with '_' work
7e47e6ff 961 { &$wanted_callback }; # protect against wild "next"
962 next if $prune;
81793b90 963 }
964
965 # change to that directory
7e47e6ff 966 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
967 $updir_loc = $dir_loc;
968 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
969 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
970 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
971 unless (defined $updir_loc) {
81793b90 972 if ($untaint_skip == 0) {
973 die "directory $dir_loc is still tainted";
a0d0e21e 974 }
237437d0 975 else {
81793b90 976 next;
237437d0 977 }
a0d0e21e 978 }
979 }
7e47e6ff 980 unless (chdir $updir_loc) {
cd68ec93 981 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 982 next;
983 }
984 }
985
7e47e6ff 986 if ($Is_MacOS) {
987 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
988 }
989
990 $dir = $dir_name; # $File::Find::dir
81793b90 991
992 # Get the list of files in the current directory.
7e47e6ff 993 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 994 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 995 next;
996 }
997 @filenames = readdir DIR;
998 closedir(DIR);
999
1000 for my $FN (@filenames) {
7e47e6ff 1001 next if $FN =~ $File::Find::skip_pattern;
81793b90 1002
1003 # follow symbolic links / do an lstat
07867069 1004 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1005
1006 # ignore if invalid symlink
1007 next unless defined $new_loc;
7e47e6ff 1008
81793b90 1009 if (-d _) {
7e47e6ff 1010 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1011 }
1012 else {
7e47e6ff 1013 $fullname = $new_loc; # $File::Find::fullname
1014 $name = $dir_pref . $FN; # $File::Find::name
1015 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 1016 { &$wanted_callback }; # protect against wild "next"
81793b90 1017 }
1018 }
1019
81793b90 1020 }
1021 continue {
57e73c4b 1022 while (defined($SE = pop @Stack)) {
7e47e6ff 1023 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1024 if ($Is_MacOS) {
1025 # $p_dir always has a trailing ':', except for the starting dir,
1026 # where $dir_rel eq ':'
1027 $dir_name = "$p_dir$dir_rel";
1028 $dir_pref = "$dir_name:";
1029 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1030 }
1031 else {
1032 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1033 $dir_pref = "$dir_name/";
1034 $loc_pref = "$dir_loc/";
1035 }
1036 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1037 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1038 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1039 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1040 next;
1041 }
1042 }
1043 $fullname = $dir_loc; # $File::Find::fullname
1044 $name = $dir_name; # $File::Find::name
1045 if ($Is_MacOS) {
1046 if ($dir_rel eq ':') { # must be the top dir, where we started
1047 $name =~ s|:$||; # $File::Find::name
1048 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1049 }
1050 $dir = $p_dir; # $File::Find::dir
1051 $_ = ($no_chdir ? $name : $dir_rel); # $_
1052 }
1053 else {
1054 if ( substr($name,-2) eq '/.' ) {
1055 $name =~ s|/\.$||; # $File::Find::name
1056 }
1057 $dir = $p_dir; # $File::Find::dir
1058 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1059 if ( substr($_,-2) eq '/.' ) {
1060 s|/\.$||;
1061 }
1062 }
1063
1064 lstat($_); # make sure file tests with '_' work
1065 { &$wanted_callback }; # protect against wild "next"
1066 }
1067 else {
1068 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1069 last;
1070 }
a0d0e21e 1071 }
1072 }
1073}
1074
81793b90 1075
20408e3c 1076sub wrap_wanted {
81793b90 1077 my $wanted = shift;
1078 if ( ref($wanted) eq 'HASH' ) {
1079 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1080 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1081 }
1082 if ( $wanted->{untaint} ) {
7e47e6ff 1083 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1084 unless defined $wanted->{untaint_pattern};
1085 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1086 }
1087 return $wanted;
1088 }
1089 else {
1090 return { wanted => $wanted };
1091 }
a0d0e21e 1092}
1093
20408e3c 1094sub find {
81793b90 1095 my $wanted = shift;
1096 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1097}
1098
55d729e4 1099sub finddepth {
81793b90 1100 my $wanted = wrap_wanted(shift);
1101 $wanted->{bydepth} = 1;
1102 _find_opt($wanted, @_);
20408e3c 1103}
6280b799 1104
7e47e6ff 1105# default
1106$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1107$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1108
6280b799 1109# These are hard-coded for now, but may move to hint files.
10eba763 1110if ($^O eq 'VMS') {
81793b90 1111 $Is_VMS = 1;
7e47e6ff 1112 $File::Find::dont_use_nlink = 1;
1113}
1114elsif ($^O eq 'MacOS') {
1115 $Is_MacOS = 1;
1116 $File::Find::dont_use_nlink = 1;
1117 $File::Find::skip_pattern = qr/^Icon\015\z/;
1118 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1119}
1120
7e47e6ff 1121# this _should_ work properly on all platforms
1122# where File::Find can be expected to work
1123$File::Find::current_dir = File::Spec->curdir || '.';
1124
81793b90 1125$File::Find::dont_use_nlink = 1
497711e7 1126 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
2986a63f 1127 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'NetWare';
6280b799 1128
20408e3c 1129# Set dont_use_nlink in your hint file if your system's stat doesn't
1130# report the number of links in a directory as an indication
1131# of the number of files.
1132# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1133unless ($File::Find::dont_use_nlink) {
1134 require Config;
1135 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1136}
1137
7e47e6ff 1138# We need a function that checks if a scalar is tainted. Either use the
1139# Scalar::Util module's tainted() function or our (slower) pure Perl
1140# fallback is_tainted_pp()
1141{
1142 local $@;
1143 eval { require Scalar::Util };
1144 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1145}
1146
a0d0e21e 11471;