Storable-2.08/t/code.t
[p5sagit/p5-mst-13.2.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
c00d3472 6our $VERSION = '1.06';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
7bd31527 10#
11# Modified to ensure sub-directory traversal order is not inverded by stack
12# push and pops. That is remains in the same order as in the directory file,
13# or user pre-processing (EG:sorted).
14#
15
f06db76b 16=head1 NAME
17
abfdd623 18File::Find - Traverse a directory tree.
f06db76b 19
20=head1 SYNOPSIS
21
22 use File::Find;
6d355c6e 23 find(\&wanted, @directories_to_search);
f06db76b 24 sub wanted { ... }
237437d0 25
f06db76b 26 use File::Find;
abfdd623 27 finddepth(\&wanted, @directories_to_search);
f06db76b 28 sub wanted { ... }
3cb6de81 29
81793b90 30 use File::Find;
31 find({ wanted => \&process, follow => 1 }, '.');
f06db76b 32
33=head1 DESCRIPTION
34
abfdd623 35These are functions for searching through directory trees doing work
36on each file found similar to the Unix I<find> command. File::Find
37exports two functions, C<find> and C<finddepth>. They work similarly
38but have subtle differences.
39
40=over 4
41
42=item B<find>
43
44 find(\&wanted, @directories);
45 find(\%options, @directories);
46
95e23d19 47C<find()> does a breadth-first search over the given C<@directories> in the
6d355c6e 48order they are given. In essence, it works from the top down.
abfdd623 49
95e23d19 50For each file or directory found, the C<&wanted> subroutine is called,
51with the return code ignored. (See below for details on how to use
52the C<&wanted> function). Additionally, for each directory found,
53it will go into that directory and continue the search.
abfdd623 54
55=item B<finddepth>
56
57 finddepth(\&wanted, @directories);
58 finddepth(\%options, @directories);
59
95e23d19 60C<finddepth()> works just like C<find()> except it does a depth-first search.
abfdd623 61It works from the bottom of the directory tree up.
62
63=back
64
65=head2 %options
66
95e23d19 67The first argument to C<find()> is either a code reference to your
68C<&wanted> function, or a hash reference describing the operations
69to be performed for each file. The
abfdd623 70code reference is described in L<The wanted function> below.
20408e3c 71
81793b90 72Here are the possible keys for the hash:
73
74=over 3
75
76=item C<wanted>
77
abfdd623 78The value should be a code reference. This code reference is
79described in L<The wanted function> below.
81793b90 80
81=item C<bydepth>
82
83Reports the name of a directory only AFTER all its entries
95e23d19 84have been reported. Entry point C<finddepth()> is a shortcut for
85specifying C<<{ bydepth => 1 }>> in the first argument of C<find()>.
81793b90 86
719c805e 87=item C<preprocess>
88
7e47e6ff 89The value should be a code reference. This code reference is used to
95e23d19 90preprocess the current directory. The name of the currently processed
91directory is in C<$File::Find::dir>. Your preprocessing function is
92called after C<readdir()>, but before the loop that calls the C<wanted()>
7e47e6ff 93function. It is called with a list of strings (actually file/directory
94names) and is expected to return a list of strings. The code can be
95used to sort the file/directory names alphabetically, numerically,
96or to filter out directory entries based on their name alone. When
97I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e 98
99=item C<postprocess>
100
7e47e6ff 101The value should be a code reference. It is invoked just before leaving
102the currently processed directory. It is called in void context with no
95e23d19 103arguments. The name of the current directory is in C<$File::Find::dir>. This
7e47e6ff 104hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 105usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 106no-op.
719c805e 107
81793b90 108=item C<follow>
109
110Causes symbolic links to be followed. Since directory trees with symbolic
111links (followed) may contain files more than once and may even have
112cycles, a hash has to be built up with an entry for each file.
113This might be expensive both in space and time for a large
114directory tree. See I<follow_fast> and I<follow_skip> below.
115If either I<follow> or I<follow_fast> is in effect:
116
117=over 6
118
a45bd81d 119=item *
81793b90 120
f10e1564 121It is guaranteed that an I<lstat> has been called before the user's
95e23d19 122C<wanted()> function is called. This enables fast file checks involving S< _>.
81793b90 123
a45bd81d 124=item *
81793b90 125
126There is a variable C<$File::Find::fullname> which holds the absolute
127pathname of the file with all symbolic links resolved
128
129=back
130
131=item C<follow_fast>
132
f10e1564 133This is similar to I<follow> except that it may report some files more
134than once. It does detect cycles, however. Since only symbolic links
135have to be hashed, this is much cheaper both in space and time. If
95e23d19 136processing a file more than once (by the user's C<wanted()> function)
81793b90 137is worse than just taking time, the option I<follow> should be used.
138
139=item C<follow_skip>
140
141C<follow_skip==1>, which is the default, causes all files which are
142neither directories nor symbolic links to be ignored if they are about
143to be processed a second time. If a directory or a symbolic link
144are about to be processed a second time, File::Find dies.
95e23d19 145
81793b90 146C<follow_skip==0> causes File::Find to die if any file is about to be
147processed a second time.
95e23d19 148
81793b90 149C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 150directories but to proceed normally otherwise.
20408e3c 151
80e52b73 152=item C<dangling_symlinks>
153
154If true and a code reference, will be called with the symbolic link
155name and the directory it lives in as arguments. Otherwise, if true
156and warnings are on, warning "symbolic_link_name is a dangling
157symbolic link\n" will be issued. If false, the dangling symbolic link
158will be silently ignored.
f06db76b 159
81793b90 160=item C<no_chdir>
161
95e23d19 162Does not C<chdir()> to each directory as it recurses. The C<wanted()>
81793b90 163function will need to be aware of this, of course. In this case,
164C<$_> will be the same as C<$File::Find::name>.
165
166=item C<untaint>
167
168If find is used in taint-mode (-T command line switch or if EUID != UID
169or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 170before they can be chdir'ed to. Therefore they are checked against a regular
171expression I<untaint_pattern>. Note that all names passed to the user's
172I<wanted()> function are still tainted. If this option is used while
173not in taint-mode, C<untaint> is a no-op.
81793b90 174
175=item C<untaint_pattern>
176
177See above. This should be set using the C<qr> quoting operator.
178The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 179Note that the parentheses are vital.
81793b90 180
181=item C<untaint_skip>
182
7e47e6ff 183If set, a directory which fails the I<untaint_pattern> is skipped,
184including all its sub-directories. The default is to 'die' in such a case.
81793b90 185
186=back
187
abfdd623 188=head2 The wanted function
189
95e23d19 190The C<wanted()> function does whatever verifications you want on
191each file and directory. Note that despite its name, the C<wanted()>
192function is a generic callback function, and does B<not> tell
193File::Find if a file is "wanted" or not. In fact, its return value
194is ignored.
195
196The wanted function takes no arguments but rather does its work
abfdd623 197through a collection of variables.
198
199=over 4
200
f837ebe2 201=item C<$File::Find::dir> is the current directory name,
abfdd623 202
203=item C<$_> is the current filename within that directory
204
f837ebe2 205=item C<$File::Find::name> is the complete pathname to the file.
abfdd623 206
207=back
208
f837ebe2 209Don't modify these variables.
210
95e23d19 211For example, when examining the file F</some/path/foo.ext> you will have:
abfdd623 212
213 $File::Find::dir = /some/path/
214 $_ = foo.ext
215 $File::Find::name = /some/path/foo.ext
216
217You are chdir()'d toC<$File::Find::dir> when the function is called,
218unless C<no_chdir> was specified. Note that when changing to
219directories is in effect the root directory (F</>) is a somewhat
220special case inasmuch as the concatenation of C<$File::Find::dir>,
221C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
222table below summarizes all variants:
5cf0a2f2 223
224 $File::Find::name $File::Find::dir $_
225 default / / .
226 no_chdir=>0 /etc / etc
227 /etc/x /etc x
abfdd623 228
5cf0a2f2 229 no_chdir=>1 / / /
230 /etc / /etc
231 /etc/x /etc /etc/x
232
233
234When <follow> or <follow_fast> are in effect, there is
f10e1564 235also a C<$File::Find::fullname>. The function may set
236C<$File::Find::prune> to prune the tree unless C<bydepth> was
237specified. Unless C<follow> or C<follow_fast> is specified, for
238compatibility reasons (find.pl, find2perl) there are in addition the
239following globals available: C<$File::Find::topdir>,
240C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 241C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 242
20408e3c 243This library is useful for the C<find2perl> tool, which when fed,
f06db76b 244
245 find2perl / -name .nfs\* -mtime +7 \
81793b90 246 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b 247
248produces something like:
249
250 sub wanted {
c7b9dd21 251 /^\.nfs.*\z/s &&
81793b90 252 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b 253 int(-M _) > 7 &&
254 unlink($_)
255 ||
81793b90 256 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 257 $dev < 0 &&
6280b799 258 ($File::Find::prune = 1);
f06db76b 259 }
260
43dece2a 261Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
262filehandle that caches the information from the preceding
95e23d19 263C<stat()>, C<lstat()>, or filetest.
43dece2a 264
1cffc1dd 265Here's another interesting wanted function. It will find all symbolic
266links that don't resolve:
f06db76b 267
268 sub wanted {
81793b90 269 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 270 }
f06db76b 271
81793b90 272See also the script C<pfind> on CPAN for a nice application of this
273module.
274
cd68ec93 275=head1 WARNINGS
276
277If you run your program with the C<-w> switch, or if you use the
278C<warnings> pragma, File::Find will report warnings for several weird
279situations. You can disable these warnings by putting the statement
280
281 no warnings 'File::Find';
282
283in the appropriate scope. See L<perllexwarn> for more info about lexical
284warnings.
285
81793b90 286=head1 CAVEAT
287
5fa2bf2b 288=over 2
289
290=item $dont_use_nlink
291
292You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067 293force File::Find to always stat directories. This was used for file systems
294that do not have an C<nlink> count matching the number of sub-directories.
295Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
296system) and a couple of others.
5fa2bf2b 297
6cf3b067 298You shouldn't need to set this variable, since File::Find should now detect
299such file systems on-the-fly and switch itself to using stat. This works even
300for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 301
6cf3b067 302If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b 303
304=item symlinks
305
f10e1564 306Be aware that the option to follow symbolic links can be dangerous.
81793b90 307Depending on the structure of the directory tree (including symbolic
308links to directories) you might traverse a given (physical) directory
309more than once (only if C<follow_fast> is in effect).
310Furthermore, deleting or changing files in a symbolically linked directory
311might cause very unpleasant surprises, since you delete or change files
312in an unknown directory.
0530a6c4 313
5fa2bf2b 314=back
315
7e47e6ff 316=head1 NOTES
317
318=over 4
319
320=item *
321
322Mac OS (Classic) users should note a few differences:
323
324=over 4
325
326=item *
327
328The path separator is ':', not '/', and the current directory is denoted
329as ':', not '.'. You should be careful about specifying relative pathnames.
330While a full path always begins with a volume name, a relative pathname
331should always begin with a ':'. If specifying a volume name only, a
332trailing ':' is required.
333
334=item *
335
336C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
337contains the name of a directory, that name may or may not end with a
338':'. Likewise, C<$File::Find::name>, which contains the complete
339pathname to that directory, and C<$File::Find::fullname>, which holds
340the absolute pathname of that directory with all symbolic links resolved,
341may or may not end with a ':'.
342
343=item *
344
345The default C<untaint_pattern> (see above) on Mac OS is set to
346C<qr|^(.+)$|>. Note that the parentheses are vital.
347
348=item *
349
350The invisible system file "Icon\015" is ignored. While this file may
351appear in every directory, there are some more invisible system files
352on every volume, which are all located at the volume root level (i.e.
353"MacintoshHD:"). These system files are B<not> excluded automatically.
354Your filter may use the following code to recognize invisible files or
355directories (requires Mac::Files):
356
357 use Mac::Files;
358
359 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 360 # 0 if it's visible or undef if an error occurred
7e47e6ff 361
362 sub invisible($) {
363 my $file = shift;
364 my ($fileCat, $fileInfo);
365 my $invisible_flag = 1 << 14;
366
367 if ( $fileCat = FSpGetCatInfo($file) ) {
368 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
369 return (($fileInfo->fdFlags & $invisible_flag) && 1);
370 }
371 }
372 return undef;
373 }
374
375Generally, invisible files are system files, unless an odd application
376decides to use invisible files for its own purposes. To distinguish
377such files from system files, you have to look at the B<type> and B<creator>
378file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
379C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
380(see MacPerl.pm for details).
381
382Files that appear on the desktop actually reside in an (hidden) directory
383named "Desktop Folder" on the particular disk volume. Note that, although
384all desktop files appear to be on the same "virtual" desktop, each disk
385volume actually maintains its own "Desktop Folder" directory.
386
387=back
388
389=back
0530a6c4 390
a85af077 391=head1 HISTORY
392
393File::Find used to produce incorrect results if called recursively.
394During the development of perl 5.8 this bug was fixed.
395The first fixed version of File::Find was 1.01.
396
f06db76b 397=cut
398
b75c8c73 399our @ISA = qw(Exporter);
400our @EXPORT = qw(find finddepth);
6280b799 401
a0d0e21e 402
81793b90 403use strict;
404my $Is_VMS;
7e47e6ff 405my $Is_MacOS;
81793b90 406
407require File::Basename;
7e47e6ff 408require File::Spec;
81793b90 409
9f826d6a 410# Should ideally be my() not our() but local() currently
411# refuses to operate on lexicals
412
413our %SLnkSeen;
414our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 415 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 416 $pre_process, $post_process, $dangling_symlinks);
81793b90 417
418sub contract_name {
419 my ($cdir,$fn) = @_;
420
7e47e6ff 421 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90 422
423 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
424
425 $fn =~ s|^\./||;
426
427 my $abs_name= $cdir . $fn;
428
429 if (substr($fn,0,3) eq '../') {
fecbda2b 430 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90 431 }
432
433 return $abs_name;
434}
435
7e47e6ff 436# return the absolute name of a directory or file
437sub contract_name_Mac {
438 my ($cdir,$fn) = @_;
439 my $abs_name;
440
441 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
442
443 my $colon_count = length ($1);
444 if ($colon_count == 1) {
445 $abs_name = $cdir . $2;
446 return $abs_name;
447 }
448 else {
449 # need to move up the tree, but
450 # only if it's not a volume name
451 for (my $i=1; $i<$colon_count; $i++) {
452 unless ($cdir =~ /^[^:]+:$/) { # volume name
453 $cdir =~ s/[^:]+:$//;
454 }
455 else {
456 return undef;
457 }
458 }
459 $abs_name = $cdir . $2;
460 return $abs_name;
461 }
462
463 }
464 else {
465
466 # $fn may be a valid path to a directory or file or (dangling)
467 # symlink, without a leading ':'
468 if ( (-e $fn) || (-l $fn) ) {
469 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
470 return $fn; # $fn is already an absolute path
471 }
472 else {
473 $abs_name = $cdir . $fn;
474 return $abs_name;
475 }
476 }
477 else { # argh!, $fn is not a valid directory/file
478 return undef;
479 }
480 }
481}
81793b90 482
483sub PathCombine($$) {
484 my ($Base,$Name) = @_;
485 my $AbsName;
486
7e47e6ff 487 if ($Is_MacOS) {
488 # $Name is the resolved symlink (always a full path on MacOS),
489 # i.e. there's no need to call contract_name_Mac()
490 $AbsName = $Name;
491
492 # (simple) check for recursion
493 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
494 return undef;
495 }
81793b90 496 }
497 else {
7e47e6ff 498 if (substr($Name,0,1) eq '/') {
499 $AbsName= $Name;
500 }
501 else {
502 $AbsName= contract_name($Base,$Name);
503 }
81793b90 504
7e47e6ff 505 # (simple) check for recursion
506 my $newlen= length($AbsName);
507 if ($newlen <= length($Base)) {
508 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
509 && $AbsName eq substr($Base,0,$newlen))
510 {
511 return undef;
512 }
81793b90 513 }
514 }
515 return $AbsName;
516}
517
518sub Follow_SymLink($) {
519 my ($AbsName) = @_;
520
521 my ($NewName,$DEV, $INO);
522 ($DEV, $INO)= lstat $AbsName;
523
524 while (-l _) {
525 if ($SLnkSeen{$DEV, $INO}++) {
526 if ($follow_skip < 2) {
527 die "$AbsName is encountered a second time";
a0d0e21e 528 }
529 else {
81793b90 530 return undef;
a0d0e21e 531 }
532 }
81793b90 533 $NewName= PathCombine($AbsName, readlink($AbsName));
534 unless(defined $NewName) {
535 if ($follow_skip < 2) {
536 die "$AbsName is a recursive symbolic link";
537 }
538 else {
539 return undef;
a0d0e21e 540 }
81793b90 541 }
542 else {
543 $AbsName= $NewName;
544 }
545 ($DEV, $INO) = lstat($AbsName);
546 return undef unless defined $DEV; # dangling symbolic link
547 }
548
cd68ec93 549 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 550 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90 551 die "$AbsName encountered a second time";
552 }
553 else {
554 return undef;
555 }
556 }
557
558 return $AbsName;
559}
560
17f410f9 561our($dir, $name, $fullname, $prune);
81793b90 562sub _find_dir_symlnk($$$);
563sub _find_dir($$$);
564
7e47e6ff 565# check whether or not a scalar variable is tainted
566# (code straight from the Camel, 3rd ed., page 561)
567sub is_tainted_pp {
568 my $arg = shift;
569 my $nada = substr($arg, 0, 0); # zero-length
570 local $@;
571 eval { eval "# $nada" };
572 return length($@) != 0;
573}
574
81793b90 575sub _find_opt {
576 my $wanted = shift;
577 die "invalid top directory" unless defined $_[0];
578
9f826d6a 579 # This function must local()ize everything because callbacks may
580 # call find() or finddepth()
581
582 local %SLnkSeen;
583 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
584 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 585 $pre_process, $post_process, $dangling_symlinks);
17ab9c14 586 local($dir, $name, $fullname, $prune, $_);
9f826d6a 587
a0c9c202 588 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73 589 my $cwd_untainted = $cwd;
590 my $check_t_cwd = 1;
591 $wanted_callback = $wanted->{wanted};
592 $bydepth = $wanted->{bydepth};
593 $pre_process = $wanted->{preprocess};
594 $post_process = $wanted->{postprocess};
595 $no_chdir = $wanted->{no_chdir};
596 $full_check = $wanted->{follow};
597 $follow = $full_check || $wanted->{follow_fast};
598 $follow_skip = $wanted->{follow_skip};
599 $untaint = $wanted->{untaint};
600 $untaint_pat = $wanted->{untaint_pattern};
601 $untaint_skip = $wanted->{untaint_skip};
602 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 603
1cffc1dd 604 # for compatibility reasons (find.pl, find2perl)
9f826d6a 605 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 606
607 # a symbolic link to a directory doesn't increase the link count
608 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
609
e7b91b67 610 my ($abs_dir, $Is_Dir);
81793b90 611
612 Proc_Top_Item:
613 foreach my $TOP (@_) {
7e47e6ff 614 my $top_item = $TOP;
615
616 if ($Is_MacOS) {
617 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
618 $top_item = ":$top_item"
3fa6e24b 619 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff 620 }
621 else {
622 $top_item =~ s|/\z|| unless $top_item eq '/';
623 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
624 }
625
626 $Is_Dir= 0;
627
628 if ($follow) {
629
630 if ($Is_MacOS) {
631 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
632
633 if ($top_item eq $File::Find::current_dir) {
634 $abs_dir = $cwd;
635 }
636 else {
637 $abs_dir = contract_name_Mac($cwd, $top_item);
638 unless (defined $abs_dir) {
cd68ec93 639 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff 640 next Proc_Top_Item;
641 }
642 }
643
644 }
645 else {
646 if (substr($top_item,0,1) eq '/') {
647 $abs_dir = $top_item;
648 }
649 elsif ($top_item eq $File::Find::current_dir) {
650 $abs_dir = $cwd;
651 }
652 else { # care about any ../
653 $abs_dir = contract_name("$cwd/",$top_item);
654 }
655 }
656 $abs_dir= Follow_SymLink($abs_dir);
657 unless (defined $abs_dir) {
80e52b73 658 if ($dangling_symlinks) {
659 if (ref $dangling_symlinks eq 'CODE') {
660 $dangling_symlinks->($top_item, $cwd);
661 } else {
cd68ec93 662 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73 663 }
664 }
81793b90 665 next Proc_Top_Item;
7e47e6ff 666 }
667
668 if (-d _) {
81793b90 669 _find_dir_symlnk($wanted, $abs_dir, $top_item);
670 $Is_Dir= 1;
7e47e6ff 671 }
672 }
81793b90 673 else { # no follow
7e47e6ff 674 $topdir = $top_item;
675 unless (defined $topnlink) {
cd68ec93 676 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff 677 next Proc_Top_Item;
678 }
679 if (-d _) {
544ff7a7 680 $top_item =~ s/\.dir\z//i if $Is_VMS;
e7b91b67 681 _find_dir($wanted, $top_item, $topnlink);
81793b90 682 $Is_Dir= 1;
7e47e6ff 683 }
237437d0 684 else {
81793b90 685 $abs_dir= $top_item;
7e47e6ff 686 }
687 }
81793b90 688
7e47e6ff 689 unless ($Is_Dir) {
81793b90 690 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff 691 if ($Is_MacOS) {
692 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
693 }
694 else {
695 ($dir,$_) = ('./', $top_item);
696 }
81793b90 697 }
698
7e47e6ff 699 $abs_dir = $dir;
700 if (( $untaint ) && (is_tainted($dir) )) {
701 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90 702 unless (defined $abs_dir) {
703 if ($untaint_skip == 0) {
7e47e6ff 704 die "directory $dir is still tainted";
81793b90 705 }
706 else {
707 next Proc_Top_Item;
708 }
709 }
7e47e6ff 710 }
81793b90 711
7e47e6ff 712 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 713 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff 714 next Proc_Top_Item;
715 }
719911cc 716
7e47e6ff 717 $name = $abs_dir . $_; # $File::Find::name
3bb6d3e5 718 $_ = $name if $no_chdir;
719911cc 719
abfdd623 720 { $wanted_callback->() }; # protect against wild "next"
81793b90 721
7e47e6ff 722 }
81793b90 723
7e47e6ff 724 unless ( $no_chdir ) {
725 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
726 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
727 unless (defined $cwd_untainted) {
728 die "insecure cwd in find(depth)";
729 }
730 $check_t_cwd = 0;
731 }
732 unless (chdir $cwd_untainted) {
733 die "Can't cd to $cwd: $!\n";
734 }
735 }
81793b90 736 }
737}
738
739# API:
740# $wanted
741# $p_dir : "parent directory"
742# $nlink : what came back from the stat
743# preconditions:
744# chdir (if not no_chdir) to dir
745
746sub _find_dir($$$) {
747 my ($wanted, $p_dir, $nlink) = @_;
748 my ($CdLvl,$Level) = (0,0);
749 my @Stack;
750 my @filenames;
751 my ($subcount,$sub_nlink);
752 my $SE= [];
753 my $dir_name= $p_dir;
7e47e6ff 754 my $dir_pref;
39e79f6b 755 my $dir_rel = $File::Find::current_dir;
7e47e6ff 756 my $tainted = 0;
5fa2bf2b 757 my $no_nlink;
7e47e6ff 758
759 if ($Is_MacOS) {
760 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff 761 }
762 else {
763 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 764 }
81793b90 765
766 local ($dir, $name, $prune, *DIR);
7e47e6ff 767
768 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 769 my $udir = $p_dir;
7e47e6ff 770 if (( $untaint ) && (is_tainted($p_dir) )) {
771 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90 772 unless (defined $udir) {
773 if ($untaint_skip == 0) {
774 die "directory $p_dir is still tainted";
775 }
776 else {
777 return;
778 }
237437d0 779 }
a0d0e21e 780 }
8d8eebbf 781 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
cd68ec93 782 warnings::warnif "Can't cd to $udir: $!\n";
81793b90 783 return;
784 }
785 }
7e47e6ff 786
787 # push the starting directory
57e73c4b 788 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 789
7e47e6ff 790 if ($Is_MacOS) {
791 $p_dir = $dir_pref; # ensure trailing ':'
792 }
793
81793b90 794 while (defined $SE) {
795 unless ($bydepth) {
7e47e6ff 796 $dir= $p_dir; # $File::Find::dir
797 $name= $dir_name; # $File::Find::name
798 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 799 # prune may happen here
7e47e6ff 800 $prune= 0;
abfdd623 801 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 802 next if $prune;
81793b90 803 }
7e47e6ff 804
81793b90 805 # change to that directory
7e47e6ff 806 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 807 my $udir= $dir_rel;
7e47e6ff 808 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
809 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90 810 unless (defined $udir) {
811 if ($untaint_skip == 0) {
7e47e6ff 812 if ($Is_MacOS) {
813 die "directory ($p_dir) $dir_rel is still tainted";
814 }
815 else {
816 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
817 }
818 } else { # $untaint_skip == 1
819 next;
81793b90 820 }
821 }
822 }
8d8eebbf 823 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
7e47e6ff 824 if ($Is_MacOS) {
cd68ec93 825 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff 826 }
827 else {
cd68ec93 828 warnings::warnif "Can't cd to (" .
829 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 830 }
81793b90 831 next;
832 }
833 $CdLvl++;
834 }
835
7e47e6ff 836 if ($Is_MacOS) {
837 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
838 }
839
840 $dir= $dir_name; # $File::Find::dir
81793b90 841
842 # Get the list of files in the current directory.
7e47e6ff 843 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 844 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90 845 next;
846 }
847 @filenames = readdir DIR;
848 closedir(DIR);
abfdd623 849 @filenames = $pre_process->(@filenames) if $pre_process;
719c805e 850 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 851
5fa2bf2b 852 # default: use whatever was specifid
853 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
854 $no_nlink = $avoid_nlink;
855 # if dir has wrong nlink count, force switch to slower stat method
856 $no_nlink = 1 if ($nlink < 2);
857
858 if ($nlink == 2 && !$no_nlink) {
81793b90 859 # This dir has no subdirectories.
860 for my $FN (@filenames) {
7e47e6ff 861 next if $FN =~ $File::Find::skip_pattern;
81793b90 862
7e47e6ff 863 $name = $dir_pref . $FN; # $File::Find::name
864 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 865 { $wanted_callback->() }; # protect against wild "next"
81793b90 866 }
867
868 }
869 else {
870 # This dir has subdirectories.
871 $subcount = $nlink - 2;
872
7bd31527 873 # HACK: insert directories at this position. so as to preserve
874 # the user pre-processed ordering of files.
875 # EG: directory traversal is in user sorted order, not at random.
876 my $stack_top = @Stack;
877
81793b90 878 for my $FN (@filenames) {
7e47e6ff 879 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 880 if ($subcount > 0 || $no_nlink) {
81793b90 881 # Seen all the subdirs?
882 # check for directoriness.
883 # stat is faster for a file in the current directory
07867069 884 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90 885
886 if (-d _) {
887 --$subcount;
544ff7a7 888 $FN =~ s/\.dir\z//i if $Is_VMS;
7bd31527 889 # HACK: replace push to preserve dir traversal order
890 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
891 splice @Stack, $stack_top, 0,
892 [$CdLvl,$dir_name,$FN,$sub_nlink];
81793b90 893 }
894 else {
7e47e6ff 895 $name = $dir_pref . $FN; # $File::Find::name
896 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 897 { $wanted_callback->() }; # protect against wild "next"
81793b90 898 }
899 }
07867069 900 else {
7e47e6ff 901 $name = $dir_pref . $FN; # $File::Find::name
902 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 903 { $wanted_callback->() }; # protect against wild "next"
81793b90 904 }
905 }
906 }
17b275ff 907 }
908 continue {
57e73c4b 909 while ( defined ($SE = pop @Stack) ) {
81793b90 910 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
911 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 912 my $tmp;
913 if ($Is_MacOS) {
914 $tmp = (':' x ($CdLvl-$Level)) . ':';
915 }
916 else {
917 $tmp = join('/',('..') x ($CdLvl-$Level));
918 }
919 die "Can't cd to $dir_name" . $tmp
920 unless chdir ($tmp);
81793b90 921 $CdLvl = $Level;
922 }
7e47e6ff 923
924 if ($Is_MacOS) {
925 # $pdir always has a trailing ':', except for the starting dir,
926 # where $dir_rel eq ':'
927 $dir_name = "$p_dir$dir_rel";
928 $dir_pref = "$dir_name:";
929 }
930 else {
931 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
932 $dir_pref = "$dir_name/";
933 }
934
719c805e 935 if ( $nlink == -2 ) {
7e47e6ff 936 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 937 $_ = $File::Find::current_dir;
abfdd623 938 $post_process->(); # End-of-directory processing
7e47e6ff 939 }
940 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
941 $name = $dir_name;
942 if ($Is_MacOS) {
943 if ($dir_rel eq ':') { # must be the top dir, where we started
944 $name =~ s|:$||; # $File::Find::name
945 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
946 }
947 $dir = $p_dir; # $File::Find::dir
948 $_ = ($no_chdir ? $name : $dir_rel); # $_
949 }
950 else {
951 if ( substr($name,-2) eq '/.' ) {
5cf0a2f2 952 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff 953 }
954 $dir = $p_dir;
955 $_ = ($no_chdir ? $dir_name : $dir_rel );
956 if ( substr($_,-2) eq '/.' ) {
5cf0a2f2 957 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 958 }
959 }
abfdd623 960 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 961 }
962 else {
963 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
964 last;
965 }
81793b90 966 }
a0d0e21e 967 }
968}
969
81793b90 970
971# API:
972# $wanted
973# $dir_loc : absolute location of a dir
974# $p_dir : "parent directory"
975# preconditions:
976# chdir (if not no_chdir) to dir
977
978sub _find_dir_symlnk($$$) {
7e47e6ff 979 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90 980 my @Stack;
981 my @filenames;
982 my $new_loc;
7e47e6ff 983 my $updir_loc = $dir_loc; # untainted parent directory
81793b90 984 my $SE = [];
985 my $dir_name = $p_dir;
7e47e6ff 986 my $dir_pref;
987 my $loc_pref;
39e79f6b 988 my $dir_rel = $File::Find::current_dir;
7e47e6ff 989 my $byd_flag; # flag for pending stack entry if $bydepth
990 my $tainted = 0;
991 my $ok = 1;
992
993 if ($Is_MacOS) {
994 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
995 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff 996 } else {
997 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
998 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 999 }
81793b90 1000
1001 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff 1002
1003 unless ($no_chdir) {
1004 # untaint the topdir
1005 if (( $untaint ) && (is_tainted($dir_loc) )) {
1006 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
1007 # once untainted, $updir_loc is pushed on the stack (as parent directory);
1008 # hence, we don't need to untaint the parent directory every time we chdir
1009 # to it later
1010 unless (defined $updir_loc) {
81793b90 1011 if ($untaint_skip == 0) {
1012 die "directory $dir_loc is still tainted";
1013 }
1014 else {
1015 return;
1016 }
1017 }
1018 }
7e47e6ff 1019 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
1020 unless ($ok) {
cd68ec93 1021 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 1022 return;
1023 }
1024 }
1025
7e47e6ff 1026 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
1027
1028 if ($Is_MacOS) {
1029 $p_dir = $dir_pref; # ensure trailing ':'
1030 }
57e73c4b 1031
81793b90 1032 while (defined $SE) {
1033
1034 unless ($bydepth) {
7e47e6ff 1035 # change (back) to parent directory (always untainted)
704ea872 1036 unless ($no_chdir) {
7e47e6ff 1037 unless (chdir $updir_loc) {
cd68ec93 1038 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872 1039 next;
1040 }
1041 }
7e47e6ff 1042 $dir= $p_dir; # $File::Find::dir
1043 $name= $dir_name; # $File::Find::name
1044 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
1045 $fullname= $dir_loc; # $File::Find::fullname
81793b90 1046 # prune may happen here
7e47e6ff 1047 $prune= 0;
704ea872 1048 lstat($_); # make sure file tests with '_' work
abfdd623 1049 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1050 next if $prune;
81793b90 1051 }
1052
1053 # change to that directory
7e47e6ff 1054 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1055 $updir_loc = $dir_loc;
1056 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
1057 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
1058 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
1059 unless (defined $updir_loc) {
81793b90 1060 if ($untaint_skip == 0) {
1061 die "directory $dir_loc is still tainted";
a0d0e21e 1062 }
237437d0 1063 else {
81793b90 1064 next;
237437d0 1065 }
a0d0e21e 1066 }
1067 }
7e47e6ff 1068 unless (chdir $updir_loc) {
cd68ec93 1069 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90 1070 next;
1071 }
1072 }
1073
7e47e6ff 1074 if ($Is_MacOS) {
1075 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1076 }
1077
1078 $dir = $dir_name; # $File::Find::dir
81793b90 1079
1080 # Get the list of files in the current directory.
7e47e6ff 1081 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 1082 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90 1083 next;
1084 }
1085 @filenames = readdir DIR;
1086 closedir(DIR);
1087
1088 for my $FN (@filenames) {
7e47e6ff 1089 next if $FN =~ $File::Find::skip_pattern;
81793b90 1090
1091 # follow symbolic links / do an lstat
07867069 1092 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 1093
1094 # ignore if invalid symlink
1095 next unless defined $new_loc;
7e47e6ff 1096
81793b90 1097 if (-d _) {
7e47e6ff 1098 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90 1099 }
1100 else {
7e47e6ff 1101 $fullname = $new_loc; # $File::Find::fullname
1102 $name = $dir_pref . $FN; # $File::Find::name
1103 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 1104 { $wanted_callback->() }; # protect against wild "next"
81793b90 1105 }
1106 }
1107
81793b90 1108 }
1109 continue {
57e73c4b 1110 while (defined($SE = pop @Stack)) {
7e47e6ff 1111 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1112 if ($Is_MacOS) {
1113 # $p_dir always has a trailing ':', except for the starting dir,
1114 # where $dir_rel eq ':'
1115 $dir_name = "$p_dir$dir_rel";
1116 $dir_pref = "$dir_name:";
1117 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1118 }
1119 else {
1120 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1121 $dir_pref = "$dir_name/";
1122 $loc_pref = "$dir_loc/";
1123 }
1124 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1125 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1126 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1127 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff 1128 next;
1129 }
1130 }
1131 $fullname = $dir_loc; # $File::Find::fullname
1132 $name = $dir_name; # $File::Find::name
1133 if ($Is_MacOS) {
1134 if ($dir_rel eq ':') { # must be the top dir, where we started
1135 $name =~ s|:$||; # $File::Find::name
1136 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1137 }
1138 $dir = $p_dir; # $File::Find::dir
1139 $_ = ($no_chdir ? $name : $dir_rel); # $_
1140 }
1141 else {
1142 if ( substr($name,-2) eq '/.' ) {
f801979b 1143 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
7e47e6ff 1144 }
1145 $dir = $p_dir; # $File::Find::dir
1146 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1147 if ( substr($_,-2) eq '/.' ) {
f801979b 1148 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 1149 }
1150 }
1151
1152 lstat($_); # make sure file tests with '_' work
abfdd623 1153 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 1154 }
1155 else {
1156 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1157 last;
1158 }
a0d0e21e 1159 }
1160 }
1161}
1162
81793b90 1163
20408e3c 1164sub wrap_wanted {
81793b90 1165 my $wanted = shift;
1166 if ( ref($wanted) eq 'HASH' ) {
1167 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1168 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1169 }
1170 if ( $wanted->{untaint} ) {
7e47e6ff 1171 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90 1172 unless defined $wanted->{untaint_pattern};
1173 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1174 }
1175 return $wanted;
1176 }
1177 else {
1178 return { wanted => $wanted };
1179 }
a0d0e21e 1180}
1181
20408e3c 1182sub find {
81793b90 1183 my $wanted = shift;
1184 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e 1185}
1186
55d729e4 1187sub finddepth {
81793b90 1188 my $wanted = wrap_wanted(shift);
1189 $wanted->{bydepth} = 1;
1190 _find_opt($wanted, @_);
20408e3c 1191}
6280b799 1192
7e47e6ff 1193# default
1194$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1195$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1196
6280b799 1197# These are hard-coded for now, but may move to hint files.
10eba763 1198if ($^O eq 'VMS') {
81793b90 1199 $Is_VMS = 1;
7e47e6ff 1200 $File::Find::dont_use_nlink = 1;
1201}
1202elsif ($^O eq 'MacOS') {
1203 $Is_MacOS = 1;
1204 $File::Find::dont_use_nlink = 1;
1205 $File::Find::skip_pattern = qr/^Icon\015\z/;
1206 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306 1207}
1208
7e47e6ff 1209# this _should_ work properly on all platforms
1210# where File::Find can be expected to work
1211$File::Find::current_dir = File::Spec->curdir || '.';
1212
81793b90 1213$File::Find::dont_use_nlink = 1
497711e7 1214 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
1119cb72 1215 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' ||
1216 $^O eq 'nto';
6280b799 1217
20408e3c 1218# Set dont_use_nlink in your hint file if your system's stat doesn't
1219# report the number of links in a directory as an indication
1220# of the number of files.
1221# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90 1222unless ($File::Find::dont_use_nlink) {
1223 require Config;
1224 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c 1225}
1226
7e47e6ff 1227# We need a function that checks if a scalar is tainted. Either use the
1228# Scalar::Util module's tainted() function or our (slower) pure Perl
1229# fallback is_tainted_pp()
1230{
1231 local $@;
1232 eval { require Scalar::Util };
1233 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1234}
1235
a0d0e21e 12361;