3 package File::Find::Rule;
6 use Text::Glob 'glob_to_regex';
9 use File::Find (); # we're only wrapping for now
11 our $VERSION = '0.32';
13 # we'd just inherit from Exporter, but I want the colon
17 for my $sym ( qw( find rule ) ) {
19 *{"$to\::$sym"} = \&{$sym};
22 my ($extension) = /^:(.*)/;
23 eval "require File::Find::Rule::$extension";
24 croak "couldn't bootstrap File::Find::Rule::$extension: $@" if $@;
30 File::Find::Rule - Alternative interface to File::Find
35 # find all the subdirectories of a given directory
36 my @subdirs = File::Find::Rule->directory->in( $directory );
38 # find all the .pm files in @INC
39 my @files = File::Find::Rule->file()
43 # as above, but without method chaining
44 my $rule = File::Find::Rule->new;
46 $rule->name( '*.pm' );
47 my @files = $rule->in( @INC );
51 File::Find::Rule is a friendlier interface to File::Find. It allows
52 you to build rules which specify the desired files and directories.
60 my $object = __PACKAGE__->new();
67 if ($method =~ s/^\!//) {
68 # jinkies, we're really negating this
73 unless (defined prototype $method) {
75 @args = ref $args eq 'ARRAY' ? @$args : $args;
79 @args = $object->new->$method(@args);
83 my @return = $object->$method(@args);
84 return @return if $method eq 'in';
96 A constructor. You need not invoke C<new> manually unless you wish
97 to, as each of the rule-making methods will auto-create a suitable
98 object if called as class methods.
103 my $referent = shift;
104 my $class = ref $referent || $referent;
117 $object = $object->new()
124 =head2 Matching Rules
128 =item C<name( @patterns )>
130 Specifies names that should match. May be globs or regular
133 $set->name( '*.mp3', '*.ogg' ); # mp3s or oggs
134 $set->name( qr/\.(mp3|ogg)$/ ); # the same as a regex
135 $set->name( 'foo.bar' ); # just things named foo.bar
143 ref $item eq 'ARRAY' ? push @_, @{ $item } : push @flat, $item;
149 my $self = _force_object shift;
150 my @names = map { ref $_ eq "Regexp" ? $_ : glob_to_regex $_ } _flatten( @_ );
152 push @{ $self->{rules} }, {
154 code => join( ' || ', map { "m($_)" } @names ),
163 Synonyms are provided for each of the -X tests. See L<perlfunc/-X> for
164 details. None of these methods take arguments.
166 Test | Method Test | Method
167 ------|------------- ------|----------------
168 -r | readable -R | r_readable
169 -w | writeable -W | r_writeable
170 -w | writable -W | r_writable
171 -x | executable -X | r_executable
172 -o | owned -O | r_owned
174 -e | exists -f | file
175 -z | empty -d | directory
176 -s | nonempty -l | symlink
178 -u | setuid -S | socket
179 -g | setgid -b | block
180 -k | sticky -c | character
183 -A | accessed -T | ascii
184 -C | changed -B | binary
186 Though some tests are fairly meaningless as binary flags (C<modified>,
187 C<accessed>, C<changed>), they have been included for completeness.
189 # find nonempty files
195 use vars qw( %X_tests );
197 -r => readable => -R => r_readable =>
198 -w => writeable => -W => r_writeable =>
199 -w => writable => -W => r_writable =>
200 -x => executable => -X => r_executable =>
201 -o => owned => -O => r_owned =>
203 -e => exists => -f => file =>
204 -z => empty => -d => directory =>
205 -s => nonempty => -l => symlink =>
207 -u => setuid => -S => socket =>
208 -g => setgid => -b => block =>
209 -k => sticky => -c => character =>
212 -A => accessed => -T => ascii =>
213 -C => changed => -B => binary =>
216 for my $test (keys %X_tests) {
217 my $sub = eval 'sub () {
218 my $self = _force_object shift;
219 push @{ $self->{rules} }, {
220 code => "' . $test . ' \$_",
221 rule => "'.$X_tests{$test}.'",
226 *{ $X_tests{$test} } = $sub;
232 The following C<stat> based methods are provided: C<dev>, C<ino>,
233 C<mode>, C<nlink>, C<uid>, C<gid>, C<rdev>, C<size>, C<atime>,
234 C<mtime>, C<ctime>, C<blksize>, and C<blocks>. See L<perlfunc/stat>
237 Each of these can take a number of targets, which will follow
238 L<Number::Compare> semantics.
240 $rule->size( 7 ); # exactly 7
241 $rule->size( ">7Ki" ); # larger than 7 * 1024 * 1024 bytes
243 ->size( "<=90" ); # between 7 and 90, inclusive
244 $rule->size( 7, 9, 42 ); # 7, 9 or 42
248 use vars qw( @stat_tests );
249 @stat_tests = qw( dev ino mode nlink uid gid rdev
250 size atime mtime ctime blksize blocks );
253 for my $test (@stat_tests) {
254 my $index = $i++; # to close over
256 my $self = _force_object shift;
258 my @tests = map { Number::Compare->parse_to_perl($_) } @_;
260 push @{ $self->{rules} }, {
263 code => 'do { my $val = (stat $_)['.$index.'] || 0;'.
264 join ('||', map { "(\$val $_)" } @tests ).' }',
273 =item C<any( @rules )>
275 =item C<or( @rules )>
277 Allows shortcircuiting boolean evaluation as an alternative to the
278 default and-like nature of combined rules. C<any> and C<or> are
281 # find avis, movs, things over 200M and empty files
282 $rule->any( File::Find::Rule->name( '*.avi', '*.mov' ),
283 File::Find::Rule->size( '>200M' ),
284 File::Find::Rule->file->empty,
290 my $self = _force_object shift;
291 # compile all the subrules to code fragments
292 push @{ $self->{rules} }, {
294 code => '(' . join( ' || ', map '( ' . $_->_compile . ' )', @_ ). ')',
298 # merge all the subs hashes of the kids into ourself
299 %{ $self->{subs} } = map { %{ $_->{subs} } } $self, @_;
305 =item C<none( @rules )>
307 =item C<not( @rules )>
309 Negates a rule. (The inverse of C<any>.) C<none> and C<not> are
312 # files that aren't 8.3 safe
314 ->not( $rule->new->name( qr/^[^.]{1,8}(\.[^.]{0,3})?$/ ) );
319 my $self = _force_object shift;
321 push @{ $self->{rules} }, {
324 code => '(' . join ( ' && ', map { "!(". $_->_compile . ")" } @_ ) . ")",
327 # merge all the subs hashes into us
328 %{ $self->{subs} } = map { %{ $_->{subs} } } $self, @_;
336 Traverse no further. This rule always matches.
341 my $self = _force_object shift;
343 push @{ $self->{rules} },
346 code => '$File::Find::prune = 1'
353 Don't keep this file. This rule always matches.
358 my $self = _force_object shift;
360 push @{ $self->{rules} }, {
362 code => '$discarded = 1',
367 =item C<exec( \&subroutine( $shortname, $path, $fullname ) )>
369 Allows user-defined rules. Your subroutine will be invoked with C<$_>
370 set to the current short name, and with parameters of the name, the
371 path you're in, and the full relative filename.
373 Return a true value if your rule matched.
375 # get things with long names
376 $rules->exec( sub { length > 20 } );
381 my $self = _force_object shift;
384 push @{ $self->{rules} }, {
391 =item C<grep( @specifiers )>
393 Opens a file and tests it each line at a time.
395 For each line it evaluates each of the specifiers, stopping at the
396 first successful match. A specifier may be a regular expression or a
397 subroutine. The subroutine will be invoked with the same parameters
398 as an ->exec subroutine.
400 It is possible to provide a set of negative specifiers by enclosing
401 them in anonymous arrays. Should a negative specifier match the
402 iteration is aborted and the clause is failed. For example:
404 $rule->grep( qr/^#!.*\bperl/, [ sub { 1 } ] );
406 Is a passing clause if the first line of a file looks like a perl
412 my $self = _force_object shift;
416 ? map { [ ( ref $_ ? $_ : qr/$_/ ) => 0 ] } @$_
423 open FILE, $_ or return;
426 for my $p (@pattern) {
427 my ($rule, $ret) = @$p;
429 if ref $rule eq 'Regexp'
438 =item C<maxdepth( $level )>
440 Descend at most C<$level> (a non-negative integer) levels of directories
441 below the starting point.
443 May be invoked many times per rule, but only the most recent value is
446 =item C<mindepth( $level )>
448 Do not apply any tests at levels less than C<$level> (a non-negative
451 =item C<extras( \%extras )>
453 Specifies extra values to pass through to C<File::File::find> as part
456 For example this allows you to specify following of symlinks like so:
458 my $rule = File::Find::Rule->extras({ follow => 1 });
460 May be invoked many times per rule, but only the most recent value is
465 for my $setter (qw( maxdepth mindepth extras )) {
467 my $self = _force_object shift;
468 $self->{$setter} = shift;
478 Trim the leading portion of any path found
483 my $self = _force_object shift;
484 $self->{relative} = 1;
490 Negated version of the rule. An effective shortand related to ! in
491 the procedural interface.
493 $foo->not_name('*.pl');
495 $foo->not( $foo->new->name('*.pl' ) );
502 $AUTOLOAD =~ /::not_([^:]*)$/
503 or croak "Can't locate method $AUTOLOAD";
507 my $self = _force_object shift;
508 $self->not( $self->new->$method(@_) );
523 =item C<in( @directories )>
525 Evaluates the rule, returns a list of paths to matching files and
531 my $self = _force_object shift;
534 my $fragment = $self->_compile;
535 my %subs = %{ $self->{subs} };
537 warn "relative mode handed multiple paths - that's a bit silly\n"
538 if $self->{relative} && @_ > 1;
542 (my $path = $File::Find::name) =~ s#^(?:\./+)+##;
543 my @args = ($_, $File::Find::dir, $path);
544 my $maxdepth = $self->{maxdepth};
545 my $mindepth = $self->{mindepth};
546 my $relative = $self->{relative};
548 # figure out the relative path and depth
549 my $relpath = $File::Find::name;
550 $relpath =~ s{^\Q$topdir\E/?}{};
551 my $depth = scalar File::Spec->splitdir($relpath);
552 #print "name: \'$File::Find::name\' ";
553 #print "relpath: \'$relpath\' depth: $depth relative: $relative\n";
555 defined $maxdepth && $depth >= $maxdepth
556 and $File::Find::prune = 1;
558 defined $mindepth && $depth < $mindepth
561 #print "Testing \'$_\'\n";
564 return unless ' . $fragment . ';
565 return if $discarded;
567 push @found, $relpath if $relpath ne "";
575 #print Dumper \%subs;
576 #warn "Compiled sub: '$code'\n";
578 my $sub = eval "$code" or die "compile error '$code' $@";
580 # $topdir is used for relative and maxdepth
582 # slice off the trailing slash if there is one (the
583 # maxdepth/mindepth code is fussy)
585 unless $topdir eq '/';
586 $self->_call_find( { %{ $self->{extras} }, wanted => $sub }, $path );
594 File::Find::find( @_ );
600 return '1' unless @{ $self->{rules} };
601 my $code = join " && ", map {
602 if (ref $_->{code}) {
603 my $key = "$_->{code}";
604 $self->{subs}{$key} = $_->{code};
605 "\$subs{'$key'}->(\@args) # $_->{rule}\n";
608 "( $_->{code} ) # $_->{rule}\n";
610 } @{ $self->{rules} };
616 =item C<start( @directories )>
618 Starts a find across the specified directories. Matching items may
619 then be queried using L</match>. This allows you to use a rule as an
622 my $rule = File::Find::Rule->file->name("*.jpeg")->start( "/web" );
623 while ( defined ( my $image = $rule->match ) ) {
630 my $self = _force_object shift;
632 $self->{iterator} = [ $self->in( @_ ) ];
638 Returns the next file which matches, false if there are no more.
643 my $self = _force_object shift;
645 return shift @{ $self->{iterator} };
656 Extension modules are available from CPAN in the File::Find::Rule
657 namespace. In order to use these extensions either use them directly:
659 use File::Find::Rule::ImageSize;
660 use File::Find::Rule::MMagic;
662 # now your rules can use the clauses supplied by the ImageSize and
665 or, specify that File::Find::Rule should load them for you:
667 use File::Find::Rule qw( :ImageSize :MMagic );
669 For notes on implementing your own extensions, consult
670 L<File::Find::Rule::Extending>
672 =head2 Further examples
676 =item Finding perl scripts
678 my $finder = File::Find::Rule->or
680 File::Find::Rule->name( '*.pl' ),
681 File::Find::Rule->exec(
683 if (open my $fh, $_) {
686 return $shebang =~ /^#!.*\bperl/;
692 Based upon this message http://use.perl.org/comments.pl?sid=7052&cid=10842
694 =item ignore CVS directories
696 my $rule = File::Find::Rule->new;
704 Note here the use of a null rule. Null rules match anything they see,
705 so the effect is to match (and discard) directories called 'CVS' or to
710 =head1 TWO FOR THE PRICE OF ONE
712 File::Find::Rule also gives you a procedural interface. This is
713 documented in L<File::Find::Rule::Procedural>
719 =head1 TAINT MODE INTERACTION
721 As of 0.32 File::Find::Rule doesn't capture the current working directory in
722 a taint-unsafe manner. File::Find itself still does operations that the taint
723 system will flag as insecure but you can use the L</extras> feature to ask
724 L<File::Find> to internally C<untaint> file paths with a regex like so:
726 my $rule = File::Find::Rule->extras({ untaint => 1 });
728 Please consult L<File::Find>'s documentation for C<untaint>,
729 C<untaint_pattern>, and C<untaint_skip> for more information.
733 The code makes use of the C<our> keyword and as such requires perl version
736 Currently it isn't possible to remove a clause from a rule object. If
737 this becomes a significant issue it will be addressed.
741 Richard Clamp <richardc@unixbeard.net> with input gained from this
742 use.perl discussion: http://use.perl.org/~richardc/journal/6467
744 Additional proofreading and input provided by Kake, Greg McCarroll,
745 and Andy Lester andy@petdance.com.
749 Copyright (C) 2002, 2003, 2004, 2006, 2009 Richard Clamp. All Rights Reserved.
751 This module is free software; you can redistribute it and/or modify it
752 under the same terms as Perl itself.
756 L<File::Find>, L<Text::Glob>, L<Number::Compare>, find(1)
758 If you want to know about the procedural interface, see
759 L<File::Find::Rule::Procedural>, and if you have an idea for a neat
760 extension L<File::Find::Rule::Extending>
764 Implementation notes:
766 $self->rules is an array of hashrefs. it may be a code fragment or a call
769 Anonymous subroutines are stored in the $self->subs hashref keyed on the
770 stringfied version of the coderef.
772 When one File::Find::Rule object is combined with another, such as in the any
773 and not operations, this entire hash is merged.
775 The _compile method walks the rules element and simply glues the code
776 fragments together so they can be compiled into an anyonymous File::Find
780 [*] There's probably a win to be made with the current model in making
781 stat calls use C<_>. For
783 find( file => size => "> 20M" => size => "< 400M" );
785 up to 3 stats will happen for each candidate. Adding a priming _
786 would be a bit blind if the first operation was C< name => 'foo' >,
787 since that can be tested by a single regex. Simply checking what the
788 next type of operation doesn't work since any arbritary exec sub may
789 or may not stat. Potentially worse, they could stat something else
792 # extract from the worlds stupidest make(1)
793 find( exec => sub { my $f = $_; $f =~ s/\.c$/.o/ && !-e $f } );
795 Maybe the best way is to treat C<_> as invalid after calling an exec,
796 and doc that C<_> will only be meaningful after stat and -X tests if
797 they're wanted in exec blocks.