local-lib5/lib/perl5/File/Find/Rule.pm

   1 #       $Id$
   2
   3 package File::Find::Rule;
   4 use strict;
   5 use File::Spec;
   6 use Text::Glob 'glob_to_regex';
   7 use Number::Compare;
   8 use Carp qw/croak/;
   9 use File::Find (); # we're only wrapping for now
  10
  11 our $VERSION = '0.32';
  12
  13 # we'd just inherit from Exporter, but I want the colon
  14 sub import {
  15     my $pkg = shift;
  16     my $to  = caller;
  17     for my $sym ( qw( find rule ) ) {
  18         no strict 'refs';
  19         *{"$to\::$sym"} = \&{$sym};
  20     }
  21     for (grep /^:/, @_) {
  22         my ($extension) = /^:(.*)/;
  23         eval "require File::Find::Rule::$extension";
  24         croak "couldn't bootstrap File::Find::Rule::$extension: $@" if $@;
  25     }
  26 }
  27
  28 =head1 NAME
  29
  30 File::Find::Rule - Alternative interface to File::Find
  31
  32 =head1 SYNOPSIS
  33
  34   use File::Find::Rule;
  35   # find all the subdirectories of a given directory
  36   my @subdirs = File::Find::Rule->directory->in( $directory );
  37
  38   # find all the .pm files in @INC
  39   my @files = File::Find::Rule->file()
  40                               ->name( '*.pm' )
  41                               ->in( @INC );
  42
  43   # as above, but without method chaining
  44   my $rule =  File::Find::Rule->new;
  45   $rule->file;
  46   $rule->name( '*.pm' );
  47   my @files = $rule->in( @INC );
  48
  49 =head1 DESCRIPTION
  50
  51 File::Find::Rule is a friendlier interface to File::Find.  It allows
  52 you to build rules which specify the desired files and directories.
  53
  54 =cut
  55
  56 # the procedural shim
  57
  58 *rule = \&find;
  59 sub find {
  60     my $object = __PACKAGE__->new();
  61     my $not = 0;
  62
  63     while (@_) {
  64         my $method = shift;
  65         my @args;
  66
  67         if ($method =~ s/^\!//) {
  68             # jinkies, we're really negating this
  69             unshift @_, $method;
  70             $not = 1;
  71             next;
  72         }
  73         unless (defined prototype $method) {
  74             my $args = shift;
  75             @args = ref $args eq 'ARRAY' ? @$args : $args;
  76         }
  77         if ($not) {
  78             $not = 0;
  79             @args = $object->new->$method(@args);
  80             $method = "not";
  81         }
  82
  83         my @return = $object->$method(@args);
  84         return @return if $method eq 'in';
  85     }
  86     $object;
  87 }
  88
  89
  90 =head1 METHODS
  91
  92 =over
  93
  94 =item C<new>
  95
  96 A constructor.  You need not invoke C<new> manually unless you wish
  97 to, as each of the rule-making methods will auto-create a suitable
  98 object if called as class methods.
  99
 100 =cut
 101
 102 sub new {
 103     my $referent = shift;
 104     my $class = ref $referent || $referent;
 105     bless {
 106         rules    => [],
 107         subs     => {},
 108         iterator => [],
 109         extras   => {},
 110         maxdepth => undef,
 111         mindepth => undef,
 112     }, $class;
 113 }
 114
 115 sub _force_object {
 116     my $object = shift;
 117     $object = $object->new()
 118       unless ref $object;
 119     $object;
 120 }
 121
 122 =back
 123
 124 =head2 Matching Rules
 125
 126 =over
 127
 128 =item C<name( @patterns )>
 129
 130 Specifies names that should match.  May be globs or regular
 131 expressions.
 132
 133  $set->name( '*.mp3', '*.ogg' ); # mp3s or oggs
 134  $set->name( qr/\.(mp3|ogg)$/ ); # the same as a regex
 135  $set->name( 'foo.bar' );        # just things named foo.bar
 136
 137 =cut
 138
 139 sub _flatten {
 140     my @flat;
 141     while (@_) {
 142         my $item = shift;
 143         ref $item eq 'ARRAY' ? push @_, @{ $item } : push @flat, $item;
 144     }
 145     return @flat;
 146 }
 147
 148 sub name {
 149     my $self = _force_object shift;
 150     my @names = map { ref $_ eq "Regexp" ? $_ : glob_to_regex $_ } _flatten( @_ );
 151
 152     push @{ $self->{rules} }, {
 153         rule => 'name',
 154         code => join( ' || ', map { "m($_)" } @names ),
 155         args => \@_,
 156     };
 157
 158     $self;
 159 }
 160
 161 =item -X tests
 162
 163 Synonyms are provided for each of the -X tests. See L<perlfunc/-X> for
 164 details.  None of these methods take arguments.
 165
 166   Test | Method               Test |  Method
 167  ------|-------------        ------|----------------
 168    -r  |  readable             -R  |  r_readable
 169    -w  |  writeable            -W  |  r_writeable
 170    -w  |  writable             -W  |  r_writable
 171    -x  |  executable           -X  |  r_executable
 172    -o  |  owned                -O  |  r_owned
 173        |                           |
 174    -e  |  exists               -f  |  file
 175    -z  |  empty                -d  |  directory
 176    -s  |  nonempty             -l  |  symlink
 177        |                       -p  |  fifo
 178    -u  |  setuid               -S  |  socket
 179    -g  |  setgid               -b  |  block
 180    -k  |  sticky               -c  |  character
 181        |                       -t  |  tty
 182    -M  |  modified                 |
 183    -A  |  accessed             -T  |  ascii
 184    -C  |  changed              -B  |  binary
 185
 186 Though some tests are fairly meaningless as binary flags (C<modified>,
 187 C<accessed>, C<changed>), they have been included for completeness.
 188
 189  # find nonempty files
 190  $rule->file,
 191       ->nonempty;
 192
 193 =cut
 194
 195 use vars qw( %X_tests );
 196 %X_tests = (
 197     -r  =>  readable           =>  -R  =>  r_readable      =>
 198     -w  =>  writeable          =>  -W  =>  r_writeable     =>
 199     -w  =>  writable           =>  -W  =>  r_writable      =>
 200     -x  =>  executable         =>  -X  =>  r_executable    =>
 201     -o  =>  owned              =>  -O  =>  r_owned         =>
 202
 203     -e  =>  exists             =>  -f  =>  file            =>
 204     -z  =>  empty              =>  -d  =>  directory       =>
 205     -s  =>  nonempty           =>  -l  =>  symlink         =>
 206                                =>  -p  =>  fifo            =>
 207     -u  =>  setuid             =>  -S  =>  socket          =>
 208     -g  =>  setgid             =>  -b  =>  block           =>
 209     -k  =>  sticky             =>  -c  =>  character       =>
 210                                =>  -t  =>  tty             =>
 211     -M  =>  modified                                       =>
 212     -A  =>  accessed           =>  -T  =>  ascii           =>
 213     -C  =>  changed            =>  -B  =>  binary          =>
 214    );
 215
 216 for my $test (keys %X_tests) {
 217     my $sub = eval 'sub () {
 218         my $self = _force_object shift;
 219         push @{ $self->{rules} }, {
 220             code => "' . $test . ' \$_",
 221             rule => "'.$X_tests{$test}.'",
 222         };
 223         $self;
 224     } ';
 225     no strict 'refs';
 226     *{ $X_tests{$test} } = $sub;
 227 }
 228
 229
 230 =item stat tests
 231
 232 The following C<stat> based methods are provided: C<dev>, C<ino>,
 233 C<mode>, C<nlink>, C<uid>, C<gid>, C<rdev>, C<size>, C<atime>,
 234 C<mtime>, C<ctime>, C<blksize>, and C<blocks>.  See L<perlfunc/stat>
 235 for details.
 236
 237 Each of these can take a number of targets, which will follow
 238 L<Number::Compare> semantics.
 239
 240  $rule->size( 7 );         # exactly 7
 241  $rule->size( ">7Ki" );    # larger than 7 * 1024 * 1024 bytes
 242  $rule->size( ">=7" )
 243       ->size( "<=90" );    # between 7 and 90, inclusive
 244  $rule->size( 7, 9, 42 );  # 7, 9 or 42
 245
 246 =cut
 247
 248 use vars qw( @stat_tests );
 249 @stat_tests = qw( dev ino mode nlink uid gid rdev
 250                   size atime mtime ctime blksize blocks );
 251 {
 252     my $i = 0;
 253     for my $test (@stat_tests) {
 254         my $index = $i++; # to close over
 255         my $sub = sub {
 256             my $self = _force_object shift;
 257
 258             my @tests = map { Number::Compare->parse_to_perl($_) } @_;
 259
 260             push @{ $self->{rules} }, {
 261                 rule => $test,
 262                 args => \@_,
 263                 code => 'do { my $val = (stat $_)['.$index.'] || 0;'.
 264                   join ('||', map { "(\$val $_)" } @tests ).' }',
 265             };
 266             $self;
 267         };
 268         no strict 'refs';
 269         *$test = $sub;
 270     }
 271 }
 272
 273 =item C<any( @rules )>
 274
 275 =item C<or( @rules )>
 276
 277 Allows shortcircuiting boolean evaluation as an alternative to the
 278 default and-like nature of combined rules.  C<any> and C<or> are
 279 interchangeable.
 280
 281  # find avis, movs, things over 200M and empty files
 282  $rule->any( File::Find::Rule->name( '*.avi', '*.mov' ),
 283              File::Find::Rule->size( '>200M' ),
 284              File::Find::Rule->file->empty,
 285            );
 286
 287 =cut
 288
 289 sub any {
 290     my $self = _force_object shift;
 291     # compile all the subrules to code fragments
 292     push @{ $self->{rules} }, {
 293         rule => "any",
 294         code => '(' . join( ' || ', map '( ' . $_->_compile . ' )', @_ ). ')',
 295         args => \@_,
 296     };
 297
 298     # merge all the subs hashes of the kids into ourself
 299     %{ $self->{subs} } = map { %{ $_->{subs} } } $self, @_;
 300     $self;
 301 }
 302
 303 *or = \&any;
 304
 305 =item C<none( @rules )>
 306
 307 =item C<not( @rules )>
 308
 309 Negates a rule.  (The inverse of C<any>.)  C<none> and C<not> are
 310 interchangeable.
 311
 312   # files that aren't 8.3 safe
 313   $rule->file
 314        ->not( $rule->new->name( qr/^[^.]{1,8}(\.[^.]{0,3})?$/ ) );
 315
 316 =cut
 317
 318 sub not {
 319     my $self = _force_object shift;
 320
 321     push @{ $self->{rules} }, {
 322         rule => 'not',
 323         args => \@_,
 324         code => '(' . join ( ' && ', map { "!(". $_->_compile . ")" } @_ ) . ")",
 325     };
 326
 327     # merge all the subs hashes into us
 328     %{ $self->{subs} } = map { %{ $_->{subs} } } $self, @_;
 329     $self;
 330 }
 331
 332 *none = \&not;
 333
 334 =item C<prune>
 335
 336 Traverse no further.  This rule always matches.
 337
 338 =cut
 339
 340 sub prune () {
 341     my $self = _force_object shift;
 342
 343     push @{ $self->{rules} },
 344       {
 345        rule => 'prune',
 346        code => '$File::Find::prune = 1'
 347       };
 348     $self;
 349 }
 350
 351 =item C<discard>
 352
 353 Don't keep this file.  This rule always matches.
 354
 355 =cut
 356
 357 sub discard () {
 358     my $self = _force_object shift;
 359
 360     push @{ $self->{rules} }, {
 361         rule => 'discard',
 362         code => '$discarded = 1',
 363     };
 364     $self;
 365 }
 366
 367 =item C<exec( \&subroutine( $shortname, $path, $fullname ) )>
 368
 369 Allows user-defined rules.  Your subroutine will be invoked with C<$_>
 370 set to the current short name, and with parameters of the name, the
 371 path you're in, and the full relative filename.
 372
 373 Return a true value if your rule matched.
 374
 375  # get things with long names
 376  $rules->exec( sub { length > 20 } );
 377
 378 =cut
 379
 380 sub exec {
 381     my $self = _force_object shift;
 382     my $code = shift;
 383
 384     push @{ $self->{rules} }, {
 385         rule => 'exec',
 386         code => $code,
 387     };
 388     $self;
 389 }
 390
 391 =item C<grep( @specifiers )>
 392
 393 Opens a file and tests it each line at a time.
 394
 395 For each line it evaluates each of the specifiers, stopping at the
 396 first successful match.  A specifier may be a regular expression or a
 397 subroutine.  The subroutine will be invoked with the same parameters
 398 as an ->exec subroutine.
 399
 400 It is possible to provide a set of negative specifiers by enclosing
 401 them in anonymous arrays.  Should a negative specifier match the
 402 iteration is aborted and the clause is failed.  For example:
 403
 404  $rule->grep( qr/^#!.*\bperl/, [ sub { 1 } ] );
 405
 406 Is a passing clause if the first line of a file looks like a perl
 407 shebang line.
 408
 409 =cut
 410
 411 sub grep {
 412     my $self = _force_object shift;
 413     my @pattern = map {
 414         ref $_
 415           ? ref $_ eq 'ARRAY'
 416             ? map { [ ( ref $_ ? $_ : qr/$_/ ) => 0 ] } @$_
 417             : [ $_ => 1 ]
 418           : [ qr/$_/ => 1 ]
 419       } @_;
 420
 421     $self->exec( sub {
 422         local *FILE;
 423         open FILE, $_ or return;
 424         local ($_, $.);
 425         while (<FILE>) {
 426             for my $p (@pattern) {
 427                 my ($rule, $ret) = @$p;
 428                 return $ret
 429                   if ref $rule eq 'Regexp'
 430                     ? /$rule/
 431                       : $rule->(@_);
 432             }
 433         }
 434         return;
 435     } );
 436 }
 437
 438 =item C<maxdepth( $level )>
 439
 440 Descend at most C<$level> (a non-negative integer) levels of directories
 441 below the starting point.
 442
 443 May be invoked many times per rule, but only the most recent value is
 444 used.
 445
 446 =item C<mindepth( $level )>
 447
 448 Do not apply any tests at levels less than C<$level> (a non-negative
 449 integer).
 450
 451 =item C<extras( \%extras )>
 452
 453 Specifies extra values to pass through to C<File::File::find> as part
 454 of the options hash.
 455
 456 For example this allows you to specify following of symlinks like so:
 457
 458  my $rule = File::Find::Rule->extras({ follow => 1 });
 459
 460 May be invoked many times per rule, but only the most recent value is
 461 used.
 462
 463 =cut
 464
 465 for my $setter (qw( maxdepth mindepth extras )) {
 466     my $sub = sub {
 467         my $self = _force_object shift;
 468         $self->{$setter} = shift;
 469         $self;
 470     };
 471     no strict 'refs';
 472     *$setter = $sub;
 473 }
 474
 475
 476 =item C<relative>
 477
 478 Trim the leading portion of any path found
 479
 480 =cut
 481
 482 sub relative () {
 483     my $self = _force_object shift;
 484     $self->{relative} = 1;
 485     $self;
 486 }
 487
 488 =item C<not_*>
 489
 490 Negated version of the rule.  An effective shortand related to ! in
 491 the procedural interface.
 492
 493  $foo->not_name('*.pl');
 494
 495  $foo->not( $foo->new->name('*.pl' ) );
 496
 497 =cut
 498
 499 sub DESTROY {}
 500 sub AUTOLOAD {
 501     our $AUTOLOAD;
 502     $AUTOLOAD =~ /::not_([^:]*)$/
 503       or croak "Can't locate method $AUTOLOAD";
 504     my $method = $1;
 505
 506     my $sub = sub {
 507         my $self = _force_object shift;
 508         $self->not( $self->new->$method(@_) );
 509     };
 510     {
 511         no strict 'refs';
 512         *$AUTOLOAD = $sub;
 513     }
 514     &$sub;
 515 }
 516
 517 =back
 518
 519 =head2 Query Methods
 520
 521 =over
 522
 523 =item C<in( @directories )>
 524
 525 Evaluates the rule, returns a list of paths to matching files and
 526 directories.
 527
 528 =cut
 529
 530 sub in {
 531     my $self = _force_object shift;
 532
 533     my @found;
 534     my $fragment = $self->_compile;
 535     my %subs = %{ $self->{subs} };
 536
 537     warn "relative mode handed multiple paths - that's a bit silly\n"
 538       if $self->{relative} && @_ > 1;
 539
 540     my $topdir;
 541     my $code = 'sub {
 542         (my $path = $File::Find::name)  =~ s#^(?:\./+)+##;
 543         my @args = ($_, $File::Find::dir, $path);
 544         my $maxdepth = $self->{maxdepth};
 545         my $mindepth = $self->{mindepth};
 546         my $relative = $self->{relative};
 547
 548         # figure out the relative path and depth
 549         my $relpath = $File::Find::name;
 550         $relpath =~ s{^\Q$topdir\E/?}{};
 551         my $depth = scalar File::Spec->splitdir($relpath);
 552         #print "name: \'$File::Find::name\' ";
 553         #print "relpath: \'$relpath\' depth: $depth relative: $relative\n";
 554
 555         defined $maxdepth && $depth >= $maxdepth
 556            and $File::Find::prune = 1;
 557
 558         defined $mindepth && $depth < $mindepth
 559            and return;
 560
 561         #print "Testing \'$_\'\n";
 562
 563         my $discarded;
 564         return unless ' . $fragment . ';
 565         return if $discarded;
 566         if ($relative) {
 567             push @found, $relpath if $relpath ne "";
 568         }
 569         else {
 570             push @found, $path;
 571         }
 572     }';
 573
 574     #use Data::Dumper;
 575     #print Dumper \%subs;
 576     #warn "Compiled sub: '$code'\n";
 577
 578     my $sub = eval "$code" or die "compile error '$code' $@";
 579     for my $path (@_) {
 580         # $topdir is used for relative and maxdepth
 581         $topdir = $path;
 582         # slice off the trailing slash if there is one (the
 583         # maxdepth/mindepth code is fussy)
 584         $topdir =~ s{/?$}{}
 585           unless $topdir eq '/';
 586         $self->_call_find( { %{ $self->{extras} }, wanted => $sub }, $path );
 587     }
 588
 589     return @found;
 590 }
 591
 592 sub _call_find {
 593     my $self = shift;
 594     File::Find::find( @_ );
 595 }
 596
 597 sub _compile {
 598     my $self = shift;
 599
 600     return '1' unless @{ $self->{rules} };
 601     my $code = join " && ", map {
 602         if (ref $_->{code}) {
 603             my $key = "$_->{code}";
 604             $self->{subs}{$key} = $_->{code};
 605             "\$subs{'$key'}->(\@args) # $_->{rule}\n";
 606         }
 607         else {
 608             "( $_->{code} ) # $_->{rule}\n";
 609         }
 610     } @{ $self->{rules} };
 611
 612     #warn $code;
 613     return $code;
 614 }
 615
 616 =item C<start( @directories )>
 617
 618 Starts a find across the specified directories.  Matching items may
 619 then be queried using L</match>.  This allows you to use a rule as an
 620 iterator.
 621
 622  my $rule = File::Find::Rule->file->name("*.jpeg")->start( "/web" );
 623  while ( defined ( my $image = $rule->match ) ) {
 624      ...
 625  }
 626
 627 =cut
 628
 629 sub start {
 630     my $self = _force_object shift;
 631
 632     $self->{iterator} = [ $self->in( @_ ) ];
 633     $self;
 634 }
 635
 636 =item C<match>
 637
 638 Returns the next file which matches, false if there are no more.
 639
 640 =cut
 641
 642 sub match {
 643     my $self = _force_object shift;
 644
 645     return shift @{ $self->{iterator} };
 646 }
 647
 648 1;
 649
 650 __END__
 651
 652 =back
 653
 654 =head2 Extensions
 655
 656 Extension modules are available from CPAN in the File::Find::Rule
 657 namespace.  In order to use these extensions either use them directly:
 658
 659  use File::Find::Rule::ImageSize;
 660  use File::Find::Rule::MMagic;
 661
 662  # now your rules can use the clauses supplied by the ImageSize and
 663  # MMagic extension
 664
 665 or, specify that File::Find::Rule should load them for you:
 666
 667  use File::Find::Rule qw( :ImageSize :MMagic );
 668
 669 For notes on implementing your own extensions, consult
 670 L<File::Find::Rule::Extending>
 671
 672 =head2 Further examples
 673
 674 =over
 675
 676 =item Finding perl scripts
 677
 678  my $finder = File::Find::Rule->or
 679   (
 680    File::Find::Rule->name( '*.pl' ),
 681    File::Find::Rule->exec(
 682                           sub {
 683                               if (open my $fh, $_) {
 684                                   my $shebang = <$fh>;
 685                                   close $fh;
 686                                   return $shebang =~ /^#!.*\bperl/;
 687                               }
 688                               return 0;
 689                           } ),
 690   );
 691
 692 Based upon this message http://use.perl.org/comments.pl?sid=7052&cid=10842
 693
 694 =item ignore CVS directories
 695
 696  my $rule = File::Find::Rule->new;
 697  $rule->or($rule->new
 698                 ->directory
 699                 ->name('CVS')
 700                 ->prune
 701                 ->discard,
 702            $rule->new);
 703
 704 Note here the use of a null rule.  Null rules match anything they see,
 705 so the effect is to match (and discard) directories called 'CVS' or to
 706 match anything.
 707
 708 =back
 709
 710 =head1 TWO FOR THE PRICE OF ONE
 711
 712 File::Find::Rule also gives you a procedural interface.  This is
 713 documented in L<File::Find::Rule::Procedural>
 714
 715 =head1 EXPORTS
 716
 717 L</find>, L</rule>
 718
 719 =head1 TAINT MODE INTERACTION
 720
 721 As of 0.32 File::Find::Rule doesn't capture the current working directory in
 722 a taint-unsafe manner.  File::Find itself still does operations that the taint
 723 system will flag as insecure but you can use the L</extras> feature to ask
 724 L<File::Find> to internally C<untaint> file paths with a regex like so:
 725
 726     my $rule = File::Find::Rule->extras({ untaint => 1 });
 727
 728 Please consult L<File::Find>'s documentation for C<untaint>,
 729 C<untaint_pattern>, and C<untaint_skip> for more information.
 730
 731 =head1 BUGS
 732
 733 The code makes use of the C<our> keyword and as such requires perl version
 734 5.6.0 or newer.
 735
 736 Currently it isn't possible to remove a clause from a rule object.  If
 737 this becomes a significant issue it will be addressed.
 738
 739 =head1 AUTHOR
 740
 741 Richard Clamp <richardc@unixbeard.net> with input gained from this
 742 use.perl discussion: http://use.perl.org/~richardc/journal/6467
 743
 744 Additional proofreading and input provided by Kake, Greg McCarroll,
 745 and Andy Lester andy@petdance.com.
 746
 747 =head1 COPYRIGHT
 748
 749 Copyright (C) 2002, 2003, 2004, 2006, 2009 Richard Clamp.  All Rights Reserved.
 750
 751 This module is free software; you can redistribute it and/or modify it
 752 under the same terms as Perl itself.
 753
 754 =head1 SEE ALSO
 755
 756 L<File::Find>, L<Text::Glob>, L<Number::Compare>, find(1)
 757
 758 If you want to know about the procedural interface, see
 759 L<File::Find::Rule::Procedural>, and if you have an idea for a neat
 760 extension L<File::Find::Rule::Extending>
 761
 762 =cut
 763
 764 Implementation notes:
 765
 766 $self->rules is an array of hashrefs.  it may be a code fragment or a call
 767 to a subroutine.
 768
 769 Anonymous subroutines are stored in the $self->subs hashref keyed on the
 770 stringfied version of the coderef.
 771
 772 When one File::Find::Rule object is combined with another, such as in the any
 773 and not operations, this entire hash is merged.
 774
 775 The _compile method walks the rules element and simply glues the code
 776 fragments together so they can be compiled into an anyonymous File::Find
 777 match sub for speed
 778
 779
 780 [*] There's probably a win to be made with the current model in making
 781 stat calls use C<_>.  For
 782
 783   find( file => size => "> 20M" => size => "< 400M" );
 784
 785 up to 3 stats will happen for each candidate.  Adding a priming _
 786 would be a bit blind if the first operation was C< name => 'foo' >,
 787 since that can be tested by a single regex.  Simply checking what the
 788 next type of operation doesn't work since any arbritary exec sub may
 789 or may not stat.  Potentially worse, they could stat something else
 790 like so:
 791
 792   # extract from the worlds stupidest make(1)
 793   find( exec => sub { my $f = $_; $f =~ s/\.c$/.o/ && !-e $f } );
 794
 795 Maybe the best way is to treat C<_> as invalid after calling an exec,
 796 and doc that C<_> will only be meaningful after stat and -X tests if
 797 they're wanted in exec blocks.