1 package File::GlobMapper;
13 require File::BSDGlob; import File::BSDGlob qw(:glob) ;
14 $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
15 *globber = \&File::BSDGlob::csh_glob;
19 require File::Glob; import File::Glob qw(:glob) ;
20 $CSH_GLOB = File::Glob::GLOB_CSH() ;
21 #*globber = \&File::Glob::bsd_glob;
22 *globber = \&File::Glob::csh_glob;
28 our ($VERSION, @EXPORT_OK);
29 $VERSION = '0.000_02';
30 @EXPORT_OK = qw( globmap );
33 our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
34 $noPreBS = '(?<!\\\)' ; # no preceeding backslash
35 $metachars = '.*?[](){}';
36 $matchMetaRE = '[' . quotemeta($metachars) . ']';
47 %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
51 my $inputGlob = shift ;
52 my $outputGlob = shift ;
54 my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
55 or croak "globmap: $Error" ;
56 return $obj->getFileMap();
62 my $inputGlob = shift ;
63 my $outputGlob = shift ;
64 # TODO -- flags needs to default to whatever File::Glob does
65 my $flags = shift || $CSH_GLOB ;
68 $inputGlob =~ s/^\s*\<\s*//;
69 $inputGlob =~ s/\s*\>\s*$//;
71 $outputGlob =~ s/^\s*\<\s*//;
72 $outputGlob =~ s/\s*\>\s*$//;
75 ( InputGlob => $inputGlob,
76 OutputGlob => $outputGlob,
84 my $self = bless \%object, ref($class) || $class ;
86 $self->_parseInputGlob()
89 $self->_parseOutputGlob()
92 my @inputFiles = globber($self->{InputGlob}, $flags) ;
102 my $missing = grep { ! -e $_ } @inputFiles ;
106 $Error = "$missing input files do not exist";
111 $self->{InputFiles} = \@inputFiles ;
122 $Error = "$string in input fileglob" ;
128 my $delimeter = shift ;
130 _retError("Unmatched $delimeter");
143 while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
145 $out .= quotemeta($1) ;
146 $out .= $mapping{$2} if defined $mapping{$2};
148 ++ $self->{WildCount} if $wildCount{$2} ;
152 return _unmatched "("
163 return _unmatched ")"
170 # TODO -- quotemeta & check no '/'
171 # TODO -- check for \] & other \ within the []
172 $string =~ s#(.*?\])##
173 or return _unmatched "[" ;
178 return _unmatched "]" ;
180 elsif ($2 eq '{' || $2 eq '}')
182 return _retError "Nested {} not allowed" ;
186 $out .= quotemeta $string;
188 return _unmatched "("
198 my $string = $self->{InputGlob} ;
201 # Multiple concatenated *'s don't make sense
202 #$string =~ s#\*\*+#*# ;
204 # TODO -- Allow space to delimit patterns?
205 #my @strings = split /\s+/, $string ;
206 #for my $str (@strings)
210 while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
212 $out .= quotemeta($1) ;
213 $out .= $mapping{$2} if defined $mapping{$2};
214 ++ $self->{WildCount} if $wildCount{$2} ;
222 return _unmatched ")"
229 # TODO -- quotemeta & check no '/' or '(' or ')'
230 # TODO -- check for \] & other \ within the []
231 $string =~ s#(.*?\])##
232 or return _unmatched "[";
237 return _unmatched "]" ;
241 return _unmatched "}" ;
245 # TODO -- check no '/' within the {}
246 # TODO -- check for \} & other \ within the {}
249 unless ( $string =~ s/(.*?)$noPreBS\}//)
251 return _unmatched "{";
253 #$string =~ s#(.*?)\}##;
256 # map { quotemeta $_ }
257 # split "$noPreBS,", $1 ;
258 my $alt = $self->_parseBit($1);
259 defined $alt or return 0 ;
266 return _unmatched "("
269 $out .= quotemeta $string ;
272 $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
273 $self->{InputPattern} = $out ;
275 #print "# INPUT '$self->{InputGlob}' => '$out'\n";
285 my $string = $self->{OutputGlob} ;
286 my $maxwild = $self->{WildCount};
288 if ($self->{GlobFlags} & GLOB_TILDE)
292 ^ ~ # find a leading tilde
294 [^/] # a non-slash character
295 * # repeated 0 or more times (0 means me)
300 : ( $ENV{HOME} || $ENV{LOGDIR} )
305 # max #1 must be == to max no of '*' in input
306 while ( $string =~ m/#(\d)/g )
308 croak "Max wild is #$maxwild, you tried #$1"
312 my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
313 #warn "noPreBS = '$noPreBS'\n";
315 #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
316 $string =~ s/${noPreBS}#(\d)/\${$1}/g;
317 $string =~ s#${noPreBS}\*#\${inFile}#g;
318 $string = '"' . $string . '"';
320 #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
321 $self->{OutputPattern} = $string ;
330 my %outInMapping = ();
333 foreach my $inFile (@{ $self->{InputFiles} })
335 next if $inFiles{$inFile} ++ ;
337 my $outFile = $inFile ;
339 if ( $inFile =~ m/$self->{InputPattern}/ )
341 no warnings 'uninitialized';
342 eval "\$outFile = $self->{OutputPattern};" ;
344 if (defined $outInMapping{$outFile})
346 $Error = "multiple input files map to one output file";
349 $outInMapping{$outFile} = $inFile;
350 push @{ $self->{Pairs} }, [$inFile, $outFile];
361 return $self->{Pairs} ;
368 return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
377 File::GlobMapper - Extend File Glob to Allow Input and Output Files
381 use File::GlobMapper qw( globmap );
383 my $aref = globmap $input => $output
384 or die $File::GlobMapper::Error ;
386 my $gm = new File::GlobMapper $input => $output
387 or die $File::GlobMapper::Error ;
392 B<WARNING Alpha Release Alert!>
396 =item * This code is a work in progress.
398 =item * There are known bugs.
400 =item * The interface defined here is tentative.
402 =item * There are portability issues.
404 =item * Do not use in production code.
406 =item * Consider yourself warned!
410 This module needs Perl5.005 or better.
412 This module takes the existing C<File::Glob> module as a starting point and
413 extends it to allow new filenames to be derived from the files matched by
416 This can be useful when carrying out batch operations on multiple files that
417 have both an input filename and output filename and the output file can be
418 derived from the input filename. Examples of operations where this can be
419 useful include, file renaming, file copying and file compression.
422 =head2 Behind The Scenes
424 To help explain what C<File::GlobMapper> does, consider what code you
425 would write if you wanted to rename all files in the current directory
426 that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
433 and they need renamed to this
439 Below is a possible implementation of a script to carry out the rename
440 (error cases have been omitted)
442 foreach my $old ( glob "*.tar.gz" )
445 $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
448 or die "Cannot rename '$old' to '$new': $!\n;
451 Notice that a file glob pattern C<*.tar.gz> was used to match the
452 C<.tar.gz> files, then a fairly similar regular expression was used in
453 the substitute to allow the new filename to be created.
455 Given that the file glob is just a cut-down regular expression and that it
456 has already done a lot of the hard work in pattern matching the filenames,
457 wouldn't it be handy to be able to use the patterns in the fileglob to
458 drive the new filename?
460 Well, that's I<exactly> what C<File::GlobMapper> does.
462 Here is same snippet of code rewritten using C<globmap>
464 for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
466 my ($from, $to) = @$pair;
468 or die "Cannot rename '$old' to '$new': $!\n;
473 Behind the scenes the C<globmap> function does a combination of a
474 file glob to match existing filenames followed by a substitute
475 to create the new filenames.
477 Notice how both parameters to C<globmap> are strings that are delimited by <>.
478 This is done to make them look more like file globs - it is just syntactic
479 sugar, but it can be handy when you want the strings to be visually
480 distinctive. The enclosing <> are optional, so you don't have to use them - in
481 fact the first thing globmap will do is remove these delimiters if they are
484 The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
485 Once the enclosing "< ... >" is removed, this is passed (more or
486 less) unchanged to C<File::Glob> to carry out a file match.
488 Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
489 full Perl regular expression, with the additional step of wrapping each
490 transformed wildcard metacharacter sequence in parenthesis.
492 In this case the input fileglob C<*.tar.gz> will be transformed into
493 this Perl regular expression
497 Wrapping with parenthesis allows the wildcard parts of the Input File
498 Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
499 the I<Output File Glob>. This parameter operates just like the replacement
500 part of a substitute command. The difference is that the C<#1> syntax
501 is used to reference sub-patterns matched in the input fileglob, rather
502 than the C<$1> syntax that is used with perl regular expressions. In
503 this case C<#1> is used to refer to the text matched by the C<*> in the
504 Input File Glob. This makes it easier to use this module where the
505 parameters to C<globmap> are typed at the command line.
507 The final step involves passing each filename matched by the C<*.tar.gz>
508 file glob through the derived Perl regular expression in turn and
509 expanding the output fileglob using it.
511 The end result of all this is a list of pairs of filenames. By default
512 that is what is returned by C<globmap>. In this example the data structure
513 returned will look like this
515 ( ['alpha.tar.gz' => 'alpha.tgz'],
516 ['beta.tar.gz' => 'beta.tgz' ],
517 ['gamma.tar.gz' => 'gamma.tgz']
521 Each pair is an array reference with two elements - namely the I<from>
522 filename, that C<File::Glob> has matched, and a I<to> filename that is
523 derived from the I<from> filename.
529 C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
530 solve all filename mapping operations. Under the hood C<File::Glob> (or for
531 older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
532 will never have the flexibility of full Perl regular expression.
534 =head2 Input File Glob
536 The syntax for an Input FileGlob is identical to C<File::Glob>, except
547 Whitespace does not delimit fileglobs.
551 The use of parenthesis can be used to capture parts of the input filename.
555 If an Input glob matches the same file more than once, only the first
571 Matches a literal '.'.
572 Equivalent to the Perl regular expression
578 Matches zero or more characters, except '/'. Equivalent to the Perl
585 Matches zero or one character, except '/'. Equivalent to the Perl
592 Backslash is used, as usual, to escape the next character.
604 Capturing parenthesis that work just like perl
608 Any other character it taken literally.
610 =head2 Output File Glob
612 The Output File Glob is a normal string, with 2 glob-like features.
614 The first is the '*' metacharacter. This will be replaced by the complete
615 filename matched by the input file glob. So
621 Output FileGlobs take the
627 The "*" character will be replaced with the complete input filename.
631 Patterns of the form /#\d/ will be replaced with the
640 =head2 A Rename script
642 Below is a simple "rename" script that uses C<globmap> to determine the
643 source and destination filenames.
645 use File::GlobMapper qw(globmap) ;
648 die "rename: Usage rename 'from' 'to'\n"
651 my $fromGlob = shift @ARGV;
652 my $toGlob = shift @ARGV;
654 my $pairs = globmap($fromGlob, $toGlob)
655 or die $File::GlobMapper::Error;
657 for my $pair (@$pairs)
659 my ($from, $to) = @$pair;
665 Here is an example that renames all c files to cpp.
667 $ rename '*.c' '#1.cpp'
669 =head2 A few example globmaps
671 Below are a few examples of globmaps
673 To copy all your .c file to a backup directory
675 '</my/home/*.c>' '</my/backup/#1.c>'
677 If you want to compress all
679 '</my/home/*.[ch]>' '<*.gz>'
683 '</my/home/*.[ch].gz>' '</my/home/#1.#2>'
687 L<File::Glob|File::Glob>
691 The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
693 =head1 COPYRIGHT AND LICENSE
695 Copyright (c) 2005 Paul Marquess. All rights reserved.
696 This program is free software; you can redistribute it and/or
697 modify it under the same terms as Perl itself.