Commit | Line | Data |
642e522c |
1 | package File::GlobMapper; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Carp; |
6 | |
7 | our ($CSH_GLOB); |
8 | |
9 | BEGIN |
10 | { |
11 | if ($] < 5.006) |
12 | { |
13 | require File::BSDGlob; import File::BSDGlob qw(:glob) ; |
14 | $CSH_GLOB = File::BSDGlob::GLOB_CSH() ; |
1a6a8453 |
15 | *globber = \&File::BSDGlob::csh_glob; |
642e522c |
16 | } |
17 | else |
18 | { |
19 | require File::Glob; import File::Glob qw(:glob) ; |
20 | $CSH_GLOB = File::Glob::GLOB_CSH() ; |
21 | #*globber = \&File::Glob::bsd_glob; |
1a6a8453 |
22 | *globber = \&File::Glob::csh_glob; |
642e522c |
23 | } |
24 | } |
25 | |
26 | our ($Error); |
27 | |
28 | our ($VERSION, @EXPORT_OK); |
29 | $VERSION = '0.000_02'; |
30 | @EXPORT_OK = qw( globmap ); |
31 | |
32 | |
33 | our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount); |
34 | $noPreBS = '(?<!\\\)' ; # no preceeding backslash |
35 | $metachars = '.*?[](){}'; |
36 | $matchMetaRE = '[' . quotemeta($metachars) . ']'; |
37 | |
38 | %mapping = ( |
39 | '*' => '([^/]*)', |
40 | '?' => '([^/])', |
41 | '.' => '\.', |
42 | '[' => '([', |
43 | '(' => '(', |
44 | ')' => ')', |
45 | ); |
46 | |
47 | %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /; |
48 | |
49 | sub globmap ($$;) |
50 | { |
51 | my $inputGlob = shift ; |
52 | my $outputGlob = shift ; |
53 | |
54 | my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_) |
55 | or croak "globmap: $Error" ; |
56 | return $obj->getFileMap(); |
57 | } |
58 | |
59 | sub new |
60 | { |
61 | my $class = shift ; |
62 | my $inputGlob = shift ; |
63 | my $outputGlob = shift ; |
64 | # TODO -- flags needs to default to whatever File::Glob does |
65 | my $flags = shift || $CSH_GLOB ; |
66 | #my $flags = shift ; |
67 | |
68 | $inputGlob =~ s/^\s*\<\s*//; |
69 | $inputGlob =~ s/\s*\>\s*$//; |
70 | |
71 | $outputGlob =~ s/^\s*\<\s*//; |
72 | $outputGlob =~ s/\s*\>\s*$//; |
73 | |
74 | my %object = |
75 | ( InputGlob => $inputGlob, |
76 | OutputGlob => $outputGlob, |
77 | GlobFlags => $flags, |
78 | Braces => 0, |
79 | WildCount => 0, |
80 | Pairs => [], |
81 | Sigil => '#', |
82 | ); |
83 | |
84 | my $self = bless \%object, ref($class) || $class ; |
85 | |
86 | $self->_parseInputGlob() |
87 | or return undef ; |
88 | |
89 | $self->_parseOutputGlob() |
90 | or return undef ; |
91 | |
92 | my @inputFiles = globber($self->{InputGlob}, $flags) ; |
93 | |
94 | if (GLOB_ERROR) |
95 | { |
96 | $Error = $!; |
97 | return undef ; |
98 | } |
99 | |
100 | #if (whatever) |
101 | { |
102 | my $missing = grep { ! -e $_ } @inputFiles ; |
103 | |
104 | if ($missing) |
105 | { |
106 | $Error = "$missing input files do not exist"; |
107 | return undef ; |
108 | } |
109 | } |
110 | |
111 | $self->{InputFiles} = \@inputFiles ; |
112 | |
113 | $self->_getFiles() |
114 | or return undef ; |
115 | |
116 | return $self; |
117 | } |
118 | |
119 | sub _retError |
120 | { |
121 | my $string = shift ; |
122 | $Error = "$string in input fileglob" ; |
123 | return undef ; |
124 | } |
125 | |
126 | sub _unmatched |
127 | { |
128 | my $delimeter = shift ; |
129 | |
130 | _retError("Unmatched $delimeter"); |
131 | return undef ; |
132 | } |
133 | |
134 | sub _parseBit |
135 | { |
136 | my $self = shift ; |
137 | |
138 | my $string = shift ; |
139 | |
140 | my $out = ''; |
141 | my $depth = 0 ; |
142 | |
143 | while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//) |
144 | { |
145 | $out .= quotemeta($1) ; |
146 | $out .= $mapping{$2} if defined $mapping{$2}; |
147 | |
148 | ++ $self->{WildCount} if $wildCount{$2} ; |
149 | |
150 | if ($2 eq ',') |
151 | { |
152 | return _unmatched "(" |
153 | if $depth ; |
154 | |
155 | $out .= '|'; |
156 | } |
157 | elsif ($2 eq '(') |
158 | { |
159 | ++ $depth ; |
160 | } |
161 | elsif ($2 eq ')') |
162 | { |
163 | return _unmatched ")" |
164 | if ! $depth ; |
165 | |
166 | -- $depth ; |
167 | } |
168 | elsif ($2 eq '[') |
169 | { |
170 | # TODO -- quotemeta & check no '/' |
171 | # TODO -- check for \] & other \ within the [] |
172 | $string =~ s#(.*?\])## |
173 | or return _unmatched "[" ; |
174 | $out .= "$1)" ; |
175 | } |
176 | elsif ($2 eq ']') |
177 | { |
178 | return _unmatched "]" ; |
179 | } |
180 | elsif ($2 eq '{' || $2 eq '}') |
181 | { |
182 | return _retError "Nested {} not allowed" ; |
183 | } |
184 | } |
185 | |
186 | $out .= quotemeta $string; |
187 | |
188 | return _unmatched "(" |
189 | if $depth ; |
190 | |
191 | return $out ; |
192 | } |
193 | |
194 | sub _parseInputGlob |
195 | { |
196 | my $self = shift ; |
197 | |
198 | my $string = $self->{InputGlob} ; |
199 | my $inGlob = ''; |
200 | |
201 | # Multiple concatenated *'s don't make sense |
202 | #$string =~ s#\*\*+#*# ; |
203 | |
204 | # TODO -- Allow space to delimit patterns? |
205 | #my @strings = split /\s+/, $string ; |
206 | #for my $str (@strings) |
207 | my $out = ''; |
208 | my $depth = 0 ; |
209 | |
210 | while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//) |
211 | { |
212 | $out .= quotemeta($1) ; |
213 | $out .= $mapping{$2} if defined $mapping{$2}; |
214 | ++ $self->{WildCount} if $wildCount{$2} ; |
215 | |
216 | if ($2 eq '(') |
217 | { |
218 | ++ $depth ; |
219 | } |
220 | elsif ($2 eq ')') |
221 | { |
222 | return _unmatched ")" |
223 | if ! $depth ; |
224 | |
225 | -- $depth ; |
226 | } |
227 | elsif ($2 eq '[') |
228 | { |
229 | # TODO -- quotemeta & check no '/' or '(' or ')' |
230 | # TODO -- check for \] & other \ within the [] |
231 | $string =~ s#(.*?\])## |
232 | or return _unmatched "["; |
233 | $out .= "$1)" ; |
234 | } |
235 | elsif ($2 eq ']') |
236 | { |
237 | return _unmatched "]" ; |
238 | } |
239 | elsif ($2 eq '}') |
240 | { |
241 | return _unmatched "}" ; |
242 | } |
243 | elsif ($2 eq '{') |
244 | { |
245 | # TODO -- check no '/' within the {} |
246 | # TODO -- check for \} & other \ within the {} |
247 | |
248 | my $tmp ; |
249 | unless ( $string =~ s/(.*?)$noPreBS\}//) |
250 | { |
251 | return _unmatched "{"; |
252 | } |
253 | #$string =~ s#(.*?)\}##; |
254 | |
255 | #my $alt = join '|', |
256 | # map { quotemeta $_ } |
257 | # split "$noPreBS,", $1 ; |
258 | my $alt = $self->_parseBit($1); |
259 | defined $alt or return 0 ; |
260 | $out .= "($alt)" ; |
261 | |
262 | ++ $self->{Braces} ; |
263 | } |
264 | } |
265 | |
266 | return _unmatched "(" |
267 | if $depth ; |
268 | |
269 | $out .= quotemeta $string ; |
270 | |
271 | |
272 | $self->{InputGlob} =~ s/$noPreBS[\(\)]//g; |
273 | $self->{InputPattern} = $out ; |
274 | |
275 | #print "# INPUT '$self->{InputGlob}' => '$out'\n"; |
276 | |
277 | return 1 ; |
278 | |
279 | } |
280 | |
281 | sub _parseOutputGlob |
282 | { |
283 | my $self = shift ; |
284 | |
285 | my $string = $self->{OutputGlob} ; |
286 | my $maxwild = $self->{WildCount}; |
287 | |
288 | if ($self->{GlobFlags} & GLOB_TILDE) |
289 | #if (1) |
290 | { |
291 | $string =~ s{ |
292 | ^ ~ # find a leading tilde |
293 | ( # save this in $1 |
294 | [^/] # a non-slash character |
295 | * # repeated 0 or more times (0 means me) |
296 | ) |
297 | }{ |
298 | $1 |
299 | ? (getpwnam($1))[7] |
300 | : ( $ENV{HOME} || $ENV{LOGDIR} ) |
301 | }ex; |
302 | |
303 | } |
304 | |
305 | # max #1 must be == to max no of '*' in input |
306 | while ( $string =~ m/#(\d)/g ) |
307 | { |
308 | croak "Max wild is #$maxwild, you tried #$1" |
309 | if $1 > $maxwild ; |
310 | } |
311 | |
312 | my $noPreBS = '(?<!\\\)' ; # no preceeding backslash |
313 | #warn "noPreBS = '$noPreBS'\n"; |
314 | |
315 | #$string =~ s/${noPreBS}\$(\d)/\${$1}/g; |
316 | $string =~ s/${noPreBS}#(\d)/\${$1}/g; |
317 | $string =~ s#${noPreBS}\*#\${inFile}#g; |
318 | $string = '"' . $string . '"'; |
319 | |
320 | #print "OUTPUT '$self->{OutputGlob}' => '$string'\n"; |
321 | $self->{OutputPattern} = $string ; |
322 | |
323 | return 1 ; |
324 | } |
325 | |
326 | sub _getFiles |
327 | { |
328 | my $self = shift ; |
329 | |
330 | my %outInMapping = (); |
331 | my %inFiles = () ; |
332 | |
333 | foreach my $inFile (@{ $self->{InputFiles} }) |
334 | { |
335 | next if $inFiles{$inFile} ++ ; |
336 | |
337 | my $outFile = $inFile ; |
338 | |
339 | if ( $inFile =~ m/$self->{InputPattern}/ ) |
340 | { |
341 | no warnings 'uninitialized'; |
342 | eval "\$outFile = $self->{OutputPattern};" ; |
343 | |
344 | if (defined $outInMapping{$outFile}) |
345 | { |
346 | $Error = "multiple input files map to one output file"; |
347 | return undef ; |
348 | } |
349 | $outInMapping{$outFile} = $inFile; |
350 | push @{ $self->{Pairs} }, [$inFile, $outFile]; |
351 | } |
352 | } |
353 | |
354 | return 1 ; |
355 | } |
356 | |
357 | sub getFileMap |
358 | { |
359 | my $self = shift ; |
360 | |
361 | return $self->{Pairs} ; |
362 | } |
363 | |
364 | sub getHash |
365 | { |
366 | my $self = shift ; |
367 | |
368 | return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ; |
369 | } |
370 | |
371 | 1; |
372 | |
373 | __END__ |
374 | |
375 | =head1 NAME |
376 | |
377 | File::GlobMapper - Extend File Glob to Allow Input and Output Files |
378 | |
379 | =head1 SYNOPSIS |
380 | |
381 | use File::GlobMapper qw( globmap ); |
382 | |
383 | my $aref = globmap $input => $output |
384 | or die $File::GlobMapper::Error ; |
385 | |
386 | my $gm = new File::GlobMapper $input => $output |
387 | or die $File::GlobMapper::Error ; |
388 | |
389 | |
390 | =head1 DESCRIPTION |
391 | |
392 | B<WARNING Alpha Release Alert!> |
393 | |
394 | =over 5 |
395 | |
396 | =item * This code is a work in progress. |
397 | |
398 | =item * There are known bugs. |
399 | |
400 | =item * The interface defined here is tentative. |
401 | |
402 | =item * There are portability issues. |
403 | |
404 | =item * Do not use in production code. |
405 | |
406 | =item * Consider yourself warned! |
407 | |
408 | =back |
409 | |
410 | This module needs Perl5.005 or better. |
411 | |
412 | This module takes the existing C<File::Glob> module as a starting point and |
413 | extends it to allow new filenames to be derived from the files matched by |
414 | C<File::Glob>. |
415 | |
416 | This can be useful when carrying out batch operations on multiple files that |
417 | have both an input filename and output filename and the output file can be |
418 | derived from the input filename. Examples of operations where this can be |
419 | useful include, file renaming, file copying and file compression. |
420 | |
421 | |
422 | =head2 Behind The Scenes |
423 | |
424 | To help explain what C<File::GlobMapper> does, consider what code you |
425 | would write if you wanted to rename all files in the current directory |
426 | that ended in C<.tar.gz> to C<.tgz>. So say these files are in the |
1a6a8453 |
427 | current directory |
642e522c |
428 | |
429 | alpha.tar.gz |
430 | beta.tar.gz |
431 | gamma.tar.gz |
432 | |
433 | and they need renamed to this |
434 | |
435 | alpha.tgz |
436 | beta.tgz |
437 | gamma.tgz |
438 | |
439 | Below is a possible implementation of a script to carry out the rename |
440 | (error cases have been omitted) |
441 | |
442 | foreach my $old ( glob "*.tar.gz" ) |
443 | { |
444 | my $new = $old; |
445 | $new =~ s#(.*)\.tar\.gz$#$1.tgz# ; |
446 | |
447 | rename $old => $new |
448 | or die "Cannot rename '$old' to '$new': $!\n; |
449 | } |
450 | |
451 | Notice that a file glob pattern C<*.tar.gz> was used to match the |
452 | C<.tar.gz> files, then a fairly similar regular expression was used in |
453 | the substitute to allow the new filename to be created. |
454 | |
455 | Given that the file glob is just a cut-down regular expression and that it |
456 | has already done a lot of the hard work in pattern matching the filenames, |
457 | wouldn't it be handy to be able to use the patterns in the fileglob to |
458 | drive the new filename? |
459 | |
460 | Well, that's I<exactly> what C<File::GlobMapper> does. |
461 | |
462 | Here is same snippet of code rewritten using C<globmap> |
463 | |
464 | for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' ) |
465 | { |
466 | my ($from, $to) = @$pair; |
467 | rename $from => $to |
468 | or die "Cannot rename '$old' to '$new': $!\n; |
469 | } |
470 | |
471 | So how does it work? |
472 | |
473 | Behind the scenes the C<globmap> function does a combination of a |
474 | file glob to match existing filenames followed by a substitute |
475 | to create the new filenames. |
476 | |
1a6a8453 |
477 | Notice how both parameters to C<globmap> are strings that are delimited by <>. |
642e522c |
478 | This is done to make them look more like file globs - it is just syntactic |
479 | sugar, but it can be handy when you want the strings to be visually |
480 | distinctive. The enclosing <> are optional, so you don't have to use them - in |
1a6a8453 |
481 | fact the first thing globmap will do is remove these delimiters if they are |
642e522c |
482 | present. |
483 | |
484 | The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>. |
485 | Once the enclosing "< ... >" is removed, this is passed (more or |
486 | less) unchanged to C<File::Glob> to carry out a file match. |
487 | |
488 | Next the fileglob C<*.tar.gz> is transformed behind the scenes into a |
489 | full Perl regular expression, with the additional step of wrapping each |
490 | transformed wildcard metacharacter sequence in parenthesis. |
491 | |
492 | In this case the input fileglob C<*.tar.gz> will be transformed into |
493 | this Perl regular expression |
494 | |
495 | ([^/]*)\.tar\.gz |
496 | |
497 | Wrapping with parenthesis allows the wildcard parts of the Input File |
498 | Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>, |
499 | the I<Output File Glob>. This parameter operates just like the replacement |
500 | part of a substitute command. The difference is that the C<#1> syntax |
501 | is used to reference sub-patterns matched in the input fileglob, rather |
502 | than the C<$1> syntax that is used with perl regular expressions. In |
503 | this case C<#1> is used to refer to the text matched by the C<*> in the |
504 | Input File Glob. This makes it easier to use this module where the |
505 | parameters to C<globmap> are typed at the command line. |
506 | |
507 | The final step involves passing each filename matched by the C<*.tar.gz> |
508 | file glob through the derived Perl regular expression in turn and |
509 | expanding the output fileglob using it. |
510 | |
511 | The end result of all this is a list of pairs of filenames. By default |
512 | that is what is returned by C<globmap>. In this example the data structure |
513 | returned will look like this |
514 | |
515 | ( ['alpha.tar.gz' => 'alpha.tgz'], |
516 | ['beta.tar.gz' => 'beta.tgz' ], |
517 | ['gamma.tar.gz' => 'gamma.tgz'] |
518 | ) |
519 | |
520 | |
521 | Each pair is an array reference with two elements - namely the I<from> |
522 | filename, that C<File::Glob> has matched, and a I<to> filename that is |
523 | derived from the I<from> filename. |
524 | |
525 | |
526 | |
527 | =head2 Limitations |
528 | |
529 | C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to |
530 | solve all filename mapping operations. Under the hood C<File::Glob> (or for |
1a6a8453 |
531 | older versions of Perl, C<File::BSDGlob>) is used to match the files, so you |
642e522c |
532 | will never have the flexibility of full Perl regular expression. |
533 | |
534 | =head2 Input File Glob |
535 | |
536 | The syntax for an Input FileGlob is identical to C<File::Glob>, except |
537 | for the following |
538 | |
539 | =over 5 |
540 | |
541 | =item 1. |
542 | |
543 | No nested {} |
544 | |
545 | =item 2. |
546 | |
547 | Whitespace does not delimit fileglobs. |
548 | |
549 | =item 3. |
550 | |
551 | The use of parenthesis can be used to capture parts of the input filename. |
552 | |
553 | =item 4. |
554 | |
555 | If an Input glob matches the same file more than once, only the first |
556 | will be used. |
557 | |
558 | =back |
559 | |
560 | The syntax |
561 | |
562 | =over 5 |
563 | |
564 | =item B<~> |
565 | |
566 | =item B<~user> |
567 | |
568 | |
569 | =item B<.> |
570 | |
571 | Matches a literal '.'. |
572 | Equivalent to the Perl regular expression |
573 | |
574 | \. |
575 | |
576 | =item B<*> |
577 | |
578 | Matches zero or more characters, except '/'. Equivalent to the Perl |
579 | regular expression |
580 | |
581 | [^/]* |
582 | |
583 | =item B<?> |
584 | |
585 | Matches zero or one character, except '/'. Equivalent to the Perl |
586 | regular expression |
587 | |
588 | [^/]? |
589 | |
590 | =item B<\> |
591 | |
592 | Backslash is used, as usual, to escape the next character. |
593 | |
594 | =item B<[]> |
595 | |
596 | Character class. |
597 | |
598 | =item B<{,}> |
599 | |
600 | Alternation |
601 | |
602 | =item B<()> |
603 | |
604 | Capturing parenthesis that work just like perl |
605 | |
606 | =back |
607 | |
608 | Any other character it taken literally. |
609 | |
610 | =head2 Output File Glob |
611 | |
612 | The Output File Glob is a normal string, with 2 glob-like features. |
613 | |
614 | The first is the '*' metacharacter. This will be replaced by the complete |
615 | filename matched by the input file glob. So |
616 | |
617 | *.c *.Z |
618 | |
619 | The second is |
620 | |
621 | Output FileGlobs take the |
622 | |
623 | =over 5 |
624 | |
625 | =item "*" |
626 | |
1a6a8453 |
627 | The "*" character will be replaced with the complete input filename. |
642e522c |
628 | |
629 | =item #1 |
630 | |
631 | Patterns of the form /#\d/ will be replaced with the |
632 | |
633 | =back |
634 | |
635 | =head2 Returned Data |
636 | |
637 | |
638 | =head1 EXAMPLES |
639 | |
640 | =head2 A Rename script |
641 | |
642 | Below is a simple "rename" script that uses C<globmap> to determine the |
643 | source and destination filenames. |
644 | |
645 | use File::GlobMapper qw(globmap) ; |
646 | use File::Copy; |
647 | |
648 | die "rename: Usage rename 'from' 'to'\n" |
649 | unless @ARGV == 2 ; |
650 | |
651 | my $fromGlob = shift @ARGV; |
652 | my $toGlob = shift @ARGV; |
653 | |
654 | my $pairs = globmap($fromGlob, $toGlob) |
655 | or die $File::GlobMapper::Error; |
656 | |
657 | for my $pair (@$pairs) |
658 | { |
659 | my ($from, $to) = @$pair; |
660 | move $from => $to ; |
661 | } |
662 | |
663 | |
664 | |
665 | Here is an example that renames all c files to cpp. |
666 | |
667 | $ rename '*.c' '#1.cpp' |
668 | |
669 | =head2 A few example globmaps |
670 | |
1a6a8453 |
671 | Below are a few examples of globmaps |
642e522c |
672 | |
673 | To copy all your .c file to a backup directory |
674 | |
675 | '</my/home/*.c>' '</my/backup/#1.c>' |
676 | |
677 | If you want to compress all |
678 | |
679 | '</my/home/*.[ch]>' '<*.gz>' |
680 | |
681 | To uncompress |
682 | |
683 | '</my/home/*.[ch].gz>' '</my/home/#1.#2>' |
684 | |
685 | =head1 SEE ALSO |
686 | |
687 | L<File::Glob|File::Glob> |
688 | |
689 | =head1 AUTHOR |
690 | |
691 | The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>. |
692 | |
693 | =head1 COPYRIGHT AND LICENSE |
694 | |
695 | Copyright (c) 2005 Paul Marquess. All rights reserved. |
696 | This program is free software; you can redistribute it and/or |
697 | modify it under the same terms as Perl itself. |