Move IO::Compress from ext/ to cpan/
[p5sagit/p5-mst-13.2.git] / cpan / IO-Compress / lib / File / GlobMapper.pm
CommitLineData
25f0751f 1package File::GlobMapper;
2
3use strict;
4use warnings;
5use Carp;
6
7our ($CSH_GLOB);
8
9BEGIN
10{
11 if ($] < 5.006)
12 {
13 require File::BSDGlob; import File::BSDGlob qw(:glob) ;
14 $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
15 *globber = \&File::BSDGlob::csh_glob;
16 }
17 else
18 {
19 require File::Glob; import File::Glob qw(:glob) ;
20 $CSH_GLOB = File::Glob::GLOB_CSH() ;
21 #*globber = \&File::Glob::bsd_glob;
22 *globber = \&File::Glob::csh_glob;
23 }
24}
25
26our ($Error);
27
28our ($VERSION, @EXPORT_OK);
d54256af 29$VERSION = '1.000';
25f0751f 30@EXPORT_OK = qw( globmap );
31
32
33our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
34$noPreBS = '(?<!\\\)' ; # no preceeding backslash
35$metachars = '.*?[](){}';
36$matchMetaRE = '[' . quotemeta($metachars) . ']';
37
38%mapping = (
39 '*' => '([^/]*)',
40 '?' => '([^/])',
41 '.' => '\.',
42 '[' => '([',
43 '(' => '(',
44 ')' => ')',
45 );
46
47%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
48
49sub globmap ($$;)
50{
51 my $inputGlob = shift ;
52 my $outputGlob = shift ;
53
54 my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
55 or croak "globmap: $Error" ;
56 return $obj->getFileMap();
57}
58
59sub new
60{
61 my $class = shift ;
62 my $inputGlob = shift ;
63 my $outputGlob = shift ;
64 # TODO -- flags needs to default to whatever File::Glob does
65 my $flags = shift || $CSH_GLOB ;
66 #my $flags = shift ;
67
68 $inputGlob =~ s/^\s*\<\s*//;
69 $inputGlob =~ s/\s*\>\s*$//;
70
71 $outputGlob =~ s/^\s*\<\s*//;
72 $outputGlob =~ s/\s*\>\s*$//;
73
74 my %object =
75 ( InputGlob => $inputGlob,
76 OutputGlob => $outputGlob,
77 GlobFlags => $flags,
78 Braces => 0,
79 WildCount => 0,
80 Pairs => [],
81 Sigil => '#',
82 );
83
84 my $self = bless \%object, ref($class) || $class ;
85
86 $self->_parseInputGlob()
87 or return undef ;
88
89 $self->_parseOutputGlob()
90 or return undef ;
91
92 my @inputFiles = globber($self->{InputGlob}, $flags) ;
93
94 if (GLOB_ERROR)
95 {
96 $Error = $!;
97 return undef ;
98 }
99
100 #if (whatever)
101 {
102 my $missing = grep { ! -e $_ } @inputFiles ;
103
104 if ($missing)
105 {
106 $Error = "$missing input files do not exist";
107 return undef ;
108 }
109 }
110
111 $self->{InputFiles} = \@inputFiles ;
112
113 $self->_getFiles()
114 or return undef ;
115
116 return $self;
117}
118
119sub _retError
120{
121 my $string = shift ;
122 $Error = "$string in input fileglob" ;
123 return undef ;
124}
125
126sub _unmatched
127{
128 my $delimeter = shift ;
129
130 _retError("Unmatched $delimeter");
131 return undef ;
132}
133
134sub _parseBit
135{
136 my $self = shift ;
137
138 my $string = shift ;
139
140 my $out = '';
141 my $depth = 0 ;
142
143 while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
144 {
145 $out .= quotemeta($1) ;
146 $out .= $mapping{$2} if defined $mapping{$2};
147
148 ++ $self->{WildCount} if $wildCount{$2} ;
149
150 if ($2 eq ',')
151 {
152 return _unmatched "("
153 if $depth ;
154
155 $out .= '|';
156 }
157 elsif ($2 eq '(')
158 {
159 ++ $depth ;
160 }
161 elsif ($2 eq ')')
162 {
163 return _unmatched ")"
164 if ! $depth ;
165
166 -- $depth ;
167 }
168 elsif ($2 eq '[')
169 {
170 # TODO -- quotemeta & check no '/'
171 # TODO -- check for \] & other \ within the []
172 $string =~ s#(.*?\])##
173 or return _unmatched "[" ;
174 $out .= "$1)" ;
175 }
176 elsif ($2 eq ']')
177 {
178 return _unmatched "]" ;
179 }
180 elsif ($2 eq '{' || $2 eq '}')
181 {
182 return _retError "Nested {} not allowed" ;
183 }
184 }
185
186 $out .= quotemeta $string;
187
188 return _unmatched "("
189 if $depth ;
190
191 return $out ;
192}
193
194sub _parseInputGlob
195{
196 my $self = shift ;
197
198 my $string = $self->{InputGlob} ;
199 my $inGlob = '';
200
201 # Multiple concatenated *'s don't make sense
202 #$string =~ s#\*\*+#*# ;
203
204 # TODO -- Allow space to delimit patterns?
205 #my @strings = split /\s+/, $string ;
206 #for my $str (@strings)
207 my $out = '';
208 my $depth = 0 ;
209
210 while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
211 {
212 $out .= quotemeta($1) ;
213 $out .= $mapping{$2} if defined $mapping{$2};
214 ++ $self->{WildCount} if $wildCount{$2} ;
215
216 if ($2 eq '(')
217 {
218 ++ $depth ;
219 }
220 elsif ($2 eq ')')
221 {
222 return _unmatched ")"
223 if ! $depth ;
224
225 -- $depth ;
226 }
227 elsif ($2 eq '[')
228 {
229 # TODO -- quotemeta & check no '/' or '(' or ')'
230 # TODO -- check for \] & other \ within the []
231 $string =~ s#(.*?\])##
232 or return _unmatched "[";
233 $out .= "$1)" ;
234 }
235 elsif ($2 eq ']')
236 {
237 return _unmatched "]" ;
238 }
239 elsif ($2 eq '}')
240 {
241 return _unmatched "}" ;
242 }
243 elsif ($2 eq '{')
244 {
245 # TODO -- check no '/' within the {}
246 # TODO -- check for \} & other \ within the {}
247
248 my $tmp ;
249 unless ( $string =~ s/(.*?)$noPreBS\}//)
250 {
251 return _unmatched "{";
252 }
253 #$string =~ s#(.*?)\}##;
254
255 #my $alt = join '|',
256 # map { quotemeta $_ }
257 # split "$noPreBS,", $1 ;
258 my $alt = $self->_parseBit($1);
259 defined $alt or return 0 ;
260 $out .= "($alt)" ;
261
262 ++ $self->{Braces} ;
263 }
264 }
265
266 return _unmatched "("
267 if $depth ;
268
269 $out .= quotemeta $string ;
270
271
272 $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
273 $self->{InputPattern} = $out ;
274
275 #print "# INPUT '$self->{InputGlob}' => '$out'\n";
276
277 return 1 ;
278
279}
280
281sub _parseOutputGlob
282{
283 my $self = shift ;
284
285 my $string = $self->{OutputGlob} ;
286 my $maxwild = $self->{WildCount};
287
288 if ($self->{GlobFlags} & GLOB_TILDE)
289 #if (1)
290 {
291 $string =~ s{
292 ^ ~ # find a leading tilde
293 ( # save this in $1
294 [^/] # a non-slash character
295 * # repeated 0 or more times (0 means me)
296 )
297 }{
298 $1
299 ? (getpwnam($1))[7]
300 : ( $ENV{HOME} || $ENV{LOGDIR} )
301 }ex;
302
303 }
304
305 # max #1 must be == to max no of '*' in input
306 while ( $string =~ m/#(\d)/g )
307 {
308 croak "Max wild is #$maxwild, you tried #$1"
309 if $1 > $maxwild ;
310 }
311
312 my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
313 #warn "noPreBS = '$noPreBS'\n";
314
315 #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
316 $string =~ s/${noPreBS}#(\d)/\${$1}/g;
317 $string =~ s#${noPreBS}\*#\${inFile}#g;
318 $string = '"' . $string . '"';
319
320 #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
321 $self->{OutputPattern} = $string ;
322
323 return 1 ;
324}
325
326sub _getFiles
327{
328 my $self = shift ;
329
330 my %outInMapping = ();
331 my %inFiles = () ;
332
333 foreach my $inFile (@{ $self->{InputFiles} })
334 {
335 next if $inFiles{$inFile} ++ ;
336
337 my $outFile = $inFile ;
338
339 if ( $inFile =~ m/$self->{InputPattern}/ )
340 {
341 no warnings 'uninitialized';
342 eval "\$outFile = $self->{OutputPattern};" ;
343
344 if (defined $outInMapping{$outFile})
345 {
346 $Error = "multiple input files map to one output file";
347 return undef ;
348 }
349 $outInMapping{$outFile} = $inFile;
350 push @{ $self->{Pairs} }, [$inFile, $outFile];
351 }
352 }
353
354 return 1 ;
355}
356
357sub getFileMap
358{
359 my $self = shift ;
360
361 return $self->{Pairs} ;
362}
363
364sub getHash
365{
366 my $self = shift ;
367
368 return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
369}
370
3711;
372
373__END__
374
375=head1 NAME
376
377File::GlobMapper - Extend File Glob to Allow Input and Output Files
378
379=head1 SYNOPSIS
380
381 use File::GlobMapper qw( globmap );
382
383 my $aref = globmap $input => $output
384 or die $File::GlobMapper::Error ;
385
386 my $gm = new File::GlobMapper $input => $output
387 or die $File::GlobMapper::Error ;
388
389
390=head1 DESCRIPTION
391
25f0751f 392This module needs Perl5.005 or better.
393
394This module takes the existing C<File::Glob> module as a starting point and
395extends it to allow new filenames to be derived from the files matched by
396C<File::Glob>.
397
398This can be useful when carrying out batch operations on multiple files that
399have both an input filename and output filename and the output file can be
400derived from the input filename. Examples of operations where this can be
401useful include, file renaming, file copying and file compression.
402
403
404=head2 Behind The Scenes
405
406To help explain what C<File::GlobMapper> does, consider what code you
407would write if you wanted to rename all files in the current directory
408that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
409current directory
410
411 alpha.tar.gz
412 beta.tar.gz
413 gamma.tar.gz
414
415and they need renamed to this
416
417 alpha.tgz
418 beta.tgz
419 gamma.tgz
420
421Below is a possible implementation of a script to carry out the rename
422(error cases have been omitted)
423
424 foreach my $old ( glob "*.tar.gz" )
425 {
426 my $new = $old;
427 $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
428
429 rename $old => $new
430 or die "Cannot rename '$old' to '$new': $!\n;
431 }
432
433Notice that a file glob pattern C<*.tar.gz> was used to match the
434C<.tar.gz> files, then a fairly similar regular expression was used in
435the substitute to allow the new filename to be created.
436
437Given that the file glob is just a cut-down regular expression and that it
438has already done a lot of the hard work in pattern matching the filenames,
439wouldn't it be handy to be able to use the patterns in the fileglob to
440drive the new filename?
441
442Well, that's I<exactly> what C<File::GlobMapper> does.
443
444Here is same snippet of code rewritten using C<globmap>
445
446 for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
447 {
448 my ($from, $to) = @$pair;
449 rename $from => $to
450 or die "Cannot rename '$old' to '$new': $!\n;
451 }
452
453So how does it work?
454
455Behind the scenes the C<globmap> function does a combination of a
456file glob to match existing filenames followed by a substitute
457to create the new filenames.
458
459Notice how both parameters to C<globmap> are strings that are delimited by <>.
460This is done to make them look more like file globs - it is just syntactic
461sugar, but it can be handy when you want the strings to be visually
462distinctive. The enclosing <> are optional, so you don't have to use them - in
463fact the first thing globmap will do is remove these delimiters if they are
464present.
465
466The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
467Once the enclosing "< ... >" is removed, this is passed (more or
468less) unchanged to C<File::Glob> to carry out a file match.
469
470Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
471full Perl regular expression, with the additional step of wrapping each
472transformed wildcard metacharacter sequence in parenthesis.
473
474In this case the input fileglob C<*.tar.gz> will be transformed into
475this Perl regular expression
476
477 ([^/]*)\.tar\.gz
478
479Wrapping with parenthesis allows the wildcard parts of the Input File
480Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
481the I<Output File Glob>. This parameter operates just like the replacement
482part of a substitute command. The difference is that the C<#1> syntax
483is used to reference sub-patterns matched in the input fileglob, rather
484than the C<$1> syntax that is used with perl regular expressions. In
485this case C<#1> is used to refer to the text matched by the C<*> in the
486Input File Glob. This makes it easier to use this module where the
487parameters to C<globmap> are typed at the command line.
488
489The final step involves passing each filename matched by the C<*.tar.gz>
490file glob through the derived Perl regular expression in turn and
491expanding the output fileglob using it.
492
493The end result of all this is a list of pairs of filenames. By default
494that is what is returned by C<globmap>. In this example the data structure
495returned will look like this
496
497 ( ['alpha.tar.gz' => 'alpha.tgz'],
498 ['beta.tar.gz' => 'beta.tgz' ],
499 ['gamma.tar.gz' => 'gamma.tgz']
500 )
501
502
503Each pair is an array reference with two elements - namely the I<from>
504filename, that C<File::Glob> has matched, and a I<to> filename that is
505derived from the I<from> filename.
506
507
508
509=head2 Limitations
510
511C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
512solve all filename mapping operations. Under the hood C<File::Glob> (or for
513older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
514will never have the flexibility of full Perl regular expression.
515
516=head2 Input File Glob
517
518The syntax for an Input FileGlob is identical to C<File::Glob>, except
519for the following
520
521=over 5
522
523=item 1.
524
525No nested {}
526
527=item 2.
528
529Whitespace does not delimit fileglobs.
530
531=item 3.
532
533The use of parenthesis can be used to capture parts of the input filename.
534
535=item 4.
536
537If an Input glob matches the same file more than once, only the first
538will be used.
539
540=back
541
542The syntax
543
544=over 5
545
546=item B<~>
547
548=item B<~user>
549
550
551=item B<.>
552
553Matches a literal '.'.
554Equivalent to the Perl regular expression
555
556 \.
557
558=item B<*>
559
560Matches zero or more characters, except '/'. Equivalent to the Perl
561regular expression
562
563 [^/]*
564
565=item B<?>
566
567Matches zero or one character, except '/'. Equivalent to the Perl
568regular expression
569
570 [^/]?
571
572=item B<\>
573
574Backslash is used, as usual, to escape the next character.
575
576=item B<[]>
577
578Character class.
579
580=item B<{,}>
581
582Alternation
583
584=item B<()>
585
586Capturing parenthesis that work just like perl
587
588=back
589
590Any other character it taken literally.
591
592=head2 Output File Glob
593
594The Output File Glob is a normal string, with 2 glob-like features.
595
596The first is the '*' metacharacter. This will be replaced by the complete
597filename matched by the input file glob. So
598
599 *.c *.Z
600
601The second is
602
603Output FileGlobs take the
604
605=over 5
606
607=item "*"
608
609The "*" character will be replaced with the complete input filename.
610
611=item #1
612
613Patterns of the form /#\d/ will be replaced with the
614
615=back
616
617=head2 Returned Data
618
619
620=head1 EXAMPLES
621
622=head2 A Rename script
623
624Below is a simple "rename" script that uses C<globmap> to determine the
625source and destination filenames.
626
627 use File::GlobMapper qw(globmap) ;
628 use File::Copy;
629
630 die "rename: Usage rename 'from' 'to'\n"
631 unless @ARGV == 2 ;
632
633 my $fromGlob = shift @ARGV;
634 my $toGlob = shift @ARGV;
635
636 my $pairs = globmap($fromGlob, $toGlob)
637 or die $File::GlobMapper::Error;
638
639 for my $pair (@$pairs)
640 {
641 my ($from, $to) = @$pair;
642 move $from => $to ;
643 }
644
645
646
647Here is an example that renames all c files to cpp.
648
649 $ rename '*.c' '#1.cpp'
650
651=head2 A few example globmaps
652
653Below are a few examples of globmaps
654
655To copy all your .c file to a backup directory
656
657 '</my/home/*.c>' '</my/backup/#1.c>'
658
659If you want to compress all
660
661 '</my/home/*.[ch]>' '<*.gz>'
662
663To uncompress
664
665 '</my/home/*.[ch].gz>' '</my/home/#1.#2>'
666
667=head1 SEE ALSO
668
669L<File::Glob|File::Glob>
670
671=head1 AUTHOR
672
673The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
674
675=head1 COPYRIGHT AND LICENSE
676
677Copyright (c) 2005 Paul Marquess. All rights reserved.
678This program is free software; you can redistribute it and/or
679modify it under the same terms as Perl itself.