[p5sagit/p5-mst-13.2.git] / ext / Compress / Zlib / lib / File / GlobMapper.pm

package File::GlobMapper;

use strict;
use warnings;
use Carp;

our ($CSH_GLOB);

BEGIN
{
    if ($] < 5.006)
    { 
        require File::BSDGlob; import File::BSDGlob qw(:glob) ;
        $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
        *globber = \&File::BSDGlob::csh_glob;
    }  
    else
    { 
        require File::Glob; import File::Glob qw(:glob) ;
        $CSH_GLOB = File::Glob::GLOB_CSH() ;
        #*globber = \&File::Glob::bsd_glob;
        *globber = \&File::Glob::csh_glob;
    }  
}

our ($Error);

our ($VERSION, @EXPORT_OK);
$VERSION = '0.000_02';
@EXPORT_OK = qw( globmap );


our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
$noPreBS = '(?<!\\\)' ; # no preceeding backslash
$metachars = '.*?[](){}';
$matchMetaRE = '[' . quotemeta($metachars) . ']';

%mapping = (
                '*' => '([^/]*)',
                '?' => '([^/])',
                '.' => '\.',
                '[' => '([',
                '(' => '(',
                ')' => ')',
           );

%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;           

sub globmap ($$;)
{
    my $inputGlob = shift ;
    my $outputGlob = shift ;

    my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
        or croak "globmap: $Error" ;
    return $obj->getFileMap();
}

sub new
{
    my $class = shift ;
    my $inputGlob = shift ;
    my $outputGlob = shift ;
    # TODO -- flags needs to default to whatever File::Glob does
    my $flags = shift || $CSH_GLOB ;
    #my $flags = shift ;

    $inputGlob =~ s/^\s*\<\s*//;
    $inputGlob =~ s/\s*\>\s*$//;

    $outputGlob =~ s/^\s*\<\s*//;
    $outputGlob =~ s/\s*\>\s*$//;

    my %object =
            (   InputGlob   => $inputGlob,
                OutputGlob  => $outputGlob,
                GlobFlags   => $flags,
                Braces      => 0,
                WildCount   => 0,
                Pairs       => [],
                Sigil       => '#',
            );

    my $self = bless \%object, ref($class) || $class ;

    $self->_parseInputGlob()
        or return undef ;

    $self->_parseOutputGlob()
        or return undef ;
    
    my @inputFiles = globber($self->{InputGlob}, $flags) ;

    if (GLOB_ERROR)
    {
        $Error = $!;
        return undef ;
    }

    #if (whatever)
    {
        my $missing = grep { ! -e $_ } @inputFiles ;

        if ($missing)
        {
            $Error = "$missing input files do not exist";
            return undef ;
        }
    }

    $self->{InputFiles} = \@inputFiles ;

    $self->_getFiles()
        or return undef ;

    return $self;
}

sub _retError
{
    my $string = shift ;
    $Error = "$string in input fileglob" ;
    return undef ;
}

sub _unmatched
{
    my $delimeter = shift ;

    _retError("Unmatched $delimeter");
    return undef ;
}

sub _parseBit
{
    my $self = shift ;

    my $string = shift ;

    my $out = '';
    my $depth = 0 ;

    while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
    {
        $out .= quotemeta($1) ;
        $out .= $mapping{$2} if defined $mapping{$2};

        ++ $self->{WildCount} if $wildCount{$2} ;

        if ($2 eq ',')
        { 
            return _unmatched "("
                if $depth ;
            
            $out .= '|';
        }
        elsif ($2 eq '(')
        { 
            ++ $depth ;
        }
        elsif ($2 eq ')')
        { 
            return _unmatched ")"
                if ! $depth ;

            -- $depth ;
        }
        elsif ($2 eq '[')
        {
            # TODO -- quotemeta & check no '/'
            # TODO -- check for \]  & other \ within the []
            $string =~ s#(.*?\])##
                or return _unmatched "[" ;
            $out .= "$1)" ;
        }
        elsif ($2 eq ']')
        {
            return _unmatched "]" ;
        }
        elsif ($2 eq '{' || $2 eq '}')
        {
            return _retError "Nested {} not allowed" ;
        }
    }

    $out .= quotemeta $string;

    return _unmatched "("
        if $depth ;

    return $out ;
}

sub _parseInputGlob
{
    my $self = shift ;

    my $string = $self->{InputGlob} ;
    my $inGlob = '';

    # Multiple concatenated *'s don't make sense
    #$string =~ s#\*\*+#*# ;

    # TODO -- Allow space to delimit patterns?
    #my @strings = split /\s+/, $string ;
    #for my $str (@strings)
    my $out = '';
    my $depth = 0 ;

    while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
    {
        $out .= quotemeta($1) ;
        $out .= $mapping{$2} if defined $mapping{$2};
        ++ $self->{WildCount} if $wildCount{$2} ;

        if ($2 eq '(')
        { 
            ++ $depth ;
        }
        elsif ($2 eq ')')
        { 
            return _unmatched ")"
                if ! $depth ;

            -- $depth ;
        }
        elsif ($2 eq '[')
        {
            # TODO -- quotemeta & check no '/' or '(' or ')'
            # TODO -- check for \]  & other \ within the []
            $string =~ s#(.*?\])##
                or return _unmatched "[";
            $out .= "$1)" ;
        }
        elsif ($2 eq ']')
        {
            return _unmatched "]" ;
        }
        elsif ($2 eq '}')
        {
            return _unmatched "}" ;
        }
        elsif ($2 eq '{')
        {
            # TODO -- check no '/' within the {}
            # TODO -- check for \}  & other \ within the {}

            my $tmp ;
            unless ( $string =~ s/(.*?)$noPreBS\}//)
            {
                return _unmatched "{";
            }
            #$string =~ s#(.*?)\}##;

            #my $alt = join '|', 
            #          map { quotemeta $_ } 
            #          split "$noPreBS,", $1 ;
            my $alt = $self->_parseBit($1);
            defined $alt or return 0 ;
            $out .= "($alt)" ;

            ++ $self->{Braces} ;
        }
    }

    return _unmatched "("
        if $depth ;

    $out .= quotemeta $string ;


    $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
    $self->{InputPattern} = $out ;

    #print "# INPUT '$self->{InputGlob}' => '$out'\n";

    return 1 ;

}

sub _parseOutputGlob
{
    my $self = shift ;

    my $string = $self->{OutputGlob} ;
    my $maxwild = $self->{WildCount};

    if ($self->{GlobFlags} & GLOB_TILDE)
    #if (1)
    {
        $string =~ s{
              ^ ~             # find a leading tilde
              (               # save this in $1
                  [^/]        # a non-slash character
                        *     # repeated 0 or more times (0 means me)
              )
            }{
              $1
                  ? (getpwnam($1))[7]
                  : ( $ENV{HOME} || $ENV{LOGDIR} )
            }ex;

    }

    # max #1 must be == to max no of '*' in input
    while ( $string =~ m/#(\d)/g )
    {
        croak "Max wild is #$maxwild, you tried #$1"
            if $1 > $maxwild ;
    }

    my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
    #warn "noPreBS = '$noPreBS'\n";

    #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
    $string =~ s/${noPreBS}#(\d)/\${$1}/g;
    $string =~ s#${noPreBS}\*#\${inFile}#g;
    $string = '"' . $string . '"';

    #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
    $self->{OutputPattern} = $string ;

    return 1 ;
}

sub _getFiles
{
    my $self = shift ;

    my %outInMapping = ();
    my %inFiles = () ;

    foreach my $inFile (@{ $self->{InputFiles} })
    {
        next if $inFiles{$inFile} ++ ;

        my $outFile = $inFile ;

        if ( $inFile =~ m/$self->{InputPattern}/ )
        {
            no warnings 'uninitialized';
            eval "\$outFile = $self->{OutputPattern};" ;

            if (defined $outInMapping{$outFile})
            {
                $Error =  "multiple input files map to one output file";
                return undef ;
            }
            $outInMapping{$outFile} = $inFile;
            push @{ $self->{Pairs} }, [$inFile, $outFile];
        }
    }

    return 1 ;
}

sub getFileMap
{
    my $self = shift ;

    return $self->{Pairs} ;
}

sub getHash
{
    my $self = shift ;

    return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
}

1;

__END__

=head1 NAME

File::GlobMapper - Extend File Glob to Allow Input and Output Files

=head1 SYNOPSIS

    use File::GlobMapper qw( globmap );

    my $aref = globmap $input => $output
        or die $File::GlobMapper::Error ;

    my $gm = new File::GlobMapper $input => $output
        or die $File::GlobMapper::Error ;


=head1 DESCRIPTION

B<WARNING Alpha Release Alert!> 

=over 5

=item * This code is a work in progress. 

=item * There are known bugs. 

=item * The interface defined here is tentative. 

=item * There are portability issues. 

=item * Do not use in production code.

=item * Consider yourself warned!

=back

This module needs Perl5.005 or better.

This module takes the existing C<File::Glob> module as a starting point and
extends it to allow new filenames to be derived from the files matched by
C<File::Glob>.

This can be useful when carrying out batch operations on multiple files that
have both an input filename and output filename and the output file can be
derived from the input filename. Examples of operations where this can be
useful include, file renaming, file copying and file compression.


=head2 Behind The Scenes

To help explain what C<File::GlobMapper> does, consider what code you
would write if you wanted to rename all files in the current directory
that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
current directory

    alpha.tar.gz
    beta.tar.gz
    gamma.tar.gz

and they need renamed to this

    alpha.tgz
    beta.tgz
    gamma.tgz

Below is a possible implementation of a script to carry out the rename
(error cases have been omitted)

    foreach my $old ( glob "*.tar.gz" )
    {
        my $new = $old;
        $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;

        rename $old => $new 
            or die "Cannot rename '$old' to '$new': $!\n;
    }

Notice that a file glob pattern C<*.tar.gz> was used to match the
C<.tar.gz> files, then a fairly similar regular expression was used in
the substitute to allow the new filename to be created.

Given that the file glob is just a cut-down regular expression and that it
has already done a lot of the hard work in pattern matching the filenames,
wouldn't it be handy to be able to use the patterns in the fileglob to
drive the new filename?

Well, that's I<exactly> what C<File::GlobMapper> does. 

Here is same snippet of code rewritten using C<globmap>

    for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
    {
        my ($from, $to) = @$pair;
        rename $from => $to 
            or die "Cannot rename '$old' to '$new': $!\n;
    }

So how does it work?

Behind the scenes the C<globmap> function does a combination of a
file glob to match existing filenames followed by a substitute
to create the new filenames. 

Notice how both parameters to C<globmap> are strings that are delimited by <>.
This is done to make them look more like file globs - it is just syntactic
sugar, but it can be handy when you want the strings to be visually
distinctive. The enclosing <> are optional, so you don't have to use them - in
fact the first thing globmap will do is remove these delimiters if they are
present.

The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>. 
Once the enclosing "< ... >" is removed, this is passed (more or
less) unchanged to C<File::Glob> to carry out a file match.

Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
full Perl regular expression, with the additional step of wrapping each
transformed wildcard metacharacter sequence in parenthesis.

In this case the input fileglob C<*.tar.gz> will be transformed into
this Perl regular expression 

    ([^/]*)\.tar\.gz

Wrapping with parenthesis allows the wildcard parts of the Input File
Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
the I<Output File Glob>. This parameter operates just like the replacement
part of a substitute command. The difference is that the C<#1> syntax
is used to reference sub-patterns matched in the input fileglob, rather
than the C<$1> syntax that is used with perl regular expressions. In
this case C<#1> is used to refer to the text matched by the C<*> in the
Input File Glob. This makes it easier to use this module where the
parameters to C<globmap> are typed at the command line.

The final step involves passing each filename matched by the C<*.tar.gz>
file glob through the derived Perl regular expression in turn and
expanding the output fileglob using it.

The end result of all this is a list of pairs of filenames. By default
that is what is returned by C<globmap>. In this example the data structure
returned will look like this

     ( ['alpha.tar.gz' => 'alpha.tgz'],
       ['beta.tar.gz'  => 'beta.tgz' ],
       ['gamma.tar.gz' => 'gamma.tgz']
     )


Each pair is an array reference with two elements - namely the I<from>
filename, that C<File::Glob> has matched, and a I<to> filename that is
derived from the I<from> filename.


=head2 Limitations

C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
solve all filename mapping operations. Under the hood C<File::Glob> (or for
older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
will never have the flexibility of full Perl regular expression.

=head2 Input File Glob

The syntax for an Input FileGlob is identical to C<File::Glob>, except
for the following

=over 5

=item 1.

No nested {}

=item 2.

Whitespace does not delimit fileglobs.

=item 3.

The use of parenthesis can be used to capture parts of the input filename.

=item 4.

If an Input glob matches the same file more than once, only the first
will be used.

=back

The syntax

=over 5

=item B<~>

=item B<~user>


=item B<.>

Matches a literal '.'.
Equivalent to the Perl regular expression

    \.

=item B<*>

Matches zero or more characters, except '/'. Equivalent to the Perl
regular expression

    [^/]*

=item B<?>

Matches zero or one character, except '/'. Equivalent to the Perl
regular expression

    [^/]?

=item B<\>

Backslash is used, as usual, to escape the next character.

=item  B<[]>

Character class.

=item  B<{,}>

Alternation

=item  B<()>

Capturing parenthesis that work just like perl

=back

Any other character it taken literally.

=head2 Output File Glob

The Output File Glob is a normal string, with 2 glob-like features.

The first is the '*' metacharacter. This will be replaced by the complete
filename matched by the input file glob. So

    *.c *.Z

The second is     

Output FileGlobs take the 

=over 5

=item "*"

The "*" character will be replaced with the complete input filename.

=item #1

Patterns of the form /#\d/ will be replaced with the 

=back

=head2 Returned Data


=head1 EXAMPLES

=head2 A Rename script

Below is a simple "rename" script that uses C<globmap> to determine the
source and destination filenames.

    use File::GlobMapper qw(globmap) ;
    use File::Copy;

    die "rename: Usage rename 'from' 'to'\n"
        unless @ARGV == 2 ;

    my $fromGlob = shift @ARGV;
    my $toGlob   = shift @ARGV;

    my $pairs = globmap($fromGlob, $toGlob)
        or die $File::GlobMapper::Error;

    for my $pair (@$pairs)
    {
        my ($from, $to) = @$pair;
        move $from => $to ;
    }


Here is an example that renames all c files to cpp.
    
    $ rename '*.c' '#1.cpp'

=head2 A few example globmaps

Below are a few examples of globmaps

To copy all your .c file to a backup directory

    '</my/home/*.c>'    '</my/backup/#1.c>'

If you want to compress all    

    '</my/home/*.[ch]>'    '<*.gz>'

To uncompress

    '</my/home/*.[ch].gz>'    '</my/home/#1.#2>'

=head1 SEE ALSO

L<File::Glob|File::Glob>

=head1 AUTHOR

The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005 Paul Marquess. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Commit	Line	Data
642e522c	1	package File::GlobMapper;
	2
	3	use strict;
	4	use warnings;
	5	use Carp;
	6
	7	our ($CSH_GLOB);
	8
	9	BEGIN
	10	{
	11	if ($] < 5.006)
	12	{
	13	require File::BSDGlob; import File::BSDGlob qw(:glob) ;
	14	$CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
1a6a8453	15	*globber = \&File::BSDGlob::csh_glob;
642e522c	16	}
	17	else
	18	{
	19	require File::Glob; import File::Glob qw(:glob) ;
	20	$CSH_GLOB = File::Glob::GLOB_CSH() ;
	21	#*globber = \&File::Glob::bsd_glob;
1a6a8453	22	*globber = \&File::Glob::csh_glob;
642e522c	23	}
	24	}
	25
	26	our ($Error);
	27
	28	our ($VERSION, @EXPORT_OK);
	29	$VERSION = '0.000_02';
	30	@EXPORT_OK = qw( globmap );
	31
	32
	33	our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
	34	$noPreBS = '(?<!\\\)' ; # no preceeding backslash
	35	$metachars = '.*?[](){}';
	36	$matchMetaRE = '[' . quotemeta($metachars) . ']';
	37
	38	%mapping = (
	39	'' => '([^/])',
	40	'?' => '([^/])',
	41	'.' => '\.',
	42	'[' => '([',
	43	'(' => '(',
	44	')' => ')',
	45	);
	46
	47	%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
	48
	49	sub globmap ($$;)
	50	{
	51	my $inputGlob = shift ;
	52	my $outputGlob = shift ;
	53
	54	my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
	55	or croak "globmap: $Error" ;
	56	return $obj->getFileMap();
	57	}
	58
	59	sub new
	60	{
	61	my $class = shift ;
	62	my $inputGlob = shift ;
	63	my $outputGlob = shift ;
	64	# TODO -- flags needs to default to whatever File::Glob does
	65	my $flags = shift \|\| $CSH_GLOB ;
	66	#my $flags = shift ;
	67
	68	$inputGlob =~ s/^\s\<\s//;
	69	$inputGlob =~ s/\s\>\s$//;
	70
	71	$outputGlob =~ s/^\s\<\s//;
	72	$outputGlob =~ s/\s\>\s$//;
	73
	74	my %object =
	75	( InputGlob => $inputGlob,
	76	OutputGlob => $outputGlob,
	77	GlobFlags => $flags,
	78	Braces => 0,
	79	WildCount => 0,
	80	Pairs => [],
	81	Sigil => '#',
	82	);
	83
	84	my $self = bless \%object, ref($class) \|\| $class ;
	85
	86	$self->_parseInputGlob()
87	or return undef ;
88
89	$self->_parseOutputGlob()
90	or return undef ;
91
92	my @inputFiles = globber($self->{InputGlob}, $flags) ;
93
94	if (GLOB_ERROR)
95	{
96	$Error = $!;
97	return undef ;
98	}
99
100	#if (whatever)
101	{
102	my $missing = grep { ! -e $_ } @inputFiles ;
103
104	if ($missing)
105	{
106	$Error = "$missing input files do not exist";
107	return undef ;
108	}
109	}
110
111	$self->{InputFiles} = \@inputFiles ;
112
113	$self->_getFiles()
114	or return undef ;
115
116	return $self;
117	}
118
119	sub _retError
120	{
121	my $string = shift ;
122	$Error = "$string in input fileglob" ;
123	return undef ;
124	}
125
126	sub _unmatched
127	{
128	my $delimeter = shift ;
129
130	_retError("Unmatched $delimeter");
131	return undef ;
132	}
133
134	sub _parseBit
135	{
136	my $self = shift ;
137
138	my $string = shift ;
139
140	my $out = '';
141	my $depth = 0 ;
142
143	while ($string =~ s/(.*?)$noPreBS(,\|$matchMetaRE)//)
144	{
145	$out .= quotemeta($1) ;
146	$out .= $mapping{$2} if defined $mapping{$2};
147
148	++ $self->{WildCount} if $wildCount{$2} ;
149
150	if ($2 eq ',')
151	{
152	return _unmatched "("
153	if $depth ;
154
155	$out .= '\|';
156	}
157	elsif ($2 eq '(')
158	{
159	++ $depth ;
160	}
161	elsif ($2 eq ')')
162	{
163	return _unmatched ")"
164	if ! $depth ;
165
166	-- $depth ;
167	}
168	elsif ($2 eq '[')
169	{
170	# TODO -- quotemeta & check no '/'
171	# TODO -- check for \] & other \ within the []
172	$string =~ s#(.*?\])##
173	or return _unmatched "[" ;
174	$out .= "$1)" ;
175	}
176	elsif ($2 eq ']')
177	{
178	return _unmatched "]" ;
179	}
180	elsif ($2 eq '{' \|\| $2 eq '}')
181	{
182	return _retError "Nested {} not allowed" ;
183	}
184	}
185
186	$out .= quotemeta $string;
187
188	return _unmatched "("
189	if $depth ;
190
191	return $out ;
192	}
193
194	sub _parseInputGlob
195	{
196	my $self = shift ;
197
198	my $string = $self->{InputGlob} ;
199	my $inGlob = '';
200
201	# Multiple concatenated *'s don't make sense
202	#$string =~ s#\\+#*# ;
203
204	# TODO -- Allow space to delimit patterns?
205	#my @strings = split /\s+/, $string ;
206	#for my $str (@strings)
207	my $out = '';
208	my $depth = 0 ;
209
210	while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
211	{
212	$out .= quotemeta($1) ;
213	$out .= $mapping{$2} if defined $mapping{$2};
214	++ $self->{WildCount} if $wildCount{$2} ;
215
216	if ($2 eq '(')
217	{
218	++ $depth ;
219	}
220	elsif ($2 eq ')')
221	{
222	return _unmatched ")"
223	if ! $depth ;
224
225	-- $depth ;
226	}
227	elsif ($2 eq '[')
228	{
229	# TODO -- quotemeta & check no '/' or '(' or ')'
230	# TODO -- check for \] & other \ within the []
231	$string =~ s#(.*?\])##
232	or return _unmatched "[";
233	$out .= "$1)" ;
234	}
235	elsif ($2 eq ']')
236	{
237	return _unmatched "]" ;
238	}
239	elsif ($2 eq '}')
240	{
241	return _unmatched "}" ;
242	}
243	elsif ($2 eq '{')
244	{
245	# TODO -- check no '/' within the {}
246	# TODO -- check for \} & other \ within the {}
247
248	my $tmp ;
249	unless ( $string =~ s/(.*?)$noPreBS\}//)
250	{
251	return _unmatched "{";
252	}
253	#$string =~ s#(.*?)\}##;
254
255	#my $alt = join '\|',
256	# map { quotemeta $_ }
257	# split "$noPreBS,", $1 ;
258	my $alt = $self->_parseBit($1);
259	defined $alt or return 0 ;
260	$out .= "($alt)" ;
261
262	++ $self->{Braces} ;
263	}
264	}
265
266	return _unmatched "("
267	if $depth ;
268
269	$out .= quotemeta $string ;
270
271
272	$self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
273	$self->{InputPattern} = $out ;
274
275	#print "# INPUT '$self->{InputGlob}' => '$out'\n";
276
277	return 1 ;
278
279	}
280
281	sub _parseOutputGlob
282	{
283	my $self = shift ;
284
285	my $string = $self->{OutputGlob} ;
286	my $maxwild = $self->{WildCount};
287
288	if ($self->{GlobFlags} & GLOB_TILDE)
289	#if (1)
290	{
291	$string =~ s{
292	^ ~ # find a leading tilde
293	( # save this in $1
294	[^/] # a non-slash character
295	* # repeated 0 or more times (0 means me)
296	)
297	}{
298	$1
299	? (getpwnam($1))[7]
300	: ( $ENV{HOME} \|\| $ENV{LOGDIR} )
301	}ex;
302
303	}
304
305	# max #1 must be == to max no of '*' in input
306	while ( $string =~ m/#(\d)/g )
307	{
308	croak "Max wild is #$maxwild, you tried #$1"
309	if $1 > $maxwild ;
310	}
311
312	my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
313	#warn "noPreBS = '$noPreBS'\n";
314
315	#$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
316	$string =~ s/${noPreBS}#(\d)/\${$1}/g;
317	$string =~ s#${noPreBS}\*#\${inFile}#g;
318	$string = '"' . $string . '"';
319
320	#print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
321	$self->{OutputPattern} = $string ;
322
323	return 1 ;
324	}
325
326	sub _getFiles
327	{
328	my $self = shift ;
329
330	my %outInMapping = ();
331	my %inFiles = () ;
332
333	foreach my $inFile (@{ $self->{InputFiles} })
334	{
335	next if $inFiles{$inFile} ++ ;
336
337	my $outFile = $inFile ;
338
339	if ( $inFile =~ m/$self->{InputPattern}/ )
340	{
341	no warnings 'uninitialized';
342	eval "\$outFile = $self->{OutputPattern};" ;
343
344	if (defined $outInMapping{$outFile})
345	{
346	$Error = "multiple input files map to one output file";
347	return undef ;
348	}
349	$outInMapping{$outFile} = $inFile;
350	push @{ $self->{Pairs} }, [$inFile, $outFile];
351	}
352	}
353
354	return 1 ;
355	}
356
357	sub getFileMap
358	{
359	my $self = shift ;
360
361	return $self->{Pairs} ;
362	}
363
364	sub getHash
365	{
366	my $self = shift ;
367
368	return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
369	}
370
371	1;
372
373	__END__
374
375	=head1 NAME
376
377	File::GlobMapper - Extend File Glob to Allow Input and Output Files
378
379	=head1 SYNOPSIS
380
381	use File::GlobMapper qw( globmap );
382
383	my $aref = globmap $input => $output
384	or die $File::GlobMapper::Error ;
385
386	my $gm = new File::GlobMapper $input => $output
387	or die $File::GlobMapper::Error ;
388
389
390	=head1 DESCRIPTION
391
392	B<WARNING Alpha Release Alert!>
393
394	=over 5
395
396	=item * This code is a work in progress.
397
398	=item * There are known bugs.
399
400	=item * The interface defined here is tentative.
401
402	=item * There are portability issues.
403
404	=item * Do not use in production code.
405
406	=item * Consider yourself warned!
407
408	=back
409
410	This module needs Perl5.005 or better.
411
412	This module takes the existing C<File::Glob> module as a starting point and
413	extends it to allow new filenames to be derived from the files matched by
414	C<File::Glob>.
415
416	This can be useful when carrying out batch operations on multiple files that
417	have both an input filename and output filename and the output file can be
418	derived from the input filename. Examples of operations where this can be
419	useful include, file renaming, file copying and file compression.
420
421
422	=head2 Behind The Scenes
423
424	To help explain what C<File::GlobMapper> does, consider what code you
425	would write if you wanted to rename all files in the current directory
426	that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
1a6a8453	427	current directory
642e522c	428
	429	alpha.tar.gz
	430	beta.tar.gz
	431	gamma.tar.gz
	432
	433	and they need renamed to this
	434
	435	alpha.tgz
	436	beta.tgz
	437	gamma.tgz
	438
	439	Below is a possible implementation of a script to carry out the rename
	440	(error cases have been omitted)
	441
	442	foreach my $old ( glob "*.tar.gz" )
	443	{
	444	my $new = $old;
	445	$new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
	446
	447	rename $old => $new
	448	or die "Cannot rename '$old' to '$new': $!\n;
	449	}
	450
	451	Notice that a file glob pattern C<*.tar.gz> was used to match the
	452	C<.tar.gz> files, then a fairly similar regular expression was used in
	453	the substitute to allow the new filename to be created.
	454
	455	Given that the file glob is just a cut-down regular expression and that it
	456	has already done a lot of the hard work in pattern matching the filenames,
	457	wouldn't it be handy to be able to use the patterns in the fileglob to
	458	drive the new filename?
	459
	460	Well, that's I<exactly> what C<File::GlobMapper> does.
	461
	462	Here is same snippet of code rewritten using C<globmap>
	463
	464	for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
	465	{
	466	my ($from, $to) = @$pair;
	467	rename $from => $to
	468	or die "Cannot rename '$old' to '$new': $!\n;
	469	}
	470
	471	So how does it work?
	472
	473	Behind the scenes the C<globmap> function does a combination of a
	474	file glob to match existing filenames followed by a substitute
	475	to create the new filenames.
	476
1a6a8453	477	Notice how both parameters to C<globmap> are strings that are delimited by <>.
642e522c	478	This is done to make them look more like file globs - it is just syntactic
	479	sugar, but it can be handy when you want the strings to be visually
	480	distinctive. The enclosing <> are optional, so you don't have to use them - in
1a6a8453	481	fact the first thing globmap will do is remove these delimiters if they are
642e522c	482	present.
	483
	484	The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
	485	Once the enclosing "< ... >" is removed, this is passed (more or
	486	less) unchanged to C<File::Glob> to carry out a file match.
	487
	488	Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
	489	full Perl regular expression, with the additional step of wrapping each
	490	transformed wildcard metacharacter sequence in parenthesis.
	491
	492	In this case the input fileglob C<*.tar.gz> will be transformed into
	493	this Perl regular expression
	494
	495	([^/]*)\.tar\.gz
	496
	497	Wrapping with parenthesis allows the wildcard parts of the Input File
	498	Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
	499	the I<Output File Glob>. This parameter operates just like the replacement
	500	part of a substitute command. The difference is that the C<#1> syntax
	501	is used to reference sub-patterns matched in the input fileglob, rather
	502	than the C<$1> syntax that is used with perl regular expressions. In
	503	this case C<#1> is used to refer to the text matched by the C<*> in the
	504	Input File Glob. This makes it easier to use this module where the
	505	parameters to C<globmap> are typed at the command line.
	506
	507	The final step involves passing each filename matched by the C<*.tar.gz>
	508	file glob through the derived Perl regular expression in turn and
	509	expanding the output fileglob using it.
	510
	511	The end result of all this is a list of pairs of filenames. By default
	512	that is what is returned by C<globmap>. In this example the data structure
	513	returned will look like this
	514
	515	( ['alpha.tar.gz' => 'alpha.tgz'],
	516	['beta.tar.gz' => 'beta.tgz' ],
	517	['gamma.tar.gz' => 'gamma.tgz']
	518	)
	519
	520
	521	Each pair is an array reference with two elements - namely the I<from>
	522	filename, that C<File::Glob> has matched, and a I<to> filename that is
	523	derived from the I<from> filename.
	524
	525
	526
	527	=head2 Limitations
	528
	529	C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
	530	solve all filename mapping operations. Under the hood C<File::Glob> (or for
1a6a8453	531	older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
642e522c	532	will never have the flexibility of full Perl regular expression.
	533
	534	=head2 Input File Glob
	535
	536	The syntax for an Input FileGlob is identical to C<File::Glob>, except
	537	for the following
	538
	539	=over 5
	540
	541	=item 1.
	542
	543	No nested {}
	544
	545	=item 2.
	546
	547	Whitespace does not delimit fileglobs.
	548
	549	=item 3.
	550
	551	The use of parenthesis can be used to capture parts of the input filename.
	552
	553	=item 4.
	554
	555	If an Input glob matches the same file more than once, only the first
	556	will be used.
	557
	558	=back
	559
	560	The syntax
	561
	562	=over 5
	563
	564	=item B<~>
	565
	566	=item B<~user>
	567
	568
	569	=item B<.>
	570
	571	Matches a literal '.'.
	572	Equivalent to the Perl regular expression
	573
	574	\.
	575
	576	=item B<*>
	577
	578	Matches zero or more characters, except '/'. Equivalent to the Perl
	579	regular expression
	580
	581	[^/]*
	582
	583	=item B<?>
	584
	585	Matches zero or one character, except '/'. Equivalent to the Perl
	586	regular expression
	587
	588	[^/]?
	589
	590	=item B<\>
	591
	592	Backslash is used, as usual, to escape the next character.
	593
	594	=item B<[]>
	595
596	Character class.
597
598	=item B<{,}>
599
600	Alternation
601
602	=item B<()>
603
604	Capturing parenthesis that work just like perl
605
606	=back
607
608	Any other character it taken literally.
609
610	=head2 Output File Glob
611
612	The Output File Glob is a normal string, with 2 glob-like features.
613
614	The first is the '*' metacharacter. This will be replaced by the complete
615	filename matched by the input file glob. So
616
617	.c .Z
618
619	The second is
620
621	Output FileGlobs take the
622
623	=over 5
624
625	=item "*"
626
1a6a8453	627	The "*" character will be replaced with the complete input filename.
642e522c	628
	629	=item #1
	630
	631	Patterns of the form /#\d/ will be replaced with the
	632
	633	=back
	634
	635	=head2 Returned Data
	636
	637
	638	=head1 EXAMPLES
	639
	640	=head2 A Rename script
	641
	642	Below is a simple "rename" script that uses C<globmap> to determine the
	643	source and destination filenames.
	644
	645	use File::GlobMapper qw(globmap) ;
	646	use File::Copy;
	647
	648	die "rename: Usage rename 'from' 'to'\n"
	649	unless @ARGV == 2 ;
	650
	651	my $fromGlob = shift @ARGV;
	652	my $toGlob = shift @ARGV;
	653
	654	my $pairs = globmap($fromGlob, $toGlob)
	655	or die $File::GlobMapper::Error;
	656
	657	for my $pair (@$pairs)
	658	{
	659	my ($from, $to) = @$pair;
	660	move $from => $to ;
	661	}
	662
	663
	664
	665	Here is an example that renames all c files to cpp.
	666
	667	$ rename '*.c' '#1.cpp'
	668
	669	=head2 A few example globmaps
	670
1a6a8453	671	Below are a few examples of globmaps
642e522c	672
	673	To copy all your .c file to a backup directory
	674
	675	'</my/home/*.c>' '</my/backup/#1.c>'
	676
	677	If you want to compress all
	678
	679	'</my/home/.[ch]>' '<.gz>'
	680
	681	To uncompress
	682
	683	'</my/home/*.[ch].gz>' '</my/home/#1.#2>'
	684
	685	=head1 SEE ALSO
	686
	687	L<File::Glob\|File::Glob>
	688
	689	=head1 AUTHOR
	690
	691	The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
	692
	693	=head1 COPYRIGHT AND LICENSE
	694
	695	Copyright (c) 2005 Paul Marquess. All rights reserved.
	696	This program is free software; you can redistribute it and/or
	697	modify it under the same terms as Perl itself.