X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FFile%2FDosGlob.pm;h=496a14c13797b72e61b76b699af4b2927fe5e906;hb=e63b33793c3cf76a134a6446d1f83479e030a15f;hp=594ee2ec8432b1c0bb7429ae1a29f0f9344629b6;hpb=163d180b58c52940c22cec66c02d57eda243c262;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/File/DosGlob.pm b/lib/File/DosGlob.pm index 594ee2e..496a14c 100644 --- a/lib/File/DosGlob.pm +++ b/lib/File/DosGlob.pm @@ -1,49 +1,60 @@ #!perl -w +# use strict fails +#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191. + # # Documentation at the __END__ # package File::DosGlob; +our $VERSION = '1.01'; +use strict; +use warnings; + sub doglob { my $cond = shift; my @retval = (); #print "doglob: ", join('|', @_), "\n"; OUTER: - for my $arg (@_) { - local $_ = $arg; + for my $pat (@_) { my @matched = (); my @globdirs = (); my $head = '.'; my $sepchr = '/'; - next OUTER unless defined $_ and $_ ne ''; + my $tail; + next OUTER unless defined $pat and $pat ne ''; # if arg is within quotes strip em and do no globbing - if (/^"(.*)"$/) { - $_ = $1; - if ($cond eq 'd') { push(@retval, $_) if -d $_ } - else { push(@retval, $_) if -e $_ } + if ($pat =~ /^"(.*)"\z/s) { + $pat = $1; + if ($cond eq 'd') { push(@retval, $pat) if -d $pat } + else { push(@retval, $pat) if -e $pat } next OUTER; } - if (m|^(.*)([\\/])([^\\/]*)$|) { - my $tail; + # wildcards with a drive prefix such as h:*.pm must be changed + # to h:./*.pm to expand correctly + if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) { + substr($pat,0,2) = $1 . "./"; + } + if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) { ($head, $sepchr, $tail) = ($1,$2,$3); #print "div: |$head|$sepchr|$tail|\n"; - push (@retval, $_), next OUTER if $tail eq ''; + push (@retval, $pat), next OUTER if $tail eq ''; if ($head =~ /[*?]/) { @globdirs = doglob('d', $head); push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)), next OUTER if @globdirs; } - $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:$/; - $_ = $tail; + $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s; + $pat = $tail; } # # If file component has no wildcards, we can avoid opendir - unless (/[*?]/) { + unless ($pat =~ /[*?]/) { $head = '' if $head eq '.'; $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr; - $head .= $_; + $head .= $pat; if ($cond eq 'd') { push(@retval,$head) if -d $head } else { push(@retval,$head) if -e $head } next OUTER; @@ -55,14 +66,13 @@ sub doglob { $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr; # escape regex metachars but not glob chars - s:([].+^\-\${}[|]):\\$1:g; + $pat =~ s:([].+^\-\${}[|]):\\$1:g; # and convert DOS-style wildcards to regex - s/\*/.*/g; - s/\?/.?/g; + $pat =~ s/\*/.*/g; + $pat =~ s/\?/.?/g; - #print "regex: '$_', head: '$head'\n"; - my $matchsub = eval 'sub { $_[0] =~ m|^' . $_ . '$|io }'; - warn($@), next OUTER if $@; + #print "regex: '$pat', head: '$head'\n"; + my $matchsub = sub { $_[0] =~ m|^$pat\z|is }; INNER: for my $e (@leaves) { next INNER if $e eq '.' or $e eq '..'; @@ -75,7 +85,7 @@ sub doglob { # has a dot *and* name is shorter than 9 chars. # if (index($e,'.') == -1 and length($e) < 9 - and index($_,'\\.') != -1) { + and index($pat,'\\.') != -1) { push(@matched, "$head$e"), next INNER if &$matchsub("$e."); } } @@ -84,6 +94,207 @@ sub doglob { return @retval; } + +# +# Do DOS-like globbing on Mac OS +# +sub doglob_Mac { + my $cond = shift; + my @retval = (); + + #print "doglob_Mac: ", join('|', @_), "\n"; + OUTER: + for my $arg (@_) { + local $_ = $arg; + my @matched = (); + my @globdirs = (); + my $head = ':'; + my $not_esc_head = $head; + my $sepchr = ':'; + next OUTER unless defined $_ and $_ ne ''; + # if arg is within quotes strip em and do no globbing + if (/^"(.*)"\z/s) { + $_ = $1; + # $_ may contain escaped metachars '\*', '\?' and '\' + my $not_esc_arg = $_; + $not_esc_arg =~ s/\\([*?\\])/$1/g; + if ($cond eq 'd') { push(@retval, $not_esc_arg) if -d $not_esc_arg } + else { push(@retval, $not_esc_arg) if -e $not_esc_arg } + next OUTER; + } + + if (m|^(.*?)(:+)([^:]*)\z|s) { # note: $1 is not greedy + my $tail; + ($head, $sepchr, $tail) = ($1,$2,$3); + #print "div: |$head|$sepchr|$tail|\n"; + push (@retval, $_), next OUTER if $tail eq ''; + # + # $head may contain escaped metachars '\*' and '\?' + + my $tmp_head = $head; + # if a '*' or '?' is preceded by an odd count of '\', temporary delete + # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as + # wildcards + $tmp_head =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg; + + if ($tmp_head =~ /[*?]/) { # if there are wildcards ... + @globdirs = doglob_Mac('d', $head); + push(@retval, doglob_Mac($cond, map {"$_$sepchr$tail"} @globdirs)), + next OUTER if @globdirs; + } + + $head .= $sepchr; + $not_esc_head = $head; + # unescape $head for file operations + $not_esc_head =~ s/\\([*?\\])/$1/g; + $_ = $tail; + } + # + # If file component has no wildcards, we can avoid opendir + + my $tmp_tail = $_; + # if a '*' or '?' is preceded by an odd count of '\', temporary delete + # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as + # wildcards + $tmp_tail =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg; + + unless ($tmp_tail =~ /[*?]/) { # if there are wildcards ... + $not_esc_head = $head = '' if $head eq ':'; + my $not_esc_tail = $_; + # unescape $head and $tail for file operations + $not_esc_tail =~ s/\\([*?\\])/$1/g; + $head .= $_; + $not_esc_head .= $not_esc_tail; + if ($cond eq 'd') { push(@retval,$head) if -d $not_esc_head } + else { push(@retval,$head) if -e $not_esc_head } + next OUTER; + } + #print "opendir($not_esc_head)\n"; + opendir(D, $not_esc_head) or next OUTER; + my @leaves = readdir D; + closedir D; + + # escape regex metachars but not '\' and glob chars '*', '?' + $_ =~ s:([].+^\-\${}[|]):\\$1:g; + # and convert DOS-style wildcards to regex, + # but only if they are not escaped + $_ =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg; + + #print "regex: '$_', head: '$head', unescaped head: '$not_esc_head'\n"; + my $matchsub = eval 'sub { $_[0] =~ m|^' . $_ . '\\z|ios }'; + warn($@), next OUTER if $@; + INNER: + for my $e (@leaves) { + next INNER if $e eq '.' or $e eq '..'; + next INNER if $cond eq 'd' and ! -d "$not_esc_head$e"; + + if (&$matchsub($e)) { + my $leave = (($not_esc_head eq ':') && (-f "$not_esc_head$e")) ? + "$e" : "$not_esc_head$e"; + # + # On Mac OS, the two glob metachars '*' and '?' and the escape + # char '\' are valid characters for file and directory names. + # We have to escape and treat them specially. + $leave =~ s|([*?\\])|\\$1|g; + push(@matched, $leave); + next INNER; + } + } + push @retval, @matched if @matched; + } + return @retval; +} + +# +# _expand_volume() will only be used on Mac OS (Classic): +# Takes an array of original patterns as argument and returns an array of +# possibly modified patterns. Each original pattern is processed like +# that: +# + If there's a volume name in the pattern, we push a separate pattern +# for each mounted volume that matches (with '*', '?' and '\' escaped). +# + If there's no volume name in the original pattern, it is pushed +# unchanged. +# Note that the returned array of patterns may be empty. +# +sub _expand_volume { + + require MacPerl; # to be verbose + + my @pat = @_; + my @new_pat = (); + my @FSSpec_Vols = MacPerl::Volumes(); + my @mounted_volumes = (); + + foreach my $spec_vol (@FSSpec_Vols) { + # push all mounted volumes into array + push @mounted_volumes, MacPerl::MakePath($spec_vol); + } + #print "mounted volumes: |@mounted_volumes|\n"; + + while (@pat) { + my $pat = shift @pat; + if ($pat =~ /^([^:]+:)(.*)\z/) { # match a volume name? + my $vol_pat = $1; + my $tail = $2; + # + # escape regex metachars but not '\' and glob chars '*', '?' + $vol_pat =~ s:([].+^\-\${}[|]):\\$1:g; + # and convert DOS-style wildcards to regex, + # but only if they are not escaped + $vol_pat =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg; + #print "volume regex: '$vol_pat' \n"; + + foreach my $volume (@mounted_volumes) { + if ($volume =~ m|^$vol_pat\z|ios) { + # + # On Mac OS, the two glob metachars '*' and '?' and the + # escape char '\' are valid characters for volume names. + # We have to escape and treat them specially. + $volume =~ s|([*?\\])|\\$1|g; + push @new_pat, $volume . $tail; + } + } + } else { # no volume name in pattern, push original pattern + push @new_pat, $pat; + } + } + return @new_pat; +} + + +# +# _preprocess_pattern() will only be used on Mac OS (Classic): +# Resolves any updirs in the pattern. Removes a single trailing colon +# from the pattern, unless it's a volume name pattern like "*HD:" +# +sub _preprocess_pattern { + my @pat = @_; + + foreach my $p (@pat) { + my $proceed; + # resolve any updirs, e.g. "*HD:t?p::a*" -> "*HD:a*" + do { + $proceed = ($p =~ s/^(.*):[^:]+::(.*?)\z/$1:$2/); + } while ($proceed); + # remove a single trailing colon, e.g. ":*:" -> ":*" + $p =~ s/:([^:]+):\z/:$1/; + } + return @pat; +} + + +# +# _un_escape() will only be used on Mac OS (Classic): +# Unescapes a list of arguments which may contain escaped +# metachars '*', '?' and '\'. +# +sub _un_escape { + foreach (@_) { + s/\\([*?\\])/$1/g; + } + return @_; +} + # # this can be used to override CORE::glob in a specific # package by saying C in that @@ -95,8 +306,7 @@ my %iter; my %entries; sub glob { - my $pat = shift; - my $cxix = shift; + my($pat,$cxix) = @_; my @pat; # glob without args defaults to $_ @@ -111,14 +321,68 @@ sub glob { push @pat, $pat; } + # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3. + # abc3 will be the original {3} (and drop the {}). + # abc1 abc2 will be put in @appendpat. + # This was just the esiest way, not nearly the best. + REHASH: { + my @appendpat = (); + for (@pat) { + # There must be a "," I.E. abc{efg} is not what we want. + while ( /^(.*)(?; - + # from the command line (overrides only in main::) > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>" @@ -195,6 +462,61 @@ of the quoting rules used. Extending it to csh patterns is left as an exercise to the reader. +=head1 NOTES + +=over 4 + +=item * + +Mac OS (Classic) users should note a few differences. The specification +of pathnames in glob patterns adheres to the usual Mac OS conventions: +The path separator is a colon ':', not a slash '/' or backslash '\'. A +full path always begins with a volume name. A relative pathname on Mac +OS must always begin with a ':', except when specifying a file or +directory name in the current working directory, where the leading colon +is optional. If specifying a volume name only, a trailing ':' is +required. Due to these rules, a glob like E*:E will find all +mounted volumes, while a glob like E*E or E:*E will find +all files and directories in the current directory. + +Note that updirs in the glob pattern are resolved before the matching begins, +i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also, +that a single trailing ':' in the pattern is ignored (unless it's a volume +name pattern like "*HD:"), i.e. a glob like <:*:> will find both directories +I files (and not, as one might expect, only directories). + +The metachars '*', '?' and the escape char '\' are valid characters in +volume, directory and file names on Mac OS. Hence, if you want to match +a '*', '?' or '\' literally, you have to escape these characters. Due to +perl's quoting rules, things may get a bit complicated, when you want to +match a string like '\*' literally, or when you want to match '\' literally, +but treat the immediately following character '*' as metachar. So, here's a +rule of thumb (applies to both single- and double-quoted strings): escape +each '*' or '?' or '\' with a backslash, if you want to treat them literally, +and then double each backslash and your are done. E.g. + +- Match '\*' literally + + escape both '\' and '*' : '\\\*' + double the backslashes : '\\\\\\*' + +(Internally, the glob routine sees a '\\\*', which means that both '\' and +'*' are escaped.) + + +- Match '\' literally, treat '*' as metachar + + escape '\' but not '*' : '\\*' + double the backslashes : '\\\\*' + +(Internally, the glob routine sees a '\\*', which means that '\' is escaped and +'*' is not.) + +Note that you also have to quote literal spaces in the glob pattern, as described +above. + +=back + =head1 EXPORTS (by request only) glob() @@ -206,7 +528,7 @@ pandering to DOS habits. Needs a dose of optimizium too. =head1 AUTHOR -Gurusamy Sarathy +Gurusamy Sarathy =head1 HISTORY