Integrate mainline
[p5sagit/p5-mst-13.2.git] / ext / File / Glob / Glob.pm
CommitLineData
72b16652 1package File::Glob;
2
3use strict;
17f410f9 4our($VERSION, @ISA, @EXPORT_OK, @EXPORT_FAIL, %EXPORT_TAGS,
5 $AUTOLOAD, $DEFAULT_FLAGS);
72b16652 6
9426adcd 7use XSLoader ();
72b16652 8
@ISA = qw(Exporter);

# NOTE: glob() is exported only for compatibility with 5.6.0, and
# csh_glob() should not be called directly unless you know what you
# are doing.

{
    # The flag and error constants.  Each one can be imported by name
    # and is also a member of the ':glob' tag, so the list is written
    # once and shared by both export tables below.
    my @constants = qw(
        GLOB_ABEND
        GLOB_ALPHASORT
        GLOB_ALTDIRFUNC
        GLOB_BRACE
        GLOB_CSH
        GLOB_ERR
        GLOB_ERROR
        GLOB_LIMIT
        GLOB_MARK
        GLOB_NOCASE
        GLOB_NOCHECK
        GLOB_NOMAGIC
        GLOB_NOSORT
        GLOB_NOSPACE
        GLOB_QUOTE
        GLOB_TILDE
        GLOB_UTF8
    );

    # Everything that may be requested explicitly.
    @EXPORT_OK = (qw(csh_glob bsd_glob glob), @constants);

    # ':glob' pulls in all constants plus the two globbing functions
    # (but not csh_glob).
    %EXPORT_TAGS = (
        'glob' => [ @constants, qw(glob bsd_glob) ],
    );
}

$VERSION = '1.01';
220398a0 62
# import() - handle the module's own pseudo-tags, then defer to Exporter.
#
# The tags ':case', ':nocase', ':utf8' and ':globally' are removed from
# the argument list and acted upon here; whatever is left in @_ is
# handed unchanged to Exporter::import.
sub import {
    require Exporter;

    # $_[0] is the class name, so scanning starts at index 1.  The index
    # only advances past arguments that are kept, because removing an
    # element shifts its successor into the same slot.
    my $idx = 1;
    while ($idx < @_) {
        my ($tag) = $_[$idx] =~ /^:(case|nocase|globally|utf8)$/;

        unless (defined $tag) {
            # Not one of ours: leave it for Exporter.
            ++$idx;
            next;
        }

        splice(@_, $idx, 1);

        if ($tag eq 'case') {
            $DEFAULT_FLAGS &= ~GLOB_NOCASE();
        }
        elsif ($tag eq 'nocase') {
            $DEFAULT_FLAGS |= GLOB_NOCASE();
        }
        elsif ($tag eq 'utf8') {
            $DEFAULT_FLAGS |= GLOB_UTF8();
        }
        else {
            # ':globally' - make csh_glob the implementation of glob()
            # everywhere.  Warnings are switched off for the duration of
            # the assignment.
            local $^W;
            *CORE::GLOBAL::glob = \&File::Glob::csh_glob;
        }
    }

    # Tail-call so that Exporter sees our caller as the importing package.
    goto &Exporter::import;
}
82
# AUTOLOAD - define GLOB_* constant subs on first use.
#
# The unqualified name of the missing sub is passed to constant() (the
# XS lookup function), which returns an ($error, $value) pair.  When
# $error is true the lookup failed and we croak with that message, so
# the failure is reported at the caller's location.  Otherwise a sub
# returning the value is installed under the fully qualified name and
# control is transferred to it, so later calls bypass AUTOLOAD entirely.
sub AUTOLOAD {
    my $constname = $AUTOLOAD;
    $constname =~ s/.*:://;

    my ($error, $val) = constant($constname);
    if ($error) {
        require Carp;
        Carp::croak($error);
    }

    {
        # Install a closure rather than compiling generated source with a
        # string eval: a string eval that fails to compile would leave
        # the sub undefined, and the goto below would then re-enter
        # AUTOLOAD without end.
        no strict 'refs';
        *$AUTOLOAD = sub { $val };
    }

    goto &$AUTOLOAD;
}
98
9426adcd 99XSLoader::load 'File::Glob', $VERSION;
72b16652 100
101# Preloaded methods go here.
102
# GLOB_ERROR - return the current value reported by constant() for
# 'GLOB_ERROR'.  Written as a real sub (not left to AUTOLOAD) so that
# the value is looked up afresh on every call instead of being cached.
sub GLOB_ERROR {
    my @error_and_value = constant('GLOB_ERROR');

    # Element 0 is the lookup error, element 1 the value itself.
    return (@error_and_value)[1];
}
106
# GLOB_CSH - the combination of flags that makes up csh-style globbing:
# brace expansion, NOMAGIC, backslash quoting, tilde expansion and
# alphabetical sorting, OR-ed together.
sub GLOB_CSH () {
    my $mask = 0;
    $mask |= $_
        for GLOB_BRACE(),
            GLOB_NOMAGIC(),
            GLOB_QUOTE(),
            GLOB_TILDE(),
            GLOB_ALPHASORT();
    return $mask;
}
72b16652 114
# Flags used when the caller supplies none: csh-style globbing, plus
# case-insensitive matching on the platforms named in the pattern below.
$DEFAULT_FLAGS = GLOB_CSH();
$DEFAULT_FLAGS |= GLOB_NOCASE()
    if $^O =~ /^(?:MSWin32|VMS|os2|dos|riscos|MacOS)$/;
119
72b16652 120# Autoload methods go after =cut, and are processed by the autosplit program.
121
# bsd_glob(PATTERN [, FLAGS]) - expand a single pattern with doglob().
#
# FLAGS falls back to $DEFAULT_FLAGS only when it is omitted altogether;
# an explicitly passed value (even undef) is forwarded as given.
sub bsd_glob {
    my $pattern = shift;
    my $flags   = @_ ? shift : $DEFAULT_FLAGS;
    return doglob($pattern, $flags);
}

# File::Glob::glob() is a deprecated alias for bsd_glob().  Its prototype
# differs from that of CORE::glob(), so new code should call bsd_glob()
# directly.
sub glob {
    goto &bsd_glob;
}
133
## Iterator bookkeeping for csh_glob(); the approach is borrowed heavily
## from gsar's File::DosGlob.  Both hashes are keyed by context index.
my %iter;       # number of results that were pending at the last call
my %entries;    # array ref holding the results not yet handed out

# csh_glob([PATTERN [, CONTEXT]]) - csh-compatible glob.
#
# PATTERN defaults to $_.  A pattern that still contains whitespace after
# trimming is split into several patterns, each globbed separately.
# In list context every result is returned at once.  Otherwise results
# are returned one per call, with undef marking the end of the list;
# CONTEXT identifies which iteration is being continued and defaults to
# the shared key '_G_'.
sub csh_glob {
    my ($pat, $cxix) = @_;

    # glob without args defaults to $_
    defined $pat or $pat = $_;

    # Trim both ends first so that things like < *.c> and <*.c > do not
    # yield empty elements; surrounding blanks alone should not trigger
    # the word splitting below.
    for ($pat) {
        s/^\s+//;
        s/\s+$//;
    }

    my @words;
    if ($pat =~ /\s/) {
        # XXX needed for compatibility with the csh implementation in
        # Perl.  A flag to disable this behaviour should be supported.
        require Text::ParseWords;
        @words = Text::ParseWords::parse_line('\s+',0,$pat);
    }

    # assume the global context if the caller did not provide one
    $cxix = '_G_' unless defined $cxix;
    $iter{$cxix} = 0 unless exists $iter{$cxix};

    # A fresh iteration: expand everything up front.  If splitting gave
    # no words, the trimmed pattern is globbed as a single unit.
    if ($iter{$cxix} == 0) {
        my @todo = @words ? @words : ($pat);
        $entries{$cxix} = [ map { doglob($_, $DEFAULT_FLAGS) } @todo ];
    }

    # List context: hand over everything and forget the iteration.
    if (wantarray) {
        delete $iter{$cxix};
        return @{ delete $entries{$cxix} };
    }

    # Otherwise give out one entry per call while any remain.
    my $pending = $entries{$cxix};
    $iter{$cxix} = scalar @$pending;
    return shift @$pending if $iter{$cxix};

    # Exhausted: clear the state and signal end of list with undef.
    delete $iter{$cxix};
    delete $entries{$cxix};
    return undef;
}
189
1901;
191__END__
192
193=head1 NAME
194
195File::Glob - Perl extension for BSD glob routine
196
197=head1 SYNOPSIS
198
199 use File::Glob ':glob';
00c80938 200 @list = bsd_glob('*.[ch]');
201 $homedir = bsd_glob('~gnat', GLOB_TILDE | GLOB_ERR);
72b16652 202 if (GLOB_ERROR) {
203 # an error occurred reading $homedir
204 }
205
00c80938 206 ## override the core glob (CORE::glob() does this automatically
11fe14b1 207 ## by default anyway, since v5.6.0)
220398a0 208 use File::Glob ':globally';
80a5d8e7 209 my @sources = <*.{c,h,y}>;
220398a0 210
211 ## override the core glob, forcing case sensitivity
212 use File::Glob qw(:globally :case);
80a5d8e7 213 my @sources = <*.{c,h,y}>;
220398a0 214
215 ## override the core glob forcing case insensitivity
216 use File::Glob qw(:globally :nocase);
80a5d8e7 217 my @sources = <*.{c,h,y}>;
218
219 ## override the core glob forcing UTF-8 names
220 use File::Glob qw(:globally :utf8);
221 my @sources = <*.{c,h,y}>;
72b16652 222
223=head1 DESCRIPTION
224
00c80938 225File::Glob::bsd_glob() implements the FreeBSD glob(3) routine, which is
226a superset of the POSIX glob() (described in IEEE Std 1003.2 "POSIX.2").
227bsd_glob() takes a mandatory C<pattern> argument, and an optional
72b16652 228C<flags> argument, and returns a list of filenames matching the
229pattern, with interpretation of the pattern modified by the C<flags>
00c80938 230variable.
231
232Since v5.6.0, Perl's CORE::glob() is implemented in terms of bsd_glob().
233Note that they don't share the same prototype--CORE::glob() only accepts
234a single argument. Due to historical reasons, CORE::glob() will also
235split its argument on whitespace, treating it as multiple patterns,
236whereas bsd_glob() considers them as one pattern.
237
238The POSIX defined flags for bsd_glob() are:
72b16652 239
240=over 4
241
242=item C<GLOB_ERR>
243
00c80938 244Force bsd_glob() to return an error when it encounters a directory it
245cannot open or read. Ordinarily bsd_glob() continues to find matches.
72b16652 246
b8ef571c 247=item C<GLOB_LIMIT>
248
249Make bsd_glob() return an error (GLOB_NOSPACE) when the pattern expands
250to a size bigger than the system constant C<ARG_MAX> (usually found in
251limits.h). If your system does not define this constant, bsd_glob() uses
252C<sysconf(_SC_ARG_MAX)> or C<_POSIX_ARG_MAX> where available (in that
253order). You can inspect these values using the standard C<POSIX>
254extension.
255
72b16652 256=item C<GLOB_MARK>
257
258Each pathname that is a directory that matches the pattern has a slash
259appended.
260
220398a0 261=item C<GLOB_NOCASE>
262
263By default, file names are assumed to be case sensitive; this flag
00c80938 264makes bsd_glob() treat case differences as not significant.
220398a0 265
72b16652 266=item C<GLOB_NOCHECK>
267
00c80938 268If the pattern does not match any pathname, then bsd_glob() returns a list
72b16652 269consisting of only the pattern. If C<GLOB_QUOTE> is set, its effect
270is present in the pattern returned.
271
272=item C<GLOB_NOSORT>
273
274By default, the pathnames are sorted in ascending ASCII order; this
00c80938 275flag prevents that sorting (speeding up bsd_glob()).
72b16652 276
277=back
278
279The FreeBSD extensions to the POSIX standard are the following flags:
280
281=over 4
282
283=item C<GLOB_BRACE>
284
a45bd81d 285Pre-process the string to expand C<{pat,pat,...}> strings like csh(1).
72b16652 286The pattern '{}' is left unexpanded for historical reasons (and csh(1)
287does the same thing to ease typing of find(1) patterns).
288
289=item C<GLOB_NOMAGIC>
290
291Same as C<GLOB_NOCHECK> but it only returns the pattern if it does not
292contain any of the special characters "*", "?" or "[". C<NOMAGIC> is
293provided to simplify implementing the historic csh(1) globbing
294behaviour and should probably not be used anywhere else.
295
296=item C<GLOB_QUOTE>
297
298Use the backslash ('\') character for quoting: every occurrence of a
299backslash followed by a character in the pattern is replaced by that
300character, avoiding any special interpretation of the character.
220398a0 301(But see below for exceptions on DOSISH systems).
72b16652 302
303=item C<GLOB_TILDE>
304
305Expand patterns that start with '~' to user name home directories.
306
307=item C<GLOB_CSH>
308
309For convenience, C<GLOB_CSH> is a synonym for
2d5e9e5d 310C<GLOB_BRACE | GLOB_NOMAGIC | GLOB_QUOTE | GLOB_TILDE | GLOB_ALPHASORT>.
72b16652 311
312=back
313
314The POSIX provided C<GLOB_APPEND>, C<GLOB_DOOFFS>, and the FreeBSD
315extensions C<GLOB_ALTDIRFUNC>, and C<GLOB_MAGCHAR> flags have not been
316implemented in the Perl version because they involve more complex
317interaction with the underlying C structures.
318
2d5e9e5d 319The following flag has been added in the Perl implementation for
320csh compatibility:
321
322=over 4
323
324=item C<GLOB_ALPHASORT>
325
If C<GLOB_NOSORT> is not in effect, sort filenames in alphabetical
order (case does not matter) rather than in ASCII order.
328
329=back
330
80a5d8e7 331The following flag has been added in the Perl implementation for
332Unicode compatibility:
333
334=over 4
335
336=item C<GLOB_UTF8>
337
The filenames returned will be marked as being in UTF-8 encoding of
Unicode.  Note that it is your responsibility to ascertain that the
filesystem you are globbing in returns valid UTF-8 filenames.

=back

72b16652 342=head1 DIAGNOSTICS
343
00c80938 344bsd_glob() returns a list of matching paths, possibly zero length. If an
72b16652 345error occurred, &File::Glob::GLOB_ERROR will be non-zero and C<$!> will be
346set. &File::Glob::GLOB_ERROR is guaranteed to be zero if no error occurred,
347or one of the following values otherwise:
348
349=over 4
350
351=item C<GLOB_NOSPACE>
352
353An attempt to allocate memory failed.
354
355=item C<GLOB_ABEND>
356
357The glob was stopped because an error was encountered.
358
359=back
360
In the case where bsd_glob() has found some matching paths, but is
interrupted by an error, it will return a list of filenames B<and>
set &File::Glob::GLOB_ERROR.
364
00c80938 365Note that bsd_glob() deviates from POSIX and FreeBSD glob(3) behaviour
366by not considering C<ENOENT> and C<ENOTDIR> as errors - bsd_glob() will
72b16652 367continue processing despite those errors, unless the C<GLOB_ERR> flag is
368set.
369
370Be aware that all filenames returned from File::Glob are tainted.
371
372=head1 NOTES
373
374=over 4
375
376=item *
377
00c80938 378If you want to use multiple patterns, e.g. C<bsd_glob "a* b*">, you should
150b260b 379probably throw them in a set as in C<bsd_glob "{a*,b*}">. This is because
380the argument to bsd_glob() isn't subjected to parsing by the C shell.
381Remember that you can use a backslash to escape things.
72b16652 382
383=item *
384
220398a0 385On DOSISH systems, backslash is a valid directory separator character.
386In this case, use of backslash as a quoting character (via GLOB_QUOTE)
387interferes with the use of backslash as a directory separator. The
388best (simplest, most portable) solution is to use forward slashes for
389directory separators, and backslashes for quoting. However, this does
390not match "normal practice" on these systems. As a concession to user
391expectation, therefore, backslashes (under GLOB_QUOTE) only quote the
392glob metacharacters '[', ']', '{', '}', '-', '~', and backslash itself.
393All other backslashes are passed through unchanged.
394
395=item *
396
72b16652 397Win32 users should use the real slash. If you really want to use
398backslashes, consider using Sarathy's File::DosGlob, which comes with
399the standard Perl distribution.
400
7369a524 401=item *
402
403Mac OS (Classic) users should note a few differences. Since
404Mac OS is not Unix, when the glob code encounters a tilde glob (e.g.
be708cc0 405~user) and the C<GLOB_TILDE> flag is used, it simply returns that
7369a524 406pattern without doing any expansion.
407
408Glob on Mac OS is case-insensitive by default (if you don't use any
409flags). If you specify any flags at all and still want glob
410to be case-insensitive, you must include C<GLOB_NOCASE> in the flags.
411
412The path separator is ':' (aka colon), not '/' (aka slash). Mac OS users
413should be careful about specifying relative pathnames. While a full path
414always begins with a volume name, a relative pathname should always
415begin with a ':'. If specifying a volume name only, a trailing ':' is
416required.
417
be708cc0 418The specification of pathnames in glob patterns adheres to the usual Mac
419OS conventions: The path separator is a colon ':', not a slash '/'. A
420full path always begins with a volume name. A relative pathname on Mac
421OS must always begin with a ':', except when specifying a file or
422directory name in the current working directory, where the leading colon
423is optional. If specifying a volume name only, a trailing ':' is
424required. Due to these rules, a glob like E<lt>*:E<gt> will find all
425mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
426all files and directories in the current directory.
427
428Note that updirs in the glob pattern are resolved before the matching begins,
429i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
430that a single trailing ':' in the pattern is ignored (unless it's a volume
431name pattern like "*HD:"), i.e. a glob like E<lt>:*:E<gt> will find both
432directories I<and> files (and not, as one might expect, only directories).
433You can, however, use the C<GLOB_MARK> flag to distinguish (without a file
434test) directory names from file names.
435
436If the C<GLOB_MARK> flag is set, all directory paths will have a ':' appended.
437Since a directory like 'lib:' is I<not> a valid I<relative> path on Mac OS,
438both a leading and a trailing colon will be added, when the directory name in
439question doesn't contain any colons (e.g. 'lib' becomes ':lib:').
440
a45bd81d 441=back
442
72b16652 443=head1 AUTHOR
444
0e950d83 445The Perl interface was written by Nathan Torkington E<lt>gnat@frii.comE<gt>,
72b16652 446and is released under the artistic license. Further modifications were
7369a524 447made by Greg Bacon E<lt>gbacon@cs.uah.eduE<gt>, Gurusamy Sarathy
448E<lt>gsar@activestate.comE<gt>, and Thomas Wegner
449E<lt>wegner_thomas@yahoo.comE<gt>. The C glob code has the
72b16652 450following copyright:
451
0e950d83 452 Copyright (c) 1989, 1993 The Regents of the University of California.
453 All rights reserved.
3cb6de81 454
0e950d83 455 This code is derived from software contributed to Berkeley by
456 Guido van Rossum.
457
458 Redistribution and use in source and binary forms, with or without
459 modification, are permitted provided that the following conditions
460 are met:
461
462 1. Redistributions of source code must retain the above copyright
463 notice, this list of conditions and the following disclaimer.
464 2. Redistributions in binary form must reproduce the above copyright
465 notice, this list of conditions and the following disclaimer in the
466 documentation and/or other materials provided with the distribution.
467 3. Neither the name of the University nor the names of its contributors
468 may be used to endorse or promote products derived from this software
469 without specific prior written permission.
470
471 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
472 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
473 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
474 ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
475 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
476 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
477 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
478 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
479 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
480 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
481 SUCH DAMAGE.
72b16652 482
483=cut