Remove a spurious undefined warning when using getopts.pl with -w.
[p5sagit/p5-mst-13.2.git] / lib / Text / ParseWords.pm
CommitLineData
a0d0e21e 1package Text::ParseWords;
2
b174585d 3use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
975e5341 4$VERSION = "3.24";
a0d0e21e 5
9b599b2a 6require 5.000;
dc848c6f 7
9b599b2a 8use Exporter;
dc848c6f 9@ISA = qw(Exporter);
9b599b2a 10@EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
a0d0e21e 11@EXPORT_OK = qw(old_shellwords);
12
a5f75d66 13
9b599b2a 14sub shellwords {
6a724e38 15 my(@lines) = @_;
9b599b2a 16 $lines[$#lines] =~ s/\s+$//;
17 return(quotewords('\s+', 0, @lines));
18}
a5f75d66 19
a5f75d66 20
a5f75d66 21
9b599b2a 22sub quotewords {
23 my($delim, $keep, @lines) = @_;
24 my($line, @words, @allwords);
9b599b2a 25
26 foreach $line (@lines) {
27 @words = parse_line($delim, $keep, $line);
28 return() unless (@words || !length($line));
29 push(@allwords, @words);
30 }
31 return(@allwords);
32}
a5f75d66 33
a5f75d66 34
a5f75d66 35
9b599b2a 36sub nested_quotewords {
37 my($delim, $keep, @lines) = @_;
38 my($i, @allwords);
6a724e38 39
9b599b2a 40 for ($i = 0; $i < @lines; $i++) {
41 @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
42 return() unless (@{$allwords[$i]} || !length($lines[$i]));
43 }
44 return(@allwords);
2304df62 45}
46
47
a0d0e21e 48
9b599b2a 49sub parse_line {
50 my($delimiter, $keep, $line) = @_;
429b060a 51 my($word, @pieces);
936c8837 52
6a724e38 53 no warnings 'uninitialized'; # we will be testing undef strings
54
9b599b2a 55 while (length($line)) {
429b060a 56 $line =~ s/^(["']) # a $quote
57 ((?:\\.|(?!\1)[^\\])*) # and $quoted text
58 \1 # followed by the same quote
59 | # --OR--
60 ^((?:\\.|[^\\"'])*?) # an $unquoted text
61 (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
62 # plus EOL, delimiter, or quote
30799d55 63 //xs or return; # extended layout
429b060a 64 my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
65 return() unless( defined($quote) || length($unquoted) || length($delim));
936c8837 66
9b599b2a 67 if ($keep) {
68 $quoted = "$quote$quoted$quote";
69 }
70 else {
429b060a 71 $unquoted =~ s/\\(.)/$1/sg;
167b9ebc 72 if (defined $quote) {
429b060a 73 $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
167b9ebc 74 $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
75 }
9b599b2a 76 }
6a724e38 77 $word .= substr($line, 0, 0); # leave results tainted
167b9ebc 78 $word .= defined $quote ? $quoted : $unquoted;
9b599b2a 79
80 if (length($delim)) {
81 push(@pieces, $word);
82 push(@pieces, $delim) if ($keep eq 'delimiters');
83 undef $word;
84 }
85 if (!length($line)) {
86 push(@pieces, $word);
2304df62 87 }
2304df62 88 }
9b599b2a 89 return(@pieces);
2304df62 90}
2304df62 91
92
9b599b2a 93
a0d0e21e 94sub old_shellwords {
95
96 # Usage:
97 # use ParseWords;
98 # @words = old_shellwords($line);
99 # or
100 # @words = old_shellwords(@lines);
6a724e38 101 # or
102 # @words = old_shellwords(); # defaults to $_ (and clobbers it)
a0d0e21e 103
6a724e38 104 no warnings 'uninitialized'; # we will be testing undef strings
105 local *_ = \join('', @_) if @_;
106 my (@words, $snippet);
a0d0e21e 107
6a724e38 108 s/\A\s+//;
a0d0e21e 109 while ($_ ne '') {
6a724e38 110 my $field = substr($_, 0, 0); # leave results tainted
a0d0e21e 111 for (;;) {
6a724e38 112 if (s/\A"(([^"\\]|\\.)*)"//s) {
113 ($snippet = $1) =~ s#\\(.)#$1#sg;
a0d0e21e 114 }
6a724e38 115 elsif (/\A"/) {
116 require Carp;
117 Carp::carp("Unmatched double quote: $_");
9b599b2a 118 return();
a0d0e21e 119 }
6a724e38 120 elsif (s/\A'(([^'\\]|\\.)*)'//s) {
121 ($snippet = $1) =~ s#\\(.)#$1#sg;
a0d0e21e 122 }
6a724e38 123 elsif (/\A'/) {
124 require Carp;
125 Carp::carp("Unmatched single quote: $_");
9b599b2a 126 return();
a0d0e21e 127 }
6a724e38 128 elsif (s/\A\\(.)//s) {
a0d0e21e 129 $snippet = $1;
130 }
6a724e38 131 elsif (s/\A([^\s\\'"]+)//) {
a0d0e21e 132 $snippet = $1;
133 }
134 else {
6a724e38 135 s/\A\s+//;
a0d0e21e 136 last;
137 }
138 $field .= $snippet;
139 }
140 push(@words, $field);
141 }
6a724e38 142 return @words;
a0d0e21e 143}
9b599b2a 144
1451;
146
147__END__
148
149=head1 NAME
150
151Text::ParseWords - parse text into an array of tokens or array of arrays
152
153=head1 SYNOPSIS
154
155 use Text::ParseWords;
156 @lists = &nested_quotewords($delim, $keep, @lines);
157 @words = &quotewords($delim, $keep, @lines);
158 @words = &shellwords(@lines);
159 @words = &parse_line($delim, $keep, $line);
160 @words = &old_shellwords(@lines); # DEPRECATED!
161
162=head1 DESCRIPTION
163
164The &nested_quotewords() and &quotewords() functions accept a delimiter
165(which can be a regular expression)
166and a list of lines and then breaks those lines up into a list of
167words ignoring delimiters that appear inside quotes. &quotewords()
168returns all of the tokens in a single long list, while &nested_quotewords()
169returns a list of token lists corresponding to the elements of @lines.
170&parse_line() does tokenizing on a single string. The &*quotewords()
cf18bebb 171functions simply call &parse_line(), so if you're only splitting
172one line you can call &parse_line() directly and save a function
9b599b2a 173call.
174
175The $keep argument is a boolean flag. If true, then the tokens are
176split on the specified delimiter, but all other characters (quotes,
177backslashes, etc.) are kept in the tokens. If $keep is false then the
178&*quotewords() functions remove all quotes and backslashes that are
179not themselves backslash-escaped or inside of single quotes (i.e.,
180&quotewords() tries to interpret these characters just like the Bourne
181shell). NB: these semantics are significantly different from the
182original version of this module shipped with Perl 5.000 through 5.004.
183As an additional feature, $keep may be the keyword "delimiters" which
184causes the functions to preserve the delimiters in each string as
185tokens in the token lists, in addition to preserving quote and
186backslash characters.
187
188&shellwords() is written as a special case of &quotewords(), and it
189does token parsing with whitespace as a delimiter-- similar to most
190Unix shells.
191
192=head1 EXAMPLES
193
194The sample program:
195
196 use Text::ParseWords;
197 @words = &quotewords('\s+', 0, q{this is "a test" of\ quotewords \"for you});
198 $i = 0;
199 foreach (@words) {
200 print "$i: <$_>\n";
201 $i++;
202 }
203
204produces:
205
206 0: <this>
207 1: <is>
208 2: <a test>
209 3: <of quotewords>
210 4: <"for>
211 5: <you>
212
213demonstrating:
214
215=over 4
216
217=item 0
551e1d92 218
9b599b2a 219a simple word
220
221=item 1
551e1d92 222
9b599b2a 223multiple spaces are skipped because of our $delim
224
225=item 2
551e1d92 226
9b599b2a 227use of quotes to include a space in a word
228
229=item 3
551e1d92 230
9b599b2a 231use of a backslash to include a space in a word
232
233=item 4
551e1d92 234
9b599b2a 235use of a backslash to remove the special meaning of a double-quote
236
237=item 5
551e1d92 238
9b599b2a 239another simple word (note the lack of effect of the
240backslashed double-quote)
241
242=back
243
244Replacing C<&quotewords('\s+', 0, q{this is...})>
245with C<&shellwords(q{this is...})>
246is a simpler way to accomplish the same thing.
247
248=head1 AUTHORS
249
250Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
251author unknown). Much of the code for &parse_line() (including the
252primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
253
254Examples section another documentation provided by John Heidemann
255<johnh@ISI.EDU>
256
257Bug reports, patches, and nagging provided by lots of folks-- thanks
258everybody! Special thanks to Michael Schwern <schwern@envirolink.org>
259for assuring me that a &nested_quotewords() would be useful, and to
260Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
261error-checking (sort of-- you had to be there).
262
263=cut