Bump the debugger's version. Fail to update the changes.
[p5sagit/p5-mst-13.2.git] / lib / Text / ParseWords.pm
CommitLineData
package Text::ParseWords;

use strict;
require 5.006;
our $VERSION = "3.27";

use Exporter;
our @ISA = qw(Exporter);

# Exported by default.
our @EXPORT = qw(shellwords quotewords nested_quotewords parse_line);

# Exported only on request.
our @EXPORT_OK = qw(old_shellwords);

# When set to a true value, parse_line() (with a false $keep) also
# unescapes \\ and \' inside single-quoted strings.
our $PERL_SINGLE_QUOTE;
a0d0e21e 13
a5f75d66 14
# shellwords(@lines)
#
# Splits every line into words on runs of whitespace, using parse_line()
# with $keep false so that quotes and backslash escapes are interpreted
# and removed.  Leading whitespace on each line is ignored.
#
# Returns one flat list of all words.  Returns the empty list as soon as
# a non-empty line produces no words (parse_line's failure signal).
sub shellwords {
    my (@lines) = @_;    # work on a copy; the s/// below edits in place
    my @allwords;

    foreach my $line (@lines) {
        $line =~ s/^\s+//;
        my @words = parse_line('\s+', 0, $line);

        # parse_line() appends an undef word when the line ends in a
        # delimiter; trailing whitespace must not create an extra word.
        pop @words if (@words and !defined $words[-1]);

        return() unless (@words || !length($line));
        push(@allwords, @words);
    }
    return(@allwords);
}
a5f75d66 28
a5f75d66 29
a5f75d66 30
# quotewords($delim, $keep, @lines)
#
# Tokenizes each line with parse_line($delim, $keep, $line) and returns
# all tokens from all lines as one flat list.
#
# Returns the empty list as soon as a non-empty line produces no tokens
# (parse_line's failure signal).
sub quotewords {
    my ($delim, $keep, @lines) = @_;
    my @allwords;

    foreach my $line (@lines) {
        my @words = parse_line($delim, $keep, $line);
        return() unless (@words || !length($line));
        push(@allwords, @words);
    }
    return(@allwords);
}
a5f75d66 42
a5f75d66 43
a5f75d66 44
# nested_quotewords($delim, $keep, @lines)
#
# Like quotewords(), but keeps the tokens of each line separate: the
# result is a list of array references, one per element of @lines, in
# the same order.
#
# Returns the empty list as soon as a non-empty line produces no tokens
# (parse_line's failure signal).
sub nested_quotewords {
    my ($delim, $keep, @lines) = @_;
    my @allwords;

    for my $i (0 .. $#lines) {
        @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
        return() unless (@{$allwords[$i]} || !length($lines[$i]));
    }
    return(@allwords);
}
55
56
a0d0e21e 57
# parse_line($delimiter, $keep, $line)
#
# Tokenizes a single string.
#
#   $delimiter  regular expression (as a string) that separates tokens;
#               it is interpolated into the matching pattern below.
#   $keep       false:        quotes are removed and backslash escapes
#                             are resolved in the returned tokens;
#               true:         quotes and backslashes are kept verbatim;
#               'delimiters': as true, and each matched delimiter is
#                             also returned as a token of its own.
#   $line       the string to split.
#
# Returns the list of tokens.  A line that ends in a delimiter yields a
# trailing undef token.  Returns the empty list when the remaining text
# cannot be matched (for example an unterminated quote).
sub parse_line {
    my($delimiter, $keep, $line) = @_;
    my($word, @pieces);

    no warnings 'uninitialized';    # we will be testing undef strings

    while (length($line)) {
        # This pattern is optimised to be stack conservative on older perls.
        # Do not refactor without being careful and testing it on very long strings.
        # See Perl bug #42980 for an example of a stack busting input.
        $line =~ s/^
                    (?:
                        # double quoted string
                        (")                             # $quote
                        ((?>[^\\"]*(?:\\.[^\\"]*)*))"   # $quoted
                    |   # --OR--
                        # single quoted string
                        (')                             # $quote
                        ((?>[^\\']*(?:\\.[^\\']*)*))'   # $quoted
                    |   # --OR--
                        # unquoted string
                        (                               # $unquoted
                            (?:\\.|[^\\"'])*?
                        )
                        # followed by
                        (                               # $delim
                            \Z(?!\n)                    # EOL
                        |   # --OR--
                            (?-x:$delimiter)            # delimiter
                        |   # --OR--
                            (?!^)(?=["'])               # a quote
                        )
                    )//xs or return;            # extended layout

        # Captures 1-2 belong to the double-quote branch, 3-4 to the
        # single-quote branch, 5-6 to the unquoted branch.
        my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);

        # Nothing was consumed at all: give up rather than loop forever.
        return() unless( defined($quote) || length($unquoted) || length($delim));

        if ($keep) {
            $quoted = "$quote$quoted$quote";
        }
        else {
            $unquoted =~ s/\\(.)/$1/sg;
            if (defined $quote) {
                $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
                $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
            }
        }
        $word .= substr($line, 0, 0);    # leave results tainted
        $word .= defined $quote ? $quoted : $unquoted;

        # A delimiter ends the current word; adjacent quoted and unquoted
        # pieces with no delimiter between them accumulate into one word.
        if (length($delim)) {
            push(@pieces, $word);
            push(@pieces, $delim) if ($keep eq 'delimiters');
            undef $word;
        }
        # Input exhausted: flush whatever is pending (undef if the line
        # ended in a delimiter).
        if (!length($line)) {
            push(@pieces, $word);
        }
    }
    return(@pieces);
}
2304df62 120
121
9b599b2a 122
# old_shellwords(@lines)
#
# Older whitespace tokenizer.  It is not exported by default.
#
# Usage:
#       use Text::ParseWords qw(old_shellwords);
#       @words = old_shellwords($line);
#       or
#       @words = old_shellwords(@lines);
#       or
#       @words = old_shellwords();      # defaults to $_ (and clobbers it)
#
# All arguments are concatenated (with no separator) and split into
# words on whitespace.  Double- and single-quoted sections and
# backslash-escaped characters become part of the current word with the
# quotes and backslashes removed.
#
# Returns the list of words.  On an unmatched quote it warns via
# Carp::carp and returns the empty list.
sub old_shellwords {

    no warnings 'uninitialized';    # we will be testing undef strings

    # With arguments, work on a private $_; without, the caller's $_ is
    # consumed in place.
    local *_ = \join('', @_) if @_;
    my (@words, $snippet);

    s/\A\s+//;
    while ($_ ne '') {
        my $field = substr($_, 0, 0);    # leave results tainted

        # Consume consecutive pieces of one word until whitespace or the
        # end of the input.
        for (;;) {
            if (s/\A"(([^"\\]|\\.)*)"//s) {
                ($snippet = $1) =~ s#\\(.)#$1#sg;
            }
            elsif (/\A"/) {
                require Carp;
                Carp::carp("Unmatched double quote: $_");
                return();
            }
            elsif (s/\A'(([^'\\]|\\.)*)'//s) {
                ($snippet = $1) =~ s#\\(.)#$1#sg;
            }
            elsif (/\A'/) {
                require Carp;
                Carp::carp("Unmatched single quote: $_");
                return();
            }
            elsif (s/\A\\(.?)//s) {
                $snippet = $1;
            }
            elsif (s/\A([^\s\\'"]+)//) {
                $snippet = $1;
            }
            else {
                # Whitespace (or end of input): the word is complete.
                s/\A\s+//;
                last;
            }
            $field .= $snippet;
        }
        push(@words, $field);
    }
    return @words;
}
9b599b2a 173
1741;
175
176__END__
177
178=head1 NAME
179
180Text::ParseWords - parse text into an array of tokens or array of arrays
181
182=head1 SYNOPSIS
183
184 use Text::ParseWords;
9480d411 185 @lists = nested_quotewords($delim, $keep, @lines);
186 @words = quotewords($delim, $keep, @lines);
187 @words = shellwords(@lines);
188 @words = parse_line($delim, $keep, $line);
189 @words = old_shellwords(@lines); # DEPRECATED!
9b599b2a 190
191=head1 DESCRIPTION
192
The &nested_quotewords() and &quotewords() functions accept a delimiter
(which can be a regular expression)
and a list of lines and then break those lines up into a list of
words ignoring delimiters that appear inside quotes.  &quotewords()
returns all of the tokens in a single long list, while &nested_quotewords()
returns a list of token lists corresponding to the elements of @lines.
&parse_line() does tokenizing on a single string.  The &*quotewords()
functions simply call &parse_line(), so if you're only splitting
one line you can call &parse_line() directly and save a function
call.
203
204The $keep argument is a boolean flag. If true, then the tokens are
205split on the specified delimiter, but all other characters (quotes,
206backslashes, etc.) are kept in the tokens. If $keep is false then the
207&*quotewords() functions remove all quotes and backslashes that are
208not themselves backslash-escaped or inside of single quotes (i.e.,
209&quotewords() tries to interpret these characters just like the Bourne
210shell). NB: these semantics are significantly different from the
211original version of this module shipped with Perl 5.000 through 5.004.
212As an additional feature, $keep may be the keyword "delimiters" which
213causes the functions to preserve the delimiters in each string as
214tokens in the token lists, in addition to preserving quote and
215backslash characters.
216
&shellwords() behaves like a special case of &quotewords(): it
does token parsing with whitespace as a delimiter and $keep false--
similar to most Unix shells.  It also ignores leading whitespace on
each line.
220
221=head1 EXAMPLES
222
223The sample program:
224
225 use Text::ParseWords;
9480d411 226 @words = quotewords('\s+', 0, q{this is "a test" of\ quotewords \"for you});
9b599b2a 227 $i = 0;
228 foreach (@words) {
229 print "$i: <$_>\n";
230 $i++;
231 }
232
233produces:
234
235 0: <this>
236 1: <is>
237 2: <a test>
238 3: <of quotewords>
239 4: <"for>
240 5: <you>
241
242demonstrating:
243
244=over 4
245
246=item 0
551e1d92 247
9b599b2a 248a simple word
249
250=item 1
551e1d92 251
9b599b2a 252multiple spaces are skipped because of our $delim
253
254=item 2
551e1d92 255
9b599b2a 256use of quotes to include a space in a word
257
258=item 3
551e1d92 259
9b599b2a 260use of a backslash to include a space in a word
261
262=item 4
551e1d92 263
9b599b2a 264use of a backslash to remove the special meaning of a double-quote
265
266=item 5
551e1d92 267
9b599b2a 268another simple word (note the lack of effect of the
269backslashed double-quote)
270
271=back
272
9480d411 273Replacing C<quotewords('\s+', 0, q{this is...})>
274with C<shellwords(q{this is...})>
9b599b2a 275is a simpler way to accomplish the same thing.
276
277=head1 AUTHORS
278
9480d411 279Maintainer: Alexandr Ciornii <alexchornyATgmail.com>.
280
281Previous maintainer: Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
9b599b2a 282author unknown). Much of the code for &parse_line() (including the
283primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
284
Examples section and other documentation provided by John Heidemann
<johnh@ISI.EDU>
287
288Bug reports, patches, and nagging provided by lots of folks-- thanks
289everybody! Special thanks to Michael Schwern <schwern@envirolink.org>
290for assuring me that a &nested_quotewords() would be useful, and to
291Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
292error-checking (sort of-- you had to be there).
293
294=cut