CGI.pm broke again
[p5sagit/p5-mst-13.2.git] / lib / Text / ParseWords.pm
CommitLineData
a0d0e21e 1package Text::ParseWords;
2
3require 5.000;
a0d0e21e 4use Carp;
5
dc848c6f 6require AutoLoader;
7*AUTOLOAD = \&AutoLoader::AUTOLOAD;
8
9require Exporter;
10@ISA = qw(Exporter);
a0d0e21e 11@EXPORT = qw(shellwords quotewords);
12@EXPORT_OK = qw(old_shellwords);
13
a5f75d66 14=head1 NAME
2304df62 15
a5f75d66 16Text::ParseWords - parse text into an array of tokens
a0d0e21e 17
a5f75d66 18=head1 SYNOPSIS
a0d0e21e 19
a5f75d66 20 use Text::ParseWords;
21 @words = &quotewords($delim, $keep, @lines);
22 @words = &shellwords(@lines);
23 @words = &old_shellwords(@lines);
24
25=head1 DESCRIPTION
26
27&quotewords() accepts a delimiter (which can be a regular expression)
28and a list of lines and then breaks those lines up into a list of
29words ignoring delimiters that appear inside quotes.
30
31The $keep argument is a boolean flag. If true, the quotes are kept
32with each word, otherwise quotes are stripped in the splitting process.
33$keep also defines whether unprotected backslashes are retained.
34
35A &shellwords() replacement is included to demonstrate the new package.
36This version differs from the original in that it will _NOT_ default
37to using $_ if no arguments are given. I personally find the old behavior
38to be a mis-feature.
39
40
41&quotewords() works by simply jamming all of @lines into a single
42string in $_ and then pulling off words a bit at a time until $_
43is exhausted.
44
a5f75d66 45=head1 AUTHORS
46
47Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
48
49Basically an update and generalization of the old shellwords.pl.
50Much code shamelessly stolen from the old version (author unknown).
51
52=cut
53
541;
55__END__
56
57sub shellwords {
2304df62 58 local(@lines) = @_;
59 $lines[$#lines] =~ s/\s+$//;
a0d0e21e 60 &quotewords('\s+', 0, @lines);
2304df62 61}
62
63
a0d0e21e 64
65sub quotewords {
0dbfbeef 66
67# The inner "for" loop builds up each word (or $field) one $snippet
68# at a time. A $snippet is a quoted string, a backslashed character,
69# or an unquoted string. We fall out of the "for" loop when we reach
70# the end of $_ or when we hit a delimiter. Falling out of the "for"
71# loop, we push the $field we've been building up onto the list of
72# @words we'll be returning, and then loop back and pull another word
73# off of $_.
74#
75# The first two cases inside the "for" loop deal with quoted strings.
76# The first case matches a double quoted string, removes it from $_,
77# and assigns the double quoted string to $snippet in the body of the
78# conditional. The second case handles single quoted strings. In
79# the third case we've found a quote at the current beginning of $_,
80# but it didn't match the quoted string regexps in the first two cases,
81# so it must be an unbalanced quote and we croak with an error (which can
82# be caught by eval()).
83#
84# The next case handles backslashed characters, and the next case is the
85# exit case on reaching the end of the string or finding a delimiter.
86#
87# Otherwise, we've found an unquoted thing and we pull of characters one
88# at a time until we reach something that could start another $snippet--
89# a quote of some sort, a backslash, or the delimiter. This one character
90# at a time behavior was necessary if the delimiter was going to be a
91# regexp (love to hear it if you can figure out a better way).
92
2304df62 93 local($delim, $keep, @lines) = @_;
94 local(@words,$snippet,$field,$_);
95
96 $_ = join('', @lines);
fb7007dc 97 while (length($_)) {
2304df62 98 $field = '';
99 for (;;) {
456e8aa7 100 $snippet = '';
101 if (s/^"(([^"\\]|\\.)*)"//) {
2304df62 102 $snippet = $1;
103 $snippet = "\"$snippet\"" if ($keep);
104 }
456e8aa7 105 elsif (s/^'(([^'\\]|\\.)*)'//) {
2304df62 106 $snippet = $1;
107 $snippet = "'$snippet'" if ($keep);
108 }
109 elsif (/^["']/) {
a0d0e21e 110 croak "Unmatched quote";
2304df62 111 }
112 elsif (s/^\\(.)//) {
113 $snippet = $1;
114 $snippet = "\\$snippet" if ($keep);
115 }
fb7007dc 116 elsif (!length($_) || s/^$delim//) {
2304df62 117 last;
118 }
119 else {
55497cff 120 while ($_ ne '' && !(/^$delim/ || /^['"\\]/)) {
2304df62 121 $snippet .= substr($_, 0, 1);
122 substr($_, 0, 1) = '';
123 }
124 }
125 $field .= $snippet;
126 }
127 push(@words, $field);
128 }
129 @words;
130}
2304df62 131
132
a0d0e21e 133sub old_shellwords {
134
135 # Usage:
136 # use ParseWords;
137 # @words = old_shellwords($line);
138 # or
139 # @words = old_shellwords(@lines);
140
141 local($_) = join('', @_);
142 my(@words,$snippet,$field);
143
144 s/^\s+//;
145 while ($_ ne '') {
146 $field = '';
147 for (;;) {
148 if (s/^"(([^"\\]|\\.)*)"//) {
149 ($snippet = $1) =~ s#\\(.)#$1#g;
150 }
151 elsif (/^"/) {
152 croak "Unmatched double quote: $_";
153 }
154 elsif (s/^'(([^'\\]|\\.)*)'//) {
155 ($snippet = $1) =~ s#\\(.)#$1#g;
156 }
157 elsif (/^'/) {
158 croak "Unmatched single quote: $_";
159 }
160 elsif (s/^\\(.)//) {
161 $snippet = $1;
162 }
163 elsif (s/^([^\s\\'"]+)//) {
164 $snippet = $1;
165 }
166 else {
167 s/^\s+//;
168 last;
169 }
170 $field .= $snippet;
171 }
172 push(@words, $field);
173 }
174 @words;
175}