1 package Text::ParseWords;
8 @ISA = qw(Exporter AutoLoader);
9 @EXPORT = qw(shellwords quotewords);
10 @EXPORT_OK = qw(old_shellwords);
12 # This code needs updating to use new Perl 5 features (regexp etc).
18 # @words = &quotewords($delim, $keep, @lines);
19 # @words = &shellwords(@lines);
20 # @words = &old_shellwords(@lines);
22 # Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
23 # Permission to use and distribute under the same terms as Perl.
24 # No warranty expressed or implied.
26 # Basically an update and generalization of the old shellwords.pl.
27 # Much code shamelessly stolen from the old version (author unknown).
29 # &quotewords() accepts a delimiter (which can be a regular expression)
30 # and a list of lines and then breaks those lines up into a list of
31 # words ignoring delimiters that appear inside quotes.
33 # The $keep argument is a boolean flag. If true, the quotes are kept
34 # with each word, otherwise quotes are stripped in the splitting process.
35 # $keep also defines whether unprotected backslashes are retained.
44 # A &shellwords() replacement is included to demonstrate the new package.
45 # This version differs from the original in that it will _NOT_ default
46 # to using $_ if no arguments are given. I personally find the old behavior
47 # to be a mis-feature.
50 $lines[$#lines] =~ s/\s+$//;
51 "ewords('\s+', 0, @lines);
58 # &quotewords() works by simply jamming all of @lines into a single
59 # string in $_ and then pulling off words a bit at a time until $_
62 # The inner "for" loop builds up each word (or $field) one $snippet
63 # at a time. A $snippet is a quoted string, a backslashed character,
64 # or an unquoted string. We fall out of the "for" loop when we reach
65 # the end of $_ or when we hit a delimiter. Falling out of the "for"
66 # loop, we push the $field we've been building up onto the list of
67 # @words we'll be returning, and then loop back and pull another word
70 # The first two cases inside the "for" loop deal with quoted strings.
71 # The first case matches a double quoted string, removes it from $_,
72 # and assigns the double quoted string to $snippet in the body of the
73 # conditional. The second case handles single quoted strings. In
74 # the third case we've found a quote at the current beginning of $_,
75 # but it didn't match the quoted string regexps in the first two cases,
76 # so it must be an unbalanced quote and we croak with an error (which can
77 # be caught by eval()).
79 # The next case handles backslashed characters, and the next case is the
80 # exit case on reaching the end of the string or finding a delimiter.
82 # Otherwise, we've found an unquoted thing and we pull off characters one
83 # at a time until we reach something that could start another $snippet--
84 # a quote of some sort, a backslash, or the delimiter. This one character
85 # at a time behavior was necessary if the delimiter was going to be a
86 # regexp (love to hear it if you can figure out a better way).
88 local($delim, $keep, @lines) = @_;
89 local(@words,$snippet,$field,$_);
91 $_ = join('', @lines);
96 if (s/^"(([^"\\]|\\[\\"])*)"//) {
98 $snippet = "\"$snippet\"" if ($keep);
100 elsif (s/^'(([^'\\]|\\[\\'])*)'//) {
102 $snippet = "'$snippet'" if ($keep);
105 croak "Unmatched quote";
109 $snippet = "\\$snippet" if ($keep);
111 elsif (!$_ || s/^$delim//) {
115 while ($_ && !(/^$delim/ || /^['"\\]/)) {
116 $snippet .= substr($_, 0, 1);
117 substr($_, 0, 1) = '';
122 push(@words, $field);
132 # @words = old_shellwords($line);
134 # @words = old_shellwords(@lines);
136 local($_) = join('', @_);
137 my(@words,$snippet,$field);
143 if (s/^"(([^"\\]|\\.)*)"//) {
144 ($snippet = $1) =~ s#\\(.)#$1#g;
147 croak "Unmatched double quote: $_";
149 elsif (s/^'(([^'\\]|\\.)*)'//) {
150 ($snippet = $1) =~ s#\\(.)#$1#g;
153 croak "Unmatched single quote: $_";
158 elsif (s/^([^\s\\'"]+)//) {
167 push(@words, $field);