perl 5.002gamma: utils/h2xs.PL
[p5sagit/p5-mst-13.2.git] / lib / Text / ParseWords.pm
CommitLineData
a0d0e21e 1package Text::ParseWords;
2
3require 5.000;
4require Exporter;
5require AutoLoader;
6use Carp;
7
8@ISA = qw(Exporter AutoLoader);
9@EXPORT = qw(shellwords quotewords);
10@EXPORT_OK = qw(old_shellwords);
11
12# This code needs updating to use new Perl 5 features (regexp etc).
13
14# ParseWords.pm
2304df62 15#
16# Usage:
a0d0e21e 17# use ParseWords;
2304df62 18# @words = &quotewords($delim, $keep, @lines);
19# @words = &shellwords(@lines);
a0d0e21e 20# @words = &old_shellwords(@lines);
2304df62 21
22# Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
23# Permission to use and distribute under the same terms as Perl.
24# No warranty expressed or implied.
25
26# Basically an update and generalization of the old shellwords.pl.
27# Much code shamelessly stolen from the old version (author unknown).
28#
29# &quotewords() accepts a delimiter (which can be a regular expression)
30# and a list of lines and then breaks those lines up into a list of
31# words ignoring delimiters that appear inside quotes.
32#
33# The $keep argument is a boolean flag. If true, the quotes are kept
34# with each word, otherwise quotes are stripped in the splitting process.
35# $keep also defines whether unprotected backslashes are retained.
36#
2304df62 37
a0d0e21e 381;
39__END__
40
41
42sub shellwords {
43
44 # A &shellwords() replacement is included to demonstrate the new package.
45 # This version differs from the original in that it will _NOT_ default
46 # to using $_ if no arguments are given. I personally find the old behavior
47 # to be a mis-feature.
2304df62 48
2304df62 49 local(@lines) = @_;
50 $lines[$#lines] =~ s/\s+$//;
a0d0e21e 51 &quotewords('\s+', 0, @lines);
2304df62 52}
53
54
a0d0e21e 55
56sub quotewords {
57
2304df62 58# &quotewords() works by simply jamming all of @lines into a single
59# string in $_ and then pulling off words a bit at a time until $_
60# is exhausted.
61#
62# The inner "for" loop builds up each word (or $field) one $snippet
63# at a time. A $snippet is a quoted string, a backslashed character,
64# or an unquoted string. We fall out of the "for" loop when we reach
65# the end of $_ or when we hit a delimiter. Falling out of the "for"
66# loop, we push the $field we've been building up onto the list of
67# @words we'll be returning, and then loop back and pull another word
68# off of $_.
69#
70# The first two cases inside the "for" loop deal with quoted strings.
71# The first case matches a double quoted string, removes it from $_,
72# and assigns the double quoted string to $snippet in the body of the
73# conditional. The second case handles single quoted strings. In
74# the third case we've found a quote at the current beginning of $_,
75# but it didn't match the quoted string regexps in the first two cases,
a0d0e21e 76# so it must be an unbalanced quote and we croak with an error (which can
2304df62 77# be caught by eval()).
78#
79# The next case handles backslashed characters, and the next case is the
80# exit case on reaching the end of the string or finding a delimiter.
81#
82# Otherwise, we've found an unquoted thing and we pull of characters one
83# at a time until we reach something that could start another $snippet--
84# a quote of some sort, a backslash, or the delimiter. This one character
85# at a time behavior was necessary if the delimiter was going to be a
86# regexp (love to hear it if you can figure out a better way).
87
2304df62 88 local($delim, $keep, @lines) = @_;
89 local(@words,$snippet,$field,$_);
90
91 $_ = join('', @lines);
92 while ($_) {
93 $field = '';
94 for (;;) {
95 $snippet = '';
96 if (s/^"(([^"\\]|\\[\\"])*)"//) {
97 $snippet = $1;
98 $snippet = "\"$snippet\"" if ($keep);
99 }
100 elsif (s/^'(([^'\\]|\\[\\'])*)'//) {
101 $snippet = $1;
102 $snippet = "'$snippet'" if ($keep);
103 }
104 elsif (/^["']/) {
a0d0e21e 105 croak "Unmatched quote";
2304df62 106 }
107 elsif (s/^\\(.)//) {
108 $snippet = $1;
109 $snippet = "\\$snippet" if ($keep);
110 }
111 elsif (!$_ || s/^$delim//) {
112 last;
113 }
114 else {
115 while ($_ && !(/^$delim/ || /^['"\\]/)) {
116 $snippet .= substr($_, 0, 1);
117 substr($_, 0, 1) = '';
118 }
119 }
120 $field .= $snippet;
121 }
122 push(@words, $field);
123 }
124 @words;
125}
2304df62 126
127
a0d0e21e 128sub old_shellwords {
129
130 # Usage:
131 # use ParseWords;
132 # @words = old_shellwords($line);
133 # or
134 # @words = old_shellwords(@lines);
135
136 local($_) = join('', @_);
137 my(@words,$snippet,$field);
138
139 s/^\s+//;
140 while ($_ ne '') {
141 $field = '';
142 for (;;) {
143 if (s/^"(([^"\\]|\\.)*)"//) {
144 ($snippet = $1) =~ s#\\(.)#$1#g;
145 }
146 elsif (/^"/) {
147 croak "Unmatched double quote: $_";
148 }
149 elsif (s/^'(([^'\\]|\\.)*)'//) {
150 ($snippet = $1) =~ s#\\(.)#$1#g;
151 }
152 elsif (/^'/) {
153 croak "Unmatched single quote: $_";
154 }
155 elsif (s/^\\(.)//) {
156 $snippet = $1;
157 }
158 elsif (s/^([^\s\\'"]+)//) {
159 $snippet = $1;
160 }
161 else {
162 s/^\s+//;
163 last;
164 }
165 $field .= $snippet;
166 }
167 push(@words, $field);
168 }
169 @words;
170}