package Text::ParseWords;
use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
-$VERSION = "3.1";
+$VERSION = "3.25";
require 5.000;
sub shellwords {
- local(@lines) = @_;
- $lines[$#lines] =~ s/\s+$//;
- return(quotewords('\s+', 0, @lines));
+ my (@lines) = @_;
+ my @allwords;
+
+ foreach my $line (@lines) {
+ $line =~ s/^\s+//;
+ my @words = parse_line('\s+', 0, $line);
+ pop @words if (@words and !defined $words[-1]);
+ return() unless (@words || !length($line));
+ push(@allwords, @words);
+ }
+ return(@allwords);
}
sub quotewords {
my($delim, $keep, @lines) = @_;
my($line, @words, @allwords);
-
foreach $line (@lines) {
@words = parse_line($delim, $keep, $line);
sub nested_quotewords {
my($delim, $keep, @lines) = @_;
my($i, @allwords);
-
+
for ($i = 0; $i < @lines; $i++) {
@{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
return() unless (@{$allwords[$i]} || !length($lines[$i]));
sub parse_line {
- # We will be testing undef strings
- local($^W) = 0;
-
my($delimiter, $keep, $line) = @_;
- my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+ my($word, @pieces);
- while (length($line)) {
+ no warnings 'uninitialized'; # we will be testing undef strings
- ($quote, $quoted, undef, $unquoted, $delim, undef) =
- $line =~ m/^(["']) # a $quote
- ((?:\\.|(?!\1)[^\\])*) # and $quoted text
- \1 # followed by the same quote
- ([\000-\377]*) # and the rest
- | # --OR--
- ^((?:\\.|[^\\"'])*?) # an $unquoted text
- (\Z(?!\n)|$delimiter|(?!^)(?=["']))
- # plus EOL, delimiter, or quote
- ([\000-\377]*) # the rest
- /x; # extended layout
- return() unless( $quote || length($unquoted) || length($delim));
-
- $line = $+;
+ while (length($line)) {
+ $line =~ s/^(["']) # a $quote
+ ((?:\\.|(?!\1)[^\\])*) # and $quoted text
+ \1 # followed by the same quote
+ | # --OR--
+ ^((?:\\.|[^\\"'])*?) # an $unquoted text
+ (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
+ # plus EOL, delimiter, or quote
+ //xs or return; # extended layout
+ my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
+ return() unless( defined($quote) || length($unquoted) || length($delim));
if ($keep) {
$quoted = "$quote$quoted$quote";
}
else {
- $unquoted =~ s/\\(.)/$1/g;
- $quoted =~ s/\\(.)/$1/g if ($quote eq '"');
- $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
+ $unquoted =~ s/\\(.)/$1/sg;
+ if (defined $quote) {
+ $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
+ $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
+ }
}
- $word .= ($quote) ? $quoted : $unquoted;
+ $word .= substr($line, 0, 0); # leave results tainted
+ $word .= defined $quote ? $quoted : $unquoted;
if (length($delim)) {
push(@pieces, $word);
# @words = old_shellwords($line);
# or
# @words = old_shellwords(@lines);
+ # or
+ # @words = old_shellwords(); # defaults to $_ (and clobbers it)
- local($_) = join('', @_);
- my(@words,$snippet,$field);
+ no warnings 'uninitialized'; # we will be testing undef strings
+ local *_ = \join('', @_) if @_;
+ my (@words, $snippet);
- s/^\s+//;
+ s/\A\s+//;
while ($_ ne '') {
- $field = '';
+ my $field = substr($_, 0, 0); # leave results tainted
for (;;) {
- if (s/^"(([^"\\]|\\.)*)"//) {
- ($snippet = $1) =~ s#\\(.)#$1#g;
+ if (s/\A"(([^"\\]|\\.)*)"//s) {
+ ($snippet = $1) =~ s#\\(.)#$1#sg;
}
- elsif (/^"/) {
+ elsif (/\A"/) {
+ require Carp;
+ Carp::carp("Unmatched double quote: $_");
return();
}
- elsif (s/^'(([^'\\]|\\.)*)'//) {
- ($snippet = $1) =~ s#\\(.)#$1#g;
+ elsif (s/\A'(([^'\\]|\\.)*)'//s) {
+ ($snippet = $1) =~ s#\\(.)#$1#sg;
}
- elsif (/^'/) {
+ elsif (/\A'/) {
+ require Carp;
+ Carp::carp("Unmatched single quote: $_");
return();
}
- elsif (s/^\\(.)//) {
+ elsif (s/\A\\(.)//s) {
$snippet = $1;
}
- elsif (s/^([^\s\\'"]+)//) {
+ elsif (s/\A([^\s\\'"]+)//) {
$snippet = $1;
}
else {
- s/^\s+//;
+ s/\A\s+//;
last;
}
$field .= $snippet;
}
push(@words, $field);
}
- @words;
+ return @words;
}
1;
returns all of the tokens in a single long list, while &nested_quotewords()
returns a list of token lists corresponding to the elements of @lines.
&parse_line() does tokenizing on a single string. The &*quotewords()
-functions simply call &parse_lines(), so if you're only splitting
-one line you can call &parse_lines() directly and save a function
+functions simply call &parse_line(), so if you're only splitting
+one line you can call &parse_line() directly and save a function
call.
The $keep argument is a boolean flag. If true, then the tokens are
=over 4
=item 0
+
a simple word
=item 1
+
multiple spaces are skipped because of our $delim
=item 2
+
use of quotes to include a space in a word
=item 3
+
use of a backslash to include a space in a word
=item 4
+
use of a backslash to remove the special meaning of a double-quote
=item 5
+
another simple word (note the lack of effect of the
backslashed double-quote)