X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FParseWords.pm;h=1411986cd9a7e4f305925a13ff88378fb603b96c;hb=46f0e7a52641befa6f3ed588e017a5454a12be04;hp=95f0e9b41f3e14b5cf3b5658c49556cc2be220f5;hpb=b174585de5ccc9973ba572393b2b34e1a6a5b749;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm index 95f0e9b..1411986 100644 --- a/lib/Text/ParseWords.pm +++ b/lib/Text/ParseWords.pm @@ -1,7 +1,7 @@ package Text::ParseWords; use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); -$VERSION = "3.1"; +$VERSION = "3.25"; require 5.000; @@ -12,9 +12,17 @@ use Exporter; sub shellwords { - local(@lines) = @_; - $lines[$#lines] =~ s/\s+$//; - return(quotewords('\s+', 0, @lines)); + my (@lines) = @_; + my @allwords; + + foreach my $line (@lines) { + $line =~ s/^\s+//; + my @words = parse_line('\s+', 0, $line); + pop @words if (@words and !defined $words[-1]); + return() unless (@words || !length($line)); + push(@allwords, @words); + } + return(@allwords); } @@ -22,7 +30,6 @@ sub shellwords { sub quotewords { my($delim, $keep, @lines) = @_; my($line, @words, @allwords); - foreach $line (@lines) { @words = parse_line($delim, $keep, $line); @@ -37,7 +44,7 @@ sub quotewords { sub nested_quotewords { my($delim, $keep, @lines) = @_; my($i, @allwords); - + for ($i = 0; $i < @lines; $i++) { @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]); return() unless (@{$allwords[$i]} || !length($lines[$i])); @@ -48,38 +55,35 @@ sub nested_quotewords { sub parse_line { - # We will be testing undef strings - local($^W) = 0; - my($delimiter, $keep, $line) = @_; - my($quote, $quoted, $unquoted, $delim, $word, @pieces); + my($word, @pieces); - while (length($line)) { + no warnings 'uninitialized'; # we will be testing undef strings - ($quote, $quoted, undef, $unquoted, $delim, undef) = - $line =~ m/^(["']) # a $quote - ((?:\\.|(?!\1)[^\\])*) # and $quoted text - \1 # followed by the same quote - ([\000-\377]*) # and the rest - | # --OR-- - ^((?:\\.|[^\\"'])*?) # an $unquoted text - (\Z(?!\n)|$delimiter|(?!^)(?=["'])) - # plus EOL, delimiter, or quote - ([\000-\377]*) # the rest - /x; # extended layout - return() unless( $quote || length($unquoted) || length($delim)); - - $line = $+; + while (length($line)) { + $line =~ s/^(["']) # a $quote + ((?:\\.|(?!\1)[^\\])*) # and $quoted text + \1 # followed by the same quote + | # --OR-- + ^((?:\\.|[^\\"'])*?) # an $unquoted text + (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) + # plus EOL, delimiter, or quote + //xs or return; # extended layout + my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4); + return() unless( defined($quote) || length($unquoted) || length($delim)); if ($keep) { $quoted = "$quote$quoted$quote"; } else { - $unquoted =~ s/\\(.)/$1/g; - $quoted =~ s/\\(.)/$1/g if ($quote eq '"'); - $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); + $unquoted =~ s/\\(.)/$1/sg; + if (defined $quote) { + $quoted =~ s/\\(.)/$1/sg if ($quote eq '"'); + $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); + } } - $word .= ($quote) ? $quoted : $unquoted; + $word .= substr($line, 0, 0); # leave results tainted + $word .= defined $quote ? $quoted : $unquoted; if (length($delim)) { push(@pieces, $word); @@ -102,41 +106,48 @@ sub old_shellwords { # @words = old_shellwords($line); # or # @words = old_shellwords(@lines); + # or + # @words = old_shellwords(); # defaults to $_ (and clobbers it) - local($_) = join('', @_); - my(@words,$snippet,$field); + no warnings 'uninitialized'; # we will be testing undef strings + local *_ = \join('', @_) if @_; + my (@words, $snippet); - s/^\s+//; + s/\A\s+//; while ($_ ne '') { - $field = ''; + my $field = substr($_, 0, 0); # leave results tainted for (;;) { - if (s/^"(([^"\\]|\\.)*)"//) { - ($snippet = $1) =~ s#\\(.)#$1#g; + if (s/\A"(([^"\\]|\\.)*)"//s) { + ($snippet = $1) =~ s#\\(.)#$1#sg; } - elsif (/^"/) { + elsif (/\A"/) { + require Carp; + Carp::carp("Unmatched double quote: $_"); return(); } - elsif (s/^'(([^'\\]|\\.)*)'//) { - ($snippet = $1) =~ s#\\(.)#$1#g; + elsif (s/\A'(([^'\\]|\\.)*)'//s) { + ($snippet = $1) =~ s#\\(.)#$1#sg; } - elsif (/^'/) { + elsif (/\A'/) { + require Carp; + Carp::carp("Unmatched single quote: $_"); return(); } - elsif (s/^\\(.)//) { + elsif (s/\A\\(.)//s) { $snippet = $1; } - elsif (s/^([^\s\\'"]+)//) { + elsif (s/\A([^\s\\'"]+)//) { $snippet = $1; } else { - s/^\s+//; + s/\A\s+//; last; } $field .= $snippet; } push(@words, $field); } - @words; + return @words; } 1; @@ -165,8 +176,8 @@ words ignoring delimiters that appear inside quotes. "ewords() returns all of the tokens in a single long list, while &nested_quotewords() returns a list of token lists corresponding to the elements of @lines. &parse_line() does tokenizing on a single string. The &*quotewords() -functions simply call &parse_lines(), so if you're only splitting -one line you can call &parse_lines() directly and save a function +functions simply call &parse_line(), so if you're only splitting +one line you can call &parse_line() directly and save a function call. The $keep argument is a boolean flag. If true, then the tokens are @@ -212,21 +223,27 @@ demonstrating: =over 4 =item 0 + a simple word =item 1 + multiple spaces are skipped because of our $delim =item 2 + use of quotes to include a space in a word =item 3 + use of a backslash to include a space in a word =item 4 + use of a backslash to remove the special meaning of a double-quote =item 5 + another simple word (note the lack of effect of the backslashed double-quote)