X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FParseWords.pm;h=6235d3cb9044bfbaccfa9b25d192aa5b5ed4123f;hb=fe0438b3fdd7184c1a19b7c24a3a26460d03083a;hp=bb1e7be87b635d8b7c7aff658f929ebf454d8932;hpb=ff270adde2ed532b630356e4973f5a74e6152498;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm index bb1e7be..6235d3c 100644 --- a/lib/Text/ParseWords.pm +++ b/lib/Text/ParseWords.pm @@ -1,7 +1,7 @@ package Text::ParseWords; use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); -$VERSION = "3.24"; +$VERSION = "3.26"; require 5.000; @@ -12,9 +12,17 @@ use Exporter; sub shellwords { - local(@lines) = @_; - $lines[$#lines] =~ s/\s+$//; - return(quotewords('\s+', 0, @lines)); + my (@lines) = @_; + my @allwords; + + foreach my $line (@lines) { + $line =~ s/^\s+//; + my @words = parse_line('\s+', 0, $line); + pop @words if (@words and !defined $words[-1]); + return() unless (@words || !length($line)); + push(@allwords, @words); + } + return(@allwords); } @@ -22,7 +30,6 @@ sub shellwords { sub quotewords { my($delim, $keep, @lines) = @_; my($line, @words, @allwords); - foreach $line (@lines) { @words = parse_line($delim, $keep, $line); @@ -37,7 +44,7 @@ sub quotewords { sub nested_quotewords { my($delim, $keep, @lines) = @_; my($i, @allwords); - + for ($i = 0; $i < @lines; $i++) { @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]); return() unless (@{$allwords[$i]} || !length($lines[$i])); @@ -48,23 +55,41 @@ sub nested_quotewords { sub parse_line { - # We will be testing undef strings - no warnings; - use re 'taint'; # if it's tainted, leave it as such - my($delimiter, $keep, $line) = @_; my($word, @pieces); + no warnings 'uninitialized'; # we will be testing undef strings + while (length($line)) { - $line =~ s/^(["']) # a $quote - ((?:\\.|(?!\1)[^\\])*) # and $quoted text - \1 # followed by the same quote - | # --OR-- - ^((?:\\.|[^\\"'])*?) # an $unquoted text - (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) - # plus EOL, delimiter, or quote - //xs; # extended layout - my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4); + # This pattern is optimised to be stack conservative on older perls. + # Do not refactor without being careful and testing it on very long strings. + # See Perl bug #42980 for an example of a stack busting input. + $line =~ s/^ + (?: + # double quoted string + (") # $quote + ((?>[^\\"]*(?:\\.[^\\"]*)*))" # $quoted + | # --OR-- + # singe quoted string + (') # $quote + ((?>[^\\']*(?:\\.[^\\']*)*))' # $quoted + | # --OR-- + # unquoted string + ( # $unquoted + (?:\\.|[^\\"'])*? + ) + # followed by + ( # $delim + \Z(?!\n) # EOL + | # --OR-- + (?-x:$delimiter) # delimiter + | # --OR-- + (?!^)(?=["']) # a quote + ) + )//xs or return; # extended layout + my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6); + + return() unless( defined($quote) || length($unquoted) || length($delim)); if ($keep) { @@ -77,6 +102,7 @@ sub parse_line { $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); } } + $word .= substr($line, 0, 0); # leave results tainted $word .= defined $quote ? $quoted : $unquoted; if (length($delim)) { @@ -100,41 +126,48 @@ sub old_shellwords { # @words = old_shellwords($line); # or # @words = old_shellwords(@lines); + # or + # @words = old_shellwords(); # defaults to $_ (and clobbers it) - local($_) = join('', @_); - my(@words,$snippet,$field); + no warnings 'uninitialized'; # we will be testing undef strings + local *_ = \join('', @_) if @_; + my (@words, $snippet); - s/^\s+//; + s/\A\s+//; while ($_ ne '') { - $field = ''; + my $field = substr($_, 0, 0); # leave results tainted for (;;) { - if (s/^"(([^"\\]|\\.)*)"//) { - ($snippet = $1) =~ s#\\(.)#$1#g; + if (s/\A"(([^"\\]|\\.)*)"//s) { + ($snippet = $1) =~ s#\\(.)#$1#sg; } - elsif (/^"/) { + elsif (/\A"/) { + require Carp; + Carp::carp("Unmatched double quote: $_"); return(); } - elsif (s/^'(([^'\\]|\\.)*)'//) { - ($snippet = $1) =~ s#\\(.)#$1#g; + elsif (s/\A'(([^'\\]|\\.)*)'//s) { + ($snippet = $1) =~ s#\\(.)#$1#sg; } - elsif (/^'/) { + elsif (/\A'/) { + require Carp; + Carp::carp("Unmatched single quote: $_"); return(); } - elsif (s/^\\(.)//) { + elsif (s/\A\\(.?)//s) { $snippet = $1; } - elsif (s/^([^\s\\'"]+)//) { + elsif (s/\A([^\s\\'"]+)//) { $snippet = $1; } else { - s/^\s+//; + s/\A\s+//; last; } $field .= $snippet; } push(@words, $field); } - @words; + return @words; } 1;