X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FParseWords.pm;h=6235d3cb9044bfbaccfa9b25d192aa5b5ed4123f;hb=fe0438b3fdd7184c1a19b7c24a3a26460d03083a;hp=d3a89f03b8ab0201501d2d361014d4b82b8fe38d;hpb=9b599b2a63d2324ddacddd9710c41b795a95070d;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm
index d3a89f0..6235d3c 100644
--- a/lib/Text/ParseWords.pm
+++ b/lib/Text/ParseWords.pm
@@ -1,7 +1,7 @@
 package Text::ParseWords;
 
-use vars qw($VERSION @ISA @EXPORT);
-$VERSION = "3.0";
+use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
+$VERSION = "3.26";
 
 require 5.000;
 
@@ -12,9 +12,17 @@ use Exporter;
 
 
 sub shellwords {
-    local(@lines) = @_;
-    $lines[$#lines] =~ s/\s+$//;
-    return(quotewords('\s+', 0, @lines));
+    my (@lines) = @_;
+    my @allwords;
+
+    foreach my $line (@lines) {
+	$line =~ s/^\s+//;
+	my @words = parse_line('\s+', 0, $line);
+	pop @words if (@words and !defined $words[-1]);
+	return() unless (@words || !length($line));
+	push(@allwords, @words);
+    }
+    return(@allwords);
 }
 
 
@@ -22,7 +30,6 @@ sub shellwords {
 sub quotewords {
     my($delim, $keep, @lines) = @_;
     my($line, @words, @allwords);
-    
 
     foreach $line (@lines) {
 	@words = parse_line($delim, $keep, $line);
@@ -37,7 +44,7 @@ sub quotewords {
 sub nested_quotewords {
     my($delim, $keep, @lines) = @_;
     my($i, @allwords);
-    
+
     for ($i = 0; $i < @lines; $i++) {
 	@{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
 	return() unless (@{$allwords[$i]} || !length($lines[$i]));
@@ -49,30 +56,54 @@ sub nested_quotewords {
 
 sub parse_line {
     my($delimiter, $keep, $line) = @_;
-    my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+    my($word, @pieces);
+
+    no warnings 'uninitialized';	# we will be testing undef strings
 
     while (length($line)) {
-	($quote, $quoted, $unquoted, $delim) =
-	    $line =~ m/^(["'])                 # a $quote
-                        ((?:\\.|[^\1\\])*?)    # and $quoted text
-                        \1                     # followed by the same quote
-		       |                       # --OR--
-                       ^((?:\\.|[^\\"'])*?)    # an $unquoted text
-                        (\Z(?!\n)|$delimiter|(?!^)(?=["']))  
-                                               # plus EOL, delimiter, or quote
-                      /x;                      # extended layout
-
-        return() unless(length($&));
-        $line = $';
+        # This pattern is optimised to be stack conservative on older perls.
+        # Do not refactor without being careful and testing it on very long strings.
+        # See Perl bug #42980 for an example of a stack busting input.
+        $line =~ s/^
+                    (?: 
+                        # double quoted string
+                        (")                             # $quote
+                        ((?>[^\\"]*(?:\\.[^\\"]*)*))"   # $quoted 
+		    |	# --OR--
+                        # singe quoted string
+                        (')                             # $quote
+                        ((?>[^\\']*(?:\\.[^\\']*)*))'   # $quoted
+                    |   # --OR--
+                        # unquoted string
+		        (                               # $unquoted 
+                            (?:\\.|[^\\"'])*?           
+                        )		
+                        # followed by
+		        (                               # $delim
+                            \Z(?!\n)                    # EOL
+                        |   # --OR--
+                            (?-x:$delimiter)            # delimiter
+                        |   # --OR--                    
+                            (?!^)(?=["'])               # a quote
+                        )  
+		    )//xs or return;		# extended layout                  
+        my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);
+
+
+	return() unless( defined($quote) || length($unquoted) || length($delim));
 
         if ($keep) {
 	    $quoted = "$quote$quoted$quote";
 	}
         else {
-	    $unquoted =~ s/\\(.)/$1/g;
-	    $quoted =~ s/\\(.)/$1/g if ($quote eq '"');
+	    $unquoted =~ s/\\(.)/$1/sg;
+	    if (defined $quote) {
+		$quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
+		$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
+            }
 	}
-        $word .= ($quote) ? $quoted : $unquoted;
+        $word .= substr($line, 0, 0);	# leave results tainted
+        $word .= defined $quote ? $quoted : $unquoted;
  
         if (length($delim)) {
             push(@pieces, $word);
@@ -95,41 +126,48 @@ sub old_shellwords {
     #	@words = old_shellwords($line);
     #	or
     #	@words = old_shellwords(@lines);
+    #	or
+    #	@words = old_shellwords();	# defaults to $_ (and clobbers it)
 
-    local($_) = join('', @_);
-    my(@words,$snippet,$field);
+    no warnings 'uninitialized';	# we will be testing undef strings
+    local *_ = \join('', @_) if @_;
+    my (@words, $snippet);
 
-    s/^\s+//;
+    s/\A\s+//;
     while ($_ ne '') {
-	$field = '';
+	my $field = substr($_, 0, 0);	# leave results tainted
 	for (;;) {
-	    if (s/^"(([^"\\]|\\.)*)"//) {
-		($snippet = $1) =~ s#\\(.)#$1#g;
+	    if (s/\A"(([^"\\]|\\.)*)"//s) {
+		($snippet = $1) =~ s#\\(.)#$1#sg;
 	    }
-	    elsif (/^"/) {
+	    elsif (/\A"/) {
+		require Carp;
+		Carp::carp("Unmatched double quote: $_");
 		return();
 	    }
-	    elsif (s/^'(([^'\\]|\\.)*)'//) {
-		($snippet = $1) =~ s#\\(.)#$1#g;
+	    elsif (s/\A'(([^'\\]|\\.)*)'//s) {
+		($snippet = $1) =~ s#\\(.)#$1#sg;
 	    }
-	    elsif (/^'/) {
+	    elsif (/\A'/) {
+		require Carp;
+		Carp::carp("Unmatched single quote: $_");
 		return();
 	    }
-	    elsif (s/^\\(.)//) {
+	    elsif (s/\A\\(.?)//s) {
 		$snippet = $1;
 	    }
-	    elsif (s/^([^\s\\'"]+)//) {
+	    elsif (s/\A([^\s\\'"]+)//) {
 		$snippet = $1;
 	    }
 	    else {
-		s/^\s+//;
+		s/\A\s+//;
 		last;
 	    }
 	    $field .= $snippet;
 	}
 	push(@words, $field);
     }
-    @words;
+    return @words;
 }
 
 1;
@@ -158,8 +196,8 @@ words ignoring delimiters that appear inside quotes.  &quotewords()
 returns all of the tokens in a single long list, while &nested_quotewords()
 returns a list of token lists corresponding to the elements of @lines.
 &parse_line() does tokenizing on a single string.  The &*quotewords()
-functions simply call &parse_lines(), so if you're only splitting
-one line you can call &parse_lines() directly and save a function
+functions simply call &parse_line(), so if you're only splitting
+one line you can call &parse_line() directly and save a function
 call.
 
 The $keep argument is a boolean flag.  If true, then the tokens are
@@ -205,21 +243,27 @@ demonstrating:
 =over 4
 
 =item 0
+
 a simple word
 
 =item 1
+
 multiple spaces are skipped because of our $delim
 
 =item 2
+
 use of quotes to include a space in a word
 
 =item 3
+
 use of a backslash to include a space in a word
 
 =item 4
+
 use of a backslash to remove the special meaning of a double-quote
 
 =item 5
+
 another simple word (note the lack of effect of the
 backslashed double-quote)