Fix for: [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
Marcus Holland-Moritz [Thu, 24 Jun 2004 19:51:06 +0000 (19:51 +0000)]
Use the suggested regex fix, plus some tests.

p4raw-id: //depot/perl@22992

lib/Text/ParseWords.pm
lib/Text/ParseWords.t

index e758bc6..fbc0ee0 100644 (file)
@@ -1,7 +1,7 @@
 package Text::ParseWords;
 
 use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
-$VERSION = "3.21";
+$VERSION = "3.22";
 
 require 5.000;
 
@@ -59,11 +59,11 @@ sub parse_line {
 
        ($quote, $quoted, undef, $unquoted, $delim, undef) =
            $line =~ m/^(["'])                 # a $quote
-                        ((?:\\.|(?!\1)[^\\])*)    # and $quoted text
+                        ((?:\\[\000-\377]|(?!\1)[^\\])*)  # and $quoted text
                         \1                    # followed by the same quote
                         ([\000-\377]*)        # and the rest
                       |                       # --OR--
-                       ^((?:\\.|[^\\"'])*?)    # an $unquoted text
+                       ^((?:\\[\000-\377]|[^\\"'])*?)     # an $unquoted text
                      (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))  
                                                # plus EOL, delimiter, or quote
                       ([\000-\377]*)          # the rest
@@ -76,9 +76,9 @@ sub parse_line {
            $quoted = "$quote$quoted$quote";
        }
         else {
-           $unquoted =~ s/\\(.)/$1/g;
+           $unquoted =~ s/\\([\000-\377])/$1/g;
            if (defined $quote) {
-               $quoted =~ s/\\(.)/$1/g if ($quote eq '"');
+               $quoted =~ s/\\([\000-\377])/$1/g if ($quote eq '"');
                $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
             }
        }
index 261d81f..ef0e562 100755 (executable)
@@ -8,7 +8,7 @@ BEGIN {
 use warnings;
 use Text::ParseWords;
 
-print "1..18\n";
+print "1..20\n";
 
 @words = shellwords(qq(foo "bar quiz" zoo));
 print "not " if $words[0] ne 'foo';
@@ -108,3 +108,14 @@ print "ok 17\n";
 @words = quotewords(' ', 1, '4 3 2 1 0');
 print "not " unless join(";", @words) eq qq(4;3;2;1;0);
 print "ok 18\n";
+
+# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
+$string = qq{"field1"\t"field2\\\nstill field2"\t"field3"};
+
+$result = join('|', parse_line("\t", 1, $string));
+print "not " unless $result eq qq{"field1"|"field2\\\nstill field2"|"field3"};
+print "ok 19\n";
+
+$result = join('|', parse_line("\t", 0, $string));
+print "not " unless $result eq "field1|field2\nstill field2|field3";
+print "ok 20\n";