Reolve perlbug #59328: In re's, \N{U+...} doesn't match for ... > 256
karl williamson [Wed, 5 Nov 2008 18:42:16 +0000 (11:42 -0700)]
  Subject: PATCH [perl #59328] In re's, \N{U+...} doesn't match for ... > 256
  Message-ID: <49124B78.2000907@khwilliamson.com>
  Date: Wed, 05 Nov 2008 18:42:16 -0700

p4raw-id: //depot/perl@34747

regcomp.c
t/op/re_tests

index 6d3da0f..b90a783 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -6617,20 +6617,30 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
             | PERL_SCAN_DISALLOW_PREFIX
             | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
         UV cp;
-       char string;
         len = (STRLEN)(endbrace - name - 2);
         cp = grok_hex(name + 2, &len, &fl, NULL);
         if ( len != (STRLEN)(endbrace - name - 2) ) {
             cp = 0xFFFD;
         }    
-        if (cp > 0xff)
-            RExC_utf8 = 1;
         if ( valuep ) {
+           if (cp > 0xff) RExC_utf8 = 1;
             *valuep = cp;
             return NULL;
         }
-       string = (char)cp;
-        sv_str= newSVpvn(&string, 1);
+
+       /* Need to convert to utf8 if either: won't fit into a byte, or the re
+        * is going to be in utf8 and the representation changes under utf8. */
+       if (cp > 0xff || (RExC_utf8 && ! UNI_IS_INVARIANT(cp))) {
+           U8 string[UTF8_MAXBYTES+1];
+           U8 *tmps;
+           RExC_utf8 = 1;
+           tmps = uvuni_to_utf8(string, cp);
+           sv_str = newSVpvn_utf8((char*)string, tmps - string, TRUE);
+       } else {    /* Otherwise, no need for utf8, can skip that step */
+           char string;
+           string = (char)cp;
+           sv_str= newSVpvn(&string, 1);
+       }
     } else {
         /* fetch the charnames handler for this scope */
         HV * const table = GvHV(PL_hintgv);
@@ -6809,7 +6819,7 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
         Set_Node_Cur_Length(ret); /* MJD */
         RExC_parse--; 
         nextchar(pRExC_state);
-    } else {
+    } else {   /* zero length */
         ret = reg_node(pRExC_state,NOTHING);
     }
     if (!cached) {
index a4f1e53..6d3ef4f 100644 (file)
@@ -1360,3 +1360,4 @@ foo(\h)bar        foo\tbar        y       $1      \t
 /(.*?)a(?!(a+)b\2c)/   baaabaac        y       $&-$1   baa-ba
 # [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10
 /\A(?(?=db2)db2|\D+)(?<!processed)\.csv\z/xms  sql_processed.csv       n       -       -
+/\N{U+0100}/   \x{100} y       $&      \x{100} # Bug #59328
\ No newline at end of file