From: karl williamson Date: Wed, 5 Nov 2008 18:42:16 +0000 (-0700) Subject: Resolve perlbug #59328: In re's, \N{U+...} doesn't match for ... > 256 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a4893424bce3ee5da2b2e8e1c256b806c74bfb50;p=p5sagit%2Fp5-mst-13.2.git Resolve perlbug #59328: In re's, \N{U+...} doesn't match for ... > 256 Subject: PATCH [perl #59328] In re's, \N{U+...} doesn't match for ... > 256 Message-ID: <49124B78.2000907@khwilliamson.com> Date: Wed, 05 Nov 2008 18:42:16 -0700 p4raw-id: //depot/perl@34747 --- diff --git a/regcomp.c b/regcomp.c index 6d3da0f..b90a783 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6617,20 +6617,30 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) | PERL_SCAN_DISALLOW_PREFIX | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0); UV cp; - char string; len = (STRLEN)(endbrace - name - 2); cp = grok_hex(name + 2, &len, &fl, NULL); if ( len != (STRLEN)(endbrace - name - 2) ) { cp = 0xFFFD; } - if (cp > 0xff) - RExC_utf8 = 1; if ( valuep ) { + if (cp > 0xff) RExC_utf8 = 1; *valuep = cp; return NULL; } - string = (char)cp; - sv_str= newSVpvn(&string, 1); + + /* Need to convert to utf8 if either: won't fit into a byte, or the re + * is going to be in utf8 and the representation changes under utf8. */ + if (cp > 0xff || (RExC_utf8 && ! 
UNI_IS_INVARIANT(cp))) { + U8 string[UTF8_MAXBYTES+1]; + U8 *tmps; + RExC_utf8 = 1; + tmps = uvuni_to_utf8(string, cp); + sv_str = newSVpvn_utf8((char*)string, tmps - string, TRUE); + } else { /* Otherwise, no need for utf8, can skip that step */ + char string; + string = (char)cp; + sv_str= newSVpvn(&string, 1); + } } else { /* fetch the charnames handler for this scope */ HV * const table = GvHV(PL_hintgv); @@ -6809,7 +6819,7 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) Set_Node_Cur_Length(ret); /* MJD */ RExC_parse--; nextchar(pRExC_state); - } else { + } else { /* zero length */ ret = reg_node(pRExC_state,NOTHING); } if (!cached) { diff --git a/t/op/re_tests b/t/op/re_tests index a4f1e53..6d3ef4f 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -1360,3 +1360,4 @@ foo(\h)bar foo\tbar y $1 \t /(.*?)a(?!(a+)b\2c)/ baaabaac y $&-$1 baa-ba # [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10 /\A(?(?=db2)db2|\D+)(?