char *oldp, *s;
STRLEN numlen;
STRLEN ulen;
- U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
+ STRLEN foldlen;
+ U8 tmpbuf[UTF8_MAXLEN_UCLC+1], *foldbuf;
parse_start = RExC_parse - 1;
case '\\':
switch (*++p) {
case 'A':
+ case 'C':
+ case 'X':
case 'G':
case 'Z':
case 'z':
}
if (RExC_flags16 & PMf_EXTENDED)
p = regwhite(p, RExC_end);
- if (UTF && FOLD) {
- toLOWER_uni(ender, tmpbuf, &ulen);
- ender = utf8_to_uvchr(tmpbuf, 0);
- }
+ if (UTF && FOLD)
+ toFOLD_uni(ender, tmpbuf, &foldlen);
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
else if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen;
+ if (FOLD) {
+ for (foldbuf = tmpbuf;
+ foldlen;
+ foldlen -= numlen) {
+ ender = utf8_to_uvchr(foldbuf, &numlen);
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ foldbuf += numlen;
+ }
+ }
+ else {
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ }
}
else {
len++;
break;
}
if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
- reguni(pRExC_state, ender, s, &numlen);
- s += numlen;
- len += numlen - 1;
+ if (FOLD) {
+ for (foldbuf = tmpbuf;
+ foldlen;
+ foldlen -= numlen) {
+ ender = utf8_to_uvchr(foldbuf, &numlen);
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ foldbuf += numlen;
+ }
+ }
+ else {
+ reguni(pRExC_state, ender, s, &numlen);
+ s += numlen;
+ len += numlen;
+ }
+ len--;
}
else
REGC(ender, s++);
break;
}
- if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT && !RExC_utf8) {
+ if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
STRLEN oldlen = STR_LEN(ret);
SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
- char *s = Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
- STRLEN newlen = SvCUR(sv);
- if (!SIZE_ONLY) {
- DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
- (int)oldlen, STRING(ret), (int)newlen, s));
- Copy(s, STRING(ret), newlen, char);
- STR_LEN(ret) += newlen - oldlen;
- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
- } else
- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
- RExC_utf8 = 1;
+
+ if (RExC_utf8)
+ SvUTF8_on(sv);
+ if (sv_utf8_downgrade(sv, TRUE)) {
+ char *s = Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
+ STRLEN newlen = SvCUR(sv);
+
+ if (!SIZE_ONLY) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
+ (int)oldlen, STRING(ret),
+ (int)newlen, s));
+ Copy(s, STRING(ret), newlen, char);
+ STR_LEN(ret) += newlen - oldlen;
+ RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
+ } else
+ RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
+ }
}
return(ret);
if (prevvalue < value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
(UV)prevvalue, (UV)value);
- else if (prevvalue == value)
+ else if (prevvalue == value) {
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
(UV)value);
+ if (FOLD) {
+ U8 tmpbuf [UTF8_MAXLEN+1];
+ U8 foldbuf[UTF8_MAXLEN_FOLD+1];
+ STRLEN foldlen;
+ UV f;
+
+ uvchr_to_utf8(tmpbuf, value);
+ to_utf8_fold(tmpbuf, foldbuf, &foldlen);
+ f = utf8_to_uvchr(foldbuf, 0);
+
+ if (f != value)
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", f);
+
+ if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+ (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+ (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
+ }
+ else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+ (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
+ }
+ }
}
}