SV *dsv = PERL_DEBUG_PAD_ZERO(0);
#endif
+ if (prog->reganch & ROPT_UTF8) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "UTF-8 regex...\n"));
+ PL_reg_flags |= RF_utf8;
+ }
+
DEBUG_r({
- char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
- int len = UTF ? strlen(s) : strend - strpos;
+ char *s = PL_reg_match_utf8 ?
+ sv_uni_display(dsv, sv, 60, 0) : strpos;
+ int len = PL_reg_match_utf8 ?
+ strlen(s) : strend - strpos;
if (!PL_colorset)
reginitcolors();
+ if (PL_reg_match_utf8)
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "UTF-8 target...\n"));
PerlIO_printf(Perl_debug_log,
"%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
PL_colors[4],PL_colors[5],PL_colors[0],
);
});
- if (prog->reganch & ROPT_UTF8)
- PL_reg_flags |= RF_utf8;
-
if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
"String too short... [re_intuit_start]\n"));
* Fortunately, not getting this right is allowed
* for Unicode Regular Expression Support level 1,
* only one-to-one matching is required. --jhi */
- if (c1 == c2)
+ if (c1 == c2) {
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1
&& (ln == len ||
goto got_it;
s += len;
}
- else
+ }
+ else {
while (s <= e) {
UV c = utf8_to_uvchr((U8*)s, &len);
+ if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
+ c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
+ c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
if ( (c == c1 || c == c2)
&& (ln == len ||
ibcmp_utf8(s, do_utf8, strend - s,
goto got_it;
s += len;
}
+ }
}
else {
if (c1 == c2)
nextchr = UCHARAT(++locinput);
break;
case CLUMP:
- LOAD_UTF8_CHARCLASS(mark,"~");
- if (locinput >= PL_regeol ||
- swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
- sayNO;
- locinput += PL_utf8skip[nextchr];
- while (locinput < PL_regeol &&
- swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
- locinput += UTF8SKIP(locinput);
- if (locinput > PL_regeol)
+ if (locinput >= PL_regeol)
sayNO;
+ if (do_utf8) {
+ LOAD_UTF8_CHARCLASS(mark,"~");
+ if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
+ sayNO;
+ locinput += PL_utf8skip[nextchr];
+ while (locinput < PL_regeol &&
+ swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
+ locinput += UTF8SKIP(locinput);
+ if (locinput > PL_regeol)
+ sayNO;
+ }
+ else
+ locinput++;
nextchr = UCHARAT(locinput);
break;
case REFFL: