* Fortunately, not getting this right is allowed
* for Unicode Regular Expression Support level 1,
* only one-to-one matching is required. --jhi */
- if (c1 == c2)
+ if (c1 == c2) {
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1
&& (ln == len ||
goto got_it;
s += len;
}
- else
+ }
+ else {
while (s <= e) {
UV c = utf8_to_uvchr((U8*)s, &len);
+ if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
+ c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
+ c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
if ( (c == c1 || c == c2)
&& (ln == len ||
ibcmp_utf8(s, do_utf8, strend - s,
goto got_it;
s += len;
}
+ }
}
else {
if (c1 == c2)
nextchr = UCHARAT(++locinput);
break;
case CLUMP:
- LOAD_UTF8_CHARCLASS(mark,"~");
- if (locinput >= PL_regeol ||
- swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
- sayNO;
- locinput += PL_utf8skip[nextchr];
- while (locinput < PL_regeol &&
- swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
- locinput += UTF8SKIP(locinput);
- if (locinput > PL_regeol)
+ if (locinput >= PL_regeol)
sayNO;
+ if (do_utf8) {
+ LOAD_UTF8_CHARCLASS(mark,"~");
+ if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
+ sayNO;
+ locinput += PL_utf8skip[nextchr];
+ while (locinput < PL_regeol &&
+ swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
+ locinput += UTF8SKIP(locinput);
+ if (locinput > PL_regeol)
+ sayNO;
+ }
+ else
+ locinput++;
nextchr = UCHARAT(locinput);
break;
case REFFL: