if (UTF) {
U8 *s = (U8*)STRING(scan);
l = utf8_length(s, s + l);
- uc = utf8_to_uv_simple(s, NULL);
+ uc = utf8_to_uvchr(s, NULL);
}
min += l;
if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
if (UTF) {
U8 *s = (U8 *)STRING(scan);
l = utf8_length(s, s + l);
- uc = utf8_to_uv_simple(s, NULL);
+ uc = utf8_to_uvchr(s, NULL);
}
min += l;
if (data && (flags & SCF_DO_SUBSTR))
r->reganch |= ROPT_LOOKBEHIND_SEEN;
if (RExC_seen & REG_SEEN_EVAL)
r->reganch |= ROPT_EVAL_SEEN;
+ if (RExC_seen & REG_SEEN_SANY)
+ r->reganch |= ROPT_SANY_SEEN;
Newz(1002, r->startp, RExC_npar, I32);
Newz(1002, r->endp, RExC_npar, I32);
PL_regdata = r->data; /* for regprop() */
break;
case 'C':
ret = reg_node(pRExC_state, SANY);
+ RExC_seen |= REG_SEEN_SANY;
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
break;
ret = reg_node(pRExC_state, CLUMP);
*flagp |= HASWIDTH;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_mark)
- is_utf8_mark((U8*)"~"); /* preload table */
break;
case 'w':
ret = reg_node(pRExC_state, LOC ? ALNUML : ALNUM);
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_alnum)
- is_utf8_alnum((U8*)"a"); /* preload table */
break;
case 'W':
ret = reg_node(pRExC_state, LOC ? NALNUML : NALNUM);
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_alnum)
- is_utf8_alnum((U8*)"a"); /* preload table */
break;
case 'b':
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, LOC ? BOUNDL : BOUND);
*flagp |= SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_alnum)
- is_utf8_alnum((U8*)"a"); /* preload table */
break;
case 'B':
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, LOC ? NBOUNDL : NBOUND);
*flagp |= SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_alnum)
- is_utf8_alnum((U8*)"a"); /* preload table */
break;
case 's':
ret = reg_node(pRExC_state, LOC ? SPACEL : SPACE);
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_space)
- is_utf8_space((U8*)" "); /* preload table */
break;
case 'S':
ret = reg_node(pRExC_state, LOC ? NSPACEL : NSPACE);
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_space)
- is_utf8_space((U8*)" "); /* preload table */
break;
case 'd':
ret = reg_node(pRExC_state, DIGIT);
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_digit)
- is_utf8_digit((U8*)"1"); /* preload table */
break;
case 'D':
ret = reg_node(pRExC_state, NDIGIT);
*flagp |= HASWIDTH|SIMPLE;
nextchar(pRExC_state);
- if (UTF && !PL_utf8_digit)
- is_utf8_digit((U8*)"1"); /* preload table */
break;
case 'p':
case 'P':
else {
numlen = 1; /* allow underscores */
ender = (UV)scan_hex(p + 1, e - p - 1, &numlen);
+ if (ender > 0xff)
+ RExC_utf8 = 1;
/* numlen is generous */
if (numlen + len >= 127) {
p--;
default:
normal_default:
if (UTF8_IS_START(*p) && UTF) {
- ender = utf8_to_uv((U8*)p, RExC_end - p,
+ ender = utf8n_to_uvuni((U8*)p, RExC_end - p,
&numlen, 0);
p += numlen;
}
p = regwhite(p, RExC_end);
if (UTF && FOLD) {
if (LOC)
- ender = toLOWER_LC_uni(ender);
+ ender = toLOWER_LC_uvchr(UNI_TO_NATIVE(ender));
else
ender = toLOWER_uni(ender);
}
if (!range)
rangebegin = RExC_parse;
if (UTF) {
- value = utf8_to_uv((U8*)RExC_parse,
+ value = utf8n_to_uvuni((U8*)RExC_parse,
RExC_end - RExC_parse,
&numlen, 0);
RExC_parse += numlen;
namedclass = regpposixcc(pRExC_state, value);
else if (value == '\\') {
if (UTF) {
- value = utf8_to_uv((U8*)RExC_parse,
+ value = utf8n_to_uvuni((U8*)RExC_parse,
RExC_end - RExC_parse,
&numlen, 0);
RExC_parse += numlen;
/*
 * S_reguni - report through *lenp how many bytes the value uv occupies
 * once encoded.
 *
 * When SIZE_ONLY is true the buffer s is not referenced at all and *lenp
 * is set to UNISKIP(uv).  Otherwise uv is handed to the encoder together
 * with s, and *lenp is set to the distance between s and the pointer the
 * encoder returns.
 *
 * NOTE(review): the two marked lines below are the removed/added sides of
 * the same statement; the only difference is the encoder being called
 * (uv_to_utf8 -> uvuni_to_utf8).  Presumably the encoder returns a pointer
 * just past the last byte it wrote into s -- confirm against its
 * definition.  pRExC_state is not named in the body; presumably SIZE_ONLY
 * reads it -- confirm against the macro definition.
 */
STATIC void
S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
{
- *lenp = SIZE_ONLY ? UNISKIP(uv) : (uv_to_utf8((U8*)s, uv) - (U8*)s);
+ *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvuni_to_utf8((U8*)s, uv) - (U8*)s);
}
/*
U8 s[UTF8_MAXLEN+1];
for (i = 0; i <= 256; i++) { /* just the first 256 */
- U8 *e = uv_to_utf8(s, i);
+ U8 *e = uvuni_to_utf8(s, i);
if (i < 256 && swash_fetch(sw, s)) {
if (rangestart == -1)
if (i <= rangestart + 3)
for (; rangestart < i; rangestart++) {
- for(e = uv_to_utf8(s, rangestart), p = s; p < e; p++)
+ for(e = uvuni_to_utf8(s, rangestart), p = s; p < e; p++)
put_byte(sv, *p);
}
else {
- for (e = uv_to_utf8(s, rangestart), p = s; p < e; p++)
+ for (e = uvuni_to_utf8(s, rangestart), p = s; p < e; p++)
put_byte(sv, *p);
sv_catpv(sv, "-");
- for (e = uv_to_utf8(s, i - 1), p = s; p < e; p++)
+ for (e = uvuni_to_utf8(s, i - 1), p = s; p < e; p++)
put_byte(sv, *p);
}
rangestart = -1;