to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
- c1 = utf8_to_uvchr(tmpbuf1, 0);
- c2 = utf8_to_uvchr(tmpbuf2, 0);
+ c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
+ 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+ c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
+ 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
}
else {
c1 = *(U8*)m;
c1 = *(U8*)m;
c2 = PL_fold_locale[c1];
do_exactf:
- e = do_utf8 ? s + ln : strend - ln;
+ e = HOP3c(strend, -(I32)ln, s);
if (norun && e < s)
e = s; /* Due to minlen logic of intuit() */
if (c1 == c2) {
while (s <= e) {
- c = utf8_to_uvchr((U8*)s, &len);
+ c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
if ( c == c1
&& (ln == len ||
ibcmp_utf8(s, (char **)0, 0, do_utf8,
}
else {
while (s <= e) {
- c = utf8_to_uvchr((U8*)s, &len);
+ c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
/* Handle some of the three Greek sigmas cases.
* Note that not all the possible combinations
if (s == PL_bostr)
tmp = '\n';
else {
- U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
+ U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
- if (s > (char*)r)
- tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
+ tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
if (s == PL_bostr)
tmp = '\n';
else {
- U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
+ U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
- if (s > (char*)r)
- tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
+ tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
if (l >= PL_regeol)
sayNO;
if (NATIVE_TO_UNI(*(U8*)s) !=
- utf8_to_uvuni((U8*)l, &ulen))
+ utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY))
sayNO;
l += ulen;
s ++;
if (l >= PL_regeol)
sayNO;
if (NATIVE_TO_UNI(*((U8*)l)) !=
- utf8_to_uvuni((U8*)s, &ulen))
+ utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY))
sayNO;
s += ulen;
l ++;
if (locinput == PL_bostr)
ln = '\n';
else {
- U8 *r = reghop((U8*)locinput, -1);
+ U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
- ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
+ ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
}
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM_uni(ln);
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
- c1 = utf8_to_uvuni(tmpbuf1, 0);
- c2 = utf8_to_uvuni(tmpbuf2, 0);
+ c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
+ c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
}
else {
- c2 = c1 = utf8_to_uvchr(s, NULL);
+ c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
}
}
}
if (c1 != -1000) {
char *e; /* Should not check after this */
char *old = locinput;
+ int count = 0;
if (n == REG_INFTY) {
e = PL_regeol - 1;
e = PL_regeol - 1;
}
while (1) {
- int count;
/* Find place 'next' could work */
if (!do_utf8) {
if (c1 == c2) {
else {
STRLEN len;
if (c1 == c2) {
- for (count = 0;
- locinput <= e &&
- utf8_to_uvchr((U8*)locinput, &len) != c1;
- count++)
+ /* count initialised to
+ * utf8_distance(old, locinput) */
+ while (locinput <= e &&
+ utf8n_to_uvchr((U8*)locinput,
+ UTF8_MAXLEN, &len,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY) != c1) {
locinput += len;
-
+ count++;
+ }
} else {
- for (count = 0; locinput <= e; count++) {
- UV c = utf8_to_uvchr((U8*)locinput, &len);
+ /* count initialised to
+ * utf8_distance(old, locinput) */
+ while (locinput <= e) {
+ UV c = utf8n_to_uvchr((U8*)locinput,
+ UTF8_MAXLEN, &len,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
if (c == c1 || c == c2)
break;
- locinput += len;
+ locinput += len;
+ count++;
}
}
}
locinput += UTF8SKIP(locinput);
else
locinput++;
+ count = 1;
}
}
else
UV c;
if (c1 != -1000) {
if (do_utf8)
- c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+ c = utf8n_to_uvchr((U8*)PL_reginput,
+ UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
else
c = UCHARAT(PL_reginput);
/* If it could work, try it. */
while (n >= ln) {
if (c1 != -1000) {
if (do_utf8)
- c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+ c = utf8n_to_uvchr((U8*)PL_reginput,
+ UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
else
c = UCHARAT(PL_reginput);
}
while (n >= ln) {
if (c1 != -1000) {
if (do_utf8)
- c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+ c = utf8n_to_uvchr((U8*)PL_reginput,
+ UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
else
c = UCHARAT(PL_reginput);
}
STRLEN len = 0;
STRLEN plen;
- c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
+ c = do_utf8 ? utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY) : *p;
plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
if (do_utf8 || (flags & ANYOF_UNICODE)) {