c = utf8_to_uv(s, send - s, &ulen, 0);
if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
matches++;
- if (ch < 0x80)
+ if (UTF8_IS_ASCII(ch))
*d++ = ch;
else
d = uv_to_utf8(d,ch);
if (!isutf8) {
U8 *t = s, *e = s + len;
while (t < e)
- if ((hibit = *t++ & 0x80))
+ if ((hibit = UTF8_IS_CONTINUED(*t++)))
break;
if (hibit)
s = bytes_to_utf8(s, &len);
if (!SvUTF8(sv)) {
U8 *t = s, *e = s + len;
while (t < e)
- if ((hibit = *t++ & 0x80))
+ if ((hibit = !UTF8_IS_ASCII(*t++)))
break;
if (hibit)
start = s = bytes_to_utf8(s, &len);
if (!isutf8) {
U8 *t = s, *e = s + len;
while (t < e)
- if ((hibit = *t++ & 0x80))
+ if ((hibit = !UTF8_IS_ASCII(*t++)))
break;
if (hibit)
s = bytes_to_utf8(s, &len);
if (!(PL_in_my == KEY_our ||
isALPHA(name[1]) ||
- (PL_hints & HINT_UTF8 && (name[1] & 0xc0) == 0xc0) ||
+ (PL_hints & HINT_UTF8 && UTF8_IS_START(name[1])) ||
(name[1] == '_' && (int)strlen(name) > 2)))
{
if (!isPRINT(name[1]) || strchr("\t\n\r\f", name[1])) {
else {
if (ckWARN(WARN_PARENTHESIS) && PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == ',') {
char *s;
- for (s = PL_bufptr; *s && (isALNUM(*s) || (*s & 0x80) || strchr("@$%, ",*s)); s++) ;
+ for (s = PL_bufptr; *s && (isALNUM(*s) || UTF8_IS_CONTINUED(*s) || strchr("@$%, ",*s)); s++) ;
if (*s == ';' || *s == '=')
Perl_warner(aTHX_ WARN_PARENTHESIS,
"Parentheses missing around \"%s\" list",
sv_setsv(TARG, sv);
*SvPV_force(TARG, len) = *s == '-' ? '+' : '-';
}
- else if (DO_UTF8(sv) && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) {
+ else if (DO_UTF8(sv) && UTF8_IS_START(*s) && isIDFIRST_utf8((U8*)s)) {
sv_setpvn(TARG, "-", 1);
sv_catsv(TARG, sv);
}
(void)SvUPGRADE(TARG,SVt_PV);
- if ((value > 255 && !IN_BYTE) || (value & 0x80 && PL_hints & HINT_UTF8) ) {
+ if ((value > 255 && !IN_BYTE) ||
+ (UTF8_IS_CONTINUED(value) && (PL_hints & HINT_UTF8)) ) {
SvGROW(TARG, UTF8_MAXLEN+1);
tmps = SvPVX(TARG);
tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value);
register U8 *s;
STRLEN slen;
- if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
+ if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && UTF8_IS_START(*s)) {
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tend;
register U8 *s;
STRLEN slen;
- if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
+ if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && UTF8_IS_START(*s)) {
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tend;
d = SvPVX(TARG);
if (DO_UTF8(sv)) {
while (len) {
- if (*s & 0x80) {
+ if (UTF8_IS_CONTINUED(*s)) {
STRLEN ulen = UTF8SKIP(s);
if (ulen > len)
ulen = len;
while ((len > 0) && (s < strend)) {
auv = (auv << 7) | (*s & 0x7f);
- if (!(*s++ & 0x80)) {
+ if (UTF8_IS_ASCII(*s++)) {
bytes = 0;
sv = NEWSV(40, 0);
sv_setuv(sv, auv);
s = item;
if (item_is_utf) {
while (arg--) {
- if (*s & 0x80) {
+ if (UTF8_IS_CONTINUED(*s)) {
switch (UTF8SKIP(s)) {
case 7: *t++ = *s++;
case 6: *t++ = *s++;
!(ob=(SV*)GvIO(iogv)))
{
if (!packname ||
- ((*(U8*)packname >= 0xc0 && DO_UTF8(sv))
+ ((UTF8_IS_START(*packname) && DO_UTF8(sv))
? !isIDFIRST_utf8((U8*)packname)
: !isIDFIRST(*packname)
))
continue;
#endif
/* utf8 characters don't count as odd */
- if (*s & 0x40) {
+ if (UTF8_IS_START(*s)) {
int ulen = UTF8SKIP(s);
if (ulen < len - i) {
int j;
for (j = 1; j < ulen; j++) {
- if ((s[j] & 0xc0) != 0x80)
+ if (!UTF8_IS_CONTINUATION(s[j]))
goto not_utf8;
}
--ulen; /* loop does extra increment */
break;
default:
normal_default:
- if ((*p & 0xc0) == 0xc0 && UTF) {
+ if (UTF8_IS_START(*p) && UTF) {
ender = utf8_to_uv((U8*)p, RExC_end - p,
&numlen, 0);
p += numlen;
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
+ /* ender is a Unicode value, so it can be > 0xff and is not a single
+ * UTF-8 byte: test it against 0x80 directly, do not use UTF8_IS_CONTINUED(). */
else if (ender >= 0x80 && UTF) {
reguni(pRExC_state, ender, s, &numlen);
s += numlen;
}
break;
}
+ /* ender is a Unicode value, so it can be > 0xff and is not a single
+ * UTF-8 byte: test it against 0x80 directly, do not use UTF8_IS_CONTINUED(). */
if (ender >= 0x80 && UTF) {
reguni(pRExC_state, ender, s, &numlen);
s += numlen;
if (!nextchr)
sayNO;
if (do_utf8) {
- if (nextchr & 0x80) {
+ if (UTF8_IS_CONTINUED(nextchr)) {
if (!(OP(scan) == SPACE
? swash_fetch(PL_utf8_space, (U8*)locinput)
: isSPACE_LC_utf8((U8*)locinput)))
e = SvEND(sv);
t = s;
while (t < e) {
- if ((hibit = *t++ & 0x80))
+ if ((hibit = UTF8_IS_CONTINUED(*t++)))
break;
}
return FALSE;
e = SvEND(sv);
while (c < e) {
- if (*c++ & 0x80) {
+ if (UTF8_IS_CONTINUED(*c++)) {
SvUTF8_on(sv);
break;
}
int hicount = 0;
char *c;
for (c = SvPVX(sv); c < d; c++) {
- if (*c & 0x80)
+ if (UTF8_IS_CONTINUED(*c))
hicount++;
}
if (hicount) {
dst = d - 1;
while (src < dst) {
- if (*src & 0x80) {
+ if (UTF8_IS_CONTINUED(*src)) {
dst--;
uv_to_utf8((U8*)dst, (U8)*src--);
dst--;
/* (now in tr/// code again) */
- if (*s & 0x80 && (this_utf8 || has_utf8)) {
+ if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
STRLEN len = (STRLEN) -1;
UV uv;
if (this_utf8) {
missingterm((char*)0);
yylval.ival = OP_CONST;
for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
- if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
+ if (*d == '$' || *d == '@' || *d == '\\' || UTF8_IS_CONTINUED(*d)) {
yylval.ival = OP_STRINGIFY;
break;
}
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark((U8*)t))
+ while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
Perl_croak(aTHX_ ident_too_long);
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
+ else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
char *t = s + UTF8SKIP(s);
- while (*t & 0x80 && is_utf8_mark((U8*)t))
+ while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
t += UTF8SKIP(t);
if (d + (t - s) > e)
Perl_croak(aTHX_ ident_too_long);
e = s;
while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
e += UTF8SKIP(e);
- while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
+ while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
e += UTF8SKIP(e);
}
Copy(s, d, e - s, char);
/* after skipping whitespace, the next character is the terminator */
term = *s;
- if ((term & 0x80) && UTF)
+ if (UTF8_IS_CONTINUED(term) && UTF)
has_utf8 = TRUE;
/* mark where we are */
have found the terminator */
else if (*s == term)
break;
- else if (!has_utf8 && (*s & 0x80) && UTF)
+ else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
has_utf8 = TRUE;
*to = *s;
}
break;
else if (*s == PL_multi_open)
brackets++;
- else if (!has_utf8 && (*s & 0x80) && UTF)
+ else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
has_utf8 = TRUE;
*to = *s;
}