From: Nick Ing-Simmons Date: Sun, 18 Mar 2001 15:23:51 +0000 (+0000) Subject: Prefer !UTF8_IS_INVARIANT() over UTF8_IS_CONTINUED() when that X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=63cd067459124b5dc1d2ae98453df4ffdff11607;p=p5sagit%2Fp5-mst-13.2.git Prefer !UTF8_IS_INVARIANT() over UTF8_IS_CONTINUED() when that is the sense of the test being done. Avoid some magical 127 and 128 values by using macros. p4raw-id: //depot/perlio@9199 --- diff --git a/doop.c b/doop.c index bd66b42..e4a516a 100644 --- a/doop.c +++ b/doop.c @@ -317,7 +317,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ if (!isutf8) { U8 *t = s, *e = s + len; while (t < e) - if ((hibit = UTF8_IS_CONTINUED(*t++))) + if ((hibit = !UTF8_IS_INVARIANT(*t++))) break; if (hibit) s = bytes_to_utf8(s, &len); diff --git a/pp_ctl.c b/pp_ctl.c index 93b89b1..f08f8bf 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -551,7 +551,13 @@ PP(pp_formline) if (item_is_utf) { while (arg--) { if (UTF8_IS_CONTINUED(*s)) { - switch (UTF8SKIP(s)) { + STRLEN skip = UTF8SKIP(s); + switch (skip) { + default: + Move(s,t,skip,char); + s += skip; + t += skip; + break; case 7: *t++ = *s++; case 6: *t++ = *s++; case 5: *t++ = *s++; diff --git a/sv.c b/sv.c index 4d3181a..18c5ac9 100644 --- a/sv.c +++ b/sv.c @@ -2978,7 +2978,7 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv) e = (U8 *) SvEND(sv); t = s; while (t < e) { - if ((hibit = UTF8_IS_CONTINUED(NATIVE_TO_ASCII(*t++)))) + if ((hibit = !UTF8_IS_INVARIANT(*t++))) break; } if (hibit) { @@ -3097,8 +3097,8 @@ bool Perl_sv_utf8_decode(pTHX_ register SV *sv) { if (SvPOK(sv)) { - char *c; - char *e; + U8 *c; + U8 *e; /* The octets may have got themselves encoded - get them back as bytes */ if (!sv_utf8_downgrade(sv, TRUE)) @@ -3107,12 +3107,12 @@ Perl_sv_utf8_decode(pTHX_ register SV *sv) /* it is actually just a matter of turning the utf8 flag on, but * we want to make sure everything inside is valid utf8 first. */ - c = SvPVX(sv); - if (!is_utf8_string((U8*)c, SvCUR(sv)+1)) + c = (U8 *) SvPVX(sv); + if (!is_utf8_string(c, SvCUR(sv)+1)) return FALSE; - e = SvEND(sv); + e = (U8 *) SvEND(sv); while (c < e) { - if (UTF8_IS_CONTINUED(*c++)) { + if (!UTF8_IS_INVARIANT(*c++)) { SvUTF8_on(sv); break; } @@ -7127,7 +7127,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'c': uv = args ? va_arg(*args, int) : SvIVx(argsv); - if ((uv > 255 || (uv > 127 && SvUTF8(sv))) && !IN_BYTE) { + if ((uv > 255 || (!UTF8_IS_INVARIANT(uv) && SvUTF8(sv))) && !IN_BYTE) { eptr = (char*)utf8buf; elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; is_utf = TRUE; diff --git a/toke.c b/toke.c index 3652c11..53159f3 100644 --- a/toke.c +++ b/toke.c @@ -1457,7 +1457,7 @@ S_scan_const(pTHX_ char *start) /* We need to map to chars to ASCII before doing the tests to cover EBCDIC */ - if (NATIVE_TO_UNI(uv) > 127) { + if (!UTF8_IS_INVARIANT(uv)) { if (!has_utf8 && uv > 255) { /* Might need to recode whatever we have * accumulated so far if it contains any @@ -1467,13 +1467,13 @@ S_scan_const(pTHX_ char *start) * this rescan? --jhi) */ int hicount = 0; - char *c; - for (c = SvPVX(sv); c < d; c++) { - if (UTF8_IS_CONTINUED(NATIVE_TO_ASCII(*c))) { + U8 *c; + for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) { + if (!UTF8_IS_INVARIANT(*c)) { hicount++; } } - if (hicount || NATIVE_TO_ASCII('A') != 'A') { + if (hicount) { STRLEN offset = d - SvPVX(sv); U8 *src, *dst; d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset; @@ -1481,13 +1481,13 @@ S_scan_const(pTHX_ char *start) dst = src+hicount; d += hicount; while (src >= (U8 *)SvPVX(sv)) { - U8 ch = NATIVE_TO_ASCII(*src); - if (UTF8_IS_CONTINUED(ch)) { + if (!UTF8_IS_INVARIANT(*src)) { + U8 ch = NATIVE_TO_ASCII(*src); *dst-- = UTF8_EIGHT_BIT_LO(ch); *dst-- = UTF8_EIGHT_BIT_HI(ch); } else { - *dst-- = ch; + *dst-- = *src; } src--; } @@ -1603,9 +1603,8 @@ S_scan_const(pTHX_ char *start) } /* end if (backslash) */ default_action: -#ifndef EBCDIC /* The 'has_utf8' here is very dubious */ - if (UTF8_IS_CONTINUED(NATIVE_TO_ASCII(*s)) && (this_utf8 || has_utf8)) { + if (!UTF8_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { STRLEN len = (STRLEN) -1; UV uv; if (this_utf8) { @@ -1630,7 +1629,6 @@ S_scan_const(pTHX_ char *start) } continue; } -#endif *d++ = NATIVE_TO_NEED(has_utf8,*s++); } /* while loop to process each character */ @@ -3726,7 +3724,7 @@ Perl_yylex(pTHX) missingterm((char*)0); yylval.ival = OP_CONST; for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { - if (*d == '$' || *d == '@' || *d == '\\' || UTF8_IS_CONTINUED(*d)) { + if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { yylval.ival = OP_STRINGIFY; break; } @@ -6698,7 +6696,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; - if (UTF8_IS_CONTINUED(term) && UTF) + if (!UTF8_IS_INVARIANT((U8)term) && UTF) has_utf8 = TRUE; /* mark where we are */ @@ -6745,7 +6743,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) have found the terminator */ else if (*s == term) break; - else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF) + else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; } @@ -6774,7 +6772,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) break; else if (*s == PL_multi_open) brackets++; - else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF) + else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; }