/* utf8.c
*
- * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
* by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
#define PERL_IN_UTF8_C
#include "perl.h"
+#ifndef EBCDIC
+/* Separate prototypes needed because in ASCII systems these
+ * usually macros but they still are compiled as code, too. */
+PERL_CALLCONV UV Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags);
+PERL_CALLCONV U8* Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv);
+#endif
+
static const char unees[] =
"Malformed UTF-8 character (unexpected end of string)";
bool
Perl_is_utf8_string(pTHX_ const U8 *s, STRLEN len)
{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
const U8* x = s;
- const U8* send;
PERL_UNUSED_CONTEXT;
- if (!len)
- len = strlen((const char *)s);
- send = s + len;
while (x < send) {
STRLEN c;
bool
Perl_is_utf8_string_loclen(pTHX_ const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
const U8* x = s;
- const U8* send;
STRLEN c;
+ STRLEN outlen = 0;
PERL_UNUSED_CONTEXT;
- if (!len)
- len = strlen((const char *)s);
- send = s + len;
- if (el)
- *el = 0;
-
while (x < send) {
/* Inline the easy bits of is_utf8_char() here for speed... */
if (UTF8_IS_INVARIANT(*x))
goto out;
}
x += c;
- if (el)
- (*el)++;
+ outlen++;
}
out:
+ if (el)
+ *el = outlen;
+
if (ep)
*ep = x;
- if (x != send)
- return FALSE;
-
- return TRUE;
+ return (x == send);
}
/*
if (flags & UTF8_CHECK_ONLY) {
if (retlen)
- *retlen = -1;
+ *retlen = ((STRLEN) -1);
return 0;
}
{
dVAR;
STRLEN len = 0;
+ U8 t = 0;
/* Note: cannot use UTF8_IS_...() too eagerly here since e.g.
* the bitops (especially ~) can create illegal UTF-8.
if (e < s)
goto warn_and_return;
while (s < e) {
- const U8 t = UTF8SKIP(s);
+ t = UTF8SKIP(s);
if (e - s < t) {
warn_and_return:
if (ckWARN_d(WARN_UTF8)) {
updates len to contain the new length.
Returns zero on failure, setting C<len> to -1.
+If you need a copy of the string, see C<bytes_from_utf8>.
+
=cut
*/
if (!UTF8_IS_INVARIANT(c) &&
(!UTF8_IS_DOWNGRADEABLE_START(c) || (s >= send)
|| !(c = *s++) || !UTF8_IS_CONTINUATION(c))) {
- *len = -1;
+ *len = ((STRLEN) -1);
return 0;
}
}
}
}
- *is_utf8 = 0;
+ *is_utf8 = FALSE;
Newx(d, (*len) - count + 1, U8);
s = start; start = d;
}
if (bytelen & 1)
- Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %"UVf, (UV)bytelen);
+ Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %"UVuf, (UV)bytelen);
pend = p + bytelen;
UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
p += 2;
if (uv < 0x80) {
+#ifdef EBCDIC
+ *d++ = UNI_TO_NATIVE(uv);
+#else
*d++ = (U8)uv;
+#endif
continue;
}
if (uv < 0x800) {
if (special && (uv1 == 0xDF || uv1 > 0xFF)) {
/* It might be "special" (sometimes, but not always,
* a multicharacter mapping) */
- HV *hv;
+ HV * const hv = get_hv(special, FALSE);
SV **svp;
- if ((hv = get_hv(special, FALSE)) &&
+ if (hv &&
(svp = hv_fetch(hv, (const char*)tmpbuf, UNISKIP(uv1), FALSE)) &&
(*svp)) {
const char *s;
dSP;
const size_t pkg_len = strlen(pkg);
const size_t name_len = strlen(name);
- HV * const stash = gv_stashpvn(pkg, pkg_len, FALSE);
+ HV * const stash = gv_stashpvn(pkg, pkg_len, 0);
SV* errsv_save;
PUSHSTACKi(PERLSI_MAGIC);
if (!SvROK(retval) || SvTYPE(SvRV(retval)) != SVt_PVHV) {
if (SvPOK(retval))
Perl_croak(aTHX_ "Can't find Unicode property definition \"%"SVf"\"",
- (void*)retval);
+ SVfARG(retval));
Perl_croak(aTHX_ "SWASHNEW didn't return an HV ref");
}
return retval;
U32 bit;
SV *swatch;
U8 tmputf8[2];
- UV c = NATIVE_TO_ASCII(*ptr);
+ const UV c = NATIVE_TO_ASCII(*ptr);
if (!do_utf8 && !UNI_IS_INVARIANT(c)) {
tmputf8[0] = (U8)UTF8_EIGHT_BIT_HI(c);
return (tmps[off] << 24) + (tmps[off+1] << 16) + (tmps[off+2] << 8) + tmps[off + 3] ;
}
Perl_croak(aTHX_ "panic: swash_fetch got swatch of unexpected bit width");
+ NORETURN_FUNCTION_END;
}
/* Note:
l = (U8*)SvPV(*listsvp, lcur);
lend = l + lcur;
while (l < lend) {
- UV min, max, val, key;
+ UV min, max, val;
STRLEN numlen;
I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_DISALLOW_PREFIX;
continue;
if (octets) {
+ UV key;
if (min < start) {
if (!none || val < none) {
val += start - min;
}
}
else { /* bits == 1, then val should be ignored */
+ UV key;
if (min < start)
min = start;
for (key = min; key <= max; key++) {
U8 *s, *o, *nl;
STRLEN slen, olen;
- U8 opc = *x++;
+ const U8 opc = *x++;
if (opc == '\n')
continue;
else {
STRLEN otheroctets = otherbits >> 3;
STRLEN offset = 0;
- U8* send = s + slen;
+ U8* const send = s + slen;
while (s < send) {
UV otherval = 0;