#include "perl.h"
#if PERL_VERSION >= 9
-#define PERL_PACK_CAN_BYTEORDER
-#define PERL_PACK_CAN_SHRIEKSIGN
+# define PERL_PACK_CAN_BYTEORDER
+# define PERL_PACK_CAN_SHRIEKSIGN
+#endif
+
+#ifndef CHAR_BIT
+# define CHAR_BIT 8
#endif
/*
# define OFF32(p) ((char *) (p))
#endif
-#define COPY16(s,p) Copy(s, OFF16(p), SIZE16, char)
-#define COPY32(s,p) Copy(s, OFF32(p), SIZE32, char)
-#define CAT16(sv,p) sv_catpvn(sv, OFF16(p), SIZE16)
-#define CAT32(sv,p) sv_catpvn(sv, OFF32(p), SIZE32)
+/* Only to be used inside a loop (see the break) */
+#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
+ if (utf8) { \
+ if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
+ } else { \
+ Copy(s, OFF16(p), SIZE16, char); \
+ (s) += SIZE16; \
+ } \
+} STMT_END
+
+/* Only to be used inside a loop (see the break) */
+#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
+ if (utf8) { \
+ if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
+ } else { \
+ Copy(s, OFF32(p), SIZE32, char); \
+ (s) += SIZE32; \
+ } \
+} STMT_END
+
+#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
+#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
/* Only to be used inside a loop (see the break) */
-#define COPYVAR(s,strend,utf8,var,format) \
+#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
STMT_START { \
if (utf8) { \
- if (!next_uni_bytes(aTHX_ &s, strend, \
- (char *) &var, sizeof(var))) break; \
+ if (!uni_to_bytes(aTHX_ &s, strend, \
+ (char *) &var, sizeof(var), datumtype)) break;\
} else { \
Copy(s, (char *) &var, sizeof(var), char); \
s += sizeof(var); \
} \
- DO_BO_UNPACK(var, format); \
} STMT_END
+#define PUSH_VAR(utf8, aptr, var) \
+ PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
+
/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
#define MAX_SUB_TEMPLATE_LEVEL 100
/* flags (note that type modifiers can also be used as flags!) */
-#define FLAG_UNPACK_WAS_UTF8 0x40 /* original had FLAG_UNPACK_DO_UTF8 */
-#define FLAG_UNPACK_PARSE_UTF8 0x20 /* Parse as utf8 */
+#define FLAG_WAS_UTF8 0x40
+#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
#define FLAG_UNPACK_ONLY_ONE 0x10
-#define FLAG_UNPACK_DO_UTF8 0x08 /* The underlying string is utf8 */
+#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
#define FLAG_SLASH 0x04
#define FLAG_COMMA 0x02
#define FLAG_PACK 0x01
#define TYPE_IS_SHRIEKING 0x100
#define TYPE_IS_BIG_ENDIAN 0x200
#define TYPE_IS_LITTLE_ENDIAN 0x400
+#define TYPE_IS_PACK 0x800
#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
# define DO_BO_UNPACK_P(var)
# define DO_BO_PACK_P(var)
-#else
+#else /* PERL_PACK_CAN_BYTEORDER */
# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
# endif
-#endif
+#endif /* PERL_PACK_CAN_BYTEORDER */
#define PACK_SIZE_CANNOT_CSUM 0x80
-#define PACK_SIZE_SPARE 0x40
+#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
#define PACK_SIZE_MASK 0x3F
0,
/* S */ SIZE16,
0,
- /* U */ sizeof(char),
+ /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
/* V */ SIZE32,
- /* W */ sizeof(unsigned char),
+ /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* c */ sizeof(char),
/* d */ sizeof(double),
/* s */ SIZE16,
0, 0,
/* v */ SIZE16,
- /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
+ /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
};
unsigned char size_shrieking[46] = {
/* I */ sizeof(unsigned int),
/* s */ SIZE16,
0, 0,
/* v */ SIZE16,
- /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
+ /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0,
/* C */ sizeof(unsigned char),
0, 0, 0, 0, 0, 0, 0, 0, 0,
/* S */ SIZE16,
0,
- /* U */ sizeof(char),
+ /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
/* V */ SIZE32,
- /* W */ sizeof(unsigned char),
+ /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
};
unsigned char size_shrieking[93] = {
/* i */ sizeof(int),
#endif
STATIC U8
-next_uni_byte(pTHX_ char **s, const char *end, I32 datumtype)
+uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
{
UV val;
STRLEN retlen;
- val =
- UNI_TO_NATIVE(utf8n_to_uvuni((U8*)*s, end-*s, &retlen,
- ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY));
+ val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
/* We try to process malformed UTF-8 as much as possible (preferrably with
warnings), but these two mean we make no progress in the string and
might enter an infinite loop */
if (retlen == (STRLEN) -1 || retlen == 0)
- Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+ Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
if (val >= 0x100) {
+ if (ckWARN(WARN_UNPACK))
Perl_warner(aTHX_ packWARN(WARN_UNPACK),
"Character in '%c' format wrapped in unpack",
- (int) datumtype);
+ (int) TYPE_NO_MODIFIERS(datumtype));
val &= 0xff;
}
*s += retlen;
return val;
}
-#define NEXT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
- next_uni_byte(aTHX_ &(s), (strend), (datumtype)) : \
+#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
+ uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
*(U8 *)(s)++)
STATIC bool
-next_uni_bytes(pTHX_ char **s, char *end, char *buf, int buf_len)
+uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
{
UV val;
STRLEN retlen;
UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
for (;buf_len > 0; buf_len--) {
if (from >= end) return FALSE;
- val = UNI_TO_NATIVE(utf8n_to_uvuni((U8*)from, end-from, &retlen, flags));
+ val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
if (retlen == (STRLEN) -1 || retlen == 0) {
from += UTF8SKIP(from);
bad |= 1;
flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
if (ptr >= end) break;
- utf8n_to_uvuni((U8*)ptr, end-ptr, &retlen, flags);
+ utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
}
if (from > end) from = end;
}
if ((bad & 2) && ckWARN(WARN_UNPACK))
- Perl_warner(aTHX_ packWARN(WARN_UNPACK),
- "Character(s) wrapped in unpack");
+ Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
+ WARN_PACK : WARN_UNPACK),
+ "Character(s) in '%c' format wrapped in %s",
+ (int) TYPE_NO_MODIFIERS(datumtype),
+ datumtype & TYPE_IS_PACK ? "pack" : "unpack");
}
*s = from;
return TRUE;
{
UV val;
STRLEN retlen;
- char *from = *s;
- val = UNI_TO_NATIVE(utf8n_to_uvuni((U8*)*s, end-*s, &retlen, UTF8_CHECK_ONLY));
+ val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
if (val >= 0x100 || !ISUUCHAR(val) ||
retlen == (STRLEN) -1 || retlen == 0) {
*out = 0;
return FALSE;
}
*out = PL_uudmap[val] & 077;
- *s = from;
+ *s += retlen;
return TRUE;
}
+STATIC void
+bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
+ U8 buffer[UTF8_MAXLEN];
+ U8 *end = start + len;
+ char *d = *dest;
+ while (start < end) {
+ int length =
+ uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
+ switch(length) {
+ case 1:
+ *d++ = buffer[0];
+ break;
+ case 2:
+ *d++ = buffer[0];
+ *d++ = buffer[1];
+ break;
+ default:
+ Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
+ *start, length);
+ }
+ start++;
+ }
+ *dest = d;
+}
+
+#define PUSH_BYTES(utf8, cur, buf, len) \
+STMT_START { \
+ if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
+ else { \
+ Copy(buf, cur, len, char); \
+ (cur) += (len); \
+ } \
+} STMT_END
+
+#define GROWING(utf8, cat, start, cur, in_len) \
+STMT_START { \
+ STRLEN glen = (in_len); \
+ if (utf8) glen *= 2; \
+ if ((cur) + glen >= (start) + SvLEN(cat)) { \
+ (start) = sv_exp_grow(aTHX_ cat, glen); \
+ (cur) = (start) + SvCUR(cat); \
+ } \
+} STMT_END
+
+#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
+STMT_START { \
+ STRLEN glen = (in_len); \
+ STRLEN gl = glen; \
+ if (utf8) gl *= 2; \
+ if ((cur) + gl >= (start) + SvLEN(cat)) { \
+ *cur = '\0'; \
+ SvCUR(cat) = (cur) - (start); \
+ (start) = sv_exp_grow(aTHX_ cat, gl); \
+ (cur) = (start) + SvCUR(cat); \
+ } \
+ PUSH_BYTES(utf8, cur, buf, glen); \
+} STMT_END
+
+#define PUSH_BYTE(utf8, s, byte) \
+STMT_START { \
+ if (utf8) { \
+ U8 au8 = (byte); \
+ bytes_to_uni(aTHX_ &au8, 1, &(s)); \
+ } else *(U8 *)(s)++ = (byte); \
+} STMT_END
+
+/* Only to be used inside a loop (see the break) */
+#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
+STMT_START { \
+ STRLEN retlen; \
+ if (str >= end) break; \
+ val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
+ if (retlen == (STRLEN) -1 || retlen == 0) { \
+ *cur = '\0'; \
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
+ } \
+ str += retlen; \
+} STMT_END
+
/* Returns the sizeof() struct described by pat */
STATIC I32
-S_measure_struct(pTHX_ register tempsym_t* symptr)
+S_measure_struct(pTHX_ tempsym_t* symptr)
{
- register I32 len = 0;
- register I32 total = 0;
- int star;
-
- register int size;
+ I32 total = 0;
while (next_symbol(symptr)) {
- int which = (symptr->code & TYPE_IS_SHRIEKING)
- ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
- int offset
- = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
-
- switch( symptr->howlen ){
- case e_no_len:
- case e_number:
- len = symptr->length;
- break;
+ I32 len;
+ int star, size;
+ int which = (symptr->code & TYPE_IS_SHRIEKING) ?
+ PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
+ int offset = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
+
+ switch (symptr->howlen) {
case e_star:
Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
break;
+ default:
+ /* e_no_len and e_number */
+ len = symptr->length;
+ break;
}
if ((offset >= 0) && (offset < packsize[which].size))
* locates next template code and count
*/
STATIC bool
-S_next_symbol(pTHX_ register tempsym_t* symptr )
+S_next_symbol(pTHX_ tempsym_t* symptr )
{
- register char* patptr = symptr->patptr;
- register char* patend = symptr->patend;
+ char* patptr = symptr->patptr;
+ char* patend = symptr->patend;
+ const char *allowed = "";
symptr->flags &= ~FLAG_SLASH;
/* look for modifiers */
while (patptr < patend) {
- const char *allowed;
I32 modifier = 0;
switch (*patptr) {
case '!':
modifier = TYPE_IS_LITTLE_ENDIAN;
allowed = ENDIANNESS_ALLOWED_TYPES;
break;
-#endif
+#endif /* PERL_PACK_CAN_BYTEORDER */
default:
break;
}
=cut */
I32
-Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
+Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
{
tempsym_t sym = { 0 };
- if (flags & FLAG_UNPACK_DO_UTF8) flags |= FLAG_UNPACK_WAS_UTF8;
+ if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
else if (need_utf8(pat, patend)) {
/* We probably should try to avoid this in case a scalar context call
wouldn't get to the "U0" */
STRLEN len = strend - s;
- s = (char*)bytes_to_utf8((U8*)s, &len);
+ s = (char *) bytes_to_utf8(s, &len);
SAVEFREEPV(s);
strend = s + len;
- flags |= FLAG_UNPACK_DO_UTF8;
+ flags |= FLAG_DO_UTF8;
}
- if (first_symbol(pat, patend) != 'U' && (flags & FLAG_UNPACK_DO_UTF8))
- flags |= FLAG_UNPACK_PARSE_UTF8;
+ if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
+ flags |= FLAG_PARSE_UTF8;
sym.patptr = pat;
sym.patend = patend;
=cut */
I32
-Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char *strend, U32 flags)
+Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
{
tempsym_t sym = { 0 };
- if (flags & FLAG_UNPACK_DO_UTF8) flags |= FLAG_UNPACK_WAS_UTF8;
+ if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
else if (need_utf8(pat, patend)) {
/* We probably should try to avoid this in case a scalar context call
wouldn't get to the "U0" */
STRLEN len = strend - s;
- s = (char*)bytes_to_utf8((U8*)s, &len);
+ s = (char *) bytes_to_utf8(s, &len);
SAVEFREEPV(s);
strend = s + len;
- flags |= FLAG_UNPACK_DO_UTF8;
+ flags |= FLAG_DO_UTF8;
}
- if (first_symbol(pat, patend) != 'U' && (flags & FLAG_UNPACK_DO_UTF8))
- flags |= FLAG_UNPACK_PARSE_UTF8;
+ if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
+ flags |= FLAG_PARSE_UTF8;
sym.patptr = pat;
sym.patend = patend;
S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
{
dSP;
- I32 datumtype, ai32;
- I32 len = 0;
SV *sv;
I32 start_sp_offset = SP - PL_stack_base;
howlen_t howlen;
I32 checksum = 0;
UV cuv = 0;
NV cdouble = 0.0;
- const int bits_in_uv = 8 * sizeof(cuv);
+ const int bits_in_uv = CHAR_BIT * sizeof(cuv);
char* strrelbeg = s;
bool beyond = FALSE;
bool explicit_length;
bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
- bool utf8 = (symptr->flags & FLAG_UNPACK_PARSE_UTF8) ? 1 : 0;
+ bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
while (next_symbol(symptr)) {
- datumtype = symptr->code;
+ I32 len, ai32;
+ I32 datumtype = symptr->code;
/* do first one only unless in list context
/ is implemented by unpacking the count, then popping it from the
stack, so must check that we're not in the middle of a / */
&& (datumtype != '/') ) /* XXX can this be omitted */
break;
- switch( howlen = symptr->howlen ){
- case e_no_len:
- case e_number:
- len = symptr->length;
- break;
+ switch (howlen = symptr->howlen) {
case e_star:
len = strend - strbeg; /* long enough */
break;
+ default:
+ /* e_no_len and e_number */
+ len = symptr->length;
+ break;
}
explicit_length = TRUE;
redo_switch:
beyond = s >= strend;
{
- int which = (symptr->code & TYPE_IS_SHRIEKING)
- ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
+ struct packsize_t *pack_props =
+ &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
+ PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
const int rawtype = TYPE_NO_MODIFIERS(datumtype);
- int offset = rawtype - packsize[which].first;
+ int offset = rawtype - pack_props->first;
- if (offset >= 0 && offset < packsize[which].size) {
+ if (offset >= 0 && offset < pack_props->size) {
/* Data about this template letter */
- unsigned char data = packsize[which].array[offset];
+ unsigned char data = pack_props->array[offset];
if (data) {
/* data nonzero means we can process this letter. */
PUTBACK;
while (len--) {
symptr->patptr = savsym.grpbeg;
- if (utf8) symptr->flags |= FLAG_UNPACK_PARSE_UTF8;
- else symptr->flags &= ~FLAG_UNPACK_PARSE_UTF8;
+ if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
+ else symptr->flags &= ~FLAG_PARSE_UTF8;
unpack_rec(symptr, s, strbeg, strend, &s);
if (s == strend && savsym.howlen == e_star)
break; /* No way to continue */
len = 1;
if (utf8) {
char *hop, *last;
- I32 l;
- for (l=len, hop = strbeg; hop < s; l++, hop += UTF8SKIP(hop))
- if (l == len) {
+ I32 l = len;
+ hop = last = strbeg;
+ while (hop < s) {
+ hop += UTF8SKIP(hop);
+ if (--l == 0) {
last = hop;
- l = 0;
+ l = len;
+ }
}
+ if (last > s)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
s = last;
break;
- } else len = (s - strbeg) % len;
+ }
+ len = (s - strbeg) % len;
/* FALL THROUGH */
case 'X':
if (utf8) {
while (len > 0) {
if (s <= strbeg)
Perl_croak(aTHX_ "'X' outside of string in unpack");
- while (UTF8_IS_CONTINUATION(*--s)) {
+ while (--s, UTF8_IS_CONTINUATION(*s)) {
if (s <= strbeg)
Perl_croak(aTHX_ "'X' outside of string in unpack");
}
case 'x' | TYPE_IS_SHRIEKING:
if (!len) /* Avoid division by 0 */
len = 1;
- if (utf8) {
- char *hop = strbeg;
- I32 l = 0;
- for (hop = strbeg; hop < s; hop += UTF8SKIP(hop)) l++;
- if (s != hop)
- Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
- ai32 = l % len;
- } else ai32 = (s - strbeg) % len;
+ if (utf8) ai32 = utf8_length(strbeg, s) % len;
+ else ai32 = (s - strbeg) % len;
if (ai32 == 0) break;
len -= ai32;
/* FALL THROUGH */
if (len > strend - s)
Perl_croak(aTHX_ "'x' outside of string in unpack");
s += len;
- };
+ }
break;
case '/':
Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
if (datumtype == 'Z') {
/* 'Z' strips stuff after first null */
- char *ptr;
- for (ptr = s; ptr < strend; ptr++) if (*ptr == 0) break;
+ char *ptr, *end;
+ end = s + len;
+ for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
sv = newSVpvn(s, ptr-s);
if (howlen == e_star) /* exact for 'Z*' */
len = ptr-s + (ptr != strend ? 1 : 0);
if (utf8) {
SvUTF8_on(sv);
/* Undo any upgrade done due to need_utf8() */
- if (!(symptr->flags & FLAG_UNPACK_WAS_UTF8))
+ if (!(symptr->flags & FLAG_WAS_UTF8))
sv_utf8_downgrade(sv, 0);
}
XPUSHs(sv_2mortal(sv));
if (bits & 128) PL_bitcount[bits]++;
}
}
- if (utf8) {
+ if (utf8)
while (len >= 8 && s < strend) {
- cuv += PL_bitcount[next_uni_byte(aTHX_ &s, strend, datumtype)];
+ cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
len -= 8;
}
- } else {
+ else
while (len >= 8) {
cuv += PL_bitcount[*(U8 *)s++];
len -= 8;
}
- }
if (len && s < strend) {
U8 bits;
- bits = NEXT_BYTE(utf8, s, strend, datumtype);
- if (datumtype == 'b') {
+ bits = SHIFT_BYTE(utf8, s, strend, datumtype);
+ if (datumtype == 'b')
while (len-- > 0) {
if (bits & 1) cuv++;
bits >>= 1;
}
- } else {
+ else
while (len-- > 0) {
if (bits & 0x80) cuv++;
bits <<= 1;
}
}
- }
break;
}
SvPOK_on(sv);
str = SvPVX(sv);
if (datumtype == 'b') {
- U8 bits;
+ U8 bits = 0;
ai32 = len;
for (len = 0; len < ai32; len++) {
if (len & 7) bits >>= 1;
else if (utf8) {
if (s >= strend) break;
- bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+ bits = uni_to_byte(aTHX_ &s, strend, datumtype);
} else bits = *(U8 *) s++;
*str++ = bits & 1 ? '1' : '0';
}
} else {
- U8 bits;
+ U8 bits = 0;
ai32 = len;
for (len = 0; len < ai32; len++) {
if (len & 7) bits <<= 1;
else if (utf8) {
if (s >= strend) break;
- bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+ bits = uni_to_byte(aTHX_ &s, strend, datumtype);
} else bits = *(U8 *) s++;
*str++ = bits & 0x80 ? '1' : '0';
}
SvPOK_on(sv);
str = SvPVX(sv);
if (datumtype == 'h') {
- U8 bits;
+ U8 bits = 0;
ai32 = len;
for (len = 0; len < ai32; len++) {
if (len & 1) bits >>= 4;
else if (utf8) {
if (s >= strend) break;
- bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+ bits = uni_to_byte(aTHX_ &s, strend, datumtype);
} else bits = * (U8 *) s++;
*str++ = PL_hexdigit[bits & 15];
}
} else {
- U8 bits;
+ U8 bits = 0;
ai32 = len;
for (len = 0; len < ai32; len++) {
if (len & 1) bits <<= 4;
else if (utf8) {
if (s >= strend) break;
- bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+ bits = uni_to_byte(aTHX_ &s, strend, datumtype);
} else bits = *(U8 *) s++;
*str++ = PL_hexdigit[(bits >> 4) & 15];
}
}
case 'c':
while (len-- > 0) {
- int aint = NEXT_BYTE(utf8, s, strend, datumtype);
+ int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
if (aint >= 128) /* fake up signed chars */
aint -= 256;
if (!checksum)
if (len == 0) {
if (explicit_length && datumtype == 'C')
/* Switch to "character" mode */
- utf8 = (symptr->flags & FLAG_UNPACK_DO_UTF8) ? 1 : 0;
+ utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
break;
}
if (datumtype == 'C' ?
- (symptr->flags & FLAG_UNPACK_DO_UTF8) &&
- !(symptr->flags & FLAG_UNPACK_WAS_UTF8) : utf8) {
+ (symptr->flags & FLAG_DO_UTF8) &&
+ !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
while (len-- > 0 && s < strend) {
UV val;
STRLEN retlen;
- val =
- UNI_TO_NATIVE(utf8n_to_uvuni((U8*)s, strend-s, &retlen,
- ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY));
+ val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
if (retlen == (STRLEN) -1 || retlen == 0)
Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
s += retlen;
if (len == 0) {
if (explicit_length) {
/* Switch to "bytes in UTF-8" mode */
- if (symptr->flags & FLAG_UNPACK_DO_UTF8) utf8 = 0;
+ if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
else
/* Should be impossible due to the need_utf8() test */
Perl_croak(aTHX_ "U0 mode on a byte string");
ptr = s;
/* Bug: warns about bad utf8 even if we are short on bytes
and will break out of the loop */
- if (!next_uni_bytes(aTHX_ &ptr, strend, (char*)result, 1))
+ if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
break;
len = UTF8SKIP(result);
- if (!next_uni_bytes(aTHX_ &ptr, strend, (char*)&result[1], len-1))
- break;
+ if (!uni_to_bytes(aTHX_ &ptr, strend,
+ &result[1], len-1, 'U')) break;
auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
s = ptr;
} else {
#if SHORTSIZE != SIZE16
while (len-- > 0) {
short ashort;
- COPYVAR(s, strend, utf8, ashort, s);
+ SHIFT_VAR(utf8, s, strend, ashort, datumtype);
+ DO_BO_UNPACK(ashort, s);
if (!checksum)
PUSHs(sv_2mortal(newSViv((IV)ashort)));
else if (checksum > bits_in_uv)
#if U16SIZE > SIZE16
ai16 = 0;
#endif
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- OFF16(&ai16), SIZE16)) break;
- } else {
- COPY16(s, &ai16);
- s += SIZE16;
- }
+ SHIFT16(utf8, s, strend, &ai16, datumtype);
DO_BO_UNPACK(ai16, 16);
#if U16SIZE > SIZE16
if (ai16 > 32767)
#if SHORTSIZE != SIZE16
while (len-- > 0) {
unsigned short aushort;
- COPYVAR(s, strend, utf8, aushort, s);
+ SHIFT_VAR(utf8, s, strend, aushort, datumtype);
+ DO_BO_UNPACK(aushort, s);
if (!checksum)
PUSHs(sv_2mortal(newSVuv((UV) aushort)));
else if (checksum > bits_in_uv)
#if U16SIZE > SIZE16
au16 = 0;
#endif
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- OFF16(&au16), SIZE16)) break;
- } else {
- COPY16(s, &au16);
- s += SIZE16;
- }
+ SHIFT16(utf8, s, strend, &au16, datumtype);
DO_BO_UNPACK(au16, 16);
#ifdef HAS_NTOHS
if (datumtype == 'n')
if (!checksum)
PUSHs(sv_2mortal(newSVuv((UV)au16)));
else if (checksum > bits_in_uv)
- cdouble += (NV)au16;
+ cdouble += (NV) au16;
else
cuv += au16;
}
# if U16SIZE > SIZE16
ai16 = 0;
# endif
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- (char *) &ai16, sizeof(ai16))) break;
- } else {
- COPY16(s, &ai16);
- s += SIZE16;
- }
+ SHIFT16(utf8, s, strend, &ai16, datumtype);
# ifdef HAS_NTOHS
if (datumtype == ('n' | TYPE_IS_SHRIEKING))
ai16 = (I16) PerlSock_ntohs((U16) ai16);
case 'i' | TYPE_IS_SHRIEKING:
while (len-- > 0) {
int aint;
- COPYVAR(s, strend, utf8, aint, i);
+ SHIFT_VAR(utf8, s, strend, aint, datumtype);
+ DO_BO_UNPACK(aint, i);
if (!checksum)
PUSHs(sv_2mortal(newSViv((IV)aint)));
else if (checksum > bits_in_uv)
case 'I' | TYPE_IS_SHRIEKING:
while (len-- > 0) {
unsigned int auint;
- COPYVAR(s, strend, utf8, auint, i);
+ SHIFT_VAR(utf8, s, strend, auint, datumtype);
+ DO_BO_UNPACK(auint, i);
if (!checksum)
PUSHs(sv_2mortal(newSVuv((UV)auint)));
else if (checksum > bits_in_uv)
case 'j':
while (len-- > 0) {
IV aiv;
+ SHIFT_VAR(utf8, s, strend, aiv, datumtype);
#if IVSIZE == INTSIZE
- COPYVAR(s, strend, utf8, aiv, i);
+ DO_BO_UNPACK(aiv, i);
#elif IVSIZE == LONGSIZE
- COPYVAR(s, strend, utf8, aiv, l);
+ DO_BO_UNPACK(aiv, l);
#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
- COPYVAR(s, strend, utf8, aiv, 64);
+ DO_BO_UNPACK(aiv, 64);
#else
Perl_croak(aTHX_ "'j' not supported on this platform");
#endif
case 'J':
while (len-- > 0) {
UV auv;
+ SHIFT_VAR(utf8, s, strend, auv, datumtype);
#if IVSIZE == INTSIZE
- COPYVAR(s, strend, utf8, auv, i);
+ DO_BO_UNPACK(auv, i);
#elif IVSIZE == LONGSIZE
- COPYVAR(s, strend, utf8, auv, l);
+ DO_BO_UNPACK(auv, l);
#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
- COPYVAR(s, strend, utf8, auv, 64);
+ DO_BO_UNPACK(auv, 64);
#else
Perl_croak(aTHX_ "'J' not supported on this platform");
#endif
#if LONGSIZE != SIZE32
while (len-- > 0) {
long along;
- COPYVAR(s, strend, utf8, along, l);
+ SHIFT_VAR(utf8, s, strend, along, datumtype);
+ DO_BO_UNPACK(along, l);
if (!checksum)
PUSHs(sv_2mortal(newSViv((IV)along)));
else if (checksum > bits_in_uv)
#if U32SIZE > SIZE32
ai32 = 0;
#endif
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- OFF32(&ai32), SIZE32)) break;
- } else {
- COPY32(s, &ai32);
- s += SIZE32;
- }
+ SHIFT32(utf8, s, strend, &ai32, datumtype);
DO_BO_UNPACK(ai32, 32);
#if U32SIZE > SIZE32
if (ai32 > 2147483647) ai32 -= 4294967296;
#if LONGSIZE != SIZE32
while (len-- > 0) {
unsigned long aulong;
- COPYVAR(s, strend, utf8, aulong, l);
+ SHIFT_VAR(utf8, s, strend, aulong, datumtype);
+ DO_BO_UNPACK(aulong, l);
if (!checksum)
PUSHs(sv_2mortal(newSVuv((UV)aulong)));
else if (checksum > bits_in_uv)
#if U32SIZE > SIZE32
au32 = 0;
#endif
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- OFF32(&au32), SIZE32)) break;
- } else {
- COPY32(s, &au32);
- s += SIZE32;
- }
+ SHIFT32(utf8, s, strend, &au32, datumtype);
DO_BO_UNPACK(au32, 32);
#ifdef HAS_NTOHL
if (datumtype == 'N')
# if U32SIZE > SIZE32
ai32 = 0;
# endif
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- OFF32(&ai32), SIZE32)) break;
- } else {
- COPY32(s, &ai32);
- s += SIZE32;
- }
+ SHIFT32(utf8, s, strend, &ai32, datumtype);
# ifdef HAS_NTOHL
if (datumtype == ('N' | TYPE_IS_SHRIEKING))
ai32 = (I32)PerlSock_ntohl((U32)ai32);
case 'p':
while (len-- > 0) {
char *aptr;
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- (char *) &aptr, sizeof(aptr))) break;
- } else {
- Copy(s, &aptr, 1, char*);
- s += sizeof(aptr);
- }
+ SHIFT_VAR(utf8, s, strend, aptr, datumtype);
DO_BO_UNPACK_P(aptr);
/* newSVpv generates undef if aptr is NULL */
PUSHs(sv_2mortal(newSVpv(aptr, 0)));
while (len > 0 && s < strend) {
U8 ch;
- ch = NEXT_BYTE(utf8, s, strend, 'w');
+ ch = SHIFT_BYTE(utf8, s, strend, datumtype);
auv = (auv << 7) | (ch & 0x7f);
/* UTF8_IS_XXXXX not right here - using constant 0x80 */
if (ch < 0x80) {
sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
while (s < strend) {
- ch = NEXT_BYTE(utf8, s, strend, 'w');
+ ch = SHIFT_BYTE(utf8, s, strend, datumtype);
sv = mul128(sv, (U8)(ch & 0x7f));
if (!(ch & 0x80)) {
bytes = 0;
EXTEND(SP, 1);
if (sizeof(char*) <= strend - s) {
char *aptr;
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend, (char *) &aptr,
- sizeof(aptr))) break;
- } else {
- Copy(s, &aptr, 1, char*);
- s += sizeof(aptr);
- }
+ SHIFT_VAR(utf8, s, strend, aptr, datumtype);
DO_BO_UNPACK_P(aptr);
/* newSVpvn generates undef if aptr is NULL */
PUSHs(sv_2mortal(newSVpvn(aptr, len)));
case 'q':
while (len-- > 0) {
Quad_t aquad;
- COPYVAR(s, strend, utf8, aquad, 64);
+ SHIFT_VAR(utf8, s, strend, aquad, datumtype);
+ DO_BO_UNPACK(aquad, 64);
if (!checksum)
PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
newSViv((IV)aquad) : newSVnv((NV)aquad)));
case 'Q':
while (len-- > 0) {
Uquad_t auquad;
- COPYVAR(s, strend, utf8, auquad, 64);
+ SHIFT_VAR(utf8, s, strend, auquad, datumtype);
+ DO_BO_UNPACK(auquad, 64);
if (!checksum)
PUSHs(sv_2mortal(auquad <= UV_MAX ?
newSVuv((UV)auquad):newSVnv((NV)auquad)));
case 'f':
while (len-- > 0) {
float afloat;
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend, (char *) &afloat,
- sizeof(afloat))) break;
- } else {
- Copy(s, &afloat, 1, float);
- s += sizeof(float);
- }
+ SHIFT_VAR(utf8, s, strend, afloat, datumtype);
DO_BO_UNPACK_N(afloat, float);
if (!checksum)
PUSHs(sv_2mortal(newSVnv((NV)afloat)));
case 'd':
while (len-- > 0) {
double adouble;
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend, (char *) &adouble,
- sizeof(adouble))) break;
- } else {
- Copy(s, &adouble, 1, double);
- s += sizeof(double);
- }
+ SHIFT_VAR(utf8, s, strend, adouble, datumtype);
DO_BO_UNPACK_N(adouble, double);
if (!checksum)
PUSHs(sv_2mortal(newSVnv((NV)adouble)));
case 'F':
while (len-- > 0) {
NV anv;
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend,
- (char *) &anv, sizeof(anv))) break;
- } else {
- Copy(s, &anv, 1, NV);
- s += NVSIZE;
- }
+ SHIFT_VAR(utf8, s, strend, anv, datumtype);
DO_BO_UNPACK_N(anv, NV);
if (!checksum)
PUSHs(sv_2mortal(newSVnv(anv)));
case 'D':
while (len-- > 0) {
long double aldouble;
- if (utf8) {
- if (!next_uni_bytes(aTHX_ &s, strend, (char *) &aldouble,
- sizeof(aldouble))) break;
- } else {
- Copy(s, &aldouble, 1, long double);
- s += LONG_DOUBLESIZE;
- }
+ SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
DO_BO_UNPACK_N(aldouble, long double);
if (!checksum)
PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
PUTBACK;
cnt = unpackstring(pat, patend, s, strend,
((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
- | (DO_UTF8(right) ? FLAG_UNPACK_DO_UTF8 : 0));
+ | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
SPAGAIN;
if ( !cnt && gimme == G_SCALAR )
RETURN;
}
-STATIC void
-S_doencodes(pTHX_ register SV *sv, register char *s, register I32 len)
+STATIC U8 *
+doencodes(U8 *h, char *s, I32 len)
{
- char hunk[5];
-
- *hunk = PL_uuemap[len];
- sv_catpvn(sv, hunk, 1);
- hunk[4] = '\0';
+ *h++ = PL_uuemap[len];
while (len > 2) {
- hunk[0] = PL_uuemap[(077 & (*s >> 2))];
- hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017)))];
- hunk[2] = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
- hunk[3] = PL_uuemap[(077 & (s[2] & 077))];
- sv_catpvn(sv, hunk, 4);
+ *h++ = PL_uuemap[(077 & (s[0] >> 2))];
+ *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
+ *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
+ *h++ = PL_uuemap[(077 & (s[2] & 077))];
s += 3;
len -= 3;
}
if (len > 0) {
char r = (len > 1 ? s[1] : '\0');
- hunk[0] = PL_uuemap[(077 & (*s >> 2))];
- hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((r >> 4) & 017)))];
- hunk[2] = PL_uuemap[(077 & ((r << 2) & 074))];
- hunk[3] = PL_uuemap[0];
- sv_catpvn(sv, hunk, 4);
+ *h++ = PL_uuemap[(077 & (s[0] >> 2))];
+ *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
+ *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
+ *h++ = PL_uuemap[0];
}
- sv_catpvn(sv, "\n", 1);
+ *h++ = '\n';
+ return h;
}
STATIC SV *
void
Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
{
+ STRLEN no_len;
tempsym_t sym = { 0 };
+
sym.patptr = pat;
sym.patend = patend;
sym.flags = FLAG_PACK;
+ /* We're going to do changes through SvPVX(cat). Make sure it's valid.
+ Also make sure any UTF8 flag is loaded */
+ SvPV_force(cat, no_len);
+ if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
+
(void)pack_rec( cat, &sym, beglist, endlist );
}
+/* like sv_utf8_upgrade, but also repoint the group start markers */
+STATIC void
+marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
+ STRLEN len;
+ tempsym_t *group;
+ char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
+
+ if (SvUTF8(sv)) return;
+
+ from_start = SvPVX(sv);
+ from_end = from_start + SvCUR(sv);
+ for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
+ if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
+ if (from_ptr == from_end) {
+ /* Simple case: no character needs to be changed */
+ SvUTF8_on(sv);
+ return;
+ }
+
+ /* We assume a char translates to at most 2 UTF-8 bytes */
+ len = (from_end-from_ptr)*2+(from_ptr-from_start)+1;
+ New('U', to_start, len, char);
+ Copy(from_start, to_start, from_ptr-from_start, char);
+ to_ptr = to_start + (from_ptr-from_start);
+
+ New('U', marks, sym_ptr->level+2, char *);
+ for (group=sym_ptr; group; group = group->previous)
+ marks[group->level] = from_start + group->strbeg;
+ marks[sym_ptr->level+1] = from_end+1;
+ for (m = marks; *m < from_ptr; m++)
+ *m = to_start + (*m-from_start);
+
+ for (;from_ptr < from_end; from_ptr++) {
+ while (*m == from_ptr) *m++ = to_ptr;
+ to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
+ }
+ *to_ptr = 0;
+
+ while (*m == from_ptr) *m++ = to_ptr;
+ if (m != marks + sym_ptr->level+1) {
+ Safefree(marks);
+ Safefree(to_start);
+ Perl_croak(aTHX_ "Assertion: marks beyond string end");
+ }
+ for (group=sym_ptr; group; group = group->previous)
+ group->strbeg = marks[group->level] - to_start;
+ Safefree(marks);
+
+ if (SvOOK(sv)) {
+ if (SvIVX(sv)) {
+ SvLEN(sv) += SvIVX(sv);
+ from_start -= SvIVX(sv);
+ SvIV_set(sv, 0);
+ }
+ SvFLAGS(sv) &= ~SVf_OOK;
+ }
+ if (SvLEN(sv) != 0)
+ Safefree(from_start);
+ SvPVX(sv) = to_start;
+ SvCUR(sv) = to_ptr - to_start;
+ SvLEN(sv) = len;
+ SvUTF8_on(sv);
+}
+
+/* Exponential string grower. Makes string extension effectively O(n)
+ needed says how many extra bytes we need (not counting the final '\0')
+ Only grows the string if there is an actual lack of space
+*/
+STATIC char *
+sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
+ STRLEN cur = SvCUR(sv);
+ STRLEN len = SvLEN(sv);
+ STRLEN extend;
+ if (len - cur > needed) return SvPVX(sv);
+ extend = needed > len ? needed : len;
+ return SvGROW(sv, len+extend+1);
+}
STATIC
SV **
-S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV **endlist )
+S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
{
- register I32 items;
- STRLEN fromlen;
- register I32 len = 0;
- SV *fromstr;
- /*SUPPRESS 442*/
- static char null10[] = {0,0,0,0,0,0,0,0,0,0};
- static char *space10 = " ";
- bool found;
-
- /* These must not be in registers: */
- char achar;
- I16 ai16;
- U16 au16;
- I32 ai32;
- U32 au32;
-#ifdef HAS_QUAD
- Quad_t aquad;
- Uquad_t auquad;
-#endif
-#if SHORTSIZE != SIZE16
- short ashort;
- unsigned short aushort;
-#endif
- int aint;
- unsigned int auint;
-#if LONGSIZE != SIZE32
- long along;
- unsigned long aulong;
-#endif
- char *aptr;
- float afloat;
- double adouble;
-#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
- long double aldouble;
-#endif
- IV aiv;
- UV auv;
- NV anv;
-
- int strrelbeg = SvCUR(cat);
tempsym_t lookahead;
-
- items = endlist - beglist;
- found = next_symbol( symptr );
-
-#ifndef PACKED_IS_OCTETS
- if (symptr->level == 0 && found && symptr->code == 'U' ){
- SvUTF8_on(cat);
+ I32 items = endlist - beglist;
+ bool found = next_symbol(symptr);
+ bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
+
+ if (symptr->level == 0 && found && symptr->code == 'U') {
+ marked_upgrade(aTHX_ cat, symptr);
+ symptr->flags |= FLAG_DO_UTF8;
+ utf8 = 0;
}
-#endif
+ symptr->strbeg = SvCUR(cat);
while (found) {
+ SV *fromstr;
+ STRLEN fromlen;
+ I32 len;
SV *lengthcode = Nullsv;
-#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
-
I32 datumtype = symptr->code;
- howlen_t howlen;
+ howlen_t howlen = symptr->howlen;
+ char *start = SvPVX(cat);
+ char *cur = start + SvCUR(cat);
- switch( howlen = symptr->howlen ){
- case e_no_len:
- case e_number:
- len = symptr->length;
- break;
+#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
+
+ switch (howlen) {
case e_star:
- len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 0 : items;
+ len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
+ 0 : items;
+ break;
+ default:
+ /* e_no_len and e_number */
+ len = symptr->length;
break;
}
+ if (len) {
+ struct packsize_t *pack_props =
+ &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
+ PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
+ const int rawtype = TYPE_NO_MODIFIERS(datumtype);
+ int offset = rawtype - pack_props->first;
+
+ if (offset >= 0 && offset < pack_props->size) {
+ /* Data about this template letter */
+ unsigned char data = pack_props->array[offset];
+
+ if (data && !(data & PACK_SIZE_UNPREDICTABLE)) {
+ /* We can process this letter. */
+ STRLEN size = data & PACK_SIZE_MASK;
+ GROWING(utf8, cat, start, cur, (STRLEN) len * size);
+ }
+ }
+
+ }
+
/* Look ahead for next symbol. Do we have code/code? */
lookahead = *symptr;
found = next_symbol(&lookahead);
if ( symptr->flags & FLAG_SLASH ) {
- if (found){
+ if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
if ( 0 == strchr( "aAZ", lookahead.code ) ||
e_star != lookahead.howlen )
Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
- lengthcode = sv_2mortal(newSViv(sv_len(items > 0
- ? *beglist : &PL_sv_no)
- + (lookahead.code == 'Z' ? 1 : 0)));
- } else {
- Perl_croak(aTHX_ "Code missing after '/' in pack");
- }
+ lengthcode =
+ sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
}
+ /* Code inside the switch must take care to properly update
+ cat (CUR length and '\0' termination) if it updated *cur and
+ doesn't simply leave using break */
switch(TYPE_NO_ENDIANNESS(datumtype)) {
default:
- Perl_croak(aTHX_ "Invalid type '%c' in pack", (int)TYPE_NO_MODIFIERS(datumtype));
+ Perl_croak(aTHX_ "Invalid type '%c' in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
case '%':
Perl_croak(aTHX_ "'%%' may not be used in pack");
case '@':
- len += strrelbeg - SvCUR(cat);
- if (len > 0)
- goto grow;
+ if (utf8) {
+ char *s = start + symptr->strbeg;
+ while (len > 0 && s < cur) {
+ s += UTF8SKIP(s);
+ len--;
+ }
+ if (s > cur)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+ if (len > 0) {
+ grow:
+ GROWING(0, cat, start, cur, len);
+ Zero(cur, len, char);
+ cur += len;
+ } else if (s < cur) cur = s;
+ else goto no_change;
+ } else {
+ len -= cur - (start+symptr->strbeg);
+ if (len > 0) goto grow;
len = -len;
- if (len > 0)
- goto shrink;
+ if (len > 0) goto shrink;
+ else goto no_change;
+ }
break;
- case '(':
- {
+ case '(': {
tempsym_t savsym = *symptr;
U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
symptr->flags |= group_modifiers;
symptr->patend = savsym.grpend;
symptr->level++;
+ symptr->previous = &lookahead;
while (len--) {
+ U32 was_utf8;
+ if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
+ else symptr->flags &= ~FLAG_PARSE_UTF8;
+ was_utf8 = SvUTF8(cat);
symptr->patptr = savsym.grpbeg;
- beglist = pack_rec(cat, symptr, beglist, endlist );
+ beglist = pack_rec(cat, symptr, beglist, endlist);
+ if (SvUTF8(cat) != was_utf8)
+ /* This had better be an upgrade while in utf8==0 mode */
+ utf8 = 1;
+
if (savsym.howlen == e_star && beglist == endlist)
break; /* No way to continue */
}
- symptr->flags &= ~group_modifiers;
- lookahead.flags = symptr->flags;
- *symptr = savsym;
- break;
+ lookahead.flags = symptr->flags & ~group_modifiers;
+ goto no_change;
}
case 'X' | TYPE_IS_SHRIEKING:
if (!len) /* Avoid division by 0 */
len = 1;
- len = (SvCUR(cat)) % len;
+ if (utf8) {
+ char *hop, *last;
+ I32 l = len;
+ hop = last = start;
+ while (hop < cur) {
+ hop += UTF8SKIP(hop);
+ if (--l == 0) {
+ last = hop;
+ l = len;
+ }
+ }
+ if (last > cur)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+ cur = last;
+ break;
+ }
+ len = (cur-start) % len;
/* FALL THROUGH */
case 'X':
+ if (utf8) {
+ if (len < 1) goto no_change;
+ while (len > 0) {
+ if (cur <= start)
+ Perl_croak(aTHX_ "'X' outside of string in pack");
+ while (--cur, UTF8_IS_CONTINUATION(*cur)) {
+ if (cur <= start)
+ Perl_croak(aTHX_ "'X' outside of string in pack");
+ }
+ len--;
+ }
+ } else {
shrink:
- if ((I32)SvCUR(cat) < len)
+ if (cur - start < len)
Perl_croak(aTHX_ "'X' outside of string in pack");
- SvCUR(cat) -= len;
- *SvEND(cat) = '\0';
+ cur -= len;
+ }
+ if (cur < start+symptr->strbeg) {
+ /* Make sure group starts don't point into the void */
+ tempsym_t *group;
+ STRLEN length = cur-start;
+ for (group = symptr;
+ group && length < group->strbeg;
+ group = group->previous) group->strbeg = length;
+ lookahead.strbeg = length;
+ }
break;
- case 'x' | TYPE_IS_SHRIEKING:
+ case 'x' | TYPE_IS_SHRIEKING: {
+ I32 ai32;
if (!len) /* Avoid division by 0 */
len = 1;
- aint = (SvCUR(cat)) % len;
- if (aint) /* Other portable ways? */
- len = len - aint;
- else
- len = 0;
+ if (utf8) ai32 = utf8_length(start, cur) % len;
+ else ai32 = (cur - start) % len;
+ if (ai32 == 0) goto no_change;
+ len -= ai32;
+ }
/* FALL THROUGH */
-
case 'x':
- grow:
- while (len >= 10) {
- sv_catpvn(cat, null10, 10);
- len -= 10;
- }
- sv_catpvn(cat, null10, len);
- break;
+ goto grow;
case 'A':
case 'Z':
- case 'a':
+ case 'a': {
+ char *aptr;
+
fromstr = NEXTFROM;
aptr = SvPV(fromstr, fromlen);
+ if (DO_UTF8(fromstr)) {
+ char *end, *s;
+
+ if (!utf8 && !SvUTF8(cat)) {
+ marked_upgrade(aTHX_ cat, symptr);
+ lookahead.flags |= FLAG_DO_UTF8;
+ lookahead.strbeg = symptr->strbeg;
+ utf8 = 1;
+ start = SvPVX(cat);
+ cur = start + SvCUR(cat);
+ }
if (howlen == e_star) {
+ if (utf8) goto string_copy;
+ len = fromlen+1;
+ }
+ s = aptr;
+ end = aptr + fromlen;
+ fromlen = datumtype == 'Z' ? len-1 : len;
+ while ((I32) fromlen > 0 && s < end) {
+ s += UTF8SKIP(s);
+ fromlen--;
+ }
+ if (s > end)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+ if (utf8) {
len = fromlen;
- if (datumtype == 'Z')
- ++len;
- }
- if ((I32)fromlen >= len) {
- sv_catpvn(cat, aptr, len);
- if (datumtype == 'Z' && len > 0)
- *(SvEND(cat)-1) = '\0';
+ if (datumtype == 'Z') len++;
+ fromlen = s-aptr;
+ len += fromlen;
+
+ goto string_copy;
+ }
+ fromlen = len - fromlen;
+ if (datumtype == 'Z') fromlen--;
+ if (howlen == e_star) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
}
- else {
- sv_catpvn(cat, aptr, fromlen);
+ GROWING(0, cat, start, cur, len);
+ if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
+ datumtype | TYPE_IS_PACK))
+ Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
+ cur += fromlen;
len -= fromlen;
- if (datumtype == 'A') {
- while (len >= 10) {
- sv_catpvn(cat, space10, 10);
- len -= 10;
- }
- sv_catpvn(cat, space10, len);
+ } else if (utf8) {
+ if (howlen == e_star) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
}
- else {
- while (len >= 10) {
- sv_catpvn(cat, null10, 10);
- len -= 10;
+ if (len <= (I32) fromlen) {
+ fromlen = len;
+ if (datumtype == 'Z' && fromlen > 0) fromlen--;
+ }
+ /* assumes a byte expands to at most 2 bytes on upgrade:
+ expected_length <= from_len*2 + (len-from_len) */
+ GROWING(0, cat, start, cur, fromlen+len);
+ len -= fromlen;
+ while (fromlen > 0) {
+ cur = uvchr_to_utf8(cur, * (U8 *) aptr);
+ aptr++;
+ fromlen--;
}
- sv_catpvn(cat, null10, len);
+ } else {
+ string_copy:
+ if (howlen == e_star) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
+ }
+ if (len <= (I32) fromlen) {
+ fromlen = len;
+ if (datumtype == 'Z' && fromlen > 0) fromlen--;
}
+ GROWING(0, cat, start, cur, len);
+ Copy(aptr, cur, fromlen, char);
+ cur += fromlen;
+ len -= fromlen;
}
+ memset(cur, datumtype == 'A' ? ' ' : '\0', len);
+ cur += len;
break;
+ }
case 'B':
- case 'b':
- {
- register char *str;
- I32 saveitems;
+ case 'b': {
+ char *str, *end;
+ I32 l, field_len;
+ U8 bits;
+ bool utf8_source;
+ U32 utf8_flags;
fromstr = NEXTFROM;
- saveitems = items;
str = SvPV(fromstr, fromlen);
- if (howlen == e_star)
- len = fromlen;
- aint = SvCUR(cat);
- SvCUR(cat) += (len+7)/8;
- SvGROW(cat, SvCUR(cat) + 1);
- aptr = SvPVX(cat) + aint;
- if (len > (I32)fromlen)
- len = fromlen;
- aint = len;
- items = 0;
- if (datumtype == 'B') {
- for (len = 0; len++ < aint;) {
- items |= *str++ & 1;
- if (len & 7)
- items <<= 1;
+ end = str + fromlen;
+ if (DO_UTF8(fromstr)) {
+ utf8_source = TRUE;
+ utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+ } else {
+ utf8_source = FALSE;
+ utf8_flags = 0; /* Unused, but keep compilers happy */
+ }
+ if (howlen == e_star) len = fromlen;
+ field_len = (len+7)/8;
+ GROWING(utf8, cat, start, cur, field_len);
+ if (len > (I32)fromlen) len = fromlen;
+ bits = 0;
+ l = 0;
+ if (datumtype == 'B')
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ bits |= val & 1;
+ } else bits |= *str++ & 1;
+ if (l & 7) bits <<= 1;
else {
- *aptr++ = items & 0xff;
- items = 0;
- }
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
}
}
+ else
+ /* datumtype == 'b' */
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ if (val & 1) bits |= 0x80;
+ } else if (*str++ & 1)
+ bits |= 0x80;
+ if (l & 7) bits >>= 1;
else {
- for (len = 0; len++ < aint;) {
- if (*str++ & 1)
- items |= 128;
- if (len & 7)
- items >>= 1;
- else {
- *aptr++ = items & 0xff;
- items = 0;
- }
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
}
}
- if (aint & 7) {
+ l--;
+ if (l & 7) {
if (datumtype == 'B')
- items <<= 7 - (aint & 7);
+ bits <<= 7 - (l & 7);
else
- items >>= 7 - (aint & 7);
- *aptr++ = items & 0xff;
- }
- str = SvPVX(cat) + SvCUR(cat);
- while (aptr <= str)
- *aptr++ = '\0';
-
- items = saveitems;
+ bits >>= 7 - (l & 7);
+ PUSH_BYTE(utf8, cur, bits);
+ l += 7;
}
+ /* Determine how many chars are left in the requested field */
+ l /= 8;
+ if (howlen == e_star) field_len = 0;
+ else field_len -= l;
+ Zero(cur, field_len, char);
+ cur += field_len;
break;
+ }
case 'H':
- case 'h':
- {
- register char *str;
- I32 saveitems;
+ case 'h': {
+ char *str, *end;
+ I32 l, field_len;
+ U8 bits;
+ bool utf8_source;
+ U32 utf8_flags;
fromstr = NEXTFROM;
- saveitems = items;
str = SvPV(fromstr, fromlen);
- if (howlen == e_star)
- len = fromlen;
- aint = SvCUR(cat);
- SvCUR(cat) += (len+1)/2;
- SvGROW(cat, SvCUR(cat) + 1);
- aptr = SvPVX(cat) + aint;
- if (len > (I32)fromlen)
- len = fromlen;
- aint = len;
- items = 0;
- if (datumtype == 'H') {
- for (len = 0; len++ < aint;) {
- if (isALPHA(*str))
- items |= ((*str++ & 15) + 9) & 15;
+ end = str + fromlen;
+ if (DO_UTF8(fromstr)) {
+ utf8_source = TRUE;
+ utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+ } else {
+ utf8_source = FALSE;
+ utf8_flags = 0; /* Unused, but keep compilers happy */
+ }
+ if (howlen == e_star) len = fromlen;
+ field_len = (len+1)/2;
+ GROWING(utf8, cat, start, cur, field_len);
+ if (!utf8 && len > (I32)fromlen) len = fromlen;
+ bits = 0;
+ l = 0;
+ if (datumtype == 'H')
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ if (val < 256 && isALPHA(val))
+ bits |= (val + 9) & 0xf;
else
- items |= *str++ & 15;
- if (len & 1)
- items <<= 4;
+ bits |= val & 0xf;
+ } else if (isALPHA(*str))
+ bits |= (*str++ + 9) & 0xf;
+ else
+ bits |= *str++ & 0xf;
+ if (l & 1) bits <<= 4;
else {
- *aptr++ = items & 0xff;
- items = 0;
- }
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
}
}
- else {
- for (len = 0; len++ < aint;) {
- if (isALPHA(*str))
- items |= (((*str++ & 15) + 9) & 15) << 4;
+ else
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ if (val < 256 && isALPHA(val))
+ bits |= ((val + 9) & 0xf) << 4;
else
- items |= (*str++ & 15) << 4;
- if (len & 1)
- items >>= 4;
- else {
- *aptr++ = items & 0xff;
- items = 0;
+ bits |= (val & 0xf) << 4;
+ } else if (isALPHA(*str))
+ bits |= ((*str++ + 9) & 0xf) << 4;
+ else
+ bits |= (*str++ & 0xf) << 4;
+ if (l & 1) bits >>= 4;
+ else {
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
}
}
+ l--;
+ if (l & 1) {
+ PUSH_BYTE(utf8, cur, bits);
+ l++;
+ }
+ /* Determine how many chars are left in the requested field */
+ l /= 2;
+ if (howlen == e_star) field_len = 0;
+ else field_len -= l;
+ Zero(cur, field_len, char);
+ cur += field_len;
+ break;
}
- if (aint & 1)
- *aptr++ = items & 0xff;
- str = SvPVX(cat) + SvCUR(cat);
- while (aptr <= str)
- *aptr++ = '\0';
-
- items = saveitems;
+ case 'c':
+ while (len-- > 0) {
+ IV aiv;
+ fromstr = NEXTFROM;
+ aiv = SvIV(fromstr);
+ if ((-128 > aiv || aiv > 127) &&
+ ckWARN(WARN_PACK))
+ Perl_warner(aTHX_ packWARN(WARN_PACK),
+ "Character in 'c' format wrapped in pack");
+ PUSH_BYTE(utf8, cur, aiv & 0xff);
}
break;
case 'C':
- case 'c':
+ if (len == 0) {
+ utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
+ break;
+ }
+ GROWING(0, cat, start, cur, len);
while (len-- > 0) {
+ IV aiv;
fromstr = NEXTFROM;
- switch (TYPE_NO_MODIFIERS(datumtype)) {
- case 'C':
- aint = SvIV(fromstr);
- if ((aint < 0 || aint > 255) &&
+ aiv = SvIV(fromstr);
+ if ((0 > aiv || aiv > 0xff) &&
ckWARN(WARN_PACK))
Perl_warner(aTHX_ packWARN(WARN_PACK),
"Character in 'C' format wrapped in pack");
- achar = aint & 255;
- sv_catpvn(cat, &achar, sizeof(char));
+ *cur++ = aiv & 0xff;
+ }
break;
- case 'c':
- aint = SvIV(fromstr);
- if ((aint < -128 || aint > 127) &&
- ckWARN(WARN_PACK))
+ case 'W': {
+ char *end;
+ U8 in_bytes = IN_BYTES;
+
+ end = start+SvLEN(cat)-1;
+ if (utf8) end -= UTF8_MAXLEN-1;
+ while (len-- > 0) {
+ UV auv;
+ fromstr = NEXTFROM;
+ auv = SvUV(fromstr);
+ if (in_bytes) auv = auv % 0x100;
+ if (utf8) {
+ W_utf8:
+ if (cur > end) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+
+ GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
+ end = start+SvLEN(cat)-UTF8_MAXLEN;
+ }
+ cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
+ ckWARN(WARN_UTF8) ?
+ 0 : UNICODE_ALLOW_ANY);
+ } else {
+ if (auv >= 0x100) {
+ if (!SvUTF8(cat)) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+ marked_upgrade(aTHX_ cat, symptr);
+ lookahead.flags |= FLAG_DO_UTF8;
+ lookahead.strbeg = symptr->strbeg;
+ utf8 = 1;
+ start = SvPVX(cat);
+ cur = start + SvCUR(cat);
+ end = start+SvLEN(cat)-UTF8_MAXLEN;
+ goto W_utf8;
+ }
+ if (ckWARN(WARN_PACK))
Perl_warner(aTHX_ packWARN(WARN_PACK),
- "Character in 'c' format wrapped in pack" );
- achar = aint & 255;
- sv_catpvn(cat, &achar, sizeof(char));
- break;
+ "Character in 'W' format wrapped in pack");
+ auv &= 0xff;
+ }
+ if (cur >= end) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+ GROWING(0, cat, start, cur, len+1);
+ end = start+SvLEN(cat)-1;
+ }
+ *(U8 *) cur++ = auv;
}
}
break;
- case 'U':
+ }
+ case 'U': {
+ char *end;
+
+ if (len == 0) {
+ if (!(symptr->flags & FLAG_DO_UTF8)) {
+ marked_upgrade(aTHX_ cat, symptr);
+ lookahead.flags |= FLAG_DO_UTF8;
+ lookahead.strbeg = symptr->strbeg;
+ }
+ utf8 = 0;
+ goto no_change;
+ }
+
+ end = start+SvLEN(cat);
+ if (!utf8) end -= UTF8_MAXLEN;
while (len-- > 0) {
+ UV auv;
fromstr = NEXTFROM;
- auint = UNI_TO_NATIVE(SvUV(fromstr));
- SvGROW(cat, SvCUR(cat) + UTF8_MAXBYTES + 1);
- SvCUR_set(cat,
- (char*)uvchr_to_utf8_flags((U8*)SvEND(cat),
- auint,
+ auv = SvUV(fromstr);
+ if (utf8) {
+ char buffer[UTF8_MAXLEN], *end;
+ end = uvuni_to_utf8_flags(buffer, auv,
+ ckWARN(WARN_UTF8) ?
+ 0 : UNICODE_ALLOW_ANY);
+ if (cur >= end-(end-buffer)*2) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+ GROWING(0, cat, start, cur, len+(end-buffer)*2);
+ end = start+SvLEN(cat)-UTF8_MAXLEN;
+ }
+ bytes_to_uni(aTHX_ buffer, end-buffer, &cur);
+ } else {
+ if (cur >= end) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+ GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
+ end = start+SvLEN(cat)-UTF8_MAXLEN;
+ }
+ cur = uvuni_to_utf8_flags(cur, auv,
ckWARN(WARN_UTF8) ?
- 0 : UNICODE_ALLOW_ANY)
- - SvPVX(cat));
+ 0 : UNICODE_ALLOW_ANY);
+ }
}
- *SvEND(cat) = '\0';
break;
+ }
/* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
case 'f':
while (len-- > 0) {
+ float afloat;
+ NV anv;
fromstr = NEXTFROM;
+ anv = SvNV(fromstr);
#ifdef __VOS__
-/* VOS does not automatically map a floating-point overflow
+ /* VOS does not automatically map a floating-point overflow
during conversion from double to float into infinity, so we
do it by hand. This code should either be generalized for
any OS that needs it, or removed if and when VOS implements
posix-976 (suggestion to support mapping to infinity).
Paul.Green@stratus.com 02-04-02. */
- if (SvNV(fromstr) > FLT_MAX)
+ if (anv > FLT_MAX)
afloat = _float_constants[0]; /* single prec. inf. */
- else if (SvNV(fromstr) < -FLT_MAX)
+ else if (anv < -FLT_MAX)
afloat = _float_constants[0]; /* single prec. inf. */
- else afloat = (float)SvNV(fromstr);
-#else
+ else afloat = (float) anv;
+#else /* __VOS__ */
# if defined(VMS) && !defined(__IEEE_FP)
-/* IEEE fp overflow shenanigans are unavailable on VAX and optional
+ /* IEEE fp overflow shenanigans are unavailable on VAX and optional
* on Alpha; fake it if we don't have them.
*/
- if (SvNV(fromstr) > FLT_MAX)
+ if (anv > FLT_MAX)
afloat = FLT_MAX;
- else if (SvNV(fromstr) < -FLT_MAX)
+ else if (anv < -FLT_MAX)
afloat = -FLT_MAX;
- else afloat = (float)SvNV(fromstr);
+ else afloat = (float)anv;
# else
- afloat = (float)SvNV(fromstr);
+ afloat = (float)anv;
# endif
-#endif
+#endif /* __VOS__ */
DO_BO_PACK_N(afloat, float);
- sv_catpvn(cat, (char *)&afloat, sizeof (float));
+ PUSH_VAR(utf8, cur, afloat);
}
break;
case 'd':
while (len-- > 0) {
+ double adouble;
+ NV anv;
fromstr = NEXTFROM;
+ anv = SvNV(fromstr);
#ifdef __VOS__
-/* VOS does not automatically map a floating-point overflow
+ /* VOS does not automatically map a floating-point overflow
during conversion from long double to double into infinity,
so we do it by hand. This code should either be generalized
for any OS that needs it, or removed if and when VOS
implements posix-976 (suggestion to support mapping to
infinity). Paul.Green@stratus.com 02-04-02. */
- if (SvNV(fromstr) > DBL_MAX)
+ if (anv > DBL_MAX)
adouble = _double_constants[0]; /* double prec. inf. */
- else if (SvNV(fromstr) < -DBL_MAX)
+ else if (anv < -DBL_MAX)
adouble = _double_constants[0]; /* double prec. inf. */
- else adouble = (double)SvNV(fromstr);
-#else
+ else adouble = (double) anv;
+#else /* __VOS__ */
# if defined(VMS) && !defined(__IEEE_FP)
-/* IEEE fp overflow shenanigans are unavailable on VAX and optional
+ /* IEEE fp overflow shenanigans are unavailable on VAX and optional
* on Alpha; fake it if we don't have them.
*/
- if (SvNV(fromstr) > DBL_MAX)
+ if (anv > DBL_MAX)
adouble = DBL_MAX;
- else if (SvNV(fromstr) < -DBL_MAX)
+ else if (anv < -DBL_MAX)
adouble = -DBL_MAX;
- else adouble = (double)SvNV(fromstr);
+ else adouble = (double)anv;
# else
- adouble = (double)SvNV(fromstr);
+ adouble = (double)anv;
# endif
-#endif
+#endif /* __VOS__ */
DO_BO_PACK_N(adouble, double);
- sv_catpvn(cat, (char *)&adouble, sizeof (double));
+ PUSH_VAR(utf8, cur, adouble);
}
break;
- case 'F':
+ case 'F': {
+ NV anv;
Zero(&anv, 1, NV); /* can be long double with unused bits */
while (len-- > 0) {
fromstr = NEXTFROM;
anv = SvNV(fromstr);
DO_BO_PACK_N(anv, NV);
- sv_catpvn(cat, (char *)&anv, NVSIZE);
+ PUSH_VAR(utf8, cur, anv);
}
break;
+ }
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
- case 'D':
+ case 'D': {
+ long double aldouble;
/* long doubles can have unused bits, which may be nonzero */
Zero(&aldouble, 1, long double);
while (len-- > 0) {
fromstr = NEXTFROM;
aldouble = (long double)SvNV(fromstr);
DO_BO_PACK_N(aldouble, long double);
- sv_catpvn(cat, (char *)&aldouble, LONG_DOUBLESIZE);
+ PUSH_VAR(utf8, cur, aldouble);
}
break;
+ }
#endif
#ifdef PERL_PACK_CAN_SHRIEKSIGN
case 'n' | TYPE_IS_SHRIEKING:
#endif
case 'n':
while (len-- > 0) {
+ I16 ai16;
fromstr = NEXTFROM;
ai16 = (I16)SvIV(fromstr);
#ifdef HAS_HTONS
ai16 = PerlSock_htons(ai16);
#endif
- CAT16(cat, &ai16);
+ PUSH16(utf8, cur, &ai16);
}
break;
#ifdef PERL_PACK_CAN_SHRIEKSIGN
#endif
case 'v':
while (len-- > 0) {
+ I16 ai16;
fromstr = NEXTFROM;
ai16 = (I16)SvIV(fromstr);
#ifdef HAS_HTOVS
ai16 = htovs(ai16);
#endif
- CAT16(cat, &ai16);
+ PUSH16(utf8, cur, &ai16);
}
break;
case 'S' | TYPE_IS_SHRIEKING:
#if SHORTSIZE != SIZE16
- {
while (len-- > 0) {
+ unsigned short aushort;
fromstr = NEXTFROM;
aushort = SvUV(fromstr);
DO_BO_PACK(aushort, s);
- sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short));
- }
+ PUSH_VAR(utf8, cur, aushort);
}
break;
#else
/* Fall through! */
#endif
case 'S':
- {
while (len-- > 0) {
+ U16 au16;
fromstr = NEXTFROM;
au16 = (U16)SvUV(fromstr);
DO_BO_PACK(au16, 16);
- CAT16(cat, &au16);
- }
-
+ PUSH16(utf8, cur, &au16);
}
break;
case 's' | TYPE_IS_SHRIEKING:
#if SHORTSIZE != SIZE16
- {
while (len-- > 0) {
+ short ashort;
fromstr = NEXTFROM;
ashort = SvIV(fromstr);
DO_BO_PACK(ashort, s);
- sv_catpvn(cat, (char *)&ashort, sizeof(short));
- }
+ PUSH_VAR(utf8, cur, ashort);
}
break;
#else
#endif
case 's':
while (len-- > 0) {
+ I16 ai16;
fromstr = NEXTFROM;
ai16 = (I16)SvIV(fromstr);
DO_BO_PACK(ai16, 16);
- CAT16(cat, &ai16);
+ PUSH16(utf8, cur, &ai16);
}
break;
case 'I':
case 'I' | TYPE_IS_SHRIEKING:
while (len-- > 0) {
+ unsigned int auint;
fromstr = NEXTFROM;
auint = SvUV(fromstr);
DO_BO_PACK(auint, i);
- sv_catpvn(cat, (char*)&auint, sizeof(unsigned int));
+ PUSH_VAR(utf8, cur, auint);
}
break;
case 'j':
while (len-- > 0) {
+ IV aiv;
fromstr = NEXTFROM;
aiv = SvIV(fromstr);
#if IVSIZE == INTSIZE
DO_BO_PACK(aiv, l);
#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
DO_BO_PACK(aiv, 64);
+#else
+ Perl_croak(aTHX_ "'j' not supported on this platform");
#endif
- sv_catpvn(cat, (char*)&aiv, IVSIZE);
+ PUSH_VAR(utf8, cur, aiv);
}
break;
case 'J':
while (len-- > 0) {
+ UV auv;
fromstr = NEXTFROM;
auv = SvUV(fromstr);
#if UVSIZE == INTSIZE
DO_BO_PACK(auv, l);
#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
DO_BO_PACK(auv, 64);
+#else
+ Perl_croak(aTHX_ "'J' not supported on this platform");
#endif
- sv_catpvn(cat, (char*)&auv, UVSIZE);
+ PUSH_VAR(utf8, cur, auv);
}
break;
case 'w':
while (len-- > 0) {
+ NV anv;
fromstr = NEXTFROM;
anv = SvNV(fromstr);
- if (anv < 0)
+ if (anv < 0) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
+ }
/* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
any negative IVs will have already been got by the croak()
above. IOK is untrue for fractions, so we test them
against UV_MAX_P1. */
- if (SvIOK(fromstr) || anv < UV_MAX_P1)
- {
- char buf[(sizeof(UV)*8)/7+1];
+ if (SvIOK(fromstr) || anv < UV_MAX_P1) {
+ char buf[(sizeof(UV)*CHAR_BIT)/7+1];
char *in = buf + sizeof(buf);
UV auv = SvUV(fromstr);
auv >>= 7;
} while (auv);
buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
- sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
- }
- else if (SvPOKp(fromstr)) { /* decimal string arithmetics */
- char *from, *result, *in;
- SV *norm;
- STRLEN len;
- bool done;
-
- /* Copy string and check for compliance */
- from = SvPV(fromstr, len);
- if ((norm = is_an_int(from, len)) == NULL)
- Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
-
- New('w', result, len, char);
- in = result + len;
- done = FALSE;
- while (!done)
- *--in = div128(norm, &done) | 0x80;
- result[len - 1] &= 0x7F; /* clear continue bit */
- sv_catpvn(cat, in, (result + len) - in);
- Safefree(result);
- SvREFCNT_dec(norm); /* free norm */
- }
+ PUSH_GROWING_BYTES(utf8, cat, start, cur,
+ in, (buf + sizeof(buf)) - in);
+ } else if (SvPOKp(fromstr))
+ goto w_string;
else if (SvNOKp(fromstr)) {
/* 10**NV_MAX_10_EXP is the largest power of 10
so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
floating-point value.
*/
#ifdef NV_MAX_10_EXP
-/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
+ /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
#else
-/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
+ /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
#endif
char *in = buf + sizeof(buf);
anv = next;
} while (anv > 0);
buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
- sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
- }
- else {
+ PUSH_GROWING_BYTES(utf8, cat, start, cur,
+ in, (buf + sizeof(buf)) - in);
+ } else {
char *from, *result, *in;
SV *norm;
STRLEN len;
bool done;
+ w_string:
/* Copy string and check for compliance */
from = SvPV(fromstr, len);
if ((norm = is_an_int(from, len)) == NULL)
New('w', result, len, char);
in = result + len;
done = FALSE;
- while (!done)
- *--in = div128(norm, &done) | 0x80;
+ while (!done) *--in = div128(norm, &done) | 0x80;
result[len - 1] &= 0x7F; /* clear continue bit */
- sv_catpvn(cat, in, (result + len) - in);
+ PUSH_GROWING_BYTES(utf8, cat, start, cur,
+ in, (result + len) - in);
Safefree(result);
SvREFCNT_dec(norm); /* free norm */
}
case 'i':
case 'i' | TYPE_IS_SHRIEKING:
while (len-- > 0) {
+ int aint;
fromstr = NEXTFROM;
aint = SvIV(fromstr);
DO_BO_PACK(aint, i);
- sv_catpvn(cat, (char*)&aint, sizeof(int));
+ PUSH_VAR(utf8, cur, aint);
}
break;
#ifdef PERL_PACK_CAN_SHRIEKSIGN
#endif
case 'N':
while (len-- > 0) {
+ U32 au32;
fromstr = NEXTFROM;
au32 = SvUV(fromstr);
#ifdef HAS_HTONL
au32 = PerlSock_htonl(au32);
#endif
- CAT32(cat, &au32);
+ PUSH32(utf8, cur, &au32);
}
break;
#ifdef PERL_PACK_CAN_SHRIEKSIGN
#endif
case 'V':
while (len-- > 0) {
+ U32 au32;
fromstr = NEXTFROM;
au32 = SvUV(fromstr);
#ifdef HAS_HTOVL
au32 = htovl(au32);
#endif
- CAT32(cat, &au32);
+ PUSH32(utf8, cur, &au32);
}
break;
case 'L' | TYPE_IS_SHRIEKING:
#if LONGSIZE != SIZE32
- {
while (len-- > 0) {
+ unsigned long aulong;
fromstr = NEXTFROM;
aulong = SvUV(fromstr);
DO_BO_PACK(aulong, l);
- sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long));
- }
+ PUSH_VAR(utf8, cur, aulong);
}
break;
#else
/* Fall though! */
#endif
case 'L':
- {
while (len-- > 0) {
+ U32 au32;
fromstr = NEXTFROM;
au32 = SvUV(fromstr);
DO_BO_PACK(au32, 32);
- CAT32(cat, &au32);
- }
+ PUSH32(utf8, cur, &au32);
}
break;
case 'l' | TYPE_IS_SHRIEKING:
#if LONGSIZE != SIZE32
- {
while (len-- > 0) {
+ long along;
fromstr = NEXTFROM;
along = SvIV(fromstr);
DO_BO_PACK(along, l);
- sv_catpvn(cat, (char *)&along, sizeof(long));
- }
+ PUSH_VAR(utf8, cur, along);
}
break;
#else
#endif
case 'l':
while (len-- > 0) {
+ I32 ai32;
fromstr = NEXTFROM;
ai32 = SvIV(fromstr);
DO_BO_PACK(ai32, 32);
- CAT32(cat, &ai32);
+ PUSH32(utf8, cur, &ai32);
}
break;
#ifdef HAS_QUAD
case 'Q':
while (len-- > 0) {
+ Uquad_t auquad;
fromstr = NEXTFROM;
- auquad = (Uquad_t)SvUV(fromstr);
+ auquad = (Uquad_t) SvUV(fromstr);
DO_BO_PACK(auquad, 64);
- sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t));
+ PUSH_VAR(utf8, cur, auquad);
}
break;
case 'q':
while (len-- > 0) {
+ Quad_t aquad;
fromstr = NEXTFROM;
aquad = (Quad_t)SvIV(fromstr);
DO_BO_PACK(aquad, 64);
- sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t));
+ PUSH_VAR(utf8, cur, aquad);
}
break;
-#endif
+#endif /* HAS_QUAD */
case 'P':
len = 1; /* assume SV is correct length */
+ GROWING(utf8, cat, start, cur, sizeof(char *));
/* Fall through! */
case 'p':
while (len-- > 0) {
+ char *aptr;
+
fromstr = NEXTFROM;
SvGETMAGIC(fromstr);
if (!SvOK(fromstr)) aptr = NULL;
* of pack() (and all copies of the result) are
* gone.
*/
- if (ckWARN(WARN_PACK) && (SvTEMP(fromstr)
- || (SvPADTMP(fromstr)
- && !SvREADONLY(fromstr))))
- {
+ if (ckWARN(WARN_PACK) &&
+ (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
+ !SvREADONLY(fromstr)))) {
Perl_warner(aTHX_ packWARN(WARN_PACK),
"Attempt to pack pointer to temporary value");
}
aptr = SvPV_force_flags(fromstr, n_a, 0);
}
DO_BO_PACK_P(aptr);
- sv_catpvn(cat, (char*)&aptr, sizeof(char*));
+ PUSH_VAR(utf8, cur, aptr);
}
break;
- case 'u':
+ case 'u': {
+ char *aptr, *aend;
+ bool from_utf8;
+
fromstr = NEXTFROM;
+ if (len <= 2) len = 45;
+ else len = len / 3 * 3;
+ if (len >= 64) {
+ Perl_warner(aTHX_ packWARN(WARN_PACK),
+ "Field too wide in 'u' format in pack");
+ len = 63;
+ }
aptr = SvPV(fromstr, fromlen);
- SvGROW(cat, fromlen * 4 / 3);
- if (len <= 2)
- len = 45;
- else
- len = len / 3 * 3;
+ from_utf8 = DO_UTF8(fromstr);
+ if (from_utf8) {
+ aend = aptr + fromlen;
+ fromlen = sv_len_utf8(fromstr);
+ } else aend = NULL; /* Unused, but keep compilers happy */
+ GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
while (fromlen > 0) {
+ U8 *end;
I32 todo;
+ U8 hunk[1+63/3*4+1];
if ((I32)fromlen > len)
todo = len;
else
todo = fromlen;
- doencodes(cat, aptr, todo);
- fromlen -= todo;
+ if (from_utf8) {
+ char buffer[64];
+ if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
+ 'u' | TYPE_IS_PACK)) {
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+ Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
+ }
+ end = doencodes(hunk, buffer, todo);
+ } else {
+ end = doencodes(hunk, aptr, todo);
aptr += todo;
}
+ PUSH_BYTES(utf8, cur, hunk, end-hunk);
+ fromlen -= todo;
+ }
break;
}
+ }
+ *cur = '\0';
+ SvCUR(cat) = cur - start;
+ no_change:
*symptr = lookahead;
}
return beglist;
MARK++;
sv_setpvn(cat, "", 0);
+ SvUTF8_off(cat);
packlist(cat, pat, patend, MARK, SP + 1);
my $no_signedness = $] > 5.009 ? '' :
"Signed/unsigned pack modifiers not available on this perl";
-plan tests => 13864;
+plan tests => 14604;
use strict;
use warnings;
print "# test unpack-pack lengths\n";
-my @templates = qw(c C i I s S l L n N v V f d q Q);
+my @templates = qw(c C W i I s S l L n N v V f d q Q);
foreach my $base (@templates) {
my @tmpl = ($base);
- $base =~ /^[cnv]/i or push @tmpl, "$base>", "$base<";
+ $base =~ /^[cwnv]/i or push @tmpl, "$base>", "$base<";
foreach my $t (@tmpl) {
SKIP: {
my @t = eval { unpack("$t*", pack("$t*", 12, 34)) };
numbers ('c', -128, -1, 0, 1, 127);
numbers ('C', 0, 1, 127, 128, 255);
+numbers ('W', 0, 1, 127, 128, 255, 256, 0x7ff, 0x800, 0xfffd);
numbers ('s', -32768, -1, 0, 1, 32767);
numbers ('S', 0, 1, 32767, 32768, 65535);
numbers ('i', -2147483648, -1, 0, 1, 2147483647);
}
{ # Repeat count [SUBEXPR]
- my @codes = qw( x A Z a c C B b H h s v n S i I l V N L p P f F d
+ my @codes = qw( x A Z a c C W B b H h s v n S i I l V N L p P f F d
s! S! i! I! l! L! j J);
my $G;
if (eval { pack 'q', 1 } ) {
@val{@codes} = map { / [Xx] (?{ undef })
| [AZa] (?{ 'something' })
| C (?{ 214 })
+ | W (?{ 8188 })
| c (?{ 114 })
| [Bb] (?{ '101' })
| [Hh] (?{ 'b8' })
my (@x) = unpack("a(U0)U", "b\341\277\274");
is($x[0], 'b', 'before scope');
is($x[1], 8188, 'after scope');
+
+ is(pack("a(U0)U", "b", 8188), "b\341\277\274");
}
{
my (@x) = unpack("C*", pack("CZ0", 1, "b"));
is(join(',', @x), '1', 'pack Z0 doesn\'t destroy the character before');
}
+
+{
+ # Encoding neutrality
+ # String we will pull apart and rebuild in several ways:
+ my $down = "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\x05\x06";
+ my $up = $down;
+ utf8::upgrade($up);
+
+ my %expect =
+ # [expected result,
+ # how many chars it should progress,
+ # (optional) expected result of pack]
+ (a5 => ["\xf8\xf9\xfa\xfb\xfc", 5],
+ A5 => ["\xf8\xf9\xfa\xfb\xfc", 5],
+ Z5 => ["\xf8\xf9\xfa\xfb\xfc", 5, "\xf8\xf9\xfa\xfb\x00\xfd"],
+ b21 => ["000111111001111101011", 3, "\xf8\xf9\x1a\xfb"],
+ B21 => ["111110001111100111111", 3, "\xf8\xf9\xf8\xfb"],
+ H5 => ["f8f9f", 3, "\xf8\xf9\xf0\xfb"],
+ h5 => ["8f9fa", 3, "\xf8\xf9\x0a\xfb"],
+ "s<" => [-1544, 2],
+ "s>" => [-1799, 2],
+ "S<" => [0xf9f8, 2],
+ "S>" => [0xf8f9, 2],
+ "l<" => [-67438088, 4],
+ "l>" => [-117835013, 4],
+ "L>" => [0xf8f9fafb, 4],
+ "L<" => [0xfbfaf9f8, 4],
+ n => [0xf8f9, 2],
+ N => [0xf8f9fafb, 4],
+ v => [63992, 2],
+ V => [0xfbfaf9f8, 4],
+ c => [-8, 1],
+ U0U => [0xf8, 1],
+ w => ["8715569050387726213", 9],
+ q => ["-283686952306184", 8],
+ Q => ["18446460386757245432", 8],
+ );
+
+ for my $string ($down, $up) {
+ for my $format (sort {lc($a) cmp lc($b) || $a cmp $b } keys %expect) {
+ SKIP: {
+ my $expect = $expect{$format};
+ # unpack upgraded and downgraded string
+ my @result = eval { unpack("$format C0 W", $string) };
+ skip "cannot pack/unpack '$format C0 W' on this perl", 5 if
+ $@ && is_valid_error($@);
+ is(@result, 2, "Two results from unpack $format C0 W");
+
+ # pack to downgraded
+ my $new = pack("$format C0 W", @result);
+ is(length($new), $expect->[1]+1,
+ "pack $format C0 W should give $expect->[1]+1 chars");
+ is($new, $expect->[2] || substr($string, 0, length $new),
+ "pack $format C0 W returns expected value");
+
+ # pack to upgraded
+ $new = pack("a0 $format C0 W", chr(256), @result);
+ is(length($new), $expect->[1]+1,
+ "pack a0 $format C0 W should give $expect->[1]+1 chars");
+ is($new, $expect->[2] || substr($string, 0, length $new),
+ "pack a0 $format C0 W returns expected value");
+ }
+ }
+ }
+}
+
+{
+ # Encoding neutrality, numbers
+ my $val = -2.68;
+ for my $format (qw(s S i I l L j J f d F D q Q
+ s! S! i! I! l! L! n! N! v! V!)) {
+ SKIP: {
+ my $down = eval { pack($format, $val) };
+ skip "cannot pack/unpack $format on this perl", 9 if
+ $@ && is_valid_error($@);
+ ok(!utf8::is_utf8($down), "Simple $format pack doesn't get upgraded");
+ my $up = pack("a0 $format", chr(256), $val);
+ ok(utf8::is_utf8($up), "a0 $format with high char leads to upgrade");
+ is($down, $up, "$format generated strings are equal though");
+ my @down_expanded = unpack("$format W", $down . chr(0xce));
+ is(@down_expanded, 2, "Expand to two values");
+ is($down_expanded[1], 0xce,
+ "unpack $format left us at the expected position");
+ my @up_expanded = unpack("$format W", $up . chr(0xce));
+ is(@up_expanded, 2, "Expand to two values");
+ is($up_expanded[1], 0xce,
+ "unpack $format left us at the expected position");
+ is($down_expanded[0], $up_expanded[0], "$format unpack was neutral");
+ is(pack($format, $down_expanded[0]), $down, "Pack $format undoes unpack $format");
+ }
+ }
+}
+
+{
+ # C is *not* neutral
+ my $down = "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\x05\x06";
+ my $up = $down;
+ utf8::upgrade($up);
+ my @down = unpack("C*", $down);
+ my @expect_down = (0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x05, 0x06);
+ is("@down", "@expect_down", "byte expand");
+ is(pack("C*", @down), $down, "byte join");
+
+ my @up = unpack("C*", $up);
+ my @expect_up = (0xc3, 0xb8, 0xc3, 0xb9, 0xc3, 0xba, 0xc3, 0xbb, 0xc3, 0xbc, 0xc3, 0xbd, 0xc3, 0xbe, 0xc3, 0xbf, 0x05, 0x06);
+ is("@up", "@expect_up", "UTF-8 expand");
+ is(pack("U0C0C*", @up), $up, "UTF-8 join");
+}
+
+{
+ # Harder cases for the neutrality test
+
+ # u format
+ my $down = "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\x05\x06";
+ my $up = $down;
+ utf8::upgrade($up);
+ is(pack("u", $down), pack("u", $up), "u pack is neutral");
+ is(unpack("u", pack("u", $down)), $down, "u unpack to downgraded works");
+ is(unpack("U0C0u", pack("u", $down)), $up, "u unpack to upgraded works");
+
+ # p/P format
+ # This actually only tests something if the address contains a byte >= 0x80
+ my $str = "abc\xa5\x00\xfede";
+ $down = pack("p", $str);
+ is(pack("P", $str), $down);
+ is(pack("U0C0p", $str), $down);
+ is(pack("U0C0P", $str), $down);
+ is(unpack("p", $down), "abc\xa5", "unpack p downgraded");
+ $up = $down;
+ utf8::upgrade($up);
+ is(unpack("p", $up), "abc\xa5", "unpack p upgraded");
+
+ is(unpack("P7", $down), "abc\xa5\x00\xfed", "unpack P downgraded");
+ is(unpack("P7", $up), "abc\xa5\x00\xfed", "unpack P upgraded");
+
+ # x, X and @
+ $down = "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff\x05\x06";
+ $up = $down;
+ utf8::upgrade($up);
+
+ is(unpack('@4W', $down), 0xfc, "\@positioning on downgraded string");
+ is(unpack('@4W', $up), 0xfc, "\@positioning on upgraded string");
+
+ is(unpack('@4x2W', $down), 0xfe, "x moving on downgraded string");
+ is(unpack('@4x2W', $up), 0xfe, "x moving on upgraded string");
+ is(unpack('@4x!4W', $down), 0xfc, "x! moving on downgraded string");
+ is(unpack('@4x!4W', $up), 0xfc, "x! moving on upgraded string");
+ is(unpack('@5x!4W', $down), 0x05, "x! moving on downgraded string");
+ is(unpack('@5x!4W', $up), 0x05, "x! moving on upgraded string");
+
+ is(unpack('@4X2W', $down), 0xfa, "X moving on downgraded string");
+ is(unpack('@4X2W', $up), 0xfa, "X moving on upgraded string");
+ is(unpack('@4X!4W', $down), 0xfc, "X! moving on downgraded string");
+ is(unpack('@4X!4W', $up), 0xfc, "X! moving on upgraded string");
+ is(unpack('@5X!4W', $down), 0xfc, "X! moving on downgraded string");
+ is(unpack('@5X!4W', $up), 0xfc, "X! moving on upgraded string");
+ is(unpack('@5X!8W', $down), 0xf8, "X! moving on downgraded string");
+ is(unpack('@5X!8W', $up), 0xf8, "X! moving on upgraded string");
+
+ is(pack("W2x", 0xfa, 0xe3), "\xfa\xe3\x00", "x on downgraded string");
+ is(pack("W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00",
+ "x! on downgraded string");
+ is(pack("W2x!2", 0xfa, 0xe3), "\xfa\xe3", "x! on downgraded string");
+ is(pack("U0C0W2x", 0xfa, 0xe3), "\xfa\xe3\x00", "x on upgraded string");
+ is(pack("U0C0W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00",
+ "x! on upgraded string");
+ is(pack("U0C0W2x!2", 0xfa, 0xe3), "\xfa\xe3", "x! on upgraded string");
+ is(pack("W2X", 0xfa, 0xe3), "\xfa", "X on downgraded string");
+ is(pack("U0C0W2X", 0xfa, 0xe3), "\xfa", "X on upgraded string");
+ is(pack("W2X!2", 0xfa, 0xe3), "\xfa\xe3", "X! on downgraded string");
+ is(pack("U0C0W2X!2", 0xfa, 0xe3), "\xfa\xe3", "X! on upgraded string");
+ is(pack("W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3", "X! on downgraded string");
+ is(pack("U0C0W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3",
+ "X! on upgraded string");
+
+ # backward eating through a ( moves the group starting point backwards
+ is(pack("a*(Xa)", "abc", "q"), "abq",
+ "eating before strbeg moves it back");
+ is(pack("a*(Xa)", "ab" . chr(512), "q"), "abq",
+ "eating before strbeg moves it back");
+
+ # Check marked_upgrade
+ is(pack('W(W(Wa@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, "a", 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6");
+ $up = "a";
+ utf8::upgrade($up);
+ is(pack('W(W(Wa@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, $up, 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by a");
+ is(pack('W(W(WW@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, 256, 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3\x{100}\x00\xa4\x00\xa5\x00\xa6",
+ "marked upgrade caused by W");
+ is(pack('W(W(WU0aC0@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, "a", 0xa4, 0xa5, 0xa6),
+ "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by U0");
+
+ # a, A and Z
+ $down = "\xa4\xa6\xa7";
+ $up = $down;
+ utf8::upgrade($up);
+ utf8::upgrade(my $high = "\xfeb");
+
+ for my $format ("a0", "A0", "Z0", "U0a0C0", "U0A0C0", "U0Z0C0") {
+ is(pack("a* $format a*", "ab", $down, "cd"), "abcd",
+ "$format format on plain string");
+ is(pack("a* $format a*", "ab", $up, "cd"), "abcd",
+ "$format format on upgraded string");
+ is(pack("a* $format a*", $high, $down, "cd"), "\xfebcd",
+ "$format format on plain string");
+ is(pack("a* $format a*", $high, $up, "cd"), "\xfebcd",
+ "$format format on upgraded string");
+ my @down = unpack("a1 $format a*", "\xfeb");
+ is("@down", "\xfe b", "unpack $format");
+ my @up = unpack("a1 $format a*", $high);
+ is("@up", "\xfe b", "unpack $format");
+ }
+ is(pack("a1", $high), "\xfe");
+ is(pack("A1", $high), "\xfe");
+ is(pack("Z1", $high), "\x00");
+ is(pack("a2", $high), "\xfeb");
+ is(pack("A2", $high), "\xfeb");
+ is(pack("Z2", $high), "\xfe\x00");
+ is(pack("a5", $high), "\xfeb\x00\x00\x00");
+ is(pack("A5", $high), "\xfeb ");
+ is(pack("Z5", $high), "\xfeb\x00\x00\x00");
+ is(pack("a*", $high), "\xfeb");
+ is(pack("A*", $high), "\xfeb");
+ is(pack("Z*", $high), "\xfeb\x00");
+
+ utf8::upgrade($high = "\xc3\xbeb");
+ is(pack("U0a2", $high), "\xfe");
+ is(pack("U0A2", $high), "\xfe");
+ is(pack("U0Z1", $high), "\x00");
+ is(pack("U0a3", $high), "\xfeb");
+ is(pack("U0A3", $high), "\xfeb");
+ is(pack("U0Z3", $high), "\xfe\x00");
+ is(pack("U0a6", $high), "\xfeb\x00\x00\x00");
+ is(pack("U0A6", $high), "\xfeb ");
+ is(pack("U0Z6", $high), "\xfeb\x00\x00\x00");
+ is(pack("U0a*", $high), "\xfeb");
+ is(pack("U0A*", $high), "\xfeb");
+ is(pack("U0Z*", $high), "\xfeb\x00");
+}