} STMT_END
#define PUSH_VAR(utf8, aptr, var) \
- PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
+ PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
#define MAX_SUB_TEMPLATE_LEVEL 100
#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
#ifdef PERL_PACK_CAN_SHRIEKSIGN
-#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
#else
-#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
#endif
#ifndef PERL_PACK_CAN_BYTEORDER
# define DO_BO_UNPACK(var, type)
# define DO_BO_PACK(var, type)
-# define DO_BO_UNPACK_PTR(var, type, pre_cast)
-# define DO_BO_PACK_PTR(var, type, pre_cast)
+# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
+# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
# define DO_BO_UNPACK_N(var, type)
# define DO_BO_PACK_N(var, type)
# define DO_BO_UNPACK_P(var)
} \
} STMT_END
-# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
+# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
STMT_START { \
switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: \
- var = (void *) my_betoh ## type ((pre_cast) var); \
+ var = (post_cast*) my_betoh ## type ((pre_cast) var); \
break; \
case TYPE_IS_LITTLE_ENDIAN: \
- var = (void *) my_letoh ## type ((pre_cast) var); \
+ var = (post_cast *) my_letoh ## type ((pre_cast) var); \
break; \
default: \
break; \
} \
} STMT_END
-# define DO_BO_PACK_PTR(var, type, pre_cast) \
+# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
STMT_START { \
switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: \
- var = (void *) my_htobe ## type ((pre_cast) var); \
+ var = (post_cast *) my_htobe ## type ((pre_cast) var); \
break; \
case TYPE_IS_LITTLE_ENDIAN: \
- var = (void *) my_htole ## type ((pre_cast) var); \
+ var = (post_cast *) my_htole ## type ((pre_cast) var); \
break; \
default: \
break; \
} STMT_END
# if PTRSIZE == INTSIZE
-# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
-# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
+# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
+# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
+# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
+# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
# elif PTRSIZE == LONGSIZE
-# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
-# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
+# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
+# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
+# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
+# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
# else
# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
val &= 0xff;
}
*s += retlen;
- return val;
+ return (U8)val;
}
#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
bad |= 2;
val &= 0xff;
}
- *(U8 *)buf++ = val;
+ *(U8 *)buf++ = (U8)val;
}
/* We have enough characters for the buffer. Did we have problems ? */
if (bad) {
*dest = d;
}
-#define PUSH_BYTES(utf8, cur, buf, len) \
-STMT_START { \
- if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
- else { \
- Copy(buf, cur, len, char); \
- (cur) += (len); \
- } \
+#define PUSH_BYTES(utf8, cur, buf, len) \
+STMT_START { \
+ if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
+ else { \
+ Copy(buf, cur, len, char); \
+ (cur) += (len); \
+ } \
} STMT_END
#define GROWING(utf8, cat, start, cur, in_len) \
Perl_croak(aTHX_ "Invalid type '%c' in %s",
(int)TYPE_NO_MODIFIERS(symptr->code),
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '.' | TYPE_IS_SHRIEKING:
+ case '@' | TYPE_IS_SHRIEKING:
+#endif
case '@':
+ case '.':
case '/':
case 'U': /* XXXX Is it correct? */
case 'w':
case 'u':
Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
- (int)symptr->code,
+ (int) TYPE_NO_MODIFIERS(symptr->code),
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
case '%':
size = 0;
while (pat < patend) {
if (pat[0] == '#') {
pat++;
- pat = memchr(pat, '\n', patend-pat);
+ pat = (char *) memchr(pat, '\n', patend-pat);
if (!pat) return FALSE;
} else if (pat[0] == 'U') {
if (first || pat[1] == '0') return TRUE;
while (pat < patend) {
if (pat[0] != '#') return pat[0];
pat++;
- pat = memchr(pat, '\n', patend-pat);
+ pat = (char *) memchr(pat, '\n', patend-pat);
if (!pat) return 0;
pat++;
}
/* We probably should try to avoid this in case a scalar context call
wouldn't get to the "U0" */
STRLEN len = strend - s;
- s = (char *) bytes_to_utf8(s, &len);
+ s = (char *) bytes_to_utf8((U8 *) s, &len);
SAVEFREEPV(s);
strend = s + len;
flags |= FLAG_DO_UTF8;
/* We probably should try to avoid this in case a scalar context call
wouldn't get to the "U0" */
STRLEN len = strend - s;
- s = (char *) bytes_to_utf8(s, &len);
+ s = (char *) bytes_to_utf8((U8 *) s, &len);
SAVEFREEPV(s);
strend = s + len;
flags |= FLAG_DO_UTF8;
UV cuv = 0;
NV cdouble = 0.0;
const int bits_in_uv = CHAR_BIT * sizeof(cuv);
- char* strrelbeg = s;
bool beyond = FALSE;
bool explicit_length;
bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
+ symptr->strbeg = s - strbeg;
while (next_symbol(symptr)) {
packprops_t props;
U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
symptr->flags |= group_modifiers;
symptr->patend = savsym.grpend;
+ symptr->previous = &savsym;
symptr->level++;
PUTBACK;
while (len--) {
break; /* No way to continue */
}
SPAGAIN;
- symptr->flags &= ~group_modifiers;
- savsym.flags = symptr->flags;
+ savsym.flags = symptr->flags & ~group_modifiers;
*symptr = savsym;
break;
}
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '.' | TYPE_IS_SHRIEKING:
+#endif
+ case '.': {
+ char *from;
+ SV *sv;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+ bool u8 = utf8;
+#endif
+ if (howlen == e_star) from = strbeg;
+ else if (len <= 0) from = s;
+ else {
+ tempsym_t *group = symptr;
+
+ while (--len && group) group = group->previous;
+ from = group ? strbeg + group->strbeg : strbeg;
+ }
+ sv = from <= s ?
+ newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
+ newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
+ XPUSHs(sv_2mortal(sv));
+ break;
+ }
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '@' | TYPE_IS_SHRIEKING:
+#endif
case '@':
- if (utf8) {
- s = strrelbeg;
+ s = strbeg + symptr->strbeg;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+ if (utf8)
+#endif
+ {
while (len > 0) {
if (s >= strend)
Perl_croak(aTHX_ "'@' outside of string in unpack");
if (s > strend)
Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
} else {
- if (len > strend - strrelbeg)
+ if (strend-s < len)
Perl_croak(aTHX_ "'@' outside of string in unpack");
- s = strrelbeg + len;
+ s += len;
}
break;
case 'X' | TYPE_IS_SHRIEKING:
case 'x' | TYPE_IS_SHRIEKING:
if (!len) /* Avoid division by 0 */
len = 1;
- if (utf8) ai32 = utf8_length(strbeg, s) % len;
- else ai32 = (s - strbeg) % len;
+ if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
+ else ai32 = (s - strbeg) % len;
if (ai32 == 0) break;
len -= ai32;
/* FALL THROUGH */
} else if (datumtype == 'A') {
/* 'A' strips both nulls and spaces */
char *ptr;
- for (ptr = s+len-1; ptr >= s; ptr--)
- if (*ptr != 0 && !isSPACE(*ptr)) break;
- ptr++;
+ if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
+ for (ptr = s+len-1; ptr >= s; ptr--)
+ if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
+ !is_utf8_space((U8 *) ptr)) break;
+ if (ptr >= s) ptr += UTF8SKIP(ptr);
+ else ptr++;
+ if (ptr > s+len)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+ } else {
+ for (ptr = s+len-1; ptr >= s; ptr--)
+ if (*ptr != 0 && !isSPACE(*ptr)) break;
+ ptr++;
+ }
sv = newSVpvn(s, ptr-s);
} else sv = newSVpvn(s, len);
ptr = s;
/* Bug: warns about bad utf8 even if we are short on bytes
and will break out of the loop */
- if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
+ if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
+ 'U'))
break;
len = UTF8SKIP(result);
if (!uni_to_bytes(aTHX_ &ptr, strend,
- &result[1], len-1, 'U')) break;
+ (char *) &result[1], len-1, 'U')) break;
auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
s = ptr;
} else {
while (len-- > 0) {
char *aptr;
SHIFT_VAR(utf8, s, strend, aptr, datumtype);
- DO_BO_UNPACK_P(aptr);
+ DO_BO_UNPACK_PC(aptr);
/* newSVpv generates undef if aptr is NULL */
PUSHs(sv_2mortal(newSVpv(aptr, 0)));
}
if (sizeof(char*) <= strend - s) {
char *aptr;
SHIFT_VAR(utf8, s, strend, aptr, datumtype);
- DO_BO_UNPACK_P(aptr);
+ DO_BO_UNPACK_PC(aptr);
/* newSVpvn generates undef if aptr is NULL */
PUSHs(sv_2mortal(newSVpvn(aptr, len)));
}
for (;from_ptr < from_end; from_ptr++) {
while (*m == from_ptr) *m++ = to_ptr;
- to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
+ to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
}
*to_ptr = 0;
/* Look ahead for next symbol. Do we have code/code? */
lookahead = *symptr;
found = next_symbol(&lookahead);
- if ( symptr->flags & FLAG_SLASH ) {
+ if (symptr->flags & FLAG_SLASH) {
+ IV count;
if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
- if ( 0 == strchr( "aAZ", lookahead.code ) ||
- e_star != lookahead.howlen )
- Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
- lengthcode =
- sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
+ if (strchr("aAZ", lookahead.code)) {
+ if (lookahead.howlen == e_number) count = lookahead.length;
+ else {
+ if (items > 0)
+ count = DO_UTF8(*beglist) ?
+ sv_len_utf8(*beglist) : sv_len(*beglist);
+ else count = 0;
+ if (lookahead.code == 'Z') count++;
+ }
+ } else {
+ if (lookahead.howlen == e_number && lookahead.length < items)
+ count = lookahead.length;
+ else count = items;
+ }
+ lookahead.howlen = e_number;
+ lookahead.length = count;
+ lengthcode = sv_2mortal(newSViv(count));
}
/* Code inside the switch must take care to properly update
(int) TYPE_NO_MODIFIERS(datumtype));
case '%':
Perl_croak(aTHX_ "'%%' may not be used in pack");
+ {
+ char *from;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '.' | TYPE_IS_SHRIEKING:
+#endif
+ case '.':
+ if (howlen == e_star) from = start;
+ else if (len == 0) from = cur;
+ else {
+ tempsym_t *group = symptr;
+
+ while (--len && group) group = group->previous;
+ from = group ? start + group->strbeg : start;
+ }
+ fromstr = NEXTFROM;
+ len = SvIV(fromstr);
+ goto resize;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ case '@' | TYPE_IS_SHRIEKING:
+#endif
case '@':
- if (utf8) {
- char *s = start + symptr->strbeg;
- while (len > 0 && s < cur) {
- s += UTF8SKIP(s);
- len--;
+ from = start + symptr->strbeg;
+ resize:
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+ if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+ if (utf8)
+#endif
+ if (len >= 0) {
+ while (len && from < cur) {
+ from += UTF8SKIP(from);
+ len--;
+ }
+ if (from > cur)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+ if (len) {
+ /* Here we know from == cur */
+ grow:
+ GROWING(0, cat, start, cur, len);
+ Zero(cur, len, char);
+ cur += len;
+ } else if (from < cur) {
+ len = cur - from;
+ goto shrink;
+ } else goto no_change;
+ } else {
+ cur = from;
+ len = -len;
+ goto utf8_shrink;
}
- if (s > cur)
- Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
- if (len > 0) {
- grow:
- GROWING(0, cat, start, cur, len);
- Zero(cur, len, char);
- cur += len;
- } else if (s < cur) cur = s;
- else goto no_change;
- } else {
- len -= cur - (start+symptr->strbeg);
+ else {
+ len -= cur - from;
if (len > 0) goto grow;
+ if (len == 0) goto no_change;
len = -len;
- if (len > 0) goto shrink;
- else goto no_change;
+ goto shrink;
}
break;
+ }
case '(': {
tempsym_t savsym = *symptr;
U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
case 'X':
if (utf8) {
if (len < 1) goto no_change;
+ utf8_shrink:
while (len > 0) {
if (cur <= start)
- Perl_croak(aTHX_ "'X' outside of string in pack");
+ Perl_croak(aTHX_ "'%c' outside of string in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
while (--cur, UTF8_IS_CONTINUATION(*cur)) {
if (cur <= start)
- Perl_croak(aTHX_ "'X' outside of string in pack");
+ Perl_croak(aTHX_ "'%c' outside of string in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
}
len--;
}
} else {
shrink:
if (cur - start < len)
- Perl_croak(aTHX_ "'X' outside of string in pack");
+ Perl_croak(aTHX_ "'%c' outside of string in pack",
+ (int) TYPE_NO_MODIFIERS(datumtype));
cur -= len;
}
if (cur < start+symptr->strbeg) {
I32 ai32;
if (!len) /* Avoid division by 0 */
len = 1;
- if (utf8) ai32 = utf8_length(start, cur) % len;
+ if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
else ai32 = (cur - start) % len;
if (ai32 == 0) goto no_change;
len -= ai32;
GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
len -= fromlen;
while (fromlen > 0) {
- cur = uvchr_to_utf8(cur, * (U8 *) aptr);
+ cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
aptr++;
fromlen--;
}
GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
end = start+SvLEN(cat)-UTF8_MAXLEN;
}
- cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
- ckWARN(WARN_UTF8) ?
- 0 : UNICODE_ALLOW_ANY);
+ cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
+ NATIVE_TO_UNI(auv),
+ ckWARN(WARN_UTF8) ?
+ 0 : UNICODE_ALLOW_ANY);
} else {
if (auv >= 0x100) {
if (!SvUTF8(cat)) {
GROWING(0, cat, start, cur, len+1);
end = start+SvLEN(cat)-1;
}
- *(U8 *) cur++ = auv;
+ *(U8 *) cur++ = (U8)auv;
}
}
break;
fromstr = NEXTFROM;
auv = SvUV(fromstr);
if (utf8) {
- char buffer[UTF8_MAXLEN], *endb;
+ U8 buffer[UTF8_MAXLEN], *endb;
endb = uvuni_to_utf8_flags(buffer, auv,
ckWARN(WARN_UTF8) ?
0 : UNICODE_ALLOW_ANY);
GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
end = start+SvLEN(cat)-UTF8_MAXLEN;
}
- cur = uvuni_to_utf8_flags(cur, auv,
- ckWARN(WARN_UTF8) ?
- 0 : UNICODE_ALLOW_ANY);
+ cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
+ ckWARN(WARN_UTF8) ?
+ 0 : UNICODE_ALLOW_ANY);
}
}
break;
else
aptr = SvPV_force_flags(fromstr, n_a, 0);
}
- DO_BO_PACK_P(aptr);
+ DO_BO_PACK_PC(aptr);
PUSH_VAR(utf8, cur, aptr);
}
break;