/* pp_pack.c
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
+ * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
* other pp*.c files for the rest of the pp_ functions.
*/
-
#include "EXTERN.h"
#define PERL_IN_PP_PACK_C
#include "perl.h"
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0,
- /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
+ /* C */ sizeof(unsigned char),
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
/* D */ LONG_DOUBLESIZE,
#else
/* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
+ /* C */ sizeof(unsigned char),
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
/* D */ LONG_DOUBLESIZE,
#else
XPUSHs(sv);
break;
}
+ case 'C':
+ if (len == 0) {
+ if (explicit_length)
+ /* Switch to "character" mode */
+ utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
+ break;
+ }
+ /* FALL THROUGH */
case 'c':
- while (len-- > 0) {
- int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
- if (aint >= 128) /* fake up signed chars */
+ while (len-- > 0 && s < strend) {
+ int aint;
+ if (utf8)
+ {
+ STRLEN retlen;
+ aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+ if (retlen == (STRLEN) -1 || retlen == 0)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+ s += retlen;
+ }
+ else
+ aint = *(U8 *)(s)++;
+ if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
aint -= 256;
if (!checksum)
PUSHs(sv_2mortal(newSViv((IV)aint)));
cuv += aint;
}
break;
- case 'C':
case 'W':
W_checksum:
- if (len == 0) {
- if (explicit_length && datumtype == 'C')
- /* Switch to "character" mode */
- utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
- break;
- }
- if (datumtype == 'C' ?
- (symptr->flags & FLAG_DO_UTF8) &&
- !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
+ if (utf8) {
while (len-- > 0 && s < strend) {
STRLEN retlen;
const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
break;
#endif
case 'u':
- /* MKS:
- * Initialise the decode mapping. By using a table driven
- * algorithm, the code will be character-set independent
- * (and just as fast as doing character arithmetic)
- */
- if (PL_uudmap[(U8)'M'] == 0) {
- size_t i;
-
- for (i = 0; i < sizeof(PL_uuemap); ++i)
- PL_uudmap[(U8)PL_uuemap[i]] = i;
- /*
- * Because ' ' and '`' map to the same value,
- * we need to decode them both the same.
- */
- PL_uudmap[(U8)' '] = 0;
- }
{
const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
sv = sv_2mortal(newSV(l));
utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
break;
}
- GROWING(0, cat, start, cur, len);
while (len-- > 0) {
IV aiv;
fromstr = NEXTFROM;
ckWARN(WARN_PACK))
Perl_warner(aTHX_ packWARN(WARN_PACK),
"Character in 'C' format wrapped in pack");
- *cur++ = (char)(aiv & 0xff);
+ PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
}
break;
case 'W': {