3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
35 # define PERL_PACK_CAN_BYTEORDER
36 # define PERL_PACK_CAN_SHRIEKSIGN
42 /* Maximum number of bytes to which a byte can grow due to upgrade */
46 * Offset for integer pack/unpack.
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
66 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
69 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
70 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
71 # define OFF16(p) ((char*)(p))
72 # define OFF32(p) ((char*)(p))
74 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
75 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
78 ++++ bad cray byte order
82 # define OFF16(p) ((char *) (p))
83 # define OFF32(p) ((char *) (p))
86 /* Only to be used inside a loop (see the break) */
87 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
91 Copy(s, OFF16(p), SIZE16, char); \
96 /* Only to be used inside a loop (see the break) */
97 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
101 Copy(s, OFF32(p), SIZE32, char); \
106 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
109 /* Only to be used inside a loop (see the break) */
110 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
116 Copy(s, (char *) &var, sizeof(var), char); \
121 #define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
124 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125 #define MAX_SUB_TEMPLATE_LEVEL 100
127 /* flags (note that type modifiers can also be used as flags!) */
128 #define FLAG_WAS_UTF8 0x40
129 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
130 #define FLAG_UNPACK_ONLY_ONE 0x10
131 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
132 #define FLAG_SLASH 0x04
133 #define FLAG_COMMA 0x02
134 #define FLAG_PACK 0x01
137 S_mul128(pTHX_ SV *sv, U8 m)
140 char *s = SvPV(sv, len);
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
153 while (!*t) /* trailing '\0'? */
156 i = ((*t - '0') << 7) + m;
157 *(t--) = '0' + (char)(i % 10);
163 /* Explosives and implosives. */
165 #if 'I' == 73 && 'J' == 74
166 /* On an ASCII/ISO kind of system */
167 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
170 Some other sort of character set - use memchr() so we don't match
173 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
177 #define TYPE_IS_SHRIEKING 0x100
178 #define TYPE_IS_BIG_ENDIAN 0x200
179 #define TYPE_IS_LITTLE_ENDIAN 0x400
180 #define TYPE_IS_PACK 0x800
181 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
182 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
183 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
185 #ifdef PERL_PACK_CAN_SHRIEKSIGN
186 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
188 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
191 #ifndef PERL_PACK_CAN_BYTEORDER
192 /* Put "can't" first because it is shorter */
193 # define TYPE_ENDIANNESS(t) 0
194 # define TYPE_NO_ENDIANNESS(t) (t)
196 # define ENDIANNESS_ALLOWED_TYPES ""
198 # define DO_BO_UNPACK(var, type)
199 # define DO_BO_PACK(var, type)
200 # define DO_BO_UNPACK_PTR(var, type, pre_cast)
201 # define DO_BO_PACK_PTR(var, type, pre_cast)
202 # define DO_BO_UNPACK_N(var, type)
203 # define DO_BO_PACK_N(var, type)
204 # define DO_BO_UNPACK_P(var)
205 # define DO_BO_PACK_P(var)
207 #else /* PERL_PACK_CAN_BYTEORDER */
209 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
212 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
214 # define DO_BO_UNPACK(var, type) \
216 switch (TYPE_ENDIANNESS(datumtype)) { \
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
223 # define DO_BO_PACK(var, type) \
225 switch (TYPE_ENDIANNESS(datumtype)) { \
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
232 # define DO_BO_UNPACK_PTR(var, type, pre_cast) \
234 switch (TYPE_ENDIANNESS(datumtype)) { \
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
246 # define DO_BO_PACK_PTR(var, type, pre_cast) \
248 switch (TYPE_ENDIANNESS(datumtype)) { \
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
260 # define BO_CANT_DOIT(action, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
276 # if PTRSIZE == INTSIZE
277 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279 # elif PTRSIZE == LONGSIZE
280 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
283 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
287 # if defined(my_htolen) && defined(my_letohn) && \
288 defined(my_htoben) && defined(my_betohn)
289 # define DO_BO_UNPACK_N(var, type) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
298 # define DO_BO_PACK_N(var, type) \
300 switch (TYPE_ENDIANNESS(datumtype)) { \
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
307 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
311 #endif /* PERL_PACK_CAN_BYTEORDER */
313 #define PACK_SIZE_CANNOT_CSUM 0x80
314 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
315 #define PACK_SIZE_MASK 0x3F
319 const unsigned char *array;
324 #define PACK_SIZE_NORMAL 0
325 #define PACK_SIZE_SHRIEKING 1
327 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
328 in). You're unlikely ever to need to regenerate them. */
331 unsigned char size_normal[53] = {
332 /* C */ sizeof(unsigned char),
333 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
334 /* D */ LONG_DOUBLESIZE,
341 /* I */ sizeof(unsigned int),
348 #if defined(HAS_QUAD)
349 /* Q */ sizeof(Uquad_t),
356 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
358 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
360 /* c */ sizeof(char),
361 /* d */ sizeof(double),
363 /* f */ sizeof(float),
372 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
373 #if defined(HAS_QUAD)
374 /* q */ sizeof(Quad_t),
382 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
384 unsigned char size_shrieking[46] = {
385 /* I */ sizeof(unsigned int),
387 /* L */ sizeof(unsigned long),
389 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
395 /* S */ sizeof(unsigned short),
397 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 /* l */ sizeof(long),
407 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
413 /* s */ sizeof(short),
415 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
421 struct packsize_t packsize[2] = {
422 {size_normal, 67, 53},
423 {size_shrieking, 73, 46}
426 /* EBCDIC (or bust) */
427 unsigned char size_normal[100] = {
428 /* c */ sizeof(char),
429 /* d */ sizeof(double),
431 /* f */ sizeof(float),
441 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
442 #if defined(HAS_QUAD)
443 /* q */ sizeof(Quad_t),
447 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 /* C */ sizeof(unsigned char),
455 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
456 /* D */ LONG_DOUBLESIZE,
463 /* I */ sizeof(unsigned int),
471 #if defined(HAS_QUAD)
472 /* Q */ sizeof(Uquad_t),
476 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
481 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
483 unsigned char size_shrieking[93] = {
485 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 /* l */ sizeof(long),
488 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 /* s */ sizeof(short),
496 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 /* I */ sizeof(unsigned int),
504 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 /* L */ sizeof(unsigned long),
507 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 /* S */ sizeof(unsigned short),
515 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
521 struct packsize_t packsize[2] = {
522 {size_normal, 131, 100},
523 {size_shrieking, 137, 93}
528 uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
532 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
533 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
534 /* We try to process malformed UTF-8 as much as possible (preferrably with
535 warnings), but these two mean we make no progress in the string and
536 might enter an infinite loop */
537 if (retlen == (STRLEN) -1 || retlen == 0)
538 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
539 (int) TYPE_NO_MODIFIERS(datumtype));
541 if (ckWARN(WARN_UNPACK))
542 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
543 "Character in '%c' format wrapped in unpack",
544 (int) TYPE_NO_MODIFIERS(datumtype));
551 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
552 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
556 uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
562 U32 flags = ckWARN(WARN_UTF8) ?
563 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
564 for (;buf_len > 0; buf_len--) {
565 if (from >= end) return FALSE;
566 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
567 if (retlen == (STRLEN) -1 || retlen == 0) {
568 from += UTF8SKIP(from);
570 } else from += retlen;
577 /* We have enough characters for the buffer. Did we have problems ? */
580 /* Rewalk the string fragment while warning */
582 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
583 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
584 if (ptr >= end) break;
585 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
587 if (from > end) from = end;
589 if ((bad & 2) && ckWARN(WARN_UNPACK))
590 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
591 WARN_PACK : WARN_UNPACK),
592 "Character(s) in '%c' format wrapped in %s",
593 (int) TYPE_NO_MODIFIERS(datumtype),
594 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
601 next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
605 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
606 if (val >= 0x100 || !ISUUCHAR(val) ||
607 retlen == (STRLEN) -1 || retlen == 0) {
611 *out = PL_uudmap[val] & 077;
617 bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
618 U8 buffer[UTF8_MAXLEN];
619 U8 *end = start + len;
621 while (start < end) {
623 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
633 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
641 #define PUSH_BYTES(utf8, cur, buf, len) \
643 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
645 Copy(buf, cur, len, char); \
650 #define GROWING(utf8, cat, start, cur, in_len) \
652 STRLEN glen = (in_len); \
653 if (utf8) glen *= UTF8_EXPAND; \
654 if ((cur) + glen >= (start) + SvLEN(cat)) { \
655 (start) = sv_exp_grow(aTHX_ cat, glen); \
656 (cur) = (start) + SvCUR(cat); \
660 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
662 STRLEN glen = (in_len); \
664 if (utf8) gl *= UTF8_EXPAND; \
665 if ((cur) + gl >= (start) + SvLEN(cat)) { \
667 SvCUR(cat) = (cur) - (start); \
668 (start) = sv_exp_grow(aTHX_ cat, gl); \
669 (cur) = (start) + SvCUR(cat); \
671 PUSH_BYTES(utf8, cur, buf, glen); \
674 #define PUSH_BYTE(utf8, s, byte) \
678 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
679 } else *(U8 *)(s)++ = (byte); \
682 /* Only to be used inside a loop (see the break) */
683 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
686 if (str >= end) break; \
687 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
688 if (retlen == (STRLEN) -1 || retlen == 0) { \
690 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
695 /* Returns the sizeof() struct described by pat */
697 S_measure_struct(pTHX_ tempsym_t* symptr)
701 while (next_symbol(symptr)) {
704 int which = (symptr->code & TYPE_IS_SHRIEKING) ?
705 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
706 int offset = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
708 switch (symptr->howlen) {
710 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
711 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
714 /* e_no_len and e_number */
715 len = symptr->length;
719 if ((offset >= 0) && (offset < packsize[which].size))
720 size = packsize[which].array[offset] & PACK_SIZE_MASK;
725 /* endianness doesn't influence the size of a type */
726 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
728 Perl_croak(aTHX_ "Invalid type '%c' in %s",
729 (int)TYPE_NO_MODIFIERS(symptr->code),
730 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
733 case 'U': /* XXXX Is it correct? */
736 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
738 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
744 tempsym_t savsym = *symptr;
745 symptr->patptr = savsym.grpbeg;
746 symptr->patend = savsym.grpend;
747 /* XXXX Theoretically, we need to measure many times at
748 different positions, since the subexpression may contain
749 alignment commands, but be not of aligned length.
750 Need to detect this and croak(). */
751 size = measure_struct(symptr);
755 case 'X' | TYPE_IS_SHRIEKING:
756 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
758 if (!len) /* Avoid division by 0 */
760 len = total % len; /* Assumed: the start is aligned. */
765 Perl_croak(aTHX_ "'X' outside of string in %s",
766 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
768 case 'x' | TYPE_IS_SHRIEKING:
769 if (!len) /* Avoid division by 0 */
771 star = total % len; /* Assumed: the start is aligned. */
772 if (star) /* Other portable ways? */
796 size = sizeof(char*);
806 /* locate matching closing parenthesis or bracket
807 * returns char pointer to char after match, or NULL
810 S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
812 while (patptr < patend) {
820 while (patptr < patend && *patptr != '\n')
824 patptr = group_end(patptr, patend, ')') + 1;
826 patptr = group_end(patptr, patend, ']') + 1;
828 Perl_croak(aTHX_ "No group ending character '%c' found in template",
834 /* Convert unsigned decimal number to binary.
835 * Expects a pointer to the first digit and address of length variable
836 * Advances char pointer to 1st non-digit char and returns number
839 S_get_num(pTHX_ register char *patptr, I32 *lenptr )
841 I32 len = *patptr++ - '0';
842 while (isDIGIT(*patptr)) {
843 if (len >= 0x7FFFFFFF/10)
844 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
845 len = (len * 10) + (*patptr++ - '0');
851 /* The marvellous template parsing routine: Using state stored in *symptr,
852 * locates next template code and count
855 S_next_symbol(pTHX_ tempsym_t* symptr )
857 char* patptr = symptr->patptr;
858 char* patend = symptr->patend;
859 const char *allowed = "";
861 symptr->flags &= ~FLAG_SLASH;
863 while (patptr < patend) {
864 if (isSPACE(*patptr))
866 else if (*patptr == '#') {
868 while (patptr < patend && *patptr != '\n')
873 /* We should have found a template code */
874 I32 code = *patptr++ & 0xFF;
875 U32 inherited_modifiers = 0;
877 if (code == ','){ /* grandfather in commas but with a warning */
878 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
879 symptr->flags |= FLAG_COMMA;
880 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
881 "Invalid type ',' in %s",
882 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
887 /* for '(', skip to ')' */
889 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
890 Perl_croak(aTHX_ "()-group starts with a count in %s",
891 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
892 symptr->grpbeg = patptr;
893 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
894 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
895 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
896 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
899 /* look for group modifiers to inherit */
900 if (TYPE_ENDIANNESS(symptr->flags)) {
901 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
902 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
905 /* look for modifiers */
906 while (patptr < patend) {
910 modifier = TYPE_IS_SHRIEKING;
911 allowed = SHRIEKING_ALLOWED_TYPES;
913 #ifdef PERL_PACK_CAN_BYTEORDER
915 modifier = TYPE_IS_BIG_ENDIAN;
916 allowed = ENDIANNESS_ALLOWED_TYPES;
919 modifier = TYPE_IS_LITTLE_ENDIAN;
920 allowed = ENDIANNESS_ALLOWED_TYPES;
922 #endif /* PERL_PACK_CAN_BYTEORDER */
930 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
931 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
932 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
934 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
935 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
936 (int) TYPE_NO_MODIFIERS(code),
937 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
938 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
939 TYPE_ENDIANNESS_MASK)
940 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
941 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
943 if (ckWARN(WARN_UNPACK)) {
945 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
946 "Duplicate modifier '%c' after '%c' in %s",
947 *patptr, (int) TYPE_NO_MODIFIERS(code),
948 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
955 /* inherit modifiers */
956 code |= inherited_modifiers;
958 /* look for count and/or / */
959 if (patptr < patend) {
960 if (isDIGIT(*patptr)) {
961 patptr = get_num( patptr, &symptr->length );
962 symptr->howlen = e_number;
964 } else if (*patptr == '*') {
966 symptr->howlen = e_star;
968 } else if (*patptr == '[') {
969 char* lenptr = ++patptr;
970 symptr->howlen = e_number;
971 patptr = group_end( patptr, patend, ']' ) + 1;
972 /* what kind of [] is it? */
973 if (isDIGIT(*lenptr)) {
974 lenptr = get_num( lenptr, &symptr->length );
976 Perl_croak(aTHX_ "Malformed integer in [] in %s",
977 symptr->flags & FLAG_PACK ? "pack" : "unpack");
979 tempsym_t savsym = *symptr;
980 symptr->patend = patptr-1;
981 symptr->patptr = lenptr;
982 savsym.length = measure_struct(symptr);
986 symptr->howlen = e_no_len;
991 while (patptr < patend) {
992 if (isSPACE(*patptr))
994 else if (*patptr == '#') {
996 while (patptr < patend && *patptr != '\n')
1001 if (*patptr == '/') {
1002 symptr->flags |= FLAG_SLASH;
1004 if (patptr < patend &&
1005 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1006 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1007 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1013 /* at end - no count, no / */
1014 symptr->howlen = e_no_len;
1018 symptr->code = code;
1019 symptr->patptr = patptr;
1023 symptr->patptr = patptr;
1028 There is no way to cleanly handle the case where we should process the
1029 string per byte in its upgraded form while it's really in downgraded form
1030 (e.g. estimates like strend-s as an upper bound for the number of
1031 characters left wouldn't work). So if we foresee the need of this
1032 (pattern starts with U or contains U0), we want to work on the encoded
1033 version of the string. Users are advised to upgrade their pack string
1034 themselves if they need to do a lot of unpacks like this on it
1037 need_utf8(const char *pat, const char *patend)
1040 while (pat < patend) {
1041 if (pat[0] == '#') {
1043 pat = memchr(pat, '\n', patend-pat);
1044 if (!pat) return FALSE;
1045 } else if (pat[0] == 'U') {
1046 if (first || pat[1] == '0') return TRUE;
1047 } else first = FALSE;
1054 first_symbol(const char *pat, const char *patend) {
1055 while (pat < patend) {
1056 if (pat[0] != '#') return pat[0];
1058 pat = memchr(pat, '\n', patend-pat);
1066 =for apidoc unpack_str
1068 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1069 and ocnt are not used. This call should not be used, use unpackstring instead.
1074 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
1076 tempsym_t sym = { 0 };
1078 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1079 else if (need_utf8(pat, patend)) {
1080 /* We probably should try to avoid this in case a scalar context call
1081 wouldn't get to the "U0" */
1082 STRLEN len = strend - s;
1083 s = (char *) bytes_to_utf8(s, &len);
1086 flags |= FLAG_DO_UTF8;
1089 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1090 flags |= FLAG_PARSE_UTF8;
1093 sym.patend = patend;
1096 return unpack_rec(&sym, s, s, strend, NULL );
1100 =for apidoc unpackstring
1102 The engine implementing unpack() Perl function. C<unpackstring> puts the
1103 extracted list items on the stack and returns the number of elements.
1104 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1109 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
1111 tempsym_t sym = { 0 };
1113 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1114 else if (need_utf8(pat, patend)) {
1115 /* We probably should try to avoid this in case a scalar context call
1116 wouldn't get to the "U0" */
1117 STRLEN len = strend - s;
1118 s = (char *) bytes_to_utf8(s, &len);
1121 flags |= FLAG_DO_UTF8;
1124 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1125 flags |= FLAG_PARSE_UTF8;
1128 sym.patend = patend;
1131 return unpack_rec(&sym, s, s, strend, NULL );
1136 S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
1140 I32 start_sp_offset = SP - PL_stack_base;
1146 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1147 char* strrelbeg = s;
1148 bool beyond = FALSE;
1149 bool explicit_length;
1150 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1151 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1153 while (next_symbol(symptr)) {
1155 I32 datumtype = symptr->code;
1156 /* do first one only unless in list context
1157 / is implemented by unpacking the count, then popping it from the
1158 stack, so must check that we're not in the middle of a / */
1159 if ( unpack_only_one
1160 && (SP - PL_stack_base == start_sp_offset + 1)
1161 && (datumtype != '/') ) /* XXX can this be omitted */
1164 switch (howlen = symptr->howlen) {
1166 len = strend - strbeg; /* long enough */
1169 /* e_no_len and e_number */
1170 len = symptr->length;
1174 explicit_length = TRUE;
1176 beyond = s >= strend;
1178 struct packsize_t *pack_props =
1179 &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
1180 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
1181 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
1182 int offset = rawtype - pack_props->first;
1184 if (offset >= 0 && offset < pack_props->size) {
1185 /* Data about this template letter */
1186 unsigned char data = pack_props->array[offset];
1189 /* data nonzero means we can process this letter. */
1190 long size = data & PACK_SIZE_MASK;
1191 long howmany = (strend - s) / size;
1195 if (!checksum || (data & PACK_SIZE_CANNOT_CSUM)) {
1196 if (len && unpack_only_one) len = 1;
1203 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1205 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1208 if (howlen == e_no_len)
1209 len = 16; /* len is not specified */
1217 tempsym_t savsym = *symptr;
1218 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1219 symptr->flags |= group_modifiers;
1220 symptr->patend = savsym.grpend;
1224 symptr->patptr = savsym.grpbeg;
1225 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1226 else symptr->flags &= ~FLAG_PARSE_UTF8;
1227 unpack_rec(symptr, s, strbeg, strend, &s);
1228 if (s == strend && savsym.howlen == e_star)
1229 break; /* No way to continue */
1232 symptr->flags &= ~group_modifiers;
1233 savsym.flags = symptr->flags;
1242 Perl_croak(aTHX_ "'@' outside of string in unpack");
1247 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1249 if (len > strend - strrelbeg)
1250 Perl_croak(aTHX_ "'@' outside of string in unpack");
1251 s = strrelbeg + len;
1254 case 'X' | TYPE_IS_SHRIEKING:
1255 if (!len) /* Avoid division by 0 */
1260 hop = last = strbeg;
1262 hop += UTF8SKIP(hop);
1269 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1273 len = (s - strbeg) % len;
1279 Perl_croak(aTHX_ "'X' outside of string in unpack");
1280 while (--s, UTF8_IS_CONTINUATION(*s)) {
1282 Perl_croak(aTHX_ "'X' outside of string in unpack");
1287 if (len > s - strbeg)
1288 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1292 case 'x' | TYPE_IS_SHRIEKING:
1293 if (!len) /* Avoid division by 0 */
1295 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1296 else ai32 = (s - strbeg) % len;
1297 if (ai32 == 0) break;
1304 Perl_croak(aTHX_ "'x' outside of string in unpack");
1309 if (len > strend - s)
1310 Perl_croak(aTHX_ "'x' outside of string in unpack");
1315 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1321 /* Preliminary length estimate is assumed done in 'W' */
1322 if (len > strend - s) len = strend - s;
1328 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1329 if (hop >= strend) {
1331 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1336 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1338 } else if (len > strend - s)
1341 if (datumtype == 'Z') {
1342 /* 'Z' strips stuff after first null */
1345 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1346 sv = newSVpvn(s, ptr-s);
1347 if (howlen == e_star) /* exact for 'Z*' */
1348 len = ptr-s + (ptr != strend ? 1 : 0);
1349 } else if (datumtype == 'A') {
1350 /* 'A' strips both nulls and spaces */
1352 for (ptr = s+len-1; ptr >= s; ptr--)
1353 if (*ptr != 0 && !isSPACE(*ptr)) break;
1355 sv = newSVpvn(s, ptr-s);
1356 } else sv = newSVpvn(s, len);
1360 /* Undo any upgrade done due to need_utf8() */
1361 if (!(symptr->flags & FLAG_WAS_UTF8))
1362 sv_utf8_downgrade(sv, 0);
1364 XPUSHs(sv_2mortal(sv));
1370 if (howlen == e_star || len > (strend - s) * 8)
1371 len = (strend - s) * 8;
1375 Newz(601, PL_bitcount, 256, char);
1376 for (bits = 1; bits < 256; bits++) {
1377 if (bits & 1) PL_bitcount[bits]++;
1378 if (bits & 2) PL_bitcount[bits]++;
1379 if (bits & 4) PL_bitcount[bits]++;
1380 if (bits & 8) PL_bitcount[bits]++;
1381 if (bits & 16) PL_bitcount[bits]++;
1382 if (bits & 32) PL_bitcount[bits]++;
1383 if (bits & 64) PL_bitcount[bits]++;
1384 if (bits & 128) PL_bitcount[bits]++;
1388 while (len >= 8 && s < strend) {
1389 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1394 cuv += PL_bitcount[*(U8 *)s++];
1397 if (len && s < strend) {
1399 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1400 if (datumtype == 'b')
1402 if (bits & 1) cuv++;
1407 if (bits & 0x80) cuv++;
1414 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1417 if (datumtype == 'b') {
1420 for (len = 0; len < ai32; len++) {
1421 if (len & 7) bits >>= 1;
1423 if (s >= strend) break;
1424 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1425 } else bits = *(U8 *) s++;
1426 *str++ = bits & 1 ? '1' : '0';
1431 for (len = 0; len < ai32; len++) {
1432 if (len & 7) bits <<= 1;
1434 if (s >= strend) break;
1435 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1436 } else bits = *(U8 *) s++;
1437 *str++ = bits & 0x80 ? '1' : '0';
1441 SvCUR_set(sv, str - SvPVX(sv));
1448 /* Preliminary length estimate, acceptable for utf8 too */
1449 if (howlen == e_star || len > (strend - s) * 2)
1450 len = (strend - s) * 2;
1451 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1454 if (datumtype == 'h') {
1457 for (len = 0; len < ai32; len++) {
1458 if (len & 1) bits >>= 4;
1460 if (s >= strend) break;
1461 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1462 } else bits = * (U8 *) s++;
1463 *str++ = PL_hexdigit[bits & 15];
1468 for (len = 0; len < ai32; len++) {
1469 if (len & 1) bits <<= 4;
1471 if (s >= strend) break;
1472 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1473 } else bits = *(U8 *) s++;
1474 *str++ = PL_hexdigit[(bits >> 4) & 15];
1478 SvCUR_set(sv, str - SvPVX(sv));
1484 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1485 if (aint >= 128) /* fake up signed chars */
1488 PUSHs(sv_2mortal(newSViv((IV)aint)));
1489 else if (checksum > bits_in_uv)
1490 cdouble += (NV)aint;
1499 if (explicit_length && datumtype == 'C')
1500 /* Switch to "character" mode */
1501 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1504 if (datumtype == 'C' ?
1505 (symptr->flags & FLAG_DO_UTF8) &&
1506 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1507 while (len-- > 0 && s < strend) {
1510 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1511 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1512 if (retlen == (STRLEN) -1 || retlen == 0)
1513 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1516 PUSHs(sv_2mortal(newSVuv((UV) val)));
1517 else if (checksum > bits_in_uv)
1518 cdouble += (NV) val;
1522 } else if (!checksum)
1524 U8 ch = *(U8 *) s++;
1525 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1527 else if (checksum > bits_in_uv)
1528 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1530 while (len-- > 0) cuv += *(U8 *) s++;
1534 if (explicit_length) {
1535 /* Switch to "bytes in UTF-8" mode */
1536 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1538 /* Should be impossible due to the need_utf8() test */
1539 Perl_croak(aTHX_ "U0 mode on a byte string");
1543 if (len > strend - s) len = strend - s;
1545 if (len && unpack_only_one) len = 1;
1549 while (len-- > 0 && s < strend) {
1553 U8 result[UTF8_MAXLEN];
1557 /* Bug: warns about bad utf8 even if we are short on bytes
1558 and will break out of the loop */
1559 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
1561 len = UTF8SKIP(result);
1562 if (!uni_to_bytes(aTHX_ &ptr, strend,
1563 &result[1], len-1, 'U')) break;
1564 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1567 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1568 if (retlen == (STRLEN) -1 || retlen == 0)
1569 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1573 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1574 else if (checksum > bits_in_uv)
1575 cdouble += (NV) auv;
1580 case 's' | TYPE_IS_SHRIEKING:
1581 #if SHORTSIZE != SIZE16
1584 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1585 DO_BO_UNPACK(ashort, s);
1587 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1588 else if (checksum > bits_in_uv)
1589 cdouble += (NV)ashort;
1601 #if U16SIZE > SIZE16
1604 SHIFT16(utf8, s, strend, &ai16, datumtype);
1605 DO_BO_UNPACK(ai16, 16);
1606 #if U16SIZE > SIZE16
1611 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1612 else if (checksum > bits_in_uv)
1613 cdouble += (NV)ai16;
1618 case 'S' | TYPE_IS_SHRIEKING:
1619 #if SHORTSIZE != SIZE16
1621 unsigned short aushort;
1622 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1623 DO_BO_UNPACK(aushort, s);
1625 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1626 else if (checksum > bits_in_uv)
1627 cdouble += (NV)aushort;
1640 #if U16SIZE > SIZE16
1643 SHIFT16(utf8, s, strend, &au16, datumtype);
1644 DO_BO_UNPACK(au16, 16);
1646 if (datumtype == 'n')
1647 au16 = PerlSock_ntohs(au16);
1650 if (datumtype == 'v')
1654 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1655 else if (checksum > bits_in_uv)
1656 cdouble += (NV) au16;
1661 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1662 case 'v' | TYPE_IS_SHRIEKING:
1663 case 'n' | TYPE_IS_SHRIEKING:
1666 # if U16SIZE > SIZE16
1669 SHIFT16(utf8, s, strend, &ai16, datumtype);
1671 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1672 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1673 # endif /* HAS_NTOHS */
1675 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1676 ai16 = (I16) vtohs((U16) ai16);
1677 # endif /* HAS_VTOHS */
1679 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1680 else if (checksum > bits_in_uv)
1681 cdouble += (NV) ai16;
1686 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1688 case 'i' | TYPE_IS_SHRIEKING:
1691 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1692 DO_BO_UNPACK(aint, i);
1694 PUSHs(sv_2mortal(newSViv((IV)aint)));
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV)aint;
1702 case 'I' | TYPE_IS_SHRIEKING:
1705 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1706 DO_BO_UNPACK(auint, i);
1708 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)auint;
1718 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1719 #if IVSIZE == INTSIZE
1720 DO_BO_UNPACK(aiv, i);
1721 #elif IVSIZE == LONGSIZE
1722 DO_BO_UNPACK(aiv, l);
1723 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1724 DO_BO_UNPACK(aiv, 64);
1726 Perl_croak(aTHX_ "'j' not supported on this platform");
1729 PUSHs(sv_2mortal(newSViv(aiv)));
1730 else if (checksum > bits_in_uv)
1739 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1740 #if IVSIZE == INTSIZE
1741 DO_BO_UNPACK(auv, i);
1742 #elif IVSIZE == LONGSIZE
1743 DO_BO_UNPACK(auv, l);
1744 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1745 DO_BO_UNPACK(auv, 64);
1747 Perl_croak(aTHX_ "'J' not supported on this platform");
1750 PUSHs(sv_2mortal(newSVuv(auv)));
1751 else if (checksum > bits_in_uv)
1757 case 'l' | TYPE_IS_SHRIEKING:
1758 #if LONGSIZE != SIZE32
1761 SHIFT_VAR(utf8, s, strend, along, datumtype);
1762 DO_BO_UNPACK(along, l);
1764 PUSHs(sv_2mortal(newSViv((IV)along)));
1765 else if (checksum > bits_in_uv)
1766 cdouble += (NV)along;
1777 #if U32SIZE > SIZE32
1780 SHIFT32(utf8, s, strend, &ai32, datumtype);
1781 DO_BO_UNPACK(ai32, 32);
1782 #if U32SIZE > SIZE32
1783 if (ai32 > 2147483647) ai32 -= 4294967296;
1786 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1787 else if (checksum > bits_in_uv)
1788 cdouble += (NV)ai32;
1793 case 'L' | TYPE_IS_SHRIEKING:
1794 #if LONGSIZE != SIZE32
1796 unsigned long aulong;
1797 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1798 DO_BO_UNPACK(aulong, l);
1800 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1801 else if (checksum > bits_in_uv)
1802 cdouble += (NV)aulong;
1815 #if U32SIZE > SIZE32
1818 SHIFT32(utf8, s, strend, &au32, datumtype);
1819 DO_BO_UNPACK(au32, 32);
1821 if (datumtype == 'N')
1822 au32 = PerlSock_ntohl(au32);
1825 if (datumtype == 'V')
1829 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1830 else if (checksum > bits_in_uv)
1831 cdouble += (NV)au32;
1836 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1837 case 'V' | TYPE_IS_SHRIEKING:
1838 case 'N' | TYPE_IS_SHRIEKING:
1841 # if U32SIZE > SIZE32
1844 SHIFT32(utf8, s, strend, &ai32, datumtype);
1846 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1847 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1850 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1851 ai32 = (I32)vtohl((U32)ai32);
1854 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)ai32;
1861 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1865 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1866 DO_BO_UNPACK_P(aptr);
1867 /* newSVpv generates undef if aptr is NULL */
1868 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1876 while (len > 0 && s < strend) {
1878 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1879 auv = (auv << 7) | (ch & 0x7f);
1880 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1883 PUSHs(sv_2mortal(newSVuv(auv)));
1888 if (++bytes >= sizeof(UV)) { /* promote to string */
1892 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1893 while (s < strend) {
1894 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1895 sv = mul128(sv, (U8)(ch & 0x7f));
1905 PUSHs(sv_2mortal(sv));
1910 if ((s >= strend) && bytes)
1911 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1915 if (symptr->howlen == e_star)
1916 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1918 if (sizeof(char*) <= strend - s) {
1920 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1921 DO_BO_UNPACK_P(aptr);
1922 /* newSVpvn generates undef if aptr is NULL */
1923 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
1930 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1931 DO_BO_UNPACK(aquad, 64);
1933 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
1934 newSViv((IV)aquad) : newSVnv((NV)aquad)));
1935 else if (checksum > bits_in_uv)
1936 cdouble += (NV)aquad;
1944 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1945 DO_BO_UNPACK(auquad, 64);
1947 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1948 newSVuv((UV)auquad):newSVnv((NV)auquad)));
1949 else if (checksum > bits_in_uv)
1950 cdouble += (NV)auquad;
1955 #endif /* HAS_QUAD */
1956 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1960 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1961 DO_BO_UNPACK_N(afloat, float);
1963 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1971 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1972 DO_BO_UNPACK_N(adouble, double);
1974 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
1982 SHIFT_VAR(utf8, s, strend, anv, datumtype);
1983 DO_BO_UNPACK_N(anv, NV);
1985 PUSHs(sv_2mortal(newSVnv(anv)));
1990 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1993 long double aldouble;
1994 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
1995 DO_BO_UNPACK_N(aldouble, long double);
1997 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
1999 cdouble += aldouble;
2005 * Initialise the decode mapping. By using a table driven
2006 * algorithm, the code will be character-set independent
2007 * (and just as fast as doing character arithmetic)
2009 if (PL_uudmap['M'] == 0) {
2012 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2013 PL_uudmap[(U8)PL_uuemap[i]] = i;
2015 * Because ' ' and '`' map to the same value,
2016 * we need to decode them both the same.
2021 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2022 sv = sv_2mortal(NEWSV(42, l));
2023 if (l) SvPOK_on(sv);
2026 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2032 next_uni_uu(aTHX_ &s, strend, &a);
2033 next_uni_uu(aTHX_ &s, strend, &b);
2034 next_uni_uu(aTHX_ &s, strend, &c);
2035 next_uni_uu(aTHX_ &s, strend, &d);
2036 hunk[0] = (char)((a << 2) | (b >> 4));
2037 hunk[1] = (char)((b << 4) | (c >> 2));
2038 hunk[2] = (char)((c << 6) | d);
2039 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2043 if (*s == '\n') s++;
2045 /* possible checksum byte */
2046 char *skip = s+UTF8SKIP(s);
2047 if (skip < strend && *skip == '\n') s = skip+1;
2052 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2057 len = PL_uudmap[*(U8*)s++] & 077;
2059 if (s < strend && ISUUCHAR(*s))
2060 a = PL_uudmap[*(U8*)s++] & 077;
2063 if (s < strend && ISUUCHAR(*s))
2064 b = PL_uudmap[*(U8*)s++] & 077;
2067 if (s < strend && ISUUCHAR(*s))
2068 c = PL_uudmap[*(U8*)s++] & 077;
2071 if (s < strend && ISUUCHAR(*s))
2072 d = PL_uudmap[*(U8*)s++] & 077;
2075 hunk[0] = (char)((a << 2) | (b >> 4));
2076 hunk[1] = (char)((b << 4) | (c >> 2));
2077 hunk[2] = (char)((c << 6) | d);
2078 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2083 else /* possible checksum byte */
2084 if (s + 1 < strend && s[1] == '\n')
2093 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2094 (checksum > bits_in_uv &&
2095 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2098 anv = (NV) (1 << (checksum & 15));
2099 while (checksum >= 16) {
2103 while (cdouble < 0.0)
2105 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2106 sv = newSVnv(cdouble);
2109 if (checksum < bits_in_uv) {
2110 UV mask = ((UV)1 << checksum) - 1;
2115 XPUSHs(sv_2mortal(sv));
2119 if (symptr->flags & FLAG_SLASH){
2120 if (SP - PL_stack_base - start_sp_offset <= 0)
2121 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2122 if( next_symbol(symptr) ){
2123 if( symptr->howlen == e_number )
2124 Perl_croak(aTHX_ "Count after length/code in unpack" );
2126 /* ...end of char buffer then no decent length available */
2127 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2129 /* take top of stack (hope it's numeric) */
2132 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2135 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2137 datumtype = symptr->code;
2138 explicit_length = FALSE;
2146 return SP - PL_stack_base - start_sp_offset;
2153 I32 gimme = GIMME_V;
2156 char *pat = SvPV(left, llen);
2157 char *s = SvPV(right, rlen);
2158 char *strend = s + rlen;
2159 char *patend = pat + llen;
2163 cnt = unpackstring(pat, patend, s, strend,
2164 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2165 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2168 if ( !cnt && gimme == G_SCALAR )
2169 PUSHs(&PL_sv_undef);
2174 doencodes(U8 *h, char *s, I32 len)
2176 *h++ = PL_uuemap[len];
2178 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2179 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2180 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2181 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2186 char r = (len > 1 ? s[1] : '\0');
2187 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2188 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2189 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2190 *h++ = PL_uuemap[0];
2197 S_is_an_int(pTHX_ char *s, STRLEN l)
2200 SV *result = newSVpvn(s, l);
2201 char *result_c = SvPV(result, n_a); /* convenience */
2202 char *out = result_c;
2212 SvREFCNT_dec(result);
2235 SvREFCNT_dec(result);
2241 SvCUR_set(result, out - result_c);
2245 /* pnum must be '\0' terminated */
2247 S_div128(pTHX_ SV *pnum, bool *done)
2250 char *s = SvPV(pnum, len);
2259 i = m * 10 + (*t - '0');
2261 r = (i >> 7); /* r < 10 */
2268 SvCUR_set(pnum, (STRLEN) (t - s));
2275 =for apidoc pack_cat
2277 The engine implementing pack() Perl function. Note: parameters next_in_list and
2278 flags are not used. This call should not be used; use packlist instead.
2284 Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2286 tempsym_t sym = { 0 };
2288 sym.patend = patend;
2289 sym.flags = FLAG_PACK;
2291 (void)pack_rec( cat, &sym, beglist, endlist );
2296 =for apidoc packlist
2298 The engine implementing pack() Perl function.
2304 Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2307 tempsym_t sym = { 0 };
2310 sym.patend = patend;
2311 sym.flags = FLAG_PACK;
2313 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2314 Also make sure any UTF8 flag is loaded */
2315 SvPV_force(cat, no_len);
2316 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2318 (void)pack_rec( cat, &sym, beglist, endlist );
2321 /* like sv_utf8_upgrade, but also repoint the group start markers */
2323 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2326 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2328 if (SvUTF8(sv)) return;
2330 from_start = SvPVX(sv);
2331 from_end = from_start + SvCUR(sv);
2332 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2333 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2334 if (from_ptr == from_end) {
2335 /* Simple case: no character needs to be changed */
2340 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2341 New('U', to_start, len, char);
2342 Copy(from_start, to_start, from_ptr-from_start, char);
2343 to_ptr = to_start + (from_ptr-from_start);
2345 New('U', marks, sym_ptr->level+2, char *);
2346 for (group=sym_ptr; group; group = group->previous)
2347 marks[group->level] = from_start + group->strbeg;
2348 marks[sym_ptr->level+1] = from_end+1;
2349 for (m = marks; *m < from_ptr; m++)
2350 *m = to_start + (*m-from_start);
2352 for (;from_ptr < from_end; from_ptr++) {
2353 while (*m == from_ptr) *m++ = to_ptr;
2354 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2358 while (*m == from_ptr) *m++ = to_ptr;
2359 if (m != marks + sym_ptr->level+1) {
2362 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2364 for (group=sym_ptr; group; group = group->previous)
2365 group->strbeg = marks[group->level] - to_start;
2370 SvLEN(sv) += SvIVX(sv);
2371 from_start -= SvIVX(sv);
2374 SvFLAGS(sv) &= ~SVf_OOK;
2377 Safefree(from_start);
2378 SvPVX(sv) = to_start;
2379 SvCUR(sv) = to_ptr - to_start;
2384 /* Exponential string grower. Makes string extension effectively O(n)
2385 needed says how many extra bytes we need (not counting the final '\0')
2386 Only grows the string if there is an actual lack of space
2389 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2390 STRLEN cur = SvCUR(sv);
2391 STRLEN len = SvLEN(sv);
2393 if (len - cur > needed) return SvPVX(sv);
2394 extend = needed > len ? needed : len;
2395 return SvGROW(sv, len+extend+1);
2400 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2402 tempsym_t lookahead;
2403 I32 items = endlist - beglist;
2404 bool found = next_symbol(symptr);
2405 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2407 if (symptr->level == 0 && found && symptr->code == 'U') {
2408 marked_upgrade(aTHX_ cat, symptr);
2409 symptr->flags |= FLAG_DO_UTF8;
2412 symptr->strbeg = SvCUR(cat);
2418 SV *lengthcode = Nullsv;
2419 I32 datumtype = symptr->code;
2420 howlen_t howlen = symptr->howlen;
2421 char *start = SvPVX(cat);
2422 char *cur = start + SvCUR(cat);
2424 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2428 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2432 /* e_no_len and e_number */
2433 len = symptr->length;
2438 struct packsize_t *pack_props =
2439 &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
2440 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
2441 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
2442 int offset = rawtype - pack_props->first;
2444 if (offset >= 0 && offset < pack_props->size) {
2445 /* Data about this template letter */
2446 unsigned char data = pack_props->array[offset];
2448 if (data && !(data & PACK_SIZE_UNPREDICTABLE)) {
2449 /* We can process this letter. */
2450 STRLEN size = data & PACK_SIZE_MASK;
2451 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2457 /* Look ahead for next symbol. Do we have code/code? */
2458 lookahead = *symptr;
2459 found = next_symbol(&lookahead);
2460 if ( symptr->flags & FLAG_SLASH ) {
2461 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2462 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2463 e_star != lookahead.howlen )
2464 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
2466 sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
2469 /* Code inside the switch must take care to properly update
2470 cat (CUR length and '\0' termination) if it updated *cur and
2471 doesn't simply leave using break */
2472 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2474 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2475 (int) TYPE_NO_MODIFIERS(datumtype));
2477 Perl_croak(aTHX_ "'%%' may not be used in pack");
2480 char *s = start + symptr->strbeg;
2481 while (len > 0 && s < cur) {
2486 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2489 GROWING(0, cat, start, cur, len);
2490 Zero(cur, len, char);
2492 } else if (s < cur) cur = s;
2493 else goto no_change;
2495 len -= cur - (start+symptr->strbeg);
2496 if (len > 0) goto grow;
2498 if (len > 0) goto shrink;
2499 else goto no_change;
2503 tempsym_t savsym = *symptr;
2504 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2505 symptr->flags |= group_modifiers;
2506 symptr->patend = savsym.grpend;
2508 symptr->previous = &lookahead;
2511 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2512 else symptr->flags &= ~FLAG_PARSE_UTF8;
2513 was_utf8 = SvUTF8(cat);
2514 symptr->patptr = savsym.grpbeg;
2515 beglist = pack_rec(cat, symptr, beglist, endlist);
2516 if (SvUTF8(cat) != was_utf8)
2517 /* This had better be an upgrade while in utf8==0 mode */
2520 if (savsym.howlen == e_star && beglist == endlist)
2521 break; /* No way to continue */
2523 lookahead.flags = symptr->flags & ~group_modifiers;
2526 case 'X' | TYPE_IS_SHRIEKING:
2527 if (!len) /* Avoid division by 0 */
2534 hop += UTF8SKIP(hop);
2541 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2545 len = (cur-start) % len;
2549 if (len < 1) goto no_change;
2552 Perl_croak(aTHX_ "'X' outside of string in pack");
2553 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2555 Perl_croak(aTHX_ "'X' outside of string in pack");
2561 if (cur - start < len)
2562 Perl_croak(aTHX_ "'X' outside of string in pack");
2565 if (cur < start+symptr->strbeg) {
2566 /* Make sure group starts don't point into the void */
2568 STRLEN length = cur-start;
2569 for (group = symptr;
2570 group && length < group->strbeg;
2571 group = group->previous) group->strbeg = length;
2572 lookahead.strbeg = length;
2575 case 'x' | TYPE_IS_SHRIEKING: {
2577 if (!len) /* Avoid division by 0 */
2579 if (utf8) ai32 = utf8_length(start, cur) % len;
2580 else ai32 = (cur - start) % len;
2581 if (ai32 == 0) goto no_change;
2593 aptr = SvPV(fromstr, fromlen);
2594 if (DO_UTF8(fromstr)) {
2597 if (!utf8 && !SvUTF8(cat)) {
2598 marked_upgrade(aTHX_ cat, symptr);
2599 lookahead.flags |= FLAG_DO_UTF8;
2600 lookahead.strbeg = symptr->strbeg;
2603 cur = start + SvCUR(cat);
2605 if (howlen == e_star) {
2606 if (utf8) goto string_copy;
2610 end = aptr + fromlen;
2611 fromlen = datumtype == 'Z' ? len-1 : len;
2612 while ((I32) fromlen > 0 && s < end) {
2617 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2620 if (datumtype == 'Z') len++;
2626 fromlen = len - fromlen;
2627 if (datumtype == 'Z') fromlen--;
2628 if (howlen == e_star) {
2630 if (datumtype == 'Z') len++;
2632 GROWING(0, cat, start, cur, len);
2633 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2634 datumtype | TYPE_IS_PACK))
2635 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2639 if (howlen == e_star) {
2641 if (datumtype == 'Z') len++;
2643 if (len <= (I32) fromlen) {
2645 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2647 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2649 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2650 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2652 while (fromlen > 0) {
2653 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2659 if (howlen == e_star) {
2661 if (datumtype == 'Z') len++;
2663 if (len <= (I32) fromlen) {
2665 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2667 GROWING(0, cat, start, cur, len);
2668 Copy(aptr, cur, fromlen, char);
2672 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2685 str = SvPV(fromstr, fromlen);
2686 end = str + fromlen;
2687 if (DO_UTF8(fromstr)) {
2689 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2691 utf8_source = FALSE;
2692 utf8_flags = 0; /* Unused, but keep compilers happy */
2694 if (howlen == e_star) len = fromlen;
2695 field_len = (len+7)/8;
2696 GROWING(utf8, cat, start, cur, field_len);
2697 if (len > (I32)fromlen) len = fromlen;
2700 if (datumtype == 'B')
2704 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2706 } else bits |= *str++ & 1;
2707 if (l & 7) bits <<= 1;
2709 PUSH_BYTE(utf8, cur, bits);
2714 /* datumtype == 'b' */
2718 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2719 if (val & 1) bits |= 0x80;
2720 } else if (*str++ & 1)
2722 if (l & 7) bits >>= 1;
2724 PUSH_BYTE(utf8, cur, bits);
2730 if (datumtype == 'B')
2731 bits <<= 7 - (l & 7);
2733 bits >>= 7 - (l & 7);
2734 PUSH_BYTE(utf8, cur, bits);
2737 /* Determine how many chars are left in the requested field */
2739 if (howlen == e_star) field_len = 0;
2740 else field_len -= l;
2741 Zero(cur, field_len, char);
2754 str = SvPV(fromstr, fromlen);
2755 end = str + fromlen;
2756 if (DO_UTF8(fromstr)) {
2758 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2760 utf8_source = FALSE;
2761 utf8_flags = 0; /* Unused, but keep compilers happy */
2763 if (howlen == e_star) len = fromlen;
2764 field_len = (len+1)/2;
2765 GROWING(utf8, cat, start, cur, field_len);
2766 if (!utf8 && len > (I32)fromlen) len = fromlen;
2769 if (datumtype == 'H')
2773 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2774 if (val < 256 && isALPHA(val))
2775 bits |= (val + 9) & 0xf;
2778 } else if (isALPHA(*str))
2779 bits |= (*str++ + 9) & 0xf;
2781 bits |= *str++ & 0xf;
2782 if (l & 1) bits <<= 4;
2784 PUSH_BYTE(utf8, cur, bits);
2792 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2793 if (val < 256 && isALPHA(val))
2794 bits |= ((val + 9) & 0xf) << 4;
2796 bits |= (val & 0xf) << 4;
2797 } else if (isALPHA(*str))
2798 bits |= ((*str++ + 9) & 0xf) << 4;
2800 bits |= (*str++ & 0xf) << 4;
2801 if (l & 1) bits >>= 4;
2803 PUSH_BYTE(utf8, cur, bits);
2809 PUSH_BYTE(utf8, cur, bits);
2812 /* Determine how many chars are left in the requested field */
2814 if (howlen == e_star) field_len = 0;
2815 else field_len -= l;
2816 Zero(cur, field_len, char);
2824 aiv = SvIV(fromstr);
2825 if ((-128 > aiv || aiv > 127) &&
2827 Perl_warner(aTHX_ packWARN(WARN_PACK),
2828 "Character in 'c' format wrapped in pack");
2829 PUSH_BYTE(utf8, cur, aiv & 0xff);
2834 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2837 GROWING(0, cat, start, cur, len);
2841 aiv = SvIV(fromstr);
2842 if ((0 > aiv || aiv > 0xff) &&
2844 Perl_warner(aTHX_ packWARN(WARN_PACK),
2845 "Character in 'C' format wrapped in pack");
2846 *cur++ = aiv & 0xff;
2851 U8 in_bytes = IN_BYTES;
2853 end = start+SvLEN(cat)-1;
2854 if (utf8) end -= UTF8_MAXLEN-1;
2858 auv = SvUV(fromstr);
2859 if (in_bytes) auv = auv % 0x100;
2864 SvCUR(cat) = cur - start;
2866 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2867 end = start+SvLEN(cat)-UTF8_MAXLEN;
2869 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2871 0 : UNICODE_ALLOW_ANY);
2876 SvCUR(cat) = cur - start;
2877 marked_upgrade(aTHX_ cat, symptr);
2878 lookahead.flags |= FLAG_DO_UTF8;
2879 lookahead.strbeg = symptr->strbeg;
2882 cur = start + SvCUR(cat);
2883 end = start+SvLEN(cat)-UTF8_MAXLEN;
2886 if (ckWARN(WARN_PACK))
2887 Perl_warner(aTHX_ packWARN(WARN_PACK),
2888 "Character in 'W' format wrapped in pack");
2893 SvCUR(cat) = cur - start;
2894 GROWING(0, cat, start, cur, len+1);
2895 end = start+SvLEN(cat)-1;
2897 *(U8 *) cur++ = auv;
2906 if (!(symptr->flags & FLAG_DO_UTF8)) {
2907 marked_upgrade(aTHX_ cat, symptr);
2908 lookahead.flags |= FLAG_DO_UTF8;
2909 lookahead.strbeg = symptr->strbeg;
2915 end = start+SvLEN(cat);
2916 if (!utf8) end -= UTF8_MAXLEN;
2920 auv = SvUV(fromstr);
2922 char buffer[UTF8_MAXLEN], *endb;
2923 endb = uvuni_to_utf8_flags(buffer, auv,
2925 0 : UNICODE_ALLOW_ANY);
2926 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2928 SvCUR(cat) = cur - start;
2929 GROWING(0, cat, start, cur,
2930 len+(endb-buffer)*UTF8_EXPAND);
2931 end = start+SvLEN(cat);
2933 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2937 SvCUR(cat) = cur - start;
2938 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2939 end = start+SvLEN(cat)-UTF8_MAXLEN;
2941 cur = uvuni_to_utf8_flags(cur, auv,
2943 0 : UNICODE_ALLOW_ANY);
2948 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2954 anv = SvNV(fromstr);
2956 /* VOS does not automatically map a floating-point overflow
2957 during conversion from double to float into infinity, so we
2958 do it by hand. This code should either be generalized for
2959 any OS that needs it, or removed if and when VOS implements
2960 posix-976 (suggestion to support mapping to infinity).
2961 Paul.Green@stratus.com 02-04-02. */
2963 afloat = _float_constants[0]; /* single prec. inf. */
2964 else if (anv < -FLT_MAX)
2965 afloat = _float_constants[0]; /* single prec. inf. */
2966 else afloat = (float) anv;
2968 # if defined(VMS) && !defined(__IEEE_FP)
2969 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2970 * on Alpha; fake it if we don't have them.
2974 else if (anv < -FLT_MAX)
2976 else afloat = (float)anv;
2978 afloat = (float)anv;
2980 #endif /* __VOS__ */
2981 DO_BO_PACK_N(afloat, float);
2982 PUSH_VAR(utf8, cur, afloat);
2990 anv = SvNV(fromstr);
2992 /* VOS does not automatically map a floating-point overflow
2993 during conversion from long double to double into infinity,
2994 so we do it by hand. This code should either be generalized
2995 for any OS that needs it, or removed if and when VOS
2996 implements posix-976 (suggestion to support mapping to
2997 infinity). Paul.Green@stratus.com 02-04-02. */
2999 adouble = _double_constants[0]; /* double prec. inf. */
3000 else if (anv < -DBL_MAX)
3001 adouble = _double_constants[0]; /* double prec. inf. */
3002 else adouble = (double) anv;
3004 # if defined(VMS) && !defined(__IEEE_FP)
3005 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3006 * on Alpha; fake it if we don't have them.
3010 else if (anv < -DBL_MAX)
3012 else adouble = (double)anv;
3014 adouble = (double)anv;
3016 #endif /* __VOS__ */
3017 DO_BO_PACK_N(adouble, double);
3018 PUSH_VAR(utf8, cur, adouble);
3023 Zero(&anv, 1, NV); /* can be long double with unused bits */
3026 anv = SvNV(fromstr);
3027 DO_BO_PACK_N(anv, NV);
3028 PUSH_VAR(utf8, cur, anv);
3032 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3034 long double aldouble;
3035 /* long doubles can have unused bits, which may be nonzero */
3036 Zero(&aldouble, 1, long double);
3039 aldouble = (long double)SvNV(fromstr);
3040 DO_BO_PACK_N(aldouble, long double);
3041 PUSH_VAR(utf8, cur, aldouble);
3046 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3047 case 'n' | TYPE_IS_SHRIEKING:
3053 ai16 = (I16)SvIV(fromstr);
3055 ai16 = PerlSock_htons(ai16);
3057 PUSH16(utf8, cur, &ai16);
3060 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3061 case 'v' | TYPE_IS_SHRIEKING:
3067 ai16 = (I16)SvIV(fromstr);
3071 PUSH16(utf8, cur, &ai16);
3074 case 'S' | TYPE_IS_SHRIEKING:
3075 #if SHORTSIZE != SIZE16
3077 unsigned short aushort;
3079 aushort = SvUV(fromstr);
3080 DO_BO_PACK(aushort, s);
3081 PUSH_VAR(utf8, cur, aushort);
3091 au16 = (U16)SvUV(fromstr);
3092 DO_BO_PACK(au16, 16);
3093 PUSH16(utf8, cur, &au16);
3096 case 's' | TYPE_IS_SHRIEKING:
3097 #if SHORTSIZE != SIZE16
3101 ashort = SvIV(fromstr);
3102 DO_BO_PACK(ashort, s);
3103 PUSH_VAR(utf8, cur, ashort);
3113 ai16 = (I16)SvIV(fromstr);
3114 DO_BO_PACK(ai16, 16);
3115 PUSH16(utf8, cur, &ai16);
3119 case 'I' | TYPE_IS_SHRIEKING:
3123 auint = SvUV(fromstr);
3124 DO_BO_PACK(auint, i);
3125 PUSH_VAR(utf8, cur, auint);
3132 aiv = SvIV(fromstr);
3133 #if IVSIZE == INTSIZE
3135 #elif IVSIZE == LONGSIZE
3137 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3138 DO_BO_PACK(aiv, 64);
3140 Perl_croak(aTHX_ "'j' not supported on this platform");
3142 PUSH_VAR(utf8, cur, aiv);
3149 auv = SvUV(fromstr);
3150 #if UVSIZE == INTSIZE
3152 #elif UVSIZE == LONGSIZE
3154 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3155 DO_BO_PACK(auv, 64);
3157 Perl_croak(aTHX_ "'J' not supported on this platform");
3159 PUSH_VAR(utf8, cur, auv);
3166 anv = SvNV(fromstr);
3170 SvCUR(cat) = cur - start;
3171 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3174 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3175 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3176 any negative IVs will have already been got by the croak()
3177 above. IOK is untrue for fractions, so we test them
3178 against UV_MAX_P1. */
3179 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3180 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3181 char *in = buf + sizeof(buf);
3182 UV auv = SvUV(fromstr);
3185 *--in = (char)((auv & 0x7f) | 0x80);
3188 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3189 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3190 in, (buf + sizeof(buf)) - in);
3191 } else if (SvPOKp(fromstr))
3193 else if (SvNOKp(fromstr)) {
3194 /* 10**NV_MAX_10_EXP is the largest power of 10
3195 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3196 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3197 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3198 And with that many bytes only Inf can overflow.
3199 Some C compilers are strict about integral constant
3200 expressions so we conservatively divide by a slightly
3201 smaller integer instead of multiplying by the exact
3202 floating-point value.
3204 #ifdef NV_MAX_10_EXP
3205 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3206 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3208 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3209 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3211 char *in = buf + sizeof(buf);
3213 anv = Perl_floor(anv);
3215 NV next = Perl_floor(anv / 128);
3216 if (in <= buf) /* this cannot happen ;-) */
3217 Perl_croak(aTHX_ "Cannot compress integer in pack");
3218 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3221 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3222 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3223 in, (buf + sizeof(buf)) - in);
3225 char *from, *result, *in;
3231 /* Copy string and check for compliance */
3232 from = SvPV(fromstr, len);
3233 if ((norm = is_an_int(from, len)) == NULL)
3234 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3236 New('w', result, len, char);
3239 while (!done) *--in = div128(norm, &done) | 0x80;
3240 result[len - 1] &= 0x7F; /* clear continue bit */
3241 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3242 in, (result + len) - in);
3244 SvREFCNT_dec(norm); /* free norm */
3249 case 'i' | TYPE_IS_SHRIEKING:
3253 aint = SvIV(fromstr);
3254 DO_BO_PACK(aint, i);
3255 PUSH_VAR(utf8, cur, aint);
3258 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3259 case 'N' | TYPE_IS_SHRIEKING:
3265 au32 = SvUV(fromstr);
3267 au32 = PerlSock_htonl(au32);
3269 PUSH32(utf8, cur, &au32);
3272 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3273 case 'V' | TYPE_IS_SHRIEKING:
3279 au32 = SvUV(fromstr);
3283 PUSH32(utf8, cur, &au32);
3286 case 'L' | TYPE_IS_SHRIEKING:
3287 #if LONGSIZE != SIZE32
3289 unsigned long aulong;
3291 aulong = SvUV(fromstr);
3292 DO_BO_PACK(aulong, l);
3293 PUSH_VAR(utf8, cur, aulong);
3303 au32 = SvUV(fromstr);
3304 DO_BO_PACK(au32, 32);
3305 PUSH32(utf8, cur, &au32);
3308 case 'l' | TYPE_IS_SHRIEKING:
3309 #if LONGSIZE != SIZE32
3313 along = SvIV(fromstr);
3314 DO_BO_PACK(along, l);
3315 PUSH_VAR(utf8, cur, along);
3325 ai32 = SvIV(fromstr);
3326 DO_BO_PACK(ai32, 32);
3327 PUSH32(utf8, cur, &ai32);
3335 auquad = (Uquad_t) SvUV(fromstr);
3336 DO_BO_PACK(auquad, 64);
3337 PUSH_VAR(utf8, cur, auquad);
3344 aquad = (Quad_t)SvIV(fromstr);
3345 DO_BO_PACK(aquad, 64);
3346 PUSH_VAR(utf8, cur, aquad);
3349 #endif /* HAS_QUAD */
3351 len = 1; /* assume SV is correct length */
3352 GROWING(utf8, cat, start, cur, sizeof(char *));
3359 SvGETMAGIC(fromstr);
3360 if (!SvOK(fromstr)) aptr = NULL;
3363 /* XXX better yet, could spirit away the string to
3364 * a safe spot and hang on to it until the result
3365 * of pack() (and all copies of the result) are
3368 if (ckWARN(WARN_PACK) &&
3369 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3370 !SvREADONLY(fromstr)))) {
3371 Perl_warner(aTHX_ packWARN(WARN_PACK),
3372 "Attempt to pack pointer to temporary value");
3374 if (SvPOK(fromstr) || SvNIOK(fromstr))
3375 aptr = SvPV_flags(fromstr, n_a, 0);
3377 aptr = SvPV_force_flags(fromstr, n_a, 0);
3380 PUSH_VAR(utf8, cur, aptr);
3388 if (len <= 2) len = 45;
3389 else len = len / 3 * 3;
3391 Perl_warner(aTHX_ packWARN(WARN_PACK),
3392 "Field too wide in 'u' format in pack");
3395 aptr = SvPV(fromstr, fromlen);
3396 from_utf8 = DO_UTF8(fromstr);
3398 aend = aptr + fromlen;
3399 fromlen = sv_len_utf8(fromstr);
3400 } else aend = NULL; /* Unused, but keep compilers happy */
3401 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3402 while (fromlen > 0) {
3405 U8 hunk[1+63/3*4+1];
3407 if ((I32)fromlen > len)
3413 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3414 'u' | TYPE_IS_PACK)) {
3416 SvCUR(cat) = cur - start;
3417 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3419 end = doencodes(hunk, buffer, todo);
3421 end = doencodes(hunk, aptr, todo);
3424 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3431 SvCUR(cat) = cur - start;
3433 *symptr = lookahead;
3442 dSP; dMARK; dORIGMARK; dTARGET;
3443 register SV *cat = TARG;
3445 register char *pat = SvPVx(*++MARK, fromlen);
3446 register char *patend = pat + fromlen;
3449 sv_setpvn(cat, "", 0);
3452 packlist(cat, pat, patend, MARK, SP + 1);
3462 * c-indentation-style: bsd
3464 * indent-tabs-mode: t
3467 * vim: shiftwidth=4: