3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
35 # define PERL_PACK_CAN_BYTEORDER
36 # define PERL_PACK_CAN_SHRIEKSIGN
44 * Offset for integer pack/unpack.
46 * On architectures where I16 and I32 aren't really 16 and 32 bits,
47 * which for now are all Crays, pack and unpack have to play games.
51 * These values are required for portability of pack() output.
52 * If they're not right on your machine, then pack() and unpack()
53 * wouldn't work right anyway; you'll need to apply the Cray hack.
54 * (I'd like to check them with #if, but you can't use sizeof() in
55 * the preprocessor.) --???
58 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
59 defines are now in config.h. --Andy Dougherty April 1998
64 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
67 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
68 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
69 # define OFF16(p) ((char*)(p))
70 # define OFF32(p) ((char*)(p))
72 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
73 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
74 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
76 ++++ bad cray byte order
80 # define OFF16(p) ((char *) (p))
81 # define OFF32(p) ((char *) (p))
84 /* Only to be used inside a loop (see the break) */
85 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
87 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
89 Copy(s, OFF16(p), SIZE16, char); \
94 /* Only to be used inside a loop (see the break) */
95 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
97 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
99 Copy(s, OFF32(p), SIZE32, char); \
104 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
105 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
107 /* Only to be used inside a loop (see the break) */
108 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
111 if (!uni_to_bytes(aTHX_ &s, strend, \
112 (char *) &var, sizeof(var), datumtype)) break;\
114 Copy(s, (char *) &var, sizeof(var), char); \
119 #define PUSH_VAR(utf8, aptr, var) \
120 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
122 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
123 #define MAX_SUB_TEMPLATE_LEVEL 100
125 /* flags (note that type modifiers can also be used as flags!) */
126 #define FLAG_WAS_UTF8 0x40
127 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
128 #define FLAG_UNPACK_ONLY_ONE 0x10
129 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
130 #define FLAG_SLASH 0x04
131 #define FLAG_COMMA 0x02
132 #define FLAG_PACK 0x01
135 S_mul128(pTHX_ SV *sv, U8 m)
138 char *s = SvPV(sv, len);
142 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
143 SV *tmpNew = newSVpvn("0000000000", 10);
145 sv_catsv(tmpNew, sv);
146 SvREFCNT_dec(sv); /* free old sv */
151 while (!*t) /* trailing '\0'? */
154 i = ((*t - '0') << 7) + m;
155 *(t--) = '0' + (char)(i % 10);
161 /* Explosives and implosives. */
163 #if 'I' == 73 && 'J' == 74
164 /* On an ASCII/ISO kind of system */
165 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
168 Some other sort of character set - use memchr() so we don't match
171 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
175 #define TYPE_IS_SHRIEKING 0x100
176 #define TYPE_IS_BIG_ENDIAN 0x200
177 #define TYPE_IS_LITTLE_ENDIAN 0x400
178 #define TYPE_IS_PACK 0x800
179 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
180 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
181 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
183 #ifdef PERL_PACK_CAN_SHRIEKSIGN
184 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
186 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
189 #ifndef PERL_PACK_CAN_BYTEORDER
190 /* Put "can't" first because it is shorter */
191 # define TYPE_ENDIANNESS(t) 0
192 # define TYPE_NO_ENDIANNESS(t) (t)
194 # define ENDIANNESS_ALLOWED_TYPES ""
196 # define DO_BO_UNPACK(var, type)
197 # define DO_BO_PACK(var, type)
198 # define DO_BO_UNPACK_PTR(var, type, pre_cast)
199 # define DO_BO_PACK_PTR(var, type, pre_cast)
200 # define DO_BO_UNPACK_N(var, type)
201 # define DO_BO_PACK_N(var, type)
202 # define DO_BO_UNPACK_P(var)
203 # define DO_BO_PACK_P(var)
205 #else /* PERL_PACK_CAN_BYTEORDER */
207 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
208 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
210 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
212 # define DO_BO_UNPACK(var, type) \
214 switch (TYPE_ENDIANNESS(datumtype)) { \
215 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
216 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
221 # define DO_BO_PACK(var, type) \
223 switch (TYPE_ENDIANNESS(datumtype)) { \
224 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
225 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
230 # define DO_BO_UNPACK_PTR(var, type, pre_cast) \
232 switch (TYPE_ENDIANNESS(datumtype)) { \
233 case TYPE_IS_BIG_ENDIAN: \
234 var = (void *) my_betoh ## type ((pre_cast) var); \
236 case TYPE_IS_LITTLE_ENDIAN: \
237 var = (void *) my_letoh ## type ((pre_cast) var); \
244 # define DO_BO_PACK_PTR(var, type, pre_cast) \
246 switch (TYPE_ENDIANNESS(datumtype)) { \
247 case TYPE_IS_BIG_ENDIAN: \
248 var = (void *) my_htobe ## type ((pre_cast) var); \
250 case TYPE_IS_LITTLE_ENDIAN: \
251 var = (void *) my_htole ## type ((pre_cast) var); \
258 # define BO_CANT_DOIT(action, type) \
260 switch (TYPE_ENDIANNESS(datumtype)) { \
261 case TYPE_IS_BIG_ENDIAN: \
262 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
263 "platform", #action, #type); \
265 case TYPE_IS_LITTLE_ENDIAN: \
266 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
267 "platform", #action, #type); \
274 # if PTRSIZE == INTSIZE
275 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
276 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
277 # elif PTRSIZE == LONGSIZE
278 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
279 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
281 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
282 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
285 # if defined(my_htolen) && defined(my_letohn) && \
286 defined(my_htoben) && defined(my_betohn)
287 # define DO_BO_UNPACK_N(var, type) \
289 switch (TYPE_ENDIANNESS(datumtype)) { \
290 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
291 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
296 # define DO_BO_PACK_N(var, type) \
298 switch (TYPE_ENDIANNESS(datumtype)) { \
299 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
300 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
305 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
306 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
309 #endif /* PERL_PACK_CAN_BYTEORDER */
311 #define PACK_SIZE_CANNOT_CSUM 0x80
312 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
313 #define PACK_SIZE_MASK 0x3F
317 const unsigned char *array;
322 #define PACK_SIZE_NORMAL 0
323 #define PACK_SIZE_SHRIEKING 1
325 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
326 in). You're unlikely ever to need to regenerate them. */
329 unsigned char size_normal[53] = {
330 /* C */ sizeof(unsigned char),
331 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
332 /* D */ LONG_DOUBLESIZE,
339 /* I */ sizeof(unsigned int),
346 #if defined(HAS_QUAD)
347 /* Q */ sizeof(Uquad_t),
354 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
356 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
357 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 /* c */ sizeof(char),
359 /* d */ sizeof(double),
361 /* f */ sizeof(float),
370 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
371 #if defined(HAS_QUAD)
372 /* q */ sizeof(Quad_t),
380 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
382 unsigned char size_shrieking[46] = {
383 /* I */ sizeof(unsigned int),
385 /* L */ sizeof(unsigned long),
387 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
393 /* S */ sizeof(unsigned short),
395 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
400 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* l */ sizeof(long),
405 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
411 /* s */ sizeof(short),
413 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
419 struct packsize_t packsize[2] = {
420 {size_normal, 67, 53},
421 {size_shrieking, 73, 46}
424 /* EBCDIC (or bust) */
425 unsigned char size_normal[100] = {
426 /* c */ sizeof(char),
427 /* d */ sizeof(double),
429 /* f */ sizeof(float),
439 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
440 #if defined(HAS_QUAD)
441 /* q */ sizeof(Quad_t),
445 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 /* C */ sizeof(unsigned char),
453 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
454 /* D */ LONG_DOUBLESIZE,
461 /* I */ sizeof(unsigned int),
469 #if defined(HAS_QUAD)
470 /* Q */ sizeof(Uquad_t),
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
477 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
479 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
481 unsigned char size_shrieking[93] = {
483 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 /* l */ sizeof(long),
486 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 /* s */ sizeof(short),
494 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 /* I */ sizeof(unsigned int),
502 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 /* L */ sizeof(unsigned long),
505 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 /* S */ sizeof(unsigned short),
513 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
519 struct packsize_t packsize[2] = {
520 {size_normal, 131, 100},
521 {size_shrieking, 137, 93}
526 uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
530 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
531 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
532 /* We try to process malformed UTF-8 as much as possible (preferrably with
533 warnings), but these two mean we make no progress in the string and
534 might enter an infinite loop */
535 if (retlen == (STRLEN) -1 || retlen == 0)
536 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
537 (int) TYPE_NO_MODIFIERS(datumtype));
539 if (ckWARN(WARN_UNPACK))
540 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
541 "Character in '%c' format wrapped in unpack",
542 (int) TYPE_NO_MODIFIERS(datumtype));
549 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
550 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
554 uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
560 U32 flags = ckWARN(WARN_UTF8) ?
561 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
562 for (;buf_len > 0; buf_len--) {
563 if (from >= end) return FALSE;
564 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
565 if (retlen == (STRLEN) -1 || retlen == 0) {
566 from += UTF8SKIP(from);
568 } else from += retlen;
575 /* We have enough characters for the buffer. Did we have problems ? */
578 /* Rewalk the string fragment while warning */
580 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
581 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
582 if (ptr >= end) break;
583 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
585 if (from > end) from = end;
587 if ((bad & 2) && ckWARN(WARN_UNPACK))
588 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
589 WARN_PACK : WARN_UNPACK),
590 "Character(s) in '%c' format wrapped in %s",
591 (int) TYPE_NO_MODIFIERS(datumtype),
592 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
599 next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
603 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
604 if (val >= 0x100 || !ISUUCHAR(val) ||
605 retlen == (STRLEN) -1 || retlen == 0) {
609 *out = PL_uudmap[val] & 077;
615 bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
616 U8 buffer[UTF8_MAXLEN];
617 U8 *end = start + len;
619 while (start < end) {
621 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
631 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
639 #define PUSH_BYTES(utf8, cur, buf, len) \
641 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
643 Copy(buf, cur, len, char); \
648 #define GROWING(utf8, cat, start, cur, in_len) \
650 STRLEN glen = (in_len); \
651 if (utf8) glen *= 2; \
652 if ((cur) + glen >= (start) + SvLEN(cat)) { \
653 (start) = sv_exp_grow(aTHX_ cat, glen); \
654 (cur) = (start) + SvCUR(cat); \
658 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
660 STRLEN glen = (in_len); \
663 if ((cur) + gl >= (start) + SvLEN(cat)) { \
665 SvCUR(cat) = (cur) - (start); \
666 (start) = sv_exp_grow(aTHX_ cat, gl); \
667 (cur) = (start) + SvCUR(cat); \
669 PUSH_BYTES(utf8, cur, buf, glen); \
672 #define PUSH_BYTE(utf8, s, byte) \
676 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
677 } else *(U8 *)(s)++ = (byte); \
680 /* Only to be used inside a loop (see the break) */
681 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
684 if (str >= end) break; \
685 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
686 if (retlen == (STRLEN) -1 || retlen == 0) { \
688 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
693 /* Returns the sizeof() struct described by pat */
695 S_measure_struct(pTHX_ tempsym_t* symptr)
699 while (next_symbol(symptr)) {
702 int which = (symptr->code & TYPE_IS_SHRIEKING) ?
703 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
704 int offset = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
706 switch (symptr->howlen) {
708 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
709 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
712 /* e_no_len and e_number */
713 len = symptr->length;
717 if ((offset >= 0) && (offset < packsize[which].size))
718 size = packsize[which].array[offset] & PACK_SIZE_MASK;
723 /* endianness doesn't influence the size of a type */
724 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
726 Perl_croak(aTHX_ "Invalid type '%c' in %s",
727 (int)TYPE_NO_MODIFIERS(symptr->code),
728 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
731 case 'U': /* XXXX Is it correct? */
734 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
736 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
742 tempsym_t savsym = *symptr;
743 symptr->patptr = savsym.grpbeg;
744 symptr->patend = savsym.grpend;
745 /* XXXX Theoretically, we need to measure many times at
746 different positions, since the subexpression may contain
747 alignment commands, but be not of aligned length.
748 Need to detect this and croak(). */
749 size = measure_struct(symptr);
753 case 'X' | TYPE_IS_SHRIEKING:
754 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
756 if (!len) /* Avoid division by 0 */
758 len = total % len; /* Assumed: the start is aligned. */
763 Perl_croak(aTHX_ "'X' outside of string in %s",
764 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
766 case 'x' | TYPE_IS_SHRIEKING:
767 if (!len) /* Avoid division by 0 */
769 star = total % len; /* Assumed: the start is aligned. */
770 if (star) /* Other portable ways? */
794 size = sizeof(char*);
804 /* locate matching closing parenthesis or bracket
805 * returns char pointer to char after match, or NULL
808 S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
810 while (patptr < patend) {
818 while (patptr < patend && *patptr != '\n')
822 patptr = group_end(patptr, patend, ')') + 1;
824 patptr = group_end(patptr, patend, ']') + 1;
826 Perl_croak(aTHX_ "No group ending character '%c' found in template",
832 /* Convert unsigned decimal number to binary.
833 * Expects a pointer to the first digit and address of length variable
834 * Advances char pointer to 1st non-digit char and returns number
837 S_get_num(pTHX_ register char *patptr, I32 *lenptr )
839 I32 len = *patptr++ - '0';
840 while (isDIGIT(*patptr)) {
841 if (len >= 0x7FFFFFFF/10)
842 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
843 len = (len * 10) + (*patptr++ - '0');
849 /* The marvellous template parsing routine: Using state stored in *symptr,
850 * locates next template code and count
853 S_next_symbol(pTHX_ tempsym_t* symptr )
855 char* patptr = symptr->patptr;
856 char* patend = symptr->patend;
857 const char *allowed = "";
859 symptr->flags &= ~FLAG_SLASH;
861 while (patptr < patend) {
862 if (isSPACE(*patptr))
864 else if (*patptr == '#') {
866 while (patptr < patend && *patptr != '\n')
871 /* We should have found a template code */
872 I32 code = *patptr++ & 0xFF;
873 U32 inherited_modifiers = 0;
875 if (code == ','){ /* grandfather in commas but with a warning */
876 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
877 symptr->flags |= FLAG_COMMA;
878 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
879 "Invalid type ',' in %s",
880 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
885 /* for '(', skip to ')' */
887 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
888 Perl_croak(aTHX_ "()-group starts with a count in %s",
889 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
890 symptr->grpbeg = patptr;
891 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
892 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
893 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
894 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
897 /* look for group modifiers to inherit */
898 if (TYPE_ENDIANNESS(symptr->flags)) {
899 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
900 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
903 /* look for modifiers */
904 while (patptr < patend) {
908 modifier = TYPE_IS_SHRIEKING;
909 allowed = SHRIEKING_ALLOWED_TYPES;
911 #ifdef PERL_PACK_CAN_BYTEORDER
913 modifier = TYPE_IS_BIG_ENDIAN;
914 allowed = ENDIANNESS_ALLOWED_TYPES;
917 modifier = TYPE_IS_LITTLE_ENDIAN;
918 allowed = ENDIANNESS_ALLOWED_TYPES;
920 #endif /* PERL_PACK_CAN_BYTEORDER */
928 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
929 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
930 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
932 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
933 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
934 (int) TYPE_NO_MODIFIERS(code),
935 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
936 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
937 TYPE_ENDIANNESS_MASK)
938 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
939 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
941 if (ckWARN(WARN_UNPACK)) {
943 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
944 "Duplicate modifier '%c' after '%c' in %s",
945 *patptr, (int) TYPE_NO_MODIFIERS(code),
946 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
953 /* inherit modifiers */
954 code |= inherited_modifiers;
956 /* look for count and/or / */
957 if (patptr < patend) {
958 if (isDIGIT(*patptr)) {
959 patptr = get_num( patptr, &symptr->length );
960 symptr->howlen = e_number;
962 } else if (*patptr == '*') {
964 symptr->howlen = e_star;
966 } else if (*patptr == '[') {
967 char* lenptr = ++patptr;
968 symptr->howlen = e_number;
969 patptr = group_end( patptr, patend, ']' ) + 1;
970 /* what kind of [] is it? */
971 if (isDIGIT(*lenptr)) {
972 lenptr = get_num( lenptr, &symptr->length );
974 Perl_croak(aTHX_ "Malformed integer in [] in %s",
975 symptr->flags & FLAG_PACK ? "pack" : "unpack");
977 tempsym_t savsym = *symptr;
978 symptr->patend = patptr-1;
979 symptr->patptr = lenptr;
980 savsym.length = measure_struct(symptr);
984 symptr->howlen = e_no_len;
989 while (patptr < patend) {
990 if (isSPACE(*patptr))
992 else if (*patptr == '#') {
994 while (patptr < patend && *patptr != '\n')
999 if (*patptr == '/') {
1000 symptr->flags |= FLAG_SLASH;
1002 if (patptr < patend &&
1003 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1004 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1005 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1011 /* at end - no count, no / */
1012 symptr->howlen = e_no_len;
1016 symptr->code = code;
1017 symptr->patptr = patptr;
1021 symptr->patptr = patptr;
1026 There is no way to cleanly handle the case where we should process the
1027 string per byte in its upgraded form while it's really in downgraded form
1028 (e.g. estimates like strend-s as an upper bound for the number of
1029 characters left wouldn't work). So if we foresee the need of this
1030 (pattern starts with U or contains U0), we want to work on the encoded
1031 version of the string. Users are advised to upgrade their pack string
1032 themselves if they need to do a lot of unpacks like this on it
1035 need_utf8(const char *pat, const char *patend)
1038 while (pat < patend) {
1039 if (pat[0] == '#') {
1041 pat = memchr(pat, '\n', patend-pat);
1042 if (!pat) return FALSE;
1043 } else if (pat[0] == 'U') {
1044 if (first || pat[1] == '0') return TRUE;
1045 } else first = FALSE;
1052 first_symbol(const char *pat, const char *patend) {
1053 while (pat < patend) {
1054 if (pat[0] != '#') return pat[0];
1056 pat = memchr(pat, '\n', patend-pat);
1064 =for apidoc unpack_str
1066 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1067 and ocnt are not used. This call should not be used, use unpackstring instead.
1072 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
1074 tempsym_t sym = { 0 };
1076 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1077 else if (need_utf8(pat, patend)) {
1078 /* We probably should try to avoid this in case a scalar context call
1079 wouldn't get to the "U0" */
1080 STRLEN len = strend - s;
1081 s = (char *) bytes_to_utf8(s, &len);
1084 flags |= FLAG_DO_UTF8;
1087 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1088 flags |= FLAG_PARSE_UTF8;
1091 sym.patend = patend;
1094 return unpack_rec(&sym, s, s, strend, NULL );
1098 =for apidoc unpackstring
1100 The engine implementing unpack() Perl function. C<unpackstring> puts the
1101 extracted list items on the stack and returns the number of elements.
1102 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1107 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
1109 tempsym_t sym = { 0 };
1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
1116 s = (char *) bytes_to_utf8(s, &len);
1119 flags |= FLAG_DO_UTF8;
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
1126 sym.patend = patend;
1129 return unpack_rec(&sym, s, s, strend, NULL );
1134 S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
1138 I32 start_sp_offset = SP - PL_stack_base;
1144 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1145 char* strrelbeg = s;
1146 bool beyond = FALSE;
1147 bool explicit_length;
1148 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1149 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1151 while (next_symbol(symptr)) {
1153 I32 datumtype = symptr->code;
1154 /* do first one only unless in list context
1155 / is implemented by unpacking the count, then popping it from the
1156 stack, so must check that we're not in the middle of a / */
1157 if ( unpack_only_one
1158 && (SP - PL_stack_base == start_sp_offset + 1)
1159 && (datumtype != '/') ) /* XXX can this be omitted */
1162 switch (howlen = symptr->howlen) {
1164 len = strend - strbeg; /* long enough */
1167 /* e_no_len and e_number */
1168 len = symptr->length;
1172 explicit_length = TRUE;
1174 beyond = s >= strend;
1176 struct packsize_t *pack_props =
1177 &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
1178 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
1179 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
1180 int offset = rawtype - pack_props->first;
1182 if (offset >= 0 && offset < pack_props->size) {
1183 /* Data about this template letter */
1184 unsigned char data = pack_props->array[offset];
1187 /* data nonzero means we can process this letter. */
1188 long size = data & PACK_SIZE_MASK;
1189 long howmany = (strend - s) / size;
1193 if (!checksum || (data & PACK_SIZE_CANNOT_CSUM)) {
1194 if (len && unpack_only_one) len = 1;
1201 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1203 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1206 if (howlen == e_no_len)
1207 len = 16; /* len is not specified */
1215 tempsym_t savsym = *symptr;
1216 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1217 symptr->flags |= group_modifiers;
1218 symptr->patend = savsym.grpend;
1222 symptr->patptr = savsym.grpbeg;
1223 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1224 else symptr->flags &= ~FLAG_PARSE_UTF8;
1225 unpack_rec(symptr, s, strbeg, strend, &s);
1226 if (s == strend && savsym.howlen == e_star)
1227 break; /* No way to continue */
1230 symptr->flags &= ~group_modifiers;
1231 savsym.flags = symptr->flags;
1240 Perl_croak(aTHX_ "'@' outside of string in unpack");
1245 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1247 if (len > strend - strrelbeg)
1248 Perl_croak(aTHX_ "'@' outside of string in unpack");
1249 s = strrelbeg + len;
1252 case 'X' | TYPE_IS_SHRIEKING:
1253 if (!len) /* Avoid division by 0 */
1258 hop = last = strbeg;
1260 hop += UTF8SKIP(hop);
1267 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 len = (s - strbeg) % len;
1277 Perl_croak(aTHX_ "'X' outside of string in unpack");
1278 while (--s, UTF8_IS_CONTINUATION(*s)) {
1280 Perl_croak(aTHX_ "'X' outside of string in unpack");
1285 if (len > s - strbeg)
1286 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1290 case 'x' | TYPE_IS_SHRIEKING:
1291 if (!len) /* Avoid division by 0 */
1293 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1294 else ai32 = (s - strbeg) % len;
1295 if (ai32 == 0) break;
1302 Perl_croak(aTHX_ "'x' outside of string in unpack");
1307 if (len > strend - s)
1308 Perl_croak(aTHX_ "'x' outside of string in unpack");
1313 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1319 /* Preliminary length estimate is assumed done in 'W' */
1320 if (len > strend - s) len = strend - s;
1326 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1327 if (hop >= strend) {
1329 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1334 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1336 } else if (len > strend - s)
1339 if (datumtype == 'Z') {
1340 /* 'Z' strips stuff after first null */
1343 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1344 sv = newSVpvn(s, ptr-s);
1345 if (howlen == e_star) /* exact for 'Z*' */
1346 len = ptr-s + (ptr != strend ? 1 : 0);
1347 } else if (datumtype == 'A') {
1348 /* 'A' strips both nulls and spaces */
1350 for (ptr = s+len-1; ptr >= s; ptr--)
1351 if (*ptr != 0 && !isSPACE(*ptr)) break;
1353 sv = newSVpvn(s, ptr-s);
1354 } else sv = newSVpvn(s, len);
1358 /* Undo any upgrade done due to need_utf8() */
1359 if (!(symptr->flags & FLAG_WAS_UTF8))
1360 sv_utf8_downgrade(sv, 0);
1362 XPUSHs(sv_2mortal(sv));
1368 if (howlen == e_star || len > (strend - s) * 8)
1369 len = (strend - s) * 8;
1373 Newz(601, PL_bitcount, 256, char);
1374 for (bits = 1; bits < 256; bits++) {
1375 if (bits & 1) PL_bitcount[bits]++;
1376 if (bits & 2) PL_bitcount[bits]++;
1377 if (bits & 4) PL_bitcount[bits]++;
1378 if (bits & 8) PL_bitcount[bits]++;
1379 if (bits & 16) PL_bitcount[bits]++;
1380 if (bits & 32) PL_bitcount[bits]++;
1381 if (bits & 64) PL_bitcount[bits]++;
1382 if (bits & 128) PL_bitcount[bits]++;
1386 while (len >= 8 && s < strend) {
1387 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1392 cuv += PL_bitcount[*(U8 *)s++];
1395 if (len && s < strend) {
1397 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1398 if (datumtype == 'b')
1400 if (bits & 1) cuv++;
1405 if (bits & 0x80) cuv++;
1412 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1415 if (datumtype == 'b') {
1418 for (len = 0; len < ai32; len++) {
1419 if (len & 7) bits >>= 1;
1421 if (s >= strend) break;
1422 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1423 } else bits = *(U8 *) s++;
1424 *str++ = bits & 1 ? '1' : '0';
1429 for (len = 0; len < ai32; len++) {
1430 if (len & 7) bits <<= 1;
1432 if (s >= strend) break;
1433 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1434 } else bits = *(U8 *) s++;
1435 *str++ = bits & 0x80 ? '1' : '0';
1439 SvCUR_set(sv, str - SvPVX(sv));
1446 /* Preliminary length estimate, acceptable for utf8 too */
1447 if (howlen == e_star || len > (strend - s) * 2)
1448 len = (strend - s) * 2;
1449 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1452 if (datumtype == 'h') {
1455 for (len = 0; len < ai32; len++) {
1456 if (len & 1) bits >>= 4;
1458 if (s >= strend) break;
1459 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1460 } else bits = * (U8 *) s++;
1461 *str++ = PL_hexdigit[bits & 15];
1466 for (len = 0; len < ai32; len++) {
1467 if (len & 1) bits <<= 4;
1469 if (s >= strend) break;
1470 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1471 } else bits = *(U8 *) s++;
1472 *str++ = PL_hexdigit[(bits >> 4) & 15];
1476 SvCUR_set(sv, str - SvPVX(sv));
1482 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1483 if (aint >= 128) /* fake up signed chars */
1486 PUSHs(sv_2mortal(newSViv((IV)aint)));
1487 else if (checksum > bits_in_uv)
1488 cdouble += (NV)aint;
1497 if (explicit_length && datumtype == 'C')
1498 /* Switch to "character" mode */
1499 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1502 if (datumtype == 'C' ?
1503 (symptr->flags & FLAG_DO_UTF8) &&
1504 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1505 while (len-- > 0 && s < strend) {
1508 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1509 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1510 if (retlen == (STRLEN) -1 || retlen == 0)
1511 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1514 PUSHs(sv_2mortal(newSVuv((UV) val)));
1515 else if (checksum > bits_in_uv)
1516 cdouble += (NV) val;
1520 } else if (!checksum)
1522 U8 ch = *(U8 *) s++;
1523 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1525 else if (checksum > bits_in_uv)
1526 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1528 while (len-- > 0) cuv += *(U8 *) s++;
1532 if (explicit_length) {
1533 /* Switch to "bytes in UTF-8" mode */
1534 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1536 /* Should be impossible due to the need_utf8() test */
1537 Perl_croak(aTHX_ "U0 mode on a byte string");
1541 if (len > strend - s) len = strend - s;
1543 if (len && unpack_only_one) len = 1;
1547 while (len-- > 0 && s < strend) {
1551 U8 result[UTF8_MAXLEN];
1555 /* Bug: warns about bad utf8 even if we are short on bytes
1556 and will break out of the loop */
1557 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
1559 len = UTF8SKIP(result);
1560 if (!uni_to_bytes(aTHX_ &ptr, strend,
1561 &result[1], len-1, 'U')) break;
1562 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1565 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1566 if (retlen == (STRLEN) -1 || retlen == 0)
1567 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1571 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1572 else if (checksum > bits_in_uv)
1573 cdouble += (NV) auv;
1578 case 's' | TYPE_IS_SHRIEKING:
1579 #if SHORTSIZE != SIZE16
1582 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1583 DO_BO_UNPACK(ashort, s);
1585 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1586 else if (checksum > bits_in_uv)
1587 cdouble += (NV)ashort;
1599 #if U16SIZE > SIZE16
1602 SHIFT16(utf8, s, strend, &ai16, datumtype);
1603 DO_BO_UNPACK(ai16, 16);
1604 #if U16SIZE > SIZE16
1609 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1610 else if (checksum > bits_in_uv)
1611 cdouble += (NV)ai16;
1616 case 'S' | TYPE_IS_SHRIEKING:
1617 #if SHORTSIZE != SIZE16
1619 unsigned short aushort;
1620 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1621 DO_BO_UNPACK(aushort, s);
1623 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1624 else if (checksum > bits_in_uv)
1625 cdouble += (NV)aushort;
1638 #if U16SIZE > SIZE16
1641 SHIFT16(utf8, s, strend, &au16, datumtype);
1642 DO_BO_UNPACK(au16, 16);
1644 if (datumtype == 'n')
1645 au16 = PerlSock_ntohs(au16);
1648 if (datumtype == 'v')
1652 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1653 else if (checksum > bits_in_uv)
1654 cdouble += (NV) au16;
1659 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1660 case 'v' | TYPE_IS_SHRIEKING:
1661 case 'n' | TYPE_IS_SHRIEKING:
1664 # if U16SIZE > SIZE16
1667 SHIFT16(utf8, s, strend, &ai16, datumtype);
1669 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1670 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1671 # endif /* HAS_NTOHS */
1673 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1674 ai16 = (I16) vtohs((U16) ai16);
1675 # endif /* HAS_VTOHS */
1677 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1678 else if (checksum > bits_in_uv)
1679 cdouble += (NV) ai16;
1684 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1686 case 'i' | TYPE_IS_SHRIEKING:
1689 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1690 DO_BO_UNPACK(aint, i);
1692 PUSHs(sv_2mortal(newSViv((IV)aint)));
1693 else if (checksum > bits_in_uv)
1694 cdouble += (NV)aint;
1700 case 'I' | TYPE_IS_SHRIEKING:
1703 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1704 DO_BO_UNPACK(auint, i);
1706 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1707 else if (checksum > bits_in_uv)
1708 cdouble += (NV)auint;
1716 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1717 #if IVSIZE == INTSIZE
1718 DO_BO_UNPACK(aiv, i);
1719 #elif IVSIZE == LONGSIZE
1720 DO_BO_UNPACK(aiv, l);
1721 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1722 DO_BO_UNPACK(aiv, 64);
1724 Perl_croak(aTHX_ "'j' not supported on this platform");
1727 PUSHs(sv_2mortal(newSViv(aiv)));
1728 else if (checksum > bits_in_uv)
1737 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1738 #if IVSIZE == INTSIZE
1739 DO_BO_UNPACK(auv, i);
1740 #elif IVSIZE == LONGSIZE
1741 DO_BO_UNPACK(auv, l);
1742 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1743 DO_BO_UNPACK(auv, 64);
1745 Perl_croak(aTHX_ "'J' not supported on this platform");
1748 PUSHs(sv_2mortal(newSVuv(auv)));
1749 else if (checksum > bits_in_uv)
1755 case 'l' | TYPE_IS_SHRIEKING:
1756 #if LONGSIZE != SIZE32
1759 SHIFT_VAR(utf8, s, strend, along, datumtype);
1760 DO_BO_UNPACK(along, l);
1762 PUSHs(sv_2mortal(newSViv((IV)along)));
1763 else if (checksum > bits_in_uv)
1764 cdouble += (NV)along;
1775 #if U32SIZE > SIZE32
1778 SHIFT32(utf8, s, strend, &ai32, datumtype);
1779 DO_BO_UNPACK(ai32, 32);
1780 #if U32SIZE > SIZE32
1781 if (ai32 > 2147483647) ai32 -= 4294967296;
1784 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)ai32;
1791 case 'L' | TYPE_IS_SHRIEKING:
1792 #if LONGSIZE != SIZE32
1794 unsigned long aulong;
1795 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1796 DO_BO_UNPACK(aulong, l);
1798 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)aulong;
1813 #if U32SIZE > SIZE32
1816 SHIFT32(utf8, s, strend, &au32, datumtype);
1817 DO_BO_UNPACK(au32, 32);
1819 if (datumtype == 'N')
1820 au32 = PerlSock_ntohl(au32);
1823 if (datumtype == 'V')
1827 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV)au32;
1834 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1835 case 'V' | TYPE_IS_SHRIEKING:
1836 case 'N' | TYPE_IS_SHRIEKING:
1839 # if U32SIZE > SIZE32
1842 SHIFT32(utf8, s, strend, &ai32, datumtype);
1844 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1845 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1848 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1849 ai32 = (I32)vtohl((U32)ai32);
1852 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1853 else if (checksum > bits_in_uv)
1854 cdouble += (NV)ai32;
1859 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1863 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1864 DO_BO_UNPACK_P(aptr);
1865 /* newSVpv generates undef if aptr is NULL */
1866 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1874 while (len > 0 && s < strend) {
1876 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1877 auv = (auv << 7) | (ch & 0x7f);
1878 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1881 PUSHs(sv_2mortal(newSVuv(auv)));
1886 if (++bytes >= sizeof(UV)) { /* promote to string */
1890 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1891 while (s < strend) {
1892 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1893 sv = mul128(sv, (U8)(ch & 0x7f));
1903 PUSHs(sv_2mortal(sv));
1908 if ((s >= strend) && bytes)
1909 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1913 if (symptr->howlen == e_star)
1914 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1916 if (sizeof(char*) <= strend - s) {
1918 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1919 DO_BO_UNPACK_P(aptr);
1920 /* newSVpvn generates undef if aptr is NULL */
1921 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
1928 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1929 DO_BO_UNPACK(aquad, 64);
1931 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
1932 newSViv((IV)aquad) : newSVnv((NV)aquad)));
1933 else if (checksum > bits_in_uv)
1934 cdouble += (NV)aquad;
1942 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1943 DO_BO_UNPACK(auquad, 64);
1945 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1946 newSVuv((UV)auquad):newSVnv((NV)auquad)));
1947 else if (checksum > bits_in_uv)
1948 cdouble += (NV)auquad;
1953 #endif /* HAS_QUAD */
1954 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1958 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1959 DO_BO_UNPACK_N(afloat, float);
1961 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1969 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1970 DO_BO_UNPACK_N(adouble, double);
1972 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
1980 SHIFT_VAR(utf8, s, strend, anv, datumtype);
1981 DO_BO_UNPACK_N(anv, NV);
1983 PUSHs(sv_2mortal(newSVnv(anv)));
1988 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1991 long double aldouble;
1992 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
1993 DO_BO_UNPACK_N(aldouble, long double);
1995 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
1997 cdouble += aldouble;
2003 * Initialise the decode mapping. By using a table driven
2004 * algorithm, the code will be character-set independent
2005 * (and just as fast as doing character arithmetic)
2007 if (PL_uudmap['M'] == 0) {
2010 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2011 PL_uudmap[(U8)PL_uuemap[i]] = i;
2013 * Because ' ' and '`' map to the same value,
2014 * we need to decode them both the same.
2019 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2020 sv = sv_2mortal(NEWSV(42, l));
2021 if (l) SvPOK_on(sv);
2024 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2030 next_uni_uu(aTHX_ &s, strend, &a);
2031 next_uni_uu(aTHX_ &s, strend, &b);
2032 next_uni_uu(aTHX_ &s, strend, &c);
2033 next_uni_uu(aTHX_ &s, strend, &d);
2034 hunk[0] = (char)((a << 2) | (b >> 4));
2035 hunk[1] = (char)((b << 4) | (c >> 2));
2036 hunk[2] = (char)((c << 6) | d);
2037 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2041 if (*s == '\n') s++;
2043 /* possible checksum byte */
2044 char *skip = s+UTF8SKIP(s);
2045 if (skip < strend && *skip == '\n') s = skip+1;
2050 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2055 len = PL_uudmap[*(U8*)s++] & 077;
2057 if (s < strend && ISUUCHAR(*s))
2058 a = PL_uudmap[*(U8*)s++] & 077;
2061 if (s < strend && ISUUCHAR(*s))
2062 b = PL_uudmap[*(U8*)s++] & 077;
2065 if (s < strend && ISUUCHAR(*s))
2066 c = PL_uudmap[*(U8*)s++] & 077;
2069 if (s < strend && ISUUCHAR(*s))
2070 d = PL_uudmap[*(U8*)s++] & 077;
2073 hunk[0] = (char)((a << 2) | (b >> 4));
2074 hunk[1] = (char)((b << 4) | (c >> 2));
2075 hunk[2] = (char)((c << 6) | d);
2076 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2081 else /* possible checksum byte */
2082 if (s + 1 < strend && s[1] == '\n')
2091 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2092 (checksum > bits_in_uv &&
2093 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2096 anv = (NV) (1 << (checksum & 15));
2097 while (checksum >= 16) {
2101 while (cdouble < 0.0)
2103 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2104 sv = newSVnv(cdouble);
2107 if (checksum < bits_in_uv) {
2108 UV mask = ((UV)1 << checksum) - 1;
2113 XPUSHs(sv_2mortal(sv));
2117 if (symptr->flags & FLAG_SLASH){
2118 if (SP - PL_stack_base - start_sp_offset <= 0)
2119 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2120 if( next_symbol(symptr) ){
2121 if( symptr->howlen == e_number )
2122 Perl_croak(aTHX_ "Count after length/code in unpack" );
2124 /* ...end of char buffer then no decent length available */
2125 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2127 /* take top of stack (hope it's numeric) */
2130 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2133 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2135 datumtype = symptr->code;
2136 explicit_length = FALSE;
2144 return SP - PL_stack_base - start_sp_offset;
2151 I32 gimme = GIMME_V;
2154 char *pat = SvPV(left, llen);
2155 char *s = SvPV(right, rlen);
2156 char *strend = s + rlen;
2157 char *patend = pat + llen;
2161 cnt = unpackstring(pat, patend, s, strend,
2162 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2163 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2166 if ( !cnt && gimme == G_SCALAR )
2167 PUSHs(&PL_sv_undef);
2172 doencodes(U8 *h, char *s, I32 len)
2174 *h++ = PL_uuemap[len];
2176 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2177 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2178 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2179 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2184 char r = (len > 1 ? s[1] : '\0');
2185 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2186 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2187 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2188 *h++ = PL_uuemap[0];
2195 S_is_an_int(pTHX_ char *s, STRLEN l)
2198 SV *result = newSVpvn(s, l);
2199 char *result_c = SvPV(result, n_a); /* convenience */
2200 char *out = result_c;
2210 SvREFCNT_dec(result);
2233 SvREFCNT_dec(result);
2239 SvCUR_set(result, out - result_c);
2243 /* pnum must be '\0' terminated */
2245 S_div128(pTHX_ SV *pnum, bool *done)
2248 char *s = SvPV(pnum, len);
2257 i = m * 10 + (*t - '0');
2259 r = (i >> 7); /* r < 10 */
2266 SvCUR_set(pnum, (STRLEN) (t - s));
2273 =for apidoc pack_cat
2275 The engine implementing pack() Perl function. Note: parameters next_in_list and
2276 flags are not used. This call should not be used; use packlist instead.
2282 Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2284 tempsym_t sym = { 0 };
2286 sym.patend = patend;
2287 sym.flags = FLAG_PACK;
2289 (void)pack_rec( cat, &sym, beglist, endlist );
2294 =for apidoc packlist
2296 The engine implementing pack() Perl function.
2302 Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2305 tempsym_t sym = { 0 };
2308 sym.patend = patend;
2309 sym.flags = FLAG_PACK;
2311 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2312 Also make sure any UTF8 flag is loaded */
2313 SvPV_force(cat, no_len);
2314 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2316 (void)pack_rec( cat, &sym, beglist, endlist );
2319 /* like sv_utf8_upgrade, but also repoint the group start markers */
2321 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2324 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2326 if (SvUTF8(sv)) return;
2328 from_start = SvPVX(sv);
2329 from_end = from_start + SvCUR(sv);
2330 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2331 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2332 if (from_ptr == from_end) {
2333 /* Simple case: no character needs to be changed */
2338 /* We assume a char translates to at most 2 UTF-8 bytes */
2339 len = (from_end-from_ptr)*2+(from_ptr-from_start)+1;
2340 New('U', to_start, len, char);
2341 Copy(from_start, to_start, from_ptr-from_start, char);
2342 to_ptr = to_start + (from_ptr-from_start);
2344 New('U', marks, sym_ptr->level+2, char *);
2345 for (group=sym_ptr; group; group = group->previous)
2346 marks[group->level] = from_start + group->strbeg;
2347 marks[sym_ptr->level+1] = from_end+1;
2348 for (m = marks; *m < from_ptr; m++)
2349 *m = to_start + (*m-from_start);
2351 for (;from_ptr < from_end; from_ptr++) {
2352 while (*m == from_ptr) *m++ = to_ptr;
2353 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2357 while (*m == from_ptr) *m++ = to_ptr;
2358 if (m != marks + sym_ptr->level+1) {
2361 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2363 for (group=sym_ptr; group; group = group->previous)
2364 group->strbeg = marks[group->level] - to_start;
2369 SvLEN(sv) += SvIVX(sv);
2370 from_start -= SvIVX(sv);
2373 SvFLAGS(sv) &= ~SVf_OOK;
2376 Safefree(from_start);
2377 SvPVX(sv) = to_start;
2378 SvCUR(sv) = to_ptr - to_start;
2383 /* Exponential string grower. Makes string extension effectively O(n)
2384 needed says how many extra bytes we need (not counting the final '\0')
2385 Only grows the string if there is an actual lack of space
2388 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2389 STRLEN cur = SvCUR(sv);
2390 STRLEN len = SvLEN(sv);
2392 if (len - cur > needed) return SvPVX(sv);
2393 extend = needed > len ? needed : len;
2394 return SvGROW(sv, len+extend+1);
2399 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2401 tempsym_t lookahead;
2402 I32 items = endlist - beglist;
2403 bool found = next_symbol(symptr);
2404 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2406 if (symptr->level == 0 && found && symptr->code == 'U') {
2407 marked_upgrade(aTHX_ cat, symptr);
2408 symptr->flags |= FLAG_DO_UTF8;
2411 symptr->strbeg = SvCUR(cat);
2417 SV *lengthcode = Nullsv;
2418 I32 datumtype = symptr->code;
2419 howlen_t howlen = symptr->howlen;
2420 char *start = SvPVX(cat);
2421 char *cur = start + SvCUR(cat);
2423 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2427 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2431 /* e_no_len and e_number */
2432 len = symptr->length;
2437 struct packsize_t *pack_props =
2438 &packsize[(symptr->code & TYPE_IS_SHRIEKING) ?
2439 PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL];
2440 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
2441 int offset = rawtype - pack_props->first;
2443 if (offset >= 0 && offset < pack_props->size) {
2444 /* Data about this template letter */
2445 unsigned char data = pack_props->array[offset];
2447 if (data && !(data & PACK_SIZE_UNPREDICTABLE)) {
2448 /* We can process this letter. */
2449 STRLEN size = data & PACK_SIZE_MASK;
2450 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2456 /* Look ahead for next symbol. Do we have code/code? */
2457 lookahead = *symptr;
2458 found = next_symbol(&lookahead);
2459 if ( symptr->flags & FLAG_SLASH ) {
2460 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2461 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2462 e_star != lookahead.howlen )
2463 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
2465 sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
2468 /* Code inside the switch must take care to properly update
2469 cat (CUR length and '\0' termination) if it updated *cur and
2470 doesn't simply leave using break */
2471 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2473 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2474 (int) TYPE_NO_MODIFIERS(datumtype));
2476 Perl_croak(aTHX_ "'%%' may not be used in pack");
2479 char *s = start + symptr->strbeg;
2480 while (len > 0 && s < cur) {
2485 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2488 GROWING(0, cat, start, cur, len);
2489 Zero(cur, len, char);
2491 } else if (s < cur) cur = s;
2492 else goto no_change;
2494 len -= cur - (start+symptr->strbeg);
2495 if (len > 0) goto grow;
2497 if (len > 0) goto shrink;
2498 else goto no_change;
2502 tempsym_t savsym = *symptr;
2503 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2504 symptr->flags |= group_modifiers;
2505 symptr->patend = savsym.grpend;
2507 symptr->previous = &lookahead;
2510 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2511 else symptr->flags &= ~FLAG_PARSE_UTF8;
2512 was_utf8 = SvUTF8(cat);
2513 symptr->patptr = savsym.grpbeg;
2514 beglist = pack_rec(cat, symptr, beglist, endlist);
2515 if (SvUTF8(cat) != was_utf8)
2516 /* This had better be an upgrade while in utf8==0 mode */
2519 if (savsym.howlen == e_star && beglist == endlist)
2520 break; /* No way to continue */
2522 lookahead.flags = symptr->flags & ~group_modifiers;
2525 case 'X' | TYPE_IS_SHRIEKING:
2526 if (!len) /* Avoid division by 0 */
2533 hop += UTF8SKIP(hop);
2540 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2544 len = (cur-start) % len;
2548 if (len < 1) goto no_change;
2551 Perl_croak(aTHX_ "'X' outside of string in pack");
2552 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2554 Perl_croak(aTHX_ "'X' outside of string in pack");
2560 if (cur - start < len)
2561 Perl_croak(aTHX_ "'X' outside of string in pack");
2564 if (cur < start+symptr->strbeg) {
2565 /* Make sure group starts don't point into the void */
2567 STRLEN length = cur-start;
2568 for (group = symptr;
2569 group && length < group->strbeg;
2570 group = group->previous) group->strbeg = length;
2571 lookahead.strbeg = length;
2574 case 'x' | TYPE_IS_SHRIEKING: {
2576 if (!len) /* Avoid division by 0 */
2578 if (utf8) ai32 = utf8_length(start, cur) % len;
2579 else ai32 = (cur - start) % len;
2580 if (ai32 == 0) goto no_change;
2592 aptr = SvPV(fromstr, fromlen);
2593 if (DO_UTF8(fromstr)) {
2596 if (!utf8 && !SvUTF8(cat)) {
2597 marked_upgrade(aTHX_ cat, symptr);
2598 lookahead.flags |= FLAG_DO_UTF8;
2599 lookahead.strbeg = symptr->strbeg;
2602 cur = start + SvCUR(cat);
2604 if (howlen == e_star) {
2605 if (utf8) goto string_copy;
2609 end = aptr + fromlen;
2610 fromlen = datumtype == 'Z' ? len-1 : len;
2611 while ((I32) fromlen > 0 && s < end) {
2616 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2619 if (datumtype == 'Z') len++;
2625 fromlen = len - fromlen;
2626 if (datumtype == 'Z') fromlen--;
2627 if (howlen == e_star) {
2629 if (datumtype == 'Z') len++;
2631 GROWING(0, cat, start, cur, len);
2632 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2633 datumtype | TYPE_IS_PACK))
2634 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2638 if (howlen == e_star) {
2640 if (datumtype == 'Z') len++;
2642 if (len <= (I32) fromlen) {
2644 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2646 /* assumes a byte expands to at most 2 bytes on upgrade:
2647 expected_length <= from_len*2 + (len-from_len) */
2648 GROWING(0, cat, start, cur, fromlen+len);
2650 while (fromlen > 0) {
2651 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2657 if (howlen == e_star) {
2659 if (datumtype == 'Z') len++;
2661 if (len <= (I32) fromlen) {
2663 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2665 GROWING(0, cat, start, cur, len);
2666 Copy(aptr, cur, fromlen, char);
2670 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2683 str = SvPV(fromstr, fromlen);
2684 end = str + fromlen;
2685 if (DO_UTF8(fromstr)) {
2687 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2689 utf8_source = FALSE;
2690 utf8_flags = 0; /* Unused, but keep compilers happy */
2692 if (howlen == e_star) len = fromlen;
2693 field_len = (len+7)/8;
2694 GROWING(utf8, cat, start, cur, field_len);
2695 if (len > (I32)fromlen) len = fromlen;
2698 if (datumtype == 'B')
2702 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2704 } else bits |= *str++ & 1;
2705 if (l & 7) bits <<= 1;
2707 PUSH_BYTE(utf8, cur, bits);
2712 /* datumtype == 'b' */
2716 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2717 if (val & 1) bits |= 0x80;
2718 } else if (*str++ & 1)
2720 if (l & 7) bits >>= 1;
2722 PUSH_BYTE(utf8, cur, bits);
2728 if (datumtype == 'B')
2729 bits <<= 7 - (l & 7);
2731 bits >>= 7 - (l & 7);
2732 PUSH_BYTE(utf8, cur, bits);
2735 /* Determine how many chars are left in the requested field */
2737 if (howlen == e_star) field_len = 0;
2738 else field_len -= l;
2739 Zero(cur, field_len, char);
2752 str = SvPV(fromstr, fromlen);
2753 end = str + fromlen;
2754 if (DO_UTF8(fromstr)) {
2756 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2758 utf8_source = FALSE;
2759 utf8_flags = 0; /* Unused, but keep compilers happy */
2761 if (howlen == e_star) len = fromlen;
2762 field_len = (len+1)/2;
2763 GROWING(utf8, cat, start, cur, field_len);
2764 if (!utf8 && len > (I32)fromlen) len = fromlen;
2767 if (datumtype == 'H')
2771 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2772 if (val < 256 && isALPHA(val))
2773 bits |= (val + 9) & 0xf;
2776 } else if (isALPHA(*str))
2777 bits |= (*str++ + 9) & 0xf;
2779 bits |= *str++ & 0xf;
2780 if (l & 1) bits <<= 4;
2782 PUSH_BYTE(utf8, cur, bits);
2790 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2791 if (val < 256 && isALPHA(val))
2792 bits |= ((val + 9) & 0xf) << 4;
2794 bits |= (val & 0xf) << 4;
2795 } else if (isALPHA(*str))
2796 bits |= ((*str++ + 9) & 0xf) << 4;
2798 bits |= (*str++ & 0xf) << 4;
2799 if (l & 1) bits >>= 4;
2801 PUSH_BYTE(utf8, cur, bits);
2807 PUSH_BYTE(utf8, cur, bits);
2810 /* Determine how many chars are left in the requested field */
2812 if (howlen == e_star) field_len = 0;
2813 else field_len -= l;
2814 Zero(cur, field_len, char);
2822 aiv = SvIV(fromstr);
2823 if ((-128 > aiv || aiv > 127) &&
2825 Perl_warner(aTHX_ packWARN(WARN_PACK),
2826 "Character in 'c' format wrapped in pack");
2827 PUSH_BYTE(utf8, cur, aiv & 0xff);
2832 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2835 GROWING(0, cat, start, cur, len);
2839 aiv = SvIV(fromstr);
2840 if ((0 > aiv || aiv > 0xff) &&
2842 Perl_warner(aTHX_ packWARN(WARN_PACK),
2843 "Character in 'C' format wrapped in pack");
2844 *cur++ = aiv & 0xff;
2849 U8 in_bytes = IN_BYTES;
2851 end = start+SvLEN(cat)-1;
2852 if (utf8) end -= UTF8_MAXLEN-1;
2856 auv = SvUV(fromstr);
2857 if (in_bytes) auv = auv % 0x100;
2862 SvCUR(cat) = cur - start;
2864 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2865 end = start+SvLEN(cat)-UTF8_MAXLEN;
2867 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2869 0 : UNICODE_ALLOW_ANY);
2874 SvCUR(cat) = cur - start;
2875 marked_upgrade(aTHX_ cat, symptr);
2876 lookahead.flags |= FLAG_DO_UTF8;
2877 lookahead.strbeg = symptr->strbeg;
2880 cur = start + SvCUR(cat);
2881 end = start+SvLEN(cat)-UTF8_MAXLEN;
2884 if (ckWARN(WARN_PACK))
2885 Perl_warner(aTHX_ packWARN(WARN_PACK),
2886 "Character in 'W' format wrapped in pack");
2891 SvCUR(cat) = cur - start;
2892 GROWING(0, cat, start, cur, len+1);
2893 end = start+SvLEN(cat)-1;
2895 *(U8 *) cur++ = auv;
2904 if (!(symptr->flags & FLAG_DO_UTF8)) {
2905 marked_upgrade(aTHX_ cat, symptr);
2906 lookahead.flags |= FLAG_DO_UTF8;
2907 lookahead.strbeg = symptr->strbeg;
2913 end = start+SvLEN(cat);
2914 if (!utf8) end -= UTF8_MAXLEN;
2918 auv = SvUV(fromstr);
2920 char buffer[UTF8_MAXLEN], *end;
2921 end = uvuni_to_utf8_flags(buffer, auv,
2923 0 : UNICODE_ALLOW_ANY);
2924 if (cur >= end-(end-buffer)*2) {
2926 SvCUR(cat) = cur - start;
2927 GROWING(0, cat, start, cur, len+(end-buffer)*2);
2928 end = start+SvLEN(cat)-UTF8_MAXLEN;
2930 bytes_to_uni(aTHX_ buffer, end-buffer, &cur);
2934 SvCUR(cat) = cur - start;
2935 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2936 end = start+SvLEN(cat)-UTF8_MAXLEN;
2938 cur = uvuni_to_utf8_flags(cur, auv,
2940 0 : UNICODE_ALLOW_ANY);
2945 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2951 anv = SvNV(fromstr);
2953 /* VOS does not automatically map a floating-point overflow
2954 during conversion from double to float into infinity, so we
2955 do it by hand. This code should either be generalized for
2956 any OS that needs it, or removed if and when VOS implements
2957 posix-976 (suggestion to support mapping to infinity).
2958 Paul.Green@stratus.com 02-04-02. */
2960 afloat = _float_constants[0]; /* single prec. inf. */
2961 else if (anv < -FLT_MAX)
2962 afloat = _float_constants[0]; /* single prec. inf. */
2963 else afloat = (float) anv;
2965 # if defined(VMS) && !defined(__IEEE_FP)
2966 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2967 * on Alpha; fake it if we don't have them.
2971 else if (anv < -FLT_MAX)
2973 else afloat = (float)anv;
2975 afloat = (float)anv;
2977 #endif /* __VOS__ */
2978 DO_BO_PACK_N(afloat, float);
2979 PUSH_VAR(utf8, cur, afloat);
2987 anv = SvNV(fromstr);
2989 /* VOS does not automatically map a floating-point overflow
2990 during conversion from long double to double into infinity,
2991 so we do it by hand. This code should either be generalized
2992 for any OS that needs it, or removed if and when VOS
2993 implements posix-976 (suggestion to support mapping to
2994 infinity). Paul.Green@stratus.com 02-04-02. */
2996 adouble = _double_constants[0]; /* double prec. inf. */
2997 else if (anv < -DBL_MAX)
2998 adouble = _double_constants[0]; /* double prec. inf. */
2999 else adouble = (double) anv;
3001 # if defined(VMS) && !defined(__IEEE_FP)
3002 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3003 * on Alpha; fake it if we don't have them.
3007 else if (anv < -DBL_MAX)
3009 else adouble = (double)anv;
3011 adouble = (double)anv;
3013 #endif /* __VOS__ */
3014 DO_BO_PACK_N(adouble, double);
3015 PUSH_VAR(utf8, cur, adouble);
3020 Zero(&anv, 1, NV); /* can be long double with unused bits */
3023 anv = SvNV(fromstr);
3024 DO_BO_PACK_N(anv, NV);
3025 PUSH_VAR(utf8, cur, anv);
3029 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3031 long double aldouble;
3032 /* long doubles can have unused bits, which may be nonzero */
3033 Zero(&aldouble, 1, long double);
3036 aldouble = (long double)SvNV(fromstr);
3037 DO_BO_PACK_N(aldouble, long double);
3038 PUSH_VAR(utf8, cur, aldouble);
3043 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3044 case 'n' | TYPE_IS_SHRIEKING:
3050 ai16 = (I16)SvIV(fromstr);
3052 ai16 = PerlSock_htons(ai16);
3054 PUSH16(utf8, cur, &ai16);
3057 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3058 case 'v' | TYPE_IS_SHRIEKING:
3064 ai16 = (I16)SvIV(fromstr);
3068 PUSH16(utf8, cur, &ai16);
3071 case 'S' | TYPE_IS_SHRIEKING:
3072 #if SHORTSIZE != SIZE16
3074 unsigned short aushort;
3076 aushort = SvUV(fromstr);
3077 DO_BO_PACK(aushort, s);
3078 PUSH_VAR(utf8, cur, aushort);
3088 au16 = (U16)SvUV(fromstr);
3089 DO_BO_PACK(au16, 16);
3090 PUSH16(utf8, cur, &au16);
3093 case 's' | TYPE_IS_SHRIEKING:
3094 #if SHORTSIZE != SIZE16
3098 ashort = SvIV(fromstr);
3099 DO_BO_PACK(ashort, s);
3100 PUSH_VAR(utf8, cur, ashort);
3110 ai16 = (I16)SvIV(fromstr);
3111 DO_BO_PACK(ai16, 16);
3112 PUSH16(utf8, cur, &ai16);
3116 case 'I' | TYPE_IS_SHRIEKING:
3120 auint = SvUV(fromstr);
3121 DO_BO_PACK(auint, i);
3122 PUSH_VAR(utf8, cur, auint);
3129 aiv = SvIV(fromstr);
3130 #if IVSIZE == INTSIZE
3132 #elif IVSIZE == LONGSIZE
3134 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3135 DO_BO_PACK(aiv, 64);
3137 Perl_croak(aTHX_ "'j' not supported on this platform");
3139 PUSH_VAR(utf8, cur, aiv);
3146 auv = SvUV(fromstr);
3147 #if UVSIZE == INTSIZE
3149 #elif UVSIZE == LONGSIZE
3151 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3152 DO_BO_PACK(auv, 64);
3154 Perl_croak(aTHX_ "'J' not supported on this platform");
3156 PUSH_VAR(utf8, cur, auv);
3163 anv = SvNV(fromstr);
3167 SvCUR(cat) = cur - start;
3168 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3171 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3172 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3173 any negative IVs will have already been got by the croak()
3174 above. IOK is untrue for fractions, so we test them
3175 against UV_MAX_P1. */
3176 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3177 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3178 char *in = buf + sizeof(buf);
3179 UV auv = SvUV(fromstr);
3182 *--in = (char)((auv & 0x7f) | 0x80);
3185 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3186 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3187 in, (buf + sizeof(buf)) - in);
3188 } else if (SvPOKp(fromstr))
3190 else if (SvNOKp(fromstr)) {
3191 /* 10**NV_MAX_10_EXP is the largest power of 10
3192 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3193 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3194 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3195 And with that many bytes only Inf can overflow.
3196 Some C compilers are strict about integral constant
3197 expressions so we conservatively divide by a slightly
3198 smaller integer instead of multiplying by the exact
3199 floating-point value.
3201 #ifdef NV_MAX_10_EXP
3202 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3203 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3205 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3206 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3208 char *in = buf + sizeof(buf);
3210 anv = Perl_floor(anv);
3212 NV next = Perl_floor(anv / 128);
3213 if (in <= buf) /* this cannot happen ;-) */
3214 Perl_croak(aTHX_ "Cannot compress integer in pack");
3215 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3218 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3219 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3220 in, (buf + sizeof(buf)) - in);
3222 char *from, *result, *in;
3228 /* Copy string and check for compliance */
3229 from = SvPV(fromstr, len);
3230 if ((norm = is_an_int(from, len)) == NULL)
3231 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3233 New('w', result, len, char);
3236 while (!done) *--in = div128(norm, &done) | 0x80;
3237 result[len - 1] &= 0x7F; /* clear continue bit */
3238 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3239 in, (result + len) - in);
3241 SvREFCNT_dec(norm); /* free norm */
3246 case 'i' | TYPE_IS_SHRIEKING:
3250 aint = SvIV(fromstr);
3251 DO_BO_PACK(aint, i);
3252 PUSH_VAR(utf8, cur, aint);
3255 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3256 case 'N' | TYPE_IS_SHRIEKING:
3262 au32 = SvUV(fromstr);
3264 au32 = PerlSock_htonl(au32);
3266 PUSH32(utf8, cur, &au32);
3269 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3270 case 'V' | TYPE_IS_SHRIEKING:
3276 au32 = SvUV(fromstr);
3280 PUSH32(utf8, cur, &au32);
3283 case 'L' | TYPE_IS_SHRIEKING:
3284 #if LONGSIZE != SIZE32
3286 unsigned long aulong;
3288 aulong = SvUV(fromstr);
3289 DO_BO_PACK(aulong, l);
3290 PUSH_VAR(utf8, cur, aulong);
3300 au32 = SvUV(fromstr);
3301 DO_BO_PACK(au32, 32);
3302 PUSH32(utf8, cur, &au32);
3305 case 'l' | TYPE_IS_SHRIEKING:
3306 #if LONGSIZE != SIZE32
3310 along = SvIV(fromstr);
3311 DO_BO_PACK(along, l);
3312 PUSH_VAR(utf8, cur, along);
3322 ai32 = SvIV(fromstr);
3323 DO_BO_PACK(ai32, 32);
3324 PUSH32(utf8, cur, &ai32);
3332 auquad = (Uquad_t) SvUV(fromstr);
3333 DO_BO_PACK(auquad, 64);
3334 PUSH_VAR(utf8, cur, auquad);
3341 aquad = (Quad_t)SvIV(fromstr);
3342 DO_BO_PACK(aquad, 64);
3343 PUSH_VAR(utf8, cur, aquad);
3346 #endif /* HAS_QUAD */
3348 len = 1; /* assume SV is correct length */
3349 GROWING(utf8, cat, start, cur, sizeof(char *));
3356 SvGETMAGIC(fromstr);
3357 if (!SvOK(fromstr)) aptr = NULL;
3360 /* XXX better yet, could spirit away the string to
3361 * a safe spot and hang on to it until the result
3362 * of pack() (and all copies of the result) are
3365 if (ckWARN(WARN_PACK) &&
3366 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3367 !SvREADONLY(fromstr)))) {
3368 Perl_warner(aTHX_ packWARN(WARN_PACK),
3369 "Attempt to pack pointer to temporary value");
3371 if (SvPOK(fromstr) || SvNIOK(fromstr))
3372 aptr = SvPV_flags(fromstr, n_a, 0);
3374 aptr = SvPV_force_flags(fromstr, n_a, 0);
3377 PUSH_VAR(utf8, cur, aptr);
3385 if (len <= 2) len = 45;
3386 else len = len / 3 * 3;
3388 Perl_warner(aTHX_ packWARN(WARN_PACK),
3389 "Field too wide in 'u' format in pack");
3392 aptr = SvPV(fromstr, fromlen);
3393 from_utf8 = DO_UTF8(fromstr);
3395 aend = aptr + fromlen;
3396 fromlen = sv_len_utf8(fromstr);
3397 } else aend = NULL; /* Unused, but keep compilers happy */
3398 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3399 while (fromlen > 0) {
3402 U8 hunk[1+63/3*4+1];
3404 if ((I32)fromlen > len)
3410 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3411 'u' | TYPE_IS_PACK)) {
3413 SvCUR(cat) = cur - start;
3414 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3416 end = doencodes(hunk, buffer, todo);
3418 end = doencodes(hunk, aptr, todo);
3421 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3428 SvCUR(cat) = cur - start;
3430 *symptr = lookahead;
3439 dSP; dMARK; dORIGMARK; dTARGET;
3440 register SV *cat = TARG;
3442 register char *pat = SvPVx(*++MARK, fromlen);
3443 register char *patend = pat + fromlen;
3446 sv_setpvn(cat, "", 0);
3449 packlist(cat, pat, patend, MARK, SP + 1);
3459 * c-indentation-style: bsd
3461 * indent-tabs-mode: t
3464 * vim: shiftwidth=4: