3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325 # elif PTRSIZE == IVSIZE
326 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
331 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
337 # if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339 # define DO_BO_UNPACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 # define DO_BO_PACK_N(var, type) \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
361 #endif /* PERL_PACK_CAN_BYTEORDER */
363 #define PACK_SIZE_CANNOT_CSUM 0x80
364 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365 #define PACK_SIZE_MASK 0x3F
367 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
370 #if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
374 typedef U8 packprops_t;
377 STATIC const packprops_t packprops[512] = {
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
393 /* I */ sizeof(unsigned int),
400 #if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
415 /* f */ sizeof(float),
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425 #if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
452 /* L */ sizeof(unsigned long),
454 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
460 /* S */ sizeof(unsigned short),
462 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 /* l */ sizeof(long),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
479 /* s */ sizeof(short),
481 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
497 /* EBCDIC (or bust) */
498 STATIC const packprops_t packprops[512] = {
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
512 /* f */ sizeof(float),
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523 #if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
544 /* I */ sizeof(unsigned int),
552 #if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
579 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
587 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
599 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
607 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
618 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
640 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 const char *from = *s;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
659 } else from += retlen;
664 *(U8 *)buf++ = (U8)val;
666 /* We have enough characters for the buffer. Did we have problems ? */
669 /* Rewalk the string fragment while warning */
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
676 if (from > end) from = end;
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
690 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
693 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
694 if (val >= 0x100 || !ISUUCHAR(val) ||
695 retlen == (STRLEN) -1 || retlen == 0) {
699 *out = PL_uudmap[val] & 077;
705 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
706 U8 buffer[UTF8_MAXLEN];
707 const U8 * const end = start + len;
709 while (start < end) {
711 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
721 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
729 #define PUSH_BYTES(utf8, cur, buf, len) \
731 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
733 Copy(buf, cur, len, char); \
738 #define GROWING(utf8, cat, start, cur, in_len) \
740 STRLEN glen = (in_len); \
741 if (utf8) glen *= UTF8_EXPAND; \
742 if ((cur) + glen >= (start) + SvLEN(cat)) { \
743 (start) = sv_exp_grow(cat, glen); \
744 (cur) = (start) + SvCUR(cat); \
748 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
750 const STRLEN glen = (in_len); \
752 if (utf8) gl *= UTF8_EXPAND; \
753 if ((cur) + gl >= (start) + SvLEN(cat)) { \
755 SvCUR_set((cat), (cur) - (start)); \
756 (start) = sv_exp_grow(cat, gl); \
757 (cur) = (start) + SvCUR(cat); \
759 PUSH_BYTES(utf8, cur, buf, glen); \
762 #define PUSH_BYTE(utf8, s, byte) \
765 const U8 au8 = (byte); \
766 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
767 } else *(U8 *)(s)++ = (byte); \
770 /* Only to be used inside a loop (see the break) */
771 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
774 if (str >= end) break; \
775 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
776 if (retlen == (STRLEN) -1 || retlen == 0) { \
778 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
783 static const char *_action( const tempsym_t* symptr )
785 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
788 /* Returns the sizeof() struct described by pat */
790 S_measure_struct(pTHX_ tempsym_t* symptr)
794 while (next_symbol(symptr)) {
798 switch (symptr->howlen) {
800 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
804 /* e_no_len and e_number */
805 len = symptr->length;
809 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
812 /* endianness doesn't influence the size of a type */
813 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 Perl_croak(aTHX_ "Invalid type '%c' in %s",
816 (int)TYPE_NO_MODIFIERS(symptr->code),
818 #ifdef PERL_PACK_CAN_SHRIEKSIGN
819 case '.' | TYPE_IS_SHRIEKING:
820 case '@' | TYPE_IS_SHRIEKING:
825 case 'U': /* XXXX Is it correct? */
828 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
829 (int) TYPE_NO_MODIFIERS(symptr->code),
836 tempsym_t savsym = *symptr;
837 symptr->patptr = savsym.grpbeg;
838 symptr->patend = savsym.grpend;
839 /* XXXX Theoretically, we need to measure many times at
840 different positions, since the subexpression may contain
841 alignment commands, but be not of aligned length.
842 Need to detect this and croak(). */
843 size = measure_struct(symptr);
847 case 'X' | TYPE_IS_SHRIEKING:
848 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 if (!len) /* Avoid division by 0 */
852 len = total % len; /* Assumed: the start is aligned. */
857 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
859 case 'x' | TYPE_IS_SHRIEKING:
860 if (!len) /* Avoid division by 0 */
862 star = total % len; /* Assumed: the start is aligned. */
863 if (star) /* Other portable ways? */
887 size = sizeof(char*);
897 /* locate matching closing parenthesis or bracket
898 * returns char pointer to char after match, or NULL
901 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
903 while (patptr < patend) {
904 const char c = *patptr++;
911 while (patptr < patend && *patptr != '\n')
915 patptr = group_end(patptr, patend, ')') + 1;
917 patptr = group_end(patptr, patend, ']') + 1;
919 Perl_croak(aTHX_ "No group ending character '%c' found in template",
925 /* Convert unsigned decimal number to binary.
926 * Expects a pointer to the first digit and address of length variable
927 * Advances char pointer to 1st non-digit char and returns number
930 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
932 I32 len = *patptr++ - '0';
933 while (isDIGIT(*patptr)) {
934 if (len >= 0x7FFFFFFF/10)
935 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
936 len = (len * 10) + (*patptr++ - '0');
942 /* The marvellous template parsing routine: Using state stored in *symptr,
943 * locates next template code and count
946 S_next_symbol(pTHX_ tempsym_t* symptr )
948 const char* patptr = symptr->patptr;
949 const char* const patend = symptr->patend;
951 symptr->flags &= ~FLAG_SLASH;
953 while (patptr < patend) {
954 if (isSPACE(*patptr))
956 else if (*patptr == '#') {
958 while (patptr < patend && *patptr != '\n')
963 /* We should have found a template code */
964 I32 code = *patptr++ & 0xFF;
965 U32 inherited_modifiers = 0;
967 if (code == ','){ /* grandfather in commas but with a warning */
968 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
969 symptr->flags |= FLAG_COMMA;
970 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
971 "Invalid type ',' in %s", _action( symptr ) );
976 /* for '(', skip to ')' */
978 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
979 Perl_croak(aTHX_ "()-group starts with a count in %s",
981 symptr->grpbeg = patptr;
982 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
983 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
984 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
988 /* look for group modifiers to inherit */
989 if (TYPE_ENDIANNESS(symptr->flags)) {
990 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
991 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
994 /* look for modifiers */
995 while (patptr < patend) {
1000 modifier = TYPE_IS_SHRIEKING;
1001 allowed = SHRIEKING_ALLOWED_TYPES;
1003 #ifdef PERL_PACK_CAN_BYTEORDER
1005 modifier = TYPE_IS_BIG_ENDIAN;
1006 allowed = ENDIANNESS_ALLOWED_TYPES;
1009 modifier = TYPE_IS_LITTLE_ENDIAN;
1010 allowed = ENDIANNESS_ALLOWED_TYPES;
1012 #endif /* PERL_PACK_CAN_BYTEORDER */
1022 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1023 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1024 allowed, _action( symptr ) );
1026 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1027 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1028 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1029 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1030 TYPE_ENDIANNESS_MASK)
1031 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1032 *patptr, _action( symptr ) );
1034 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1035 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1036 "Duplicate modifier '%c' after '%c' in %s",
1037 *patptr, (int) TYPE_NO_MODIFIERS(code),
1038 _action( symptr ) );
1045 /* inherit modifiers */
1046 code |= inherited_modifiers;
1048 /* look for count and/or / */
1049 if (patptr < patend) {
1050 if (isDIGIT(*patptr)) {
1051 patptr = get_num( patptr, &symptr->length );
1052 symptr->howlen = e_number;
1054 } else if (*patptr == '*') {
1056 symptr->howlen = e_star;
1058 } else if (*patptr == '[') {
1059 const char* lenptr = ++patptr;
1060 symptr->howlen = e_number;
1061 patptr = group_end( patptr, patend, ']' ) + 1;
1062 /* what kind of [] is it? */
1063 if (isDIGIT(*lenptr)) {
1064 lenptr = get_num( lenptr, &symptr->length );
1065 if( *lenptr != ']' )
1066 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1067 _action( symptr ) );
1069 tempsym_t savsym = *symptr;
1070 symptr->patend = patptr-1;
1071 symptr->patptr = lenptr;
1072 savsym.length = measure_struct(symptr);
1076 symptr->howlen = e_no_len;
1081 while (patptr < patend) {
1082 if (isSPACE(*patptr))
1084 else if (*patptr == '#') {
1086 while (patptr < patend && *patptr != '\n')
1088 if (patptr < patend)
1091 if (*patptr == '/') {
1092 symptr->flags |= FLAG_SLASH;
1094 if (patptr < patend &&
1095 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1096 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1097 _action( symptr ) );
1103 /* at end - no count, no / */
1104 symptr->howlen = e_no_len;
1108 symptr->code = code;
1109 symptr->patptr = patptr;
1113 symptr->patptr = patptr;
1118 There is no way to cleanly handle the case where we should process the
1119 string per byte in its upgraded form while it's really in downgraded form
1120 (e.g. estimates like strend-s as an upper bound for the number of
1121 characters left wouldn't work). So if we foresee the need of this
1122 (pattern starts with U or contains U0), we want to work on the encoded
1123 version of the string. Users are advised to upgrade their pack string
1124 themselves if they need to do a lot of unpacks like this on it
1127 need_utf8(const char *pat, const char *patend)
1130 while (pat < patend) {
1131 if (pat[0] == '#') {
1133 pat = (const char *) memchr(pat, '\n', patend-pat);
1134 if (!pat) return FALSE;
1135 } else if (pat[0] == 'U') {
1136 if (first || pat[1] == '0') return TRUE;
1137 } else first = FALSE;
1144 first_symbol(const char *pat, const char *patend) {
1145 while (pat < patend) {
1146 if (pat[0] != '#') return pat[0];
1148 pat = (const char *) memchr(pat, '\n', patend-pat);
1156 =for apidoc unpack_str
1158 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1159 and ocnt are not used. This call should not be used, use unpackstring instead.
1164 Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1167 PERL_UNUSED_ARG(strbeg);
1168 PERL_UNUSED_ARG(new_s);
1169 PERL_UNUSED_ARG(ocnt);
1171 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1172 else if (need_utf8(pat, patend)) {
1173 /* We probably should try to avoid this in case a scalar context call
1174 wouldn't get to the "U0" */
1175 STRLEN len = strend - s;
1176 s = (char *) bytes_to_utf8((U8 *) s, &len);
1179 flags |= FLAG_DO_UTF8;
1182 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1183 flags |= FLAG_PARSE_UTF8;
1185 TEMPSYM_INIT(&sym, pat, patend, flags);
1187 return unpack_rec(&sym, s, s, strend, NULL );
1191 =for apidoc unpackstring
1193 The engine implementing unpack() Perl function. C<unpackstring> puts the
1194 extracted list items on the stack and returns the number of elements.
1195 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1200 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1204 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1205 else if (need_utf8(pat, patend)) {
1206 /* We probably should try to avoid this in case a scalar context call
1207 wouldn't get to the "U0" */
1208 STRLEN len = strend - s;
1209 s = (char *) bytes_to_utf8((U8 *) s, &len);
1212 flags |= FLAG_DO_UTF8;
1215 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1216 flags |= FLAG_PARSE_UTF8;
1218 TEMPSYM_INIT(&sym, pat, patend, flags);
1220 return unpack_rec(&sym, s, s, strend, NULL );
1225 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1229 const I32 start_sp_offset = SP - PL_stack_base;
1235 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1236 bool beyond = FALSE;
1237 bool explicit_length;
1238 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1239 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1240 symptr->strbeg = s - strbeg;
1242 while (next_symbol(symptr)) {
1245 I32 datumtype = symptr->code;
1246 /* do first one only unless in list context
1247 / is implemented by unpacking the count, then popping it from the
1248 stack, so must check that we're not in the middle of a / */
1249 if ( unpack_only_one
1250 && (SP - PL_stack_base == start_sp_offset + 1)
1251 && (datumtype != '/') ) /* XXX can this be omitted */
1254 switch (howlen = symptr->howlen) {
1256 len = strend - strbeg; /* long enough */
1259 /* e_no_len and e_number */
1260 len = symptr->length;
1264 explicit_length = TRUE;
1266 beyond = s >= strend;
1268 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1270 /* props nonzero means we can process this letter. */
1271 const long size = props & PACK_SIZE_MASK;
1272 const long howmany = (strend - s) / size;
1276 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1277 if (len && unpack_only_one) len = 1;
1283 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1285 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1288 if (howlen == e_no_len)
1289 len = 16; /* len is not specified */
1297 tempsym_t savsym = *symptr;
1298 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1299 symptr->flags |= group_modifiers;
1300 symptr->patend = savsym.grpend;
1301 symptr->previous = &savsym;
1305 symptr->patptr = savsym.grpbeg;
1306 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1307 else symptr->flags &= ~FLAG_PARSE_UTF8;
1308 unpack_rec(symptr, s, strbeg, strend, &s);
1309 if (s == strend && savsym.howlen == e_star)
1310 break; /* No way to continue */
1313 savsym.flags = symptr->flags & ~group_modifiers;
1317 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1318 case '.' | TYPE_IS_SHRIEKING:
1323 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1324 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1325 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1326 const bool u8 = utf8;
1328 if (howlen == e_star) from = strbeg;
1329 else if (len <= 0) from = s;
1331 tempsym_t *group = symptr;
1333 while (--len && group) group = group->previous;
1334 from = group ? strbeg + group->strbeg : strbeg;
1337 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1338 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1339 XPUSHs(sv_2mortal(sv));
1342 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1343 case '@' | TYPE_IS_SHRIEKING:
1346 s = strbeg + symptr->strbeg;
1347 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1348 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1349 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1355 Perl_croak(aTHX_ "'@' outside of string in unpack");
1360 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1363 Perl_croak(aTHX_ "'@' outside of string in unpack");
1367 case 'X' | TYPE_IS_SHRIEKING:
1368 if (!len) /* Avoid division by 0 */
1371 const char *hop, *last;
1373 hop = last = strbeg;
1375 hop += UTF8SKIP(hop);
1382 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1386 len = (s - strbeg) % len;
1392 Perl_croak(aTHX_ "'X' outside of string in unpack");
1393 while (--s, UTF8_IS_CONTINUATION(*s)) {
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
1400 if (len > s - strbeg)
1401 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1405 case 'x' | TYPE_IS_SHRIEKING: {
1407 if (!len) /* Avoid division by 0 */
1409 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1410 else ai32 = (s - strbeg) % len;
1411 if (ai32 == 0) break;
1419 Perl_croak(aTHX_ "'x' outside of string in unpack");
1424 if (len > strend - s)
1425 Perl_croak(aTHX_ "'x' outside of string in unpack");
1430 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1436 /* Preliminary length estimate is assumed done in 'W' */
1437 if (len > strend - s) len = strend - s;
1443 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1444 if (hop >= strend) {
1446 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1451 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1453 } else if (len > strend - s)
1456 if (datumtype == 'Z') {
1457 /* 'Z' strips stuff after first null */
1458 const char *ptr, *end;
1460 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1461 sv = newSVpvn(s, ptr-s);
1462 if (howlen == e_star) /* exact for 'Z*' */
1463 len = ptr-s + (ptr != strend ? 1 : 0);
1464 } else if (datumtype == 'A') {
1465 /* 'A' strips both nulls and spaces */
1467 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1468 for (ptr = s+len-1; ptr >= s; ptr--)
1469 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1470 !is_utf8_space((U8 *) ptr)) break;
1471 if (ptr >= s) ptr += UTF8SKIP(ptr);
1474 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1476 for (ptr = s+len-1; ptr >= s; ptr--)
1477 if (*ptr != 0 && !isSPACE(*ptr)) break;
1480 sv = newSVpvn(s, ptr-s);
1481 } else sv = newSVpvn(s, len);
1485 /* Undo any upgrade done due to need_utf8() */
1486 if (!(symptr->flags & FLAG_WAS_UTF8))
1487 sv_utf8_downgrade(sv, 0);
1489 XPUSHs(sv_2mortal(sv));
1495 if (howlen == e_star || len > (strend - s) * 8)
1496 len = (strend - s) * 8;
1500 Newxz(PL_bitcount, 256, char);
1501 for (bits = 1; bits < 256; bits++) {
1502 if (bits & 1) PL_bitcount[bits]++;
1503 if (bits & 2) PL_bitcount[bits]++;
1504 if (bits & 4) PL_bitcount[bits]++;
1505 if (bits & 8) PL_bitcount[bits]++;
1506 if (bits & 16) PL_bitcount[bits]++;
1507 if (bits & 32) PL_bitcount[bits]++;
1508 if (bits & 64) PL_bitcount[bits]++;
1509 if (bits & 128) PL_bitcount[bits]++;
1513 while (len >= 8 && s < strend) {
1514 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1519 cuv += PL_bitcount[*(U8 *)s++];
1522 if (len && s < strend) {
1524 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1525 if (datumtype == 'b')
1527 if (bits & 1) cuv++;
1532 if (bits & 0x80) cuv++;
1539 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1542 if (datumtype == 'b') {
1544 const I32 ai32 = len;
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits >>= 1;
1548 if (s >= strend) break;
1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 1 ? '1' : '0';
1555 const I32 ai32 = len;
1556 for (len = 0; len < ai32; len++) {
1557 if (len & 7) bits <<= 1;
1559 if (s >= strend) break;
1560 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1561 } else bits = *(U8 *) s++;
1562 *str++ = bits & 0x80 ? '1' : '0';
1566 SvCUR_set(sv, str - SvPVX_const(sv));
1573 /* Preliminary length estimate, acceptable for utf8 too */
1574 if (howlen == e_star || len > (strend - s) * 2)
1575 len = (strend - s) * 2;
1576 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1579 if (datumtype == 'h') {
1582 for (len = 0; len < ai32; len++) {
1583 if (len & 1) bits >>= 4;
1585 if (s >= strend) break;
1586 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1587 } else bits = * (U8 *) s++;
1588 *str++ = PL_hexdigit[bits & 15];
1592 const I32 ai32 = len;
1593 for (len = 0; len < ai32; len++) {
1594 if (len & 1) bits <<= 4;
1596 if (s >= strend) break;
1597 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1598 } else bits = *(U8 *) s++;
1599 *str++ = PL_hexdigit[(bits >> 4) & 15];
1603 SvCUR_set(sv, str - SvPVX_const(sv));
1609 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1610 if (aint >= 128) /* fake up signed chars */
1613 PUSHs(sv_2mortal(newSViv((IV)aint)));
1614 else if (checksum > bits_in_uv)
1615 cdouble += (NV)aint;
1624 if (explicit_length && datumtype == 'C')
1625 /* Switch to "character" mode */
1626 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1629 if (datumtype == 'C' ?
1630 (symptr->flags & FLAG_DO_UTF8) &&
1631 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1632 while (len-- > 0 && s < strend) {
1634 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1635 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1636 if (retlen == (STRLEN) -1 || retlen == 0)
1637 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1640 PUSHs(sv_2mortal(newSVuv((UV) val)));
1641 else if (checksum > bits_in_uv)
1642 cdouble += (NV) val;
1646 } else if (!checksum)
1648 const U8 ch = *(U8 *) s++;
1649 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1651 else if (checksum > bits_in_uv)
1652 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1654 while (len-- > 0) cuv += *(U8 *) s++;
1658 if (explicit_length) {
1659 /* Switch to "bytes in UTF-8" mode */
1660 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1662 /* Should be impossible due to the need_utf8() test */
1663 Perl_croak(aTHX_ "U0 mode on a byte string");
1667 if (len > strend - s) len = strend - s;
1669 if (len && unpack_only_one) len = 1;
1673 while (len-- > 0 && s < strend) {
1677 U8 result[UTF8_MAXLEN];
1678 const char *ptr = s;
1680 /* Bug: warns about bad utf8 even if we are short on bytes
1681 and will break out of the loop */
1682 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1685 len = UTF8SKIP(result);
1686 if (!uni_to_bytes(aTHX_ &ptr, strend,
1687 (char *) &result[1], len-1, 'U')) break;
1688 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1691 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1692 if (retlen == (STRLEN) -1 || retlen == 0)
1693 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1697 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1698 else if (checksum > bits_in_uv)
1699 cdouble += (NV) auv;
1704 case 's' | TYPE_IS_SHRIEKING:
1705 #if SHORTSIZE != SIZE16
1708 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1709 DO_BO_UNPACK(ashort, s);
1711 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1712 else if (checksum > bits_in_uv)
1713 cdouble += (NV)ashort;
1725 #if U16SIZE > SIZE16
1728 SHIFT16(utf8, s, strend, &ai16, datumtype);
1729 DO_BO_UNPACK(ai16, 16);
1730 #if U16SIZE > SIZE16
1735 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1736 else if (checksum > bits_in_uv)
1737 cdouble += (NV)ai16;
1742 case 'S' | TYPE_IS_SHRIEKING:
1743 #if SHORTSIZE != SIZE16
1745 unsigned short aushort;
1746 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1747 DO_BO_UNPACK(aushort, s);
1749 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1750 else if (checksum > bits_in_uv)
1751 cdouble += (NV)aushort;
1764 #if U16SIZE > SIZE16
1767 SHIFT16(utf8, s, strend, &au16, datumtype);
1768 DO_BO_UNPACK(au16, 16);
1770 if (datumtype == 'n')
1771 au16 = PerlSock_ntohs(au16);
1774 if (datumtype == 'v')
1778 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1779 else if (checksum > bits_in_uv)
1780 cdouble += (NV) au16;
1785 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1786 case 'v' | TYPE_IS_SHRIEKING:
1787 case 'n' | TYPE_IS_SHRIEKING:
1790 # if U16SIZE > SIZE16
1793 SHIFT16(utf8, s, strend, &ai16, datumtype);
1795 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1796 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1797 # endif /* HAS_NTOHS */
1799 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1800 ai16 = (I16) vtohs((U16) ai16);
1801 # endif /* HAS_VTOHS */
1803 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1804 else if (checksum > bits_in_uv)
1805 cdouble += (NV) ai16;
1810 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1812 case 'i' | TYPE_IS_SHRIEKING:
1815 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1816 DO_BO_UNPACK(aint, i);
1818 PUSHs(sv_2mortal(newSViv((IV)aint)));
1819 else if (checksum > bits_in_uv)
1820 cdouble += (NV)aint;
1826 case 'I' | TYPE_IS_SHRIEKING:
1829 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1830 DO_BO_UNPACK(auint, i);
1832 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1833 else if (checksum > bits_in_uv)
1834 cdouble += (NV)auint;
1842 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1843 #if IVSIZE == INTSIZE
1844 DO_BO_UNPACK(aiv, i);
1845 #elif IVSIZE == LONGSIZE
1846 DO_BO_UNPACK(aiv, l);
1847 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1848 DO_BO_UNPACK(aiv, 64);
1850 Perl_croak(aTHX_ "'j' not supported on this platform");
1853 PUSHs(sv_2mortal(newSViv(aiv)));
1854 else if (checksum > bits_in_uv)
1863 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1864 #if IVSIZE == INTSIZE
1865 DO_BO_UNPACK(auv, i);
1866 #elif IVSIZE == LONGSIZE
1867 DO_BO_UNPACK(auv, l);
1868 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1869 DO_BO_UNPACK(auv, 64);
1871 Perl_croak(aTHX_ "'J' not supported on this platform");
1874 PUSHs(sv_2mortal(newSVuv(auv)));
1875 else if (checksum > bits_in_uv)
1881 case 'l' | TYPE_IS_SHRIEKING:
1882 #if LONGSIZE != SIZE32
1885 SHIFT_VAR(utf8, s, strend, along, datumtype);
1886 DO_BO_UNPACK(along, l);
1888 PUSHs(sv_2mortal(newSViv((IV)along)));
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)along;
1901 #if U32SIZE > SIZE32
1904 SHIFT32(utf8, s, strend, &ai32, datumtype);
1905 DO_BO_UNPACK(ai32, 32);
1906 #if U32SIZE > SIZE32
1907 if (ai32 > 2147483647) ai32 -= 4294967296;
1910 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1911 else if (checksum > bits_in_uv)
1912 cdouble += (NV)ai32;
1917 case 'L' | TYPE_IS_SHRIEKING:
1918 #if LONGSIZE != SIZE32
1920 unsigned long aulong;
1921 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1922 DO_BO_UNPACK(aulong, l);
1924 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1925 else if (checksum > bits_in_uv)
1926 cdouble += (NV)aulong;
1939 #if U32SIZE > SIZE32
1942 SHIFT32(utf8, s, strend, &au32, datumtype);
1943 DO_BO_UNPACK(au32, 32);
1945 if (datumtype == 'N')
1946 au32 = PerlSock_ntohl(au32);
1949 if (datumtype == 'V')
1953 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1954 else if (checksum > bits_in_uv)
1955 cdouble += (NV)au32;
1960 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1961 case 'V' | TYPE_IS_SHRIEKING:
1962 case 'N' | TYPE_IS_SHRIEKING:
1965 # if U32SIZE > SIZE32
1968 SHIFT32(utf8, s, strend, &ai32, datumtype);
1970 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1971 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1974 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1975 ai32 = (I32)vtohl((U32)ai32);
1978 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1979 else if (checksum > bits_in_uv)
1980 cdouble += (NV)ai32;
1985 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1989 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1990 DO_BO_UNPACK_PC(aptr);
1991 /* newSVpv generates undef if aptr is NULL */
1992 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
2000 while (len > 0 && s < strend) {
2002 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2003 auv = (auv << 7) | (ch & 0x7f);
2004 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2007 PUSHs(sv_2mortal(newSVuv(auv)));
2012 if (++bytes >= sizeof(UV)) { /* promote to string */
2015 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2016 while (s < strend) {
2017 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2018 sv = mul128(sv, (U8)(ch & 0x7f));
2024 t = SvPV_nolen_const(sv);
2028 PUSHs(sv_2mortal(sv));
2033 if ((s >= strend) && bytes)
2034 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2038 if (symptr->howlen == e_star)
2039 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2041 if (s + sizeof(char*) <= strend) {
2043 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2044 DO_BO_UNPACK_PC(aptr);
2045 /* newSVpvn generates undef if aptr is NULL */
2046 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2053 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2054 DO_BO_UNPACK(aquad, 64);
2056 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2057 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2058 else if (checksum > bits_in_uv)
2059 cdouble += (NV)aquad;
2067 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2068 DO_BO_UNPACK(auquad, 64);
2070 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2071 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2072 else if (checksum > bits_in_uv)
2073 cdouble += (NV)auquad;
2078 #endif /* HAS_QUAD */
2079 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2083 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2084 DO_BO_UNPACK_N(afloat, float);
2086 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2094 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2095 DO_BO_UNPACK_N(adouble, double);
2097 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2105 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2106 DO_BO_UNPACK_N(anv, NV);
2108 PUSHs(sv_2mortal(newSVnv(anv)));
2113 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2116 long double aldouble;
2117 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2118 DO_BO_UNPACK_N(aldouble, long double);
2120 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2122 cdouble += aldouble;
2128 * Initialise the decode mapping. By using a table driven
2129 * algorithm, the code will be character-set independent
2130 * (and just as fast as doing character arithmetic)
2132 if (PL_uudmap['M'] == 0) {
2135 for (i = 0; i < sizeof(PL_uuemap); ++i)
2136 PL_uudmap[(U8)PL_uuemap[i]] = i;
2138 * Because ' ' and '`' map to the same value,
2139 * we need to decode them both the same.
2144 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2145 sv = sv_2mortal(NEWSV(42, l));
2146 if (l) SvPOK_on(sv);
2149 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2155 next_uni_uu(aTHX_ &s, strend, &a);
2156 next_uni_uu(aTHX_ &s, strend, &b);
2157 next_uni_uu(aTHX_ &s, strend, &c);
2158 next_uni_uu(aTHX_ &s, strend, &d);
2159 hunk[0] = (char)((a << 2) | (b >> 4));
2160 hunk[1] = (char)((b << 4) | (c >> 2));
2161 hunk[2] = (char)((c << 6) | d);
2162 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2170 /* possible checksum byte */
2171 const char *skip = s+UTF8SKIP(s);
2172 if (skip < strend && *skip == '\n')
2178 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2183 len = PL_uudmap[*(U8*)s++] & 077;
2185 if (s < strend && ISUUCHAR(*s))
2186 a = PL_uudmap[*(U8*)s++] & 077;
2189 if (s < strend && ISUUCHAR(*s))
2190 b = PL_uudmap[*(U8*)s++] & 077;
2193 if (s < strend && ISUUCHAR(*s))
2194 c = PL_uudmap[*(U8*)s++] & 077;
2197 if (s < strend && ISUUCHAR(*s))
2198 d = PL_uudmap[*(U8*)s++] & 077;
2201 hunk[0] = (char)((a << 2) | (b >> 4));
2202 hunk[1] = (char)((b << 4) | (c >> 2));
2203 hunk[2] = (char)((c << 6) | d);
2204 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2209 else /* possible checksum byte */
2210 if (s + 1 < strend && s[1] == '\n')
2219 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2220 (checksum > bits_in_uv &&
2221 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2224 anv = (NV) (1 << (checksum & 15));
2225 while (checksum >= 16) {
2229 while (cdouble < 0.0)
2231 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2232 sv = newSVnv(cdouble);
2235 if (checksum < bits_in_uv) {
2236 UV mask = ((UV)1 << checksum) - 1;
2241 XPUSHs(sv_2mortal(sv));
2245 if (symptr->flags & FLAG_SLASH){
2246 if (SP - PL_stack_base - start_sp_offset <= 0)
2247 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2248 if( next_symbol(symptr) ){
2249 if( symptr->howlen == e_number )
2250 Perl_croak(aTHX_ "Count after length/code in unpack" );
2252 /* ...end of char buffer then no decent length available */
2253 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2255 /* take top of stack (hope it's numeric) */
2258 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2261 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2263 datumtype = symptr->code;
2264 explicit_length = FALSE;
2272 return SP - PL_stack_base - start_sp_offset;
2279 I32 gimme = GIMME_V;
2282 const char *pat = SvPV_const(left, llen);
2283 const char *s = SvPV_const(right, rlen);
2284 const char *strend = s + rlen;
2285 const char *patend = pat + llen;
2289 cnt = unpackstring(pat, patend, s, strend,
2290 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2291 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2294 if ( !cnt && gimme == G_SCALAR )
2295 PUSHs(&PL_sv_undef);
2300 doencodes(U8 *h, const char *s, I32 len)
2302 *h++ = PL_uuemap[len];
2304 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2305 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2306 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2307 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2312 const char r = (len > 1 ? s[1] : '\0');
2313 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2314 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2315 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2316 *h++ = PL_uuemap[0];
2323 S_is_an_int(pTHX_ const char *s, STRLEN l)
2325 SV *result = newSVpvn(s, l);
2326 char *const result_c = SvPV_nolen(result); /* convenience */
2327 char *out = result_c;
2337 SvREFCNT_dec(result);
2360 SvREFCNT_dec(result);
2366 SvCUR_set(result, out - result_c);
2370 /* pnum must be '\0' terminated */
2372 S_div128(pTHX_ SV *pnum, bool *done)
2375 char * const s = SvPV(pnum, len);
2381 const int i = m * 10 + (*t - '0');
2382 const int r = (i >> 7); /* r < 10 */
2390 SvCUR_set(pnum, (STRLEN) (t - s));
2395 =for apidoc pack_cat
2397 The engine implementing pack() Perl function. Note: parameters next_in_list and
2398 flags are not used. This call should not be used; use packlist instead.
2404 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2407 PERL_UNUSED_ARG(next_in_list);
2408 PERL_UNUSED_ARG(flags);
2410 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2412 (void)pack_rec( cat, &sym, beglist, endlist );
2417 =for apidoc packlist
2419 The engine implementing pack() Perl function.
2425 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2430 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2432 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2433 Also make sure any UTF8 flag is loaded */
2434 SvPV_force(cat, no_len);
2436 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2438 (void)pack_rec( cat, &sym, beglist, endlist );
2441 /* like sv_utf8_upgrade, but also repoint the group start markers */
2443 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2446 const char *from_ptr, *from_start, *from_end, **marks, **m;
2447 char *to_start, *to_ptr;
2449 if (SvUTF8(sv)) return;
2451 from_start = SvPVX_const(sv);
2452 from_end = from_start + SvCUR(sv);
2453 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2454 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2455 if (from_ptr == from_end) {
2456 /* Simple case: no character needs to be changed */
2461 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2462 Newx(to_start, len, char);
2463 Copy(from_start, to_start, from_ptr-from_start, char);
2464 to_ptr = to_start + (from_ptr-from_start);
2466 Newx(marks, sym_ptr->level+2, const char *);
2467 for (group=sym_ptr; group; group = group->previous)
2468 marks[group->level] = from_start + group->strbeg;
2469 marks[sym_ptr->level+1] = from_end+1;
2470 for (m = marks; *m < from_ptr; m++)
2471 *m = to_start + (*m-from_start);
2473 for (;from_ptr < from_end; from_ptr++) {
2474 while (*m == from_ptr) *m++ = to_ptr;
2475 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2479 while (*m == from_ptr) *m++ = to_ptr;
2480 if (m != marks + sym_ptr->level+1) {
2483 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2485 for (group=sym_ptr; group; group = group->previous)
2486 group->strbeg = marks[group->level] - to_start;
2491 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2492 from_start -= SvIVX(sv);
2495 SvFLAGS(sv) &= ~SVf_OOK;
2498 Safefree(from_start);
2499 SvPV_set(sv, to_start);
2500 SvCUR_set(sv, to_ptr - to_start);
2505 /* Exponential string grower. Makes string extension effectively O(n)
2506 needed says how many extra bytes we need (not counting the final '\0')
2507 Only grows the string if there is an actual lack of space
2510 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2511 const STRLEN cur = SvCUR(sv);
2512 const STRLEN len = SvLEN(sv);
2514 if (len - cur > needed) return SvPVX(sv);
2515 extend = needed > len ? needed : len;
2516 return SvGROW(sv, len+extend+1);
2521 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2523 tempsym_t lookahead;
2524 I32 items = endlist - beglist;
2525 bool found = next_symbol(symptr);
2526 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2527 bool warn_utf8 = ckWARN(WARN_UTF8);
2529 if (symptr->level == 0 && found && symptr->code == 'U') {
2530 marked_upgrade(aTHX_ cat, symptr);
2531 symptr->flags |= FLAG_DO_UTF8;
2534 symptr->strbeg = SvCUR(cat);
2540 SV *lengthcode = Nullsv;
2541 I32 datumtype = symptr->code;
2542 howlen_t howlen = symptr->howlen;
2543 char *start = SvPVX(cat);
2544 char *cur = start + SvCUR(cat);
2546 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2550 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2554 /* e_no_len and e_number */
2555 len = symptr->length;
2560 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2562 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2563 /* We can process this letter. */
2564 STRLEN size = props & PACK_SIZE_MASK;
2565 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2569 /* Look ahead for next symbol. Do we have code/code? */
2570 lookahead = *symptr;
2571 found = next_symbol(&lookahead);
2572 if (symptr->flags & FLAG_SLASH) {
2574 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2575 if (strchr("aAZ", lookahead.code)) {
2576 if (lookahead.howlen == e_number) count = lookahead.length;
2579 count = DO_UTF8(*beglist) ?
2580 sv_len_utf8(*beglist) : sv_len(*beglist);
2582 if (lookahead.code == 'Z') count++;
2585 if (lookahead.howlen == e_number && lookahead.length < items)
2586 count = lookahead.length;
2589 lookahead.howlen = e_number;
2590 lookahead.length = count;
2591 lengthcode = sv_2mortal(newSViv(count));
2594 /* Code inside the switch must take care to properly update
2595 cat (CUR length and '\0' termination) if it updated *cur and
2596 doesn't simply leave using break */
2597 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2599 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2600 (int) TYPE_NO_MODIFIERS(datumtype));
2602 Perl_croak(aTHX_ "'%%' may not be used in pack");
2605 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2606 case '.' | TYPE_IS_SHRIEKING:
2609 if (howlen == e_star) from = start;
2610 else if (len == 0) from = cur;
2612 tempsym_t *group = symptr;
2614 while (--len && group) group = group->previous;
2615 from = group ? start + group->strbeg : start;
2618 len = SvIV(fromstr);
2620 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2621 case '@' | TYPE_IS_SHRIEKING:
2624 from = start + symptr->strbeg;
2626 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2627 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2628 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2632 while (len && from < cur) {
2633 from += UTF8SKIP(from);
2637 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2639 /* Here we know from == cur */
2641 GROWING(0, cat, start, cur, len);
2642 Zero(cur, len, char);
2644 } else if (from < cur) {
2647 } else goto no_change;
2655 if (len > 0) goto grow;
2656 if (len == 0) goto no_change;
2663 tempsym_t savsym = *symptr;
2664 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2665 symptr->flags |= group_modifiers;
2666 symptr->patend = savsym.grpend;
2668 symptr->previous = &lookahead;
2671 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2672 else symptr->flags &= ~FLAG_PARSE_UTF8;
2673 was_utf8 = SvUTF8(cat);
2674 symptr->patptr = savsym.grpbeg;
2675 beglist = pack_rec(cat, symptr, beglist, endlist);
2676 if (SvUTF8(cat) != was_utf8)
2677 /* This had better be an upgrade while in utf8==0 mode */
2680 if (savsym.howlen == e_star && beglist == endlist)
2681 break; /* No way to continue */
2683 lookahead.flags = symptr->flags & ~group_modifiers;
2686 case 'X' | TYPE_IS_SHRIEKING:
2687 if (!len) /* Avoid division by 0 */
2694 hop += UTF8SKIP(hop);
2701 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2705 len = (cur-start) % len;
2709 if (len < 1) goto no_change;
2713 Perl_croak(aTHX_ "'%c' outside of string in pack",
2714 (int) TYPE_NO_MODIFIERS(datumtype));
2715 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2717 Perl_croak(aTHX_ "'%c' outside of string in pack",
2718 (int) TYPE_NO_MODIFIERS(datumtype));
2724 if (cur - start < len)
2725 Perl_croak(aTHX_ "'%c' outside of string in pack",
2726 (int) TYPE_NO_MODIFIERS(datumtype));
2729 if (cur < start+symptr->strbeg) {
2730 /* Make sure group starts don't point into the void */
2732 const STRLEN length = cur-start;
2733 for (group = symptr;
2734 group && length < group->strbeg;
2735 group = group->previous) group->strbeg = length;
2736 lookahead.strbeg = length;
2739 case 'x' | TYPE_IS_SHRIEKING: {
2741 if (!len) /* Avoid division by 0 */
2743 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2744 else ai32 = (cur - start) % len;
2745 if (ai32 == 0) goto no_change;
2757 aptr = SvPV_const(fromstr, fromlen);
2758 if (DO_UTF8(fromstr)) {
2759 const char *end, *s;
2761 if (!utf8 && !SvUTF8(cat)) {
2762 marked_upgrade(aTHX_ cat, symptr);
2763 lookahead.flags |= FLAG_DO_UTF8;
2764 lookahead.strbeg = symptr->strbeg;
2767 cur = start + SvCUR(cat);
2769 if (howlen == e_star) {
2770 if (utf8) goto string_copy;
2774 end = aptr + fromlen;
2775 fromlen = datumtype == 'Z' ? len-1 : len;
2776 while ((I32) fromlen > 0 && s < end) {
2781 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2784 if (datumtype == 'Z') len++;
2790 fromlen = len - fromlen;
2791 if (datumtype == 'Z') fromlen--;
2792 if (howlen == e_star) {
2794 if (datumtype == 'Z') len++;
2796 GROWING(0, cat, start, cur, len);
2797 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2798 datumtype | TYPE_IS_PACK))
2799 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2803 if (howlen == e_star) {
2805 if (datumtype == 'Z') len++;
2807 if (len <= (I32) fromlen) {
2809 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2811 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2813 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2814 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2816 while (fromlen > 0) {
2817 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2823 if (howlen == e_star) {
2825 if (datumtype == 'Z') len++;
2827 if (len <= (I32) fromlen) {
2829 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2831 GROWING(0, cat, start, cur, len);
2832 Copy(aptr, cur, fromlen, char);
2836 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2842 const char *str, *end;
2849 str = SvPV_const(fromstr, fromlen);
2850 end = str + fromlen;
2851 if (DO_UTF8(fromstr)) {
2853 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2855 utf8_source = FALSE;
2856 utf8_flags = 0; /* Unused, but keep compilers happy */
2858 if (howlen == e_star) len = fromlen;
2859 field_len = (len+7)/8;
2860 GROWING(utf8, cat, start, cur, field_len);
2861 if (len > (I32)fromlen) len = fromlen;
2864 if (datumtype == 'B')
2868 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2870 } else bits |= *str++ & 1;
2871 if (l & 7) bits <<= 1;
2873 PUSH_BYTE(utf8, cur, bits);
2878 /* datumtype == 'b' */
2882 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2883 if (val & 1) bits |= 0x80;
2884 } else if (*str++ & 1)
2886 if (l & 7) bits >>= 1;
2888 PUSH_BYTE(utf8, cur, bits);
2894 if (datumtype == 'B')
2895 bits <<= 7 - (l & 7);
2897 bits >>= 7 - (l & 7);
2898 PUSH_BYTE(utf8, cur, bits);
2901 /* Determine how many chars are left in the requested field */
2903 if (howlen == e_star) field_len = 0;
2904 else field_len -= l;
2905 Zero(cur, field_len, char);
2911 const char *str, *end;
2918 str = SvPV_const(fromstr, fromlen);
2919 end = str + fromlen;
2920 if (DO_UTF8(fromstr)) {
2922 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2924 utf8_source = FALSE;
2925 utf8_flags = 0; /* Unused, but keep compilers happy */
2927 if (howlen == e_star) len = fromlen;
2928 field_len = (len+1)/2;
2929 GROWING(utf8, cat, start, cur, field_len);
2930 if (!utf8 && len > (I32)fromlen) len = fromlen;
2933 if (datumtype == 'H')
2937 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2938 if (val < 256 && isALPHA(val))
2939 bits |= (val + 9) & 0xf;
2942 } else if (isALPHA(*str))
2943 bits |= (*str++ + 9) & 0xf;
2945 bits |= *str++ & 0xf;
2946 if (l & 1) bits <<= 4;
2948 PUSH_BYTE(utf8, cur, bits);
2956 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2957 if (val < 256 && isALPHA(val))
2958 bits |= ((val + 9) & 0xf) << 4;
2960 bits |= (val & 0xf) << 4;
2961 } else if (isALPHA(*str))
2962 bits |= ((*str++ + 9) & 0xf) << 4;
2964 bits |= (*str++ & 0xf) << 4;
2965 if (l & 1) bits >>= 4;
2967 PUSH_BYTE(utf8, cur, bits);
2973 PUSH_BYTE(utf8, cur, bits);
2976 /* Determine how many chars are left in the requested field */
2978 if (howlen == e_star) field_len = 0;
2979 else field_len -= l;
2980 Zero(cur, field_len, char);
2988 aiv = SvIV(fromstr);
2989 if ((-128 > aiv || aiv > 127) &&
2991 Perl_warner(aTHX_ packWARN(WARN_PACK),
2992 "Character in 'c' format wrapped in pack");
2993 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2998 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3001 GROWING(0, cat, start, cur, len);
3005 aiv = SvIV(fromstr);
3006 if ((0 > aiv || aiv > 0xff) &&
3008 Perl_warner(aTHX_ packWARN(WARN_PACK),
3009 "Character in 'C' format wrapped in pack");
3010 *cur++ = (char)(aiv & 0xff);
3015 U8 in_bytes = IN_BYTES;
3017 end = start+SvLEN(cat)-1;
3018 if (utf8) end -= UTF8_MAXLEN-1;
3022 auv = SvUV(fromstr);
3023 if (in_bytes) auv = auv % 0x100;
3028 SvCUR_set(cat, cur - start);
3030 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3031 end = start+SvLEN(cat)-UTF8_MAXLEN;
3033 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3036 0 : UNICODE_ALLOW_ANY);
3041 SvCUR_set(cat, cur - start);
3042 marked_upgrade(aTHX_ cat, symptr);
3043 lookahead.flags |= FLAG_DO_UTF8;
3044 lookahead.strbeg = symptr->strbeg;
3047 cur = start + SvCUR(cat);
3048 end = start+SvLEN(cat)-UTF8_MAXLEN;
3051 if (ckWARN(WARN_PACK))
3052 Perl_warner(aTHX_ packWARN(WARN_PACK),
3053 "Character in 'W' format wrapped in pack");
3058 SvCUR_set(cat, cur - start);
3059 GROWING(0, cat, start, cur, len+1);
3060 end = start+SvLEN(cat)-1;
3062 *(U8 *) cur++ = (U8)auv;
3071 if (!(symptr->flags & FLAG_DO_UTF8)) {
3072 marked_upgrade(aTHX_ cat, symptr);
3073 lookahead.flags |= FLAG_DO_UTF8;
3074 lookahead.strbeg = symptr->strbeg;
3080 end = start+SvLEN(cat);
3081 if (!utf8) end -= UTF8_MAXLEN;
3085 auv = SvUV(fromstr);
3087 U8 buffer[UTF8_MAXLEN], *endb;
3088 endb = uvuni_to_utf8_flags(buffer, auv,
3090 0 : UNICODE_ALLOW_ANY);
3091 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3093 SvCUR_set(cat, cur - start);
3094 GROWING(0, cat, start, cur,
3095 len+(endb-buffer)*UTF8_EXPAND);
3096 end = start+SvLEN(cat);
3098 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3102 SvCUR_set(cat, cur - start);
3103 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3104 end = start+SvLEN(cat)-UTF8_MAXLEN;
3106 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3108 0 : UNICODE_ALLOW_ANY);
3113 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3119 anv = SvNV(fromstr);
3121 /* VOS does not automatically map a floating-point overflow
3122 during conversion from double to float into infinity, so we
3123 do it by hand. This code should either be generalized for
3124 any OS that needs it, or removed if and when VOS implements
3125 posix-976 (suggestion to support mapping to infinity).
3126 Paul.Green@stratus.com 02-04-02. */
3128 afloat = _float_constants[0]; /* single prec. inf. */
3129 else if (anv < -FLT_MAX)
3130 afloat = _float_constants[0]; /* single prec. inf. */
3131 else afloat = (float) anv;
3133 # if defined(VMS) && !defined(__IEEE_FP)
3134 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3135 * on Alpha; fake it if we don't have them.
3139 else if (anv < -FLT_MAX)
3141 else afloat = (float)anv;
3143 afloat = (float)anv;
3145 #endif /* __VOS__ */
3146 DO_BO_PACK_N(afloat, float);
3147 PUSH_VAR(utf8, cur, afloat);
3155 anv = SvNV(fromstr);
3157 /* VOS does not automatically map a floating-point overflow
3158 during conversion from long double to double into infinity,
3159 so we do it by hand. This code should either be generalized
3160 for any OS that needs it, or removed if and when VOS
3161 implements posix-976 (suggestion to support mapping to
3162 infinity). Paul.Green@stratus.com 02-04-02. */
3164 adouble = _double_constants[0]; /* double prec. inf. */
3165 else if (anv < -DBL_MAX)
3166 adouble = _double_constants[0]; /* double prec. inf. */
3167 else adouble = (double) anv;
3169 # if defined(VMS) && !defined(__IEEE_FP)
3170 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3171 * on Alpha; fake it if we don't have them.
3175 else if (anv < -DBL_MAX)
3177 else adouble = (double)anv;
3179 adouble = (double)anv;
3181 #endif /* __VOS__ */
3182 DO_BO_PACK_N(adouble, double);
3183 PUSH_VAR(utf8, cur, adouble);
3188 Zero(&anv, 1, NV); /* can be long double with unused bits */
3191 anv = SvNV(fromstr);
3192 DO_BO_PACK_N(anv, NV);
3193 PUSH_VAR(utf8, cur, anv);
3197 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3199 long double aldouble;
3200 /* long doubles can have unused bits, which may be nonzero */
3201 Zero(&aldouble, 1, long double);
3204 aldouble = (long double)SvNV(fromstr);
3205 DO_BO_PACK_N(aldouble, long double);
3206 PUSH_VAR(utf8, cur, aldouble);
3211 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3212 case 'n' | TYPE_IS_SHRIEKING:
3218 ai16 = (I16)SvIV(fromstr);
3220 ai16 = PerlSock_htons(ai16);
3222 PUSH16(utf8, cur, &ai16);
3225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3226 case 'v' | TYPE_IS_SHRIEKING:
3232 ai16 = (I16)SvIV(fromstr);
3236 PUSH16(utf8, cur, &ai16);
3239 case 'S' | TYPE_IS_SHRIEKING:
3240 #if SHORTSIZE != SIZE16
3242 unsigned short aushort;
3244 aushort = SvUV(fromstr);
3245 DO_BO_PACK(aushort, s);
3246 PUSH_VAR(utf8, cur, aushort);
3256 au16 = (U16)SvUV(fromstr);
3257 DO_BO_PACK(au16, 16);
3258 PUSH16(utf8, cur, &au16);
3261 case 's' | TYPE_IS_SHRIEKING:
3262 #if SHORTSIZE != SIZE16
3266 ashort = SvIV(fromstr);
3267 DO_BO_PACK(ashort, s);
3268 PUSH_VAR(utf8, cur, ashort);
3278 ai16 = (I16)SvIV(fromstr);
3279 DO_BO_PACK(ai16, 16);
3280 PUSH16(utf8, cur, &ai16);
3284 case 'I' | TYPE_IS_SHRIEKING:
3288 auint = SvUV(fromstr);
3289 DO_BO_PACK(auint, i);
3290 PUSH_VAR(utf8, cur, auint);
3297 aiv = SvIV(fromstr);
3298 #if IVSIZE == INTSIZE
3300 #elif IVSIZE == LONGSIZE
3302 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3303 DO_BO_PACK(aiv, 64);
3305 Perl_croak(aTHX_ "'j' not supported on this platform");
3307 PUSH_VAR(utf8, cur, aiv);
3314 auv = SvUV(fromstr);
3315 #if UVSIZE == INTSIZE
3317 #elif UVSIZE == LONGSIZE
3319 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3320 DO_BO_PACK(auv, 64);
3322 Perl_croak(aTHX_ "'J' not supported on this platform");
3324 PUSH_VAR(utf8, cur, auv);
3331 anv = SvNV(fromstr);
3335 SvCUR_set(cat, cur - start);
3336 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3339 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3340 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3341 any negative IVs will have already been got by the croak()
3342 above. IOK is untrue for fractions, so we test them
3343 against UV_MAX_P1. */
3344 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3345 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3346 char *in = buf + sizeof(buf);
3347 UV auv = SvUV(fromstr);
3350 *--in = (char)((auv & 0x7f) | 0x80);
3353 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3354 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3355 in, (buf + sizeof(buf)) - in);
3356 } else if (SvPOKp(fromstr))
3358 else if (SvNOKp(fromstr)) {
3359 /* 10**NV_MAX_10_EXP is the largest power of 10
3360 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3361 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3362 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3363 And with that many bytes only Inf can overflow.
3364 Some C compilers are strict about integral constant
3365 expressions so we conservatively divide by a slightly
3366 smaller integer instead of multiplying by the exact
3367 floating-point value.
3369 #ifdef NV_MAX_10_EXP
3370 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3371 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3373 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3374 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3376 char *in = buf + sizeof(buf);
3378 anv = Perl_floor(anv);
3380 const NV next = Perl_floor(anv / 128);
3381 if (in <= buf) /* this cannot happen ;-) */
3382 Perl_croak(aTHX_ "Cannot compress integer in pack");
3383 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3386 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (buf + sizeof(buf)) - in);
3397 /* Copy string and check for compliance */
3398 from = SvPV_const(fromstr, len);
3399 if ((norm = is_an_int(from, len)) == NULL)
3400 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3402 Newx(result, len, char);
3405 while (!done) *--in = div128(norm, &done) | 0x80;
3406 result[len - 1] &= 0x7F; /* clear continue bit */
3407 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3408 in, (result + len) - in);
3410 SvREFCNT_dec(norm); /* free norm */
3415 case 'i' | TYPE_IS_SHRIEKING:
3419 aint = SvIV(fromstr);
3420 DO_BO_PACK(aint, i);
3421 PUSH_VAR(utf8, cur, aint);
3424 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3425 case 'N' | TYPE_IS_SHRIEKING:
3431 au32 = SvUV(fromstr);
3433 au32 = PerlSock_htonl(au32);
3435 PUSH32(utf8, cur, &au32);
3438 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3439 case 'V' | TYPE_IS_SHRIEKING:
3445 au32 = SvUV(fromstr);
3449 PUSH32(utf8, cur, &au32);
3452 case 'L' | TYPE_IS_SHRIEKING:
3453 #if LONGSIZE != SIZE32
3455 unsigned long aulong;
3457 aulong = SvUV(fromstr);
3458 DO_BO_PACK(aulong, l);
3459 PUSH_VAR(utf8, cur, aulong);
3469 au32 = SvUV(fromstr);
3470 DO_BO_PACK(au32, 32);
3471 PUSH32(utf8, cur, &au32);
3474 case 'l' | TYPE_IS_SHRIEKING:
3475 #if LONGSIZE != SIZE32
3479 along = SvIV(fromstr);
3480 DO_BO_PACK(along, l);
3481 PUSH_VAR(utf8, cur, along);
3491 ai32 = SvIV(fromstr);
3492 DO_BO_PACK(ai32, 32);
3493 PUSH32(utf8, cur, &ai32);
3501 auquad = (Uquad_t) SvUV(fromstr);
3502 DO_BO_PACK(auquad, 64);
3503 PUSH_VAR(utf8, cur, auquad);
3510 aquad = (Quad_t)SvIV(fromstr);
3511 DO_BO_PACK(aquad, 64);
3512 PUSH_VAR(utf8, cur, aquad);
3515 #endif /* HAS_QUAD */
3517 len = 1; /* assume SV is correct length */
3518 GROWING(utf8, cat, start, cur, sizeof(char *));
3525 SvGETMAGIC(fromstr);
3526 if (!SvOK(fromstr)) aptr = NULL;
3528 /* XXX better yet, could spirit away the string to
3529 * a safe spot and hang on to it until the result
3530 * of pack() (and all copies of the result) are
3533 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3534 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3535 Perl_warner(aTHX_ packWARN(WARN_PACK),
3536 "Attempt to pack pointer to temporary value");
3538 if (SvPOK(fromstr) || SvNIOK(fromstr))
3539 aptr = SvPV_nomg_const_nolen(fromstr);
3541 aptr = SvPV_force_flags_nolen(fromstr, 0);
3543 DO_BO_PACK_PC(aptr);
3544 PUSH_VAR(utf8, cur, aptr);
3548 const char *aptr, *aend;
3552 if (len <= 2) len = 45;
3553 else len = len / 3 * 3;
3555 if (ckWARN(WARN_PACK))
3556 Perl_warner(aTHX_ packWARN(WARN_PACK),
3557 "Field too wide in 'u' format in pack");
3560 aptr = SvPV_const(fromstr, fromlen);
3561 from_utf8 = DO_UTF8(fromstr);
3563 aend = aptr + fromlen;
3564 fromlen = sv_len_utf8(fromstr);
3565 } else aend = NULL; /* Unused, but keep compilers happy */
3566 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3567 while (fromlen > 0) {
3570 U8 hunk[1+63/3*4+1];
3572 if ((I32)fromlen > len)
3578 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3579 'u' | TYPE_IS_PACK)) {
3581 SvCUR_set(cat, cur - start);
3582 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3584 end = doencodes(hunk, buffer, todo);
3586 end = doencodes(hunk, aptr, todo);
3589 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3596 SvCUR_set(cat, cur - start);
3598 *symptr = lookahead;
3607 dSP; dMARK; dORIGMARK; dTARGET;
3608 register SV *cat = TARG;
3610 SV *pat_sv = *++MARK;
3611 register const char *pat = SvPV_const(pat_sv, fromlen);
3612 register const char *patend = pat + fromlen;
3615 sv_setpvn(cat, "", 0);
3618 packlist(cat, pat, patend, MARK, SP + 1);
3628 * c-indentation-style: bsd
3630 * indent-tabs-mode: t
3633 * ex: set ts=8 sts=4 sw=4 noet: