3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325 # elif PTRSIZE == IVSIZE
326 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
331 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
337 # if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339 # define DO_BO_UNPACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 # define DO_BO_PACK_N(var, type) \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
361 #endif /* PERL_PACK_CAN_BYTEORDER */
363 #define PACK_SIZE_CANNOT_CSUM 0x80
364 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365 #define PACK_SIZE_MASK 0x3F
367 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
370 #if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
374 typedef U8 packprops_t;
377 STATIC const packprops_t packprops[512] = {
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
393 /* I */ sizeof(unsigned int),
400 #if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
415 /* f */ sizeof(float),
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425 #if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
452 /* L */ sizeof(unsigned long),
454 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
460 /* S */ sizeof(unsigned short),
462 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 /* l */ sizeof(long),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
479 /* s */ sizeof(short),
481 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
497 /* EBCDIC (or bust) */
498 STATIC const packprops_t packprops[512] = {
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
512 /* f */ sizeof(float),
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523 #if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
544 /* I */ sizeof(unsigned int),
552 #if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
579 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
587 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
599 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
607 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
618 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
640 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 const char *from = *s;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
659 } else from += retlen;
664 *(U8 *)buf++ = (U8)val;
666 /* We have enough characters for the buffer. Did we have problems ? */
669 /* Rewalk the string fragment while warning */
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
676 if (from > end) from = end;
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
690 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = PL_uudmap[val] & 077;
706 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
707 U8 buffer[UTF8_MAXLEN];
708 const U8 * const end = start + len;
710 while (start < end) {
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
730 #define PUSH_BYTES(utf8, cur, buf, len) \
732 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
734 Copy(buf, cur, len, char); \
739 #define GROWING(utf8, cat, start, cur, in_len) \
741 STRLEN glen = (in_len); \
742 if (utf8) glen *= UTF8_EXPAND; \
743 if ((cur) + glen >= (start) + SvLEN(cat)) { \
744 (start) = sv_exp_grow(cat, glen); \
745 (cur) = (start) + SvCUR(cat); \
749 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
751 const STRLEN glen = (in_len); \
753 if (utf8) gl *= UTF8_EXPAND; \
754 if ((cur) + gl >= (start) + SvLEN(cat)) { \
756 SvCUR_set((cat), (cur) - (start)); \
757 (start) = sv_exp_grow(cat, gl); \
758 (cur) = (start) + SvCUR(cat); \
760 PUSH_BYTES(utf8, cur, buf, glen); \
763 #define PUSH_BYTE(utf8, s, byte) \
766 const U8 au8 = (byte); \
767 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
768 } else *(U8 *)(s)++ = (byte); \
771 /* Only to be used inside a loop (see the break) */
772 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
775 if (str >= end) break; \
776 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
777 if (retlen == (STRLEN) -1 || retlen == 0) { \
779 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
784 static const char *_action( const tempsym_t* symptr )
786 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
789 /* Returns the sizeof() struct described by pat */
791 S_measure_struct(pTHX_ tempsym_t* symptr)
795 while (next_symbol(symptr)) {
799 switch (symptr->howlen) {
801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
805 /* e_no_len and e_number */
806 len = symptr->length;
810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
819 #ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
826 case 'U': /* XXXX Is it correct? */
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 (int) TYPE_NO_MODIFIERS(symptr->code),
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 if (!len) /* Avoid division by 0 */
853 len = total % len; /* Assumed: the start is aligned. */
858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
888 size = sizeof(char*);
898 /* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
904 while (patptr < patend) {
905 const char c = *patptr++;
912 while (patptr < patend && *patptr != '\n')
916 patptr = group_end(patptr, patend, ')') + 1;
918 patptr = group_end(patptr, patend, ']') + 1;
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
926 /* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
931 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
933 I32 len = *patptr++ - '0';
934 while (isDIGIT(*patptr)) {
935 if (len >= 0x7FFFFFFF/10)
936 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
937 len = (len * 10) + (*patptr++ - '0');
943 /* The marvellous template parsing routine: Using state stored in *symptr,
944 * locates next template code and count
947 S_next_symbol(pTHX_ tempsym_t* symptr )
949 const char* patptr = symptr->patptr;
950 const char* const patend = symptr->patend;
952 symptr->flags &= ~FLAG_SLASH;
954 while (patptr < patend) {
955 if (isSPACE(*patptr))
957 else if (*patptr == '#') {
959 while (patptr < patend && *patptr != '\n')
964 /* We should have found a template code */
965 I32 code = *patptr++ & 0xFF;
966 U32 inherited_modifiers = 0;
968 if (code == ','){ /* grandfather in commas but with a warning */
969 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
970 symptr->flags |= FLAG_COMMA;
971 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
972 "Invalid type ',' in %s", _action( symptr ) );
977 /* for '(', skip to ')' */
979 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
980 Perl_croak(aTHX_ "()-group starts with a count in %s",
982 symptr->grpbeg = patptr;
983 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
984 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
985 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
989 /* look for group modifiers to inherit */
990 if (TYPE_ENDIANNESS(symptr->flags)) {
991 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
992 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
995 /* look for modifiers */
996 while (patptr < patend) {
1001 modifier = TYPE_IS_SHRIEKING;
1002 allowed = SHRIEKING_ALLOWED_TYPES;
1004 #ifdef PERL_PACK_CAN_BYTEORDER
1006 modifier = TYPE_IS_BIG_ENDIAN;
1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1010 modifier = TYPE_IS_LITTLE_ENDIAN;
1011 allowed = ENDIANNESS_ALLOWED_TYPES;
1013 #endif /* PERL_PACK_CAN_BYTEORDER */
1023 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1024 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1025 allowed, _action( symptr ) );
1027 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1028 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1029 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1030 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1031 TYPE_ENDIANNESS_MASK)
1032 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1033 *patptr, _action( symptr ) );
1035 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1036 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1037 "Duplicate modifier '%c' after '%c' in %s",
1038 *patptr, (int) TYPE_NO_MODIFIERS(code),
1039 _action( symptr ) );
1046 /* inherit modifiers */
1047 code |= inherited_modifiers;
1049 /* look for count and/or / */
1050 if (patptr < patend) {
1051 if (isDIGIT(*patptr)) {
1052 patptr = get_num( patptr, &symptr->length );
1053 symptr->howlen = e_number;
1055 } else if (*patptr == '*') {
1057 symptr->howlen = e_star;
1059 } else if (*patptr == '[') {
1060 const char* lenptr = ++patptr;
1061 symptr->howlen = e_number;
1062 patptr = group_end( patptr, patend, ']' ) + 1;
1063 /* what kind of [] is it? */
1064 if (isDIGIT(*lenptr)) {
1065 lenptr = get_num( lenptr, &symptr->length );
1066 if( *lenptr != ']' )
1067 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1068 _action( symptr ) );
1070 tempsym_t savsym = *symptr;
1071 symptr->patend = patptr-1;
1072 symptr->patptr = lenptr;
1073 savsym.length = measure_struct(symptr);
1077 symptr->howlen = e_no_len;
1082 while (patptr < patend) {
1083 if (isSPACE(*patptr))
1085 else if (*patptr == '#') {
1087 while (patptr < patend && *patptr != '\n')
1089 if (patptr < patend)
1092 if (*patptr == '/') {
1093 symptr->flags |= FLAG_SLASH;
1095 if (patptr < patend &&
1096 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1097 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1098 _action( symptr ) );
1104 /* at end - no count, no / */
1105 symptr->howlen = e_no_len;
1109 symptr->code = code;
1110 symptr->patptr = patptr;
1114 symptr->patptr = patptr;
1119 There is no way to cleanly handle the case where we should process the
1120 string per byte in its upgraded form while it's really in downgraded form
1121 (e.g. estimates like strend-s as an upper bound for the number of
1122 characters left wouldn't work). So if we foresee the need of this
1123 (pattern starts with U or contains U0), we want to work on the encoded
1124 version of the string. Users are advised to upgrade their pack string
1125 themselves if they need to do a lot of unpacks like this on it
1128 need_utf8(const char *pat, const char *patend)
1131 while (pat < patend) {
1132 if (pat[0] == '#') {
1134 pat = (const char *) memchr(pat, '\n', patend-pat);
1135 if (!pat) return FALSE;
1136 } else if (pat[0] == 'U') {
1137 if (first || pat[1] == '0') return TRUE;
1138 } else first = FALSE;
1145 first_symbol(const char *pat, const char *patend) {
1146 while (pat < patend) {
1147 if (pat[0] != '#') return pat[0];
1149 pat = (const char *) memchr(pat, '\n', patend-pat);
1157 =for apidoc unpack_str
1159 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1160 and ocnt are not used. This call should not be used, use unpackstring instead.
1165 Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1168 PERL_UNUSED_ARG(strbeg);
1169 PERL_UNUSED_ARG(new_s);
1170 PERL_UNUSED_ARG(ocnt);
1172 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1173 else if (need_utf8(pat, patend)) {
1174 /* We probably should try to avoid this in case a scalar context call
1175 wouldn't get to the "U0" */
1176 STRLEN len = strend - s;
1177 s = (char *) bytes_to_utf8((U8 *) s, &len);
1180 flags |= FLAG_DO_UTF8;
1183 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1184 flags |= FLAG_PARSE_UTF8;
1186 TEMPSYM_INIT(&sym, pat, patend, flags);
1188 return unpack_rec(&sym, s, s, strend, NULL );
1192 =for apidoc unpackstring
1194 The engine implementing unpack() Perl function. C<unpackstring> puts the
1195 extracted list items on the stack and returns the number of elements.
1196 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1201 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
1213 flags |= FLAG_DO_UTF8;
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
1219 TEMPSYM_INIT(&sym, pat, patend, flags);
1221 return unpack_rec(&sym, s, s, strend, NULL );
1226 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1230 const I32 start_sp_offset = SP - PL_stack_base;
1236 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1237 bool beyond = FALSE;
1238 bool explicit_length;
1239 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1240 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1241 symptr->strbeg = s - strbeg;
1243 while (next_symbol(symptr)) {
1246 I32 datumtype = symptr->code;
1247 /* do first one only unless in list context
1248 / is implemented by unpacking the count, then popping it from the
1249 stack, so must check that we're not in the middle of a / */
1250 if ( unpack_only_one
1251 && (SP - PL_stack_base == start_sp_offset + 1)
1252 && (datumtype != '/') ) /* XXX can this be omitted */
1255 switch (howlen = symptr->howlen) {
1257 len = strend - strbeg; /* long enough */
1260 /* e_no_len and e_number */
1261 len = symptr->length;
1265 explicit_length = TRUE;
1267 beyond = s >= strend;
1269 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1271 /* props nonzero means we can process this letter. */
1272 const long size = props & PACK_SIZE_MASK;
1273 const long howmany = (strend - s) / size;
1277 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1278 if (len && unpack_only_one) len = 1;
1284 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1286 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1289 if (howlen == e_no_len)
1290 len = 16; /* len is not specified */
1298 tempsym_t savsym = *symptr;
1299 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1300 symptr->flags |= group_modifiers;
1301 symptr->patend = savsym.grpend;
1302 symptr->previous = &savsym;
1306 symptr->patptr = savsym.grpbeg;
1307 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1308 else symptr->flags &= ~FLAG_PARSE_UTF8;
1309 unpack_rec(symptr, s, strbeg, strend, &s);
1310 if (s == strend && savsym.howlen == e_star)
1311 break; /* No way to continue */
1314 savsym.flags = symptr->flags & ~group_modifiers;
1318 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1319 case '.' | TYPE_IS_SHRIEKING:
1324 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1326 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1327 const bool u8 = utf8;
1329 if (howlen == e_star) from = strbeg;
1330 else if (len <= 0) from = s;
1332 tempsym_t *group = symptr;
1334 while (--len && group) group = group->previous;
1335 from = group ? strbeg + group->strbeg : strbeg;
1338 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1339 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1340 XPUSHs(sv_2mortal(sv));
1343 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1344 case '@' | TYPE_IS_SHRIEKING:
1347 s = strbeg + symptr->strbeg;
1348 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1349 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1350 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1356 Perl_croak(aTHX_ "'@' outside of string in unpack");
1361 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1364 Perl_croak(aTHX_ "'@' outside of string in unpack");
1368 case 'X' | TYPE_IS_SHRIEKING:
1369 if (!len) /* Avoid division by 0 */
1372 const char *hop, *last;
1374 hop = last = strbeg;
1376 hop += UTF8SKIP(hop);
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1387 len = (s - strbeg) % len;
1393 Perl_croak(aTHX_ "'X' outside of string in unpack");
1394 while (--s, UTF8_IS_CONTINUATION(*s)) {
1396 Perl_croak(aTHX_ "'X' outside of string in unpack");
1401 if (len > s - strbeg)
1402 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1406 case 'x' | TYPE_IS_SHRIEKING: {
1408 if (!len) /* Avoid division by 0 */
1410 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1411 else ai32 = (s - strbeg) % len;
1412 if (ai32 == 0) break;
1420 Perl_croak(aTHX_ "'x' outside of string in unpack");
1425 if (len > strend - s)
1426 Perl_croak(aTHX_ "'x' outside of string in unpack");
1431 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1437 /* Preliminary length estimate is assumed done in 'W' */
1438 if (len > strend - s) len = strend - s;
1444 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1445 if (hop >= strend) {
1447 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1452 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1454 } else if (len > strend - s)
1457 if (datumtype == 'Z') {
1458 /* 'Z' strips stuff after first null */
1459 const char *ptr, *end;
1461 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1462 sv = newSVpvn(s, ptr-s);
1463 if (howlen == e_star) /* exact for 'Z*' */
1464 len = ptr-s + (ptr != strend ? 1 : 0);
1465 } else if (datumtype == 'A') {
1466 /* 'A' strips both nulls and spaces */
1468 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1471 !is_utf8_space((U8 *) ptr)) break;
1472 if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1477 for (ptr = s+len-1; ptr >= s; ptr--)
1478 if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 sv = newSVpvn(s, ptr-s);
1482 } else sv = newSVpvn(s, len);
1486 /* Undo any upgrade done due to need_utf8() */
1487 if (!(symptr->flags & FLAG_WAS_UTF8))
1488 sv_utf8_downgrade(sv, 0);
1490 XPUSHs(sv_2mortal(sv));
1496 if (howlen == e_star || len > (strend - s) * 8)
1497 len = (strend - s) * 8;
1501 Newxz(PL_bitcount, 256, char);
1502 for (bits = 1; bits < 256; bits++) {
1503 if (bits & 1) PL_bitcount[bits]++;
1504 if (bits & 2) PL_bitcount[bits]++;
1505 if (bits & 4) PL_bitcount[bits]++;
1506 if (bits & 8) PL_bitcount[bits]++;
1507 if (bits & 16) PL_bitcount[bits]++;
1508 if (bits & 32) PL_bitcount[bits]++;
1509 if (bits & 64) PL_bitcount[bits]++;
1510 if (bits & 128) PL_bitcount[bits]++;
1514 while (len >= 8 && s < strend) {
1515 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1520 cuv += PL_bitcount[*(U8 *)s++];
1523 if (len && s < strend) {
1525 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1526 if (datumtype == 'b')
1528 if (bits & 1) cuv++;
1533 if (bits & 0x80) cuv++;
1540 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1543 if (datumtype == 'b') {
1545 const I32 ai32 = len;
1546 for (len = 0; len < ai32; len++) {
1547 if (len & 7) bits >>= 1;
1549 if (s >= strend) break;
1550 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1551 } else bits = *(U8 *) s++;
1552 *str++ = bits & 1 ? '1' : '0';
1556 const I32 ai32 = len;
1557 for (len = 0; len < ai32; len++) {
1558 if (len & 7) bits <<= 1;
1560 if (s >= strend) break;
1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1562 } else bits = *(U8 *) s++;
1563 *str++ = bits & 0x80 ? '1' : '0';
1567 SvCUR_set(sv, str - SvPVX_const(sv));
1574 /* Preliminary length estimate, acceptable for utf8 too */
1575 if (howlen == e_star || len > (strend - s) * 2)
1576 len = (strend - s) * 2;
1577 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1580 if (datumtype == 'h') {
1583 for (len = 0; len < ai32; len++) {
1584 if (len & 1) bits >>= 4;
1586 if (s >= strend) break;
1587 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1588 } else bits = * (U8 *) s++;
1589 *str++ = PL_hexdigit[bits & 15];
1593 const I32 ai32 = len;
1594 for (len = 0; len < ai32; len++) {
1595 if (len & 1) bits <<= 4;
1597 if (s >= strend) break;
1598 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1599 } else bits = *(U8 *) s++;
1600 *str++ = PL_hexdigit[(bits >> 4) & 15];
1604 SvCUR_set(sv, str - SvPVX_const(sv));
1610 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1611 if (aint >= 128) /* fake up signed chars */
1614 PUSHs(sv_2mortal(newSViv((IV)aint)));
1615 else if (checksum > bits_in_uv)
1616 cdouble += (NV)aint;
1625 if (explicit_length && datumtype == 'C')
1626 /* Switch to "character" mode */
1627 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1630 if (datumtype == 'C' ?
1631 (symptr->flags & FLAG_DO_UTF8) &&
1632 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1633 while (len-- > 0 && s < strend) {
1635 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1636 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1637 if (retlen == (STRLEN) -1 || retlen == 0)
1638 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1641 PUSHs(sv_2mortal(newSVuv((UV) val)));
1642 else if (checksum > bits_in_uv)
1643 cdouble += (NV) val;
1647 } else if (!checksum)
1649 const U8 ch = *(U8 *) s++;
1650 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1652 else if (checksum > bits_in_uv)
1653 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1655 while (len-- > 0) cuv += *(U8 *) s++;
1659 if (explicit_length) {
1660 /* Switch to "bytes in UTF-8" mode */
1661 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1663 /* Should be impossible due to the need_utf8() test */
1664 Perl_croak(aTHX_ "U0 mode on a byte string");
1668 if (len > strend - s) len = strend - s;
1670 if (len && unpack_only_one) len = 1;
1674 while (len-- > 0 && s < strend) {
1678 U8 result[UTF8_MAXLEN];
1679 const char *ptr = s;
1681 /* Bug: warns about bad utf8 even if we are short on bytes
1682 and will break out of the loop */
1683 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1686 len = UTF8SKIP(result);
1687 if (!uni_to_bytes(aTHX_ &ptr, strend,
1688 (char *) &result[1], len-1, 'U')) break;
1689 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1692 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1693 if (retlen == (STRLEN) -1 || retlen == 0)
1694 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1698 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1699 else if (checksum > bits_in_uv)
1700 cdouble += (NV) auv;
1705 case 's' | TYPE_IS_SHRIEKING:
1706 #if SHORTSIZE != SIZE16
1709 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1710 DO_BO_UNPACK(ashort, s);
1712 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1713 else if (checksum > bits_in_uv)
1714 cdouble += (NV)ashort;
1726 #if U16SIZE > SIZE16
1729 SHIFT16(utf8, s, strend, &ai16, datumtype);
1730 DO_BO_UNPACK(ai16, 16);
1731 #if U16SIZE > SIZE16
1736 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1737 else if (checksum > bits_in_uv)
1738 cdouble += (NV)ai16;
1743 case 'S' | TYPE_IS_SHRIEKING:
1744 #if SHORTSIZE != SIZE16
1746 unsigned short aushort;
1747 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1748 DO_BO_UNPACK(aushort, s);
1750 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1751 else if (checksum > bits_in_uv)
1752 cdouble += (NV)aushort;
1765 #if U16SIZE > SIZE16
1768 SHIFT16(utf8, s, strend, &au16, datumtype);
1769 DO_BO_UNPACK(au16, 16);
1771 if (datumtype == 'n')
1772 au16 = PerlSock_ntohs(au16);
1775 if (datumtype == 'v')
1779 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1780 else if (checksum > bits_in_uv)
1781 cdouble += (NV) au16;
1786 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1787 case 'v' | TYPE_IS_SHRIEKING:
1788 case 'n' | TYPE_IS_SHRIEKING:
1791 # if U16SIZE > SIZE16
1794 SHIFT16(utf8, s, strend, &ai16, datumtype);
1796 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1797 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1798 # endif /* HAS_NTOHS */
1800 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1801 ai16 = (I16) vtohs((U16) ai16);
1802 # endif /* HAS_VTOHS */
1804 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1805 else if (checksum > bits_in_uv)
1806 cdouble += (NV) ai16;
1811 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1813 case 'i' | TYPE_IS_SHRIEKING:
1816 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1817 DO_BO_UNPACK(aint, i);
1819 PUSHs(sv_2mortal(newSViv((IV)aint)));
1820 else if (checksum > bits_in_uv)
1821 cdouble += (NV)aint;
1827 case 'I' | TYPE_IS_SHRIEKING:
1830 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1831 DO_BO_UNPACK(auint, i);
1833 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1834 else if (checksum > bits_in_uv)
1835 cdouble += (NV)auint;
1843 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1844 #if IVSIZE == INTSIZE
1845 DO_BO_UNPACK(aiv, i);
1846 #elif IVSIZE == LONGSIZE
1847 DO_BO_UNPACK(aiv, l);
1848 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1849 DO_BO_UNPACK(aiv, 64);
1851 Perl_croak(aTHX_ "'j' not supported on this platform");
1854 PUSHs(sv_2mortal(newSViv(aiv)));
1855 else if (checksum > bits_in_uv)
1864 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1865 #if IVSIZE == INTSIZE
1866 DO_BO_UNPACK(auv, i);
1867 #elif IVSIZE == LONGSIZE
1868 DO_BO_UNPACK(auv, l);
1869 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1870 DO_BO_UNPACK(auv, 64);
1872 Perl_croak(aTHX_ "'J' not supported on this platform");
1875 PUSHs(sv_2mortal(newSVuv(auv)));
1876 else if (checksum > bits_in_uv)
1882 case 'l' | TYPE_IS_SHRIEKING:
1883 #if LONGSIZE != SIZE32
1886 SHIFT_VAR(utf8, s, strend, along, datumtype);
1887 DO_BO_UNPACK(along, l);
1889 PUSHs(sv_2mortal(newSViv((IV)along)));
1890 else if (checksum > bits_in_uv)
1891 cdouble += (NV)along;
1902 #if U32SIZE > SIZE32
1905 SHIFT32(utf8, s, strend, &ai32, datumtype);
1906 DO_BO_UNPACK(ai32, 32);
1907 #if U32SIZE > SIZE32
1908 if (ai32 > 2147483647) ai32 -= 4294967296;
1911 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1912 else if (checksum > bits_in_uv)
1913 cdouble += (NV)ai32;
1918 case 'L' | TYPE_IS_SHRIEKING:
1919 #if LONGSIZE != SIZE32
1921 unsigned long aulong;
1922 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1923 DO_BO_UNPACK(aulong, l);
1925 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1926 else if (checksum > bits_in_uv)
1927 cdouble += (NV)aulong;
1940 #if U32SIZE > SIZE32
1943 SHIFT32(utf8, s, strend, &au32, datumtype);
1944 DO_BO_UNPACK(au32, 32);
1946 if (datumtype == 'N')
1947 au32 = PerlSock_ntohl(au32);
1950 if (datumtype == 'V')
1954 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1955 else if (checksum > bits_in_uv)
1956 cdouble += (NV)au32;
1961 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1962 case 'V' | TYPE_IS_SHRIEKING:
1963 case 'N' | TYPE_IS_SHRIEKING:
1966 # if U32SIZE > SIZE32
1969 SHIFT32(utf8, s, strend, &ai32, datumtype);
1971 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1972 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1975 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1976 ai32 = (I32)vtohl((U32)ai32);
1979 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1980 else if (checksum > bits_in_uv)
1981 cdouble += (NV)ai32;
1986 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1990 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1991 DO_BO_UNPACK_PC(aptr);
1992 /* newSVpv generates undef if aptr is NULL */
1993 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
2001 while (len > 0 && s < strend) {
2003 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2004 auv = (auv << 7) | (ch & 0x7f);
2005 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2008 PUSHs(sv_2mortal(newSVuv(auv)));
2013 if (++bytes >= sizeof(UV)) { /* promote to string */
2016 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2017 while (s < strend) {
2018 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2019 sv = mul128(sv, (U8)(ch & 0x7f));
2025 t = SvPV_nolen_const(sv);
2029 PUSHs(sv_2mortal(sv));
2034 if ((s >= strend) && bytes)
2035 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2039 if (symptr->howlen == e_star)
2040 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2042 if (s + sizeof(char*) <= strend) {
2044 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2045 DO_BO_UNPACK_PC(aptr);
2046 /* newSVpvn generates undef if aptr is NULL */
2047 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2054 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2055 DO_BO_UNPACK(aquad, 64);
2057 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2058 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2059 else if (checksum > bits_in_uv)
2060 cdouble += (NV)aquad;
2068 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2069 DO_BO_UNPACK(auquad, 64);
2071 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2072 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2073 else if (checksum > bits_in_uv)
2074 cdouble += (NV)auquad;
2079 #endif /* HAS_QUAD */
2080 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2084 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2085 DO_BO_UNPACK_N(afloat, float);
2087 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2095 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2096 DO_BO_UNPACK_N(adouble, double);
2098 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2106 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2107 DO_BO_UNPACK_N(anv, NV);
2109 PUSHs(sv_2mortal(newSVnv(anv)));
2114 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2117 long double aldouble;
2118 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2119 DO_BO_UNPACK_N(aldouble, long double);
2121 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2123 cdouble += aldouble;
2129 * Initialise the decode mapping. By using a table driven
2130 * algorithm, the code will be character-set independent
2131 * (and just as fast as doing character arithmetic)
2133 if (PL_uudmap['M'] == 0) {
2136 for (i = 0; i < sizeof(PL_uuemap); ++i)
2137 PL_uudmap[(U8)PL_uuemap[i]] = i;
2139 * Because ' ' and '`' map to the same value,
2140 * we need to decode them both the same.
2145 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2146 sv = sv_2mortal(NEWSV(42, l));
2147 if (l) SvPOK_on(sv);
2150 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2156 next_uni_uu(aTHX_ &s, strend, &a);
2157 next_uni_uu(aTHX_ &s, strend, &b);
2158 next_uni_uu(aTHX_ &s, strend, &c);
2159 next_uni_uu(aTHX_ &s, strend, &d);
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2171 /* possible checksum byte */
2172 const char *skip = s+UTF8SKIP(s);
2173 if (skip < strend && *skip == '\n')
2179 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2184 len = PL_uudmap[*(U8*)s++] & 077;
2186 if (s < strend && ISUUCHAR(*s))
2187 a = PL_uudmap[*(U8*)s++] & 077;
2190 if (s < strend && ISUUCHAR(*s))
2191 b = PL_uudmap[*(U8*)s++] & 077;
2194 if (s < strend && ISUUCHAR(*s))
2195 c = PL_uudmap[*(U8*)s++] & 077;
2198 if (s < strend && ISUUCHAR(*s))
2199 d = PL_uudmap[*(U8*)s++] & 077;
2202 hunk[0] = (char)((a << 2) | (b >> 4));
2203 hunk[1] = (char)((b << 4) | (c >> 2));
2204 hunk[2] = (char)((c << 6) | d);
2205 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2210 else /* possible checksum byte */
2211 if (s + 1 < strend && s[1] == '\n')
2220 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2221 (checksum > bits_in_uv &&
2222 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2225 anv = (NV) (1 << (checksum & 15));
2226 while (checksum >= 16) {
2230 while (cdouble < 0.0)
2232 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2233 sv = newSVnv(cdouble);
2236 if (checksum < bits_in_uv) {
2237 UV mask = ((UV)1 << checksum) - 1;
2242 XPUSHs(sv_2mortal(sv));
2246 if (symptr->flags & FLAG_SLASH){
2247 if (SP - PL_stack_base - start_sp_offset <= 0)
2248 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2249 if( next_symbol(symptr) ){
2250 if( symptr->howlen == e_number )
2251 Perl_croak(aTHX_ "Count after length/code in unpack" );
2253 /* ...end of char buffer then no decent length available */
2254 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2256 /* take top of stack (hope it's numeric) */
2259 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2262 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2264 datumtype = symptr->code;
2265 explicit_length = FALSE;
2273 return SP - PL_stack_base - start_sp_offset;
2281 I32 gimme = GIMME_V;
2284 const char *pat = SvPV_const(left, llen);
2285 const char *s = SvPV_const(right, rlen);
2286 const char *strend = s + rlen;
2287 const char *patend = pat + llen;
2291 cnt = unpackstring(pat, patend, s, strend,
2292 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2293 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2296 if ( !cnt && gimme == G_SCALAR )
2297 PUSHs(&PL_sv_undef);
2302 doencodes(U8 *h, const char *s, I32 len)
2304 *h++ = PL_uuemap[len];
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2309 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2314 const char r = (len > 1 ? s[1] : '\0');
2315 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2316 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2317 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2318 *h++ = PL_uuemap[0];
2325 S_is_an_int(pTHX_ const char *s, STRLEN l)
2327 SV *result = newSVpvn(s, l);
2328 char *const result_c = SvPV_nolen(result); /* convenience */
2329 char *out = result_c;
2339 SvREFCNT_dec(result);
2362 SvREFCNT_dec(result);
2368 SvCUR_set(result, out - result_c);
2372 /* pnum must be '\0' terminated */
2374 S_div128(pTHX_ SV *pnum, bool *done)
2377 char * const s = SvPV(pnum, len);
2383 const int i = m * 10 + (*t - '0');
2384 const int r = (i >> 7); /* r < 10 */
2392 SvCUR_set(pnum, (STRLEN) (t - s));
2397 =for apidoc pack_cat
2399 The engine implementing pack() Perl function. Note: parameters next_in_list and
2400 flags are not used. This call should not be used; use packlist instead.
2406 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2409 PERL_UNUSED_ARG(next_in_list);
2410 PERL_UNUSED_ARG(flags);
2412 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2414 (void)pack_rec( cat, &sym, beglist, endlist );
2419 =for apidoc packlist
2421 The engine implementing pack() Perl function.
2427 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2433 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2435 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2436 Also make sure any UTF8 flag is loaded */
2437 SvPV_force(cat, no_len);
2439 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2441 (void)pack_rec( cat, &sym, beglist, endlist );
2444 /* like sv_utf8_upgrade, but also repoint the group start markers */
2446 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2449 const char *from_ptr, *from_start, *from_end, **marks, **m;
2450 char *to_start, *to_ptr;
2452 if (SvUTF8(sv)) return;
2454 from_start = SvPVX_const(sv);
2455 from_end = from_start + SvCUR(sv);
2456 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2457 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2458 if (from_ptr == from_end) {
2459 /* Simple case: no character needs to be changed */
2464 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2465 Newx(to_start, len, char);
2466 Copy(from_start, to_start, from_ptr-from_start, char);
2467 to_ptr = to_start + (from_ptr-from_start);
2469 Newx(marks, sym_ptr->level+2, const char *);
2470 for (group=sym_ptr; group; group = group->previous)
2471 marks[group->level] = from_start + group->strbeg;
2472 marks[sym_ptr->level+1] = from_end+1;
2473 for (m = marks; *m < from_ptr; m++)
2474 *m = to_start + (*m-from_start);
2476 for (;from_ptr < from_end; from_ptr++) {
2477 while (*m == from_ptr) *m++ = to_ptr;
2478 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2482 while (*m == from_ptr) *m++ = to_ptr;
2483 if (m != marks + sym_ptr->level+1) {
2486 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2488 for (group=sym_ptr; group; group = group->previous)
2489 group->strbeg = marks[group->level] - to_start;
2494 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2495 from_start -= SvIVX(sv);
2498 SvFLAGS(sv) &= ~SVf_OOK;
2501 Safefree(from_start);
2502 SvPV_set(sv, to_start);
2503 SvCUR_set(sv, to_ptr - to_start);
2508 /* Exponential string grower. Makes string extension effectively O(n)
2509 needed says how many extra bytes we need (not counting the final '\0')
2510 Only grows the string if there is an actual lack of space
2513 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2514 const STRLEN cur = SvCUR(sv);
2515 const STRLEN len = SvLEN(sv);
2517 if (len - cur > needed) return SvPVX(sv);
2518 extend = needed > len ? needed : len;
2519 return SvGROW(sv, len+extend+1);
2524 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2527 tempsym_t lookahead;
2528 I32 items = endlist - beglist;
2529 bool found = next_symbol(symptr);
2530 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2531 bool warn_utf8 = ckWARN(WARN_UTF8);
2533 if (symptr->level == 0 && found && symptr->code == 'U') {
2534 marked_upgrade(aTHX_ cat, symptr);
2535 symptr->flags |= FLAG_DO_UTF8;
2538 symptr->strbeg = SvCUR(cat);
2544 SV *lengthcode = Nullsv;
2545 I32 datumtype = symptr->code;
2546 howlen_t howlen = symptr->howlen;
2547 char *start = SvPVX(cat);
2548 char *cur = start + SvCUR(cat);
2550 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2554 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2558 /* e_no_len and e_number */
2559 len = symptr->length;
2564 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2566 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2567 /* We can process this letter. */
2568 STRLEN size = props & PACK_SIZE_MASK;
2569 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2573 /* Look ahead for next symbol. Do we have code/code? */
2574 lookahead = *symptr;
2575 found = next_symbol(&lookahead);
2576 if (symptr->flags & FLAG_SLASH) {
2578 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2579 if (strchr("aAZ", lookahead.code)) {
2580 if (lookahead.howlen == e_number) count = lookahead.length;
2583 count = DO_UTF8(*beglist) ?
2584 sv_len_utf8(*beglist) : sv_len(*beglist);
2586 if (lookahead.code == 'Z') count++;
2589 if (lookahead.howlen == e_number && lookahead.length < items)
2590 count = lookahead.length;
2593 lookahead.howlen = e_number;
2594 lookahead.length = count;
2595 lengthcode = sv_2mortal(newSViv(count));
2598 /* Code inside the switch must take care to properly update
2599 cat (CUR length and '\0' termination) if it updated *cur and
2600 doesn't simply leave using break */
2601 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2603 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2604 (int) TYPE_NO_MODIFIERS(datumtype));
2606 Perl_croak(aTHX_ "'%%' may not be used in pack");
2609 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2610 case '.' | TYPE_IS_SHRIEKING:
2613 if (howlen == e_star) from = start;
2614 else if (len == 0) from = cur;
2616 tempsym_t *group = symptr;
2618 while (--len && group) group = group->previous;
2619 from = group ? start + group->strbeg : start;
2622 len = SvIV(fromstr);
2624 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2625 case '@' | TYPE_IS_SHRIEKING:
2628 from = start + symptr->strbeg;
2630 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2631 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2632 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2636 while (len && from < cur) {
2637 from += UTF8SKIP(from);
2641 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2643 /* Here we know from == cur */
2645 GROWING(0, cat, start, cur, len);
2646 Zero(cur, len, char);
2648 } else if (from < cur) {
2651 } else goto no_change;
2659 if (len > 0) goto grow;
2660 if (len == 0) goto no_change;
2667 tempsym_t savsym = *symptr;
2668 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2669 symptr->flags |= group_modifiers;
2670 symptr->patend = savsym.grpend;
2672 symptr->previous = &lookahead;
2675 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2676 else symptr->flags &= ~FLAG_PARSE_UTF8;
2677 was_utf8 = SvUTF8(cat);
2678 symptr->patptr = savsym.grpbeg;
2679 beglist = pack_rec(cat, symptr, beglist, endlist);
2680 if (SvUTF8(cat) != was_utf8)
2681 /* This had better be an upgrade while in utf8==0 mode */
2684 if (savsym.howlen == e_star && beglist == endlist)
2685 break; /* No way to continue */
2687 lookahead.flags = symptr->flags & ~group_modifiers;
2690 case 'X' | TYPE_IS_SHRIEKING:
2691 if (!len) /* Avoid division by 0 */
2698 hop += UTF8SKIP(hop);
2705 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2709 len = (cur-start) % len;
2713 if (len < 1) goto no_change;
2717 Perl_croak(aTHX_ "'%c' outside of string in pack",
2718 (int) TYPE_NO_MODIFIERS(datumtype));
2719 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2721 Perl_croak(aTHX_ "'%c' outside of string in pack",
2722 (int) TYPE_NO_MODIFIERS(datumtype));
2728 if (cur - start < len)
2729 Perl_croak(aTHX_ "'%c' outside of string in pack",
2730 (int) TYPE_NO_MODIFIERS(datumtype));
2733 if (cur < start+symptr->strbeg) {
2734 /* Make sure group starts don't point into the void */
2736 const STRLEN length = cur-start;
2737 for (group = symptr;
2738 group && length < group->strbeg;
2739 group = group->previous) group->strbeg = length;
2740 lookahead.strbeg = length;
2743 case 'x' | TYPE_IS_SHRIEKING: {
2745 if (!len) /* Avoid division by 0 */
2747 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2748 else ai32 = (cur - start) % len;
2749 if (ai32 == 0) goto no_change;
2761 aptr = SvPV_const(fromstr, fromlen);
2762 if (DO_UTF8(fromstr)) {
2763 const char *end, *s;
2765 if (!utf8 && !SvUTF8(cat)) {
2766 marked_upgrade(aTHX_ cat, symptr);
2767 lookahead.flags |= FLAG_DO_UTF8;
2768 lookahead.strbeg = symptr->strbeg;
2771 cur = start + SvCUR(cat);
2773 if (howlen == e_star) {
2774 if (utf8) goto string_copy;
2778 end = aptr + fromlen;
2779 fromlen = datumtype == 'Z' ? len-1 : len;
2780 while ((I32) fromlen > 0 && s < end) {
2785 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2788 if (datumtype == 'Z') len++;
2794 fromlen = len - fromlen;
2795 if (datumtype == 'Z') fromlen--;
2796 if (howlen == e_star) {
2798 if (datumtype == 'Z') len++;
2800 GROWING(0, cat, start, cur, len);
2801 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2802 datumtype | TYPE_IS_PACK))
2803 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2807 if (howlen == e_star) {
2809 if (datumtype == 'Z') len++;
2811 if (len <= (I32) fromlen) {
2813 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2815 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2817 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2818 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2820 while (fromlen > 0) {
2821 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2827 if (howlen == e_star) {
2829 if (datumtype == 'Z') len++;
2831 if (len <= (I32) fromlen) {
2833 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2835 GROWING(0, cat, start, cur, len);
2836 Copy(aptr, cur, fromlen, char);
2840 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2846 const char *str, *end;
2853 str = SvPV_const(fromstr, fromlen);
2854 end = str + fromlen;
2855 if (DO_UTF8(fromstr)) {
2857 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2859 utf8_source = FALSE;
2860 utf8_flags = 0; /* Unused, but keep compilers happy */
2862 if (howlen == e_star) len = fromlen;
2863 field_len = (len+7)/8;
2864 GROWING(utf8, cat, start, cur, field_len);
2865 if (len > (I32)fromlen) len = fromlen;
2868 if (datumtype == 'B')
2872 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2874 } else bits |= *str++ & 1;
2875 if (l & 7) bits <<= 1;
2877 PUSH_BYTE(utf8, cur, bits);
2882 /* datumtype == 'b' */
2886 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2887 if (val & 1) bits |= 0x80;
2888 } else if (*str++ & 1)
2890 if (l & 7) bits >>= 1;
2892 PUSH_BYTE(utf8, cur, bits);
2898 if (datumtype == 'B')
2899 bits <<= 7 - (l & 7);
2901 bits >>= 7 - (l & 7);
2902 PUSH_BYTE(utf8, cur, bits);
2905 /* Determine how many chars are left in the requested field */
2907 if (howlen == e_star) field_len = 0;
2908 else field_len -= l;
2909 Zero(cur, field_len, char);
2915 const char *str, *end;
2922 str = SvPV_const(fromstr, fromlen);
2923 end = str + fromlen;
2924 if (DO_UTF8(fromstr)) {
2926 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2928 utf8_source = FALSE;
2929 utf8_flags = 0; /* Unused, but keep compilers happy */
2931 if (howlen == e_star) len = fromlen;
2932 field_len = (len+1)/2;
2933 GROWING(utf8, cat, start, cur, field_len);
2934 if (!utf8 && len > (I32)fromlen) len = fromlen;
2937 if (datumtype == 'H')
2941 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2942 if (val < 256 && isALPHA(val))
2943 bits |= (val + 9) & 0xf;
2946 } else if (isALPHA(*str))
2947 bits |= (*str++ + 9) & 0xf;
2949 bits |= *str++ & 0xf;
2950 if (l & 1) bits <<= 4;
2952 PUSH_BYTE(utf8, cur, bits);
2960 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2961 if (val < 256 && isALPHA(val))
2962 bits |= ((val + 9) & 0xf) << 4;
2964 bits |= (val & 0xf) << 4;
2965 } else if (isALPHA(*str))
2966 bits |= ((*str++ + 9) & 0xf) << 4;
2968 bits |= (*str++ & 0xf) << 4;
2969 if (l & 1) bits >>= 4;
2971 PUSH_BYTE(utf8, cur, bits);
2977 PUSH_BYTE(utf8, cur, bits);
2980 /* Determine how many chars are left in the requested field */
2982 if (howlen == e_star) field_len = 0;
2983 else field_len -= l;
2984 Zero(cur, field_len, char);
2992 aiv = SvIV(fromstr);
2993 if ((-128 > aiv || aiv > 127) &&
2995 Perl_warner(aTHX_ packWARN(WARN_PACK),
2996 "Character in 'c' format wrapped in pack");
2997 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3002 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3005 GROWING(0, cat, start, cur, len);
3009 aiv = SvIV(fromstr);
3010 if ((0 > aiv || aiv > 0xff) &&
3012 Perl_warner(aTHX_ packWARN(WARN_PACK),
3013 "Character in 'C' format wrapped in pack");
3014 *cur++ = (char)(aiv & 0xff);
3019 U8 in_bytes = IN_BYTES;
3021 end = start+SvLEN(cat)-1;
3022 if (utf8) end -= UTF8_MAXLEN-1;
3026 auv = SvUV(fromstr);
3027 if (in_bytes) auv = auv % 0x100;
3032 SvCUR_set(cat, cur - start);
3034 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3035 end = start+SvLEN(cat)-UTF8_MAXLEN;
3037 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3040 0 : UNICODE_ALLOW_ANY);
3045 SvCUR_set(cat, cur - start);
3046 marked_upgrade(aTHX_ cat, symptr);
3047 lookahead.flags |= FLAG_DO_UTF8;
3048 lookahead.strbeg = symptr->strbeg;
3051 cur = start + SvCUR(cat);
3052 end = start+SvLEN(cat)-UTF8_MAXLEN;
3055 if (ckWARN(WARN_PACK))
3056 Perl_warner(aTHX_ packWARN(WARN_PACK),
3057 "Character in 'W' format wrapped in pack");
3062 SvCUR_set(cat, cur - start);
3063 GROWING(0, cat, start, cur, len+1);
3064 end = start+SvLEN(cat)-1;
3066 *(U8 *) cur++ = (U8)auv;
3075 if (!(symptr->flags & FLAG_DO_UTF8)) {
3076 marked_upgrade(aTHX_ cat, symptr);
3077 lookahead.flags |= FLAG_DO_UTF8;
3078 lookahead.strbeg = symptr->strbeg;
3084 end = start+SvLEN(cat);
3085 if (!utf8) end -= UTF8_MAXLEN;
3089 auv = SvUV(fromstr);
3091 U8 buffer[UTF8_MAXLEN], *endb;
3092 endb = uvuni_to_utf8_flags(buffer, auv,
3094 0 : UNICODE_ALLOW_ANY);
3095 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3097 SvCUR_set(cat, cur - start);
3098 GROWING(0, cat, start, cur,
3099 len+(endb-buffer)*UTF8_EXPAND);
3100 end = start+SvLEN(cat);
3102 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3106 SvCUR_set(cat, cur - start);
3107 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3108 end = start+SvLEN(cat)-UTF8_MAXLEN;
3110 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3112 0 : UNICODE_ALLOW_ANY);
3117 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3123 anv = SvNV(fromstr);
3125 /* VOS does not automatically map a floating-point overflow
3126 during conversion from double to float into infinity, so we
3127 do it by hand. This code should either be generalized for
3128 any OS that needs it, or removed if and when VOS implements
3129 posix-976 (suggestion to support mapping to infinity).
3130 Paul.Green@stratus.com 02-04-02. */
3132 afloat = _float_constants[0]; /* single prec. inf. */
3133 else if (anv < -FLT_MAX)
3134 afloat = _float_constants[0]; /* single prec. inf. */
3135 else afloat = (float) anv;
3137 # if defined(VMS) && !defined(__IEEE_FP)
3138 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3139 * on Alpha; fake it if we don't have them.
3143 else if (anv < -FLT_MAX)
3145 else afloat = (float)anv;
3147 afloat = (float)anv;
3149 #endif /* __VOS__ */
3150 DO_BO_PACK_N(afloat, float);
3151 PUSH_VAR(utf8, cur, afloat);
3159 anv = SvNV(fromstr);
3161 /* VOS does not automatically map a floating-point overflow
3162 during conversion from long double to double into infinity,
3163 so we do it by hand. This code should either be generalized
3164 for any OS that needs it, or removed if and when VOS
3165 implements posix-976 (suggestion to support mapping to
3166 infinity). Paul.Green@stratus.com 02-04-02. */
3168 adouble = _double_constants[0]; /* double prec. inf. */
3169 else if (anv < -DBL_MAX)
3170 adouble = _double_constants[0]; /* double prec. inf. */
3171 else adouble = (double) anv;
3173 # if defined(VMS) && !defined(__IEEE_FP)
3174 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3175 * on Alpha; fake it if we don't have them.
3179 else if (anv < -DBL_MAX)
3181 else adouble = (double)anv;
3183 adouble = (double)anv;
3185 #endif /* __VOS__ */
3186 DO_BO_PACK_N(adouble, double);
3187 PUSH_VAR(utf8, cur, adouble);
3192 Zero(&anv, 1, NV); /* can be long double with unused bits */
3195 anv = SvNV(fromstr);
3196 DO_BO_PACK_N(anv, NV);
3197 PUSH_VAR(utf8, cur, anv);
3201 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3203 long double aldouble;
3204 /* long doubles can have unused bits, which may be nonzero */
3205 Zero(&aldouble, 1, long double);
3208 aldouble = (long double)SvNV(fromstr);
3209 DO_BO_PACK_N(aldouble, long double);
3210 PUSH_VAR(utf8, cur, aldouble);
3215 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3216 case 'n' | TYPE_IS_SHRIEKING:
3222 ai16 = (I16)SvIV(fromstr);
3224 ai16 = PerlSock_htons(ai16);
3226 PUSH16(utf8, cur, &ai16);
3229 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3230 case 'v' | TYPE_IS_SHRIEKING:
3236 ai16 = (I16)SvIV(fromstr);
3240 PUSH16(utf8, cur, &ai16);
3243 case 'S' | TYPE_IS_SHRIEKING:
3244 #if SHORTSIZE != SIZE16
3246 unsigned short aushort;
3248 aushort = SvUV(fromstr);
3249 DO_BO_PACK(aushort, s);
3250 PUSH_VAR(utf8, cur, aushort);
3260 au16 = (U16)SvUV(fromstr);
3261 DO_BO_PACK(au16, 16);
3262 PUSH16(utf8, cur, &au16);
3265 case 's' | TYPE_IS_SHRIEKING:
3266 #if SHORTSIZE != SIZE16
3270 ashort = SvIV(fromstr);
3271 DO_BO_PACK(ashort, s);
3272 PUSH_VAR(utf8, cur, ashort);
3282 ai16 = (I16)SvIV(fromstr);
3283 DO_BO_PACK(ai16, 16);
3284 PUSH16(utf8, cur, &ai16);
3288 case 'I' | TYPE_IS_SHRIEKING:
3292 auint = SvUV(fromstr);
3293 DO_BO_PACK(auint, i);
3294 PUSH_VAR(utf8, cur, auint);
3301 aiv = SvIV(fromstr);
3302 #if IVSIZE == INTSIZE
3304 #elif IVSIZE == LONGSIZE
3306 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3307 DO_BO_PACK(aiv, 64);
3309 Perl_croak(aTHX_ "'j' not supported on this platform");
3311 PUSH_VAR(utf8, cur, aiv);
3318 auv = SvUV(fromstr);
3319 #if UVSIZE == INTSIZE
3321 #elif UVSIZE == LONGSIZE
3323 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3324 DO_BO_PACK(auv, 64);
3326 Perl_croak(aTHX_ "'J' not supported on this platform");
3328 PUSH_VAR(utf8, cur, auv);
3335 anv = SvNV(fromstr);
3339 SvCUR_set(cat, cur - start);
3340 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3343 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3344 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3345 any negative IVs will have already been got by the croak()
3346 above. IOK is untrue for fractions, so we test them
3347 against UV_MAX_P1. */
3348 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3349 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3350 char *in = buf + sizeof(buf);
3351 UV auv = SvUV(fromstr);
3354 *--in = (char)((auv & 0x7f) | 0x80);
3357 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3358 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3359 in, (buf + sizeof(buf)) - in);
3360 } else if (SvPOKp(fromstr))
3362 else if (SvNOKp(fromstr)) {
3363 /* 10**NV_MAX_10_EXP is the largest power of 10
3364 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3365 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3366 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3367 And with that many bytes only Inf can overflow.
3368 Some C compilers are strict about integral constant
3369 expressions so we conservatively divide by a slightly
3370 smaller integer instead of multiplying by the exact
3371 floating-point value.
3373 #ifdef NV_MAX_10_EXP
3374 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3375 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3377 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3378 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3380 char *in = buf + sizeof(buf);
3382 anv = Perl_floor(anv);
3384 const NV next = Perl_floor(anv / 128);
3385 if (in <= buf) /* this cannot happen ;-) */
3386 Perl_croak(aTHX_ "Cannot compress integer in pack");
3387 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3390 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3391 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3392 in, (buf + sizeof(buf)) - in);
3401 /* Copy string and check for compliance */
3402 from = SvPV_const(fromstr, len);
3403 if ((norm = is_an_int(from, len)) == NULL)
3404 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3406 Newx(result, len, char);
3409 while (!done) *--in = div128(norm, &done) | 0x80;
3410 result[len - 1] &= 0x7F; /* clear continue bit */
3411 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3412 in, (result + len) - in);
3414 SvREFCNT_dec(norm); /* free norm */
3419 case 'i' | TYPE_IS_SHRIEKING:
3423 aint = SvIV(fromstr);
3424 DO_BO_PACK(aint, i);
3425 PUSH_VAR(utf8, cur, aint);
3428 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3429 case 'N' | TYPE_IS_SHRIEKING:
3435 au32 = SvUV(fromstr);
3437 au32 = PerlSock_htonl(au32);
3439 PUSH32(utf8, cur, &au32);
3442 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3443 case 'V' | TYPE_IS_SHRIEKING:
3449 au32 = SvUV(fromstr);
3453 PUSH32(utf8, cur, &au32);
3456 case 'L' | TYPE_IS_SHRIEKING:
3457 #if LONGSIZE != SIZE32
3459 unsigned long aulong;
3461 aulong = SvUV(fromstr);
3462 DO_BO_PACK(aulong, l);
3463 PUSH_VAR(utf8, cur, aulong);
3473 au32 = SvUV(fromstr);
3474 DO_BO_PACK(au32, 32);
3475 PUSH32(utf8, cur, &au32);
3478 case 'l' | TYPE_IS_SHRIEKING:
3479 #if LONGSIZE != SIZE32
3483 along = SvIV(fromstr);
3484 DO_BO_PACK(along, l);
3485 PUSH_VAR(utf8, cur, along);
3495 ai32 = SvIV(fromstr);
3496 DO_BO_PACK(ai32, 32);
3497 PUSH32(utf8, cur, &ai32);
3505 auquad = (Uquad_t) SvUV(fromstr);
3506 DO_BO_PACK(auquad, 64);
3507 PUSH_VAR(utf8, cur, auquad);
3514 aquad = (Quad_t)SvIV(fromstr);
3515 DO_BO_PACK(aquad, 64);
3516 PUSH_VAR(utf8, cur, aquad);
3519 #endif /* HAS_QUAD */
3521 len = 1; /* assume SV is correct length */
3522 GROWING(utf8, cat, start, cur, sizeof(char *));
3529 SvGETMAGIC(fromstr);
3530 if (!SvOK(fromstr)) aptr = NULL;
3532 /* XXX better yet, could spirit away the string to
3533 * a safe spot and hang on to it until the result
3534 * of pack() (and all copies of the result) are
3537 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3538 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3539 Perl_warner(aTHX_ packWARN(WARN_PACK),
3540 "Attempt to pack pointer to temporary value");
3542 if (SvPOK(fromstr) || SvNIOK(fromstr))
3543 aptr = SvPV_nomg_const_nolen(fromstr);
3545 aptr = SvPV_force_flags_nolen(fromstr, 0);
3547 DO_BO_PACK_PC(aptr);
3548 PUSH_VAR(utf8, cur, aptr);
3552 const char *aptr, *aend;
3556 if (len <= 2) len = 45;
3557 else len = len / 3 * 3;
3559 if (ckWARN(WARN_PACK))
3560 Perl_warner(aTHX_ packWARN(WARN_PACK),
3561 "Field too wide in 'u' format in pack");
3564 aptr = SvPV_const(fromstr, fromlen);
3565 from_utf8 = DO_UTF8(fromstr);
3567 aend = aptr + fromlen;
3568 fromlen = sv_len_utf8(fromstr);
3569 } else aend = NULL; /* Unused, but keep compilers happy */
3570 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3571 while (fromlen > 0) {
3574 U8 hunk[1+63/3*4+1];
3576 if ((I32)fromlen > len)
3582 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3583 'u' | TYPE_IS_PACK)) {
3585 SvCUR_set(cat, cur - start);
3586 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3588 end = doencodes(hunk, buffer, todo);
3590 end = doencodes(hunk, aptr, todo);
3593 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3600 SvCUR_set(cat, cur - start);
3602 *symptr = lookahead;
3611 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3612 register SV *cat = TARG;
3614 SV *pat_sv = *++MARK;
3615 register const char *pat = SvPV_const(pat_sv, fromlen);
3616 register const char *patend = pat + fromlen;
3619 sv_setpvn(cat, "", 0);
3622 packlist(cat, pat, patend, MARK, SP + 1);
3632 * c-indentation-style: bsd
3634 * indent-tabs-mode: t
3637 * ex: set ts=8 sts=4 sw=4 noet: