3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = e_no_len; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325 # elif PTRSIZE == IVSIZE
326 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
331 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
337 # if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339 # define DO_BO_UNPACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 # define DO_BO_PACK_N(var, type) \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
361 #endif /* PERL_PACK_CAN_BYTEORDER */
363 #define PACK_SIZE_CANNOT_CSUM 0x80
364 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365 #define PACK_SIZE_MASK 0x3F
367 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
370 #if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
374 typedef U8 packprops_t;
377 STATIC const packprops_t packprops[512] = {
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
393 /* I */ sizeof(unsigned int),
400 #if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
415 /* f */ sizeof(float),
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425 #if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
452 /* L */ sizeof(unsigned long),
454 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
460 /* S */ sizeof(unsigned short),
462 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 /* l */ sizeof(long),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
479 /* s */ sizeof(short),
481 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
497 /* EBCDIC (or bust) */
498 STATIC const packprops_t packprops[512] = {
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
512 /* f */ sizeof(float),
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523 #if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
544 /* I */ sizeof(unsigned int),
552 #if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
579 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
587 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
599 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
607 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
618 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
640 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 const char *from = *s;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
659 } else from += retlen;
664 *(U8 *)buf++ = (U8)val;
666 /* We have enough characters for the buffer. Did we have problems ? */
669 /* Rewalk the string fragment while warning */
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
676 if (from > end) from = end;
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
690 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = PL_uudmap[val] & 077;
706 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
707 const U8 * const end = start + len;
709 while (start < end) {
710 const UV uv = NATIVE_TO_ASCII(*start);
711 if (UNI_IS_INVARIANT(uv))
712 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
715 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
722 #define PUSH_BYTES(utf8, cur, buf, len) \
725 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
727 Copy(buf, cur, len, char); \
732 #define GROWING(utf8, cat, start, cur, in_len) \
734 STRLEN glen = (in_len); \
735 if (utf8) glen *= UTF8_EXPAND; \
736 if ((cur) + glen >= (start) + SvLEN(cat)) { \
737 (start) = sv_exp_grow(cat, glen); \
738 (cur) = (start) + SvCUR(cat); \
742 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
744 const STRLEN glen = (in_len); \
746 if (utf8) gl *= UTF8_EXPAND; \
747 if ((cur) + gl >= (start) + SvLEN(cat)) { \
749 SvCUR_set((cat), (cur) - (start)); \
750 (start) = sv_exp_grow(cat, gl); \
751 (cur) = (start) + SvCUR(cat); \
753 PUSH_BYTES(utf8, cur, buf, glen); \
756 #define PUSH_BYTE(utf8, s, byte) \
759 const U8 au8 = (byte); \
760 (s) = bytes_to_uni(&au8, 1, (s)); \
761 } else *(U8 *)(s)++ = (byte); \
764 /* Only to be used inside a loop (see the break) */
765 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
768 if (str >= end) break; \
769 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
770 if (retlen == (STRLEN) -1 || retlen == 0) { \
772 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
777 static const char *_action( const tempsym_t* symptr )
779 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
782 /* Returns the sizeof() struct described by pat */
784 S_measure_struct(pTHX_ tempsym_t* symptr)
788 while (next_symbol(symptr)) {
792 switch (symptr->howlen) {
794 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
798 /* e_no_len and e_number */
799 len = symptr->length;
803 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
806 /* endianness doesn't influence the size of a type */
807 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
809 Perl_croak(aTHX_ "Invalid type '%c' in %s",
810 (int)TYPE_NO_MODIFIERS(symptr->code),
812 #ifdef PERL_PACK_CAN_SHRIEKSIGN
813 case '.' | TYPE_IS_SHRIEKING:
814 case '@' | TYPE_IS_SHRIEKING:
819 case 'U': /* XXXX Is it correct? */
822 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
823 (int) TYPE_NO_MODIFIERS(symptr->code),
830 tempsym_t savsym = *symptr;
831 symptr->patptr = savsym.grpbeg;
832 symptr->patend = savsym.grpend;
833 /* XXXX Theoretically, we need to measure many times at
834 different positions, since the subexpression may contain
835 alignment commands, but be not of aligned length.
836 Need to detect this and croak(). */
837 size = measure_struct(symptr);
841 case 'X' | TYPE_IS_SHRIEKING:
842 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
844 if (!len) /* Avoid division by 0 */
846 len = total % len; /* Assumed: the start is aligned. */
851 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
853 case 'x' | TYPE_IS_SHRIEKING:
854 if (!len) /* Avoid division by 0 */
856 star = total % len; /* Assumed: the start is aligned. */
857 if (star) /* Other portable ways? */
881 size = sizeof(char*);
891 /* locate matching closing parenthesis or bracket
892 * returns char pointer to char after match, or NULL
895 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
897 while (patptr < patend) {
898 const char c = *patptr++;
905 while (patptr < patend && *patptr != '\n')
909 patptr = group_end(patptr, patend, ')') + 1;
911 patptr = group_end(patptr, patend, ']') + 1;
913 Perl_croak(aTHX_ "No group ending character '%c' found in template",
919 /* Convert unsigned decimal number to binary.
920 * Expects a pointer to the first digit and address of length variable
921 * Advances char pointer to 1st non-digit char and returns number
924 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
926 I32 len = *patptr++ - '0';
927 while (isDIGIT(*patptr)) {
928 if (len >= 0x7FFFFFFF/10)
929 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
930 len = (len * 10) + (*patptr++ - '0');
936 /* The marvellous template parsing routine: Using state stored in *symptr,
937 * locates next template code and count
940 S_next_symbol(pTHX_ tempsym_t* symptr )
942 const char* patptr = symptr->patptr;
943 const char* const patend = symptr->patend;
945 symptr->flags &= ~FLAG_SLASH;
947 while (patptr < patend) {
948 if (isSPACE(*patptr))
950 else if (*patptr == '#') {
952 while (patptr < patend && *patptr != '\n')
957 /* We should have found a template code */
958 I32 code = *patptr++ & 0xFF;
959 U32 inherited_modifiers = 0;
961 if (code == ','){ /* grandfather in commas but with a warning */
962 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
963 symptr->flags |= FLAG_COMMA;
964 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
965 "Invalid type ',' in %s", _action( symptr ) );
970 /* for '(', skip to ')' */
972 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
973 Perl_croak(aTHX_ "()-group starts with a count in %s",
975 symptr->grpbeg = patptr;
976 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
977 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
978 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
982 /* look for group modifiers to inherit */
983 if (TYPE_ENDIANNESS(symptr->flags)) {
984 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
985 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
988 /* look for modifiers */
989 while (patptr < patend) {
994 modifier = TYPE_IS_SHRIEKING;
995 allowed = SHRIEKING_ALLOWED_TYPES;
997 #ifdef PERL_PACK_CAN_BYTEORDER
999 modifier = TYPE_IS_BIG_ENDIAN;
1000 allowed = ENDIANNESS_ALLOWED_TYPES;
1003 modifier = TYPE_IS_LITTLE_ENDIAN;
1004 allowed = ENDIANNESS_ALLOWED_TYPES;
1006 #endif /* PERL_PACK_CAN_BYTEORDER */
1016 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1017 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1018 allowed, _action( symptr ) );
1020 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1021 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1022 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1023 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1024 TYPE_ENDIANNESS_MASK)
1025 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1026 *patptr, _action( symptr ) );
1028 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
1032 _action( symptr ) );
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1042 /* look for count and/or / */
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1048 } else if (*patptr == '*') {
1050 symptr->howlen = e_star;
1052 } else if (*patptr == '[') {
1053 const char* lenptr = ++patptr;
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1061 _action( symptr ) );
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1070 symptr->howlen = e_no_len;
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1078 else if (*patptr == '#') {
1080 while (patptr < patend && *patptr != '\n')
1082 if (patptr < patend)
1085 if (*patptr == '/') {
1086 symptr->flags |= FLAG_SLASH;
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1091 _action( symptr ) );
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1102 symptr->code = code;
1103 symptr->patptr = patptr;
1107 symptr->patptr = patptr;
1112 There is no way to cleanly handle the case where we should process the
1113 string per byte in its upgraded form while it's really in downgraded form
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
1118 themselves if they need to do a lot of unpacks like this on it
1121 need_utf8(const char *pat, const char *patend)
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1127 pat = (const char *) memchr(pat, '\n', patend-pat);
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1138 first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1142 pat = (const char *) memchr(pat, '\n', patend-pat);
1150 =for apidoc unpackstring
1152 The engine implementing unpack() Perl function. C<unpackstring> puts the
1153 extracted list items on the stack and returns the number of elements.
1154 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1159 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1163 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1164 else if (need_utf8(pat, patend)) {
1165 /* We probably should try to avoid this in case a scalar context call
1166 wouldn't get to the "U0" */
1167 STRLEN len = strend - s;
1168 s = (char *) bytes_to_utf8((U8 *) s, &len);
1171 flags |= FLAG_DO_UTF8;
1174 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1175 flags |= FLAG_PARSE_UTF8;
1177 TEMPSYM_INIT(&sym, pat, patend, flags);
1179 return unpack_rec(&sym, s, s, strend, NULL );
1184 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1188 const I32 start_sp_offset = SP - PL_stack_base;
1194 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1195 bool beyond = FALSE;
1196 bool explicit_length;
1197 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1198 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1199 symptr->strbeg = s - strbeg;
1201 while (next_symbol(symptr)) {
1204 I32 datumtype = symptr->code;
1205 /* do first one only unless in list context
1206 / is implemented by unpacking the count, then popping it from the
1207 stack, so must check that we're not in the middle of a / */
1208 if ( unpack_only_one
1209 && (SP - PL_stack_base == start_sp_offset + 1)
1210 && (datumtype != '/') ) /* XXX can this be omitted */
1213 switch (howlen = symptr->howlen) {
1215 len = strend - strbeg; /* long enough */
1218 /* e_no_len and e_number */
1219 len = symptr->length;
1223 explicit_length = TRUE;
1225 beyond = s >= strend;
1227 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1229 /* props nonzero means we can process this letter. */
1230 const long size = props & PACK_SIZE_MASK;
1231 const long howmany = (strend - s) / size;
1235 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1236 if (len && unpack_only_one) len = 1;
1242 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1244 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1247 if (howlen == e_no_len)
1248 len = 16; /* len is not specified */
1256 tempsym_t savsym = *symptr;
1257 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1258 symptr->flags |= group_modifiers;
1259 symptr->patend = savsym.grpend;
1260 symptr->previous = &savsym;
1264 symptr->patptr = savsym.grpbeg;
1265 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1266 else symptr->flags &= ~FLAG_PARSE_UTF8;
1267 unpack_rec(symptr, s, strbeg, strend, &s);
1268 if (s == strend && savsym.howlen == e_star)
1269 break; /* No way to continue */
1272 savsym.flags = symptr->flags & ~group_modifiers;
1276 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1277 case '.' | TYPE_IS_SHRIEKING:
1282 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1283 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1284 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1285 const bool u8 = utf8;
1287 if (howlen == e_star) from = strbeg;
1288 else if (len <= 0) from = s;
1290 tempsym_t *group = symptr;
1292 while (--len && group) group = group->previous;
1293 from = group ? strbeg + group->strbeg : strbeg;
1296 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1297 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1298 XPUSHs(sv_2mortal(sv));
1301 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1302 case '@' | TYPE_IS_SHRIEKING:
1305 s = strbeg + symptr->strbeg;
1306 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1307 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1308 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1314 Perl_croak(aTHX_ "'@' outside of string in unpack");
1319 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1322 Perl_croak(aTHX_ "'@' outside of string in unpack");
1326 case 'X' | TYPE_IS_SHRIEKING:
1327 if (!len) /* Avoid division by 0 */
1330 const char *hop, *last;
1332 hop = last = strbeg;
1334 hop += UTF8SKIP(hop);
1341 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1345 len = (s - strbeg) % len;
1351 Perl_croak(aTHX_ "'X' outside of string in unpack");
1352 while (--s, UTF8_IS_CONTINUATION(*s)) {
1354 Perl_croak(aTHX_ "'X' outside of string in unpack");
1359 if (len > s - strbeg)
1360 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1364 case 'x' | TYPE_IS_SHRIEKING: {
1366 if (!len) /* Avoid division by 0 */
1368 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1369 else ai32 = (s - strbeg) % len;
1370 if (ai32 == 0) break;
1378 Perl_croak(aTHX_ "'x' outside of string in unpack");
1383 if (len > strend - s)
1384 Perl_croak(aTHX_ "'x' outside of string in unpack");
1389 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1395 /* Preliminary length estimate is assumed done in 'W' */
1396 if (len > strend - s) len = strend - s;
1402 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1403 if (hop >= strend) {
1405 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1410 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1412 } else if (len > strend - s)
1415 if (datumtype == 'Z') {
1416 /* 'Z' strips stuff after first null */
1417 const char *ptr, *end;
1419 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1420 sv = newSVpvn(s, ptr-s);
1421 if (howlen == e_star) /* exact for 'Z*' */
1422 len = ptr-s + (ptr != strend ? 1 : 0);
1423 } else if (datumtype == 'A') {
1424 /* 'A' strips both nulls and spaces */
1426 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1427 for (ptr = s+len-1; ptr >= s; ptr--)
1428 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1429 !is_utf8_space((U8 *) ptr)) break;
1430 if (ptr >= s) ptr += UTF8SKIP(ptr);
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1435 for (ptr = s+len-1; ptr >= s; ptr--)
1436 if (*ptr != 0 && !isSPACE(*ptr)) break;
1439 sv = newSVpvn(s, ptr-s);
1440 } else sv = newSVpvn(s, len);
1444 /* Undo any upgrade done due to need_utf8() */
1445 if (!(symptr->flags & FLAG_WAS_UTF8))
1446 sv_utf8_downgrade(sv, 0);
1448 XPUSHs(sv_2mortal(sv));
1454 if (howlen == e_star || len > (strend - s) * 8)
1455 len = (strend - s) * 8;
1459 Newxz(PL_bitcount, 256, char);
1460 for (bits = 1; bits < 256; bits++) {
1461 if (bits & 1) PL_bitcount[bits]++;
1462 if (bits & 2) PL_bitcount[bits]++;
1463 if (bits & 4) PL_bitcount[bits]++;
1464 if (bits & 8) PL_bitcount[bits]++;
1465 if (bits & 16) PL_bitcount[bits]++;
1466 if (bits & 32) PL_bitcount[bits]++;
1467 if (bits & 64) PL_bitcount[bits]++;
1468 if (bits & 128) PL_bitcount[bits]++;
1472 while (len >= 8 && s < strend) {
1473 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1478 cuv += PL_bitcount[*(U8 *)s++];
1481 if (len && s < strend) {
1483 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1484 if (datumtype == 'b')
1486 if (bits & 1) cuv++;
1491 if (bits & 0x80) cuv++;
1498 sv = sv_2mortal(newSV(len ? len : 1));
1501 if (datumtype == 'b') {
1503 const I32 ai32 = len;
1504 for (len = 0; len < ai32; len++) {
1505 if (len & 7) bits >>= 1;
1507 if (s >= strend) break;
1508 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1509 } else bits = *(U8 *) s++;
1510 *str++ = bits & 1 ? '1' : '0';
1514 const I32 ai32 = len;
1515 for (len = 0; len < ai32; len++) {
1516 if (len & 7) bits <<= 1;
1518 if (s >= strend) break;
1519 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1520 } else bits = *(U8 *) s++;
1521 *str++ = bits & 0x80 ? '1' : '0';
1525 SvCUR_set(sv, str - SvPVX_const(sv));
1532 /* Preliminary length estimate, acceptable for utf8 too */
1533 if (howlen == e_star || len > (strend - s) * 2)
1534 len = (strend - s) * 2;
1535 sv = sv_2mortal(newSV(len ? len : 1));
1538 if (datumtype == 'h') {
1541 for (len = 0; len < ai32; len++) {
1542 if (len & 1) bits >>= 4;
1544 if (s >= strend) break;
1545 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1546 } else bits = * (U8 *) s++;
1547 *str++ = PL_hexdigit[bits & 15];
1551 const I32 ai32 = len;
1552 for (len = 0; len < ai32; len++) {
1553 if (len & 1) bits <<= 4;
1555 if (s >= strend) break;
1556 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1557 } else bits = *(U8 *) s++;
1558 *str++ = PL_hexdigit[(bits >> 4) & 15];
1562 SvCUR_set(sv, str - SvPVX_const(sv));
1568 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1569 if (aint >= 128) /* fake up signed chars */
1572 PUSHs(sv_2mortal(newSViv((IV)aint)));
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)aint;
1583 if (explicit_length && datumtype == 'C')
1584 /* Switch to "character" mode */
1585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1588 if (datumtype == 'C' ?
1589 (symptr->flags & FLAG_DO_UTF8) &&
1590 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1591 while (len-- > 0 && s < strend) {
1593 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1594 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1595 if (retlen == (STRLEN) -1 || retlen == 0)
1596 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1599 PUSHs(sv_2mortal(newSVuv((UV) val)));
1600 else if (checksum > bits_in_uv)
1601 cdouble += (NV) val;
1605 } else if (!checksum)
1607 const U8 ch = *(U8 *) s++;
1608 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1610 else if (checksum > bits_in_uv)
1611 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1613 while (len-- > 0) cuv += *(U8 *) s++;
1617 if (explicit_length) {
1618 /* Switch to "bytes in UTF-8" mode */
1619 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1621 /* Should be impossible due to the need_utf8() test */
1622 Perl_croak(aTHX_ "U0 mode on a byte string");
1626 if (len > strend - s) len = strend - s;
1628 if (len && unpack_only_one) len = 1;
1632 while (len-- > 0 && s < strend) {
1636 U8 result[UTF8_MAXLEN];
1637 const char *ptr = s;
1639 /* Bug: warns about bad utf8 even if we are short on bytes
1640 and will break out of the loop */
1641 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1644 len = UTF8SKIP(result);
1645 if (!uni_to_bytes(aTHX_ &ptr, strend,
1646 (char *) &result[1], len-1, 'U')) break;
1647 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1650 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1651 if (retlen == (STRLEN) -1 || retlen == 0)
1652 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1656 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1657 else if (checksum > bits_in_uv)
1658 cdouble += (NV) auv;
1663 case 's' | TYPE_IS_SHRIEKING:
1664 #if SHORTSIZE != SIZE16
1667 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1668 DO_BO_UNPACK(ashort, s);
1670 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1671 else if (checksum > bits_in_uv)
1672 cdouble += (NV)ashort;
1684 #if U16SIZE > SIZE16
1687 SHIFT16(utf8, s, strend, &ai16, datumtype);
1688 DO_BO_UNPACK(ai16, 16);
1689 #if U16SIZE > SIZE16
1694 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV)ai16;
1701 case 'S' | TYPE_IS_SHRIEKING:
1702 #if SHORTSIZE != SIZE16
1704 unsigned short aushort;
1705 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1706 DO_BO_UNPACK(aushort, s);
1708 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)aushort;
1723 #if U16SIZE > SIZE16
1726 SHIFT16(utf8, s, strend, &au16, datumtype);
1727 DO_BO_UNPACK(au16, 16);
1729 if (datumtype == 'n')
1730 au16 = PerlSock_ntohs(au16);
1733 if (datumtype == 'v')
1737 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1738 else if (checksum > bits_in_uv)
1739 cdouble += (NV) au16;
1744 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1745 case 'v' | TYPE_IS_SHRIEKING:
1746 case 'n' | TYPE_IS_SHRIEKING:
1749 # if U16SIZE > SIZE16
1752 SHIFT16(utf8, s, strend, &ai16, datumtype);
1754 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1755 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1756 # endif /* HAS_NTOHS */
1758 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1759 ai16 = (I16) vtohs((U16) ai16);
1760 # endif /* HAS_VTOHS */
1762 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1763 else if (checksum > bits_in_uv)
1764 cdouble += (NV) ai16;
1769 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1771 case 'i' | TYPE_IS_SHRIEKING:
1774 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1775 DO_BO_UNPACK(aint, i);
1777 PUSHs(sv_2mortal(newSViv((IV)aint)));
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV)aint;
1785 case 'I' | TYPE_IS_SHRIEKING:
1788 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1789 DO_BO_UNPACK(auint, i);
1791 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1792 else if (checksum > bits_in_uv)
1793 cdouble += (NV)auint;
1801 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1802 #if IVSIZE == INTSIZE
1803 DO_BO_UNPACK(aiv, i);
1804 #elif IVSIZE == LONGSIZE
1805 DO_BO_UNPACK(aiv, l);
1806 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1807 DO_BO_UNPACK(aiv, 64);
1809 Perl_croak(aTHX_ "'j' not supported on this platform");
1812 PUSHs(sv_2mortal(newSViv(aiv)));
1813 else if (checksum > bits_in_uv)
1822 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1823 #if IVSIZE == INTSIZE
1824 DO_BO_UNPACK(auv, i);
1825 #elif IVSIZE == LONGSIZE
1826 DO_BO_UNPACK(auv, l);
1827 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1828 DO_BO_UNPACK(auv, 64);
1830 Perl_croak(aTHX_ "'J' not supported on this platform");
1833 PUSHs(sv_2mortal(newSVuv(auv)));
1834 else if (checksum > bits_in_uv)
1840 case 'l' | TYPE_IS_SHRIEKING:
1841 #if LONGSIZE != SIZE32
1844 SHIFT_VAR(utf8, s, strend, along, datumtype);
1845 DO_BO_UNPACK(along, l);
1847 PUSHs(sv_2mortal(newSViv((IV)along)));
1848 else if (checksum > bits_in_uv)
1849 cdouble += (NV)along;
1860 #if U32SIZE > SIZE32
1863 SHIFT32(utf8, s, strend, &ai32, datumtype);
1864 DO_BO_UNPACK(ai32, 32);
1865 #if U32SIZE > SIZE32
1866 if (ai32 > 2147483647) ai32 -= 4294967296;
1869 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)ai32;
1876 case 'L' | TYPE_IS_SHRIEKING:
1877 #if LONGSIZE != SIZE32
1879 unsigned long aulong;
1880 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1881 DO_BO_UNPACK(aulong, l);
1883 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)aulong;
1898 #if U32SIZE > SIZE32
1901 SHIFT32(utf8, s, strend, &au32, datumtype);
1902 DO_BO_UNPACK(au32, 32);
1904 if (datumtype == 'N')
1905 au32 = PerlSock_ntohl(au32);
1908 if (datumtype == 'V')
1912 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1913 else if (checksum > bits_in_uv)
1914 cdouble += (NV)au32;
1919 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1920 case 'V' | TYPE_IS_SHRIEKING:
1921 case 'N' | TYPE_IS_SHRIEKING:
1924 # if U32SIZE > SIZE32
1927 SHIFT32(utf8, s, strend, &ai32, datumtype);
1929 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1930 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1933 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1934 ai32 = (I32)vtohl((U32)ai32);
1937 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)ai32;
1944 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1948 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1949 DO_BO_UNPACK_PC(aptr);
1950 /* newSVpv generates undef if aptr is NULL */
1951 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1959 while (len > 0 && s < strend) {
1961 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1962 auv = (auv << 7) | (ch & 0x7f);
1963 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1966 PUSHs(sv_2mortal(newSVuv(auv)));
1971 if (++bytes >= sizeof(UV)) { /* promote to string */
1974 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1975 while (s < strend) {
1976 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1977 sv = mul128(sv, (U8)(ch & 0x7f));
1983 t = SvPV_nolen_const(sv);
1987 PUSHs(sv_2mortal(sv));
1992 if ((s >= strend) && bytes)
1993 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1997 if (symptr->howlen == e_star)
1998 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2000 if (s + sizeof(char*) <= strend) {
2002 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2003 DO_BO_UNPACK_PC(aptr);
2004 /* newSVpvn generates undef if aptr is NULL */
2005 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2012 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2013 DO_BO_UNPACK(aquad, 64);
2015 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2016 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2017 else if (checksum > bits_in_uv)
2018 cdouble += (NV)aquad;
2026 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2027 DO_BO_UNPACK(auquad, 64);
2029 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2030 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2031 else if (checksum > bits_in_uv)
2032 cdouble += (NV)auquad;
2037 #endif /* HAS_QUAD */
2038 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2042 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2043 DO_BO_UNPACK_N(afloat, float);
2045 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2053 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2054 DO_BO_UNPACK_N(adouble, double);
2056 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2064 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2065 DO_BO_UNPACK_N(anv, NV);
2067 PUSHs(sv_2mortal(newSVnv(anv)));
2072 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2075 long double aldouble;
2076 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2077 DO_BO_UNPACK_N(aldouble, long double);
2079 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2081 cdouble += aldouble;
2087 * Initialise the decode mapping. By using a table driven
2088 * algorithm, the code will be character-set independent
2089 * (and just as fast as doing character arithmetic)
2091 if (PL_uudmap[(U8)'M'] == 0) {
2094 for (i = 0; i < sizeof(PL_uuemap); ++i)
2095 PL_uudmap[(U8)PL_uuemap[i]] = i;
2097 * Because ' ' and '`' map to the same value,
2098 * we need to decode them both the same.
2100 PL_uudmap[(U8)' '] = 0;
2103 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2104 sv = sv_2mortal(newSV(l));
2105 if (l) SvPOK_on(sv);
2108 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2114 next_uni_uu(aTHX_ &s, strend, &a);
2115 next_uni_uu(aTHX_ &s, strend, &b);
2116 next_uni_uu(aTHX_ &s, strend, &c);
2117 next_uni_uu(aTHX_ &s, strend, &d);
2118 hunk[0] = (char)((a << 2) | (b >> 4));
2119 hunk[1] = (char)((b << 4) | (c >> 2));
2120 hunk[2] = (char)((c << 6) | d);
2121 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2129 /* possible checksum byte */
2130 const char *skip = s+UTF8SKIP(s);
2131 if (skip < strend && *skip == '\n')
2137 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2142 len = PL_uudmap[*(U8*)s++] & 077;
2144 if (s < strend && ISUUCHAR(*s))
2145 a = PL_uudmap[*(U8*)s++] & 077;
2148 if (s < strend && ISUUCHAR(*s))
2149 b = PL_uudmap[*(U8*)s++] & 077;
2152 if (s < strend && ISUUCHAR(*s))
2153 c = PL_uudmap[*(U8*)s++] & 077;
2156 if (s < strend && ISUUCHAR(*s))
2157 d = PL_uudmap[*(U8*)s++] & 077;
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2168 else /* possible checksum byte */
2169 if (s + 1 < strend && s[1] == '\n')
2178 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2179 (checksum > bits_in_uv &&
2180 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2183 anv = (NV) (1 << (checksum & 15));
2184 while (checksum >= 16) {
2188 while (cdouble < 0.0)
2190 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2191 sv = newSVnv(cdouble);
2194 if (checksum < bits_in_uv) {
2195 UV mask = ((UV)1 << checksum) - 1;
2200 XPUSHs(sv_2mortal(sv));
2204 if (symptr->flags & FLAG_SLASH){
2205 if (SP - PL_stack_base - start_sp_offset <= 0)
2206 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2207 if( next_symbol(symptr) ){
2208 if( symptr->howlen == e_number )
2209 Perl_croak(aTHX_ "Count after length/code in unpack" );
2211 /* ...end of char buffer then no decent length available */
2212 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2214 /* take top of stack (hope it's numeric) */
2217 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2220 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2222 datumtype = symptr->code;
2223 explicit_length = FALSE;
2231 return SP - PL_stack_base - start_sp_offset;
2239 I32 gimme = GIMME_V;
2242 const char *pat = SvPV_const(left, llen);
2243 const char *s = SvPV_const(right, rlen);
2244 const char *strend = s + rlen;
2245 const char *patend = pat + llen;
2249 cnt = unpackstring(pat, patend, s, strend,
2250 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2251 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2254 if ( !cnt && gimme == G_SCALAR )
2255 PUSHs(&PL_sv_undef);
2260 doencodes(U8 *h, const char *s, I32 len)
2262 *h++ = PL_uuemap[len];
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2267 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2272 const char r = (len > 1 ? s[1] : '\0');
2273 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2274 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2275 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2276 *h++ = PL_uuemap[0];
2283 S_is_an_int(pTHX_ const char *s, STRLEN l)
2285 SV *result = newSVpvn(s, l);
2286 char *const result_c = SvPV_nolen(result); /* convenience */
2287 char *out = result_c;
2297 SvREFCNT_dec(result);
2320 SvREFCNT_dec(result);
2326 SvCUR_set(result, out - result_c);
2330 /* pnum must be '\0' terminated */
2332 S_div128(pTHX_ SV *pnum, bool *done)
2335 char * const s = SvPV(pnum, len);
2341 const int i = m * 10 + (*t - '0');
2342 const int r = (i >> 7); /* r < 10 */
2350 SvCUR_set(pnum, (STRLEN) (t - s));
2355 =for apidoc packlist
2357 The engine implementing pack() Perl function.
2363 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2368 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2370 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2371 Also make sure any UTF8 flag is loaded */
2372 SvPV_force_nolen(cat);
2374 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2376 (void)pack_rec( cat, &sym, beglist, endlist );
2379 /* like sv_utf8_upgrade, but also repoint the group start markers */
2381 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2384 const char *from_ptr, *from_start, *from_end, **marks, **m;
2385 char *to_start, *to_ptr;
2387 if (SvUTF8(sv)) return;
2389 from_start = SvPVX_const(sv);
2390 from_end = from_start + SvCUR(sv);
2391 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2392 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2393 if (from_ptr == from_end) {
2394 /* Simple case: no character needs to be changed */
2399 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2400 Newx(to_start, len, char);
2401 Copy(from_start, to_start, from_ptr-from_start, char);
2402 to_ptr = to_start + (from_ptr-from_start);
2404 Newx(marks, sym_ptr->level+2, const char *);
2405 for (group=sym_ptr; group; group = group->previous)
2406 marks[group->level] = from_start + group->strbeg;
2407 marks[sym_ptr->level+1] = from_end+1;
2408 for (m = marks; *m < from_ptr; m++)
2409 *m = to_start + (*m-from_start);
2411 for (;from_ptr < from_end; from_ptr++) {
2412 while (*m == from_ptr) *m++ = to_ptr;
2413 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2417 while (*m == from_ptr) *m++ = to_ptr;
2418 if (m != marks + sym_ptr->level+1) {
2421 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2423 for (group=sym_ptr; group; group = group->previous)
2424 group->strbeg = marks[group->level] - to_start;
2429 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2430 from_start -= SvIVX(sv);
2433 SvFLAGS(sv) &= ~SVf_OOK;
2436 Safefree(from_start);
2437 SvPV_set(sv, to_start);
2438 SvCUR_set(sv, to_ptr - to_start);
2443 /* Exponential string grower. Makes string extension effectively O(n)
2444 needed says how many extra bytes we need (not counting the final '\0')
2445 Only grows the string if there is an actual lack of space
2448 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2449 const STRLEN cur = SvCUR(sv);
2450 const STRLEN len = SvLEN(sv);
2452 if (len - cur > needed) return SvPVX(sv);
2453 extend = needed > len ? needed : len;
2454 return SvGROW(sv, len+extend+1);
2459 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2462 tempsym_t lookahead;
2463 I32 items = endlist - beglist;
2464 bool found = next_symbol(symptr);
2465 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2466 bool warn_utf8 = ckWARN(WARN_UTF8);
2468 if (symptr->level == 0 && found && symptr->code == 'U') {
2469 marked_upgrade(aTHX_ cat, symptr);
2470 symptr->flags |= FLAG_DO_UTF8;
2473 symptr->strbeg = SvCUR(cat);
2479 SV *lengthcode = NULL;
2480 I32 datumtype = symptr->code;
2481 howlen_t howlen = symptr->howlen;
2482 char *start = SvPVX(cat);
2483 char *cur = start + SvCUR(cat);
2485 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2489 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2493 /* e_no_len and e_number */
2494 len = symptr->length;
2499 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2501 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2502 /* We can process this letter. */
2503 STRLEN size = props & PACK_SIZE_MASK;
2504 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2508 /* Look ahead for next symbol. Do we have code/code? */
2509 lookahead = *symptr;
2510 found = next_symbol(&lookahead);
2511 if (symptr->flags & FLAG_SLASH) {
2513 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2514 if (strchr("aAZ", lookahead.code)) {
2515 if (lookahead.howlen == e_number) count = lookahead.length;
2518 if (SvGAMAGIC(*beglist)) {
2519 /* Avoid reading the active data more than once
2520 by copying it to a temporary. */
2522 const char *const pv = SvPV_const(*beglist, len);
2523 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2524 if (SvUTF8(*beglist))
2528 count = DO_UTF8(*beglist) ?
2529 sv_len_utf8(*beglist) : sv_len(*beglist);
2532 if (lookahead.code == 'Z') count++;
2535 if (lookahead.howlen == e_number && lookahead.length < items)
2536 count = lookahead.length;
2539 lookahead.howlen = e_number;
2540 lookahead.length = count;
2541 lengthcode = sv_2mortal(newSViv(count));
2544 /* Code inside the switch must take care to properly update
2545 cat (CUR length and '\0' termination) if it updated *cur and
2546 doesn't simply leave using break */
2547 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2549 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2550 (int) TYPE_NO_MODIFIERS(datumtype));
2552 Perl_croak(aTHX_ "'%%' may not be used in pack");
2555 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2556 case '.' | TYPE_IS_SHRIEKING:
2559 if (howlen == e_star) from = start;
2560 else if (len == 0) from = cur;
2562 tempsym_t *group = symptr;
2564 while (--len && group) group = group->previous;
2565 from = group ? start + group->strbeg : start;
2568 len = SvIV(fromstr);
2570 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2571 case '@' | TYPE_IS_SHRIEKING:
2574 from = start + symptr->strbeg;
2576 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2577 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2578 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2582 while (len && from < cur) {
2583 from += UTF8SKIP(from);
2587 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2589 /* Here we know from == cur */
2591 GROWING(0, cat, start, cur, len);
2592 Zero(cur, len, char);
2594 } else if (from < cur) {
2597 } else goto no_change;
2605 if (len > 0) goto grow;
2606 if (len == 0) goto no_change;
2613 tempsym_t savsym = *symptr;
2614 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2615 symptr->flags |= group_modifiers;
2616 symptr->patend = savsym.grpend;
2618 symptr->previous = &lookahead;
2621 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2622 else symptr->flags &= ~FLAG_PARSE_UTF8;
2623 was_utf8 = SvUTF8(cat);
2624 symptr->patptr = savsym.grpbeg;
2625 beglist = pack_rec(cat, symptr, beglist, endlist);
2626 if (SvUTF8(cat) != was_utf8)
2627 /* This had better be an upgrade while in utf8==0 mode */
2630 if (savsym.howlen == e_star && beglist == endlist)
2631 break; /* No way to continue */
2633 items = endlist - beglist;
2634 lookahead.flags = symptr->flags & ~group_modifiers;
2637 case 'X' | TYPE_IS_SHRIEKING:
2638 if (!len) /* Avoid division by 0 */
2645 hop += UTF8SKIP(hop);
2652 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2656 len = (cur-start) % len;
2660 if (len < 1) goto no_change;
2664 Perl_croak(aTHX_ "'%c' outside of string in pack",
2665 (int) TYPE_NO_MODIFIERS(datumtype));
2666 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2668 Perl_croak(aTHX_ "'%c' outside of string in pack",
2669 (int) TYPE_NO_MODIFIERS(datumtype));
2675 if (cur - start < len)
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
2680 if (cur < start+symptr->strbeg) {
2681 /* Make sure group starts don't point into the void */
2683 const STRLEN length = cur-start;
2684 for (group = symptr;
2685 group && length < group->strbeg;
2686 group = group->previous) group->strbeg = length;
2687 lookahead.strbeg = length;
2690 case 'x' | TYPE_IS_SHRIEKING: {
2692 if (!len) /* Avoid division by 0 */
2694 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2695 else ai32 = (cur - start) % len;
2696 if (ai32 == 0) goto no_change;
2708 aptr = SvPV_const(fromstr, fromlen);
2709 if (DO_UTF8(fromstr)) {
2710 const char *end, *s;
2712 if (!utf8 && !SvUTF8(cat)) {
2713 marked_upgrade(aTHX_ cat, symptr);
2714 lookahead.flags |= FLAG_DO_UTF8;
2715 lookahead.strbeg = symptr->strbeg;
2718 cur = start + SvCUR(cat);
2720 if (howlen == e_star) {
2721 if (utf8) goto string_copy;
2725 end = aptr + fromlen;
2726 fromlen = datumtype == 'Z' ? len-1 : len;
2727 while ((I32) fromlen > 0 && s < end) {
2732 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2735 if (datumtype == 'Z') len++;
2741 fromlen = len - fromlen;
2742 if (datumtype == 'Z') fromlen--;
2743 if (howlen == e_star) {
2745 if (datumtype == 'Z') len++;
2747 GROWING(0, cat, start, cur, len);
2748 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2749 datumtype | TYPE_IS_PACK))
2750 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2754 if (howlen == e_star) {
2756 if (datumtype == 'Z') len++;
2758 if (len <= (I32) fromlen) {
2760 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2762 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2764 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2765 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2767 while (fromlen > 0) {
2768 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2774 if (howlen == e_star) {
2776 if (datumtype == 'Z') len++;
2778 if (len <= (I32) fromlen) {
2780 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2782 GROWING(0, cat, start, cur, len);
2783 Copy(aptr, cur, fromlen, char);
2787 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2793 const char *str, *end;
2800 str = SvPV_const(fromstr, fromlen);
2801 end = str + fromlen;
2802 if (DO_UTF8(fromstr)) {
2804 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2806 utf8_source = FALSE;
2807 utf8_flags = 0; /* Unused, but keep compilers happy */
2809 if (howlen == e_star) len = fromlen;
2810 field_len = (len+7)/8;
2811 GROWING(utf8, cat, start, cur, field_len);
2812 if (len > (I32)fromlen) len = fromlen;
2815 if (datumtype == 'B')
2819 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2821 } else bits |= *str++ & 1;
2822 if (l & 7) bits <<= 1;
2824 PUSH_BYTE(utf8, cur, bits);
2829 /* datumtype == 'b' */
2833 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2834 if (val & 1) bits |= 0x80;
2835 } else if (*str++ & 1)
2837 if (l & 7) bits >>= 1;
2839 PUSH_BYTE(utf8, cur, bits);
2845 if (datumtype == 'B')
2846 bits <<= 7 - (l & 7);
2848 bits >>= 7 - (l & 7);
2849 PUSH_BYTE(utf8, cur, bits);
2852 /* Determine how many chars are left in the requested field */
2854 if (howlen == e_star) field_len = 0;
2855 else field_len -= l;
2856 Zero(cur, field_len, char);
2862 const char *str, *end;
2869 str = SvPV_const(fromstr, fromlen);
2870 end = str + fromlen;
2871 if (DO_UTF8(fromstr)) {
2873 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2875 utf8_source = FALSE;
2876 utf8_flags = 0; /* Unused, but keep compilers happy */
2878 if (howlen == e_star) len = fromlen;
2879 field_len = (len+1)/2;
2880 GROWING(utf8, cat, start, cur, field_len);
2881 if (!utf8 && len > (I32)fromlen) len = fromlen;
2884 if (datumtype == 'H')
2888 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2889 if (val < 256 && isALPHA(val))
2890 bits |= (val + 9) & 0xf;
2893 } else if (isALPHA(*str))
2894 bits |= (*str++ + 9) & 0xf;
2896 bits |= *str++ & 0xf;
2897 if (l & 1) bits <<= 4;
2899 PUSH_BYTE(utf8, cur, bits);
2907 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2908 if (val < 256 && isALPHA(val))
2909 bits |= ((val + 9) & 0xf) << 4;
2911 bits |= (val & 0xf) << 4;
2912 } else if (isALPHA(*str))
2913 bits |= ((*str++ + 9) & 0xf) << 4;
2915 bits |= (*str++ & 0xf) << 4;
2916 if (l & 1) bits >>= 4;
2918 PUSH_BYTE(utf8, cur, bits);
2924 PUSH_BYTE(utf8, cur, bits);
2927 /* Determine how many chars are left in the requested field */
2929 if (howlen == e_star) field_len = 0;
2930 else field_len -= l;
2931 Zero(cur, field_len, char);
2939 aiv = SvIV(fromstr);
2940 if ((-128 > aiv || aiv > 127) &&
2942 Perl_warner(aTHX_ packWARN(WARN_PACK),
2943 "Character in 'c' format wrapped in pack");
2944 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2949 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2952 GROWING(0, cat, start, cur, len);
2956 aiv = SvIV(fromstr);
2957 if ((0 > aiv || aiv > 0xff) &&
2959 Perl_warner(aTHX_ packWARN(WARN_PACK),
2960 "Character in 'C' format wrapped in pack");
2961 *cur++ = (char)(aiv & 0xff);
2966 U8 in_bytes = (U8)IN_BYTES;
2968 end = start+SvLEN(cat)-1;
2969 if (utf8) end -= UTF8_MAXLEN-1;
2973 auv = SvUV(fromstr);
2974 if (in_bytes) auv = auv % 0x100;
2979 SvCUR_set(cat, cur - start);
2981 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2982 end = start+SvLEN(cat)-UTF8_MAXLEN;
2984 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2987 0 : UNICODE_ALLOW_ANY);
2992 SvCUR_set(cat, cur - start);
2993 marked_upgrade(aTHX_ cat, symptr);
2994 lookahead.flags |= FLAG_DO_UTF8;
2995 lookahead.strbeg = symptr->strbeg;
2998 cur = start + SvCUR(cat);
2999 end = start+SvLEN(cat)-UTF8_MAXLEN;
3002 if (ckWARN(WARN_PACK))
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'W' format wrapped in pack");
3009 SvCUR_set(cat, cur - start);
3010 GROWING(0, cat, start, cur, len+1);
3011 end = start+SvLEN(cat)-1;
3013 *(U8 *) cur++ = (U8)auv;
3022 if (!(symptr->flags & FLAG_DO_UTF8)) {
3023 marked_upgrade(aTHX_ cat, symptr);
3024 lookahead.flags |= FLAG_DO_UTF8;
3025 lookahead.strbeg = symptr->strbeg;
3031 end = start+SvLEN(cat);
3032 if (!utf8) end -= UTF8_MAXLEN;
3036 auv = SvUV(fromstr);
3038 U8 buffer[UTF8_MAXLEN], *endb;
3039 endb = uvuni_to_utf8_flags(buffer, auv,
3041 0 : UNICODE_ALLOW_ANY);
3042 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3044 SvCUR_set(cat, cur - start);
3045 GROWING(0, cat, start, cur,
3046 len+(endb-buffer)*UTF8_EXPAND);
3047 end = start+SvLEN(cat);
3049 cur = bytes_to_uni(buffer, endb-buffer, cur);
3053 SvCUR_set(cat, cur - start);
3054 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3055 end = start+SvLEN(cat)-UTF8_MAXLEN;
3057 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3059 0 : UNICODE_ALLOW_ANY);
3064 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3070 anv = SvNV(fromstr);
3072 /* VOS does not automatically map a floating-point overflow
3073 during conversion from double to float into infinity, so we
3074 do it by hand. This code should either be generalized for
3075 any OS that needs it, or removed if and when VOS implements
3076 posix-976 (suggestion to support mapping to infinity).
3077 Paul.Green@stratus.com 02-04-02. */
3079 afloat = _float_constants[0]; /* single prec. inf. */
3080 else if (anv < -FLT_MAX)
3081 afloat = _float_constants[0]; /* single prec. inf. */
3082 else afloat = (float) anv;
3084 # if defined(VMS) && !defined(__IEEE_FP)
3085 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3086 * on Alpha; fake it if we don't have them.
3090 else if (anv < -FLT_MAX)
3092 else afloat = (float)anv;
3094 afloat = (float)anv;
3096 #endif /* __VOS__ */
3097 DO_BO_PACK_N(afloat, float);
3098 PUSH_VAR(utf8, cur, afloat);
3106 anv = SvNV(fromstr);
3108 /* VOS does not automatically map a floating-point overflow
3109 during conversion from long double to double into infinity,
3110 so we do it by hand. This code should either be generalized
3111 for any OS that needs it, or removed if and when VOS
3112 implements posix-976 (suggestion to support mapping to
3113 infinity). Paul.Green@stratus.com 02-04-02. */
3115 adouble = _double_constants[0]; /* double prec. inf. */
3116 else if (anv < -DBL_MAX)
3117 adouble = _double_constants[0]; /* double prec. inf. */
3118 else adouble = (double) anv;
3120 # if defined(VMS) && !defined(__IEEE_FP)
3121 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3122 * on Alpha; fake it if we don't have them.
3126 else if (anv < -DBL_MAX)
3128 else adouble = (double)anv;
3130 adouble = (double)anv;
3132 #endif /* __VOS__ */
3133 DO_BO_PACK_N(adouble, double);
3134 PUSH_VAR(utf8, cur, adouble);
3139 Zero(&anv, 1, NV); /* can be long double with unused bits */
3142 anv = SvNV(fromstr);
3143 DO_BO_PACK_N(anv, NV);
3144 PUSH_VAR(utf8, cur, anv);
3148 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3150 long double aldouble;
3151 /* long doubles can have unused bits, which may be nonzero */
3152 Zero(&aldouble, 1, long double);
3155 aldouble = (long double)SvNV(fromstr);
3156 DO_BO_PACK_N(aldouble, long double);
3157 PUSH_VAR(utf8, cur, aldouble);
3162 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3163 case 'n' | TYPE_IS_SHRIEKING:
3169 ai16 = (I16)SvIV(fromstr);
3171 ai16 = PerlSock_htons(ai16);
3173 PUSH16(utf8, cur, &ai16);
3176 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3177 case 'v' | TYPE_IS_SHRIEKING:
3183 ai16 = (I16)SvIV(fromstr);
3187 PUSH16(utf8, cur, &ai16);
3190 case 'S' | TYPE_IS_SHRIEKING:
3191 #if SHORTSIZE != SIZE16
3193 unsigned short aushort;
3195 aushort = SvUV(fromstr);
3196 DO_BO_PACK(aushort, s);
3197 PUSH_VAR(utf8, cur, aushort);
3207 au16 = (U16)SvUV(fromstr);
3208 DO_BO_PACK(au16, 16);
3209 PUSH16(utf8, cur, &au16);
3212 case 's' | TYPE_IS_SHRIEKING:
3213 #if SHORTSIZE != SIZE16
3217 ashort = SvIV(fromstr);
3218 DO_BO_PACK(ashort, s);
3219 PUSH_VAR(utf8, cur, ashort);
3229 ai16 = (I16)SvIV(fromstr);
3230 DO_BO_PACK(ai16, 16);
3231 PUSH16(utf8, cur, &ai16);
3235 case 'I' | TYPE_IS_SHRIEKING:
3239 auint = SvUV(fromstr);
3240 DO_BO_PACK(auint, i);
3241 PUSH_VAR(utf8, cur, auint);
3248 aiv = SvIV(fromstr);
3249 #if IVSIZE == INTSIZE
3251 #elif IVSIZE == LONGSIZE
3253 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3254 DO_BO_PACK(aiv, 64);
3256 Perl_croak(aTHX_ "'j' not supported on this platform");
3258 PUSH_VAR(utf8, cur, aiv);
3265 auv = SvUV(fromstr);
3266 #if UVSIZE == INTSIZE
3268 #elif UVSIZE == LONGSIZE
3270 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3271 DO_BO_PACK(auv, 64);
3273 Perl_croak(aTHX_ "'J' not supported on this platform");
3275 PUSH_VAR(utf8, cur, auv);
3282 anv = SvNV(fromstr);
3286 SvCUR_set(cat, cur - start);
3287 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3290 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3291 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3292 any negative IVs will have already been got by the croak()
3293 above. IOK is untrue for fractions, so we test them
3294 against UV_MAX_P1. */
3295 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3296 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3297 char *in = buf + sizeof(buf);
3298 UV auv = SvUV(fromstr);
3301 *--in = (char)((auv & 0x7f) | 0x80);
3304 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3305 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3306 in, (buf + sizeof(buf)) - in);
3307 } else if (SvPOKp(fromstr))
3309 else if (SvNOKp(fromstr)) {
3310 /* 10**NV_MAX_10_EXP is the largest power of 10
3311 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3312 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3313 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3314 And with that many bytes only Inf can overflow.
3315 Some C compilers are strict about integral constant
3316 expressions so we conservatively divide by a slightly
3317 smaller integer instead of multiplying by the exact
3318 floating-point value.
3320 #ifdef NV_MAX_10_EXP
3321 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3322 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3324 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3325 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3327 char *in = buf + sizeof(buf);
3329 anv = Perl_floor(anv);
3331 const NV next = Perl_floor(anv / 128);
3332 if (in <= buf) /* this cannot happen ;-) */
3333 Perl_croak(aTHX_ "Cannot compress integer in pack");
3334 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3337 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3338 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3339 in, (buf + sizeof(buf)) - in);
3348 /* Copy string and check for compliance */
3349 from = SvPV_const(fromstr, len);
3350 if ((norm = is_an_int(from, len)) == NULL)
3351 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3353 Newx(result, len, char);
3356 while (!done) *--in = div128(norm, &done) | 0x80;
3357 result[len - 1] &= 0x7F; /* clear continue bit */
3358 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3359 in, (result + len) - in);
3361 SvREFCNT_dec(norm); /* free norm */
3366 case 'i' | TYPE_IS_SHRIEKING:
3370 aint = SvIV(fromstr);
3371 DO_BO_PACK(aint, i);
3372 PUSH_VAR(utf8, cur, aint);
3375 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3376 case 'N' | TYPE_IS_SHRIEKING:
3382 au32 = SvUV(fromstr);
3384 au32 = PerlSock_htonl(au32);
3386 PUSH32(utf8, cur, &au32);
3389 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3390 case 'V' | TYPE_IS_SHRIEKING:
3396 au32 = SvUV(fromstr);
3400 PUSH32(utf8, cur, &au32);
3403 case 'L' | TYPE_IS_SHRIEKING:
3404 #if LONGSIZE != SIZE32
3406 unsigned long aulong;
3408 aulong = SvUV(fromstr);
3409 DO_BO_PACK(aulong, l);
3410 PUSH_VAR(utf8, cur, aulong);
3420 au32 = SvUV(fromstr);
3421 DO_BO_PACK(au32, 32);
3422 PUSH32(utf8, cur, &au32);
3425 case 'l' | TYPE_IS_SHRIEKING:
3426 #if LONGSIZE != SIZE32
3430 along = SvIV(fromstr);
3431 DO_BO_PACK(along, l);
3432 PUSH_VAR(utf8, cur, along);
3442 ai32 = SvIV(fromstr);
3443 DO_BO_PACK(ai32, 32);
3444 PUSH32(utf8, cur, &ai32);
3452 auquad = (Uquad_t) SvUV(fromstr);
3453 DO_BO_PACK(auquad, 64);
3454 PUSH_VAR(utf8, cur, auquad);
3461 aquad = (Quad_t)SvIV(fromstr);
3462 DO_BO_PACK(aquad, 64);
3463 PUSH_VAR(utf8, cur, aquad);
3466 #endif /* HAS_QUAD */
3468 len = 1; /* assume SV is correct length */
3469 GROWING(utf8, cat, start, cur, sizeof(char *));
3476 SvGETMAGIC(fromstr);
3477 if (!SvOK(fromstr)) aptr = NULL;
3479 /* XXX better yet, could spirit away the string to
3480 * a safe spot and hang on to it until the result
3481 * of pack() (and all copies of the result) are
3484 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3485 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3486 Perl_warner(aTHX_ packWARN(WARN_PACK),
3487 "Attempt to pack pointer to temporary value");
3489 if (SvPOK(fromstr) || SvNIOK(fromstr))
3490 aptr = SvPV_nomg_const_nolen(fromstr);
3492 aptr = SvPV_force_flags_nolen(fromstr, 0);
3494 DO_BO_PACK_PC(aptr);
3495 PUSH_VAR(utf8, cur, aptr);
3499 const char *aptr, *aend;
3503 if (len <= 2) len = 45;
3504 else len = len / 3 * 3;
3506 if (ckWARN(WARN_PACK))
3507 Perl_warner(aTHX_ packWARN(WARN_PACK),
3508 "Field too wide in 'u' format in pack");
3511 aptr = SvPV_const(fromstr, fromlen);
3512 from_utf8 = DO_UTF8(fromstr);
3514 aend = aptr + fromlen;
3515 fromlen = sv_len_utf8(fromstr);
3516 } else aend = NULL; /* Unused, but keep compilers happy */
3517 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3518 while (fromlen > 0) {
3521 U8 hunk[1+63/3*4+1];
3523 if ((I32)fromlen > len)
3529 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3530 'u' | TYPE_IS_PACK)) {
3532 SvCUR_set(cat, cur - start);
3533 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3535 end = doencodes(hunk, buffer, todo);
3537 end = doencodes(hunk, aptr, todo);
3540 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3547 SvCUR_set(cat, cur - start);
3549 *symptr = lookahead;
3558 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3559 register SV *cat = TARG;
3561 SV *pat_sv = *++MARK;
3562 register const char *pat = SvPV_const(pat_sv, fromlen);
3563 register const char *patend = pat + fromlen;
3566 sv_setpvn(cat, "", 0);
3569 packlist(cat, pat, patend, MARK, SP + 1);
3579 * c-indentation-style: bsd
3581 * indent-tabs-mode: t
3584 * ex: set ts=8 sts=4 sw=4 noet: