3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325 # elif PTRSIZE == IVSIZE
326 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
331 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
337 # if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339 # define DO_BO_UNPACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 # define DO_BO_PACK_N(var, type) \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
361 #endif /* PERL_PACK_CAN_BYTEORDER */
363 #define PACK_SIZE_CANNOT_CSUM 0x80
364 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365 #define PACK_SIZE_MASK 0x3F
367 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
370 #if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
374 typedef U8 packprops_t;
377 STATIC const packprops_t packprops[512] = {
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
393 /* I */ sizeof(unsigned int),
400 #if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
415 /* f */ sizeof(float),
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425 #if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
452 /* L */ sizeof(unsigned long),
454 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
460 /* S */ sizeof(unsigned short),
462 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 /* l */ sizeof(long),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
479 /* s */ sizeof(short),
481 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
497 /* EBCDIC (or bust) */
498 STATIC const packprops_t packprops[512] = {
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
512 /* f */ sizeof(float),
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523 #if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
544 /* I */ sizeof(unsigned int),
552 #if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
579 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
587 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
599 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
607 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
618 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
640 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 const char *from = *s;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
659 } else from += retlen;
664 *(U8 *)buf++ = (U8)val;
666 /* We have enough characters for the buffer. Did we have problems ? */
669 /* Rewalk the string fragment while warning */
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
676 if (from > end) from = end;
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
690 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = PL_uudmap[val] & 077;
706 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
707 U8 buffer[UTF8_MAXLEN];
708 const U8 * const end = start + len;
710 while (start < end) {
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
730 #define PUSH_BYTES(utf8, cur, buf, len) \
732 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
734 Copy(buf, cur, len, char); \
739 #define GROWING(utf8, cat, start, cur, in_len) \
741 STRLEN glen = (in_len); \
742 if (utf8) glen *= UTF8_EXPAND; \
743 if ((cur) + glen >= (start) + SvLEN(cat)) { \
744 (start) = sv_exp_grow(cat, glen); \
745 (cur) = (start) + SvCUR(cat); \
749 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
751 const STRLEN glen = (in_len); \
753 if (utf8) gl *= UTF8_EXPAND; \
754 if ((cur) + gl >= (start) + SvLEN(cat)) { \
756 SvCUR_set((cat), (cur) - (start)); \
757 (start) = sv_exp_grow(cat, gl); \
758 (cur) = (start) + SvCUR(cat); \
760 PUSH_BYTES(utf8, cur, buf, glen); \
763 #define PUSH_BYTE(utf8, s, byte) \
766 const U8 au8 = (byte); \
767 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
768 } else *(U8 *)(s)++ = (byte); \
771 /* Only to be used inside a loop (see the break) */
772 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
775 if (str >= end) break; \
776 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
777 if (retlen == (STRLEN) -1 || retlen == 0) { \
779 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
784 static const char *_action( const tempsym_t* symptr )
786 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
789 /* Returns the sizeof() struct described by pat */
791 S_measure_struct(pTHX_ tempsym_t* symptr)
795 while (next_symbol(symptr)) {
799 switch (symptr->howlen) {
801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
805 /* e_no_len and e_number */
806 len = symptr->length;
810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
819 #ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
826 case 'U': /* XXXX Is it correct? */
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 (int) TYPE_NO_MODIFIERS(symptr->code),
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 if (!len) /* Avoid division by 0 */
853 len = total % len; /* Assumed: the start is aligned. */
858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
888 size = sizeof(char*);
898 /* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
904 while (patptr < patend) {
905 const char c = *patptr++;
912 while (patptr < patend && *patptr != '\n')
916 patptr = group_end(patptr, patend, ')') + 1;
918 patptr = group_end(patptr, patend, ']') + 1;
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
926 /* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
931 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
933 I32 len = *patptr++ - '0';
934 while (isDIGIT(*patptr)) {
935 if (len >= 0x7FFFFFFF/10)
936 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
937 len = (len * 10) + (*patptr++ - '0');
943 /* The marvellous template parsing routine: Using state stored in *symptr,
944 * locates next template code and count
947 S_next_symbol(pTHX_ tempsym_t* symptr )
949 const char* patptr = symptr->patptr;
950 const char* const patend = symptr->patend;
952 symptr->flags &= ~FLAG_SLASH;
954 while (patptr < patend) {
955 if (isSPACE(*patptr))
957 else if (*patptr == '#') {
959 while (patptr < patend && *patptr != '\n')
964 /* We should have found a template code */
965 I32 code = *patptr++ & 0xFF;
966 U32 inherited_modifiers = 0;
968 if (code == ','){ /* grandfather in commas but with a warning */
969 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
970 symptr->flags |= FLAG_COMMA;
971 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
972 "Invalid type ',' in %s", _action( symptr ) );
977 /* for '(', skip to ')' */
979 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
980 Perl_croak(aTHX_ "()-group starts with a count in %s",
982 symptr->grpbeg = patptr;
983 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
984 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
985 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
989 /* look for group modifiers to inherit */
990 if (TYPE_ENDIANNESS(symptr->flags)) {
991 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
992 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
995 /* look for modifiers */
996 while (patptr < patend) {
1001 modifier = TYPE_IS_SHRIEKING;
1002 allowed = SHRIEKING_ALLOWED_TYPES;
1004 #ifdef PERL_PACK_CAN_BYTEORDER
1006 modifier = TYPE_IS_BIG_ENDIAN;
1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1010 modifier = TYPE_IS_LITTLE_ENDIAN;
1011 allowed = ENDIANNESS_ALLOWED_TYPES;
1013 #endif /* PERL_PACK_CAN_BYTEORDER */
1023 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1024 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1025 allowed, _action( symptr ) );
1027 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1028 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1029 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1030 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1031 TYPE_ENDIANNESS_MASK)
1032 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1033 *patptr, _action( symptr ) );
1035 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1036 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1037 "Duplicate modifier '%c' after '%c' in %s",
1038 *patptr, (int) TYPE_NO_MODIFIERS(code),
1039 _action( symptr ) );
1046 /* inherit modifiers */
1047 code |= inherited_modifiers;
1049 /* look for count and/or / */
1050 if (patptr < patend) {
1051 if (isDIGIT(*patptr)) {
1052 patptr = get_num( patptr, &symptr->length );
1053 symptr->howlen = e_number;
1055 } else if (*patptr == '*') {
1057 symptr->howlen = e_star;
1059 } else if (*patptr == '[') {
1060 const char* lenptr = ++patptr;
1061 symptr->howlen = e_number;
1062 patptr = group_end( patptr, patend, ']' ) + 1;
1063 /* what kind of [] is it? */
1064 if (isDIGIT(*lenptr)) {
1065 lenptr = get_num( lenptr, &symptr->length );
1066 if( *lenptr != ']' )
1067 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1068 _action( symptr ) );
1070 tempsym_t savsym = *symptr;
1071 symptr->patend = patptr-1;
1072 symptr->patptr = lenptr;
1073 savsym.length = measure_struct(symptr);
1077 symptr->howlen = e_no_len;
1082 while (patptr < patend) {
1083 if (isSPACE(*patptr))
1085 else if (*patptr == '#') {
1087 while (patptr < patend && *patptr != '\n')
1089 if (patptr < patend)
1092 if (*patptr == '/') {
1093 symptr->flags |= FLAG_SLASH;
1095 if (patptr < patend &&
1096 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1097 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1098 _action( symptr ) );
1104 /* at end - no count, no / */
1105 symptr->howlen = e_no_len;
1109 symptr->code = code;
1110 symptr->patptr = patptr;
1114 symptr->patptr = patptr;
1119 There is no way to cleanly handle the case where we should process the
1120 string per byte in its upgraded form while it's really in downgraded form
1121 (e.g. estimates like strend-s as an upper bound for the number of
1122 characters left wouldn't work). So if we foresee the need of this
1123 (pattern starts with U or contains U0), we want to work on the encoded
1124 version of the string. Users are advised to upgrade their pack string
1125 themselves if they need to do a lot of unpacks like this on it
1128 need_utf8(const char *pat, const char *patend)
1131 while (pat < patend) {
1132 if (pat[0] == '#') {
1134 pat = (const char *) memchr(pat, '\n', patend-pat);
1135 if (!pat) return FALSE;
1136 } else if (pat[0] == 'U') {
1137 if (first || pat[1] == '0') return TRUE;
1138 } else first = FALSE;
1145 first_symbol(const char *pat, const char *patend) {
1146 while (pat < patend) {
1147 if (pat[0] != '#') return pat[0];
1149 pat = (const char *) memchr(pat, '\n', patend-pat);
1157 =for apidoc unpackstring
1159 The engine implementing unpack() Perl function. C<unpackstring> puts the
1160 extracted list items on the stack and returns the number of elements.
1161 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1166 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1170 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1171 else if (need_utf8(pat, patend)) {
1172 /* We probably should try to avoid this in case a scalar context call
1173 wouldn't get to the "U0" */
1174 STRLEN len = strend - s;
1175 s = (char *) bytes_to_utf8((U8 *) s, &len);
1178 flags |= FLAG_DO_UTF8;
1181 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1182 flags |= FLAG_PARSE_UTF8;
1184 TEMPSYM_INIT(&sym, pat, patend, flags);
1186 return unpack_rec(&sym, s, s, strend, NULL );
1191 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1195 const I32 start_sp_offset = SP - PL_stack_base;
1201 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1202 bool beyond = FALSE;
1203 bool explicit_length;
1204 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1205 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1206 symptr->strbeg = s - strbeg;
1208 while (next_symbol(symptr)) {
1211 I32 datumtype = symptr->code;
1212 /* do first one only unless in list context
1213 / is implemented by unpacking the count, then popping it from the
1214 stack, so must check that we're not in the middle of a / */
1215 if ( unpack_only_one
1216 && (SP - PL_stack_base == start_sp_offset + 1)
1217 && (datumtype != '/') ) /* XXX can this be omitted */
1220 switch (howlen = symptr->howlen) {
1222 len = strend - strbeg; /* long enough */
1225 /* e_no_len and e_number */
1226 len = symptr->length;
1230 explicit_length = TRUE;
1232 beyond = s >= strend;
1234 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1236 /* props nonzero means we can process this letter. */
1237 const long size = props & PACK_SIZE_MASK;
1238 const long howmany = (strend - s) / size;
1242 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1243 if (len && unpack_only_one) len = 1;
1249 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1251 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1254 if (howlen == e_no_len)
1255 len = 16; /* len is not specified */
1263 tempsym_t savsym = *symptr;
1264 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1265 symptr->flags |= group_modifiers;
1266 symptr->patend = savsym.grpend;
1267 symptr->previous = &savsym;
1271 symptr->patptr = savsym.grpbeg;
1272 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1273 else symptr->flags &= ~FLAG_PARSE_UTF8;
1274 unpack_rec(symptr, s, strbeg, strend, &s);
1275 if (s == strend && savsym.howlen == e_star)
1276 break; /* No way to continue */
1279 savsym.flags = symptr->flags & ~group_modifiers;
1283 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1284 case '.' | TYPE_IS_SHRIEKING:
1289 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1290 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1291 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1292 const bool u8 = utf8;
1294 if (howlen == e_star) from = strbeg;
1295 else if (len <= 0) from = s;
1297 tempsym_t *group = symptr;
1299 while (--len && group) group = group->previous;
1300 from = group ? strbeg + group->strbeg : strbeg;
1303 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1304 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1305 XPUSHs(sv_2mortal(sv));
1308 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1309 case '@' | TYPE_IS_SHRIEKING:
1312 s = strbeg + symptr->strbeg;
1313 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1314 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1315 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
1326 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1329 Perl_croak(aTHX_ "'@' outside of string in unpack");
1333 case 'X' | TYPE_IS_SHRIEKING:
1334 if (!len) /* Avoid division by 0 */
1337 const char *hop, *last;
1339 hop = last = strbeg;
1341 hop += UTF8SKIP(hop);
1348 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1352 len = (s - strbeg) % len;
1358 Perl_croak(aTHX_ "'X' outside of string in unpack");
1359 while (--s, UTF8_IS_CONTINUATION(*s)) {
1361 Perl_croak(aTHX_ "'X' outside of string in unpack");
1366 if (len > s - strbeg)
1367 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1371 case 'x' | TYPE_IS_SHRIEKING: {
1373 if (!len) /* Avoid division by 0 */
1375 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1376 else ai32 = (s - strbeg) % len;
1377 if (ai32 == 0) break;
1385 Perl_croak(aTHX_ "'x' outside of string in unpack");
1390 if (len > strend - s)
1391 Perl_croak(aTHX_ "'x' outside of string in unpack");
1396 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1402 /* Preliminary length estimate is assumed done in 'W' */
1403 if (len > strend - s) len = strend - s;
1409 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1410 if (hop >= strend) {
1412 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1417 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1419 } else if (len > strend - s)
1422 if (datumtype == 'Z') {
1423 /* 'Z' strips stuff after first null */
1424 const char *ptr, *end;
1426 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1427 sv = newSVpvn(s, ptr-s);
1428 if (howlen == e_star) /* exact for 'Z*' */
1429 len = ptr-s + (ptr != strend ? 1 : 0);
1430 } else if (datumtype == 'A') {
1431 /* 'A' strips both nulls and spaces */
1433 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1436 !is_utf8_space((U8 *) ptr)) break;
1437 if (ptr >= s) ptr += UTF8SKIP(ptr);
1440 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1442 for (ptr = s+len-1; ptr >= s; ptr--)
1443 if (*ptr != 0 && !isSPACE(*ptr)) break;
1446 sv = newSVpvn(s, ptr-s);
1447 } else sv = newSVpvn(s, len);
1451 /* Undo any upgrade done due to need_utf8() */
1452 if (!(symptr->flags & FLAG_WAS_UTF8))
1453 sv_utf8_downgrade(sv, 0);
1455 XPUSHs(sv_2mortal(sv));
1461 if (howlen == e_star || len > (strend - s) * 8)
1462 len = (strend - s) * 8;
1466 Newxz(PL_bitcount, 256, char);
1467 for (bits = 1; bits < 256; bits++) {
1468 if (bits & 1) PL_bitcount[bits]++;
1469 if (bits & 2) PL_bitcount[bits]++;
1470 if (bits & 4) PL_bitcount[bits]++;
1471 if (bits & 8) PL_bitcount[bits]++;
1472 if (bits & 16) PL_bitcount[bits]++;
1473 if (bits & 32) PL_bitcount[bits]++;
1474 if (bits & 64) PL_bitcount[bits]++;
1475 if (bits & 128) PL_bitcount[bits]++;
1479 while (len >= 8 && s < strend) {
1480 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1485 cuv += PL_bitcount[*(U8 *)s++];
1488 if (len && s < strend) {
1490 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1491 if (datumtype == 'b')
1493 if (bits & 1) cuv++;
1498 if (bits & 0x80) cuv++;
1505 sv = sv_2mortal(newSV(len ? len : 1));
1508 if (datumtype == 'b') {
1510 const I32 ai32 = len;
1511 for (len = 0; len < ai32; len++) {
1512 if (len & 7) bits >>= 1;
1514 if (s >= strend) break;
1515 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1516 } else bits = *(U8 *) s++;
1517 *str++ = bits & 1 ? '1' : '0';
1521 const I32 ai32 = len;
1522 for (len = 0; len < ai32; len++) {
1523 if (len & 7) bits <<= 1;
1525 if (s >= strend) break;
1526 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1527 } else bits = *(U8 *) s++;
1528 *str++ = bits & 0x80 ? '1' : '0';
1532 SvCUR_set(sv, str - SvPVX_const(sv));
1539 /* Preliminary length estimate, acceptable for utf8 too */
1540 if (howlen == e_star || len > (strend - s) * 2)
1541 len = (strend - s) * 2;
1542 sv = sv_2mortal(newSV(len ? len : 1));
1545 if (datumtype == 'h') {
1548 for (len = 0; len < ai32; len++) {
1549 if (len & 1) bits >>= 4;
1551 if (s >= strend) break;
1552 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1553 } else bits = * (U8 *) s++;
1554 *str++ = PL_hexdigit[bits & 15];
1558 const I32 ai32 = len;
1559 for (len = 0; len < ai32; len++) {
1560 if (len & 1) bits <<= 4;
1562 if (s >= strend) break;
1563 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1564 } else bits = *(U8 *) s++;
1565 *str++ = PL_hexdigit[(bits >> 4) & 15];
1569 SvCUR_set(sv, str - SvPVX_const(sv));
1575 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1576 if (aint >= 128) /* fake up signed chars */
1579 PUSHs(sv_2mortal(newSViv((IV)aint)));
1580 else if (checksum > bits_in_uv)
1581 cdouble += (NV)aint;
1590 if (explicit_length && datumtype == 'C')
1591 /* Switch to "character" mode */
1592 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1595 if (datumtype == 'C' ?
1596 (symptr->flags & FLAG_DO_UTF8) &&
1597 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1598 while (len-- > 0 && s < strend) {
1600 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1601 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1602 if (retlen == (STRLEN) -1 || retlen == 0)
1603 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1606 PUSHs(sv_2mortal(newSVuv((UV) val)));
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV) val;
1612 } else if (!checksum)
1614 const U8 ch = *(U8 *) s++;
1615 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1617 else if (checksum > bits_in_uv)
1618 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1620 while (len-- > 0) cuv += *(U8 *) s++;
1624 if (explicit_length) {
1625 /* Switch to "bytes in UTF-8" mode */
1626 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1628 /* Should be impossible due to the need_utf8() test */
1629 Perl_croak(aTHX_ "U0 mode on a byte string");
1633 if (len > strend - s) len = strend - s;
1635 if (len && unpack_only_one) len = 1;
1639 while (len-- > 0 && s < strend) {
1643 U8 result[UTF8_MAXLEN];
1644 const char *ptr = s;
1646 /* Bug: warns about bad utf8 even if we are short on bytes
1647 and will break out of the loop */
1648 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1651 len = UTF8SKIP(result);
1652 if (!uni_to_bytes(aTHX_ &ptr, strend,
1653 (char *) &result[1], len-1, 'U')) break;
1654 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1657 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1658 if (retlen == (STRLEN) -1 || retlen == 0)
1659 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1663 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1664 else if (checksum > bits_in_uv)
1665 cdouble += (NV) auv;
1670 case 's' | TYPE_IS_SHRIEKING:
1671 #if SHORTSIZE != SIZE16
1674 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1675 DO_BO_UNPACK(ashort, s);
1677 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1678 else if (checksum > bits_in_uv)
1679 cdouble += (NV)ashort;
1691 #if U16SIZE > SIZE16
1694 SHIFT16(utf8, s, strend, &ai16, datumtype);
1695 DO_BO_UNPACK(ai16, 16);
1696 #if U16SIZE > SIZE16
1701 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1702 else if (checksum > bits_in_uv)
1703 cdouble += (NV)ai16;
1708 case 'S' | TYPE_IS_SHRIEKING:
1709 #if SHORTSIZE != SIZE16
1711 unsigned short aushort;
1712 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1713 DO_BO_UNPACK(aushort, s);
1715 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1716 else if (checksum > bits_in_uv)
1717 cdouble += (NV)aushort;
1730 #if U16SIZE > SIZE16
1733 SHIFT16(utf8, s, strend, &au16, datumtype);
1734 DO_BO_UNPACK(au16, 16);
1736 if (datumtype == 'n')
1737 au16 = PerlSock_ntohs(au16);
1740 if (datumtype == 'v')
1744 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1745 else if (checksum > bits_in_uv)
1746 cdouble += (NV) au16;
1751 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1752 case 'v' | TYPE_IS_SHRIEKING:
1753 case 'n' | TYPE_IS_SHRIEKING:
1756 # if U16SIZE > SIZE16
1759 SHIFT16(utf8, s, strend, &ai16, datumtype);
1761 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1762 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1763 # endif /* HAS_NTOHS */
1765 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1766 ai16 = (I16) vtohs((U16) ai16);
1767 # endif /* HAS_VTOHS */
1769 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1770 else if (checksum > bits_in_uv)
1771 cdouble += (NV) ai16;
1776 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1778 case 'i' | TYPE_IS_SHRIEKING:
1781 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1782 DO_BO_UNPACK(aint, i);
1784 PUSHs(sv_2mortal(newSViv((IV)aint)));
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)aint;
1792 case 'I' | TYPE_IS_SHRIEKING:
1795 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1796 DO_BO_UNPACK(auint, i);
1798 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)auint;
1808 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1809 #if IVSIZE == INTSIZE
1810 DO_BO_UNPACK(aiv, i);
1811 #elif IVSIZE == LONGSIZE
1812 DO_BO_UNPACK(aiv, l);
1813 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1814 DO_BO_UNPACK(aiv, 64);
1816 Perl_croak(aTHX_ "'j' not supported on this platform");
1819 PUSHs(sv_2mortal(newSViv(aiv)));
1820 else if (checksum > bits_in_uv)
1829 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1830 #if IVSIZE == INTSIZE
1831 DO_BO_UNPACK(auv, i);
1832 #elif IVSIZE == LONGSIZE
1833 DO_BO_UNPACK(auv, l);
1834 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1835 DO_BO_UNPACK(auv, 64);
1837 Perl_croak(aTHX_ "'J' not supported on this platform");
1840 PUSHs(sv_2mortal(newSVuv(auv)));
1841 else if (checksum > bits_in_uv)
1847 case 'l' | TYPE_IS_SHRIEKING:
1848 #if LONGSIZE != SIZE32
1851 SHIFT_VAR(utf8, s, strend, along, datumtype);
1852 DO_BO_UNPACK(along, l);
1854 PUSHs(sv_2mortal(newSViv((IV)along)));
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)along;
1867 #if U32SIZE > SIZE32
1870 SHIFT32(utf8, s, strend, &ai32, datumtype);
1871 DO_BO_UNPACK(ai32, 32);
1872 #if U32SIZE > SIZE32
1873 if (ai32 > 2147483647) ai32 -= 4294967296;
1876 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1877 else if (checksum > bits_in_uv)
1878 cdouble += (NV)ai32;
1883 case 'L' | TYPE_IS_SHRIEKING:
1884 #if LONGSIZE != SIZE32
1886 unsigned long aulong;
1887 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1888 DO_BO_UNPACK(aulong, l);
1890 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1891 else if (checksum > bits_in_uv)
1892 cdouble += (NV)aulong;
1905 #if U32SIZE > SIZE32
1908 SHIFT32(utf8, s, strend, &au32, datumtype);
1909 DO_BO_UNPACK(au32, 32);
1911 if (datumtype == 'N')
1912 au32 = PerlSock_ntohl(au32);
1915 if (datumtype == 'V')
1919 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)au32;
1926 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1927 case 'V' | TYPE_IS_SHRIEKING:
1928 case 'N' | TYPE_IS_SHRIEKING:
1931 # if U32SIZE > SIZE32
1934 SHIFT32(utf8, s, strend, &ai32, datumtype);
1936 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1937 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1940 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1941 ai32 = (I32)vtohl((U32)ai32);
1944 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1945 else if (checksum > bits_in_uv)
1946 cdouble += (NV)ai32;
1951 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1955 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1956 DO_BO_UNPACK_PC(aptr);
1957 /* newSVpv generates undef if aptr is NULL */
1958 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1966 while (len > 0 && s < strend) {
1968 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1969 auv = (auv << 7) | (ch & 0x7f);
1970 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1973 PUSHs(sv_2mortal(newSVuv(auv)));
1978 if (++bytes >= sizeof(UV)) { /* promote to string */
1981 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1982 while (s < strend) {
1983 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1984 sv = mul128(sv, (U8)(ch & 0x7f));
1990 t = SvPV_nolen_const(sv);
1994 PUSHs(sv_2mortal(sv));
1999 if ((s >= strend) && bytes)
2000 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2004 if (symptr->howlen == e_star)
2005 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2007 if (s + sizeof(char*) <= strend) {
2009 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2010 DO_BO_UNPACK_PC(aptr);
2011 /* newSVpvn generates undef if aptr is NULL */
2012 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2019 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2020 DO_BO_UNPACK(aquad, 64);
2022 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2023 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2024 else if (checksum > bits_in_uv)
2025 cdouble += (NV)aquad;
2033 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2034 DO_BO_UNPACK(auquad, 64);
2036 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2037 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2038 else if (checksum > bits_in_uv)
2039 cdouble += (NV)auquad;
2044 #endif /* HAS_QUAD */
2045 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2049 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2050 DO_BO_UNPACK_N(afloat, float);
2052 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2060 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2061 DO_BO_UNPACK_N(adouble, double);
2063 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2071 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2072 DO_BO_UNPACK_N(anv, NV);
2074 PUSHs(sv_2mortal(newSVnv(anv)));
2079 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2082 long double aldouble;
2083 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2084 DO_BO_UNPACK_N(aldouble, long double);
2086 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2088 cdouble += aldouble;
2094 * Initialise the decode mapping. By using a table driven
2095 * algorithm, the code will be character-set independent
2096 * (and just as fast as doing character arithmetic)
2098 if (PL_uudmap['M'] == 0) {
2101 for (i = 0; i < sizeof(PL_uuemap); ++i)
2102 PL_uudmap[(U8)PL_uuemap[i]] = i;
2104 * Because ' ' and '`' map to the same value,
2105 * we need to decode them both the same.
2110 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2111 sv = sv_2mortal(newSV(l));
2112 if (l) SvPOK_on(sv);
2115 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2121 next_uni_uu(aTHX_ &s, strend, &a);
2122 next_uni_uu(aTHX_ &s, strend, &b);
2123 next_uni_uu(aTHX_ &s, strend, &c);
2124 next_uni_uu(aTHX_ &s, strend, &d);
2125 hunk[0] = (char)((a << 2) | (b >> 4));
2126 hunk[1] = (char)((b << 4) | (c >> 2));
2127 hunk[2] = (char)((c << 6) | d);
2128 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2136 /* possible checksum byte */
2137 const char *skip = s+UTF8SKIP(s);
2138 if (skip < strend && *skip == '\n')
2144 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2149 len = PL_uudmap[*(U8*)s++] & 077;
2151 if (s < strend && ISUUCHAR(*s))
2152 a = PL_uudmap[*(U8*)s++] & 077;
2155 if (s < strend && ISUUCHAR(*s))
2156 b = PL_uudmap[*(U8*)s++] & 077;
2159 if (s < strend && ISUUCHAR(*s))
2160 c = PL_uudmap[*(U8*)s++] & 077;
2163 if (s < strend && ISUUCHAR(*s))
2164 d = PL_uudmap[*(U8*)s++] & 077;
2167 hunk[0] = (char)((a << 2) | (b >> 4));
2168 hunk[1] = (char)((b << 4) | (c >> 2));
2169 hunk[2] = (char)((c << 6) | d);
2170 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2175 else /* possible checksum byte */
2176 if (s + 1 < strend && s[1] == '\n')
2185 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2186 (checksum > bits_in_uv &&
2187 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2190 anv = (NV) (1 << (checksum & 15));
2191 while (checksum >= 16) {
2195 while (cdouble < 0.0)
2197 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2198 sv = newSVnv(cdouble);
2201 if (checksum < bits_in_uv) {
2202 UV mask = ((UV)1 << checksum) - 1;
2207 XPUSHs(sv_2mortal(sv));
2211 if (symptr->flags & FLAG_SLASH){
2212 if (SP - PL_stack_base - start_sp_offset <= 0)
2213 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2214 if( next_symbol(symptr) ){
2215 if( symptr->howlen == e_number )
2216 Perl_croak(aTHX_ "Count after length/code in unpack" );
2218 /* ...end of char buffer then no decent length available */
2219 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2221 /* take top of stack (hope it's numeric) */
2224 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2227 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2229 datumtype = symptr->code;
2230 explicit_length = FALSE;
2238 return SP - PL_stack_base - start_sp_offset;
2246 I32 gimme = GIMME_V;
2249 const char *pat = SvPV_const(left, llen);
2250 const char *s = SvPV_const(right, rlen);
2251 const char *strend = s + rlen;
2252 const char *patend = pat + llen;
2256 cnt = unpackstring(pat, patend, s, strend,
2257 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2258 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2261 if ( !cnt && gimme == G_SCALAR )
2262 PUSHs(&PL_sv_undef);
2267 doencodes(U8 *h, const char *s, I32 len)
2269 *h++ = PL_uuemap[len];
2271 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2272 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2273 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2274 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2279 const char r = (len > 1 ? s[1] : '\0');
2280 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2281 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2282 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2283 *h++ = PL_uuemap[0];
2290 S_is_an_int(pTHX_ const char *s, STRLEN l)
2292 SV *result = newSVpvn(s, l);
2293 char *const result_c = SvPV_nolen(result); /* convenience */
2294 char *out = result_c;
2304 SvREFCNT_dec(result);
2327 SvREFCNT_dec(result);
2333 SvCUR_set(result, out - result_c);
2337 /* pnum must be '\0' terminated */
2339 S_div128(pTHX_ SV *pnum, bool *done)
2342 char * const s = SvPV(pnum, len);
2348 const int i = m * 10 + (*t - '0');
2349 const int r = (i >> 7); /* r < 10 */
2357 SvCUR_set(pnum, (STRLEN) (t - s));
2362 =for apidoc pack_cat
2364 The engine implementing pack() Perl function. Note: parameters next_in_list and
2365 flags are not used. This call should not be used; use packlist instead.
2371 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2374 PERL_UNUSED_ARG(next_in_list);
2375 PERL_UNUSED_ARG(flags);
2377 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2379 (void)pack_rec( cat, &sym, beglist, endlist );
2384 =for apidoc packlist
2386 The engine implementing pack() Perl function.
2392 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2398 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2400 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2401 Also make sure any UTF8 flag is loaded */
2402 SvPV_force(cat, no_len);
2404 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2406 (void)pack_rec( cat, &sym, beglist, endlist );
2409 /* like sv_utf8_upgrade, but also repoint the group start markers */
2411 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2414 const char *from_ptr, *from_start, *from_end, **marks, **m;
2415 char *to_start, *to_ptr;
2417 if (SvUTF8(sv)) return;
2419 from_start = SvPVX_const(sv);
2420 from_end = from_start + SvCUR(sv);
2421 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2422 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2423 if (from_ptr == from_end) {
2424 /* Simple case: no character needs to be changed */
2429 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2430 Newx(to_start, len, char);
2431 Copy(from_start, to_start, from_ptr-from_start, char);
2432 to_ptr = to_start + (from_ptr-from_start);
2434 Newx(marks, sym_ptr->level+2, const char *);
2435 for (group=sym_ptr; group; group = group->previous)
2436 marks[group->level] = from_start + group->strbeg;
2437 marks[sym_ptr->level+1] = from_end+1;
2438 for (m = marks; *m < from_ptr; m++)
2439 *m = to_start + (*m-from_start);
2441 for (;from_ptr < from_end; from_ptr++) {
2442 while (*m == from_ptr) *m++ = to_ptr;
2443 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2447 while (*m == from_ptr) *m++ = to_ptr;
2448 if (m != marks + sym_ptr->level+1) {
2451 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2453 for (group=sym_ptr; group; group = group->previous)
2454 group->strbeg = marks[group->level] - to_start;
2459 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2460 from_start -= SvIVX(sv);
2463 SvFLAGS(sv) &= ~SVf_OOK;
2466 Safefree(from_start);
2467 SvPV_set(sv, to_start);
2468 SvCUR_set(sv, to_ptr - to_start);
2473 /* Exponential string grower. Makes string extension effectively O(n)
2474 needed says how many extra bytes we need (not counting the final '\0')
2475 Only grows the string if there is an actual lack of space
2478 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2479 const STRLEN cur = SvCUR(sv);
2480 const STRLEN len = SvLEN(sv);
2482 if (len - cur > needed) return SvPVX(sv);
2483 extend = needed > len ? needed : len;
2484 return SvGROW(sv, len+extend+1);
2489 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2492 tempsym_t lookahead;
2493 I32 items = endlist - beglist;
2494 bool found = next_symbol(symptr);
2495 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2496 bool warn_utf8 = ckWARN(WARN_UTF8);
2498 if (symptr->level == 0 && found && symptr->code == 'U') {
2499 marked_upgrade(aTHX_ cat, symptr);
2500 symptr->flags |= FLAG_DO_UTF8;
2503 symptr->strbeg = SvCUR(cat);
2509 SV *lengthcode = NULL;
2510 I32 datumtype = symptr->code;
2511 howlen_t howlen = symptr->howlen;
2512 char *start = SvPVX(cat);
2513 char *cur = start + SvCUR(cat);
2515 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2519 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2523 /* e_no_len and e_number */
2524 len = symptr->length;
2529 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2531 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2532 /* We can process this letter. */
2533 STRLEN size = props & PACK_SIZE_MASK;
2534 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2538 /* Look ahead for next symbol. Do we have code/code? */
2539 lookahead = *symptr;
2540 found = next_symbol(&lookahead);
2541 if (symptr->flags & FLAG_SLASH) {
2543 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2544 if (strchr("aAZ", lookahead.code)) {
2545 if (lookahead.howlen == e_number) count = lookahead.length;
2548 if (SvGAMAGIC(*beglist)) {
2549 /* Avoid reading the active data more than once
2550 by copying it to a temporary. */
2552 const char *const pv = SvPV_const(*beglist, len);
2553 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2554 if (SvUTF8(*beglist))
2558 count = DO_UTF8(*beglist) ?
2559 sv_len_utf8(*beglist) : sv_len(*beglist);
2562 if (lookahead.code == 'Z') count++;
2565 if (lookahead.howlen == e_number && lookahead.length < items)
2566 count = lookahead.length;
2569 lookahead.howlen = e_number;
2570 lookahead.length = count;
2571 lengthcode = sv_2mortal(newSViv(count));
2574 /* Code inside the switch must take care to properly update
2575 cat (CUR length and '\0' termination) if it updated *cur and
2576 doesn't simply leave using break */
2577 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2579 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2580 (int) TYPE_NO_MODIFIERS(datumtype));
2582 Perl_croak(aTHX_ "'%%' may not be used in pack");
2585 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2586 case '.' | TYPE_IS_SHRIEKING:
2589 if (howlen == e_star) from = start;
2590 else if (len == 0) from = cur;
2592 tempsym_t *group = symptr;
2594 while (--len && group) group = group->previous;
2595 from = group ? start + group->strbeg : start;
2598 len = SvIV(fromstr);
2600 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 case '@' | TYPE_IS_SHRIEKING:
2604 from = start + symptr->strbeg;
2606 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2607 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2608 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2612 while (len && from < cur) {
2613 from += UTF8SKIP(from);
2617 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2619 /* Here we know from == cur */
2621 GROWING(0, cat, start, cur, len);
2622 Zero(cur, len, char);
2624 } else if (from < cur) {
2627 } else goto no_change;
2635 if (len > 0) goto grow;
2636 if (len == 0) goto no_change;
2643 tempsym_t savsym = *symptr;
2644 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2645 symptr->flags |= group_modifiers;
2646 symptr->patend = savsym.grpend;
2648 symptr->previous = &lookahead;
2651 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2652 else symptr->flags &= ~FLAG_PARSE_UTF8;
2653 was_utf8 = SvUTF8(cat);
2654 symptr->patptr = savsym.grpbeg;
2655 beglist = pack_rec(cat, symptr, beglist, endlist);
2656 if (SvUTF8(cat) != was_utf8)
2657 /* This had better be an upgrade while in utf8==0 mode */
2660 if (savsym.howlen == e_star && beglist == endlist)
2661 break; /* No way to continue */
2663 lookahead.flags = symptr->flags & ~group_modifiers;
2666 case 'X' | TYPE_IS_SHRIEKING:
2667 if (!len) /* Avoid division by 0 */
2674 hop += UTF8SKIP(hop);
2681 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2685 len = (cur-start) % len;
2689 if (len < 1) goto no_change;
2693 Perl_croak(aTHX_ "'%c' outside of string in pack",
2694 (int) TYPE_NO_MODIFIERS(datumtype));
2695 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2697 Perl_croak(aTHX_ "'%c' outside of string in pack",
2698 (int) TYPE_NO_MODIFIERS(datumtype));
2704 if (cur - start < len)
2705 Perl_croak(aTHX_ "'%c' outside of string in pack",
2706 (int) TYPE_NO_MODIFIERS(datumtype));
2709 if (cur < start+symptr->strbeg) {
2710 /* Make sure group starts don't point into the void */
2712 const STRLEN length = cur-start;
2713 for (group = symptr;
2714 group && length < group->strbeg;
2715 group = group->previous) group->strbeg = length;
2716 lookahead.strbeg = length;
2719 case 'x' | TYPE_IS_SHRIEKING: {
2721 if (!len) /* Avoid division by 0 */
2723 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2724 else ai32 = (cur - start) % len;
2725 if (ai32 == 0) goto no_change;
2737 aptr = SvPV_const(fromstr, fromlen);
2738 if (DO_UTF8(fromstr)) {
2739 const char *end, *s;
2741 if (!utf8 && !SvUTF8(cat)) {
2742 marked_upgrade(aTHX_ cat, symptr);
2743 lookahead.flags |= FLAG_DO_UTF8;
2744 lookahead.strbeg = symptr->strbeg;
2747 cur = start + SvCUR(cat);
2749 if (howlen == e_star) {
2750 if (utf8) goto string_copy;
2754 end = aptr + fromlen;
2755 fromlen = datumtype == 'Z' ? len-1 : len;
2756 while ((I32) fromlen > 0 && s < end) {
2761 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2764 if (datumtype == 'Z') len++;
2770 fromlen = len - fromlen;
2771 if (datumtype == 'Z') fromlen--;
2772 if (howlen == e_star) {
2774 if (datumtype == 'Z') len++;
2776 GROWING(0, cat, start, cur, len);
2777 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2778 datumtype | TYPE_IS_PACK))
2779 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2783 if (howlen == e_star) {
2785 if (datumtype == 'Z') len++;
2787 if (len <= (I32) fromlen) {
2789 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2791 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2793 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2794 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2796 while (fromlen > 0) {
2797 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2803 if (howlen == e_star) {
2805 if (datumtype == 'Z') len++;
2807 if (len <= (I32) fromlen) {
2809 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2811 GROWING(0, cat, start, cur, len);
2812 Copy(aptr, cur, fromlen, char);
2816 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2822 const char *str, *end;
2829 str = SvPV_const(fromstr, fromlen);
2830 end = str + fromlen;
2831 if (DO_UTF8(fromstr)) {
2833 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2835 utf8_source = FALSE;
2836 utf8_flags = 0; /* Unused, but keep compilers happy */
2838 if (howlen == e_star) len = fromlen;
2839 field_len = (len+7)/8;
2840 GROWING(utf8, cat, start, cur, field_len);
2841 if (len > (I32)fromlen) len = fromlen;
2844 if (datumtype == 'B')
2848 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2850 } else bits |= *str++ & 1;
2851 if (l & 7) bits <<= 1;
2853 PUSH_BYTE(utf8, cur, bits);
2858 /* datumtype == 'b' */
2862 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2863 if (val & 1) bits |= 0x80;
2864 } else if (*str++ & 1)
2866 if (l & 7) bits >>= 1;
2868 PUSH_BYTE(utf8, cur, bits);
2874 if (datumtype == 'B')
2875 bits <<= 7 - (l & 7);
2877 bits >>= 7 - (l & 7);
2878 PUSH_BYTE(utf8, cur, bits);
2881 /* Determine how many chars are left in the requested field */
2883 if (howlen == e_star) field_len = 0;
2884 else field_len -= l;
2885 Zero(cur, field_len, char);
2891 const char *str, *end;
2898 str = SvPV_const(fromstr, fromlen);
2899 end = str + fromlen;
2900 if (DO_UTF8(fromstr)) {
2902 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2904 utf8_source = FALSE;
2905 utf8_flags = 0; /* Unused, but keep compilers happy */
2907 if (howlen == e_star) len = fromlen;
2908 field_len = (len+1)/2;
2909 GROWING(utf8, cat, start, cur, field_len);
2910 if (!utf8 && len > (I32)fromlen) len = fromlen;
2913 if (datumtype == 'H')
2917 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2918 if (val < 256 && isALPHA(val))
2919 bits |= (val + 9) & 0xf;
2922 } else if (isALPHA(*str))
2923 bits |= (*str++ + 9) & 0xf;
2925 bits |= *str++ & 0xf;
2926 if (l & 1) bits <<= 4;
2928 PUSH_BYTE(utf8, cur, bits);
2936 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2937 if (val < 256 && isALPHA(val))
2938 bits |= ((val + 9) & 0xf) << 4;
2940 bits |= (val & 0xf) << 4;
2941 } else if (isALPHA(*str))
2942 bits |= ((*str++ + 9) & 0xf) << 4;
2944 bits |= (*str++ & 0xf) << 4;
2945 if (l & 1) bits >>= 4;
2947 PUSH_BYTE(utf8, cur, bits);
2953 PUSH_BYTE(utf8, cur, bits);
2956 /* Determine how many chars are left in the requested field */
2958 if (howlen == e_star) field_len = 0;
2959 else field_len -= l;
2960 Zero(cur, field_len, char);
2968 aiv = SvIV(fromstr);
2969 if ((-128 > aiv || aiv > 127) &&
2971 Perl_warner(aTHX_ packWARN(WARN_PACK),
2972 "Character in 'c' format wrapped in pack");
2973 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2978 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2981 GROWING(0, cat, start, cur, len);
2985 aiv = SvIV(fromstr);
2986 if ((0 > aiv || aiv > 0xff) &&
2988 Perl_warner(aTHX_ packWARN(WARN_PACK),
2989 "Character in 'C' format wrapped in pack");
2990 *cur++ = (char)(aiv & 0xff);
2995 U8 in_bytes = IN_BYTES;
2997 end = start+SvLEN(cat)-1;
2998 if (utf8) end -= UTF8_MAXLEN-1;
3002 auv = SvUV(fromstr);
3003 if (in_bytes) auv = auv % 0x100;
3008 SvCUR_set(cat, cur - start);
3010 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3011 end = start+SvLEN(cat)-UTF8_MAXLEN;
3013 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3016 0 : UNICODE_ALLOW_ANY);
3021 SvCUR_set(cat, cur - start);
3022 marked_upgrade(aTHX_ cat, symptr);
3023 lookahead.flags |= FLAG_DO_UTF8;
3024 lookahead.strbeg = symptr->strbeg;
3027 cur = start + SvCUR(cat);
3028 end = start+SvLEN(cat)-UTF8_MAXLEN;
3031 if (ckWARN(WARN_PACK))
3032 Perl_warner(aTHX_ packWARN(WARN_PACK),
3033 "Character in 'W' format wrapped in pack");
3038 SvCUR_set(cat, cur - start);
3039 GROWING(0, cat, start, cur, len+1);
3040 end = start+SvLEN(cat)-1;
3042 *(U8 *) cur++ = (U8)auv;
3051 if (!(symptr->flags & FLAG_DO_UTF8)) {
3052 marked_upgrade(aTHX_ cat, symptr);
3053 lookahead.flags |= FLAG_DO_UTF8;
3054 lookahead.strbeg = symptr->strbeg;
3060 end = start+SvLEN(cat);
3061 if (!utf8) end -= UTF8_MAXLEN;
3065 auv = SvUV(fromstr);
3067 U8 buffer[UTF8_MAXLEN], *endb;
3068 endb = uvuni_to_utf8_flags(buffer, auv,
3070 0 : UNICODE_ALLOW_ANY);
3071 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3073 SvCUR_set(cat, cur - start);
3074 GROWING(0, cat, start, cur,
3075 len+(endb-buffer)*UTF8_EXPAND);
3076 end = start+SvLEN(cat);
3078 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3082 SvCUR_set(cat, cur - start);
3083 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3084 end = start+SvLEN(cat)-UTF8_MAXLEN;
3086 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3088 0 : UNICODE_ALLOW_ANY);
3093 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3099 anv = SvNV(fromstr);
3101 /* VOS does not automatically map a floating-point overflow
3102 during conversion from double to float into infinity, so we
3103 do it by hand. This code should either be generalized for
3104 any OS that needs it, or removed if and when VOS implements
3105 posix-976 (suggestion to support mapping to infinity).
3106 Paul.Green@stratus.com 02-04-02. */
3108 afloat = _float_constants[0]; /* single prec. inf. */
3109 else if (anv < -FLT_MAX)
3110 afloat = _float_constants[0]; /* single prec. inf. */
3111 else afloat = (float) anv;
3113 # if defined(VMS) && !defined(__IEEE_FP)
3114 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3115 * on Alpha; fake it if we don't have them.
3119 else if (anv < -FLT_MAX)
3121 else afloat = (float)anv;
3123 afloat = (float)anv;
3125 #endif /* __VOS__ */
3126 DO_BO_PACK_N(afloat, float);
3127 PUSH_VAR(utf8, cur, afloat);
3135 anv = SvNV(fromstr);
3137 /* VOS does not automatically map a floating-point overflow
3138 during conversion from long double to double into infinity,
3139 so we do it by hand. This code should either be generalized
3140 for any OS that needs it, or removed if and when VOS
3141 implements posix-976 (suggestion to support mapping to
3142 infinity). Paul.Green@stratus.com 02-04-02. */
3144 adouble = _double_constants[0]; /* double prec. inf. */
3145 else if (anv < -DBL_MAX)
3146 adouble = _double_constants[0]; /* double prec. inf. */
3147 else adouble = (double) anv;
3149 # if defined(VMS) && !defined(__IEEE_FP)
3150 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3151 * on Alpha; fake it if we don't have them.
3155 else if (anv < -DBL_MAX)
3157 else adouble = (double)anv;
3159 adouble = (double)anv;
3161 #endif /* __VOS__ */
3162 DO_BO_PACK_N(adouble, double);
3163 PUSH_VAR(utf8, cur, adouble);
3168 Zero(&anv, 1, NV); /* can be long double with unused bits */
3171 anv = SvNV(fromstr);
3172 DO_BO_PACK_N(anv, NV);
3173 PUSH_VAR(utf8, cur, anv);
3177 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3179 long double aldouble;
3180 /* long doubles can have unused bits, which may be nonzero */
3181 Zero(&aldouble, 1, long double);
3184 aldouble = (long double)SvNV(fromstr);
3185 DO_BO_PACK_N(aldouble, long double);
3186 PUSH_VAR(utf8, cur, aldouble);
3191 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3192 case 'n' | TYPE_IS_SHRIEKING:
3198 ai16 = (I16)SvIV(fromstr);
3200 ai16 = PerlSock_htons(ai16);
3202 PUSH16(utf8, cur, &ai16);
3205 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3206 case 'v' | TYPE_IS_SHRIEKING:
3212 ai16 = (I16)SvIV(fromstr);
3216 PUSH16(utf8, cur, &ai16);
3219 case 'S' | TYPE_IS_SHRIEKING:
3220 #if SHORTSIZE != SIZE16
3222 unsigned short aushort;
3224 aushort = SvUV(fromstr);
3225 DO_BO_PACK(aushort, s);
3226 PUSH_VAR(utf8, cur, aushort);
3236 au16 = (U16)SvUV(fromstr);
3237 DO_BO_PACK(au16, 16);
3238 PUSH16(utf8, cur, &au16);
3241 case 's' | TYPE_IS_SHRIEKING:
3242 #if SHORTSIZE != SIZE16
3246 ashort = SvIV(fromstr);
3247 DO_BO_PACK(ashort, s);
3248 PUSH_VAR(utf8, cur, ashort);
3258 ai16 = (I16)SvIV(fromstr);
3259 DO_BO_PACK(ai16, 16);
3260 PUSH16(utf8, cur, &ai16);
3264 case 'I' | TYPE_IS_SHRIEKING:
3268 auint = SvUV(fromstr);
3269 DO_BO_PACK(auint, i);
3270 PUSH_VAR(utf8, cur, auint);
3277 aiv = SvIV(fromstr);
3278 #if IVSIZE == INTSIZE
3280 #elif IVSIZE == LONGSIZE
3282 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3283 DO_BO_PACK(aiv, 64);
3285 Perl_croak(aTHX_ "'j' not supported on this platform");
3287 PUSH_VAR(utf8, cur, aiv);
3294 auv = SvUV(fromstr);
3295 #if UVSIZE == INTSIZE
3297 #elif UVSIZE == LONGSIZE
3299 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3300 DO_BO_PACK(auv, 64);
3302 Perl_croak(aTHX_ "'J' not supported on this platform");
3304 PUSH_VAR(utf8, cur, auv);
3311 anv = SvNV(fromstr);
3315 SvCUR_set(cat, cur - start);
3316 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3319 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3320 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3321 any negative IVs will have already been got by the croak()
3322 above. IOK is untrue for fractions, so we test them
3323 against UV_MAX_P1. */
3324 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3325 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3326 char *in = buf + sizeof(buf);
3327 UV auv = SvUV(fromstr);
3330 *--in = (char)((auv & 0x7f) | 0x80);
3333 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3334 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3335 in, (buf + sizeof(buf)) - in);
3336 } else if (SvPOKp(fromstr))
3338 else if (SvNOKp(fromstr)) {
3339 /* 10**NV_MAX_10_EXP is the largest power of 10
3340 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3341 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3342 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3343 And with that many bytes only Inf can overflow.
3344 Some C compilers are strict about integral constant
3345 expressions so we conservatively divide by a slightly
3346 smaller integer instead of multiplying by the exact
3347 floating-point value.
3349 #ifdef NV_MAX_10_EXP
3350 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3351 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3353 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3354 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3356 char *in = buf + sizeof(buf);
3358 anv = Perl_floor(anv);
3360 const NV next = Perl_floor(anv / 128);
3361 if (in <= buf) /* this cannot happen ;-) */
3362 Perl_croak(aTHX_ "Cannot compress integer in pack");
3363 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3366 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3367 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3368 in, (buf + sizeof(buf)) - in);
3377 /* Copy string and check for compliance */
3378 from = SvPV_const(fromstr, len);
3379 if ((norm = is_an_int(from, len)) == NULL)
3380 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3382 Newx(result, len, char);
3385 while (!done) *--in = div128(norm, &done) | 0x80;
3386 result[len - 1] &= 0x7F; /* clear continue bit */
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (result + len) - in);
3390 SvREFCNT_dec(norm); /* free norm */
3395 case 'i' | TYPE_IS_SHRIEKING:
3399 aint = SvIV(fromstr);
3400 DO_BO_PACK(aint, i);
3401 PUSH_VAR(utf8, cur, aint);
3404 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3405 case 'N' | TYPE_IS_SHRIEKING:
3411 au32 = SvUV(fromstr);
3413 au32 = PerlSock_htonl(au32);
3415 PUSH32(utf8, cur, &au32);
3418 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3419 case 'V' | TYPE_IS_SHRIEKING:
3425 au32 = SvUV(fromstr);
3429 PUSH32(utf8, cur, &au32);
3432 case 'L' | TYPE_IS_SHRIEKING:
3433 #if LONGSIZE != SIZE32
3435 unsigned long aulong;
3437 aulong = SvUV(fromstr);
3438 DO_BO_PACK(aulong, l);
3439 PUSH_VAR(utf8, cur, aulong);
3449 au32 = SvUV(fromstr);
3450 DO_BO_PACK(au32, 32);
3451 PUSH32(utf8, cur, &au32);
3454 case 'l' | TYPE_IS_SHRIEKING:
3455 #if LONGSIZE != SIZE32
3459 along = SvIV(fromstr);
3460 DO_BO_PACK(along, l);
3461 PUSH_VAR(utf8, cur, along);
3471 ai32 = SvIV(fromstr);
3472 DO_BO_PACK(ai32, 32);
3473 PUSH32(utf8, cur, &ai32);
3481 auquad = (Uquad_t) SvUV(fromstr);
3482 DO_BO_PACK(auquad, 64);
3483 PUSH_VAR(utf8, cur, auquad);
3490 aquad = (Quad_t)SvIV(fromstr);
3491 DO_BO_PACK(aquad, 64);
3492 PUSH_VAR(utf8, cur, aquad);
3495 #endif /* HAS_QUAD */
3497 len = 1; /* assume SV is correct length */
3498 GROWING(utf8, cat, start, cur, sizeof(char *));
3505 SvGETMAGIC(fromstr);
3506 if (!SvOK(fromstr)) aptr = NULL;
3508 /* XXX better yet, could spirit away the string to
3509 * a safe spot and hang on to it until the result
3510 * of pack() (and all copies of the result) are
3513 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3514 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3515 Perl_warner(aTHX_ packWARN(WARN_PACK),
3516 "Attempt to pack pointer to temporary value");
3518 if (SvPOK(fromstr) || SvNIOK(fromstr))
3519 aptr = SvPV_nomg_const_nolen(fromstr);
3521 aptr = SvPV_force_flags_nolen(fromstr, 0);
3523 DO_BO_PACK_PC(aptr);
3524 PUSH_VAR(utf8, cur, aptr);
3528 const char *aptr, *aend;
3532 if (len <= 2) len = 45;
3533 else len = len / 3 * 3;
3535 if (ckWARN(WARN_PACK))
3536 Perl_warner(aTHX_ packWARN(WARN_PACK),
3537 "Field too wide in 'u' format in pack");
3540 aptr = SvPV_const(fromstr, fromlen);
3541 from_utf8 = DO_UTF8(fromstr);
3543 aend = aptr + fromlen;
3544 fromlen = sv_len_utf8(fromstr);
3545 } else aend = NULL; /* Unused, but keep compilers happy */
3546 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3547 while (fromlen > 0) {
3550 U8 hunk[1+63/3*4+1];
3552 if ((I32)fromlen > len)
3558 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3559 'u' | TYPE_IS_PACK)) {
3561 SvCUR_set(cat, cur - start);
3562 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3564 end = doencodes(hunk, buffer, todo);
3566 end = doencodes(hunk, aptr, todo);
3569 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3576 SvCUR_set(cat, cur - start);
3578 *symptr = lookahead;
3587 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3588 register SV *cat = TARG;
3590 SV *pat_sv = *++MARK;
3591 register const char *pat = SvPV_const(pat_sv, fromlen);
3592 register const char *patend = pat + fromlen;
3595 sv_setpvn(cat, "", 0);
3598 packlist(cat, pat, patend, MARK, SP + 1);
3608 * c-indentation-style: bsd
3610 * indent-tabs-mode: t
3613 * ex: set ts=8 sts=4 sw=4 noet: