3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325 # elif PTRSIZE == IVSIZE
326 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
331 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
337 # if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339 # define DO_BO_UNPACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 # define DO_BO_PACK_N(var, type) \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
361 #endif /* PERL_PACK_CAN_BYTEORDER */
363 #define PACK_SIZE_CANNOT_CSUM 0x80
364 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365 #define PACK_SIZE_MASK 0x3F
367 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
370 #if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
374 typedef U8 packprops_t;
377 STATIC const packprops_t packprops[512] = {
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
393 /* I */ sizeof(unsigned int),
400 #if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
415 /* f */ sizeof(float),
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425 #if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
452 /* L */ sizeof(unsigned long),
454 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
460 /* S */ sizeof(unsigned short),
462 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 /* l */ sizeof(long),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
479 /* s */ sizeof(short),
481 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
497 /* EBCDIC (or bust) */
498 STATIC const packprops_t packprops[512] = {
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
512 /* f */ sizeof(float),
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523 #if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
544 /* I */ sizeof(unsigned int),
552 #if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
579 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
587 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
599 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
607 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
618 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
640 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 const char *from = *s;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
659 } else from += retlen;
664 *(U8 *)buf++ = (U8)val;
666 /* We have enough characters for the buffer. Did we have problems ? */
669 /* Rewalk the string fragment while warning */
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
676 if (from > end) from = end;
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
690 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = PL_uudmap[val] & 077;
706 S_bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char *dest) {
707 const U8 * const end = start + len;
709 while (start < end) {
710 U8 buffer[UTF8_MAXLEN];
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
730 #define PUSH_BYTES(utf8, cur, buf, len) \
733 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
735 Copy(buf, cur, len, char); \
740 #define GROWING(utf8, cat, start, cur, in_len) \
742 STRLEN glen = (in_len); \
743 if (utf8) glen *= UTF8_EXPAND; \
744 if ((cur) + glen >= (start) + SvLEN(cat)) { \
745 (start) = sv_exp_grow(cat, glen); \
746 (cur) = (start) + SvCUR(cat); \
750 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
752 const STRLEN glen = (in_len); \
754 if (utf8) gl *= UTF8_EXPAND; \
755 if ((cur) + gl >= (start) + SvLEN(cat)) { \
757 SvCUR_set((cat), (cur) - (start)); \
758 (start) = sv_exp_grow(cat, gl); \
759 (cur) = (start) + SvCUR(cat); \
761 PUSH_BYTES(utf8, cur, buf, glen); \
764 #define PUSH_BYTE(utf8, s, byte) \
767 const U8 au8 = (byte); \
768 (s) = bytes_to_uni(&au8, 1, (s)); \
769 } else *(U8 *)(s)++ = (byte); \
772 /* Only to be used inside a loop (see the break) */
773 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
776 if (str >= end) break; \
777 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
778 if (retlen == (STRLEN) -1 || retlen == 0) { \
780 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
785 static const char *_action( const tempsym_t* symptr )
787 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
790 /* Returns the sizeof() struct described by pat */
792 S_measure_struct(pTHX_ tempsym_t* symptr)
796 while (next_symbol(symptr)) {
800 switch (symptr->howlen) {
802 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
806 /* e_no_len and e_number */
807 len = symptr->length;
811 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
814 /* endianness doesn't influence the size of a type */
815 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
817 Perl_croak(aTHX_ "Invalid type '%c' in %s",
818 (int)TYPE_NO_MODIFIERS(symptr->code),
820 #ifdef PERL_PACK_CAN_SHRIEKSIGN
821 case '.' | TYPE_IS_SHRIEKING:
822 case '@' | TYPE_IS_SHRIEKING:
827 case 'U': /* XXXX Is it correct? */
830 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
831 (int) TYPE_NO_MODIFIERS(symptr->code),
838 tempsym_t savsym = *symptr;
839 symptr->patptr = savsym.grpbeg;
840 symptr->patend = savsym.grpend;
841 /* XXXX Theoretically, we need to measure many times at
842 different positions, since the subexpression may contain
843 alignment commands, but be not of aligned length.
844 Need to detect this and croak(). */
845 size = measure_struct(symptr);
849 case 'X' | TYPE_IS_SHRIEKING:
850 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
852 if (!len) /* Avoid division by 0 */
854 len = total % len; /* Assumed: the start is aligned. */
859 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
861 case 'x' | TYPE_IS_SHRIEKING:
862 if (!len) /* Avoid division by 0 */
864 star = total % len; /* Assumed: the start is aligned. */
865 if (star) /* Other portable ways? */
889 size = sizeof(char*);
899 /* locate matching closing parenthesis or bracket
900 * returns char pointer to char after match, or NULL
903 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
905 while (patptr < patend) {
906 const char c = *patptr++;
913 while (patptr < patend && *patptr != '\n')
917 patptr = group_end(patptr, patend, ')') + 1;
919 patptr = group_end(patptr, patend, ']') + 1;
921 Perl_croak(aTHX_ "No group ending character '%c' found in template",
927 /* Convert unsigned decimal number to binary.
928 * Expects a pointer to the first digit and address of length variable
929 * Advances char pointer to 1st non-digit char and returns number
932 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
934 I32 len = *patptr++ - '0';
935 while (isDIGIT(*patptr)) {
936 if (len >= 0x7FFFFFFF/10)
937 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
938 len = (len * 10) + (*patptr++ - '0');
944 /* The marvellous template parsing routine: Using state stored in *symptr,
945 * locates next template code and count
948 S_next_symbol(pTHX_ tempsym_t* symptr )
950 const char* patptr = symptr->patptr;
951 const char* const patend = symptr->patend;
953 symptr->flags &= ~FLAG_SLASH;
955 while (patptr < patend) {
956 if (isSPACE(*patptr))
958 else if (*patptr == '#') {
960 while (patptr < patend && *patptr != '\n')
965 /* We should have found a template code */
966 I32 code = *patptr++ & 0xFF;
967 U32 inherited_modifiers = 0;
969 if (code == ','){ /* grandfather in commas but with a warning */
970 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
971 symptr->flags |= FLAG_COMMA;
972 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
973 "Invalid type ',' in %s", _action( symptr ) );
978 /* for '(', skip to ')' */
980 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
981 Perl_croak(aTHX_ "()-group starts with a count in %s",
983 symptr->grpbeg = patptr;
984 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
985 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
986 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
990 /* look for group modifiers to inherit */
991 if (TYPE_ENDIANNESS(symptr->flags)) {
992 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
993 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
996 /* look for modifiers */
997 while (patptr < patend) {
1002 modifier = TYPE_IS_SHRIEKING;
1003 allowed = SHRIEKING_ALLOWED_TYPES;
1005 #ifdef PERL_PACK_CAN_BYTEORDER
1007 modifier = TYPE_IS_BIG_ENDIAN;
1008 allowed = ENDIANNESS_ALLOWED_TYPES;
1011 modifier = TYPE_IS_LITTLE_ENDIAN;
1012 allowed = ENDIANNESS_ALLOWED_TYPES;
1014 #endif /* PERL_PACK_CAN_BYTEORDER */
1024 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1025 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1026 allowed, _action( symptr ) );
1028 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1029 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1030 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1031 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1032 TYPE_ENDIANNESS_MASK)
1033 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1034 *patptr, _action( symptr ) );
1036 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1037 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1038 "Duplicate modifier '%c' after '%c' in %s",
1039 *patptr, (int) TYPE_NO_MODIFIERS(code),
1040 _action( symptr ) );
1047 /* inherit modifiers */
1048 code |= inherited_modifiers;
1050 /* look for count and/or / */
1051 if (patptr < patend) {
1052 if (isDIGIT(*patptr)) {
1053 patptr = get_num( patptr, &symptr->length );
1054 symptr->howlen = e_number;
1056 } else if (*patptr == '*') {
1058 symptr->howlen = e_star;
1060 } else if (*patptr == '[') {
1061 const char* lenptr = ++patptr;
1062 symptr->howlen = e_number;
1063 patptr = group_end( patptr, patend, ']' ) + 1;
1064 /* what kind of [] is it? */
1065 if (isDIGIT(*lenptr)) {
1066 lenptr = get_num( lenptr, &symptr->length );
1067 if( *lenptr != ']' )
1068 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1069 _action( symptr ) );
1071 tempsym_t savsym = *symptr;
1072 symptr->patend = patptr-1;
1073 symptr->patptr = lenptr;
1074 savsym.length = measure_struct(symptr);
1078 symptr->howlen = e_no_len;
1083 while (patptr < patend) {
1084 if (isSPACE(*patptr))
1086 else if (*patptr == '#') {
1088 while (patptr < patend && *patptr != '\n')
1090 if (patptr < patend)
1093 if (*patptr == '/') {
1094 symptr->flags |= FLAG_SLASH;
1096 if (patptr < patend &&
1097 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1098 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1099 _action( symptr ) );
1105 /* at end - no count, no / */
1106 symptr->howlen = e_no_len;
1110 symptr->code = code;
1111 symptr->patptr = patptr;
1115 symptr->patptr = patptr;
1120 There is no way to cleanly handle the case where we should process the
1121 string per byte in its upgraded form while it's really in downgraded form
1122 (e.g. estimates like strend-s as an upper bound for the number of
1123 characters left wouldn't work). So if we foresee the need of this
1124 (pattern starts with U or contains U0), we want to work on the encoded
1125 version of the string. Users are advised to upgrade their pack string
1126 themselves if they need to do a lot of unpacks like this on it
1129 need_utf8(const char *pat, const char *patend)
1132 while (pat < patend) {
1133 if (pat[0] == '#') {
1135 pat = (const char *) memchr(pat, '\n', patend-pat);
1136 if (!pat) return FALSE;
1137 } else if (pat[0] == 'U') {
1138 if (first || pat[1] == '0') return TRUE;
1139 } else first = FALSE;
1146 first_symbol(const char *pat, const char *patend) {
1147 while (pat < patend) {
1148 if (pat[0] != '#') return pat[0];
1150 pat = (const char *) memchr(pat, '\n', patend-pat);
1158 =for apidoc unpackstring
1160 The engine implementing unpack() Perl function. C<unpackstring> puts the
1161 extracted list items on the stack and returns the number of elements.
1162 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1167 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1171 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1172 else if (need_utf8(pat, patend)) {
1173 /* We probably should try to avoid this in case a scalar context call
1174 wouldn't get to the "U0" */
1175 STRLEN len = strend - s;
1176 s = (char *) bytes_to_utf8((U8 *) s, &len);
1179 flags |= FLAG_DO_UTF8;
1182 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1183 flags |= FLAG_PARSE_UTF8;
1185 TEMPSYM_INIT(&sym, pat, patend, flags);
1187 return unpack_rec(&sym, s, s, strend, NULL );
1192 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1196 const I32 start_sp_offset = SP - PL_stack_base;
1202 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1203 bool beyond = FALSE;
1204 bool explicit_length;
1205 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1206 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1207 symptr->strbeg = s - strbeg;
1209 while (next_symbol(symptr)) {
1212 I32 datumtype = symptr->code;
1213 /* do first one only unless in list context
1214 / is implemented by unpacking the count, then popping it from the
1215 stack, so must check that we're not in the middle of a / */
1216 if ( unpack_only_one
1217 && (SP - PL_stack_base == start_sp_offset + 1)
1218 && (datumtype != '/') ) /* XXX can this be omitted */
1221 switch (howlen = symptr->howlen) {
1223 len = strend - strbeg; /* long enough */
1226 /* e_no_len and e_number */
1227 len = symptr->length;
1231 explicit_length = TRUE;
1233 beyond = s >= strend;
1235 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1237 /* props nonzero means we can process this letter. */
1238 const long size = props & PACK_SIZE_MASK;
1239 const long howmany = (strend - s) / size;
1243 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1244 if (len && unpack_only_one) len = 1;
1250 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1252 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1255 if (howlen == e_no_len)
1256 len = 16; /* len is not specified */
1264 tempsym_t savsym = *symptr;
1265 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1266 symptr->flags |= group_modifiers;
1267 symptr->patend = savsym.grpend;
1268 symptr->previous = &savsym;
1272 symptr->patptr = savsym.grpbeg;
1273 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1274 else symptr->flags &= ~FLAG_PARSE_UTF8;
1275 unpack_rec(symptr, s, strbeg, strend, &s);
1276 if (s == strend && savsym.howlen == e_star)
1277 break; /* No way to continue */
1280 savsym.flags = symptr->flags & ~group_modifiers;
1284 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1285 case '.' | TYPE_IS_SHRIEKING:
1290 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1291 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1292 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1293 const bool u8 = utf8;
1295 if (howlen == e_star) from = strbeg;
1296 else if (len <= 0) from = s;
1298 tempsym_t *group = symptr;
1300 while (--len && group) group = group->previous;
1301 from = group ? strbeg + group->strbeg : strbeg;
1304 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1305 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1306 XPUSHs(sv_2mortal(sv));
1309 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1310 case '@' | TYPE_IS_SHRIEKING:
1313 s = strbeg + symptr->strbeg;
1314 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1315 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1316 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1322 Perl_croak(aTHX_ "'@' outside of string in unpack");
1327 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1330 Perl_croak(aTHX_ "'@' outside of string in unpack");
1334 case 'X' | TYPE_IS_SHRIEKING:
1335 if (!len) /* Avoid division by 0 */
1338 const char *hop, *last;
1340 hop = last = strbeg;
1342 hop += UTF8SKIP(hop);
1349 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1353 len = (s - strbeg) % len;
1359 Perl_croak(aTHX_ "'X' outside of string in unpack");
1360 while (--s, UTF8_IS_CONTINUATION(*s)) {
1362 Perl_croak(aTHX_ "'X' outside of string in unpack");
1367 if (len > s - strbeg)
1368 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1372 case 'x' | TYPE_IS_SHRIEKING: {
1374 if (!len) /* Avoid division by 0 */
1376 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1377 else ai32 = (s - strbeg) % len;
1378 if (ai32 == 0) break;
1386 Perl_croak(aTHX_ "'x' outside of string in unpack");
1391 if (len > strend - s)
1392 Perl_croak(aTHX_ "'x' outside of string in unpack");
1397 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1403 /* Preliminary length estimate is assumed done in 'W' */
1404 if (len > strend - s) len = strend - s;
1410 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1411 if (hop >= strend) {
1413 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1418 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1420 } else if (len > strend - s)
1423 if (datumtype == 'Z') {
1424 /* 'Z' strips stuff after first null */
1425 const char *ptr, *end;
1427 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1428 sv = newSVpvn(s, ptr-s);
1429 if (howlen == e_star) /* exact for 'Z*' */
1430 len = ptr-s + (ptr != strend ? 1 : 0);
1431 } else if (datumtype == 'A') {
1432 /* 'A' strips both nulls and spaces */
1434 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1435 for (ptr = s+len-1; ptr >= s; ptr--)
1436 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1437 !is_utf8_space((U8 *) ptr)) break;
1438 if (ptr >= s) ptr += UTF8SKIP(ptr);
1441 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1443 for (ptr = s+len-1; ptr >= s; ptr--)
1444 if (*ptr != 0 && !isSPACE(*ptr)) break;
1447 sv = newSVpvn(s, ptr-s);
1448 } else sv = newSVpvn(s, len);
1452 /* Undo any upgrade done due to need_utf8() */
1453 if (!(symptr->flags & FLAG_WAS_UTF8))
1454 sv_utf8_downgrade(sv, 0);
1456 XPUSHs(sv_2mortal(sv));
1462 if (howlen == e_star || len > (strend - s) * 8)
1463 len = (strend - s) * 8;
1467 Newxz(PL_bitcount, 256, char);
1468 for (bits = 1; bits < 256; bits++) {
1469 if (bits & 1) PL_bitcount[bits]++;
1470 if (bits & 2) PL_bitcount[bits]++;
1471 if (bits & 4) PL_bitcount[bits]++;
1472 if (bits & 8) PL_bitcount[bits]++;
1473 if (bits & 16) PL_bitcount[bits]++;
1474 if (bits & 32) PL_bitcount[bits]++;
1475 if (bits & 64) PL_bitcount[bits]++;
1476 if (bits & 128) PL_bitcount[bits]++;
1480 while (len >= 8 && s < strend) {
1481 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1486 cuv += PL_bitcount[*(U8 *)s++];
1489 if (len && s < strend) {
1491 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1492 if (datumtype == 'b')
1494 if (bits & 1) cuv++;
1499 if (bits & 0x80) cuv++;
1506 sv = sv_2mortal(newSV(len ? len : 1));
1509 if (datumtype == 'b') {
1511 const I32 ai32 = len;
1512 for (len = 0; len < ai32; len++) {
1513 if (len & 7) bits >>= 1;
1515 if (s >= strend) break;
1516 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1517 } else bits = *(U8 *) s++;
1518 *str++ = bits & 1 ? '1' : '0';
1522 const I32 ai32 = len;
1523 for (len = 0; len < ai32; len++) {
1524 if (len & 7) bits <<= 1;
1526 if (s >= strend) break;
1527 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1528 } else bits = *(U8 *) s++;
1529 *str++ = bits & 0x80 ? '1' : '0';
1533 SvCUR_set(sv, str - SvPVX_const(sv));
1540 /* Preliminary length estimate, acceptable for utf8 too */
1541 if (howlen == e_star || len > (strend - s) * 2)
1542 len = (strend - s) * 2;
1543 sv = sv_2mortal(newSV(len ? len : 1));
1546 if (datumtype == 'h') {
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 1) bits >>= 4;
1552 if (s >= strend) break;
1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1554 } else bits = * (U8 *) s++;
1555 *str++ = PL_hexdigit[bits & 15];
1559 const I32 ai32 = len;
1560 for (len = 0; len < ai32; len++) {
1561 if (len & 1) bits <<= 4;
1563 if (s >= strend) break;
1564 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1565 } else bits = *(U8 *) s++;
1566 *str++ = PL_hexdigit[(bits >> 4) & 15];
1570 SvCUR_set(sv, str - SvPVX_const(sv));
1576 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1577 if (aint >= 128) /* fake up signed chars */
1580 PUSHs(sv_2mortal(newSViv((IV)aint)));
1581 else if (checksum > bits_in_uv)
1582 cdouble += (NV)aint;
1591 if (explicit_length && datumtype == 'C')
1592 /* Switch to "character" mode */
1593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1596 if (datumtype == 'C' ?
1597 (symptr->flags & FLAG_DO_UTF8) &&
1598 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1599 while (len-- > 0 && s < strend) {
1601 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1602 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1607 PUSHs(sv_2mortal(newSVuv((UV) val)));
1608 else if (checksum > bits_in_uv)
1609 cdouble += (NV) val;
1613 } else if (!checksum)
1615 const U8 ch = *(U8 *) s++;
1616 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1618 else if (checksum > bits_in_uv)
1619 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1621 while (len-- > 0) cuv += *(U8 *) s++;
1625 if (explicit_length) {
1626 /* Switch to "bytes in UTF-8" mode */
1627 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1629 /* Should be impossible due to the need_utf8() test */
1630 Perl_croak(aTHX_ "U0 mode on a byte string");
1634 if (len > strend - s) len = strend - s;
1636 if (len && unpack_only_one) len = 1;
1640 while (len-- > 0 && s < strend) {
1644 U8 result[UTF8_MAXLEN];
1645 const char *ptr = s;
1647 /* Bug: warns about bad utf8 even if we are short on bytes
1648 and will break out of the loop */
1649 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1652 len = UTF8SKIP(result);
1653 if (!uni_to_bytes(aTHX_ &ptr, strend,
1654 (char *) &result[1], len-1, 'U')) break;
1655 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1658 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1659 if (retlen == (STRLEN) -1 || retlen == 0)
1660 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1664 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1665 else if (checksum > bits_in_uv)
1666 cdouble += (NV) auv;
1671 case 's' | TYPE_IS_SHRIEKING:
1672 #if SHORTSIZE != SIZE16
1675 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1676 DO_BO_UNPACK(ashort, s);
1678 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV)ashort;
1692 #if U16SIZE > SIZE16
1695 SHIFT16(utf8, s, strend, &ai16, datumtype);
1696 DO_BO_UNPACK(ai16, 16);
1697 #if U16SIZE > SIZE16
1702 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV)ai16;
1709 case 'S' | TYPE_IS_SHRIEKING:
1710 #if SHORTSIZE != SIZE16
1712 unsigned short aushort;
1713 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1714 DO_BO_UNPACK(aushort, s);
1716 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)aushort;
1731 #if U16SIZE > SIZE16
1734 SHIFT16(utf8, s, strend, &au16, datumtype);
1735 DO_BO_UNPACK(au16, 16);
1737 if (datumtype == 'n')
1738 au16 = PerlSock_ntohs(au16);
1741 if (datumtype == 'v')
1745 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1746 else if (checksum > bits_in_uv)
1747 cdouble += (NV) au16;
1752 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1753 case 'v' | TYPE_IS_SHRIEKING:
1754 case 'n' | TYPE_IS_SHRIEKING:
1757 # if U16SIZE > SIZE16
1760 SHIFT16(utf8, s, strend, &ai16, datumtype);
1762 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1763 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1764 # endif /* HAS_NTOHS */
1766 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1767 ai16 = (I16) vtohs((U16) ai16);
1768 # endif /* HAS_VTOHS */
1770 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1771 else if (checksum > bits_in_uv)
1772 cdouble += (NV) ai16;
1777 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1779 case 'i' | TYPE_IS_SHRIEKING:
1782 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1783 DO_BO_UNPACK(aint, i);
1785 PUSHs(sv_2mortal(newSViv((IV)aint)));
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)aint;
1793 case 'I' | TYPE_IS_SHRIEKING:
1796 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1797 DO_BO_UNPACK(auint, i);
1799 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1800 else if (checksum > bits_in_uv)
1801 cdouble += (NV)auint;
1809 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1810 #if IVSIZE == INTSIZE
1811 DO_BO_UNPACK(aiv, i);
1812 #elif IVSIZE == LONGSIZE
1813 DO_BO_UNPACK(aiv, l);
1814 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1815 DO_BO_UNPACK(aiv, 64);
1817 Perl_croak(aTHX_ "'j' not supported on this platform");
1820 PUSHs(sv_2mortal(newSViv(aiv)));
1821 else if (checksum > bits_in_uv)
1830 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1831 #if IVSIZE == INTSIZE
1832 DO_BO_UNPACK(auv, i);
1833 #elif IVSIZE == LONGSIZE
1834 DO_BO_UNPACK(auv, l);
1835 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1836 DO_BO_UNPACK(auv, 64);
1838 Perl_croak(aTHX_ "'J' not supported on this platform");
1841 PUSHs(sv_2mortal(newSVuv(auv)));
1842 else if (checksum > bits_in_uv)
1848 case 'l' | TYPE_IS_SHRIEKING:
1849 #if LONGSIZE != SIZE32
1852 SHIFT_VAR(utf8, s, strend, along, datumtype);
1853 DO_BO_UNPACK(along, l);
1855 PUSHs(sv_2mortal(newSViv((IV)along)));
1856 else if (checksum > bits_in_uv)
1857 cdouble += (NV)along;
1868 #if U32SIZE > SIZE32
1871 SHIFT32(utf8, s, strend, &ai32, datumtype);
1872 DO_BO_UNPACK(ai32, 32);
1873 #if U32SIZE > SIZE32
1874 if (ai32 > 2147483647) ai32 -= 4294967296;
1877 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1878 else if (checksum > bits_in_uv)
1879 cdouble += (NV)ai32;
1884 case 'L' | TYPE_IS_SHRIEKING:
1885 #if LONGSIZE != SIZE32
1887 unsigned long aulong;
1888 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1889 DO_BO_UNPACK(aulong, l);
1891 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1892 else if (checksum > bits_in_uv)
1893 cdouble += (NV)aulong;
1906 #if U32SIZE > SIZE32
1909 SHIFT32(utf8, s, strend, &au32, datumtype);
1910 DO_BO_UNPACK(au32, 32);
1912 if (datumtype == 'N')
1913 au32 = PerlSock_ntohl(au32);
1916 if (datumtype == 'V')
1920 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1921 else if (checksum > bits_in_uv)
1922 cdouble += (NV)au32;
1927 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1928 case 'V' | TYPE_IS_SHRIEKING:
1929 case 'N' | TYPE_IS_SHRIEKING:
1932 # if U32SIZE > SIZE32
1935 SHIFT32(utf8, s, strend, &ai32, datumtype);
1937 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1938 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1941 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1942 ai32 = (I32)vtohl((U32)ai32);
1945 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1946 else if (checksum > bits_in_uv)
1947 cdouble += (NV)ai32;
1952 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1956 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1957 DO_BO_UNPACK_PC(aptr);
1958 /* newSVpv generates undef if aptr is NULL */
1959 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1967 while (len > 0 && s < strend) {
1969 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1970 auv = (auv << 7) | (ch & 0x7f);
1971 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1974 PUSHs(sv_2mortal(newSVuv(auv)));
1979 if (++bytes >= sizeof(UV)) { /* promote to string */
1982 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1983 while (s < strend) {
1984 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1985 sv = mul128(sv, (U8)(ch & 0x7f));
1991 t = SvPV_nolen_const(sv);
1995 PUSHs(sv_2mortal(sv));
2000 if ((s >= strend) && bytes)
2001 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2005 if (symptr->howlen == e_star)
2006 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2008 if (s + sizeof(char*) <= strend) {
2010 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2011 DO_BO_UNPACK_PC(aptr);
2012 /* newSVpvn generates undef if aptr is NULL */
2013 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2020 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2021 DO_BO_UNPACK(aquad, 64);
2023 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2024 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2025 else if (checksum > bits_in_uv)
2026 cdouble += (NV)aquad;
2034 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2035 DO_BO_UNPACK(auquad, 64);
2037 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2038 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2039 else if (checksum > bits_in_uv)
2040 cdouble += (NV)auquad;
2045 #endif /* HAS_QUAD */
2046 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2050 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2051 DO_BO_UNPACK_N(afloat, float);
2053 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2061 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2062 DO_BO_UNPACK_N(adouble, double);
2064 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2072 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2073 DO_BO_UNPACK_N(anv, NV);
2075 PUSHs(sv_2mortal(newSVnv(anv)));
2080 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2083 long double aldouble;
2084 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2085 DO_BO_UNPACK_N(aldouble, long double);
2087 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2089 cdouble += aldouble;
2095 * Initialise the decode mapping. By using a table driven
2096 * algorithm, the code will be character-set independent
2097 * (and just as fast as doing character arithmetic)
2099 if (PL_uudmap['M'] == 0) {
2102 for (i = 0; i < sizeof(PL_uuemap); ++i)
2103 PL_uudmap[(U8)PL_uuemap[i]] = i;
2105 * Because ' ' and '`' map to the same value,
2106 * we need to decode them both the same.
2111 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2112 sv = sv_2mortal(newSV(l));
2113 if (l) SvPOK_on(sv);
2116 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2122 next_uni_uu(aTHX_ &s, strend, &a);
2123 next_uni_uu(aTHX_ &s, strend, &b);
2124 next_uni_uu(aTHX_ &s, strend, &c);
2125 next_uni_uu(aTHX_ &s, strend, &d);
2126 hunk[0] = (char)((a << 2) | (b >> 4));
2127 hunk[1] = (char)((b << 4) | (c >> 2));
2128 hunk[2] = (char)((c << 6) | d);
2129 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2137 /* possible checksum byte */
2138 const char *skip = s+UTF8SKIP(s);
2139 if (skip < strend && *skip == '\n')
2145 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2150 len = PL_uudmap[*(U8*)s++] & 077;
2152 if (s < strend && ISUUCHAR(*s))
2153 a = PL_uudmap[*(U8*)s++] & 077;
2156 if (s < strend && ISUUCHAR(*s))
2157 b = PL_uudmap[*(U8*)s++] & 077;
2160 if (s < strend && ISUUCHAR(*s))
2161 c = PL_uudmap[*(U8*)s++] & 077;
2164 if (s < strend && ISUUCHAR(*s))
2165 d = PL_uudmap[*(U8*)s++] & 077;
2168 hunk[0] = (char)((a << 2) | (b >> 4));
2169 hunk[1] = (char)((b << 4) | (c >> 2));
2170 hunk[2] = (char)((c << 6) | d);
2171 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2176 else /* possible checksum byte */
2177 if (s + 1 < strend && s[1] == '\n')
2186 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2187 (checksum > bits_in_uv &&
2188 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2191 anv = (NV) (1 << (checksum & 15));
2192 while (checksum >= 16) {
2196 while (cdouble < 0.0)
2198 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2199 sv = newSVnv(cdouble);
2202 if (checksum < bits_in_uv) {
2203 UV mask = ((UV)1 << checksum) - 1;
2208 XPUSHs(sv_2mortal(sv));
2212 if (symptr->flags & FLAG_SLASH){
2213 if (SP - PL_stack_base - start_sp_offset <= 0)
2214 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2215 if( next_symbol(symptr) ){
2216 if( symptr->howlen == e_number )
2217 Perl_croak(aTHX_ "Count after length/code in unpack" );
2219 /* ...end of char buffer then no decent length available */
2220 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2222 /* take top of stack (hope it's numeric) */
2225 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2228 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2230 datumtype = symptr->code;
2231 explicit_length = FALSE;
2239 return SP - PL_stack_base - start_sp_offset;
2247 I32 gimme = GIMME_V;
2250 const char *pat = SvPV_const(left, llen);
2251 const char *s = SvPV_const(right, rlen);
2252 const char *strend = s + rlen;
2253 const char *patend = pat + llen;
2257 cnt = unpackstring(pat, patend, s, strend,
2258 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2259 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2262 if ( !cnt && gimme == G_SCALAR )
2263 PUSHs(&PL_sv_undef);
2268 doencodes(U8 *h, const char *s, I32 len)
2270 *h++ = PL_uuemap[len];
2272 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2273 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2274 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2275 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2280 const char r = (len > 1 ? s[1] : '\0');
2281 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2282 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2283 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2284 *h++ = PL_uuemap[0];
2291 S_is_an_int(pTHX_ const char *s, STRLEN l)
2293 SV *result = newSVpvn(s, l);
2294 char *const result_c = SvPV_nolen(result); /* convenience */
2295 char *out = result_c;
2305 SvREFCNT_dec(result);
2328 SvREFCNT_dec(result);
2334 SvCUR_set(result, out - result_c);
2338 /* pnum must be '\0' terminated */
2340 S_div128(pTHX_ SV *pnum, bool *done)
2343 char * const s = SvPV(pnum, len);
2349 const int i = m * 10 + (*t - '0');
2350 const int r = (i >> 7); /* r < 10 */
2358 SvCUR_set(pnum, (STRLEN) (t - s));
2363 =for apidoc packlist
2365 The engine implementing pack() Perl function.
2371 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2376 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2378 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2379 Also make sure any UTF8 flag is loaded */
2380 SvPV_force_nolen(cat);
2382 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2384 (void)pack_rec( cat, &sym, beglist, endlist );
2387 /* like sv_utf8_upgrade, but also repoint the group start markers */
2389 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2392 const char *from_ptr, *from_start, *from_end, **marks, **m;
2393 char *to_start, *to_ptr;
2395 if (SvUTF8(sv)) return;
2397 from_start = SvPVX_const(sv);
2398 from_end = from_start + SvCUR(sv);
2399 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2400 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2401 if (from_ptr == from_end) {
2402 /* Simple case: no character needs to be changed */
2407 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2408 Newx(to_start, len, char);
2409 Copy(from_start, to_start, from_ptr-from_start, char);
2410 to_ptr = to_start + (from_ptr-from_start);
2412 Newx(marks, sym_ptr->level+2, const char *);
2413 for (group=sym_ptr; group; group = group->previous)
2414 marks[group->level] = from_start + group->strbeg;
2415 marks[sym_ptr->level+1] = from_end+1;
2416 for (m = marks; *m < from_ptr; m++)
2417 *m = to_start + (*m-from_start);
2419 for (;from_ptr < from_end; from_ptr++) {
2420 while (*m == from_ptr) *m++ = to_ptr;
2421 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2425 while (*m == from_ptr) *m++ = to_ptr;
2426 if (m != marks + sym_ptr->level+1) {
2429 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2431 for (group=sym_ptr; group; group = group->previous)
2432 group->strbeg = marks[group->level] - to_start;
2437 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2438 from_start -= SvIVX(sv);
2441 SvFLAGS(sv) &= ~SVf_OOK;
2444 Safefree(from_start);
2445 SvPV_set(sv, to_start);
2446 SvCUR_set(sv, to_ptr - to_start);
2451 /* Exponential string grower. Makes string extension effectively O(n)
2452 needed says how many extra bytes we need (not counting the final '\0')
2453 Only grows the string if there is an actual lack of space
2456 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2457 const STRLEN cur = SvCUR(sv);
2458 const STRLEN len = SvLEN(sv);
2460 if (len - cur > needed) return SvPVX(sv);
2461 extend = needed > len ? needed : len;
2462 return SvGROW(sv, len+extend+1);
2467 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2470 tempsym_t lookahead;
2471 I32 items = endlist - beglist;
2472 bool found = next_symbol(symptr);
2473 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2474 bool warn_utf8 = ckWARN(WARN_UTF8);
2476 if (symptr->level == 0 && found && symptr->code == 'U') {
2477 marked_upgrade(aTHX_ cat, symptr);
2478 symptr->flags |= FLAG_DO_UTF8;
2481 symptr->strbeg = SvCUR(cat);
2487 SV *lengthcode = NULL;
2488 I32 datumtype = symptr->code;
2489 howlen_t howlen = symptr->howlen;
2490 char *start = SvPVX(cat);
2491 char *cur = start + SvCUR(cat);
2493 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2497 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2501 /* e_no_len and e_number */
2502 len = symptr->length;
2507 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2509 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2510 /* We can process this letter. */
2511 STRLEN size = props & PACK_SIZE_MASK;
2512 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2516 /* Look ahead for next symbol. Do we have code/code? */
2517 lookahead = *symptr;
2518 found = next_symbol(&lookahead);
2519 if (symptr->flags & FLAG_SLASH) {
2521 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2522 if (strchr("aAZ", lookahead.code)) {
2523 if (lookahead.howlen == e_number) count = lookahead.length;
2526 if (SvGAMAGIC(*beglist)) {
2527 /* Avoid reading the active data more than once
2528 by copying it to a temporary. */
2530 const char *const pv = SvPV_const(*beglist, len);
2531 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2532 if (SvUTF8(*beglist))
2536 count = DO_UTF8(*beglist) ?
2537 sv_len_utf8(*beglist) : sv_len(*beglist);
2540 if (lookahead.code == 'Z') count++;
2543 if (lookahead.howlen == e_number && lookahead.length < items)
2544 count = lookahead.length;
2547 lookahead.howlen = e_number;
2548 lookahead.length = count;
2549 lengthcode = sv_2mortal(newSViv(count));
2552 /* Code inside the switch must take care to properly update
2553 cat (CUR length and '\0' termination) if it updated *cur and
2554 doesn't simply leave using break */
2555 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2557 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2558 (int) TYPE_NO_MODIFIERS(datumtype));
2560 Perl_croak(aTHX_ "'%%' may not be used in pack");
2563 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2564 case '.' | TYPE_IS_SHRIEKING:
2567 if (howlen == e_star) from = start;
2568 else if (len == 0) from = cur;
2570 tempsym_t *group = symptr;
2572 while (--len && group) group = group->previous;
2573 from = group ? start + group->strbeg : start;
2576 len = SvIV(fromstr);
2578 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2579 case '@' | TYPE_IS_SHRIEKING:
2582 from = start + symptr->strbeg;
2584 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2585 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2586 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2590 while (len && from < cur) {
2591 from += UTF8SKIP(from);
2595 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2597 /* Here we know from == cur */
2599 GROWING(0, cat, start, cur, len);
2600 Zero(cur, len, char);
2602 } else if (from < cur) {
2605 } else goto no_change;
2613 if (len > 0) goto grow;
2614 if (len == 0) goto no_change;
2621 tempsym_t savsym = *symptr;
2622 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2623 symptr->flags |= group_modifiers;
2624 symptr->patend = savsym.grpend;
2626 symptr->previous = &lookahead;
2629 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2630 else symptr->flags &= ~FLAG_PARSE_UTF8;
2631 was_utf8 = SvUTF8(cat);
2632 symptr->patptr = savsym.grpbeg;
2633 beglist = pack_rec(cat, symptr, beglist, endlist);
2634 if (SvUTF8(cat) != was_utf8)
2635 /* This had better be an upgrade while in utf8==0 mode */
2638 if (savsym.howlen == e_star && beglist == endlist)
2639 break; /* No way to continue */
2641 lookahead.flags = symptr->flags & ~group_modifiers;
2644 case 'X' | TYPE_IS_SHRIEKING:
2645 if (!len) /* Avoid division by 0 */
2652 hop += UTF8SKIP(hop);
2659 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2663 len = (cur-start) % len;
2667 if (len < 1) goto no_change;
2671 Perl_croak(aTHX_ "'%c' outside of string in pack",
2672 (int) TYPE_NO_MODIFIERS(datumtype));
2673 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2675 Perl_croak(aTHX_ "'%c' outside of string in pack",
2676 (int) TYPE_NO_MODIFIERS(datumtype));
2682 if (cur - start < len)
2683 Perl_croak(aTHX_ "'%c' outside of string in pack",
2684 (int) TYPE_NO_MODIFIERS(datumtype));
2687 if (cur < start+symptr->strbeg) {
2688 /* Make sure group starts don't point into the void */
2690 const STRLEN length = cur-start;
2691 for (group = symptr;
2692 group && length < group->strbeg;
2693 group = group->previous) group->strbeg = length;
2694 lookahead.strbeg = length;
2697 case 'x' | TYPE_IS_SHRIEKING: {
2699 if (!len) /* Avoid division by 0 */
2701 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2702 else ai32 = (cur - start) % len;
2703 if (ai32 == 0) goto no_change;
2715 aptr = SvPV_const(fromstr, fromlen);
2716 if (DO_UTF8(fromstr)) {
2717 const char *end, *s;
2719 if (!utf8 && !SvUTF8(cat)) {
2720 marked_upgrade(aTHX_ cat, symptr);
2721 lookahead.flags |= FLAG_DO_UTF8;
2722 lookahead.strbeg = symptr->strbeg;
2725 cur = start + SvCUR(cat);
2727 if (howlen == e_star) {
2728 if (utf8) goto string_copy;
2732 end = aptr + fromlen;
2733 fromlen = datumtype == 'Z' ? len-1 : len;
2734 while ((I32) fromlen > 0 && s < end) {
2739 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2742 if (datumtype == 'Z') len++;
2748 fromlen = len - fromlen;
2749 if (datumtype == 'Z') fromlen--;
2750 if (howlen == e_star) {
2752 if (datumtype == 'Z') len++;
2754 GROWING(0, cat, start, cur, len);
2755 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2756 datumtype | TYPE_IS_PACK))
2757 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2761 if (howlen == e_star) {
2763 if (datumtype == 'Z') len++;
2765 if (len <= (I32) fromlen) {
2767 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2769 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2771 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2772 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2774 while (fromlen > 0) {
2775 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2781 if (howlen == e_star) {
2783 if (datumtype == 'Z') len++;
2785 if (len <= (I32) fromlen) {
2787 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2789 GROWING(0, cat, start, cur, len);
2790 Copy(aptr, cur, fromlen, char);
2794 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2800 const char *str, *end;
2807 str = SvPV_const(fromstr, fromlen);
2808 end = str + fromlen;
2809 if (DO_UTF8(fromstr)) {
2811 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2813 utf8_source = FALSE;
2814 utf8_flags = 0; /* Unused, but keep compilers happy */
2816 if (howlen == e_star) len = fromlen;
2817 field_len = (len+7)/8;
2818 GROWING(utf8, cat, start, cur, field_len);
2819 if (len > (I32)fromlen) len = fromlen;
2822 if (datumtype == 'B')
2826 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2828 } else bits |= *str++ & 1;
2829 if (l & 7) bits <<= 1;
2831 PUSH_BYTE(utf8, cur, bits);
2836 /* datumtype == 'b' */
2840 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2841 if (val & 1) bits |= 0x80;
2842 } else if (*str++ & 1)
2844 if (l & 7) bits >>= 1;
2846 PUSH_BYTE(utf8, cur, bits);
2852 if (datumtype == 'B')
2853 bits <<= 7 - (l & 7);
2855 bits >>= 7 - (l & 7);
2856 PUSH_BYTE(utf8, cur, bits);
2859 /* Determine how many chars are left in the requested field */
2861 if (howlen == e_star) field_len = 0;
2862 else field_len -= l;
2863 Zero(cur, field_len, char);
2869 const char *str, *end;
2876 str = SvPV_const(fromstr, fromlen);
2877 end = str + fromlen;
2878 if (DO_UTF8(fromstr)) {
2880 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2882 utf8_source = FALSE;
2883 utf8_flags = 0; /* Unused, but keep compilers happy */
2885 if (howlen == e_star) len = fromlen;
2886 field_len = (len+1)/2;
2887 GROWING(utf8, cat, start, cur, field_len);
2888 if (!utf8 && len > (I32)fromlen) len = fromlen;
2891 if (datumtype == 'H')
2895 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2896 if (val < 256 && isALPHA(val))
2897 bits |= (val + 9) & 0xf;
2900 } else if (isALPHA(*str))
2901 bits |= (*str++ + 9) & 0xf;
2903 bits |= *str++ & 0xf;
2904 if (l & 1) bits <<= 4;
2906 PUSH_BYTE(utf8, cur, bits);
2914 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2915 if (val < 256 && isALPHA(val))
2916 bits |= ((val + 9) & 0xf) << 4;
2918 bits |= (val & 0xf) << 4;
2919 } else if (isALPHA(*str))
2920 bits |= ((*str++ + 9) & 0xf) << 4;
2922 bits |= (*str++ & 0xf) << 4;
2923 if (l & 1) bits >>= 4;
2925 PUSH_BYTE(utf8, cur, bits);
2931 PUSH_BYTE(utf8, cur, bits);
2934 /* Determine how many chars are left in the requested field */
2936 if (howlen == e_star) field_len = 0;
2937 else field_len -= l;
2938 Zero(cur, field_len, char);
2946 aiv = SvIV(fromstr);
2947 if ((-128 > aiv || aiv > 127) &&
2949 Perl_warner(aTHX_ packWARN(WARN_PACK),
2950 "Character in 'c' format wrapped in pack");
2951 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2956 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2959 GROWING(0, cat, start, cur, len);
2963 aiv = SvIV(fromstr);
2964 if ((0 > aiv || aiv > 0xff) &&
2966 Perl_warner(aTHX_ packWARN(WARN_PACK),
2967 "Character in 'C' format wrapped in pack");
2968 *cur++ = (char)(aiv & 0xff);
2973 U8 in_bytes = IN_BYTES;
2975 end = start+SvLEN(cat)-1;
2976 if (utf8) end -= UTF8_MAXLEN-1;
2980 auv = SvUV(fromstr);
2981 if (in_bytes) auv = auv % 0x100;
2986 SvCUR_set(cat, cur - start);
2988 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2989 end = start+SvLEN(cat)-UTF8_MAXLEN;
2991 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2994 0 : UNICODE_ALLOW_ANY);
2999 SvCUR_set(cat, cur - start);
3000 marked_upgrade(aTHX_ cat, symptr);
3001 lookahead.flags |= FLAG_DO_UTF8;
3002 lookahead.strbeg = symptr->strbeg;
3005 cur = start + SvCUR(cat);
3006 end = start+SvLEN(cat)-UTF8_MAXLEN;
3009 if (ckWARN(WARN_PACK))
3010 Perl_warner(aTHX_ packWARN(WARN_PACK),
3011 "Character in 'W' format wrapped in pack");
3016 SvCUR_set(cat, cur - start);
3017 GROWING(0, cat, start, cur, len+1);
3018 end = start+SvLEN(cat)-1;
3020 *(U8 *) cur++ = (U8)auv;
3029 if (!(symptr->flags & FLAG_DO_UTF8)) {
3030 marked_upgrade(aTHX_ cat, symptr);
3031 lookahead.flags |= FLAG_DO_UTF8;
3032 lookahead.strbeg = symptr->strbeg;
3038 end = start+SvLEN(cat);
3039 if (!utf8) end -= UTF8_MAXLEN;
3043 auv = SvUV(fromstr);
3045 U8 buffer[UTF8_MAXLEN], *endb;
3046 endb = uvuni_to_utf8_flags(buffer, auv,
3048 0 : UNICODE_ALLOW_ANY);
3049 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3051 SvCUR_set(cat, cur - start);
3052 GROWING(0, cat, start, cur,
3053 len+(endb-buffer)*UTF8_EXPAND);
3054 end = start+SvLEN(cat);
3056 cur = bytes_to_uni(buffer, endb-buffer, cur);
3060 SvCUR_set(cat, cur - start);
3061 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3062 end = start+SvLEN(cat)-UTF8_MAXLEN;
3064 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3066 0 : UNICODE_ALLOW_ANY);
3071 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3077 anv = SvNV(fromstr);
3079 /* VOS does not automatically map a floating-point overflow
3080 during conversion from double to float into infinity, so we
3081 do it by hand. This code should either be generalized for
3082 any OS that needs it, or removed if and when VOS implements
3083 posix-976 (suggestion to support mapping to infinity).
3084 Paul.Green@stratus.com 02-04-02. */
3086 afloat = _float_constants[0]; /* single prec. inf. */
3087 else if (anv < -FLT_MAX)
3088 afloat = _float_constants[0]; /* single prec. inf. */
3089 else afloat = (float) anv;
3091 # if defined(VMS) && !defined(__IEEE_FP)
3092 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3093 * on Alpha; fake it if we don't have them.
3097 else if (anv < -FLT_MAX)
3099 else afloat = (float)anv;
3101 afloat = (float)anv;
3103 #endif /* __VOS__ */
3104 DO_BO_PACK_N(afloat, float);
3105 PUSH_VAR(utf8, cur, afloat);
3113 anv = SvNV(fromstr);
3115 /* VOS does not automatically map a floating-point overflow
3116 during conversion from long double to double into infinity,
3117 so we do it by hand. This code should either be generalized
3118 for any OS that needs it, or removed if and when VOS
3119 implements posix-976 (suggestion to support mapping to
3120 infinity). Paul.Green@stratus.com 02-04-02. */
3122 adouble = _double_constants[0]; /* double prec. inf. */
3123 else if (anv < -DBL_MAX)
3124 adouble = _double_constants[0]; /* double prec. inf. */
3125 else adouble = (double) anv;
3127 # if defined(VMS) && !defined(__IEEE_FP)
3128 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3129 * on Alpha; fake it if we don't have them.
3133 else if (anv < -DBL_MAX)
3135 else adouble = (double)anv;
3137 adouble = (double)anv;
3139 #endif /* __VOS__ */
3140 DO_BO_PACK_N(adouble, double);
3141 PUSH_VAR(utf8, cur, adouble);
3146 Zero(&anv, 1, NV); /* can be long double with unused bits */
3149 anv = SvNV(fromstr);
3150 DO_BO_PACK_N(anv, NV);
3151 PUSH_VAR(utf8, cur, anv);
3155 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3157 long double aldouble;
3158 /* long doubles can have unused bits, which may be nonzero */
3159 Zero(&aldouble, 1, long double);
3162 aldouble = (long double)SvNV(fromstr);
3163 DO_BO_PACK_N(aldouble, long double);
3164 PUSH_VAR(utf8, cur, aldouble);
3169 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3170 case 'n' | TYPE_IS_SHRIEKING:
3176 ai16 = (I16)SvIV(fromstr);
3178 ai16 = PerlSock_htons(ai16);
3180 PUSH16(utf8, cur, &ai16);
3183 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3184 case 'v' | TYPE_IS_SHRIEKING:
3190 ai16 = (I16)SvIV(fromstr);
3194 PUSH16(utf8, cur, &ai16);
3197 case 'S' | TYPE_IS_SHRIEKING:
3198 #if SHORTSIZE != SIZE16
3200 unsigned short aushort;
3202 aushort = SvUV(fromstr);
3203 DO_BO_PACK(aushort, s);
3204 PUSH_VAR(utf8, cur, aushort);
3214 au16 = (U16)SvUV(fromstr);
3215 DO_BO_PACK(au16, 16);
3216 PUSH16(utf8, cur, &au16);
3219 case 's' | TYPE_IS_SHRIEKING:
3220 #if SHORTSIZE != SIZE16
3224 ashort = SvIV(fromstr);
3225 DO_BO_PACK(ashort, s);
3226 PUSH_VAR(utf8, cur, ashort);
3236 ai16 = (I16)SvIV(fromstr);
3237 DO_BO_PACK(ai16, 16);
3238 PUSH16(utf8, cur, &ai16);
3242 case 'I' | TYPE_IS_SHRIEKING:
3246 auint = SvUV(fromstr);
3247 DO_BO_PACK(auint, i);
3248 PUSH_VAR(utf8, cur, auint);
3255 aiv = SvIV(fromstr);
3256 #if IVSIZE == INTSIZE
3258 #elif IVSIZE == LONGSIZE
3260 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3261 DO_BO_PACK(aiv, 64);
3263 Perl_croak(aTHX_ "'j' not supported on this platform");
3265 PUSH_VAR(utf8, cur, aiv);
3272 auv = SvUV(fromstr);
3273 #if UVSIZE == INTSIZE
3275 #elif UVSIZE == LONGSIZE
3277 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3278 DO_BO_PACK(auv, 64);
3280 Perl_croak(aTHX_ "'J' not supported on this platform");
3282 PUSH_VAR(utf8, cur, auv);
3289 anv = SvNV(fromstr);
3293 SvCUR_set(cat, cur - start);
3294 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3297 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3298 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3299 any negative IVs will have already been got by the croak()
3300 above. IOK is untrue for fractions, so we test them
3301 against UV_MAX_P1. */
3302 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3303 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3304 char *in = buf + sizeof(buf);
3305 UV auv = SvUV(fromstr);
3308 *--in = (char)((auv & 0x7f) | 0x80);
3311 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3312 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3313 in, (buf + sizeof(buf)) - in);
3314 } else if (SvPOKp(fromstr))
3316 else if (SvNOKp(fromstr)) {
3317 /* 10**NV_MAX_10_EXP is the largest power of 10
3318 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3319 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3320 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3321 And with that many bytes only Inf can overflow.
3322 Some C compilers are strict about integral constant
3323 expressions so we conservatively divide by a slightly
3324 smaller integer instead of multiplying by the exact
3325 floating-point value.
3327 #ifdef NV_MAX_10_EXP
3328 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3329 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3331 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3332 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3334 char *in = buf + sizeof(buf);
3336 anv = Perl_floor(anv);
3338 const NV next = Perl_floor(anv / 128);
3339 if (in <= buf) /* this cannot happen ;-) */
3340 Perl_croak(aTHX_ "Cannot compress integer in pack");
3341 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3344 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3345 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3346 in, (buf + sizeof(buf)) - in);
3355 /* Copy string and check for compliance */
3356 from = SvPV_const(fromstr, len);
3357 if ((norm = is_an_int(from, len)) == NULL)
3358 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3360 Newx(result, len, char);
3363 while (!done) *--in = div128(norm, &done) | 0x80;
3364 result[len - 1] &= 0x7F; /* clear continue bit */
3365 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3366 in, (result + len) - in);
3368 SvREFCNT_dec(norm); /* free norm */
3373 case 'i' | TYPE_IS_SHRIEKING:
3377 aint = SvIV(fromstr);
3378 DO_BO_PACK(aint, i);
3379 PUSH_VAR(utf8, cur, aint);
3382 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3383 case 'N' | TYPE_IS_SHRIEKING:
3389 au32 = SvUV(fromstr);
3391 au32 = PerlSock_htonl(au32);
3393 PUSH32(utf8, cur, &au32);
3396 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3397 case 'V' | TYPE_IS_SHRIEKING:
3403 au32 = SvUV(fromstr);
3407 PUSH32(utf8, cur, &au32);
3410 case 'L' | TYPE_IS_SHRIEKING:
3411 #if LONGSIZE != SIZE32
3413 unsigned long aulong;
3415 aulong = SvUV(fromstr);
3416 DO_BO_PACK(aulong, l);
3417 PUSH_VAR(utf8, cur, aulong);
3427 au32 = SvUV(fromstr);
3428 DO_BO_PACK(au32, 32);
3429 PUSH32(utf8, cur, &au32);
3432 case 'l' | TYPE_IS_SHRIEKING:
3433 #if LONGSIZE != SIZE32
3437 along = SvIV(fromstr);
3438 DO_BO_PACK(along, l);
3439 PUSH_VAR(utf8, cur, along);
3449 ai32 = SvIV(fromstr);
3450 DO_BO_PACK(ai32, 32);
3451 PUSH32(utf8, cur, &ai32);
3459 auquad = (Uquad_t) SvUV(fromstr);
3460 DO_BO_PACK(auquad, 64);
3461 PUSH_VAR(utf8, cur, auquad);
3468 aquad = (Quad_t)SvIV(fromstr);
3469 DO_BO_PACK(aquad, 64);
3470 PUSH_VAR(utf8, cur, aquad);
3473 #endif /* HAS_QUAD */
3475 len = 1; /* assume SV is correct length */
3476 GROWING(utf8, cat, start, cur, sizeof(char *));
3483 SvGETMAGIC(fromstr);
3484 if (!SvOK(fromstr)) aptr = NULL;
3486 /* XXX better yet, could spirit away the string to
3487 * a safe spot and hang on to it until the result
3488 * of pack() (and all copies of the result) are
3491 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3492 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3493 Perl_warner(aTHX_ packWARN(WARN_PACK),
3494 "Attempt to pack pointer to temporary value");
3496 if (SvPOK(fromstr) || SvNIOK(fromstr))
3497 aptr = SvPV_nomg_const_nolen(fromstr);
3499 aptr = SvPV_force_flags_nolen(fromstr, 0);
3501 DO_BO_PACK_PC(aptr);
3502 PUSH_VAR(utf8, cur, aptr);
3506 const char *aptr, *aend;
3510 if (len <= 2) len = 45;
3511 else len = len / 3 * 3;
3513 if (ckWARN(WARN_PACK))
3514 Perl_warner(aTHX_ packWARN(WARN_PACK),
3515 "Field too wide in 'u' format in pack");
3518 aptr = SvPV_const(fromstr, fromlen);
3519 from_utf8 = DO_UTF8(fromstr);
3521 aend = aptr + fromlen;
3522 fromlen = sv_len_utf8(fromstr);
3523 } else aend = NULL; /* Unused, but keep compilers happy */
3524 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3525 while (fromlen > 0) {
3528 U8 hunk[1+63/3*4+1];
3530 if ((I32)fromlen > len)
3536 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3537 'u' | TYPE_IS_PACK)) {
3539 SvCUR_set(cat, cur - start);
3540 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3542 end = doencodes(hunk, buffer, todo);
3544 end = doencodes(hunk, aptr, todo);
3547 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3554 SvCUR_set(cat, cur - start);
3556 *symptr = lookahead;
3565 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3566 register SV *cat = TARG;
3568 SV *pat_sv = *++MARK;
3569 register const char *pat = SvPV_const(pat_sv, fromlen);
3570 register const char *patend = pat + fromlen;
3573 sv_setpvn(cat, "", 0);
3576 packlist(cat, pat, patend, MARK, SP + 1);
3586 * c-indentation-style: bsd
3588 * indent-tabs-mode: t
3591 * ex: set ts=8 sts=4 sw=4 noet: