3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
86 # define PERL_PACK_CAN_BYTEORDER
87 # define PERL_PACK_CAN_SHRIEKSIGN
93 /* Maximum number of bytes to which a byte can grow due to upgrade */
97 * Offset for integer pack/unpack.
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
121 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
122 # define OFF16(p) ((char*)(p))
123 # define OFF32(p) ((char*)(p))
125 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
126 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
129 ++++ bad cray byte order
133 # define OFF16(p) ((char *) (p))
134 # define OFF32(p) ((char *) (p))
137 /* Only to be used inside a loop (see the break) */
138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
142 Copy(s, OFF16(p), SIZE16, char); \
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
152 Copy(s, OFF32(p), SIZE32, char); \
157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
160 /* Only to be used inside a loop (see the break) */
161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
164 if (!uni_to_bytes(aTHX_ &s, strend, \
165 (char *) (buf), len, datumtype)) break; \
167 Copy(s, (char *) (buf), len, char); \
172 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
175 #define PUSH_VAR(utf8, aptr, var) \
176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179 #define MAX_SUB_TEMPLATE_LEVEL 100
181 /* flags (note that type modifiers can also be used as flags!) */
182 #define FLAG_WAS_UTF8 0x40
183 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
184 #define FLAG_UNPACK_ONLY_ONE 0x10
185 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
186 #define FLAG_SLASH 0x04
187 #define FLAG_COMMA 0x02
188 #define FLAG_PACK 0x01
191 S_mul128(pTHX_ SV *sv, U8 m)
194 char *s = SvPV(sv, len);
197 PERL_ARGS_ASSERT_MUL128;
199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
200 SV * const tmpNew = newSVpvs("0000000000");
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
208 while (!*t) /* trailing '\0'? */
211 const U32 i = ((*t - '0') << 7) + m;
212 *(t--) = '0' + (char)(i % 10);
218 /* Explosives and implosives. */
220 #if 'I' == 73 && 'J' == 74
221 /* On an ASCII/ISO kind of system */
222 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
225 Some other sort of character set - use memchr() so we don't match
228 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
232 #define TYPE_IS_SHRIEKING 0x100
233 #define TYPE_IS_BIG_ENDIAN 0x200
234 #define TYPE_IS_LITTLE_ENDIAN 0x400
235 #define TYPE_IS_PACK 0x800
236 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
238 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
240 #ifdef PERL_PACK_CAN_SHRIEKSIGN
241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
246 #ifndef PERL_PACK_CAN_BYTEORDER
247 /* Put "can't" first because it is shorter */
248 # define TYPE_ENDIANNESS(t) 0
249 # define TYPE_NO_ENDIANNESS(t) (t)
251 # define ENDIANNESS_ALLOWED_TYPES ""
253 # define DO_BO_UNPACK(var, type)
254 # define DO_BO_PACK(var, type)
255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257 # define DO_BO_UNPACK_N(var, type)
258 # define DO_BO_PACK_N(var, type)
259 # define DO_BO_UNPACK_P(var)
260 # define DO_BO_PACK_P(var)
261 # define DO_BO_UNPACK_PC(var)
262 # define DO_BO_PACK_PC(var)
264 #else /* PERL_PACK_CAN_BYTEORDER */
266 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
269 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
271 # define DO_BO_UNPACK(var, type) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
280 # define DO_BO_PACK(var, type) \
282 switch (TYPE_ENDIANNESS(datumtype)) { \
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: \
293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
295 case TYPE_IS_LITTLE_ENDIAN: \
296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
305 switch (TYPE_ENDIANNESS(datumtype)) { \
306 case TYPE_IS_BIG_ENDIAN: \
307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
317 # define BO_CANT_DOIT(action, type) \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
333 # if PTRSIZE == INTSIZE
334 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
338 # elif PTRSIZE == LONGSIZE
339 # if LONGSIZE < IVSIZE && IVSIZE == 8
340 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
345 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
350 # elif PTRSIZE == IVSIZE
351 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
356 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
358 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
362 # if defined(my_htolen) && defined(my_letohn) && \
363 defined(my_htoben) && defined(my_betohn)
364 # define DO_BO_UNPACK_N(var, type) \
366 switch (TYPE_ENDIANNESS(datumtype)) { \
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
373 # define DO_BO_PACK_N(var, type) \
375 switch (TYPE_ENDIANNESS(datumtype)) { \
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
382 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
386 #endif /* PERL_PACK_CAN_BYTEORDER */
388 #define PACK_SIZE_CANNOT_CSUM 0x80
389 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
390 #define PACK_SIZE_MASK 0x3F
392 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
395 #if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
399 typedef U8 packprops_t;
402 STATIC const packprops_t packprops[512] = {
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 /* C */ sizeof(unsigned char),
410 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
411 /* D */ LONG_DOUBLESIZE,
418 /* I */ sizeof(unsigned int),
425 #if defined(HAS_QUAD)
426 /* Q */ sizeof(Uquad_t),
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
440 /* f */ sizeof(float),
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
450 #if defined(HAS_QUAD)
451 /* q */ sizeof(Quad_t),
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
477 /* L */ sizeof(unsigned long),
479 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
485 /* S */ sizeof(unsigned short),
487 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 /* l */ sizeof(long),
498 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
504 /* s */ sizeof(short),
506 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
522 /* EBCDIC (or bust) */
523 STATIC const packprops_t packprops[512] = {
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
537 /* f */ sizeof(float),
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
548 #if defined(HAS_QUAD)
549 /* q */ sizeof(Quad_t),
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* C */ sizeof(unsigned char),
561 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
562 /* D */ LONG_DOUBLESIZE,
569 /* I */ sizeof(unsigned int),
577 #if defined(HAS_QUAD)
578 /* Q */ sizeof(Uquad_t),
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
604 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
612 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
624 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
632 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
643 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
648 /* We try to process malformed UTF-8 as much as possible (preferrably with
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
664 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
669 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
673 const char *from = *s;
675 const U32 flags = ckWARN(WARN_UTF8) ?
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
683 } else from += retlen;
688 *(U8 *)buf++ = (U8)val;
690 /* We have enough characters for the buffer. Did we have problems ? */
693 /* Rewalk the string fragment while warning */
695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
700 if (from > end) from = end;
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
704 WARN_PACK : WARN_UNPACK),
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
714 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
724 *out = PL_uudmap[val] & 077;
730 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
731 const U8 * const end = start + len;
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
735 while (start < end) {
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
748 #define PUSH_BYTES(utf8, cur, buf, len) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
753 Copy(buf, cur, len, char); \
758 #define GROWING(utf8, cat, start, cur, in_len) \
760 STRLEN glen = (in_len); \
761 if (utf8) glen *= UTF8_EXPAND; \
762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
763 (start) = sv_exp_grow(cat, glen); \
764 (cur) = (start) + SvCUR(cat); \
768 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
770 const STRLEN glen = (in_len); \
772 if (utf8) gl *= UTF8_EXPAND; \
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
775 SvCUR_set((cat), (cur) - (start)); \
776 (start) = sv_exp_grow(cat, gl); \
777 (cur) = (start) + SvCUR(cat); \
779 PUSH_BYTES(utf8, cur, buf, glen); \
782 #define PUSH_BYTE(utf8, s, byte) \
785 const U8 au8 = (byte); \
786 (s) = bytes_to_uni(&au8, 1, (s)); \
787 } else *(U8 *)(s)++ = (byte); \
790 /* Only to be used inside a loop (see the break) */
791 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
803 static const char *_action( const tempsym_t* symptr )
805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
808 /* Returns the sizeof() struct described by pat */
810 S_measure_struct(pTHX_ tempsym_t* symptr)
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
816 while (next_symbol(symptr)) {
820 switch (symptr->howlen) {
822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
826 /* e_no_len and e_number */
827 len = symptr->length;
831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
840 #ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
847 case 'U': /* XXXX Is it correct? */
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
851 (int) TYPE_NO_MODIFIERS(symptr->code),
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
872 if (!len) /* Avoid division by 0 */
874 len = total % len; /* Assumed: the start is aligned. */
879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
909 size = sizeof(char*);
919 /* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
923 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
925 PERL_ARGS_ASSERT_GROUP_END;
927 while (patptr < patend) {
928 const char c = *patptr++;
935 while (patptr < patend && *patptr != '\n')
939 patptr = group_end(patptr, patend, ')') + 1;
941 patptr = group_end(patptr, patend, ']') + 1;
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
949 /* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
954 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
956 I32 len = *patptr++ - '0';
958 PERL_ARGS_ASSERT_GET_NUM;
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
969 /* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
973 S_next_symbol(pTHX_ tempsym_t* symptr )
975 const char* patptr = symptr->patptr;
976 const char* const patend = symptr->patend;
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
980 symptr->flags &= ~FLAG_SLASH;
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
985 else if (*patptr == '#') {
987 while (patptr < patend && *patptr != '\n')
992 /* We should have found a template code */
993 I32 code = *patptr++ & 0xFF;
994 U32 inherited_modifiers = 0;
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1000 "Invalid type ',' in %s", _action( symptr ) );
1005 /* for '(', skip to ')' */
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
1009 _action( symptr ) );
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1014 _action( symptr ) );
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1023 /* look for modifiers */
1024 while (patptr < patend) {
1025 const char *allowed;
1029 modifier = TYPE_IS_SHRIEKING;
1030 allowed = SHRIEKING_ALLOWED_TYPES;
1032 #ifdef PERL_PACK_CAN_BYTEORDER
1034 modifier = TYPE_IS_BIG_ENDIAN;
1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1041 #endif /* PERL_PACK_CAN_BYTEORDER */
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1053 allowed, _action( symptr ) );
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1061 *patptr, _action( symptr ) );
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1077 /* look for count and/or / */
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1083 } else if (*patptr == '*') {
1085 symptr->howlen = e_star;
1087 } else if (*patptr == '[') {
1088 const char* lenptr = ++patptr;
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1096 _action( symptr ) );
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1105 symptr->howlen = e_no_len;
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1113 else if (*patptr == '#') {
1115 while (patptr < patend && *patptr != '\n')
1117 if (patptr < patend)
1120 if (*patptr == '/') {
1121 symptr->flags |= FLAG_SLASH;
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1126 _action( symptr ) );
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1137 symptr->code = code;
1138 symptr->patptr = patptr;
1142 symptr->patptr = patptr;
1147 There is no way to cleanly handle the case where we should process the
1148 string per byte in its upgraded form while it's really in downgraded form
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
1153 themselves if they need to do a lot of unpacks like this on it
1156 need_utf8(const char *pat, const char *patend)
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1165 pat = (const char *) memchr(pat, '\n', patend-pat);
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1176 first_symbol(const char *pat, const char *patend) {
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1182 pat = (const char *) memchr(pat, '\n', patend-pat);
1190 =for apidoc unpackstring
1192 The engine implementing unpack() Perl function. C<unpackstring> puts the
1193 extracted list items on the stack and returns the number of elements.
1194 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1199 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1203 PERL_ARGS_ASSERT_UNPACKSTRING;
1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
1213 flags |= FLAG_DO_UTF8;
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
1219 TEMPSYM_INIT(&sym, pat, patend, flags);
1221 return unpack_rec(&sym, s, s, strend, NULL );
1225 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1229 const I32 start_sp_offset = SP - PL_stack_base;
1234 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1235 bool beyond = FALSE;
1236 bool explicit_length;
1237 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1238 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1240 PERL_ARGS_ASSERT_UNPACK_REC;
1242 symptr->strbeg = s - strbeg;
1244 while (next_symbol(symptr)) {
1247 I32 datumtype = symptr->code;
1248 /* do first one only unless in list context
1249 / is implemented by unpacking the count, then popping it from the
1250 stack, so must check that we're not in the middle of a / */
1251 if ( unpack_only_one
1252 && (SP - PL_stack_base == start_sp_offset + 1)
1253 && (datumtype != '/') ) /* XXX can this be omitted */
1256 switch (howlen = symptr->howlen) {
1258 len = strend - strbeg; /* long enough */
1261 /* e_no_len and e_number */
1262 len = symptr->length;
1266 explicit_length = TRUE;
1268 beyond = s >= strend;
1270 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1272 /* props nonzero means we can process this letter. */
1273 const long size = props & PACK_SIZE_MASK;
1274 const long howmany = (strend - s) / size;
1278 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 if (len && unpack_only_one) len = 1;
1285 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1287 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1290 if (howlen == e_no_len)
1291 len = 16; /* len is not specified */
1299 tempsym_t savsym = *symptr;
1300 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1301 symptr->flags |= group_modifiers;
1302 symptr->patend = savsym.grpend;
1303 symptr->previous = &savsym;
1306 if (len && unpack_only_one) len = 1;
1308 symptr->patptr = savsym.grpbeg;
1309 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1310 else symptr->flags &= ~FLAG_PARSE_UTF8;
1311 unpack_rec(symptr, s, strbeg, strend, &s);
1312 if (s == strend && savsym.howlen == e_star)
1313 break; /* No way to continue */
1316 savsym.flags = symptr->flags & ~group_modifiers;
1320 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 case '.' | TYPE_IS_SHRIEKING:
1326 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1327 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1328 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1329 const bool u8 = utf8;
1331 if (howlen == e_star) from = strbeg;
1332 else if (len <= 0) from = s;
1334 tempsym_t *group = symptr;
1336 while (--len && group) group = group->previous;
1337 from = group ? strbeg + group->strbeg : strbeg;
1340 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1345 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 case '@' | TYPE_IS_SHRIEKING:
1349 s = strbeg + symptr->strbeg;
1350 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1352 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1363 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1366 Perl_croak(aTHX_ "'@' outside of string in unpack");
1370 case 'X' | TYPE_IS_SHRIEKING:
1371 if (!len) /* Avoid division by 0 */
1374 const char *hop, *last;
1376 hop = last = strbeg;
1378 hop += UTF8SKIP(hop);
1385 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1389 len = (s - strbeg) % len;
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
1396 while (--s, UTF8_IS_CONTINUATION(*s)) {
1398 Perl_croak(aTHX_ "'X' outside of string in unpack");
1403 if (len > s - strbeg)
1404 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1408 case 'x' | TYPE_IS_SHRIEKING: {
1410 if (!len) /* Avoid division by 0 */
1412 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 else ai32 = (s - strbeg) % len;
1414 if (ai32 == 0) break;
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1427 if (len > strend - s)
1428 Perl_croak(aTHX_ "'x' outside of string in unpack");
1433 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1439 /* Preliminary length estimate is assumed done in 'W' */
1440 if (len > strend - s) len = strend - s;
1446 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 if (hop >= strend) {
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1456 } else if (len > strend - s)
1459 if (datumtype == 'Z') {
1460 /* 'Z' strips stuff after first null */
1461 const char *ptr, *end;
1463 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1464 sv = newSVpvn(s, ptr-s);
1465 if (howlen == e_star) /* exact for 'Z*' */
1466 len = ptr-s + (ptr != strend ? 1 : 0);
1467 } else if (datumtype == 'A') {
1468 /* 'A' strips both nulls and spaces */
1470 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1473 !is_utf8_space((U8 *) ptr)) break;
1474 if (ptr >= s) ptr += UTF8SKIP(ptr);
1477 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1488 /* Undo any upgrade done due to need_utf8() */
1489 if (!(symptr->flags & FLAG_WAS_UTF8))
1490 sv_utf8_downgrade(sv, 0);
1498 if (howlen == e_star || len > (strend - s) * 8)
1499 len = (strend - s) * 8;
1502 while (len >= 8 && s < strend) {
1503 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1508 cuv += PL_bitcount[*(U8 *)s++];
1511 if (len && s < strend) {
1513 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 if (datumtype == 'b')
1516 if (bits & 1) cuv++;
1521 if (bits & 0x80) cuv++;
1528 sv = sv_2mortal(newSV(len ? len : 1));
1531 if (datumtype == 'b') {
1533 const I32 ai32 = len;
1534 for (len = 0; len < ai32; len++) {
1535 if (len & 7) bits >>= 1;
1537 if (s >= strend) break;
1538 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1539 } else bits = *(U8 *) s++;
1540 *str++ = bits & 1 ? '1' : '0';
1544 const I32 ai32 = len;
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits <<= 1;
1548 if (s >= strend) break;
1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 0x80 ? '1' : '0';
1555 SvCUR_set(sv, str - SvPVX_const(sv));
1562 /* Preliminary length estimate, acceptable for utf8 too */
1563 if (howlen == e_star || len > (strend - s) * 2)
1564 len = (strend - s) * 2;
1565 sv = sv_2mortal(newSV(len ? len : 1));
1568 if (datumtype == 'h') {
1571 for (len = 0; len < ai32; len++) {
1572 if (len & 1) bits >>= 4;
1574 if (s >= strend) break;
1575 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1576 } else bits = * (U8 *) s++;
1577 *str++ = PL_hexdigit[bits & 15];
1581 const I32 ai32 = len;
1582 for (len = 0; len < ai32; len++) {
1583 if (len & 1) bits <<= 4;
1585 if (s >= strend) break;
1586 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1587 } else bits = *(U8 *) s++;
1588 *str++ = PL_hexdigit[(bits >> 4) & 15];
1592 SvCUR_set(sv, str - SvPVX_const(sv));
1598 if (explicit_length)
1599 /* Switch to "character" mode */
1600 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1605 while (len-- > 0 && s < strend) {
1610 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1611 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1612 if (retlen == (STRLEN) -1 || retlen == 0)
1613 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1617 aint = *(U8 *)(s)++;
1618 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1622 else if (checksum > bits_in_uv)
1623 cdouble += (NV)aint;
1631 while (len-- > 0 && s < strend) {
1633 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1634 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1635 if (retlen == (STRLEN) -1 || retlen == 0)
1636 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV) val;
1645 } else if (!checksum)
1647 const U8 ch = *(U8 *) s++;
1650 else if (checksum > bits_in_uv)
1651 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1653 while (len-- > 0) cuv += *(U8 *) s++;
1657 if (explicit_length) {
1658 /* Switch to "bytes in UTF-8" mode */
1659 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1661 /* Should be impossible due to the need_utf8() test */
1662 Perl_croak(aTHX_ "U0 mode on a byte string");
1666 if (len > strend - s) len = strend - s;
1668 if (len && unpack_only_one) len = 1;
1672 while (len-- > 0 && s < strend) {
1676 U8 result[UTF8_MAXLEN];
1677 const char *ptr = s;
1679 /* Bug: warns about bad utf8 even if we are short on bytes
1680 and will break out of the loop */
1681 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1684 len = UTF8SKIP(result);
1685 if (!uni_to_bytes(aTHX_ &ptr, strend,
1686 (char *) &result[1], len-1, 'U')) break;
1687 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1690 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1691 if (retlen == (STRLEN) -1 || retlen == 0)
1692 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1697 else if (checksum > bits_in_uv)
1698 cdouble += (NV) auv;
1703 case 's' | TYPE_IS_SHRIEKING:
1704 #if SHORTSIZE != SIZE16
1707 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1708 DO_BO_UNPACK(ashort, s);
1711 else if (checksum > bits_in_uv)
1712 cdouble += (NV)ashort;
1724 #if U16SIZE > SIZE16
1727 SHIFT16(utf8, s, strend, &ai16, datumtype);
1728 DO_BO_UNPACK(ai16, 16);
1729 #if U16SIZE > SIZE16
1735 else if (checksum > bits_in_uv)
1736 cdouble += (NV)ai16;
1741 case 'S' | TYPE_IS_SHRIEKING:
1742 #if SHORTSIZE != SIZE16
1744 unsigned short aushort;
1745 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1746 DO_BO_UNPACK(aushort, s);
1749 else if (checksum > bits_in_uv)
1750 cdouble += (NV)aushort;
1763 #if U16SIZE > SIZE16
1766 SHIFT16(utf8, s, strend, &au16, datumtype);
1767 DO_BO_UNPACK(au16, 16);
1769 if (datumtype == 'n')
1770 au16 = PerlSock_ntohs(au16);
1773 if (datumtype == 'v')
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV) au16;
1784 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1785 case 'v' | TYPE_IS_SHRIEKING:
1786 case 'n' | TYPE_IS_SHRIEKING:
1789 # if U16SIZE > SIZE16
1792 SHIFT16(utf8, s, strend, &ai16, datumtype);
1794 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1795 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1796 # endif /* HAS_NTOHS */
1798 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1799 ai16 = (I16) vtohs((U16) ai16);
1800 # endif /* HAS_VTOHS */
1803 else if (checksum > bits_in_uv)
1804 cdouble += (NV) ai16;
1809 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1811 case 'i' | TYPE_IS_SHRIEKING:
1814 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1815 DO_BO_UNPACK(aint, i);
1818 else if (checksum > bits_in_uv)
1819 cdouble += (NV)aint;
1825 case 'I' | TYPE_IS_SHRIEKING:
1828 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1829 DO_BO_UNPACK(auint, i);
1832 else if (checksum > bits_in_uv)
1833 cdouble += (NV)auint;
1841 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1842 #if IVSIZE == INTSIZE
1843 DO_BO_UNPACK(aiv, i);
1844 #elif IVSIZE == LONGSIZE
1845 DO_BO_UNPACK(aiv, l);
1846 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1847 DO_BO_UNPACK(aiv, 64);
1849 Perl_croak(aTHX_ "'j' not supported on this platform");
1853 else if (checksum > bits_in_uv)
1862 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1863 #if IVSIZE == INTSIZE
1864 DO_BO_UNPACK(auv, i);
1865 #elif IVSIZE == LONGSIZE
1866 DO_BO_UNPACK(auv, l);
1867 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1868 DO_BO_UNPACK(auv, 64);
1870 Perl_croak(aTHX_ "'J' not supported on this platform");
1874 else if (checksum > bits_in_uv)
1880 case 'l' | TYPE_IS_SHRIEKING:
1881 #if LONGSIZE != SIZE32
1884 SHIFT_VAR(utf8, s, strend, along, datumtype);
1885 DO_BO_UNPACK(along, l);
1888 else if (checksum > bits_in_uv)
1889 cdouble += (NV)along;
1900 #if U32SIZE > SIZE32
1903 SHIFT32(utf8, s, strend, &ai32, datumtype);
1904 DO_BO_UNPACK(ai32, 32);
1905 #if U32SIZE > SIZE32
1906 if (ai32 > 2147483647) ai32 -= 4294967296;
1910 else if (checksum > bits_in_uv)
1911 cdouble += (NV)ai32;
1916 case 'L' | TYPE_IS_SHRIEKING:
1917 #if LONGSIZE != SIZE32
1919 unsigned long aulong;
1920 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1921 DO_BO_UNPACK(aulong, l);
1924 else if (checksum > bits_in_uv)
1925 cdouble += (NV)aulong;
1938 #if U32SIZE > SIZE32
1941 SHIFT32(utf8, s, strend, &au32, datumtype);
1942 DO_BO_UNPACK(au32, 32);
1944 if (datumtype == 'N')
1945 au32 = PerlSock_ntohl(au32);
1948 if (datumtype == 'V')
1953 else if (checksum > bits_in_uv)
1954 cdouble += (NV)au32;
1959 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1960 case 'V' | TYPE_IS_SHRIEKING:
1961 case 'N' | TYPE_IS_SHRIEKING:
1964 # if U32SIZE > SIZE32
1967 SHIFT32(utf8, s, strend, &ai32, datumtype);
1969 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1970 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1973 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1974 ai32 = (I32)vtohl((U32)ai32);
1978 else if (checksum > bits_in_uv)
1979 cdouble += (NV)ai32;
1984 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1988 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1989 DO_BO_UNPACK_PC(aptr);
1990 /* newSVpv generates undef if aptr is NULL */
1991 mPUSHs(newSVpv(aptr, 0));
1999 while (len > 0 && s < strend) {
2001 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2002 auv = (auv << 7) | (ch & 0x7f);
2003 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2011 if (++bytes >= sizeof(UV)) { /* promote to string */
2014 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2015 while (s < strend) {
2016 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2017 sv = mul128(sv, (U8)(ch & 0x7f));
2023 t = SvPV_nolen_const(sv);
2032 if ((s >= strend) && bytes)
2033 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2037 if (symptr->howlen == e_star)
2038 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2040 if (s + sizeof(char*) <= strend) {
2042 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2043 DO_BO_UNPACK_PC(aptr);
2044 /* newSVpvn generates undef if aptr is NULL */
2045 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2052 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2053 DO_BO_UNPACK(aquad, 64);
2055 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2056 newSViv((IV)aquad) : newSVnv((NV)aquad));
2057 else if (checksum > bits_in_uv)
2058 cdouble += (NV)aquad;
2066 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2067 DO_BO_UNPACK(auquad, 64);
2069 mPUSHs(auquad <= UV_MAX ?
2070 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2071 else if (checksum > bits_in_uv)
2072 cdouble += (NV)auquad;
2077 #endif /* HAS_QUAD */
2078 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2082 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2083 DO_BO_UNPACK_N(afloat, float);
2093 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2094 DO_BO_UNPACK_N(adouble, double);
2104 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2105 DO_BO_UNPACK_N(anv.nv, NV);
2112 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2116 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2117 DO_BO_UNPACK_N(aldouble.ld, long double);
2119 mPUSHn(aldouble.ld);
2121 cdouble += aldouble.ld;
2127 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2128 sv = sv_2mortal(newSV(l));
2129 if (l) SvPOK_on(sv);
2132 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2137 next_uni_uu(aTHX_ &s, strend, &a);
2138 next_uni_uu(aTHX_ &s, strend, &b);
2139 next_uni_uu(aTHX_ &s, strend, &c);
2140 next_uni_uu(aTHX_ &s, strend, &d);
2141 hunk[0] = (char)((a << 2) | (b >> 4));
2142 hunk[1] = (char)((b << 4) | (c >> 2));
2143 hunk[2] = (char)((c << 6) | d);
2144 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2152 /* possible checksum byte */
2153 const char *skip = s+UTF8SKIP(s);
2154 if (skip < strend && *skip == '\n')
2160 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2164 len = PL_uudmap[*(U8*)s++] & 077;
2166 if (s < strend && ISUUCHAR(*s))
2167 a = PL_uudmap[*(U8*)s++] & 077;
2170 if (s < strend && ISUUCHAR(*s))
2171 b = PL_uudmap[*(U8*)s++] & 077;
2174 if (s < strend && ISUUCHAR(*s))
2175 c = PL_uudmap[*(U8*)s++] & 077;
2178 if (s < strend && ISUUCHAR(*s))
2179 d = PL_uudmap[*(U8*)s++] & 077;
2182 hunk[0] = (char)((a << 2) | (b >> 4));
2183 hunk[1] = (char)((b << 4) | (c >> 2));
2184 hunk[2] = (char)((c << 6) | d);
2185 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2190 else /* possible checksum byte */
2191 if (s + 1 < strend && s[1] == '\n')
2200 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2201 (checksum > bits_in_uv &&
2202 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2205 anv = (NV) (1 << (checksum & 15));
2206 while (checksum >= 16) {
2210 while (cdouble < 0.0)
2212 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2213 sv = newSVnv(cdouble);
2216 if (checksum < bits_in_uv) {
2217 UV mask = ((UV)1 << checksum) - 1;
2226 if (symptr->flags & FLAG_SLASH){
2227 if (SP - PL_stack_base - start_sp_offset <= 0)
2228 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2229 if( next_symbol(symptr) ){
2230 if( symptr->howlen == e_number )
2231 Perl_croak(aTHX_ "Count after length/code in unpack" );
2233 /* ...end of char buffer then no decent length available */
2234 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2236 /* take top of stack (hope it's numeric) */
2239 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2242 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2244 datumtype = symptr->code;
2245 explicit_length = FALSE;
2253 return SP - PL_stack_base - start_sp_offset;
2261 I32 gimme = GIMME_V;
2264 const char *pat = SvPV_const(left, llen);
2265 const char *s = SvPV_const(right, rlen);
2266 const char *strend = s + rlen;
2267 const char *patend = pat + llen;
2271 cnt = unpackstring(pat, patend, s, strend,
2272 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2273 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2276 if ( !cnt && gimme == G_SCALAR )
2277 PUSHs(&PL_sv_undef);
2282 doencodes(U8 *h, const char *s, I32 len)
2284 *h++ = PL_uuemap[len];
2286 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2287 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2288 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2289 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2294 const char r = (len > 1 ? s[1] : '\0');
2295 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2297 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2298 *h++ = PL_uuemap[0];
2305 S_is_an_int(pTHX_ const char *s, STRLEN l)
2307 SV *result = newSVpvn(s, l);
2308 char *const result_c = SvPV_nolen(result); /* convenience */
2309 char *out = result_c;
2313 PERL_ARGS_ASSERT_IS_AN_INT;
2321 SvREFCNT_dec(result);
2344 SvREFCNT_dec(result);
2350 SvCUR_set(result, out - result_c);
2354 /* pnum must be '\0' terminated */
2356 S_div128(pTHX_ SV *pnum, bool *done)
2359 char * const s = SvPV(pnum, len);
2363 PERL_ARGS_ASSERT_DIV128;
2367 const int i = m * 10 + (*t - '0');
2368 const int r = (i >> 7); /* r < 10 */
2376 SvCUR_set(pnum, (STRLEN) (t - s));
2381 =for apidoc packlist
2383 The engine implementing pack() Perl function.
2389 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2394 PERL_ARGS_ASSERT_PACKLIST;
2396 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2398 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2399 Also make sure any UTF8 flag is loaded */
2400 SvPV_force_nolen(cat);
2402 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2404 (void)pack_rec( cat, &sym, beglist, endlist );
2407 /* like sv_utf8_upgrade, but also repoint the group start markers */
2409 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2412 const char *from_ptr, *from_start, *from_end, **marks, **m;
2413 char *to_start, *to_ptr;
2415 if (SvUTF8(sv)) return;
2417 from_start = SvPVX_const(sv);
2418 from_end = from_start + SvCUR(sv);
2419 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2420 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2421 if (from_ptr == from_end) {
2422 /* Simple case: no character needs to be changed */
2427 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2428 Newx(to_start, len, char);
2429 Copy(from_start, to_start, from_ptr-from_start, char);
2430 to_ptr = to_start + (from_ptr-from_start);
2432 Newx(marks, sym_ptr->level+2, const char *);
2433 for (group=sym_ptr; group; group = group->previous)
2434 marks[group->level] = from_start + group->strbeg;
2435 marks[sym_ptr->level+1] = from_end+1;
2436 for (m = marks; *m < from_ptr; m++)
2437 *m = to_start + (*m-from_start);
2439 for (;from_ptr < from_end; from_ptr++) {
2440 while (*m == from_ptr) *m++ = to_ptr;
2441 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2445 while (*m == from_ptr) *m++ = to_ptr;
2446 if (m != marks + sym_ptr->level+1) {
2449 Perl_croak(aTHX_ "panic: marks beyond string end");
2451 for (group=sym_ptr; group; group = group->previous)
2452 group->strbeg = marks[group->level] - to_start;
2457 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2458 from_start -= SvIVX(sv);
2461 SvFLAGS(sv) &= ~SVf_OOK;
2464 Safefree(from_start);
2465 SvPV_set(sv, to_start);
2466 SvCUR_set(sv, to_ptr - to_start);
2471 /* Exponential string grower. Makes string extension effectively O(n)
2472 needed says how many extra bytes we need (not counting the final '\0')
2473 Only grows the string if there is an actual lack of space
2476 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2477 const STRLEN cur = SvCUR(sv);
2478 const STRLEN len = SvLEN(sv);
2481 PERL_ARGS_ASSERT_SV_EXP_GROW;
2483 if (len - cur > needed) return SvPVX(sv);
2484 extend = needed > len ? needed : len;
2485 return SvGROW(sv, len+extend+1);
2490 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2493 tempsym_t lookahead;
2494 I32 items = endlist - beglist;
2495 bool found = next_symbol(symptr);
2496 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2497 bool warn_utf8 = ckWARN(WARN_UTF8);
2499 PERL_ARGS_ASSERT_PACK_REC;
2501 if (symptr->level == 0 && found && symptr->code == 'U') {
2502 marked_upgrade(aTHX_ cat, symptr);
2503 symptr->flags |= FLAG_DO_UTF8;
2506 symptr->strbeg = SvCUR(cat);
2512 SV *lengthcode = NULL;
2513 I32 datumtype = symptr->code;
2514 howlen_t howlen = symptr->howlen;
2515 char *start = SvPVX(cat);
2516 char *cur = start + SvCUR(cat);
2518 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2522 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2526 /* e_no_len and e_number */
2527 len = symptr->length;
2532 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2534 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2535 /* We can process this letter. */
2536 STRLEN size = props & PACK_SIZE_MASK;
2537 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2541 /* Look ahead for next symbol. Do we have code/code? */
2542 lookahead = *symptr;
2543 found = next_symbol(&lookahead);
2544 if (symptr->flags & FLAG_SLASH) {
2546 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2547 if (strchr("aAZ", lookahead.code)) {
2548 if (lookahead.howlen == e_number) count = lookahead.length;
2551 if (SvGAMAGIC(*beglist)) {
2552 /* Avoid reading the active data more than once
2553 by copying it to a temporary. */
2555 const char *const pv = SvPV_const(*beglist, len);
2557 = newSVpvn_flags(pv, len,
2558 SVs_TEMP | SvUTF8(*beglist));
2561 count = DO_UTF8(*beglist) ?
2562 sv_len_utf8(*beglist) : sv_len(*beglist);
2565 if (lookahead.code == 'Z') count++;
2568 if (lookahead.howlen == e_number && lookahead.length < items)
2569 count = lookahead.length;
2572 lookahead.howlen = e_number;
2573 lookahead.length = count;
2574 lengthcode = sv_2mortal(newSViv(count));
2577 /* Code inside the switch must take care to properly update
2578 cat (CUR length and '\0' termination) if it updated *cur and
2579 doesn't simply leave using break */
2580 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2582 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2583 (int) TYPE_NO_MODIFIERS(datumtype));
2585 Perl_croak(aTHX_ "'%%' may not be used in pack");
2588 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 case '.' | TYPE_IS_SHRIEKING:
2592 if (howlen == e_star) from = start;
2593 else if (len == 0) from = cur;
2595 tempsym_t *group = symptr;
2597 while (--len && group) group = group->previous;
2598 from = group ? start + group->strbeg : start;
2601 len = SvIV(fromstr);
2603 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2604 case '@' | TYPE_IS_SHRIEKING:
2607 from = start + symptr->strbeg;
2609 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2610 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2611 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2615 while (len && from < cur) {
2616 from += UTF8SKIP(from);
2620 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2622 /* Here we know from == cur */
2624 GROWING(0, cat, start, cur, len);
2625 Zero(cur, len, char);
2627 } else if (from < cur) {
2630 } else goto no_change;
2638 if (len > 0) goto grow;
2639 if (len == 0) goto no_change;
2646 tempsym_t savsym = *symptr;
2647 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2648 symptr->flags |= group_modifiers;
2649 symptr->patend = savsym.grpend;
2651 symptr->previous = &lookahead;
2654 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2655 else symptr->flags &= ~FLAG_PARSE_UTF8;
2656 was_utf8 = SvUTF8(cat);
2657 symptr->patptr = savsym.grpbeg;
2658 beglist = pack_rec(cat, symptr, beglist, endlist);
2659 if (SvUTF8(cat) != was_utf8)
2660 /* This had better be an upgrade while in utf8==0 mode */
2663 if (savsym.howlen == e_star && beglist == endlist)
2664 break; /* No way to continue */
2666 items = endlist - beglist;
2667 lookahead.flags = symptr->flags & ~group_modifiers;
2670 case 'X' | TYPE_IS_SHRIEKING:
2671 if (!len) /* Avoid division by 0 */
2678 hop += UTF8SKIP(hop);
2685 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2689 len = (cur-start) % len;
2693 if (len < 1) goto no_change;
2697 Perl_croak(aTHX_ "'%c' outside of string in pack",
2698 (int) TYPE_NO_MODIFIERS(datumtype));
2699 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2701 Perl_croak(aTHX_ "'%c' outside of string in pack",
2702 (int) TYPE_NO_MODIFIERS(datumtype));
2708 if (cur - start < len)
2709 Perl_croak(aTHX_ "'%c' outside of string in pack",
2710 (int) TYPE_NO_MODIFIERS(datumtype));
2713 if (cur < start+symptr->strbeg) {
2714 /* Make sure group starts don't point into the void */
2716 const STRLEN length = cur-start;
2717 for (group = symptr;
2718 group && length < group->strbeg;
2719 group = group->previous) group->strbeg = length;
2720 lookahead.strbeg = length;
2723 case 'x' | TYPE_IS_SHRIEKING: {
2725 if (!len) /* Avoid division by 0 */
2727 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2728 else ai32 = (cur - start) % len;
2729 if (ai32 == 0) goto no_change;
2741 aptr = SvPV_const(fromstr, fromlen);
2742 if (DO_UTF8(fromstr)) {
2743 const char *end, *s;
2745 if (!utf8 && !SvUTF8(cat)) {
2746 marked_upgrade(aTHX_ cat, symptr);
2747 lookahead.flags |= FLAG_DO_UTF8;
2748 lookahead.strbeg = symptr->strbeg;
2751 cur = start + SvCUR(cat);
2753 if (howlen == e_star) {
2754 if (utf8) goto string_copy;
2758 end = aptr + fromlen;
2759 fromlen = datumtype == 'Z' ? len-1 : len;
2760 while ((I32) fromlen > 0 && s < end) {
2765 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2768 if (datumtype == 'Z') len++;
2774 fromlen = len - fromlen;
2775 if (datumtype == 'Z') fromlen--;
2776 if (howlen == e_star) {
2778 if (datumtype == 'Z') len++;
2780 GROWING(0, cat, start, cur, len);
2781 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2782 datumtype | TYPE_IS_PACK))
2783 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
2787 if (howlen == e_star) {
2789 if (datumtype == 'Z') len++;
2791 if (len <= (I32) fromlen) {
2793 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2795 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2797 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2798 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2800 while (fromlen > 0) {
2801 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2807 if (howlen == e_star) {
2809 if (datumtype == 'Z') len++;
2811 if (len <= (I32) fromlen) {
2813 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2815 GROWING(0, cat, start, cur, len);
2816 Copy(aptr, cur, fromlen, char);
2820 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2827 const char *str, *end;
2834 str = SvPV_const(fromstr, fromlen);
2835 end = str + fromlen;
2836 if (DO_UTF8(fromstr)) {
2838 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2840 utf8_source = FALSE;
2841 utf8_flags = 0; /* Unused, but keep compilers happy */
2843 if (howlen == e_star) len = fromlen;
2844 field_len = (len+7)/8;
2845 GROWING(utf8, cat, start, cur, field_len);
2846 if (len > (I32)fromlen) len = fromlen;
2849 if (datumtype == 'B')
2853 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2855 } else bits |= *str++ & 1;
2856 if (l & 7) bits <<= 1;
2858 PUSH_BYTE(utf8, cur, bits);
2863 /* datumtype == 'b' */
2867 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2868 if (val & 1) bits |= 0x80;
2869 } else if (*str++ & 1)
2871 if (l & 7) bits >>= 1;
2873 PUSH_BYTE(utf8, cur, bits);
2879 if (datumtype == 'B')
2880 bits <<= 7 - (l & 7);
2882 bits >>= 7 - (l & 7);
2883 PUSH_BYTE(utf8, cur, bits);
2886 /* Determine how many chars are left in the requested field */
2888 if (howlen == e_star) field_len = 0;
2889 else field_len -= l;
2890 Zero(cur, field_len, char);
2896 const char *str, *end;
2903 str = SvPV_const(fromstr, fromlen);
2904 end = str + fromlen;
2905 if (DO_UTF8(fromstr)) {
2907 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2909 utf8_source = FALSE;
2910 utf8_flags = 0; /* Unused, but keep compilers happy */
2912 if (howlen == e_star) len = fromlen;
2913 field_len = (len+1)/2;
2914 GROWING(utf8, cat, start, cur, field_len);
2915 if (!utf8 && len > (I32)fromlen) len = fromlen;
2918 if (datumtype == 'H')
2922 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2923 if (val < 256 && isALPHA(val))
2924 bits |= (val + 9) & 0xf;
2927 } else if (isALPHA(*str))
2928 bits |= (*str++ + 9) & 0xf;
2930 bits |= *str++ & 0xf;
2931 if (l & 1) bits <<= 4;
2933 PUSH_BYTE(utf8, cur, bits);
2941 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2942 if (val < 256 && isALPHA(val))
2943 bits |= ((val + 9) & 0xf) << 4;
2945 bits |= (val & 0xf) << 4;
2946 } else if (isALPHA(*str))
2947 bits |= ((*str++ + 9) & 0xf) << 4;
2949 bits |= (*str++ & 0xf) << 4;
2950 if (l & 1) bits >>= 4;
2952 PUSH_BYTE(utf8, cur, bits);
2958 PUSH_BYTE(utf8, cur, bits);
2961 /* Determine how many chars are left in the requested field */
2963 if (howlen == e_star) field_len = 0;
2964 else field_len -= l;
2965 Zero(cur, field_len, char);
2973 aiv = SvIV(fromstr);
2974 if ((-128 > aiv || aiv > 127))
2975 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2976 "Character in 'c' format wrapped in pack");
2977 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2982 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2988 aiv = SvIV(fromstr);
2989 if ((0 > aiv || aiv > 0xff))
2990 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2991 "Character in 'C' format wrapped in pack");
2992 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2997 U8 in_bytes = (U8)IN_BYTES;
2999 end = start+SvLEN(cat)-1;
3000 if (utf8) end -= UTF8_MAXLEN-1;
3004 auv = SvUV(fromstr);
3005 if (in_bytes) auv = auv % 0x100;
3010 SvCUR_set(cat, cur - start);
3012 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3013 end = start+SvLEN(cat)-UTF8_MAXLEN;
3015 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3018 0 : UNICODE_ALLOW_ANY);
3023 SvCUR_set(cat, cur - start);
3024 marked_upgrade(aTHX_ cat, symptr);
3025 lookahead.flags |= FLAG_DO_UTF8;
3026 lookahead.strbeg = symptr->strbeg;
3029 cur = start + SvCUR(cat);
3030 end = start+SvLEN(cat)-UTF8_MAXLEN;
3033 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3034 "Character in 'W' format wrapped in pack");
3039 SvCUR_set(cat, cur - start);
3040 GROWING(0, cat, start, cur, len+1);
3041 end = start+SvLEN(cat)-1;
3043 *(U8 *) cur++ = (U8)auv;
3052 if (!(symptr->flags & FLAG_DO_UTF8)) {
3053 marked_upgrade(aTHX_ cat, symptr);
3054 lookahead.flags |= FLAG_DO_UTF8;
3055 lookahead.strbeg = symptr->strbeg;
3061 end = start+SvLEN(cat);
3062 if (!utf8) end -= UTF8_MAXLEN;
3066 auv = SvUV(fromstr);
3068 U8 buffer[UTF8_MAXLEN], *endb;
3069 endb = uvuni_to_utf8_flags(buffer, auv,
3071 0 : UNICODE_ALLOW_ANY);
3072 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3074 SvCUR_set(cat, cur - start);
3075 GROWING(0, cat, start, cur,
3076 len+(endb-buffer)*UTF8_EXPAND);
3077 end = start+SvLEN(cat);
3079 cur = bytes_to_uni(buffer, endb-buffer, cur);
3083 SvCUR_set(cat, cur - start);
3084 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3085 end = start+SvLEN(cat)-UTF8_MAXLEN;
3087 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3089 0 : UNICODE_ALLOW_ANY);
3094 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3100 anv = SvNV(fromstr);
3102 /* VOS does not automatically map a floating-point overflow
3103 during conversion from double to float into infinity, so we
3104 do it by hand. This code should either be generalized for
3105 any OS that needs it, or removed if and when VOS implements
3106 posix-976 (suggestion to support mapping to infinity).
3107 Paul.Green@stratus.com 02-04-02. */
3109 extern const float _float_constants[];
3111 afloat = _float_constants[0]; /* single prec. inf. */
3112 else if (anv < -FLT_MAX)
3113 afloat = _float_constants[0]; /* single prec. inf. */
3114 else afloat = (float) anv;
3117 # if defined(VMS) && !defined(__IEEE_FP)
3118 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3119 * on Alpha; fake it if we don't have them.
3123 else if (anv < -FLT_MAX)
3125 else afloat = (float)anv;
3127 afloat = (float)anv;
3129 #endif /* __VOS__ */
3130 DO_BO_PACK_N(afloat, float);
3131 PUSH_VAR(utf8, cur, afloat);
3139 anv = SvNV(fromstr);
3141 /* VOS does not automatically map a floating-point overflow
3142 during conversion from long double to double into infinity,
3143 so we do it by hand. This code should either be generalized
3144 for any OS that needs it, or removed if and when VOS
3145 implements posix-976 (suggestion to support mapping to
3146 infinity). Paul.Green@stratus.com 02-04-02. */
3148 extern const double _double_constants[];
3150 adouble = _double_constants[0]; /* double prec. inf. */
3151 else if (anv < -DBL_MAX)
3152 adouble = _double_constants[0]; /* double prec. inf. */
3153 else adouble = (double) anv;
3156 # if defined(VMS) && !defined(__IEEE_FP)
3157 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3158 * on Alpha; fake it if we don't have them.
3162 else if (anv < -DBL_MAX)
3164 else adouble = (double)anv;
3166 adouble = (double)anv;
3168 #endif /* __VOS__ */
3169 DO_BO_PACK_N(adouble, double);
3170 PUSH_VAR(utf8, cur, adouble);
3175 Zero(&anv, 1, NV); /* can be long double with unused bits */
3178 anv.nv = SvNV(fromstr);
3179 DO_BO_PACK_N(anv, NV);
3180 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
3184 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3187 /* long doubles can have unused bits, which may be nonzero */
3188 Zero(&aldouble, 1, long double);
3191 aldouble.ld = (long double)SvNV(fromstr);
3192 DO_BO_PACK_N(aldouble, long double);
3193 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
3198 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3199 case 'n' | TYPE_IS_SHRIEKING:
3205 ai16 = (I16)SvIV(fromstr);
3207 ai16 = PerlSock_htons(ai16);
3209 PUSH16(utf8, cur, &ai16);
3212 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3213 case 'v' | TYPE_IS_SHRIEKING:
3219 ai16 = (I16)SvIV(fromstr);
3223 PUSH16(utf8, cur, &ai16);
3226 case 'S' | TYPE_IS_SHRIEKING:
3227 #if SHORTSIZE != SIZE16
3229 unsigned short aushort;
3231 aushort = SvUV(fromstr);
3232 DO_BO_PACK(aushort, s);
3233 PUSH_VAR(utf8, cur, aushort);
3243 au16 = (U16)SvUV(fromstr);
3244 DO_BO_PACK(au16, 16);
3245 PUSH16(utf8, cur, &au16);
3248 case 's' | TYPE_IS_SHRIEKING:
3249 #if SHORTSIZE != SIZE16
3253 ashort = SvIV(fromstr);
3254 DO_BO_PACK(ashort, s);
3255 PUSH_VAR(utf8, cur, ashort);
3265 ai16 = (I16)SvIV(fromstr);
3266 DO_BO_PACK(ai16, 16);
3267 PUSH16(utf8, cur, &ai16);
3271 case 'I' | TYPE_IS_SHRIEKING:
3275 auint = SvUV(fromstr);
3276 DO_BO_PACK(auint, i);
3277 PUSH_VAR(utf8, cur, auint);
3284 aiv = SvIV(fromstr);
3285 #if IVSIZE == INTSIZE
3287 #elif IVSIZE == LONGSIZE
3289 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3290 DO_BO_PACK(aiv, 64);
3292 Perl_croak(aTHX_ "'j' not supported on this platform");
3294 PUSH_VAR(utf8, cur, aiv);
3301 auv = SvUV(fromstr);
3302 #if UVSIZE == INTSIZE
3304 #elif UVSIZE == LONGSIZE
3306 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3307 DO_BO_PACK(auv, 64);
3309 Perl_croak(aTHX_ "'J' not supported on this platform");
3311 PUSH_VAR(utf8, cur, auv);
3318 anv = SvNV(fromstr);
3322 SvCUR_set(cat, cur - start);
3323 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3326 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3327 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3328 any negative IVs will have already been got by the croak()
3329 above. IOK is untrue for fractions, so we test them
3330 against UV_MAX_P1. */
3331 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3332 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3333 char *in = buf + sizeof(buf);
3334 UV auv = SvUV(fromstr);
3337 *--in = (char)((auv & 0x7f) | 0x80);
3340 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3341 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3342 in, (buf + sizeof(buf)) - in);
3343 } else if (SvPOKp(fromstr))
3345 else if (SvNOKp(fromstr)) {
3346 /* 10**NV_MAX_10_EXP is the largest power of 10
3347 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3348 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3349 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3350 And with that many bytes only Inf can overflow.
3351 Some C compilers are strict about integral constant
3352 expressions so we conservatively divide by a slightly
3353 smaller integer instead of multiplying by the exact
3354 floating-point value.
3356 #ifdef NV_MAX_10_EXP
3357 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3358 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3360 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3361 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3363 char *in = buf + sizeof(buf);
3365 anv = Perl_floor(anv);
3367 const NV next = Perl_floor(anv / 128);
3368 if (in <= buf) /* this cannot happen ;-) */
3369 Perl_croak(aTHX_ "Cannot compress integer in pack");
3370 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3373 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3374 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3375 in, (buf + sizeof(buf)) - in);
3384 /* Copy string and check for compliance */
3385 from = SvPV_const(fromstr, len);
3386 if ((norm = is_an_int(from, len)) == NULL)
3387 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3389 Newx(result, len, char);
3392 while (!done) *--in = div128(norm, &done) | 0x80;
3393 result[len - 1] &= 0x7F; /* clear continue bit */
3394 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3395 in, (result + len) - in);
3397 SvREFCNT_dec(norm); /* free norm */
3402 case 'i' | TYPE_IS_SHRIEKING:
3406 aint = SvIV(fromstr);
3407 DO_BO_PACK(aint, i);
3408 PUSH_VAR(utf8, cur, aint);
3411 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3412 case 'N' | TYPE_IS_SHRIEKING:
3418 au32 = SvUV(fromstr);
3420 au32 = PerlSock_htonl(au32);
3422 PUSH32(utf8, cur, &au32);
3425 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3426 case 'V' | TYPE_IS_SHRIEKING:
3432 au32 = SvUV(fromstr);
3436 PUSH32(utf8, cur, &au32);
3439 case 'L' | TYPE_IS_SHRIEKING:
3440 #if LONGSIZE != SIZE32
3442 unsigned long aulong;
3444 aulong = SvUV(fromstr);
3445 DO_BO_PACK(aulong, l);
3446 PUSH_VAR(utf8, cur, aulong);
3456 au32 = SvUV(fromstr);
3457 DO_BO_PACK(au32, 32);
3458 PUSH32(utf8, cur, &au32);
3461 case 'l' | TYPE_IS_SHRIEKING:
3462 #if LONGSIZE != SIZE32
3466 along = SvIV(fromstr);
3467 DO_BO_PACK(along, l);
3468 PUSH_VAR(utf8, cur, along);
3478 ai32 = SvIV(fromstr);
3479 DO_BO_PACK(ai32, 32);
3480 PUSH32(utf8, cur, &ai32);
3488 auquad = (Uquad_t) SvUV(fromstr);
3489 DO_BO_PACK(auquad, 64);
3490 PUSH_VAR(utf8, cur, auquad);
3497 aquad = (Quad_t)SvIV(fromstr);
3498 DO_BO_PACK(aquad, 64);
3499 PUSH_VAR(utf8, cur, aquad);
3502 #endif /* HAS_QUAD */
3504 len = 1; /* assume SV is correct length */
3505 GROWING(utf8, cat, start, cur, sizeof(char *));
3512 SvGETMAGIC(fromstr);
3513 if (!SvOK(fromstr)) aptr = NULL;
3515 /* XXX better yet, could spirit away the string to
3516 * a safe spot and hang on to it until the result
3517 * of pack() (and all copies of the result) are
3520 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3521 !SvREADONLY(fromstr)))) {
3522 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3523 "Attempt to pack pointer to temporary value");
3525 if (SvPOK(fromstr) || SvNIOK(fromstr))
3526 aptr = SvPV_nomg_const_nolen(fromstr);
3528 aptr = SvPV_force_flags_nolen(fromstr, 0);
3530 DO_BO_PACK_PC(aptr);
3531 PUSH_VAR(utf8, cur, aptr);
3535 const char *aptr, *aend;
3539 if (len <= 2) len = 45;
3540 else len = len / 3 * 3;
3542 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3543 "Field too wide in 'u' format in pack");
3546 aptr = SvPV_const(fromstr, fromlen);
3547 from_utf8 = DO_UTF8(fromstr);
3549 aend = aptr + fromlen;
3550 fromlen = sv_len_utf8(fromstr);
3551 } else aend = NULL; /* Unused, but keep compilers happy */
3552 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3553 while (fromlen > 0) {
3556 U8 hunk[1+63/3*4+1];
3558 if ((I32)fromlen > len)
3564 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3565 'u' | TYPE_IS_PACK)) {
3567 SvCUR_set(cat, cur - start);
3568 Perl_croak(aTHX_ "panic: string is shorter than advertised");
3570 end = doencodes(hunk, buffer, todo);
3572 end = doencodes(hunk, aptr, todo);
3575 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3582 SvCUR_set(cat, cur - start);
3584 *symptr = lookahead;
3593 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3594 register SV *cat = TARG;
3596 SV *pat_sv = *++MARK;
3597 register const char *pat = SvPV_const(pat_sv, fromlen);
3598 register const char *patend = pat + fromlen;
3604 packlist(cat, pat, patend, MARK, SP + 1);
3614 * c-indentation-style: bsd
3616 * indent-tabs-mode: t
3619 * ex: set ts=8 sts=4 sw=4 noet: