3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = e_no_len; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325 # elif PTRSIZE == IVSIZE
326 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
331 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
337 # if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339 # define DO_BO_UNPACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
348 # define DO_BO_PACK_N(var, type) \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
357 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
361 #endif /* PERL_PACK_CAN_BYTEORDER */
363 #define PACK_SIZE_CANNOT_CSUM 0x80
364 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365 #define PACK_SIZE_MASK 0x3F
367 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
370 #if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
374 typedef U8 packprops_t;
377 STATIC const packprops_t packprops[512] = {
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
393 /* I */ sizeof(unsigned int),
400 #if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
415 /* f */ sizeof(float),
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425 #if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
452 /* L */ sizeof(unsigned long),
454 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
460 /* S */ sizeof(unsigned short),
462 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 /* l */ sizeof(long),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
479 /* s */ sizeof(short),
481 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
497 /* EBCDIC (or bust) */
498 STATIC const packprops_t packprops[512] = {
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
512 /* f */ sizeof(float),
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523 #if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
544 /* I */ sizeof(unsigned int),
552 #if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
579 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
587 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
599 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
607 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
618 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
640 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 const char *from = *s;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
659 } else from += retlen;
664 *(U8 *)buf++ = (U8)val;
666 /* We have enough characters for the buffer. Did we have problems ? */
669 /* Rewalk the string fragment while warning */
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
676 if (from > end) from = end;
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
690 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = PL_uudmap[val] & 077;
706 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
707 const U8 * const end = start + len;
709 while (start < end) {
710 const UV uv = NATIVE_TO_ASCII(*start);
711 if (UNI_IS_INVARIANT(uv))
712 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
715 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
722 #define PUSH_BYTES(utf8, cur, buf, len) \
725 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
727 Copy(buf, cur, len, char); \
732 #define GROWING(utf8, cat, start, cur, in_len) \
734 STRLEN glen = (in_len); \
735 if (utf8) glen *= UTF8_EXPAND; \
736 if ((cur) + glen >= (start) + SvLEN(cat)) { \
737 (start) = sv_exp_grow(cat, glen); \
738 (cur) = (start) + SvCUR(cat); \
742 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
744 const STRLEN glen = (in_len); \
746 if (utf8) gl *= UTF8_EXPAND; \
747 if ((cur) + gl >= (start) + SvLEN(cat)) { \
749 SvCUR_set((cat), (cur) - (start)); \
750 (start) = sv_exp_grow(cat, gl); \
751 (cur) = (start) + SvCUR(cat); \
753 PUSH_BYTES(utf8, cur, buf, glen); \
756 #define PUSH_BYTE(utf8, s, byte) \
759 const U8 au8 = (byte); \
760 (s) = bytes_to_uni(&au8, 1, (s)); \
761 } else *(U8 *)(s)++ = (byte); \
764 /* Only to be used inside a loop (see the break) */
765 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
768 if (str >= end) break; \
769 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
770 if (retlen == (STRLEN) -1 || retlen == 0) { \
772 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
777 static const char *_action( const tempsym_t* symptr )
779 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
782 /* Returns the sizeof() struct described by pat */
784 S_measure_struct(pTHX_ tempsym_t* symptr)
788 while (next_symbol(symptr)) {
792 switch (symptr->howlen) {
794 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
798 /* e_no_len and e_number */
799 len = symptr->length;
803 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
806 /* endianness doesn't influence the size of a type */
807 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
809 Perl_croak(aTHX_ "Invalid type '%c' in %s",
810 (int)TYPE_NO_MODIFIERS(symptr->code),
812 #ifdef PERL_PACK_CAN_SHRIEKSIGN
813 case '.' | TYPE_IS_SHRIEKING:
814 case '@' | TYPE_IS_SHRIEKING:
819 case 'U': /* XXXX Is it correct? */
822 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
823 (int) TYPE_NO_MODIFIERS(symptr->code),
830 tempsym_t savsym = *symptr;
831 symptr->patptr = savsym.grpbeg;
832 symptr->patend = savsym.grpend;
833 /* XXXX Theoretically, we need to measure many times at
834 different positions, since the subexpression may contain
835 alignment commands, but be not of aligned length.
836 Need to detect this and croak(). */
837 size = measure_struct(symptr);
841 case 'X' | TYPE_IS_SHRIEKING:
842 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
844 if (!len) /* Avoid division by 0 */
846 len = total % len; /* Assumed: the start is aligned. */
851 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
853 case 'x' | TYPE_IS_SHRIEKING:
854 if (!len) /* Avoid division by 0 */
856 star = total % len; /* Assumed: the start is aligned. */
857 if (star) /* Other portable ways? */
881 size = sizeof(char*);
891 /* locate matching closing parenthesis or bracket
892 * returns char pointer to char after match, or NULL
895 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
897 while (patptr < patend) {
898 const char c = *patptr++;
905 while (patptr < patend && *patptr != '\n')
909 patptr = group_end(patptr, patend, ')') + 1;
911 patptr = group_end(patptr, patend, ']') + 1;
913 Perl_croak(aTHX_ "No group ending character '%c' found in template",
919 /* Convert unsigned decimal number to binary.
920 * Expects a pointer to the first digit and address of length variable
921 * Advances char pointer to 1st non-digit char and returns number
924 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
926 I32 len = *patptr++ - '0';
927 while (isDIGIT(*patptr)) {
928 if (len >= 0x7FFFFFFF/10)
929 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
930 len = (len * 10) + (*patptr++ - '0');
936 /* The marvellous template parsing routine: Using state stored in *symptr,
937 * locates next template code and count
940 S_next_symbol(pTHX_ tempsym_t* symptr )
942 const char* patptr = symptr->patptr;
943 const char* const patend = symptr->patend;
945 symptr->flags &= ~FLAG_SLASH;
947 while (patptr < patend) {
948 if (isSPACE(*patptr))
950 else if (*patptr == '#') {
952 while (patptr < patend && *patptr != '\n')
957 /* We should have found a template code */
958 I32 code = *patptr++ & 0xFF;
959 U32 inherited_modifiers = 0;
961 if (code == ','){ /* grandfather in commas but with a warning */
962 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
963 symptr->flags |= FLAG_COMMA;
964 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
965 "Invalid type ',' in %s", _action( symptr ) );
970 /* for '(', skip to ')' */
972 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
973 Perl_croak(aTHX_ "()-group starts with a count in %s",
975 symptr->grpbeg = patptr;
976 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
977 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
978 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
982 /* look for group modifiers to inherit */
983 if (TYPE_ENDIANNESS(symptr->flags)) {
984 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
985 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
988 /* look for modifiers */
989 while (patptr < patend) {
994 modifier = TYPE_IS_SHRIEKING;
995 allowed = SHRIEKING_ALLOWED_TYPES;
997 #ifdef PERL_PACK_CAN_BYTEORDER
999 modifier = TYPE_IS_BIG_ENDIAN;
1000 allowed = ENDIANNESS_ALLOWED_TYPES;
1003 modifier = TYPE_IS_LITTLE_ENDIAN;
1004 allowed = ENDIANNESS_ALLOWED_TYPES;
1006 #endif /* PERL_PACK_CAN_BYTEORDER */
1016 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1017 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1018 allowed, _action( symptr ) );
1020 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1021 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1022 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1023 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1024 TYPE_ENDIANNESS_MASK)
1025 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1026 *patptr, _action( symptr ) );
1028 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
1032 _action( symptr ) );
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1042 /* look for count and/or / */
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1048 } else if (*patptr == '*') {
1050 symptr->howlen = e_star;
1052 } else if (*patptr == '[') {
1053 const char* lenptr = ++patptr;
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1061 _action( symptr ) );
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1070 symptr->howlen = e_no_len;
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1078 else if (*patptr == '#') {
1080 while (patptr < patend && *patptr != '\n')
1082 if (patptr < patend)
1085 if (*patptr == '/') {
1086 symptr->flags |= FLAG_SLASH;
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1091 _action( symptr ) );
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1102 symptr->code = code;
1103 symptr->patptr = patptr;
1107 symptr->patptr = patptr;
1112 There is no way to cleanly handle the case where we should process the
1113 string per byte in its upgraded form while it's really in downgraded form
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
1118 themselves if they need to do a lot of unpacks like this on it
1121 need_utf8(const char *pat, const char *patend)
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1127 pat = (const char *) memchr(pat, '\n', patend-pat);
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1138 first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1142 pat = (const char *) memchr(pat, '\n', patend-pat);
1150 =for apidoc unpackstring
1152 The engine implementing unpack() Perl function. C<unpackstring> puts the
1153 extracted list items on the stack and returns the number of elements.
1154 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1159 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1163 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1164 else if (need_utf8(pat, patend)) {
1165 /* We probably should try to avoid this in case a scalar context call
1166 wouldn't get to the "U0" */
1167 STRLEN len = strend - s;
1168 s = (char *) bytes_to_utf8((U8 *) s, &len);
1171 flags |= FLAG_DO_UTF8;
1174 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1175 flags |= FLAG_PARSE_UTF8;
1177 TEMPSYM_INIT(&sym, pat, patend, flags);
1179 return unpack_rec(&sym, s, s, strend, NULL );
1183 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1187 const I32 start_sp_offset = SP - PL_stack_base;
1193 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1194 bool beyond = FALSE;
1195 bool explicit_length;
1196 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1197 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1198 symptr->strbeg = s - strbeg;
1200 while (next_symbol(symptr)) {
1203 I32 datumtype = symptr->code;
1204 /* do first one only unless in list context
1205 / is implemented by unpacking the count, then popping it from the
1206 stack, so must check that we're not in the middle of a / */
1207 if ( unpack_only_one
1208 && (SP - PL_stack_base == start_sp_offset + 1)
1209 && (datumtype != '/') ) /* XXX can this be omitted */
1212 switch (howlen = symptr->howlen) {
1214 len = strend - strbeg; /* long enough */
1217 /* e_no_len and e_number */
1218 len = symptr->length;
1222 explicit_length = TRUE;
1224 beyond = s >= strend;
1226 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1228 /* props nonzero means we can process this letter. */
1229 const long size = props & PACK_SIZE_MASK;
1230 const long howmany = (strend - s) / size;
1234 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1235 if (len && unpack_only_one) len = 1;
1241 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1243 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1246 if (howlen == e_no_len)
1247 len = 16; /* len is not specified */
1255 tempsym_t savsym = *symptr;
1256 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1257 symptr->flags |= group_modifiers;
1258 symptr->patend = savsym.grpend;
1259 symptr->previous = &savsym;
1263 symptr->patptr = savsym.grpbeg;
1264 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1265 else symptr->flags &= ~FLAG_PARSE_UTF8;
1266 unpack_rec(symptr, s, strbeg, strend, &s);
1267 if (s == strend && savsym.howlen == e_star)
1268 break; /* No way to continue */
1271 savsym.flags = symptr->flags & ~group_modifiers;
1275 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1276 case '.' | TYPE_IS_SHRIEKING:
1281 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1282 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1283 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1284 const bool u8 = utf8;
1286 if (howlen == e_star) from = strbeg;
1287 else if (len <= 0) from = s;
1289 tempsym_t *group = symptr;
1291 while (--len && group) group = group->previous;
1292 from = group ? strbeg + group->strbeg : strbeg;
1295 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1296 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1297 XPUSHs(sv_2mortal(sv));
1300 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 case '@' | TYPE_IS_SHRIEKING:
1304 s = strbeg + symptr->strbeg;
1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1307 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1313 Perl_croak(aTHX_ "'@' outside of string in unpack");
1318 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
1325 case 'X' | TYPE_IS_SHRIEKING:
1326 if (!len) /* Avoid division by 0 */
1329 const char *hop, *last;
1331 hop = last = strbeg;
1333 hop += UTF8SKIP(hop);
1340 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1344 len = (s - strbeg) % len;
1350 Perl_croak(aTHX_ "'X' outside of string in unpack");
1351 while (--s, UTF8_IS_CONTINUATION(*s)) {
1353 Perl_croak(aTHX_ "'X' outside of string in unpack");
1358 if (len > s - strbeg)
1359 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1363 case 'x' | TYPE_IS_SHRIEKING: {
1365 if (!len) /* Avoid division by 0 */
1367 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1368 else ai32 = (s - strbeg) % len;
1369 if (ai32 == 0) break;
1377 Perl_croak(aTHX_ "'x' outside of string in unpack");
1382 if (len > strend - s)
1383 Perl_croak(aTHX_ "'x' outside of string in unpack");
1388 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1394 /* Preliminary length estimate is assumed done in 'W' */
1395 if (len > strend - s) len = strend - s;
1401 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1402 if (hop >= strend) {
1404 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1409 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1411 } else if (len > strend - s)
1414 if (datumtype == 'Z') {
1415 /* 'Z' strips stuff after first null */
1416 const char *ptr, *end;
1418 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1419 sv = newSVpvn(s, ptr-s);
1420 if (howlen == e_star) /* exact for 'Z*' */
1421 len = ptr-s + (ptr != strend ? 1 : 0);
1422 } else if (datumtype == 'A') {
1423 /* 'A' strips both nulls and spaces */
1425 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1426 for (ptr = s+len-1; ptr >= s; ptr--)
1427 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1428 !is_utf8_space((U8 *) ptr)) break;
1429 if (ptr >= s) ptr += UTF8SKIP(ptr);
1432 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !isSPACE(*ptr)) break;
1438 sv = newSVpvn(s, ptr-s);
1439 } else sv = newSVpvn(s, len);
1443 /* Undo any upgrade done due to need_utf8() */
1444 if (!(symptr->flags & FLAG_WAS_UTF8))
1445 sv_utf8_downgrade(sv, 0);
1447 XPUSHs(sv_2mortal(sv));
1453 if (howlen == e_star || len > (strend - s) * 8)
1454 len = (strend - s) * 8;
1458 Newxz(PL_bitcount, 256, char);
1459 for (bits = 1; bits < 256; bits++) {
1460 if (bits & 1) PL_bitcount[bits]++;
1461 if (bits & 2) PL_bitcount[bits]++;
1462 if (bits & 4) PL_bitcount[bits]++;
1463 if (bits & 8) PL_bitcount[bits]++;
1464 if (bits & 16) PL_bitcount[bits]++;
1465 if (bits & 32) PL_bitcount[bits]++;
1466 if (bits & 64) PL_bitcount[bits]++;
1467 if (bits & 128) PL_bitcount[bits]++;
1471 while (len >= 8 && s < strend) {
1472 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1477 cuv += PL_bitcount[*(U8 *)s++];
1480 if (len && s < strend) {
1482 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1483 if (datumtype == 'b')
1485 if (bits & 1) cuv++;
1490 if (bits & 0x80) cuv++;
1497 sv = sv_2mortal(newSV(len ? len : 1));
1500 if (datumtype == 'b') {
1502 const I32 ai32 = len;
1503 for (len = 0; len < ai32; len++) {
1504 if (len & 7) bits >>= 1;
1506 if (s >= strend) break;
1507 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1508 } else bits = *(U8 *) s++;
1509 *str++ = bits & 1 ? '1' : '0';
1513 const I32 ai32 = len;
1514 for (len = 0; len < ai32; len++) {
1515 if (len & 7) bits <<= 1;
1517 if (s >= strend) break;
1518 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1519 } else bits = *(U8 *) s++;
1520 *str++ = bits & 0x80 ? '1' : '0';
1524 SvCUR_set(sv, str - SvPVX_const(sv));
1531 /* Preliminary length estimate, acceptable for utf8 too */
1532 if (howlen == e_star || len > (strend - s) * 2)
1533 len = (strend - s) * 2;
1534 sv = sv_2mortal(newSV(len ? len : 1));
1537 if (datumtype == 'h') {
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 1) bits >>= 4;
1543 if (s >= strend) break;
1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1545 } else bits = * (U8 *) s++;
1546 *str++ = PL_hexdigit[bits & 15];
1550 const I32 ai32 = len;
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 1) bits <<= 4;
1554 if (s >= strend) break;
1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1556 } else bits = *(U8 *) s++;
1557 *str++ = PL_hexdigit[(bits >> 4) & 15];
1561 SvCUR_set(sv, str - SvPVX_const(sv));
1567 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1568 if (aint >= 128) /* fake up signed chars */
1571 PUSHs(sv_2mortal(newSViv((IV)aint)));
1572 else if (checksum > bits_in_uv)
1573 cdouble += (NV)aint;
1582 if (explicit_length && datumtype == 'C')
1583 /* Switch to "character" mode */
1584 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1587 if (datumtype == 'C' ?
1588 (symptr->flags & FLAG_DO_UTF8) &&
1589 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1590 while (len-- > 0 && s < strend) {
1592 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1593 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1594 if (retlen == (STRLEN) -1 || retlen == 0)
1595 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1598 PUSHs(sv_2mortal(newSVuv((UV) val)));
1599 else if (checksum > bits_in_uv)
1600 cdouble += (NV) val;
1604 } else if (!checksum)
1606 const U8 ch = *(U8 *) s++;
1607 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1609 else if (checksum > bits_in_uv)
1610 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1612 while (len-- > 0) cuv += *(U8 *) s++;
1616 if (explicit_length) {
1617 /* Switch to "bytes in UTF-8" mode */
1618 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1620 /* Should be impossible due to the need_utf8() test */
1621 Perl_croak(aTHX_ "U0 mode on a byte string");
1625 if (len > strend - s) len = strend - s;
1627 if (len && unpack_only_one) len = 1;
1631 while (len-- > 0 && s < strend) {
1635 U8 result[UTF8_MAXLEN];
1636 const char *ptr = s;
1638 /* Bug: warns about bad utf8 even if we are short on bytes
1639 and will break out of the loop */
1640 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1643 len = UTF8SKIP(result);
1644 if (!uni_to_bytes(aTHX_ &ptr, strend,
1645 (char *) &result[1], len-1, 'U')) break;
1646 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1649 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1650 if (retlen == (STRLEN) -1 || retlen == 0)
1651 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1655 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1656 else if (checksum > bits_in_uv)
1657 cdouble += (NV) auv;
1662 case 's' | TYPE_IS_SHRIEKING:
1663 #if SHORTSIZE != SIZE16
1666 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1667 DO_BO_UNPACK(ashort, s);
1669 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1670 else if (checksum > bits_in_uv)
1671 cdouble += (NV)ashort;
1683 #if U16SIZE > SIZE16
1686 SHIFT16(utf8, s, strend, &ai16, datumtype);
1687 DO_BO_UNPACK(ai16, 16);
1688 #if U16SIZE > SIZE16
1693 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1694 else if (checksum > bits_in_uv)
1695 cdouble += (NV)ai16;
1700 case 'S' | TYPE_IS_SHRIEKING:
1701 #if SHORTSIZE != SIZE16
1703 unsigned short aushort;
1704 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1705 DO_BO_UNPACK(aushort, s);
1707 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1708 else if (checksum > bits_in_uv)
1709 cdouble += (NV)aushort;
1722 #if U16SIZE > SIZE16
1725 SHIFT16(utf8, s, strend, &au16, datumtype);
1726 DO_BO_UNPACK(au16, 16);
1728 if (datumtype == 'n')
1729 au16 = PerlSock_ntohs(au16);
1732 if (datumtype == 'v')
1736 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1737 else if (checksum > bits_in_uv)
1738 cdouble += (NV) au16;
1743 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1744 case 'v' | TYPE_IS_SHRIEKING:
1745 case 'n' | TYPE_IS_SHRIEKING:
1748 # if U16SIZE > SIZE16
1751 SHIFT16(utf8, s, strend, &ai16, datumtype);
1753 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1754 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1755 # endif /* HAS_NTOHS */
1757 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1758 ai16 = (I16) vtohs((U16) ai16);
1759 # endif /* HAS_VTOHS */
1761 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1762 else if (checksum > bits_in_uv)
1763 cdouble += (NV) ai16;
1768 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1770 case 'i' | TYPE_IS_SHRIEKING:
1773 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1774 DO_BO_UNPACK(aint, i);
1776 PUSHs(sv_2mortal(newSViv((IV)aint)));
1777 else if (checksum > bits_in_uv)
1778 cdouble += (NV)aint;
1784 case 'I' | TYPE_IS_SHRIEKING:
1787 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1788 DO_BO_UNPACK(auint, i);
1790 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1791 else if (checksum > bits_in_uv)
1792 cdouble += (NV)auint;
1800 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1801 #if IVSIZE == INTSIZE
1802 DO_BO_UNPACK(aiv, i);
1803 #elif IVSIZE == LONGSIZE
1804 DO_BO_UNPACK(aiv, l);
1805 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1806 DO_BO_UNPACK(aiv, 64);
1808 Perl_croak(aTHX_ "'j' not supported on this platform");
1811 PUSHs(sv_2mortal(newSViv(aiv)));
1812 else if (checksum > bits_in_uv)
1821 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1822 #if IVSIZE == INTSIZE
1823 DO_BO_UNPACK(auv, i);
1824 #elif IVSIZE == LONGSIZE
1825 DO_BO_UNPACK(auv, l);
1826 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1827 DO_BO_UNPACK(auv, 64);
1829 Perl_croak(aTHX_ "'J' not supported on this platform");
1832 PUSHs(sv_2mortal(newSVuv(auv)));
1833 else if (checksum > bits_in_uv)
1839 case 'l' | TYPE_IS_SHRIEKING:
1840 #if LONGSIZE != SIZE32
1843 SHIFT_VAR(utf8, s, strend, along, datumtype);
1844 DO_BO_UNPACK(along, l);
1846 PUSHs(sv_2mortal(newSViv((IV)along)));
1847 else if (checksum > bits_in_uv)
1848 cdouble += (NV)along;
1859 #if U32SIZE > SIZE32
1862 SHIFT32(utf8, s, strend, &ai32, datumtype);
1863 DO_BO_UNPACK(ai32, 32);
1864 #if U32SIZE > SIZE32
1865 if (ai32 > 2147483647) ai32 -= 4294967296;
1868 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1869 else if (checksum > bits_in_uv)
1870 cdouble += (NV)ai32;
1875 case 'L' | TYPE_IS_SHRIEKING:
1876 #if LONGSIZE != SIZE32
1878 unsigned long aulong;
1879 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1880 DO_BO_UNPACK(aulong, l);
1882 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1883 else if (checksum > bits_in_uv)
1884 cdouble += (NV)aulong;
1897 #if U32SIZE > SIZE32
1900 SHIFT32(utf8, s, strend, &au32, datumtype);
1901 DO_BO_UNPACK(au32, 32);
1903 if (datumtype == 'N')
1904 au32 = PerlSock_ntohl(au32);
1907 if (datumtype == 'V')
1911 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1912 else if (checksum > bits_in_uv)
1913 cdouble += (NV)au32;
1918 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1919 case 'V' | TYPE_IS_SHRIEKING:
1920 case 'N' | TYPE_IS_SHRIEKING:
1923 # if U32SIZE > SIZE32
1926 SHIFT32(utf8, s, strend, &ai32, datumtype);
1928 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1929 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1932 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1933 ai32 = (I32)vtohl((U32)ai32);
1936 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1937 else if (checksum > bits_in_uv)
1938 cdouble += (NV)ai32;
1943 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1947 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1948 DO_BO_UNPACK_PC(aptr);
1949 /* newSVpv generates undef if aptr is NULL */
1950 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1958 while (len > 0 && s < strend) {
1960 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1961 auv = (auv << 7) | (ch & 0x7f);
1962 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1965 PUSHs(sv_2mortal(newSVuv(auv)));
1970 if (++bytes >= sizeof(UV)) { /* promote to string */
1973 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1974 while (s < strend) {
1975 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1976 sv = mul128(sv, (U8)(ch & 0x7f));
1982 t = SvPV_nolen_const(sv);
1986 PUSHs(sv_2mortal(sv));
1991 if ((s >= strend) && bytes)
1992 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1996 if (symptr->howlen == e_star)
1997 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1999 if (s + sizeof(char*) <= strend) {
2001 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2002 DO_BO_UNPACK_PC(aptr);
2003 /* newSVpvn generates undef if aptr is NULL */
2004 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2011 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2012 DO_BO_UNPACK(aquad, 64);
2014 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2015 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2016 else if (checksum > bits_in_uv)
2017 cdouble += (NV)aquad;
2025 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2026 DO_BO_UNPACK(auquad, 64);
2028 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2029 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2030 else if (checksum > bits_in_uv)
2031 cdouble += (NV)auquad;
2036 #endif /* HAS_QUAD */
2037 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2041 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2042 DO_BO_UNPACK_N(afloat, float);
2044 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2052 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2053 DO_BO_UNPACK_N(adouble, double);
2055 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2063 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2064 DO_BO_UNPACK_N(anv, NV);
2066 PUSHs(sv_2mortal(newSVnv(anv)));
2071 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2074 long double aldouble;
2075 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2076 DO_BO_UNPACK_N(aldouble, long double);
2078 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2080 cdouble += aldouble;
2086 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2087 sv = sv_2mortal(newSV(l));
2088 if (l) SvPOK_on(sv);
2091 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2096 next_uni_uu(aTHX_ &s, strend, &a);
2097 next_uni_uu(aTHX_ &s, strend, &b);
2098 next_uni_uu(aTHX_ &s, strend, &c);
2099 next_uni_uu(aTHX_ &s, strend, &d);
2100 hunk[0] = (char)((a << 2) | (b >> 4));
2101 hunk[1] = (char)((b << 4) | (c >> 2));
2102 hunk[2] = (char)((c << 6) | d);
2103 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2111 /* possible checksum byte */
2112 const char *skip = s+UTF8SKIP(s);
2113 if (skip < strend && *skip == '\n')
2119 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2123 len = PL_uudmap[*(U8*)s++] & 077;
2125 if (s < strend && ISUUCHAR(*s))
2126 a = PL_uudmap[*(U8*)s++] & 077;
2129 if (s < strend && ISUUCHAR(*s))
2130 b = PL_uudmap[*(U8*)s++] & 077;
2133 if (s < strend && ISUUCHAR(*s))
2134 c = PL_uudmap[*(U8*)s++] & 077;
2137 if (s < strend && ISUUCHAR(*s))
2138 d = PL_uudmap[*(U8*)s++] & 077;
2141 hunk[0] = (char)((a << 2) | (b >> 4));
2142 hunk[1] = (char)((b << 4) | (c >> 2));
2143 hunk[2] = (char)((c << 6) | d);
2144 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2149 else /* possible checksum byte */
2150 if (s + 1 < strend && s[1] == '\n')
2159 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2160 (checksum > bits_in_uv &&
2161 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2164 anv = (NV) (1 << (checksum & 15));
2165 while (checksum >= 16) {
2169 while (cdouble < 0.0)
2171 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2172 sv = newSVnv(cdouble);
2175 if (checksum < bits_in_uv) {
2176 UV mask = ((UV)1 << checksum) - 1;
2181 XPUSHs(sv_2mortal(sv));
2185 if (symptr->flags & FLAG_SLASH){
2186 if (SP - PL_stack_base - start_sp_offset <= 0)
2187 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2188 if( next_symbol(symptr) ){
2189 if( symptr->howlen == e_number )
2190 Perl_croak(aTHX_ "Count after length/code in unpack" );
2192 /* ...end of char buffer then no decent length available */
2193 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2195 /* take top of stack (hope it's numeric) */
2198 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2201 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2203 datumtype = symptr->code;
2204 explicit_length = FALSE;
2212 return SP - PL_stack_base - start_sp_offset;
2220 I32 gimme = GIMME_V;
2223 const char *pat = SvPV_const(left, llen);
2224 const char *s = SvPV_const(right, rlen);
2225 const char *strend = s + rlen;
2226 const char *patend = pat + llen;
2230 cnt = unpackstring(pat, patend, s, strend,
2231 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2232 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2235 if ( !cnt && gimme == G_SCALAR )
2236 PUSHs(&PL_sv_undef);
2241 doencodes(U8 *h, const char *s, I32 len)
2243 *h++ = PL_uuemap[len];
2245 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2246 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2247 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2248 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2253 const char r = (len > 1 ? s[1] : '\0');
2254 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2255 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2256 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2257 *h++ = PL_uuemap[0];
2264 S_is_an_int(pTHX_ const char *s, STRLEN l)
2266 SV *result = newSVpvn(s, l);
2267 char *const result_c = SvPV_nolen(result); /* convenience */
2268 char *out = result_c;
2278 SvREFCNT_dec(result);
2301 SvREFCNT_dec(result);
2307 SvCUR_set(result, out - result_c);
2311 /* pnum must be '\0' terminated */
2313 S_div128(pTHX_ SV *pnum, bool *done)
2316 char * const s = SvPV(pnum, len);
2322 const int i = m * 10 + (*t - '0');
2323 const int r = (i >> 7); /* r < 10 */
2331 SvCUR_set(pnum, (STRLEN) (t - s));
2336 =for apidoc packlist
2338 The engine implementing pack() Perl function.
2344 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2349 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2351 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2352 Also make sure any UTF8 flag is loaded */
2353 SvPV_force_nolen(cat);
2355 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2357 (void)pack_rec( cat, &sym, beglist, endlist );
2360 /* like sv_utf8_upgrade, but also repoint the group start markers */
2362 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2365 const char *from_ptr, *from_start, *from_end, **marks, **m;
2366 char *to_start, *to_ptr;
2368 if (SvUTF8(sv)) return;
2370 from_start = SvPVX_const(sv);
2371 from_end = from_start + SvCUR(sv);
2372 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2373 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2374 if (from_ptr == from_end) {
2375 /* Simple case: no character needs to be changed */
2380 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2381 Newx(to_start, len, char);
2382 Copy(from_start, to_start, from_ptr-from_start, char);
2383 to_ptr = to_start + (from_ptr-from_start);
2385 Newx(marks, sym_ptr->level+2, const char *);
2386 for (group=sym_ptr; group; group = group->previous)
2387 marks[group->level] = from_start + group->strbeg;
2388 marks[sym_ptr->level+1] = from_end+1;
2389 for (m = marks; *m < from_ptr; m++)
2390 *m = to_start + (*m-from_start);
2392 for (;from_ptr < from_end; from_ptr++) {
2393 while (*m == from_ptr) *m++ = to_ptr;
2394 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2398 while (*m == from_ptr) *m++ = to_ptr;
2399 if (m != marks + sym_ptr->level+1) {
2402 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2404 for (group=sym_ptr; group; group = group->previous)
2405 group->strbeg = marks[group->level] - to_start;
2410 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2411 from_start -= SvIVX(sv);
2414 SvFLAGS(sv) &= ~SVf_OOK;
2417 Safefree(from_start);
2418 SvPV_set(sv, to_start);
2419 SvCUR_set(sv, to_ptr - to_start);
2424 /* Exponential string grower. Makes string extension effectively O(n)
2425 needed says how many extra bytes we need (not counting the final '\0')
2426 Only grows the string if there is an actual lack of space
2429 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2430 const STRLEN cur = SvCUR(sv);
2431 const STRLEN len = SvLEN(sv);
2433 if (len - cur > needed) return SvPVX(sv);
2434 extend = needed > len ? needed : len;
2435 return SvGROW(sv, len+extend+1);
2440 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2443 tempsym_t lookahead;
2444 I32 items = endlist - beglist;
2445 bool found = next_symbol(symptr);
2446 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2447 bool warn_utf8 = ckWARN(WARN_UTF8);
2449 if (symptr->level == 0 && found && symptr->code == 'U') {
2450 marked_upgrade(aTHX_ cat, symptr);
2451 symptr->flags |= FLAG_DO_UTF8;
2454 symptr->strbeg = SvCUR(cat);
2460 SV *lengthcode = NULL;
2461 I32 datumtype = symptr->code;
2462 howlen_t howlen = symptr->howlen;
2463 char *start = SvPVX(cat);
2464 char *cur = start + SvCUR(cat);
2466 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2470 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2474 /* e_no_len and e_number */
2475 len = symptr->length;
2480 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2482 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2483 /* We can process this letter. */
2484 STRLEN size = props & PACK_SIZE_MASK;
2485 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2489 /* Look ahead for next symbol. Do we have code/code? */
2490 lookahead = *symptr;
2491 found = next_symbol(&lookahead);
2492 if (symptr->flags & FLAG_SLASH) {
2494 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2495 if (strchr("aAZ", lookahead.code)) {
2496 if (lookahead.howlen == e_number) count = lookahead.length;
2499 if (SvGAMAGIC(*beglist)) {
2500 /* Avoid reading the active data more than once
2501 by copying it to a temporary. */
2503 const char *const pv = SvPV_const(*beglist, len);
2504 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2505 if (SvUTF8(*beglist))
2509 count = DO_UTF8(*beglist) ?
2510 sv_len_utf8(*beglist) : sv_len(*beglist);
2513 if (lookahead.code == 'Z') count++;
2516 if (lookahead.howlen == e_number && lookahead.length < items)
2517 count = lookahead.length;
2520 lookahead.howlen = e_number;
2521 lookahead.length = count;
2522 lengthcode = sv_2mortal(newSViv(count));
2525 /* Code inside the switch must take care to properly update
2526 cat (CUR length and '\0' termination) if it updated *cur and
2527 doesn't simply leave using break */
2528 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2530 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2531 (int) TYPE_NO_MODIFIERS(datumtype));
2533 Perl_croak(aTHX_ "'%%' may not be used in pack");
2536 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2537 case '.' | TYPE_IS_SHRIEKING:
2540 if (howlen == e_star) from = start;
2541 else if (len == 0) from = cur;
2543 tempsym_t *group = symptr;
2545 while (--len && group) group = group->previous;
2546 from = group ? start + group->strbeg : start;
2549 len = SvIV(fromstr);
2551 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2552 case '@' | TYPE_IS_SHRIEKING:
2555 from = start + symptr->strbeg;
2557 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2558 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2559 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2563 while (len && from < cur) {
2564 from += UTF8SKIP(from);
2568 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2570 /* Here we know from == cur */
2572 GROWING(0, cat, start, cur, len);
2573 Zero(cur, len, char);
2575 } else if (from < cur) {
2578 } else goto no_change;
2586 if (len > 0) goto grow;
2587 if (len == 0) goto no_change;
2594 tempsym_t savsym = *symptr;
2595 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2596 symptr->flags |= group_modifiers;
2597 symptr->patend = savsym.grpend;
2599 symptr->previous = &lookahead;
2602 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2603 else symptr->flags &= ~FLAG_PARSE_UTF8;
2604 was_utf8 = SvUTF8(cat);
2605 symptr->patptr = savsym.grpbeg;
2606 beglist = pack_rec(cat, symptr, beglist, endlist);
2607 if (SvUTF8(cat) != was_utf8)
2608 /* This had better be an upgrade while in utf8==0 mode */
2611 if (savsym.howlen == e_star && beglist == endlist)
2612 break; /* No way to continue */
2614 items = endlist - beglist;
2615 lookahead.flags = symptr->flags & ~group_modifiers;
2618 case 'X' | TYPE_IS_SHRIEKING:
2619 if (!len) /* Avoid division by 0 */
2626 hop += UTF8SKIP(hop);
2633 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2637 len = (cur-start) % len;
2641 if (len < 1) goto no_change;
2645 Perl_croak(aTHX_ "'%c' outside of string in pack",
2646 (int) TYPE_NO_MODIFIERS(datumtype));
2647 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2649 Perl_croak(aTHX_ "'%c' outside of string in pack",
2650 (int) TYPE_NO_MODIFIERS(datumtype));
2656 if (cur - start < len)
2657 Perl_croak(aTHX_ "'%c' outside of string in pack",
2658 (int) TYPE_NO_MODIFIERS(datumtype));
2661 if (cur < start+symptr->strbeg) {
2662 /* Make sure group starts don't point into the void */
2664 const STRLEN length = cur-start;
2665 for (group = symptr;
2666 group && length < group->strbeg;
2667 group = group->previous) group->strbeg = length;
2668 lookahead.strbeg = length;
2671 case 'x' | TYPE_IS_SHRIEKING: {
2673 if (!len) /* Avoid division by 0 */
2675 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2676 else ai32 = (cur - start) % len;
2677 if (ai32 == 0) goto no_change;
2689 aptr = SvPV_const(fromstr, fromlen);
2690 if (DO_UTF8(fromstr)) {
2691 const char *end, *s;
2693 if (!utf8 && !SvUTF8(cat)) {
2694 marked_upgrade(aTHX_ cat, symptr);
2695 lookahead.flags |= FLAG_DO_UTF8;
2696 lookahead.strbeg = symptr->strbeg;
2699 cur = start + SvCUR(cat);
2701 if (howlen == e_star) {
2702 if (utf8) goto string_copy;
2706 end = aptr + fromlen;
2707 fromlen = datumtype == 'Z' ? len-1 : len;
2708 while ((I32) fromlen > 0 && s < end) {
2713 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2716 if (datumtype == 'Z') len++;
2722 fromlen = len - fromlen;
2723 if (datumtype == 'Z') fromlen--;
2724 if (howlen == e_star) {
2726 if (datumtype == 'Z') len++;
2728 GROWING(0, cat, start, cur, len);
2729 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2730 datumtype | TYPE_IS_PACK))
2731 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2735 if (howlen == e_star) {
2737 if (datumtype == 'Z') len++;
2739 if (len <= (I32) fromlen) {
2741 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2743 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2745 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2746 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2748 while (fromlen > 0) {
2749 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2755 if (howlen == e_star) {
2757 if (datumtype == 'Z') len++;
2759 if (len <= (I32) fromlen) {
2761 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2763 GROWING(0, cat, start, cur, len);
2764 Copy(aptr, cur, fromlen, char);
2768 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2774 const char *str, *end;
2781 str = SvPV_const(fromstr, fromlen);
2782 end = str + fromlen;
2783 if (DO_UTF8(fromstr)) {
2785 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2787 utf8_source = FALSE;
2788 utf8_flags = 0; /* Unused, but keep compilers happy */
2790 if (howlen == e_star) len = fromlen;
2791 field_len = (len+7)/8;
2792 GROWING(utf8, cat, start, cur, field_len);
2793 if (len > (I32)fromlen) len = fromlen;
2796 if (datumtype == 'B')
2800 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2802 } else bits |= *str++ & 1;
2803 if (l & 7) bits <<= 1;
2805 PUSH_BYTE(utf8, cur, bits);
2810 /* datumtype == 'b' */
2814 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2815 if (val & 1) bits |= 0x80;
2816 } else if (*str++ & 1)
2818 if (l & 7) bits >>= 1;
2820 PUSH_BYTE(utf8, cur, bits);
2826 if (datumtype == 'B')
2827 bits <<= 7 - (l & 7);
2829 bits >>= 7 - (l & 7);
2830 PUSH_BYTE(utf8, cur, bits);
2833 /* Determine how many chars are left in the requested field */
2835 if (howlen == e_star) field_len = 0;
2836 else field_len -= l;
2837 Zero(cur, field_len, char);
2843 const char *str, *end;
2850 str = SvPV_const(fromstr, fromlen);
2851 end = str + fromlen;
2852 if (DO_UTF8(fromstr)) {
2854 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2856 utf8_source = FALSE;
2857 utf8_flags = 0; /* Unused, but keep compilers happy */
2859 if (howlen == e_star) len = fromlen;
2860 field_len = (len+1)/2;
2861 GROWING(utf8, cat, start, cur, field_len);
2862 if (!utf8 && len > (I32)fromlen) len = fromlen;
2865 if (datumtype == 'H')
2869 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2870 if (val < 256 && isALPHA(val))
2871 bits |= (val + 9) & 0xf;
2874 } else if (isALPHA(*str))
2875 bits |= (*str++ + 9) & 0xf;
2877 bits |= *str++ & 0xf;
2878 if (l & 1) bits <<= 4;
2880 PUSH_BYTE(utf8, cur, bits);
2888 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2889 if (val < 256 && isALPHA(val))
2890 bits |= ((val + 9) & 0xf) << 4;
2892 bits |= (val & 0xf) << 4;
2893 } else if (isALPHA(*str))
2894 bits |= ((*str++ + 9) & 0xf) << 4;
2896 bits |= (*str++ & 0xf) << 4;
2897 if (l & 1) bits >>= 4;
2899 PUSH_BYTE(utf8, cur, bits);
2905 PUSH_BYTE(utf8, cur, bits);
2908 /* Determine how many chars are left in the requested field */
2910 if (howlen == e_star) field_len = 0;
2911 else field_len -= l;
2912 Zero(cur, field_len, char);
2920 aiv = SvIV(fromstr);
2921 if ((-128 > aiv || aiv > 127) &&
2923 Perl_warner(aTHX_ packWARN(WARN_PACK),
2924 "Character in 'c' format wrapped in pack");
2925 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2930 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2933 GROWING(0, cat, start, cur, len);
2937 aiv = SvIV(fromstr);
2938 if ((0 > aiv || aiv > 0xff) &&
2940 Perl_warner(aTHX_ packWARN(WARN_PACK),
2941 "Character in 'C' format wrapped in pack");
2942 *cur++ = (char)(aiv & 0xff);
2947 U8 in_bytes = (U8)IN_BYTES;
2949 end = start+SvLEN(cat)-1;
2950 if (utf8) end -= UTF8_MAXLEN-1;
2954 auv = SvUV(fromstr);
2955 if (in_bytes) auv = auv % 0x100;
2960 SvCUR_set(cat, cur - start);
2962 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2963 end = start+SvLEN(cat)-UTF8_MAXLEN;
2965 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2968 0 : UNICODE_ALLOW_ANY);
2973 SvCUR_set(cat, cur - start);
2974 marked_upgrade(aTHX_ cat, symptr);
2975 lookahead.flags |= FLAG_DO_UTF8;
2976 lookahead.strbeg = symptr->strbeg;
2979 cur = start + SvCUR(cat);
2980 end = start+SvLEN(cat)-UTF8_MAXLEN;
2983 if (ckWARN(WARN_PACK))
2984 Perl_warner(aTHX_ packWARN(WARN_PACK),
2985 "Character in 'W' format wrapped in pack");
2990 SvCUR_set(cat, cur - start);
2991 GROWING(0, cat, start, cur, len+1);
2992 end = start+SvLEN(cat)-1;
2994 *(U8 *) cur++ = (U8)auv;
3003 if (!(symptr->flags & FLAG_DO_UTF8)) {
3004 marked_upgrade(aTHX_ cat, symptr);
3005 lookahead.flags |= FLAG_DO_UTF8;
3006 lookahead.strbeg = symptr->strbeg;
3012 end = start+SvLEN(cat);
3013 if (!utf8) end -= UTF8_MAXLEN;
3017 auv = SvUV(fromstr);
3019 U8 buffer[UTF8_MAXLEN], *endb;
3020 endb = uvuni_to_utf8_flags(buffer, auv,
3022 0 : UNICODE_ALLOW_ANY);
3023 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3025 SvCUR_set(cat, cur - start);
3026 GROWING(0, cat, start, cur,
3027 len+(endb-buffer)*UTF8_EXPAND);
3028 end = start+SvLEN(cat);
3030 cur = bytes_to_uni(buffer, endb-buffer, cur);
3034 SvCUR_set(cat, cur - start);
3035 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3036 end = start+SvLEN(cat)-UTF8_MAXLEN;
3038 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3040 0 : UNICODE_ALLOW_ANY);
3045 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3051 anv = SvNV(fromstr);
3053 /* VOS does not automatically map a floating-point overflow
3054 during conversion from double to float into infinity, so we
3055 do it by hand. This code should either be generalized for
3056 any OS that needs it, or removed if and when VOS implements
3057 posix-976 (suggestion to support mapping to infinity).
3058 Paul.Green@stratus.com 02-04-02. */
3060 afloat = _float_constants[0]; /* single prec. inf. */
3061 else if (anv < -FLT_MAX)
3062 afloat = _float_constants[0]; /* single prec. inf. */
3063 else afloat = (float) anv;
3065 # if defined(VMS) && !defined(__IEEE_FP)
3066 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3067 * on Alpha; fake it if we don't have them.
3071 else if (anv < -FLT_MAX)
3073 else afloat = (float)anv;
3075 afloat = (float)anv;
3077 #endif /* __VOS__ */
3078 DO_BO_PACK_N(afloat, float);
3079 PUSH_VAR(utf8, cur, afloat);
3087 anv = SvNV(fromstr);
3089 /* VOS does not automatically map a floating-point overflow
3090 during conversion from long double to double into infinity,
3091 so we do it by hand. This code should either be generalized
3092 for any OS that needs it, or removed if and when VOS
3093 implements posix-976 (suggestion to support mapping to
3094 infinity). Paul.Green@stratus.com 02-04-02. */
3096 adouble = _double_constants[0]; /* double prec. inf. */
3097 else if (anv < -DBL_MAX)
3098 adouble = _double_constants[0]; /* double prec. inf. */
3099 else adouble = (double) anv;
3101 # if defined(VMS) && !defined(__IEEE_FP)
3102 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3103 * on Alpha; fake it if we don't have them.
3107 else if (anv < -DBL_MAX)
3109 else adouble = (double)anv;
3111 adouble = (double)anv;
3113 #endif /* __VOS__ */
3114 DO_BO_PACK_N(adouble, double);
3115 PUSH_VAR(utf8, cur, adouble);
3120 Zero(&anv, 1, NV); /* can be long double with unused bits */
3123 anv = SvNV(fromstr);
3124 DO_BO_PACK_N(anv, NV);
3125 PUSH_VAR(utf8, cur, anv);
3129 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3131 long double aldouble;
3132 /* long doubles can have unused bits, which may be nonzero */
3133 Zero(&aldouble, 1, long double);
3136 aldouble = (long double)SvNV(fromstr);
3137 DO_BO_PACK_N(aldouble, long double);
3138 PUSH_VAR(utf8, cur, aldouble);
3143 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3144 case 'n' | TYPE_IS_SHRIEKING:
3150 ai16 = (I16)SvIV(fromstr);
3152 ai16 = PerlSock_htons(ai16);
3154 PUSH16(utf8, cur, &ai16);
3157 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3158 case 'v' | TYPE_IS_SHRIEKING:
3164 ai16 = (I16)SvIV(fromstr);
3168 PUSH16(utf8, cur, &ai16);
3171 case 'S' | TYPE_IS_SHRIEKING:
3172 #if SHORTSIZE != SIZE16
3174 unsigned short aushort;
3176 aushort = SvUV(fromstr);
3177 DO_BO_PACK(aushort, s);
3178 PUSH_VAR(utf8, cur, aushort);
3188 au16 = (U16)SvUV(fromstr);
3189 DO_BO_PACK(au16, 16);
3190 PUSH16(utf8, cur, &au16);
3193 case 's' | TYPE_IS_SHRIEKING:
3194 #if SHORTSIZE != SIZE16
3198 ashort = SvIV(fromstr);
3199 DO_BO_PACK(ashort, s);
3200 PUSH_VAR(utf8, cur, ashort);
3210 ai16 = (I16)SvIV(fromstr);
3211 DO_BO_PACK(ai16, 16);
3212 PUSH16(utf8, cur, &ai16);
3216 case 'I' | TYPE_IS_SHRIEKING:
3220 auint = SvUV(fromstr);
3221 DO_BO_PACK(auint, i);
3222 PUSH_VAR(utf8, cur, auint);
3229 aiv = SvIV(fromstr);
3230 #if IVSIZE == INTSIZE
3232 #elif IVSIZE == LONGSIZE
3234 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3235 DO_BO_PACK(aiv, 64);
3237 Perl_croak(aTHX_ "'j' not supported on this platform");
3239 PUSH_VAR(utf8, cur, aiv);
3246 auv = SvUV(fromstr);
3247 #if UVSIZE == INTSIZE
3249 #elif UVSIZE == LONGSIZE
3251 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3252 DO_BO_PACK(auv, 64);
3254 Perl_croak(aTHX_ "'J' not supported on this platform");
3256 PUSH_VAR(utf8, cur, auv);
3263 anv = SvNV(fromstr);
3267 SvCUR_set(cat, cur - start);
3268 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3271 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3272 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3273 any negative IVs will have already been got by the croak()
3274 above. IOK is untrue for fractions, so we test them
3275 against UV_MAX_P1. */
3276 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3277 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3278 char *in = buf + sizeof(buf);
3279 UV auv = SvUV(fromstr);
3282 *--in = (char)((auv & 0x7f) | 0x80);
3285 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3286 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3287 in, (buf + sizeof(buf)) - in);
3288 } else if (SvPOKp(fromstr))
3290 else if (SvNOKp(fromstr)) {
3291 /* 10**NV_MAX_10_EXP is the largest power of 10
3292 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3293 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3294 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3295 And with that many bytes only Inf can overflow.
3296 Some C compilers are strict about integral constant
3297 expressions so we conservatively divide by a slightly
3298 smaller integer instead of multiplying by the exact
3299 floating-point value.
3301 #ifdef NV_MAX_10_EXP
3302 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3303 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3305 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3306 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3308 char *in = buf + sizeof(buf);
3310 anv = Perl_floor(anv);
3312 const NV next = Perl_floor(anv / 128);
3313 if (in <= buf) /* this cannot happen ;-) */
3314 Perl_croak(aTHX_ "Cannot compress integer in pack");
3315 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3318 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3319 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3320 in, (buf + sizeof(buf)) - in);
3329 /* Copy string and check for compliance */
3330 from = SvPV_const(fromstr, len);
3331 if ((norm = is_an_int(from, len)) == NULL)
3332 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3334 Newx(result, len, char);
3337 while (!done) *--in = div128(norm, &done) | 0x80;
3338 result[len - 1] &= 0x7F; /* clear continue bit */
3339 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3340 in, (result + len) - in);
3342 SvREFCNT_dec(norm); /* free norm */
3347 case 'i' | TYPE_IS_SHRIEKING:
3351 aint = SvIV(fromstr);
3352 DO_BO_PACK(aint, i);
3353 PUSH_VAR(utf8, cur, aint);
3356 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3357 case 'N' | TYPE_IS_SHRIEKING:
3363 au32 = SvUV(fromstr);
3365 au32 = PerlSock_htonl(au32);
3367 PUSH32(utf8, cur, &au32);
3370 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3371 case 'V' | TYPE_IS_SHRIEKING:
3377 au32 = SvUV(fromstr);
3381 PUSH32(utf8, cur, &au32);
3384 case 'L' | TYPE_IS_SHRIEKING:
3385 #if LONGSIZE != SIZE32
3387 unsigned long aulong;
3389 aulong = SvUV(fromstr);
3390 DO_BO_PACK(aulong, l);
3391 PUSH_VAR(utf8, cur, aulong);
3401 au32 = SvUV(fromstr);
3402 DO_BO_PACK(au32, 32);
3403 PUSH32(utf8, cur, &au32);
3406 case 'l' | TYPE_IS_SHRIEKING:
3407 #if LONGSIZE != SIZE32
3411 along = SvIV(fromstr);
3412 DO_BO_PACK(along, l);
3413 PUSH_VAR(utf8, cur, along);
3423 ai32 = SvIV(fromstr);
3424 DO_BO_PACK(ai32, 32);
3425 PUSH32(utf8, cur, &ai32);
3433 auquad = (Uquad_t) SvUV(fromstr);
3434 DO_BO_PACK(auquad, 64);
3435 PUSH_VAR(utf8, cur, auquad);
3442 aquad = (Quad_t)SvIV(fromstr);
3443 DO_BO_PACK(aquad, 64);
3444 PUSH_VAR(utf8, cur, aquad);
3447 #endif /* HAS_QUAD */
3449 len = 1; /* assume SV is correct length */
3450 GROWING(utf8, cat, start, cur, sizeof(char *));
3457 SvGETMAGIC(fromstr);
3458 if (!SvOK(fromstr)) aptr = NULL;
3460 /* XXX better yet, could spirit away the string to
3461 * a safe spot and hang on to it until the result
3462 * of pack() (and all copies of the result) are
3465 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3466 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3467 Perl_warner(aTHX_ packWARN(WARN_PACK),
3468 "Attempt to pack pointer to temporary value");
3470 if (SvPOK(fromstr) || SvNIOK(fromstr))
3471 aptr = SvPV_nomg_const_nolen(fromstr);
3473 aptr = SvPV_force_flags_nolen(fromstr, 0);
3475 DO_BO_PACK_PC(aptr);
3476 PUSH_VAR(utf8, cur, aptr);
3480 const char *aptr, *aend;
3484 if (len <= 2) len = 45;
3485 else len = len / 3 * 3;
3487 if (ckWARN(WARN_PACK))
3488 Perl_warner(aTHX_ packWARN(WARN_PACK),
3489 "Field too wide in 'u' format in pack");
3492 aptr = SvPV_const(fromstr, fromlen);
3493 from_utf8 = DO_UTF8(fromstr);
3495 aend = aptr + fromlen;
3496 fromlen = sv_len_utf8(fromstr);
3497 } else aend = NULL; /* Unused, but keep compilers happy */
3498 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3499 while (fromlen > 0) {
3502 U8 hunk[1+63/3*4+1];
3504 if ((I32)fromlen > len)
3510 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3511 'u' | TYPE_IS_PACK)) {
3513 SvCUR_set(cat, cur - start);
3514 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3516 end = doencodes(hunk, buffer, todo);
3518 end = doencodes(hunk, aptr, todo);
3521 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3528 SvCUR_set(cat, cur - start);
3530 *symptr = lookahead;
3539 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3540 register SV *cat = TARG;
3542 SV *pat_sv = *++MARK;
3543 register const char *pat = SvPV_const(pat_sv, fromlen);
3544 register const char *patend = pat + fromlen;
3547 sv_setpvn(cat, "", 0);
3550 packlist(cat, pat, patend, MARK, SP + 1);
3560 * c-indentation-style: bsd
3562 * indent-tabs-mode: t
3565 * ex: set ts=8 sts=4 sw=4 noet: