3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
244 #else /* PERL_PACK_CAN_BYTEORDER */
246 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
247 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
249 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
251 # define DO_BO_UNPACK(var, type) \
253 switch (TYPE_ENDIANNESS(datumtype)) { \
254 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
255 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
260 # define DO_BO_PACK(var, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
269 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: \
273 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
275 case TYPE_IS_LITTLE_ENDIAN: \
276 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
283 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
285 switch (TYPE_ENDIANNESS(datumtype)) { \
286 case TYPE_IS_BIG_ENDIAN: \
287 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
289 case TYPE_IS_LITTLE_ENDIAN: \
290 var = (post_cast *) my_htole ## type ((pre_cast) var); \
297 # define BO_CANT_DOIT(action, type) \
299 switch (TYPE_ENDIANNESS(datumtype)) { \
300 case TYPE_IS_BIG_ENDIAN: \
301 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
302 "platform", #action, #type); \
304 case TYPE_IS_LITTLE_ENDIAN: \
305 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
306 "platform", #action, #type); \
313 # if PTRSIZE == INTSIZE
314 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
315 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
316 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
317 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
318 # elif PTRSIZE == LONGSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
324 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
325 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
328 # if defined(my_htolen) && defined(my_letohn) && \
329 defined(my_htoben) && defined(my_betohn)
330 # define DO_BO_UNPACK_N(var, type) \
332 switch (TYPE_ENDIANNESS(datumtype)) { \
333 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
334 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
339 # define DO_BO_PACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
348 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
349 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
352 #endif /* PERL_PACK_CAN_BYTEORDER */
354 #define PACK_SIZE_CANNOT_CSUM 0x80
355 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
356 #define PACK_SIZE_MASK 0x3F
358 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
359 in). You're unlikely ever to need to regenerate them. */
361 #if TYPE_IS_SHRIEKING != 0x100
362 ++++shriek offset should be 256
365 typedef U8 packprops_t;
368 const packprops_t packprops[512] = {
370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
375 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
376 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
377 /* D */ LONG_DOUBLESIZE,
384 /* I */ sizeof(unsigned int),
391 #if defined(HAS_QUAD)
392 /* Q */ sizeof(Uquad_t),
399 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
401 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* c */ sizeof(char),
404 /* d */ sizeof(double),
406 /* f */ sizeof(float),
415 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
416 #if defined(HAS_QUAD)
417 /* q */ sizeof(Quad_t),
425 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 /* I */ sizeof(unsigned int),
443 /* L */ sizeof(unsigned long),
445 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
451 /* S */ sizeof(unsigned short),
453 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 /* l */ sizeof(long),
464 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 /* s */ sizeof(short),
472 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0
488 /* EBCDIC (or bust) */
489 const packprops_t packprops[512] = {
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 /* c */ sizeof(char),
501 /* d */ sizeof(double),
503 /* f */ sizeof(float),
513 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
514 #if defined(HAS_QUAD)
515 /* q */ sizeof(Quad_t),
519 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
527 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
528 /* D */ LONG_DOUBLESIZE,
535 /* I */ sizeof(unsigned int),
543 #if defined(HAS_QUAD)
544 /* Q */ sizeof(Uquad_t),
548 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
553 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* l */ sizeof(long),
570 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* s */ sizeof(short),
578 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 /* I */ sizeof(unsigned int),
587 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* L */ sizeof(unsigned long),
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* S */ sizeof(unsigned short),
598 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
609 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
613 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
615 /* We try to process malformed UTF-8 as much as possible (preferrably with
616 warnings), but these two mean we make no progress in the string and
617 might enter an infinite loop */
618 if (retlen == (STRLEN) -1 || retlen == 0)
619 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
620 (int) TYPE_NO_MODIFIERS(datumtype));
622 if (ckWARN(WARN_UNPACK))
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Character in '%c' format wrapped in unpack",
625 (int) TYPE_NO_MODIFIERS(datumtype));
632 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
633 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
637 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
641 const char *from = *s;
643 const U32 flags = ckWARN(WARN_UTF8) ?
644 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
645 for (;buf_len > 0; buf_len--) {
646 if (from >= end) return FALSE;
647 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
648 if (retlen == (STRLEN) -1 || retlen == 0) {
649 from += UTF8SKIP(from);
651 } else from += retlen;
656 *(U8 *)buf++ = (U8)val;
658 /* We have enough characters for the buffer. Did we have problems ? */
661 /* Rewalk the string fragment while warning */
663 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
664 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
665 if (ptr >= end) break;
666 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
668 if (from > end) from = end;
670 if ((bad & 2) && ckWARN(WARN_UNPACK))
671 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
672 WARN_PACK : WARN_UNPACK),
673 "Character(s) in '%c' format wrapped in %s",
674 (int) TYPE_NO_MODIFIERS(datumtype),
675 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
682 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
686 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
687 if (val >= 0x100 || !ISUUCHAR(val) ||
688 retlen == (STRLEN) -1 || retlen == 0) {
692 *out = PL_uudmap[val] & 077;
698 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
699 U8 buffer[UTF8_MAXLEN];
700 const U8 *end = start + len;
702 while (start < end) {
704 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
714 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
722 #define PUSH_BYTES(utf8, cur, buf, len) \
724 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
726 Copy(buf, cur, len, char); \
731 #define GROWING(utf8, cat, start, cur, in_len) \
733 STRLEN glen = (in_len); \
734 if (utf8) glen *= UTF8_EXPAND; \
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(aTHX_ cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
741 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743 const STRLEN glen = (in_len); \
745 if (utf8) gl *= UTF8_EXPAND; \
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 SvCUR_set((cat), (cur) - (start)); \
749 (start) = sv_exp_grow(aTHX_ cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
752 PUSH_BYTES(utf8, cur, buf, glen); \
755 #define PUSH_BYTE(utf8, s, byte) \
758 const U8 au8 = (byte); \
759 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
760 } else *(U8 *)(s)++ = (byte); \
763 /* Only to be used inside a loop (see the break) */
764 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
776 static const char *_action( const tempsym_t* symptr )
778 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
781 /* Returns the sizeof() struct described by pat */
783 S_measure_struct(pTHX_ tempsym_t* symptr)
787 while (next_symbol(symptr)) {
791 switch (symptr->howlen) {
793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
797 /* e_no_len and e_number */
798 len = symptr->length;
802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
811 #ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
818 case 'U': /* XXXX Is it correct? */
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
822 (int) TYPE_NO_MODIFIERS(symptr->code),
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 if (!len) /* Avoid division by 0 */
845 len = total % len; /* Assumed: the start is aligned. */
850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
880 size = sizeof(char*);
890 /* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
894 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
896 while (patptr < patend) {
897 const char c = *patptr++;
904 while (patptr < patend && *patptr != '\n')
908 patptr = group_end(patptr, patend, ')') + 1;
910 patptr = group_end(patptr, patend, ']') + 1;
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
918 /* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
923 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
935 /* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
939 S_next_symbol(pTHX_ tempsym_t* symptr )
941 const char* patptr = symptr->patptr;
942 const char* patend = symptr->patend;
944 symptr->flags &= ~FLAG_SLASH;
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
949 else if (*patptr == '#') {
951 while (patptr < patend && *patptr != '\n')
956 /* We should have found a template code */
957 I32 code = *patptr++ & 0xFF;
958 U32 inherited_modifiers = 0;
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
964 "Invalid type ',' in %s", _action( symptr ) );
969 /* for '(', skip to ')' */
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
987 /* look for modifiers */
988 while (patptr < patend) {
993 modifier = TYPE_IS_SHRIEKING;
994 allowed = SHRIEKING_ALLOWED_TYPES;
996 #ifdef PERL_PACK_CAN_BYTEORDER
998 modifier = TYPE_IS_BIG_ENDIAN;
999 allowed = ENDIANNESS_ALLOWED_TYPES;
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1005 #endif /* PERL_PACK_CAN_BYTEORDER */
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1017 allowed, _action( symptr ) );
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1025 *patptr, _action( symptr ) );
1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
1031 _action( symptr ) );
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1041 /* look for count and/or / */
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1047 } else if (*patptr == '*') {
1049 symptr->howlen = e_star;
1051 } else if (*patptr == '[') {
1052 const char* lenptr = ++patptr;
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1060 _action( symptr ) );
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1069 symptr->howlen = e_no_len;
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1077 else if (*patptr == '#') {
1079 while (patptr < patend && *patptr != '\n')
1081 if (patptr < patend)
1084 if (*patptr == '/') {
1085 symptr->flags |= FLAG_SLASH;
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1090 _action( symptr ) );
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1101 symptr->code = code;
1102 symptr->patptr = patptr;
1106 symptr->patptr = patptr;
1111 There is no way to cleanly handle the case where we should process the
1112 string per byte in its upgraded form while it's really in downgraded form
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
1117 themselves if they need to do a lot of unpacks like this on it
1119 /* XXX These can be const */
1121 need_utf8(const char *pat, const char *patend)
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1127 pat = (const char *) memchr(pat, '\n', patend-pat);
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1138 first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1142 pat = (const char *) memchr(pat, '\n', patend-pat);
1150 =for apidoc unpack_str
1152 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1153 and ocnt are not used. This call should not be used, use unpackstring instead.
1158 Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1161 PERL_UNUSED_ARG(strbeg);
1162 PERL_UNUSED_ARG(new_s);
1163 PERL_UNUSED_ARG(ocnt);
1165 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1166 else if (need_utf8(pat, patend)) {
1167 /* We probably should try to avoid this in case a scalar context call
1168 wouldn't get to the "U0" */
1169 STRLEN len = strend - s;
1170 s = (char *) bytes_to_utf8((U8 *) s, &len);
1173 flags |= FLAG_DO_UTF8;
1176 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1177 flags |= FLAG_PARSE_UTF8;
1179 TEMPSYM_INIT(&sym, pat, patend, flags);
1181 return unpack_rec(&sym, s, s, strend, NULL );
1185 =for apidoc unpackstring
1187 The engine implementing unpack() Perl function. C<unpackstring> puts the
1188 extracted list items on the stack and returns the number of elements.
1189 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1194 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1198 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1199 else if (need_utf8(pat, patend)) {
1200 /* We probably should try to avoid this in case a scalar context call
1201 wouldn't get to the "U0" */
1202 STRLEN len = strend - s;
1203 s = (char *) bytes_to_utf8((U8 *) s, &len);
1206 flags |= FLAG_DO_UTF8;
1209 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1210 flags |= FLAG_PARSE_UTF8;
1212 TEMPSYM_INIT(&sym, pat, patend, flags);
1214 return unpack_rec(&sym, s, s, strend, NULL );
1219 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1223 const I32 start_sp_offset = SP - PL_stack_base;
1229 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1230 bool beyond = FALSE;
1231 bool explicit_length;
1232 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1233 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1234 symptr->strbeg = s - strbeg;
1236 while (next_symbol(symptr)) {
1239 I32 datumtype = symptr->code;
1240 /* do first one only unless in list context
1241 / is implemented by unpacking the count, then popping it from the
1242 stack, so must check that we're not in the middle of a / */
1243 if ( unpack_only_one
1244 && (SP - PL_stack_base == start_sp_offset + 1)
1245 && (datumtype != '/') ) /* XXX can this be omitted */
1248 switch (howlen = symptr->howlen) {
1250 len = strend - strbeg; /* long enough */
1253 /* e_no_len and e_number */
1254 len = symptr->length;
1258 explicit_length = TRUE;
1260 beyond = s >= strend;
1262 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1264 /* props nonzero means we can process this letter. */
1265 const long size = props & PACK_SIZE_MASK;
1266 const long howmany = (strend - s) / size;
1270 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1271 if (len && unpack_only_one) len = 1;
1277 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1279 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1282 if (howlen == e_no_len)
1283 len = 16; /* len is not specified */
1291 tempsym_t savsym = *symptr;
1292 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1293 symptr->flags |= group_modifiers;
1294 symptr->patend = savsym.grpend;
1295 symptr->previous = &savsym;
1299 symptr->patptr = savsym.grpbeg;
1300 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1301 else symptr->flags &= ~FLAG_PARSE_UTF8;
1302 unpack_rec(symptr, s, strbeg, strend, &s);
1303 if (s == strend && savsym.howlen == e_star)
1304 break; /* No way to continue */
1307 savsym.flags = symptr->flags & ~group_modifiers;
1311 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1312 case '.' | TYPE_IS_SHRIEKING:
1317 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1318 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1319 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1320 const bool u8 = utf8;
1322 if (howlen == e_star) from = strbeg;
1323 else if (len <= 0) from = s;
1325 tempsym_t *group = symptr;
1327 while (--len && group) group = group->previous;
1328 from = group ? strbeg + group->strbeg : strbeg;
1331 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1332 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1333 XPUSHs(sv_2mortal(sv));
1336 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1337 case '@' | TYPE_IS_SHRIEKING:
1340 s = strbeg + symptr->strbeg;
1341 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1342 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1343 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1349 Perl_croak(aTHX_ "'@' outside of string in unpack");
1354 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1357 Perl_croak(aTHX_ "'@' outside of string in unpack");
1361 case 'X' | TYPE_IS_SHRIEKING:
1362 if (!len) /* Avoid division by 0 */
1365 const char *hop, *last;
1367 hop = last = strbeg;
1369 hop += UTF8SKIP(hop);
1376 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1380 len = (s - strbeg) % len;
1386 Perl_croak(aTHX_ "'X' outside of string in unpack");
1387 while (--s, UTF8_IS_CONTINUATION(*s)) {
1389 Perl_croak(aTHX_ "'X' outside of string in unpack");
1394 if (len > s - strbeg)
1395 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1399 case 'x' | TYPE_IS_SHRIEKING: {
1401 if (!len) /* Avoid division by 0 */
1403 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1404 else ai32 = (s - strbeg) % len;
1405 if (ai32 == 0) break;
1413 Perl_croak(aTHX_ "'x' outside of string in unpack");
1418 if (len > strend - s)
1419 Perl_croak(aTHX_ "'x' outside of string in unpack");
1424 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1430 /* Preliminary length estimate is assumed done in 'W' */
1431 if (len > strend - s) len = strend - s;
1437 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1438 if (hop >= strend) {
1440 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1445 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1447 } else if (len > strend - s)
1450 if (datumtype == 'Z') {
1451 /* 'Z' strips stuff after first null */
1452 const char *ptr, *end;
1454 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1455 sv = newSVpvn(s, ptr-s);
1456 if (howlen == e_star) /* exact for 'Z*' */
1457 len = ptr-s + (ptr != strend ? 1 : 0);
1458 } else if (datumtype == 'A') {
1459 /* 'A' strips both nulls and spaces */
1461 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1462 for (ptr = s+len-1; ptr >= s; ptr--)
1463 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1464 !is_utf8_space((U8 *) ptr)) break;
1465 if (ptr >= s) ptr += UTF8SKIP(ptr);
1468 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1470 for (ptr = s+len-1; ptr >= s; ptr--)
1471 if (*ptr != 0 && !isSPACE(*ptr)) break;
1474 sv = newSVpvn(s, ptr-s);
1475 } else sv = newSVpvn(s, len);
1479 /* Undo any upgrade done due to need_utf8() */
1480 if (!(symptr->flags & FLAG_WAS_UTF8))
1481 sv_utf8_downgrade(sv, 0);
1483 XPUSHs(sv_2mortal(sv));
1489 if (howlen == e_star || len > (strend - s) * 8)
1490 len = (strend - s) * 8;
1494 Newxz(PL_bitcount, 256, char);
1495 for (bits = 1; bits < 256; bits++) {
1496 if (bits & 1) PL_bitcount[bits]++;
1497 if (bits & 2) PL_bitcount[bits]++;
1498 if (bits & 4) PL_bitcount[bits]++;
1499 if (bits & 8) PL_bitcount[bits]++;
1500 if (bits & 16) PL_bitcount[bits]++;
1501 if (bits & 32) PL_bitcount[bits]++;
1502 if (bits & 64) PL_bitcount[bits]++;
1503 if (bits & 128) PL_bitcount[bits]++;
1507 while (len >= 8 && s < strend) {
1508 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1513 cuv += PL_bitcount[*(U8 *)s++];
1516 if (len && s < strend) {
1518 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1519 if (datumtype == 'b')
1521 if (bits & 1) cuv++;
1526 if (bits & 0x80) cuv++;
1533 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1536 if (datumtype == 'b') {
1538 const I32 ai32 = len;
1539 for (len = 0; len < ai32; len++) {
1540 if (len & 7) bits >>= 1;
1542 if (s >= strend) break;
1543 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1544 } else bits = *(U8 *) s++;
1545 *str++ = bits & 1 ? '1' : '0';
1549 const I32 ai32 = len;
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 7) bits <<= 1;
1553 if (s >= strend) break;
1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1555 } else bits = *(U8 *) s++;
1556 *str++ = bits & 0x80 ? '1' : '0';
1560 SvCUR_set(sv, str - SvPVX_const(sv));
1567 /* Preliminary length estimate, acceptable for utf8 too */
1568 if (howlen == e_star || len > (strend - s) * 2)
1569 len = (strend - s) * 2;
1570 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1573 if (datumtype == 'h') {
1576 for (len = 0; len < ai32; len++) {
1577 if (len & 1) bits >>= 4;
1579 if (s >= strend) break;
1580 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1581 } else bits = * (U8 *) s++;
1582 *str++ = PL_hexdigit[bits & 15];
1586 const I32 ai32 = len;
1587 for (len = 0; len < ai32; len++) {
1588 if (len & 1) bits <<= 4;
1590 if (s >= strend) break;
1591 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1592 } else bits = *(U8 *) s++;
1593 *str++ = PL_hexdigit[(bits >> 4) & 15];
1597 SvCUR_set(sv, str - SvPVX_const(sv));
1603 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1604 if (aint >= 128) /* fake up signed chars */
1607 PUSHs(sv_2mortal(newSViv((IV)aint)));
1608 else if (checksum > bits_in_uv)
1609 cdouble += (NV)aint;
1618 if (explicit_length && datumtype == 'C')
1619 /* Switch to "character" mode */
1620 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1623 if (datumtype == 'C' ?
1624 (symptr->flags & FLAG_DO_UTF8) &&
1625 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1626 while (len-- > 0 && s < strend) {
1628 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1629 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1630 if (retlen == (STRLEN) -1 || retlen == 0)
1631 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1634 PUSHs(sv_2mortal(newSVuv((UV) val)));
1635 else if (checksum > bits_in_uv)
1636 cdouble += (NV) val;
1640 } else if (!checksum)
1642 const U8 ch = *(U8 *) s++;
1643 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1645 else if (checksum > bits_in_uv)
1646 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1648 while (len-- > 0) cuv += *(U8 *) s++;
1652 if (explicit_length) {
1653 /* Switch to "bytes in UTF-8" mode */
1654 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1656 /* Should be impossible due to the need_utf8() test */
1657 Perl_croak(aTHX_ "U0 mode on a byte string");
1661 if (len > strend - s) len = strend - s;
1663 if (len && unpack_only_one) len = 1;
1667 while (len-- > 0 && s < strend) {
1671 U8 result[UTF8_MAXLEN];
1672 const char *ptr = s;
1674 /* Bug: warns about bad utf8 even if we are short on bytes
1675 and will break out of the loop */
1676 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1679 len = UTF8SKIP(result);
1680 if (!uni_to_bytes(aTHX_ &ptr, strend,
1681 (char *) &result[1], len-1, 'U')) break;
1682 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1685 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1686 if (retlen == (STRLEN) -1 || retlen == 0)
1687 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1691 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1692 else if (checksum > bits_in_uv)
1693 cdouble += (NV) auv;
1698 case 's' | TYPE_IS_SHRIEKING:
1699 #if SHORTSIZE != SIZE16
1702 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1703 DO_BO_UNPACK(ashort, s);
1705 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1706 else if (checksum > bits_in_uv)
1707 cdouble += (NV)ashort;
1719 #if U16SIZE > SIZE16
1722 SHIFT16(utf8, s, strend, &ai16, datumtype);
1723 DO_BO_UNPACK(ai16, 16);
1724 #if U16SIZE > SIZE16
1729 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1730 else if (checksum > bits_in_uv)
1731 cdouble += (NV)ai16;
1736 case 'S' | TYPE_IS_SHRIEKING:
1737 #if SHORTSIZE != SIZE16
1739 unsigned short aushort;
1740 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1741 DO_BO_UNPACK(aushort, s);
1743 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1744 else if (checksum > bits_in_uv)
1745 cdouble += (NV)aushort;
1758 #if U16SIZE > SIZE16
1761 SHIFT16(utf8, s, strend, &au16, datumtype);
1762 DO_BO_UNPACK(au16, 16);
1764 if (datumtype == 'n')
1765 au16 = PerlSock_ntohs(au16);
1768 if (datumtype == 'v')
1772 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1773 else if (checksum > bits_in_uv)
1774 cdouble += (NV) au16;
1779 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1780 case 'v' | TYPE_IS_SHRIEKING:
1781 case 'n' | TYPE_IS_SHRIEKING:
1784 # if U16SIZE > SIZE16
1787 SHIFT16(utf8, s, strend, &ai16, datumtype);
1789 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1790 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1791 # endif /* HAS_NTOHS */
1793 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1794 ai16 = (I16) vtohs((U16) ai16);
1795 # endif /* HAS_VTOHS */
1797 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1798 else if (checksum > bits_in_uv)
1799 cdouble += (NV) ai16;
1804 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1806 case 'i' | TYPE_IS_SHRIEKING:
1809 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1810 DO_BO_UNPACK(aint, i);
1812 PUSHs(sv_2mortal(newSViv((IV)aint)));
1813 else if (checksum > bits_in_uv)
1814 cdouble += (NV)aint;
1820 case 'I' | TYPE_IS_SHRIEKING:
1823 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1824 DO_BO_UNPACK(auint, i);
1826 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1827 else if (checksum > bits_in_uv)
1828 cdouble += (NV)auint;
1836 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1837 #if IVSIZE == INTSIZE
1838 DO_BO_UNPACK(aiv, i);
1839 #elif IVSIZE == LONGSIZE
1840 DO_BO_UNPACK(aiv, l);
1841 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1842 DO_BO_UNPACK(aiv, 64);
1844 Perl_croak(aTHX_ "'j' not supported on this platform");
1847 PUSHs(sv_2mortal(newSViv(aiv)));
1848 else if (checksum > bits_in_uv)
1857 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1858 #if IVSIZE == INTSIZE
1859 DO_BO_UNPACK(auv, i);
1860 #elif IVSIZE == LONGSIZE
1861 DO_BO_UNPACK(auv, l);
1862 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1863 DO_BO_UNPACK(auv, 64);
1865 Perl_croak(aTHX_ "'J' not supported on this platform");
1868 PUSHs(sv_2mortal(newSVuv(auv)));
1869 else if (checksum > bits_in_uv)
1875 case 'l' | TYPE_IS_SHRIEKING:
1876 #if LONGSIZE != SIZE32
1879 SHIFT_VAR(utf8, s, strend, along, datumtype);
1880 DO_BO_UNPACK(along, l);
1882 PUSHs(sv_2mortal(newSViv((IV)along)));
1883 else if (checksum > bits_in_uv)
1884 cdouble += (NV)along;
1895 #if U32SIZE > SIZE32
1898 SHIFT32(utf8, s, strend, &ai32, datumtype);
1899 DO_BO_UNPACK(ai32, 32);
1900 #if U32SIZE > SIZE32
1901 if (ai32 > 2147483647) ai32 -= 4294967296;
1904 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1905 else if (checksum > bits_in_uv)
1906 cdouble += (NV)ai32;
1911 case 'L' | TYPE_IS_SHRIEKING:
1912 #if LONGSIZE != SIZE32
1914 unsigned long aulong;
1915 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1916 DO_BO_UNPACK(aulong, l);
1918 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1919 else if (checksum > bits_in_uv)
1920 cdouble += (NV)aulong;
1933 #if U32SIZE > SIZE32
1936 SHIFT32(utf8, s, strend, &au32, datumtype);
1937 DO_BO_UNPACK(au32, 32);
1939 if (datumtype == 'N')
1940 au32 = PerlSock_ntohl(au32);
1943 if (datumtype == 'V')
1947 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1948 else if (checksum > bits_in_uv)
1949 cdouble += (NV)au32;
1954 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1955 case 'V' | TYPE_IS_SHRIEKING:
1956 case 'N' | TYPE_IS_SHRIEKING:
1959 # if U32SIZE > SIZE32
1962 SHIFT32(utf8, s, strend, &ai32, datumtype);
1964 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1965 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1968 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1969 ai32 = (I32)vtohl((U32)ai32);
1972 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1973 else if (checksum > bits_in_uv)
1974 cdouble += (NV)ai32;
1979 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1983 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1984 DO_BO_UNPACK_PC(aptr);
1985 /* newSVpv generates undef if aptr is NULL */
1986 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1994 while (len > 0 && s < strend) {
1996 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1997 auv = (auv << 7) | (ch & 0x7f);
1998 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2001 PUSHs(sv_2mortal(newSVuv(auv)));
2006 if (++bytes >= sizeof(UV)) { /* promote to string */
2009 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2010 while (s < strend) {
2011 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2012 sv = mul128(sv, (U8)(ch & 0x7f));
2018 t = SvPV_nolen_const(sv);
2022 PUSHs(sv_2mortal(sv));
2027 if ((s >= strend) && bytes)
2028 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2032 if (symptr->howlen == e_star)
2033 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2035 if (sizeof(char*) <= strend - s) {
2037 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2038 DO_BO_UNPACK_PC(aptr);
2039 /* newSVpvn generates undef if aptr is NULL */
2040 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2047 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2048 DO_BO_UNPACK(aquad, 64);
2050 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2051 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2052 else if (checksum > bits_in_uv)
2053 cdouble += (NV)aquad;
2061 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2062 DO_BO_UNPACK(auquad, 64);
2064 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2065 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2066 else if (checksum > bits_in_uv)
2067 cdouble += (NV)auquad;
2072 #endif /* HAS_QUAD */
2073 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2077 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2078 DO_BO_UNPACK_N(afloat, float);
2080 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2088 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2089 DO_BO_UNPACK_N(adouble, double);
2091 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2099 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2100 DO_BO_UNPACK_N(anv, NV);
2102 PUSHs(sv_2mortal(newSVnv(anv)));
2107 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2110 long double aldouble;
2111 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2112 DO_BO_UNPACK_N(aldouble, long double);
2114 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2116 cdouble += aldouble;
2122 * Initialise the decode mapping. By using a table driven
2123 * algorithm, the code will be character-set independent
2124 * (and just as fast as doing character arithmetic)
2126 if (PL_uudmap['M'] == 0) {
2129 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2130 PL_uudmap[(U8)PL_uuemap[i]] = i;
2132 * Because ' ' and '`' map to the same value,
2133 * we need to decode them both the same.
2138 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2139 sv = sv_2mortal(NEWSV(42, l));
2140 if (l) SvPOK_on(sv);
2143 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2149 next_uni_uu(aTHX_ &s, strend, &a);
2150 next_uni_uu(aTHX_ &s, strend, &b);
2151 next_uni_uu(aTHX_ &s, strend, &c);
2152 next_uni_uu(aTHX_ &s, strend, &d);
2153 hunk[0] = (char)((a << 2) | (b >> 4));
2154 hunk[1] = (char)((b << 4) | (c >> 2));
2155 hunk[2] = (char)((c << 6) | d);
2156 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2164 /* possible checksum byte */
2165 const char *skip = s+UTF8SKIP(s);
2166 if (skip < strend && *skip == '\n')
2172 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2177 len = PL_uudmap[*(U8*)s++] & 077;
2179 if (s < strend && ISUUCHAR(*s))
2180 a = PL_uudmap[*(U8*)s++] & 077;
2183 if (s < strend && ISUUCHAR(*s))
2184 b = PL_uudmap[*(U8*)s++] & 077;
2187 if (s < strend && ISUUCHAR(*s))
2188 c = PL_uudmap[*(U8*)s++] & 077;
2191 if (s < strend && ISUUCHAR(*s))
2192 d = PL_uudmap[*(U8*)s++] & 077;
2195 hunk[0] = (char)((a << 2) | (b >> 4));
2196 hunk[1] = (char)((b << 4) | (c >> 2));
2197 hunk[2] = (char)((c << 6) | d);
2198 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2203 else /* possible checksum byte */
2204 if (s + 1 < strend && s[1] == '\n')
2213 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2214 (checksum > bits_in_uv &&
2215 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2218 anv = (NV) (1 << (checksum & 15));
2219 while (checksum >= 16) {
2223 while (cdouble < 0.0)
2225 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2226 sv = newSVnv(cdouble);
2229 if (checksum < bits_in_uv) {
2230 UV mask = ((UV)1 << checksum) - 1;
2235 XPUSHs(sv_2mortal(sv));
2239 if (symptr->flags & FLAG_SLASH){
2240 if (SP - PL_stack_base - start_sp_offset <= 0)
2241 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2242 if( next_symbol(symptr) ){
2243 if( symptr->howlen == e_number )
2244 Perl_croak(aTHX_ "Count after length/code in unpack" );
2246 /* ...end of char buffer then no decent length available */
2247 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2249 /* take top of stack (hope it's numeric) */
2252 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2255 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2257 datumtype = symptr->code;
2258 explicit_length = FALSE;
2266 return SP - PL_stack_base - start_sp_offset;
2273 I32 gimme = GIMME_V;
2276 const char *pat = SvPV_const(left, llen);
2277 const char *s = SvPV_const(right, rlen);
2278 const char *strend = s + rlen;
2279 const char *patend = pat + llen;
2283 cnt = unpackstring(pat, patend, s, strend,
2284 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2285 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2288 if ( !cnt && gimme == G_SCALAR )
2289 PUSHs(&PL_sv_undef);
2294 doencodes(U8 *h, const char *s, I32 len)
2296 *h++ = PL_uuemap[len];
2298 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2299 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2300 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2301 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2306 const char r = (len > 1 ? s[1] : '\0');
2307 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2308 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2309 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2310 *h++ = PL_uuemap[0];
2317 S_is_an_int(pTHX_ const char *s, STRLEN l)
2319 SV *result = newSVpvn(s, l);
2320 char *const result_c = SvPV_nolen(result); /* convenience */
2321 char *out = result_c;
2331 SvREFCNT_dec(result);
2354 SvREFCNT_dec(result);
2360 SvCUR_set(result, out - result_c);
2364 /* pnum must be '\0' terminated */
2366 S_div128(pTHX_ SV *pnum, bool *done)
2369 char * const s = SvPV(pnum, len);
2375 const int i = m * 10 + (*t - '0');
2376 const int r = (i >> 7); /* r < 10 */
2384 SvCUR_set(pnum, (STRLEN) (t - s));
2389 =for apidoc pack_cat
2391 The engine implementing pack() Perl function. Note: parameters next_in_list and
2392 flags are not used. This call should not be used; use packlist instead.
2398 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2401 PERL_UNUSED_ARG(next_in_list);
2402 PERL_UNUSED_ARG(flags);
2404 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2406 (void)pack_rec( cat, &sym, beglist, endlist );
2411 =for apidoc packlist
2413 The engine implementing pack() Perl function.
2419 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2424 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2426 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2427 Also make sure any UTF8 flag is loaded */
2428 SvPV_force(cat, no_len);
2429 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2431 (void)pack_rec( cat, &sym, beglist, endlist );
2434 /* like sv_utf8_upgrade, but also repoint the group start markers */
2436 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2439 const char *from_ptr, *from_start, *from_end, **marks, **m;
2440 char *to_start, *to_ptr;
2442 if (SvUTF8(sv)) return;
2444 from_start = SvPVX_const(sv);
2445 from_end = from_start + SvCUR(sv);
2446 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2447 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2448 if (from_ptr == from_end) {
2449 /* Simple case: no character needs to be changed */
2454 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2455 Newx(to_start, len, char);
2456 Copy(from_start, to_start, from_ptr-from_start, char);
2457 to_ptr = to_start + (from_ptr-from_start);
2459 Newx(marks, sym_ptr->level+2, const char *);
2460 for (group=sym_ptr; group; group = group->previous)
2461 marks[group->level] = from_start + group->strbeg;
2462 marks[sym_ptr->level+1] = from_end+1;
2463 for (m = marks; *m < from_ptr; m++)
2464 *m = to_start + (*m-from_start);
2466 for (;from_ptr < from_end; from_ptr++) {
2467 while (*m == from_ptr) *m++ = to_ptr;
2468 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2472 while (*m == from_ptr) *m++ = to_ptr;
2473 if (m != marks + sym_ptr->level+1) {
2476 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2478 for (group=sym_ptr; group; group = group->previous)
2479 group->strbeg = marks[group->level] - to_start;
2484 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2485 from_start -= SvIVX(sv);
2488 SvFLAGS(sv) &= ~SVf_OOK;
2491 Safefree(from_start);
2492 SvPV_set(sv, to_start);
2493 SvCUR_set(sv, to_ptr - to_start);
2498 /* Exponential string grower. Makes string extension effectively O(n)
2499 needed says how many extra bytes we need (not counting the final '\0')
2500 Only grows the string if there is an actual lack of space
2503 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2504 const STRLEN cur = SvCUR(sv);
2505 const STRLEN len = SvLEN(sv);
2507 if (len - cur > needed) return SvPVX(sv);
2508 extend = needed > len ? needed : len;
2509 return SvGROW(sv, len+extend+1);
2514 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2516 tempsym_t lookahead;
2517 I32 items = endlist - beglist;
2518 bool found = next_symbol(symptr);
2519 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2520 bool warn_utf8 = ckWARN(WARN_UTF8);
2522 if (symptr->level == 0 && found && symptr->code == 'U') {
2523 marked_upgrade(aTHX_ cat, symptr);
2524 symptr->flags |= FLAG_DO_UTF8;
2527 symptr->strbeg = SvCUR(cat);
2533 SV *lengthcode = Nullsv;
2534 I32 datumtype = symptr->code;
2535 howlen_t howlen = symptr->howlen;
2536 char *start = SvPVX(cat);
2537 char *cur = start + SvCUR(cat);
2539 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2543 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2547 /* e_no_len and e_number */
2548 len = symptr->length;
2553 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2555 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2556 /* We can process this letter. */
2557 STRLEN size = props & PACK_SIZE_MASK;
2558 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2562 /* Look ahead for next symbol. Do we have code/code? */
2563 lookahead = *symptr;
2564 found = next_symbol(&lookahead);
2565 if (symptr->flags & FLAG_SLASH) {
2567 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2568 if (strchr("aAZ", lookahead.code)) {
2569 if (lookahead.howlen == e_number) count = lookahead.length;
2572 count = DO_UTF8(*beglist) ?
2573 sv_len_utf8(*beglist) : sv_len(*beglist);
2575 if (lookahead.code == 'Z') count++;
2578 if (lookahead.howlen == e_number && lookahead.length < items)
2579 count = lookahead.length;
2582 lookahead.howlen = e_number;
2583 lookahead.length = count;
2584 lengthcode = sv_2mortal(newSViv(count));
2587 /* Code inside the switch must take care to properly update
2588 cat (CUR length and '\0' termination) if it updated *cur and
2589 doesn't simply leave using break */
2590 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2592 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2593 (int) TYPE_NO_MODIFIERS(datumtype));
2595 Perl_croak(aTHX_ "'%%' may not be used in pack");
2598 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2599 case '.' | TYPE_IS_SHRIEKING:
2602 if (howlen == e_star) from = start;
2603 else if (len == 0) from = cur;
2605 tempsym_t *group = symptr;
2607 while (--len && group) group = group->previous;
2608 from = group ? start + group->strbeg : start;
2611 len = SvIV(fromstr);
2613 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2614 case '@' | TYPE_IS_SHRIEKING:
2617 from = start + symptr->strbeg;
2619 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2620 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2621 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2625 while (len && from < cur) {
2626 from += UTF8SKIP(from);
2630 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2632 /* Here we know from == cur */
2634 GROWING(0, cat, start, cur, len);
2635 Zero(cur, len, char);
2637 } else if (from < cur) {
2640 } else goto no_change;
2648 if (len > 0) goto grow;
2649 if (len == 0) goto no_change;
2656 tempsym_t savsym = *symptr;
2657 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2658 symptr->flags |= group_modifiers;
2659 symptr->patend = savsym.grpend;
2661 symptr->previous = &lookahead;
2664 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2665 else symptr->flags &= ~FLAG_PARSE_UTF8;
2666 was_utf8 = SvUTF8(cat);
2667 symptr->patptr = savsym.grpbeg;
2668 beglist = pack_rec(cat, symptr, beglist, endlist);
2669 if (SvUTF8(cat) != was_utf8)
2670 /* This had better be an upgrade while in utf8==0 mode */
2673 if (savsym.howlen == e_star && beglist == endlist)
2674 break; /* No way to continue */
2676 lookahead.flags = symptr->flags & ~group_modifiers;
2679 case 'X' | TYPE_IS_SHRIEKING:
2680 if (!len) /* Avoid division by 0 */
2687 hop += UTF8SKIP(hop);
2694 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2698 len = (cur-start) % len;
2702 if (len < 1) goto no_change;
2706 Perl_croak(aTHX_ "'%c' outside of string in pack",
2707 (int) TYPE_NO_MODIFIERS(datumtype));
2708 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
2717 if (cur - start < len)
2718 Perl_croak(aTHX_ "'%c' outside of string in pack",
2719 (int) TYPE_NO_MODIFIERS(datumtype));
2722 if (cur < start+symptr->strbeg) {
2723 /* Make sure group starts don't point into the void */
2725 const STRLEN length = cur-start;
2726 for (group = symptr;
2727 group && length < group->strbeg;
2728 group = group->previous) group->strbeg = length;
2729 lookahead.strbeg = length;
2732 case 'x' | TYPE_IS_SHRIEKING: {
2734 if (!len) /* Avoid division by 0 */
2736 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2737 else ai32 = (cur - start) % len;
2738 if (ai32 == 0) goto no_change;
2750 aptr = SvPV_const(fromstr, fromlen);
2751 if (DO_UTF8(fromstr)) {
2752 const char *end, *s;
2754 if (!utf8 && !SvUTF8(cat)) {
2755 marked_upgrade(aTHX_ cat, symptr);
2756 lookahead.flags |= FLAG_DO_UTF8;
2757 lookahead.strbeg = symptr->strbeg;
2760 cur = start + SvCUR(cat);
2762 if (howlen == e_star) {
2763 if (utf8) goto string_copy;
2767 end = aptr + fromlen;
2768 fromlen = datumtype == 'Z' ? len-1 : len;
2769 while ((I32) fromlen > 0 && s < end) {
2774 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2777 if (datumtype == 'Z') len++;
2783 fromlen = len - fromlen;
2784 if (datumtype == 'Z') fromlen--;
2785 if (howlen == e_star) {
2787 if (datumtype == 'Z') len++;
2789 GROWING(0, cat, start, cur, len);
2790 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2791 datumtype | TYPE_IS_PACK))
2792 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2796 if (howlen == e_star) {
2798 if (datumtype == 'Z') len++;
2800 if (len <= (I32) fromlen) {
2802 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2804 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2806 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2807 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2809 while (fromlen > 0) {
2810 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2816 if (howlen == e_star) {
2818 if (datumtype == 'Z') len++;
2820 if (len <= (I32) fromlen) {
2822 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2824 GROWING(0, cat, start, cur, len);
2825 Copy(aptr, cur, fromlen, char);
2829 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2835 const char *str, *end;
2842 str = SvPV_const(fromstr, fromlen);
2843 end = str + fromlen;
2844 if (DO_UTF8(fromstr)) {
2846 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2848 utf8_source = FALSE;
2849 utf8_flags = 0; /* Unused, but keep compilers happy */
2851 if (howlen == e_star) len = fromlen;
2852 field_len = (len+7)/8;
2853 GROWING(utf8, cat, start, cur, field_len);
2854 if (len > (I32)fromlen) len = fromlen;
2857 if (datumtype == 'B')
2861 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2863 } else bits |= *str++ & 1;
2864 if (l & 7) bits <<= 1;
2866 PUSH_BYTE(utf8, cur, bits);
2871 /* datumtype == 'b' */
2875 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2876 if (val & 1) bits |= 0x80;
2877 } else if (*str++ & 1)
2879 if (l & 7) bits >>= 1;
2881 PUSH_BYTE(utf8, cur, bits);
2887 if (datumtype == 'B')
2888 bits <<= 7 - (l & 7);
2890 bits >>= 7 - (l & 7);
2891 PUSH_BYTE(utf8, cur, bits);
2894 /* Determine how many chars are left in the requested field */
2896 if (howlen == e_star) field_len = 0;
2897 else field_len -= l;
2898 Zero(cur, field_len, char);
2904 const char *str, *end;
2911 str = SvPV_const(fromstr, fromlen);
2912 end = str + fromlen;
2913 if (DO_UTF8(fromstr)) {
2915 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2917 utf8_source = FALSE;
2918 utf8_flags = 0; /* Unused, but keep compilers happy */
2920 if (howlen == e_star) len = fromlen;
2921 field_len = (len+1)/2;
2922 GROWING(utf8, cat, start, cur, field_len);
2923 if (!utf8 && len > (I32)fromlen) len = fromlen;
2926 if (datumtype == 'H')
2930 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2931 if (val < 256 && isALPHA(val))
2932 bits |= (val + 9) & 0xf;
2935 } else if (isALPHA(*str))
2936 bits |= (*str++ + 9) & 0xf;
2938 bits |= *str++ & 0xf;
2939 if (l & 1) bits <<= 4;
2941 PUSH_BYTE(utf8, cur, bits);
2949 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2950 if (val < 256 && isALPHA(val))
2951 bits |= ((val + 9) & 0xf) << 4;
2953 bits |= (val & 0xf) << 4;
2954 } else if (isALPHA(*str))
2955 bits |= ((*str++ + 9) & 0xf) << 4;
2957 bits |= (*str++ & 0xf) << 4;
2958 if (l & 1) bits >>= 4;
2960 PUSH_BYTE(utf8, cur, bits);
2966 PUSH_BYTE(utf8, cur, bits);
2969 /* Determine how many chars are left in the requested field */
2971 if (howlen == e_star) field_len = 0;
2972 else field_len -= l;
2973 Zero(cur, field_len, char);
2981 aiv = SvIV(fromstr);
2982 if ((-128 > aiv || aiv > 127) &&
2984 Perl_warner(aTHX_ packWARN(WARN_PACK),
2985 "Character in 'c' format wrapped in pack");
2986 PUSH_BYTE(utf8, cur, aiv & 0xff);
2991 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2994 GROWING(0, cat, start, cur, len);
2998 aiv = SvIV(fromstr);
2999 if ((0 > aiv || aiv > 0xff) &&
3001 Perl_warner(aTHX_ packWARN(WARN_PACK),
3002 "Character in 'C' format wrapped in pack");
3003 *cur++ = aiv & 0xff;
3008 U8 in_bytes = IN_BYTES;
3010 end = start+SvLEN(cat)-1;
3011 if (utf8) end -= UTF8_MAXLEN-1;
3015 auv = SvUV(fromstr);
3016 if (in_bytes) auv = auv % 0x100;
3021 SvCUR_set(cat, cur - start);
3023 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3024 end = start+SvLEN(cat)-UTF8_MAXLEN;
3026 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3029 0 : UNICODE_ALLOW_ANY);
3034 SvCUR_set(cat, cur - start);
3035 marked_upgrade(aTHX_ cat, symptr);
3036 lookahead.flags |= FLAG_DO_UTF8;
3037 lookahead.strbeg = symptr->strbeg;
3040 cur = start + SvCUR(cat);
3041 end = start+SvLEN(cat)-UTF8_MAXLEN;
3044 if (ckWARN(WARN_PACK))
3045 Perl_warner(aTHX_ packWARN(WARN_PACK),
3046 "Character in 'W' format wrapped in pack");
3051 SvCUR_set(cat, cur - start);
3052 GROWING(0, cat, start, cur, len+1);
3053 end = start+SvLEN(cat)-1;
3055 *(U8 *) cur++ = (U8)auv;
3064 if (!(symptr->flags & FLAG_DO_UTF8)) {
3065 marked_upgrade(aTHX_ cat, symptr);
3066 lookahead.flags |= FLAG_DO_UTF8;
3067 lookahead.strbeg = symptr->strbeg;
3073 end = start+SvLEN(cat);
3074 if (!utf8) end -= UTF8_MAXLEN;
3078 auv = SvUV(fromstr);
3080 U8 buffer[UTF8_MAXLEN], *endb;
3081 endb = uvuni_to_utf8_flags(buffer, auv,
3083 0 : UNICODE_ALLOW_ANY);
3084 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3086 SvCUR_set(cat, cur - start);
3087 GROWING(0, cat, start, cur,
3088 len+(endb-buffer)*UTF8_EXPAND);
3089 end = start+SvLEN(cat);
3091 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3095 SvCUR_set(cat, cur - start);
3096 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3097 end = start+SvLEN(cat)-UTF8_MAXLEN;
3099 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3101 0 : UNICODE_ALLOW_ANY);
3106 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3112 anv = SvNV(fromstr);
3114 /* VOS does not automatically map a floating-point overflow
3115 during conversion from double to float into infinity, so we
3116 do it by hand. This code should either be generalized for
3117 any OS that needs it, or removed if and when VOS implements
3118 posix-976 (suggestion to support mapping to infinity).
3119 Paul.Green@stratus.com 02-04-02. */
3121 afloat = _float_constants[0]; /* single prec. inf. */
3122 else if (anv < -FLT_MAX)
3123 afloat = _float_constants[0]; /* single prec. inf. */
3124 else afloat = (float) anv;
3126 # if defined(VMS) && !defined(__IEEE_FP)
3127 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3128 * on Alpha; fake it if we don't have them.
3132 else if (anv < -FLT_MAX)
3134 else afloat = (float)anv;
3136 afloat = (float)anv;
3138 #endif /* __VOS__ */
3139 DO_BO_PACK_N(afloat, float);
3140 PUSH_VAR(utf8, cur, afloat);
3148 anv = SvNV(fromstr);
3150 /* VOS does not automatically map a floating-point overflow
3151 during conversion from long double to double into infinity,
3152 so we do it by hand. This code should either be generalized
3153 for any OS that needs it, or removed if and when VOS
3154 implements posix-976 (suggestion to support mapping to
3155 infinity). Paul.Green@stratus.com 02-04-02. */
3157 adouble = _double_constants[0]; /* double prec. inf. */
3158 else if (anv < -DBL_MAX)
3159 adouble = _double_constants[0]; /* double prec. inf. */
3160 else adouble = (double) anv;
3162 # if defined(VMS) && !defined(__IEEE_FP)
3163 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3164 * on Alpha; fake it if we don't have them.
3168 else if (anv < -DBL_MAX)
3170 else adouble = (double)anv;
3172 adouble = (double)anv;
3174 #endif /* __VOS__ */
3175 DO_BO_PACK_N(adouble, double);
3176 PUSH_VAR(utf8, cur, adouble);
3181 Zero(&anv, 1, NV); /* can be long double with unused bits */
3184 anv = SvNV(fromstr);
3185 DO_BO_PACK_N(anv, NV);
3186 PUSH_VAR(utf8, cur, anv);
3190 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3192 long double aldouble;
3193 /* long doubles can have unused bits, which may be nonzero */
3194 Zero(&aldouble, 1, long double);
3197 aldouble = (long double)SvNV(fromstr);
3198 DO_BO_PACK_N(aldouble, long double);
3199 PUSH_VAR(utf8, cur, aldouble);
3204 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3205 case 'n' | TYPE_IS_SHRIEKING:
3211 ai16 = (I16)SvIV(fromstr);
3213 ai16 = PerlSock_htons(ai16);
3215 PUSH16(utf8, cur, &ai16);
3218 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3219 case 'v' | TYPE_IS_SHRIEKING:
3225 ai16 = (I16)SvIV(fromstr);
3229 PUSH16(utf8, cur, &ai16);
3232 case 'S' | TYPE_IS_SHRIEKING:
3233 #if SHORTSIZE != SIZE16
3235 unsigned short aushort;
3237 aushort = SvUV(fromstr);
3238 DO_BO_PACK(aushort, s);
3239 PUSH_VAR(utf8, cur, aushort);
3249 au16 = (U16)SvUV(fromstr);
3250 DO_BO_PACK(au16, 16);
3251 PUSH16(utf8, cur, &au16);
3254 case 's' | TYPE_IS_SHRIEKING:
3255 #if SHORTSIZE != SIZE16
3259 ashort = SvIV(fromstr);
3260 DO_BO_PACK(ashort, s);
3261 PUSH_VAR(utf8, cur, ashort);
3271 ai16 = (I16)SvIV(fromstr);
3272 DO_BO_PACK(ai16, 16);
3273 PUSH16(utf8, cur, &ai16);
3277 case 'I' | TYPE_IS_SHRIEKING:
3281 auint = SvUV(fromstr);
3282 DO_BO_PACK(auint, i);
3283 PUSH_VAR(utf8, cur, auint);
3290 aiv = SvIV(fromstr);
3291 #if IVSIZE == INTSIZE
3293 #elif IVSIZE == LONGSIZE
3295 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3296 DO_BO_PACK(aiv, 64);
3298 Perl_croak(aTHX_ "'j' not supported on this platform");
3300 PUSH_VAR(utf8, cur, aiv);
3307 auv = SvUV(fromstr);
3308 #if UVSIZE == INTSIZE
3310 #elif UVSIZE == LONGSIZE
3312 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3313 DO_BO_PACK(auv, 64);
3315 Perl_croak(aTHX_ "'J' not supported on this platform");
3317 PUSH_VAR(utf8, cur, auv);
3324 anv = SvNV(fromstr);
3328 SvCUR_set(cat, cur - start);
3329 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3332 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3333 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3334 any negative IVs will have already been got by the croak()
3335 above. IOK is untrue for fractions, so we test them
3336 against UV_MAX_P1. */
3337 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3338 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3339 char *in = buf + sizeof(buf);
3340 UV auv = SvUV(fromstr);
3343 *--in = (char)((auv & 0x7f) | 0x80);
3346 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3347 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3348 in, (buf + sizeof(buf)) - in);
3349 } else if (SvPOKp(fromstr))
3351 else if (SvNOKp(fromstr)) {
3352 /* 10**NV_MAX_10_EXP is the largest power of 10
3353 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3354 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3355 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3356 And with that many bytes only Inf can overflow.
3357 Some C compilers are strict about integral constant
3358 expressions so we conservatively divide by a slightly
3359 smaller integer instead of multiplying by the exact
3360 floating-point value.
3362 #ifdef NV_MAX_10_EXP
3363 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3364 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3366 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3367 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3369 char *in = buf + sizeof(buf);
3371 anv = Perl_floor(anv);
3373 const NV next = Perl_floor(anv / 128);
3374 if (in <= buf) /* this cannot happen ;-) */
3375 Perl_croak(aTHX_ "Cannot compress integer in pack");
3376 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3379 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3380 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3381 in, (buf + sizeof(buf)) - in);
3390 /* Copy string and check for compliance */
3391 from = SvPV_const(fromstr, len);
3392 if ((norm = is_an_int(from, len)) == NULL)
3393 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3395 Newx(result, len, char);
3398 while (!done) *--in = div128(norm, &done) | 0x80;
3399 result[len - 1] &= 0x7F; /* clear continue bit */
3400 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3401 in, (result + len) - in);
3403 SvREFCNT_dec(norm); /* free norm */
3408 case 'i' | TYPE_IS_SHRIEKING:
3412 aint = SvIV(fromstr);
3413 DO_BO_PACK(aint, i);
3414 PUSH_VAR(utf8, cur, aint);
3417 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3418 case 'N' | TYPE_IS_SHRIEKING:
3424 au32 = SvUV(fromstr);
3426 au32 = PerlSock_htonl(au32);
3428 PUSH32(utf8, cur, &au32);
3431 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3432 case 'V' | TYPE_IS_SHRIEKING:
3438 au32 = SvUV(fromstr);
3442 PUSH32(utf8, cur, &au32);
3445 case 'L' | TYPE_IS_SHRIEKING:
3446 #if LONGSIZE != SIZE32
3448 unsigned long aulong;
3450 aulong = SvUV(fromstr);
3451 DO_BO_PACK(aulong, l);
3452 PUSH_VAR(utf8, cur, aulong);
3462 au32 = SvUV(fromstr);
3463 DO_BO_PACK(au32, 32);
3464 PUSH32(utf8, cur, &au32);
3467 case 'l' | TYPE_IS_SHRIEKING:
3468 #if LONGSIZE != SIZE32
3472 along = SvIV(fromstr);
3473 DO_BO_PACK(along, l);
3474 PUSH_VAR(utf8, cur, along);
3484 ai32 = SvIV(fromstr);
3485 DO_BO_PACK(ai32, 32);
3486 PUSH32(utf8, cur, &ai32);
3494 auquad = (Uquad_t) SvUV(fromstr);
3495 DO_BO_PACK(auquad, 64);
3496 PUSH_VAR(utf8, cur, auquad);
3503 aquad = (Quad_t)SvIV(fromstr);
3504 DO_BO_PACK(aquad, 64);
3505 PUSH_VAR(utf8, cur, aquad);
3508 #endif /* HAS_QUAD */
3510 len = 1; /* assume SV is correct length */
3511 GROWING(utf8, cat, start, cur, sizeof(char *));
3518 SvGETMAGIC(fromstr);
3519 if (!SvOK(fromstr)) aptr = NULL;
3522 /* XXX better yet, could spirit away the string to
3523 * a safe spot and hang on to it until the result
3524 * of pack() (and all copies of the result) are
3527 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3528 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3529 Perl_warner(aTHX_ packWARN(WARN_PACK),
3530 "Attempt to pack pointer to temporary value");
3532 if (SvPOK(fromstr) || SvNIOK(fromstr))
3533 aptr = SvPV_nomg_const(fromstr, n_a);
3535 aptr = SvPV_force_flags(fromstr, n_a, 0);
3537 DO_BO_PACK_PC(aptr);
3538 PUSH_VAR(utf8, cur, aptr);
3542 const char *aptr, *aend;
3546 if (len <= 2) len = 45;
3547 else len = len / 3 * 3;
3549 Perl_warner(aTHX_ packWARN(WARN_PACK),
3550 "Field too wide in 'u' format in pack");
3553 aptr = SvPV_const(fromstr, fromlen);
3554 from_utf8 = DO_UTF8(fromstr);
3556 aend = aptr + fromlen;
3557 fromlen = sv_len_utf8(fromstr);
3558 } else aend = NULL; /* Unused, but keep compilers happy */
3559 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3560 while (fromlen > 0) {
3563 U8 hunk[1+63/3*4+1];
3565 if ((I32)fromlen > len)
3571 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3572 'u' | TYPE_IS_PACK)) {
3574 SvCUR_set(cat, cur - start);
3575 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3577 end = doencodes(hunk, buffer, todo);
3579 end = doencodes(hunk, aptr, todo);
3582 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3589 SvCUR_set(cat, cur - start);
3591 *symptr = lookahead;
3600 dSP; dMARK; dORIGMARK; dTARGET;
3601 register SV *cat = TARG;
3603 SV *pat_sv = *++MARK;
3604 register const char *pat = SvPV_const(pat_sv, fromlen);
3605 register const char *patend = pat + fromlen;
3608 sv_setpvn(cat, "", 0);
3611 packlist(cat, pat, patend, MARK, SP + 1);
3621 * c-indentation-style: bsd
3623 * indent-tabs-mode: t
3626 * ex: set ts=8 sts=4 sw=4 noet: