3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
30 #define PERL_IN_PP_PACK_C
33 /* Types used by pack/unpack */
35 e_no_len, /* no length */
36 e_number, /* number, [] */
40 typedef struct tempsym {
41 const char* patptr; /* current template char */
42 const char* patend; /* one after last char */
43 const char* grpbeg; /* 1st char of ()-group */
44 const char* grpend; /* end of ()-group */
45 I32 code; /* template code (!<>) */
46 I32 length; /* length/repeat count */
47 howlen_t howlen; /* how length is given */
48 int level; /* () nesting level */
49 U32 flags; /* /=4, comma=2, pack=1 */
50 /* and group modifiers */
51 STRLEN strbeg; /* offset of group start */
52 struct tempsym *previous; /* previous group */
/* Initialise the fields of *symptr for a new template: patptr/patend are
 * taken from p/e, flags from f, and the other fields listed below are set
 * to NULL, 0 or e_no_len.
 * NOTE(review): grpend is assigned NULL twice below; the second assignment
 * is redundant. */
55 #define TEMPSYM_INIT(symptr, p, e, f) \
57 (symptr)->patptr = (p); \
58 (symptr)->patend = (e); \
59 (symptr)->grpbeg = NULL; \
60 (symptr)->grpend = NULL; \
61 (symptr)->grpend = NULL; \
63 (symptr)->length = 0; \
64 (symptr)->howlen = e_no_len; \
65 (symptr)->level = 0; \
66 (symptr)->flags = (f); \
67 (symptr)->strbeg = 0; \
68 (symptr)->previous = NULL; \
72 # define PERL_PACK_CAN_BYTEORDER
73 # define PERL_PACK_CAN_SHRIEKSIGN
79 /* Maximum number of bytes to which a byte can grow due to upgrade */
83 * Offset for integer pack/unpack.
85 * On architectures where I16 and I32 aren't really 16 and 32 bits,
86 * which for now are all Crays, pack and unpack have to play games.
90 * These values are required for portability of pack() output.
91 * If they're not right on your machine, then pack() and unpack()
92 * wouldn't work right anyway; you'll need to apply the Cray hack.
93 * (I'd like to check them with #if, but you can't use sizeof() in
94 * the preprocessor.) --???
97 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
98 defines are now in config.h. --Andy Dougherty April 1998
103 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
107 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
108 # define OFF16(p) ((char*)(p))
109 # define OFF32(p) ((char*)(p))
111 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
112 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
113 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115 ++++ bad cray byte order
119 # define OFF16(p) ((char *) (p))
120 # define OFF32(p) ((char *) (p))
123 /* Only to be used inside a loop (see the break) */
124 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 Copy(s, OFF16(p), SIZE16, char); \
133 /* Only to be used inside a loop (see the break) */
134 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 Copy(s, OFF32(p), SIZE32, char); \
143 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
144 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
150 if (!uni_to_bytes(aTHX_ &s, strend, \
151 (char *) &var, sizeof(var), datumtype)) break;\
153 Copy(s, (char *) &var, sizeof(var), char); \
158 #define PUSH_VAR(utf8, aptr, var) \
159 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
161 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
162 #define MAX_SUB_TEMPLATE_LEVEL 100
164 /* flags (note that type modifiers can also be used as flags!) */
165 #define FLAG_WAS_UTF8 0x40
166 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
167 #define FLAG_UNPACK_ONLY_ONE 0x10
168 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
169 #define FLAG_SLASH 0x04
170 #define FLAG_COMMA 0x02
171 #define FLAG_PACK 0x01
/* Arithmetic on the decimal digit string held in sv: each digit is scaled
 * by 128 (<< 7), m is added, and the low decimal digit of the result is
 * stored back while walking towards the start of the string.
 * NOTE(review): several lines of this function (declarations, the update
 * of m between digits, the return) are not visible here, so only the
 * visible steps are described. */
174 S_mul128(pTHX_ SV *sv, U8 m)
177 char *s = SvPV(sv, len);
/* Unless the string begins with "0000", build a replacement SV that starts
 * with ten '0' characters followed by the old contents, and release the
 * old SV. */
180 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
181 SV * const tmpNew = newSVpvs("0000000000");
183 sv_catsv(tmpNew, sv);
184 SvREFCNT_dec(sv); /* free old sv */
189 while (!*t) /* trailing '\0'? */
/* i = digit * 128 + m; its last decimal digit replaces the current one and
 * t steps back to the previous character. */
192 const U32 i = ((*t - '0') << 7) + m;
193 *(t--) = '0' + (char)(i % 10);
199 /* Explosives and implosives. */
201 #if 'I' == 73 && 'J' == 74
202 /* On an ASCII/ISO kind of system */
203 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
206 Some other sort of character set - use memchr() so we don't match
209 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
213 #define TYPE_IS_SHRIEKING 0x100
214 #define TYPE_IS_BIG_ENDIAN 0x200
215 #define TYPE_IS_LITTLE_ENDIAN 0x400
216 #define TYPE_IS_PACK 0x800
217 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
218 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
219 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221 #ifdef PERL_PACK_CAN_SHRIEKSIGN
222 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
224 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
227 #ifndef PERL_PACK_CAN_BYTEORDER
228 /* Put "can't" first because it is shorter */
229 # define TYPE_ENDIANNESS(t) 0
230 # define TYPE_NO_ENDIANNESS(t) (t)
232 # define ENDIANNESS_ALLOWED_TYPES ""
234 # define DO_BO_UNPACK(var, type)
235 # define DO_BO_PACK(var, type)
236 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
237 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_UNPACK_N(var, type)
239 # define DO_BO_PACK_N(var, type)
240 # define DO_BO_UNPACK_P(var)
241 # define DO_BO_PACK_P(var)
242 # define DO_BO_UNPACK_PC(var)
243 # define DO_BO_PACK_PC(var)
245 #else /* PERL_PACK_CAN_BYTEORDER */
247 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
248 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252 # define DO_BO_UNPACK(var, type) \
254 switch (TYPE_ENDIANNESS(datumtype)) { \
255 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
256 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 # define DO_BO_PACK(var, type) \
263 switch (TYPE_ENDIANNESS(datumtype)) { \
264 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
265 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
272 switch (TYPE_ENDIANNESS(datumtype)) { \
273 case TYPE_IS_BIG_ENDIAN: \
274 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
276 case TYPE_IS_LITTLE_ENDIAN: \
277 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
284 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
286 switch (TYPE_ENDIANNESS(datumtype)) { \
287 case TYPE_IS_BIG_ENDIAN: \
288 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
290 case TYPE_IS_LITTLE_ENDIAN: \
291 var = (post_cast *) my_htole ## type ((pre_cast) var); \
298 # define BO_CANT_DOIT(action, type) \
300 switch (TYPE_ENDIANNESS(datumtype)) { \
301 case TYPE_IS_BIG_ENDIAN: \
302 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
303 "platform", #action, #type); \
305 case TYPE_IS_LITTLE_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
307 "platform", #action, #type); \
314 # if PTRSIZE == INTSIZE
315 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
316 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
317 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
318 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
319 # elif PTRSIZE == LONGSIZE
320 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
321 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
322 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
323 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
324 # elif PTRSIZE == IVSIZE
325 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
326 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
327 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
328 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
330 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
331 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
332 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
333 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
336 # if defined(my_htolen) && defined(my_letohn) && \
337 defined(my_htoben) && defined(my_betohn)
338 # define DO_BO_UNPACK_N(var, type) \
340 switch (TYPE_ENDIANNESS(datumtype)) { \
341 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
342 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347 # define DO_BO_PACK_N(var, type) \
349 switch (TYPE_ENDIANNESS(datumtype)) { \
350 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
351 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
357 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
360 #endif /* PERL_PACK_CAN_BYTEORDER */
362 #define PACK_SIZE_CANNOT_CSUM 0x80
363 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
364 #define PACK_SIZE_MASK 0x3F
366 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
367 in). You're unlikely ever to need to regenerate them. */
369 #if TYPE_IS_SHRIEKING != 0x100
370 ++++shriek offset should be 256
373 typedef U8 packprops_t;
376 STATIC const packprops_t packprops[512] = {
378 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 /* C */ sizeof(unsigned char),
384 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
385 /* D */ LONG_DOUBLESIZE,
392 /* I */ sizeof(unsigned int),
399 #if defined(HAS_QUAD)
400 /* Q */ sizeof(Uquad_t),
407 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411 /* c */ sizeof(char),
412 /* d */ sizeof(double),
414 /* f */ sizeof(float),
423 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
424 #if defined(HAS_QUAD)
425 /* q */ sizeof(Quad_t),
433 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 /* I */ sizeof(unsigned int),
451 /* L */ sizeof(unsigned long),
453 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
459 /* S */ sizeof(unsigned short),
461 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
470 /* l */ sizeof(long),
472 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
478 /* s */ sizeof(short),
480 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0
496 /* EBCDIC (or bust) */
497 STATIC const packprops_t packprops[512] = {
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 /* c */ sizeof(char),
509 /* d */ sizeof(double),
511 /* f */ sizeof(float),
521 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
522 #if defined(HAS_QUAD)
523 /* q */ sizeof(Quad_t),
527 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 /* C */ sizeof(unsigned char),
535 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
536 /* D */ LONG_DOUBLESIZE,
543 /* I */ sizeof(unsigned int),
551 #if defined(HAS_QUAD)
552 /* Q */ sizeof(Uquad_t),
556 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* l */ sizeof(long),
578 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 /* s */ sizeof(short),
586 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 /* I */ sizeof(unsigned int),
595 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* L */ sizeof(unsigned long),
598 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 /* S */ sizeof(unsigned short),
606 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Decode one UTF-8 encoded character starting at *s (bounded by end) on
 * behalf of the unpack format datumtype.  Malformations are tolerated
 * (UTF8_ALLOW_ANY) only when WARN_UTF8 warnings are off.  Croaks if the
 * decoder reports a length of -1 or 0, and can warn that the character
 * wrapped.
 * NOTE(review): the lines that test the decoded value, advance *s and
 * return the result are not visible here. */
617 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
620 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
621 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
622 /* We try to process malformed UTF-8 as much as possible (preferably with
623 warnings), but these two mean we make no progress in the string and
624 might enter an infinite loop */
625 if (retlen == (STRLEN) -1 || retlen == 0)
626 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
627 (int) TYPE_NO_MODIFIERS(datumtype));
629 if (ckWARN(WARN_UNPACK))
630 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
631 "Character in '%c' format wrapped in unpack",
632 (int) TYPE_NO_MODIFIERS(datumtype));
639 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
640 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
/* Decode buf_len UTF-8 encoded characters starting at *s and store each
 * one, truncated to a U8, in consecutive bytes of buf.  Returns FALSE if
 * the input reaches end before buf_len characters have been read.
 * datumtype is used only for diagnostics (format letter, pack vs unpack).
 * NOTE(review): buf is declared const char * but is written through a cast
 * to U8 * in the decode loop.
 * NOTE(review): the declaration and updates of `bad`, the final update of
 * *s and the success return are not visible here. */
644 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
648 const char *from = *s;
/* First pass only checks (UTF8_CHECK_ONLY), so it emits no warnings. */
650 const U32 flags = ckWARN(WARN_UTF8) ?
651 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
652 for (;buf_len > 0; buf_len--) {
653 if (from >= end) return FALSE;
654 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
/* On a decode failure skip ahead by the length implied by the lead byte;
 * otherwise skip the decoded length. */
655 if (retlen == (STRLEN) -1 || retlen == 0) {
656 from += UTF8SKIP(from);
658 } else from += retlen;
663 *(U8 *)buf++ = (U8)val;
665 /* We have enough characters for the buffer. Did we have problems ? */
668 /* Rewalk the string fragment while warning */
/* This inner `flags` shadows the outer one and drops UTF8_CHECK_ONLY so
 * the decoder is allowed to warn. */
670 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
671 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
672 if (ptr >= end) break;
673 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
/* Skipping by UTF8SKIP may have stepped past end; clamp. */
675 if (from > end) from = end;
/* NOTE(review): the guard tests WARN_UNPACK only, yet the warning is issued
 * under WARN_PACK when TYPE_IS_PACK is set; the check and the category
 * probably ought to agree - confirm. */
677 if ((bad & 2) && ckWARN(WARN_UNPACK))
678 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
679 WARN_PACK : WARN_UNPACK),
680 "Character(s) in '%c' format wrapped in %s",
681 (int) TYPE_NO_MODIFIERS(datumtype),
682 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
689 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
693 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
694 if (val >= 0x100 || !ISUUCHAR(val) ||
695 retlen == (STRLEN) -1 || retlen == 0) {
699 *out = PL_uudmap[val] & 077;
/* Copy the len bytes at start to dest in upgraded form: a byte for which
 * UNI_IS_INVARIANT holds is written as a single byte, any other byte as
 * the pair UTF8_EIGHT_BIT_HI / UTF8_EIGHT_BIT_LO.  dest therefore needs
 * room for up to two output bytes per input byte.
 * NOTE(review): the loop advance and the return statement are not visible
 * here; PUSH_BYTES and PUSH_BYTE assign the result to their write cursor,
 * so it presumably returns the advanced dest - confirm. */
705 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
706 const U8 * const end = start + len;
708 while (start < end) {
709 const UV uv = NATIVE_TO_ASCII(*start);
710 if (UNI_IS_INVARIANT(uv))
711 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
713 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
721 #define PUSH_BYTES(utf8, cur, buf, len) \
724 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
726 Copy(buf, cur, len, char); \
731 #define GROWING(utf8, cat, start, cur, in_len) \
733 STRLEN glen = (in_len); \
734 if (utf8) glen *= UTF8_EXPAND; \
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
741 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743 const STRLEN glen = (in_len); \
745 if (utf8) gl *= UTF8_EXPAND; \
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 SvCUR_set((cat), (cur) - (start)); \
749 (start) = sv_exp_grow(cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
752 PUSH_BYTES(utf8, cur, buf, glen); \
755 #define PUSH_BYTE(utf8, s, byte) \
758 const U8 au8 = (byte); \
759 (s) = bytes_to_uni(&au8, 1, (s)); \
760 } else *(U8 *)(s)++ = (byte); \
763 /* Only to be used inside a loop (see the break) */
764 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
/* Name of the operation in progress, for use in diagnostics: "pack" when
 * FLAG_PACK is set in symptr->flags, otherwise "unpack". */
776 static const char *_action( const tempsym_t* symptr )
778 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
781 /* Returns the sizeof() struct described by pat */
783 S_measure_struct(pTHX_ tempsym_t* symptr)
787 while (next_symbol(symptr)) {
791 switch (symptr->howlen) {
793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
797 /* e_no_len and e_number */
798 len = symptr->length;
802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
811 #ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
818 case 'U': /* XXXX Is it correct? */
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
822 (int) TYPE_NO_MODIFIERS(symptr->code),
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 if (!len) /* Avoid division by 0 */
845 len = total % len; /* Assumed: the start is aligned. */
850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
880 size = sizeof(char*);
890 /* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
894 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
896 while (patptr < patend) {
897 const char c = *patptr++;
904 while (patptr < patend && *patptr != '\n')
908 patptr = group_end(patptr, patend, ')') + 1;
910 patptr = group_end(patptr, patend, ']') + 1;
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
918 /* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
923 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
/* The first character is consumed as a digit without being checked; the
 * visible callers test isDIGIT() before calling. */
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
/* Conservative overflow guard: croak as soon as another digit follows a
 * value >= 0x7FFFFFFF/10, i.e. before len * 10 could exceed the I32 range. */
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
935 /* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
939 S_next_symbol(pTHX_ tempsym_t* symptr )
941 const char* patptr = symptr->patptr;
942 const char* const patend = symptr->patend;
944 symptr->flags &= ~FLAG_SLASH;
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
949 else if (*patptr == '#') {
951 while (patptr < patend && *patptr != '\n')
956 /* We should have found a template code */
957 I32 code = *patptr++ & 0xFF;
958 U32 inherited_modifiers = 0;
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
964 "Invalid type ',' in %s", _action( symptr ) );
969 /* for '(', skip to ')' */
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
987 /* look for modifiers */
988 while (patptr < patend) {
993 modifier = TYPE_IS_SHRIEKING;
994 allowed = SHRIEKING_ALLOWED_TYPES;
996 #ifdef PERL_PACK_CAN_BYTEORDER
998 modifier = TYPE_IS_BIG_ENDIAN;
999 allowed = ENDIANNESS_ALLOWED_TYPES;
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1005 #endif /* PERL_PACK_CAN_BYTEORDER */
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1017 allowed, _action( symptr ) );
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1025 *patptr, _action( symptr ) );
1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
1031 _action( symptr ) );
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1041 /* look for count and/or / */
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1047 } else if (*patptr == '*') {
1049 symptr->howlen = e_star;
1051 } else if (*patptr == '[') {
1052 const char* lenptr = ++patptr;
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1060 _action( symptr ) );
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1069 symptr->howlen = e_no_len;
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1077 else if (*patptr == '#') {
1079 while (patptr < patend && *patptr != '\n')
1081 if (patptr < patend)
1084 if (*patptr == '/') {
1085 symptr->flags |= FLAG_SLASH;
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1090 _action( symptr ) );
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1101 symptr->code = code;
1102 symptr->patptr = patptr;
1106 symptr->patptr = patptr;
1111 There is no way to cleanly handle the case where we should process the
1112 string per byte in its upgraded form while it's really in downgraded form
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
1117 themselves if they need to do a lot of unpacks like this on it
/* Scan the template [pat, patend) and return TRUE when a 'U' is found
 * while `first` is still set, or when a 'U' is immediately followed by
 * '0'.  '#' comments are skipped up to the next newline; a comment with no
 * terminating newline ends the scan with FALSE.
 * NOTE(review): the declaration of `first`, the per-character advance and
 * the final return are not visible here; `first` presumably starts out
 * TRUE - confirm. */
1120 need_utf8(const char *pat, const char *patend)
1123 while (pat < patend) {
1124 if (pat[0] == '#') {
1126 pat = (const char *) memchr(pat, '\n', patend-pat);
1127 if (!pat) return FALSE;
1128 } else if (pat[0] == 'U') {
1129 if (first || pat[1] == '0') return TRUE;
/* Any character other than '#' or 'U' clears `first`. */
1130 } else first = FALSE;
/* Return the first character of the template [pat, patend) that does not
 * start a '#' comment; on '#' the scan jumps to the next newline found by
 * memchr().  Whitespace is not skipped, so a leading blank is returned
 * as-is.
 * NOTE(review): the handling of a comment without a newline and the return
 * value for an exhausted template are not visible here. */
1137 first_symbol(const char *pat, const char *patend) {
1138 while (pat < patend) {
1139 if (pat[0] != '#') return pat[0];
1141 pat = (const char *) memchr(pat, '\n', patend-pat);
1149 =for apidoc unpackstring
1151 The engine implementing the unpack() Perl function. C<unpackstring> puts the
1152 extracted list items on the stack and returns the number of elements.
1153 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1158 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1162 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1163 else if (need_utf8(pat, patend)) {
1164 /* We probably should try to avoid this in case a scalar context call
1165 wouldn't get to the "U0" */
1166 STRLEN len = strend - s;
1167 s = (char *) bytes_to_utf8((U8 *) s, &len);
1170 flags |= FLAG_DO_UTF8;
1173 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1174 flags |= FLAG_PARSE_UTF8;
1176 TEMPSYM_INIT(&sym, pat, patend, flags);
1178 return unpack_rec(&sym, s, s, strend, NULL );
1182 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1186 const I32 start_sp_offset = SP - PL_stack_base;
1192 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1193 bool beyond = FALSE;
1194 bool explicit_length;
1195 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1196 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1197 symptr->strbeg = s - strbeg;
1199 while (next_symbol(symptr)) {
1202 I32 datumtype = symptr->code;
1203 /* do first one only unless in list context
1204 / is implemented by unpacking the count, then popping it from the
1205 stack, so must check that we're not in the middle of a / */
1206 if ( unpack_only_one
1207 && (SP - PL_stack_base == start_sp_offset + 1)
1208 && (datumtype != '/') ) /* XXX can this be omitted */
1211 switch (howlen = symptr->howlen) {
1213 len = strend - strbeg; /* long enough */
1216 /* e_no_len and e_number */
1217 len = symptr->length;
1221 explicit_length = TRUE;
1223 beyond = s >= strend;
1225 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1227 /* props nonzero means we can process this letter. */
1228 const long size = props & PACK_SIZE_MASK;
1229 const long howmany = (strend - s) / size;
1233 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1234 if (len && unpack_only_one) len = 1;
1240 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1242 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1245 if (howlen == e_no_len)
1246 len = 16; /* len is not specified */
1254 tempsym_t savsym = *symptr;
1255 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1256 symptr->flags |= group_modifiers;
1257 symptr->patend = savsym.grpend;
1258 symptr->previous = &savsym;
1262 symptr->patptr = savsym.grpbeg;
1263 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1264 else symptr->flags &= ~FLAG_PARSE_UTF8;
1265 unpack_rec(symptr, s, strbeg, strend, &s);
1266 if (s == strend && savsym.howlen == e_star)
1267 break; /* No way to continue */
1270 savsym.flags = symptr->flags & ~group_modifiers;
1274 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1275 case '.' | TYPE_IS_SHRIEKING:
1280 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1281 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1282 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1283 const bool u8 = utf8;
1285 if (howlen == e_star) from = strbeg;
1286 else if (len <= 0) from = s;
1288 tempsym_t *group = symptr;
1290 while (--len && group) group = group->previous;
1291 from = group ? strbeg + group->strbeg : strbeg;
1294 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1295 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1296 XPUSHs(sv_2mortal(sv));
1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '@' | TYPE_IS_SHRIEKING:
1303 s = strbeg + symptr->strbeg;
1304 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1305 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1306 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1312 Perl_croak(aTHX_ "'@' outside of string in unpack");
1317 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1320 Perl_croak(aTHX_ "'@' outside of string in unpack");
1324 case 'X' | TYPE_IS_SHRIEKING:
1325 if (!len) /* Avoid division by 0 */
1328 const char *hop, *last;
1330 hop = last = strbeg;
1332 hop += UTF8SKIP(hop);
1339 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1343 len = (s - strbeg) % len;
1349 Perl_croak(aTHX_ "'X' outside of string in unpack");
1350 while (--s, UTF8_IS_CONTINUATION(*s)) {
1352 Perl_croak(aTHX_ "'X' outside of string in unpack");
1357 if (len > s - strbeg)
1358 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1362 case 'x' | TYPE_IS_SHRIEKING: {
1364 if (!len) /* Avoid division by 0 */
1366 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1367 else ai32 = (s - strbeg) % len;
1368 if (ai32 == 0) break;
1376 Perl_croak(aTHX_ "'x' outside of string in unpack");
1381 if (len > strend - s)
1382 Perl_croak(aTHX_ "'x' outside of string in unpack");
1387 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1393 /* Preliminary length estimate is assumed done in 'W' */
1394 if (len > strend - s) len = strend - s;
1400 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1401 if (hop >= strend) {
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1408 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1410 } else if (len > strend - s)
1413 if (datumtype == 'Z') {
1414 /* 'Z' strips stuff after first null */
1415 const char *ptr, *end;
1417 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1418 sv = newSVpvn(s, ptr-s);
1419 if (howlen == e_star) /* exact for 'Z*' */
1420 len = ptr-s + (ptr != strend ? 1 : 0);
1421 } else if (datumtype == 'A') {
1422 /* 'A' strips both nulls and spaces */
1424 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1425 for (ptr = s+len-1; ptr >= s; ptr--)
1426 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1427 !is_utf8_space((U8 *) ptr)) break;
1428 if (ptr >= s) ptr += UTF8SKIP(ptr);
1431 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 for (ptr = s+len-1; ptr >= s; ptr--)
1434 if (*ptr != 0 && !isSPACE(*ptr)) break;
1437 sv = newSVpvn(s, ptr-s);
1438 } else sv = newSVpvn(s, len);
1442 /* Undo any upgrade done due to need_utf8() */
1443 if (!(symptr->flags & FLAG_WAS_UTF8))
1444 sv_utf8_downgrade(sv, 0);
1446 XPUSHs(sv_2mortal(sv));
1452 if (howlen == e_star || len > (strend - s) * 8)
1453 len = (strend - s) * 8;
1457 Newxz(PL_bitcount, 256, char);
1458 for (bits = 1; bits < 256; bits++) {
1459 if (bits & 1) PL_bitcount[bits]++;
1460 if (bits & 2) PL_bitcount[bits]++;
1461 if (bits & 4) PL_bitcount[bits]++;
1462 if (bits & 8) PL_bitcount[bits]++;
1463 if (bits & 16) PL_bitcount[bits]++;
1464 if (bits & 32) PL_bitcount[bits]++;
1465 if (bits & 64) PL_bitcount[bits]++;
1466 if (bits & 128) PL_bitcount[bits]++;
1470 while (len >= 8 && s < strend) {
1471 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1476 cuv += PL_bitcount[*(U8 *)s++];
1479 if (len && s < strend) {
1481 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1482 if (datumtype == 'b')
1484 if (bits & 1) cuv++;
1489 if (bits & 0x80) cuv++;
1496 sv = sv_2mortal(newSV(len ? len : 1));
1499 if (datumtype == 'b') {
1501 const I32 ai32 = len;
1502 for (len = 0; len < ai32; len++) {
1503 if (len & 7) bits >>= 1;
1505 if (s >= strend) break;
1506 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1507 } else bits = *(U8 *) s++;
1508 *str++ = bits & 1 ? '1' : '0';
1512 const I32 ai32 = len;
1513 for (len = 0; len < ai32; len++) {
1514 if (len & 7) bits <<= 1;
1516 if (s >= strend) break;
1517 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1518 } else bits = *(U8 *) s++;
1519 *str++ = bits & 0x80 ? '1' : '0';
1523 SvCUR_set(sv, str - SvPVX_const(sv));
1530 /* Preliminary length estimate, acceptable for utf8 too */
1531 if (howlen == e_star || len > (strend - s) * 2)
1532 len = (strend - s) * 2;
1533 sv = sv_2mortal(newSV(len ? len : 1));
1536 if (datumtype == 'h') {
1539 for (len = 0; len < ai32; len++) {
1540 if (len & 1) bits >>= 4;
1542 if (s >= strend) break;
1543 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1544 } else bits = * (U8 *) s++;
1545 *str++ = PL_hexdigit[bits & 15];
1549 const I32 ai32 = len;
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 1) bits <<= 4;
1553 if (s >= strend) break;
1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1555 } else bits = *(U8 *) s++;
1556 *str++ = PL_hexdigit[(bits >> 4) & 15];
1560 SvCUR_set(sv, str - SvPVX_const(sv));
1566 if (explicit_length)
1567 /* Switch to "character" mode */
1568 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1573 while (len-- > 0 && s < strend) {
1578 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1579 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1580 if (retlen == (STRLEN) -1 || retlen == 0)
1581 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1585 aint = *(U8 *)(s)++;
1586 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1589 PUSHs(sv_2mortal(newSViv((IV)aint)));
1590 else if (checksum > bits_in_uv)
1591 cdouble += (NV)aint;
1599 while (len-- > 0 && s < strend) {
1601 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1602 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1607 PUSHs(sv_2mortal(newSVuv((UV) val)));
1608 else if (checksum > bits_in_uv)
1609 cdouble += (NV) val;
1613 } else if (!checksum)
1615 const U8 ch = *(U8 *) s++;
1616 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1618 else if (checksum > bits_in_uv)
1619 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1621 while (len-- > 0) cuv += *(U8 *) s++;
1625 if (explicit_length) {
1626 /* Switch to "bytes in UTF-8" mode */
1627 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1629 /* Should be impossible due to the need_utf8() test */
1630 Perl_croak(aTHX_ "U0 mode on a byte string");
1634 if (len > strend - s) len = strend - s;
1636 if (len && unpack_only_one) len = 1;
1640 while (len-- > 0 && s < strend) {
1644 U8 result[UTF8_MAXLEN];
1645 const char *ptr = s;
1647 /* Bug: warns about bad utf8 even if we are short on bytes
1648 and will break out of the loop */
1649 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1652 len = UTF8SKIP(result);
1653 if (!uni_to_bytes(aTHX_ &ptr, strend,
1654 (char *) &result[1], len-1, 'U')) break;
1655 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1658 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1659 if (retlen == (STRLEN) -1 || retlen == 0)
1660 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1664 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1665 else if (checksum > bits_in_uv)
1666 cdouble += (NV) auv;
1671 case 's' | TYPE_IS_SHRIEKING:
1672 #if SHORTSIZE != SIZE16
1675 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1676 DO_BO_UNPACK(ashort, s);
1678 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV)ashort;
1692 #if U16SIZE > SIZE16
1695 SHIFT16(utf8, s, strend, &ai16, datumtype);
1696 DO_BO_UNPACK(ai16, 16);
1697 #if U16SIZE > SIZE16
1702 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV)ai16;
1709 case 'S' | TYPE_IS_SHRIEKING:
1710 #if SHORTSIZE != SIZE16
1712 unsigned short aushort;
1713 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1714 DO_BO_UNPACK(aushort, s);
1716 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)aushort;
1731 #if U16SIZE > SIZE16
1734 SHIFT16(utf8, s, strend, &au16, datumtype);
1735 DO_BO_UNPACK(au16, 16);
1737 if (datumtype == 'n')
1738 au16 = PerlSock_ntohs(au16);
1741 if (datumtype == 'v')
1745 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1746 else if (checksum > bits_in_uv)
1747 cdouble += (NV) au16;
1752 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1753 case 'v' | TYPE_IS_SHRIEKING:
1754 case 'n' | TYPE_IS_SHRIEKING:
1757 # if U16SIZE > SIZE16
1760 SHIFT16(utf8, s, strend, &ai16, datumtype);
1762 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1763 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1764 # endif /* HAS_NTOHS */
1766 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1767 ai16 = (I16) vtohs((U16) ai16);
1768 # endif /* HAS_VTOHS */
1770 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1771 else if (checksum > bits_in_uv)
1772 cdouble += (NV) ai16;
1777 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1779 case 'i' | TYPE_IS_SHRIEKING:
1782 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1783 DO_BO_UNPACK(aint, i);
1785 PUSHs(sv_2mortal(newSViv((IV)aint)));
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)aint;
1793 case 'I' | TYPE_IS_SHRIEKING:
1796 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1797 DO_BO_UNPACK(auint, i);
1799 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1800 else if (checksum > bits_in_uv)
1801 cdouble += (NV)auint;
1809 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1810 #if IVSIZE == INTSIZE
1811 DO_BO_UNPACK(aiv, i);
1812 #elif IVSIZE == LONGSIZE
1813 DO_BO_UNPACK(aiv, l);
1814 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1815 DO_BO_UNPACK(aiv, 64);
1817 Perl_croak(aTHX_ "'j' not supported on this platform");
1820 PUSHs(sv_2mortal(newSViv(aiv)));
1821 else if (checksum > bits_in_uv)
1830 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1831 #if IVSIZE == INTSIZE
1832 DO_BO_UNPACK(auv, i);
1833 #elif IVSIZE == LONGSIZE
1834 DO_BO_UNPACK(auv, l);
1835 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1836 DO_BO_UNPACK(auv, 64);
1838 Perl_croak(aTHX_ "'J' not supported on this platform");
1841 PUSHs(sv_2mortal(newSVuv(auv)));
1842 else if (checksum > bits_in_uv)
1848 case 'l' | TYPE_IS_SHRIEKING:
1849 #if LONGSIZE != SIZE32
1852 SHIFT_VAR(utf8, s, strend, along, datumtype);
1853 DO_BO_UNPACK(along, l);
1855 PUSHs(sv_2mortal(newSViv((IV)along)));
1856 else if (checksum > bits_in_uv)
1857 cdouble += (NV)along;
1868 #if U32SIZE > SIZE32
1871 SHIFT32(utf8, s, strend, &ai32, datumtype);
1872 DO_BO_UNPACK(ai32, 32);
1873 #if U32SIZE > SIZE32
1874 if (ai32 > 2147483647) ai32 -= 4294967296;
1877 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1878 else if (checksum > bits_in_uv)
1879 cdouble += (NV)ai32;
1884 case 'L' | TYPE_IS_SHRIEKING:
1885 #if LONGSIZE != SIZE32
1887 unsigned long aulong;
1888 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1889 DO_BO_UNPACK(aulong, l);
1891 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1892 else if (checksum > bits_in_uv)
1893 cdouble += (NV)aulong;
1906 #if U32SIZE > SIZE32
1909 SHIFT32(utf8, s, strend, &au32, datumtype);
1910 DO_BO_UNPACK(au32, 32);
1912 if (datumtype == 'N')
1913 au32 = PerlSock_ntohl(au32);
1916 if (datumtype == 'V')
1920 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1921 else if (checksum > bits_in_uv)
1922 cdouble += (NV)au32;
1927 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1928 case 'V' | TYPE_IS_SHRIEKING:
1929 case 'N' | TYPE_IS_SHRIEKING:
1932 # if U32SIZE > SIZE32
1935 SHIFT32(utf8, s, strend, &ai32, datumtype);
1937 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1938 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1941 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1942 ai32 = (I32)vtohl((U32)ai32);
1945 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1946 else if (checksum > bits_in_uv)
1947 cdouble += (NV)ai32;
1952 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1956 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1957 DO_BO_UNPACK_PC(aptr);
1958 /* newSVpv generates undef if aptr is NULL */
1959 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1967 while (len > 0 && s < strend) {
1969 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1970 auv = (auv << 7) | (ch & 0x7f);
1971 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1974 PUSHs(sv_2mortal(newSVuv(auv)));
1979 if (++bytes >= sizeof(UV)) { /* promote to string */
1982 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1983 while (s < strend) {
1984 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1985 sv = mul128(sv, (U8)(ch & 0x7f));
1991 t = SvPV_nolen_const(sv);
1995 PUSHs(sv_2mortal(sv));
2000 if ((s >= strend) && bytes)
2001 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2005 if (symptr->howlen == e_star)
2006 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2008 if (s + sizeof(char*) <= strend) {
2010 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2011 DO_BO_UNPACK_PC(aptr);
2012 /* newSVpvn generates undef if aptr is NULL */
2013 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2020 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2021 DO_BO_UNPACK(aquad, 64);
2023 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2024 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2025 else if (checksum > bits_in_uv)
2026 cdouble += (NV)aquad;
2034 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2035 DO_BO_UNPACK(auquad, 64);
2037 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2038 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2039 else if (checksum > bits_in_uv)
2040 cdouble += (NV)auquad;
2045 #endif /* HAS_QUAD */
2046 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2050 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2051 DO_BO_UNPACK_N(afloat, float);
2053 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2061 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2062 DO_BO_UNPACK_N(adouble, double);
2064 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2072 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2073 DO_BO_UNPACK_N(anv, NV);
2075 PUSHs(sv_2mortal(newSVnv(anv)));
2080 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2083 long double aldouble;
2084 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2085 DO_BO_UNPACK_N(aldouble, long double);
2087 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2089 cdouble += aldouble;
2095 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2096 sv = sv_2mortal(newSV(l));
2097 if (l) SvPOK_on(sv);
2100 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2105 next_uni_uu(aTHX_ &s, strend, &a);
2106 next_uni_uu(aTHX_ &s, strend, &b);
2107 next_uni_uu(aTHX_ &s, strend, &c);
2108 next_uni_uu(aTHX_ &s, strend, &d);
2109 hunk[0] = (char)((a << 2) | (b >> 4));
2110 hunk[1] = (char)((b << 4) | (c >> 2));
2111 hunk[2] = (char)((c << 6) | d);
2112 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2120 /* possible checksum byte */
2121 const char *skip = s+UTF8SKIP(s);
2122 if (skip < strend && *skip == '\n')
2128 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2132 len = PL_uudmap[*(U8*)s++] & 077;
2134 if (s < strend && ISUUCHAR(*s))
2135 a = PL_uudmap[*(U8*)s++] & 077;
2138 if (s < strend && ISUUCHAR(*s))
2139 b = PL_uudmap[*(U8*)s++] & 077;
2142 if (s < strend && ISUUCHAR(*s))
2143 c = PL_uudmap[*(U8*)s++] & 077;
2146 if (s < strend && ISUUCHAR(*s))
2147 d = PL_uudmap[*(U8*)s++] & 077;
2150 hunk[0] = (char)((a << 2) | (b >> 4));
2151 hunk[1] = (char)((b << 4) | (c >> 2));
2152 hunk[2] = (char)((c << 6) | d);
2153 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2158 else /* possible checksum byte */
2159 if (s + 1 < strend && s[1] == '\n')
2168 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2169 (checksum > bits_in_uv &&
2170 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2173 anv = (NV) (1 << (checksum & 15));
2174 while (checksum >= 16) {
2178 while (cdouble < 0.0)
2180 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2181 sv = newSVnv(cdouble);
2184 if (checksum < bits_in_uv) {
2185 UV mask = ((UV)1 << checksum) - 1;
2190 XPUSHs(sv_2mortal(sv));
2194 if (symptr->flags & FLAG_SLASH){
2195 if (SP - PL_stack_base - start_sp_offset <= 0)
2196 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2197 if( next_symbol(symptr) ){
2198 if( symptr->howlen == e_number )
2199 Perl_croak(aTHX_ "Count after length/code in unpack" );
2201 /* ...end of char buffer then no decent length available */
2202 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2204 /* take top of stack (hope it's numeric) */
2207 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2210 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2212 datumtype = symptr->code;
2213 explicit_length = FALSE;
2221 return SP - PL_stack_base - start_sp_offset;
/* Body of the unpack op as visible in this listing (the function header and
 * some short lines are not shown here). */
/* Calling context of the op; compared against G_SCALAR below. */
2229 I32 gimme = GIMME_V;
/* 'left' supplies the template bytes, 'right' the data bytes to decode. */
2232 const char *pat = SvPV_const(left, llen);
2233 const char *s = SvPV_const(right, rlen);
/* One-past-the-end pointers for the data and the template. */
2234 const char *strend = s + rlen;
2235 const char *patend = pat + llen;
/* Run the unpack engine.  FLAG_UNPACK_ONLY_ONE is passed in scalar context,
 * FLAG_DO_UTF8 when the data SV satisfies DO_UTF8().  cnt receives the
 * engine's return value (presumably the number of values produced --
 * TODO confirm against unpackstring). */
2239 cnt = unpackstring(pat, patend, s, strend,
2240 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2241 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
/* Scalar context with a zero count: push undef so a value is returned. */
2244 if ( !cnt && gimme == G_SCALAR )
2245 PUSHs(&PL_sv_undef);
2250 doencodes(U8 *h, const char *s, I32 len)
2252 *h++ = PL_uuemap[len];
2254 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2255 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2256 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2257 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2262 const char r = (len > 1 ? s[1] : '\0');
2263 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2264 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2265 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2266 *h++ = PL_uuemap[0];
2273 S_is_an_int(pTHX_ const char *s, STRLEN l)
/* Works on a private copy of the l bytes at s, held in a new SV.
 * NOTE(review): the scanning logic between the visible lines is not part of
 * this listing; presumably it validates/normalises a decimal integer string
 * -- confirm against the full source. */
/* Copy of the input; the caller's buffer is never written through. */
2275 SV *result = newSVpvn(s, l);
2276 char *const result_c = SvPV_nolen(result); /* convenience */
/* Write cursor into the copy, starting at its first byte. */
2277 char *out = result_c;
/* Two places release the copy again (presumably rejection paths --
 * TODO confirm what is returned there). */
2287 SvREFCNT_dec(result);
2310 SvREFCNT_dec(result);
/* Set the copy's length to the number of bytes written through 'out'. */
2316 SvCUR_set(result, out - result_c);
2320 /* pnum must be '\0' terminated */
2322 S_div128(pTHX_ SV *pnum, bool *done)
/* Operates in place on the decimal digit string held in pnum.
 * NOTE(review): the digit loop and the handling of *done and of the return
 * value are not visible in this listing; presumably this is one long-division
 * step of the number by 128 -- confirm against the full source. */
/* s is the SV's string buffer, len its length. */
2325 char * const s = SvPV(pnum, len);
/* Fold the next decimal digit *t into the value carried in m. */
2331 const int i = m * 10 + (*t - '0');
/* i >> 7 is i / 128 for the non-negative values used here. */
2332 const int r = (i >> 7); /* r < 10 */
/* New string length is the distance from the buffer start to t. */
2340 SvCUR_set(pnum, (STRLEN) (t - s));
2345 =for apidoc packlist
2347 The engine implementing the pack() Perl function.
2353 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
/* Packs the SVs in [beglist, endlist) according to the template
 * [pat, patend) and stores the result in cat by delegating to pack_rec(). */
/* Initialise the template cursor for the whole pattern, in pack mode. */
2358 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2360 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2361 Also make sure any UTF8 flag is loaded */
2362 SvPV_force_nolen(cat);
/* NOTE(review): the condition guarding the next line is not visible in this
 * listing; presumably it applies only when cat is already UTF-8. */
2364 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
/* The value returned by pack_rec() is deliberately discarded. */
2366 (void)pack_rec( cat, &sym, beglist, endlist );
2369 /* like sv_utf8_upgrade, but also repoint the group start markers:
 * the strbeg offset of sym_ptr and of every group reachable through
 * ->previous is recomputed against the converted buffer.
 * NOTE(review): some short statements of this function are not part of
 * this listing; the comments below describe only the visible lines. */
2371 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2374 const char *from_ptr, *from_start, *from_end, **marks, **m;
2375 char *to_start, *to_ptr;
/* Nothing to do when the SV is already flagged as UTF-8. */
2377 if (SvUTF8(sv)) return;
/* Find the first byte that is not invariant under the UTF-8 encoding. */
2379 from_start = SvPVX_const(sv);
2380 from_end = from_start + SvCUR(sv);
2381 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2382 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2383 if (from_ptr == from_end) {
2384 /* Simple case: no character needs to be changed */
/* Size of the new buffer: the unchanged prefix, plus UTF8_EXPAND bytes for
 * every remaining source byte, plus one extra byte (presumably for the
 * trailing NUL). */
2389 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2390 Newx(to_start, len, char);
/* The invariant prefix is copied verbatim; to_ptr is the write cursor. */
2391 Copy(from_start, to_start, from_ptr-from_start, char);
2392 to_ptr = to_start + (from_ptr-from_start);
/* marks[level] holds each group's start as a pointer into the old buffer;
 * the extra last slot is a sentinel one past from_end, which none of the
 * scans below can match. */
2394 Newx(marks, sym_ptr->level+2, const char *);
2395 for (group=sym_ptr; group; group = group->previous)
2396 marks[group->level] = from_start + group->strbeg;
2397 marks[sym_ptr->level+1] = from_end+1;
/* Marks inside the unchanged prefix keep their offset in the new buffer.
 * This scan relies on marks[] being in ascending order -- presumably
 * guaranteed because outer groups start no later than inner ones. */
2398 for (m = marks; *m < from_ptr; m++)
2399 *m = to_start + (*m-from_start);
/* Convert byte by byte; any mark sitting on the current source byte is
 * repointed to the current destination position first. */
2401 for (;from_ptr < from_end; from_ptr++) {
2402 while (*m == from_ptr) *m++ = to_ptr;
2403 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
/* Marks located exactly at the end of the old string. */
2407 while (*m == from_ptr) *m++ = to_ptr;
/* Every real mark must have been consumed, leaving m on the sentinel. */
2408 if (m != marks + sym_ptr->level+1) {
2411 Perl_croak(aTHX_ "Assertion: marks beyond string end");
/* Store the translated marks back as offsets into the new buffer. */
2413 for (group=sym_ptr; group; group = group->previous)
2414 group->strbeg = marks[group->level] - to_start;
/* Offset (OOK) handling: add the offset SvIVX back onto the recorded length
 * and step from_start back by the same amount, so that it points at the
 * start of the original allocation before it is freed. */
2419 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2420 from_start -= SvIVX(sv);
2423 SvFLAGS(sv) &= ~SVf_OOK;
/* Release the old buffer and install the converted one with its length. */
2426 Safefree(from_start);
2427 SvPV_set(sv, to_start);
2428 SvCUR_set(sv, to_ptr - to_start);
2433 /* Exponential string grower. Makes string extension effectively O(n).
2434 'needed' says how many extra bytes we need (not counting the final '\0').
2435 Only grows the string if there is an actual lack of space.
2438 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
/* Returns the string buffer of sv, enlarged first when fewer than
 * needed+1 bytes are free beyond the current length. */
/* cur: bytes currently used; len: bytes currently allocated. */
2439 const STRLEN cur = SvCUR(sv);
2440 const STRLEN len = SvLEN(sv);
/* Strictly more than 'needed' bytes free: return the buffer unchanged. */
2442 if (len - cur > needed) return SvPVX(sv);
/* Grow by the larger of 'needed' and the current allocation, so the
 * allocation at least doubles on every real extension. */
2443 extend = needed > len ? needed : len;
/* Request the old allocation plus 'extend', plus one more byte. */
2444 return SvGROW(sv, len+extend+1);
2449 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2452 tempsym_t lookahead;
2453 I32 items = endlist - beglist;
2454 bool found = next_symbol(symptr);
2455 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2456 bool warn_utf8 = ckWARN(WARN_UTF8);
2458 if (symptr->level == 0 && found && symptr->code == 'U') {
2459 marked_upgrade(aTHX_ cat, symptr);
2460 symptr->flags |= FLAG_DO_UTF8;
2463 symptr->strbeg = SvCUR(cat);
2469 SV *lengthcode = NULL;
2470 I32 datumtype = symptr->code;
2471 howlen_t howlen = symptr->howlen;
2472 char *start = SvPVX(cat);
2473 char *cur = start + SvCUR(cat);
2475 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2479 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2483 /* e_no_len and e_number */
2484 len = symptr->length;
2489 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2491 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2492 /* We can process this letter. */
2493 STRLEN size = props & PACK_SIZE_MASK;
2494 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2498 /* Look ahead for next symbol. Do we have code/code? */
2499 lookahead = *symptr;
2500 found = next_symbol(&lookahead);
2501 if (symptr->flags & FLAG_SLASH) {
2503 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2504 if (strchr("aAZ", lookahead.code)) {
2505 if (lookahead.howlen == e_number) count = lookahead.length;
2508 if (SvGAMAGIC(*beglist)) {
2509 /* Avoid reading the active data more than once
2510 by copying it to a temporary. */
2512 const char *const pv = SvPV_const(*beglist, len);
2513 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2514 if (SvUTF8(*beglist))
2518 count = DO_UTF8(*beglist) ?
2519 sv_len_utf8(*beglist) : sv_len(*beglist);
2522 if (lookahead.code == 'Z') count++;
2525 if (lookahead.howlen == e_number && lookahead.length < items)
2526 count = lookahead.length;
2529 lookahead.howlen = e_number;
2530 lookahead.length = count;
2531 lengthcode = sv_2mortal(newSViv(count));
2534 /* Code inside the switch must take care to properly update
2535 cat (CUR length and '\0' termination) if it updated *cur and
2536 doesn't simply leave using break */
2537 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2539 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2540 (int) TYPE_NO_MODIFIERS(datumtype));
2542 Perl_croak(aTHX_ "'%%' may not be used in pack");
2545 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2546 case '.' | TYPE_IS_SHRIEKING:
2549 if (howlen == e_star) from = start;
2550 else if (len == 0) from = cur;
2552 tempsym_t *group = symptr;
2554 while (--len && group) group = group->previous;
2555 from = group ? start + group->strbeg : start;
2558 len = SvIV(fromstr);
2560 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2561 case '@' | TYPE_IS_SHRIEKING:
2564 from = start + symptr->strbeg;
2566 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2567 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2568 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2572 while (len && from < cur) {
2573 from += UTF8SKIP(from);
2577 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2579 /* Here we know from == cur */
2581 GROWING(0, cat, start, cur, len);
2582 Zero(cur, len, char);
2584 } else if (from < cur) {
2587 } else goto no_change;
2595 if (len > 0) goto grow;
2596 if (len == 0) goto no_change;
2603 tempsym_t savsym = *symptr;
2604 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2605 symptr->flags |= group_modifiers;
2606 symptr->patend = savsym.grpend;
2608 symptr->previous = &lookahead;
2611 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2612 else symptr->flags &= ~FLAG_PARSE_UTF8;
2613 was_utf8 = SvUTF8(cat);
2614 symptr->patptr = savsym.grpbeg;
2615 beglist = pack_rec(cat, symptr, beglist, endlist);
2616 if (SvUTF8(cat) != was_utf8)
2617 /* This had better be an upgrade while in utf8==0 mode */
2620 if (savsym.howlen == e_star && beglist == endlist)
2621 break; /* No way to continue */
2623 items = endlist - beglist;
2624 lookahead.flags = symptr->flags & ~group_modifiers;
2627 case 'X' | TYPE_IS_SHRIEKING:
2628 if (!len) /* Avoid division by 0 */
2635 hop += UTF8SKIP(hop);
2642 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2646 len = (cur-start) % len;
2650 if (len < 1) goto no_change;
2654 Perl_croak(aTHX_ "'%c' outside of string in pack",
2655 (int) TYPE_NO_MODIFIERS(datumtype));
2656 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2658 Perl_croak(aTHX_ "'%c' outside of string in pack",
2659 (int) TYPE_NO_MODIFIERS(datumtype));
2665 if (cur - start < len)
2666 Perl_croak(aTHX_ "'%c' outside of string in pack",
2667 (int) TYPE_NO_MODIFIERS(datumtype));
2670 if (cur < start+symptr->strbeg) {
2671 /* Make sure group starts don't point into the void */
2673 const STRLEN length = cur-start;
2674 for (group = symptr;
2675 group && length < group->strbeg;
2676 group = group->previous) group->strbeg = length;
2677 lookahead.strbeg = length;
2680 case 'x' | TYPE_IS_SHRIEKING: {
2682 if (!len) /* Avoid division by 0 */
2684 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2685 else ai32 = (cur - start) % len;
2686 if (ai32 == 0) goto no_change;
2698 aptr = SvPV_const(fromstr, fromlen);
2699 if (DO_UTF8(fromstr)) {
2700 const char *end, *s;
2702 if (!utf8 && !SvUTF8(cat)) {
2703 marked_upgrade(aTHX_ cat, symptr);
2704 lookahead.flags |= FLAG_DO_UTF8;
2705 lookahead.strbeg = symptr->strbeg;
2708 cur = start + SvCUR(cat);
2710 if (howlen == e_star) {
2711 if (utf8) goto string_copy;
2715 end = aptr + fromlen;
2716 fromlen = datumtype == 'Z' ? len-1 : len;
2717 while ((I32) fromlen > 0 && s < end) {
2722 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2725 if (datumtype == 'Z') len++;
2731 fromlen = len - fromlen;
2732 if (datumtype == 'Z') fromlen--;
2733 if (howlen == e_star) {
2735 if (datumtype == 'Z') len++;
2737 GROWING(0, cat, start, cur, len);
2738 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2739 datumtype | TYPE_IS_PACK))
2740 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2744 if (howlen == e_star) {
2746 if (datumtype == 'Z') len++;
2748 if (len <= (I32) fromlen) {
2750 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2752 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2754 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2755 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2757 while (fromlen > 0) {
2758 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2764 if (howlen == e_star) {
2766 if (datumtype == 'Z') len++;
2768 if (len <= (I32) fromlen) {
2770 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2772 GROWING(0, cat, start, cur, len);
2773 Copy(aptr, cur, fromlen, char);
2777 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2783 const char *str, *end;
2790 str = SvPV_const(fromstr, fromlen);
2791 end = str + fromlen;
2792 if (DO_UTF8(fromstr)) {
2794 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2796 utf8_source = FALSE;
2797 utf8_flags = 0; /* Unused, but keep compilers happy */
2799 if (howlen == e_star) len = fromlen;
2800 field_len = (len+7)/8;
2801 GROWING(utf8, cat, start, cur, field_len);
2802 if (len > (I32)fromlen) len = fromlen;
2805 if (datumtype == 'B')
2809 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2811 } else bits |= *str++ & 1;
2812 if (l & 7) bits <<= 1;
2814 PUSH_BYTE(utf8, cur, bits);
2819 /* datumtype == 'b' */
2823 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2824 if (val & 1) bits |= 0x80;
2825 } else if (*str++ & 1)
2827 if (l & 7) bits >>= 1;
2829 PUSH_BYTE(utf8, cur, bits);
2835 if (datumtype == 'B')
2836 bits <<= 7 - (l & 7);
2838 bits >>= 7 - (l & 7);
2839 PUSH_BYTE(utf8, cur, bits);
2842 /* Determine how many chars are left in the requested field */
2844 if (howlen == e_star) field_len = 0;
2845 else field_len -= l;
2846 Zero(cur, field_len, char);
2852 const char *str, *end;
2859 str = SvPV_const(fromstr, fromlen);
2860 end = str + fromlen;
2861 if (DO_UTF8(fromstr)) {
2863 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2865 utf8_source = FALSE;
2866 utf8_flags = 0; /* Unused, but keep compilers happy */
2868 if (howlen == e_star) len = fromlen;
2869 field_len = (len+1)/2;
2870 GROWING(utf8, cat, start, cur, field_len);
2871 if (!utf8 && len > (I32)fromlen) len = fromlen;
2874 if (datumtype == 'H')
2878 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2879 if (val < 256 && isALPHA(val))
2880 bits |= (val + 9) & 0xf;
2883 } else if (isALPHA(*str))
2884 bits |= (*str++ + 9) & 0xf;
2886 bits |= *str++ & 0xf;
2887 if (l & 1) bits <<= 4;
2889 PUSH_BYTE(utf8, cur, bits);
2897 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2898 if (val < 256 && isALPHA(val))
2899 bits |= ((val + 9) & 0xf) << 4;
2901 bits |= (val & 0xf) << 4;
2902 } else if (isALPHA(*str))
2903 bits |= ((*str++ + 9) & 0xf) << 4;
2905 bits |= (*str++ & 0xf) << 4;
2906 if (l & 1) bits >>= 4;
2908 PUSH_BYTE(utf8, cur, bits);
2914 PUSH_BYTE(utf8, cur, bits);
2917 /* Determine how many chars are left in the requested field */
2919 if (howlen == e_star) field_len = 0;
2920 else field_len -= l;
2921 Zero(cur, field_len, char);
2929 aiv = SvIV(fromstr);
2930 if ((-128 > aiv || aiv > 127) &&
2932 Perl_warner(aTHX_ packWARN(WARN_PACK),
2933 "Character in 'c' format wrapped in pack");
2934 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2939 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2945 aiv = SvIV(fromstr);
2946 if ((0 > aiv || aiv > 0xff) &&
2948 Perl_warner(aTHX_ packWARN(WARN_PACK),
2949 "Character in 'C' format wrapped in pack");
2950 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2955 U8 in_bytes = (U8)IN_BYTES;
2957 end = start+SvLEN(cat)-1;
2958 if (utf8) end -= UTF8_MAXLEN-1;
2962 auv = SvUV(fromstr);
2963 if (in_bytes) auv = auv % 0x100;
2968 SvCUR_set(cat, cur - start);
2970 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2971 end = start+SvLEN(cat)-UTF8_MAXLEN;
2973 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2976 0 : UNICODE_ALLOW_ANY);
2981 SvCUR_set(cat, cur - start);
2982 marked_upgrade(aTHX_ cat, symptr);
2983 lookahead.flags |= FLAG_DO_UTF8;
2984 lookahead.strbeg = symptr->strbeg;
2987 cur = start + SvCUR(cat);
2988 end = start+SvLEN(cat)-UTF8_MAXLEN;
2991 if (ckWARN(WARN_PACK))
2992 Perl_warner(aTHX_ packWARN(WARN_PACK),
2993 "Character in 'W' format wrapped in pack");
2998 SvCUR_set(cat, cur - start);
2999 GROWING(0, cat, start, cur, len+1);
3000 end = start+SvLEN(cat)-1;
3002 *(U8 *) cur++ = (U8)auv;
3011 if (!(symptr->flags & FLAG_DO_UTF8)) {
3012 marked_upgrade(aTHX_ cat, symptr);
3013 lookahead.flags |= FLAG_DO_UTF8;
3014 lookahead.strbeg = symptr->strbeg;
3020 end = start+SvLEN(cat);
3021 if (!utf8) end -= UTF8_MAXLEN;
3025 auv = SvUV(fromstr);
3027 U8 buffer[UTF8_MAXLEN], *endb;
3028 endb = uvuni_to_utf8_flags(buffer, auv,
3030 0 : UNICODE_ALLOW_ANY);
3031 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3033 SvCUR_set(cat, cur - start);
3034 GROWING(0, cat, start, cur,
3035 len+(endb-buffer)*UTF8_EXPAND);
3036 end = start+SvLEN(cat);
3038 cur = bytes_to_uni(buffer, endb-buffer, cur);
3042 SvCUR_set(cat, cur - start);
3043 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3044 end = start+SvLEN(cat)-UTF8_MAXLEN;
3046 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3048 0 : UNICODE_ALLOW_ANY);
3053 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3059 anv = SvNV(fromstr);
3061 /* VOS does not automatically map a floating-point overflow
3062 during conversion from double to float into infinity, so we
3063 do it by hand. This code should either be generalized for
3064 any OS that needs it, or removed if and when VOS implements
3065 posix-976 (suggestion to support mapping to infinity).
3066 Paul.Green@stratus.com 02-04-02. */
3068 afloat = _float_constants[0]; /* single prec. inf. */
3069 else if (anv < -FLT_MAX)
3070 afloat = _float_constants[0]; /* single prec. inf. */
3071 else afloat = (float) anv;
3073 # if defined(VMS) && !defined(__IEEE_FP)
3074 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3075 * on Alpha; fake it if we don't have them.
3079 else if (anv < -FLT_MAX)
3081 else afloat = (float)anv;
3083 afloat = (float)anv;
3085 #endif /* __VOS__ */
3086 DO_BO_PACK_N(afloat, float);
3087 PUSH_VAR(utf8, cur, afloat);
3095 anv = SvNV(fromstr);
3097 /* VOS does not automatically map a floating-point overflow
3098 during conversion from long double to double into infinity,
3099 so we do it by hand. This code should either be generalized
3100 for any OS that needs it, or removed if and when VOS
3101 implements posix-976 (suggestion to support mapping to
3102 infinity). Paul.Green@stratus.com 02-04-02. */
3104 adouble = _double_constants[0]; /* double prec. inf. */
3105 else if (anv < -DBL_MAX)
3106 adouble = _double_constants[0]; /* double prec. inf. */
3107 else adouble = (double) anv;
3109 # if defined(VMS) && !defined(__IEEE_FP)
3110 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3111 * on Alpha; fake it if we don't have them.
3115 else if (anv < -DBL_MAX)
3117 else adouble = (double)anv;
3119 adouble = (double)anv;
3121 #endif /* __VOS__ */
3122 DO_BO_PACK_N(adouble, double);
3123 PUSH_VAR(utf8, cur, adouble);
3128 Zero(&anv, 1, NV); /* can be long double with unused bits */
3131 anv = SvNV(fromstr);
3132 DO_BO_PACK_N(anv, NV);
3133 PUSH_VAR(utf8, cur, anv);
3137 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3139 long double aldouble;
3140 /* long doubles can have unused bits, which may be nonzero */
3141 Zero(&aldouble, 1, long double);
3144 aldouble = (long double)SvNV(fromstr);
3145 DO_BO_PACK_N(aldouble, long double);
3146 PUSH_VAR(utf8, cur, aldouble);
3151 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3152 case 'n' | TYPE_IS_SHRIEKING:
3158 ai16 = (I16)SvIV(fromstr);
3160 ai16 = PerlSock_htons(ai16);
3162 PUSH16(utf8, cur, &ai16);
3165 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3166 case 'v' | TYPE_IS_SHRIEKING:
3172 ai16 = (I16)SvIV(fromstr);
3176 PUSH16(utf8, cur, &ai16);
3179 case 'S' | TYPE_IS_SHRIEKING:
3180 #if SHORTSIZE != SIZE16
3182 unsigned short aushort;
3184 aushort = SvUV(fromstr);
3185 DO_BO_PACK(aushort, s);
3186 PUSH_VAR(utf8, cur, aushort);
3196 au16 = (U16)SvUV(fromstr);
3197 DO_BO_PACK(au16, 16);
3198 PUSH16(utf8, cur, &au16);
3201 case 's' | TYPE_IS_SHRIEKING:
3202 #if SHORTSIZE != SIZE16
3206 ashort = SvIV(fromstr);
3207 DO_BO_PACK(ashort, s);
3208 PUSH_VAR(utf8, cur, ashort);
3218 ai16 = (I16)SvIV(fromstr);
3219 DO_BO_PACK(ai16, 16);
3220 PUSH16(utf8, cur, &ai16);
3224 case 'I' | TYPE_IS_SHRIEKING:
3228 auint = SvUV(fromstr);
3229 DO_BO_PACK(auint, i);
3230 PUSH_VAR(utf8, cur, auint);
3237 aiv = SvIV(fromstr);
3238 #if IVSIZE == INTSIZE
3240 #elif IVSIZE == LONGSIZE
3242 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3243 DO_BO_PACK(aiv, 64);
3245 Perl_croak(aTHX_ "'j' not supported on this platform");
3247 PUSH_VAR(utf8, cur, aiv);
3254 auv = SvUV(fromstr);
3255 #if UVSIZE == INTSIZE
3257 #elif UVSIZE == LONGSIZE
3259 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3260 DO_BO_PACK(auv, 64);
3262 Perl_croak(aTHX_ "'J' not supported on this platform");
3264 PUSH_VAR(utf8, cur, auv);
3271 anv = SvNV(fromstr);
3275 SvCUR_set(cat, cur - start);
3276 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3279 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3280 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3281 any negative IVs will have already been got by the croak()
3282 above. IOK is untrue for fractions, so we test them
3283 against UV_MAX_P1. */
3284 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3285 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3286 char *in = buf + sizeof(buf);
3287 UV auv = SvUV(fromstr);
3290 *--in = (char)((auv & 0x7f) | 0x80);
3293 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3294 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3295 in, (buf + sizeof(buf)) - in);
3296 } else if (SvPOKp(fromstr))
3298 else if (SvNOKp(fromstr)) {
3299 /* 10**NV_MAX_10_EXP is the largest power of 10
3300 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3301 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3302 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3303 And with that many bytes only Inf can overflow.
3304 Some C compilers are strict about integral constant
3305 expressions so we conservatively divide by a slightly
3306 smaller integer instead of multiplying by the exact
3307 floating-point value.
3309 #ifdef NV_MAX_10_EXP
3310 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3311 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3313 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3314 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3316 char *in = buf + sizeof(buf);
3318 anv = Perl_floor(anv);
3320 const NV next = Perl_floor(anv / 128);
3321 if (in <= buf) /* this cannot happen ;-) */
3322 Perl_croak(aTHX_ "Cannot compress integer in pack");
3323 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3326 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3327 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3328 in, (buf + sizeof(buf)) - in);
3337 /* Copy string and check for compliance */
3338 from = SvPV_const(fromstr, len);
3339 if ((norm = is_an_int(from, len)) == NULL)
3340 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3342 Newx(result, len, char);
3345 while (!done) *--in = div128(norm, &done) | 0x80;
3346 result[len - 1] &= 0x7F; /* clear continue bit */
3347 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3348 in, (result + len) - in);
3350 SvREFCNT_dec(norm); /* free norm */
3355 case 'i' | TYPE_IS_SHRIEKING:
3359 aint = SvIV(fromstr);
3360 DO_BO_PACK(aint, i);
3361 PUSH_VAR(utf8, cur, aint);
3364 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3365 case 'N' | TYPE_IS_SHRIEKING:
3371 au32 = SvUV(fromstr);
3373 au32 = PerlSock_htonl(au32);
3375 PUSH32(utf8, cur, &au32);
3378 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3379 case 'V' | TYPE_IS_SHRIEKING:
3385 au32 = SvUV(fromstr);
3389 PUSH32(utf8, cur, &au32);
3392 case 'L' | TYPE_IS_SHRIEKING:
3393 #if LONGSIZE != SIZE32
3395 unsigned long aulong;
3397 aulong = SvUV(fromstr);
3398 DO_BO_PACK(aulong, l);
3399 PUSH_VAR(utf8, cur, aulong);
3409 au32 = SvUV(fromstr);
3410 DO_BO_PACK(au32, 32);
3411 PUSH32(utf8, cur, &au32);
3414 case 'l' | TYPE_IS_SHRIEKING:
3415 #if LONGSIZE != SIZE32
3419 along = SvIV(fromstr);
3420 DO_BO_PACK(along, l);
3421 PUSH_VAR(utf8, cur, along);
3431 ai32 = SvIV(fromstr);
3432 DO_BO_PACK(ai32, 32);
3433 PUSH32(utf8, cur, &ai32);
3441 auquad = (Uquad_t) SvUV(fromstr);
3442 DO_BO_PACK(auquad, 64);
3443 PUSH_VAR(utf8, cur, auquad);
3450 aquad = (Quad_t)SvIV(fromstr);
3451 DO_BO_PACK(aquad, 64);
3452 PUSH_VAR(utf8, cur, aquad);
3455 #endif /* HAS_QUAD */
3457 len = 1; /* assume SV is correct length */
3458 GROWING(utf8, cat, start, cur, sizeof(char *));
3465 SvGETMAGIC(fromstr);
3466 if (!SvOK(fromstr)) aptr = NULL;
3468 /* XXX better yet, could spirit away the string to
3469 * a safe spot and hang on to it until the result
3470 * of pack() (and all copies of the result) are
3473 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3474 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3475 Perl_warner(aTHX_ packWARN(WARN_PACK),
3476 "Attempt to pack pointer to temporary value");
3478 if (SvPOK(fromstr) || SvNIOK(fromstr))
3479 aptr = SvPV_nomg_const_nolen(fromstr);
3481 aptr = SvPV_force_flags_nolen(fromstr, 0);
3483 DO_BO_PACK_PC(aptr);
3484 PUSH_VAR(utf8, cur, aptr);
3488 const char *aptr, *aend;
3492 if (len <= 2) len = 45;
3493 else len = len / 3 * 3;
3495 if (ckWARN(WARN_PACK))
3496 Perl_warner(aTHX_ packWARN(WARN_PACK),
3497 "Field too wide in 'u' format in pack");
3500 aptr = SvPV_const(fromstr, fromlen);
3501 from_utf8 = DO_UTF8(fromstr);
3503 aend = aptr + fromlen;
3504 fromlen = sv_len_utf8(fromstr);
3505 } else aend = NULL; /* Unused, but keep compilers happy */
3506 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3507 while (fromlen > 0) {
3510 U8 hunk[1+63/3*4+1];
3512 if ((I32)fromlen > len)
3518 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3519 'u' | TYPE_IS_PACK)) {
3521 SvCUR_set(cat, cur - start);
3522 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3524 end = doencodes(hunk, buffer, todo);
3526 end = doencodes(hunk, aptr, todo);
3529 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3536 SvCUR_set(cat, cur - start);
3538 *symptr = lookahead;
3547 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3548 register SV *cat = TARG;
3550 SV *pat_sv = *++MARK;
3551 register const char *pat = SvPV_const(pat_sv, fromlen);
3552 register const char *patend = pat + fromlen;
3555 sv_setpvn(cat, "", 0);
3558 packlist(cat, pat, patend, MARK, SP + 1);
3568 * c-indentation-style: bsd
3570 * indent-tabs-mode: t
3573 * ex: set ts=8 sts=4 sw=4 noet: