3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
30 #define PERL_IN_PP_PACK_C
33 /* Types used by pack/unpack */
35 e_no_len, /* no length */
36 e_number, /* number, [] */
40 typedef struct tempsym {
41 const char* patptr; /* current template char */
42 const char* patend; /* one after last char */
43 const char* grpbeg; /* 1st char of ()-group */
44 const char* grpend; /* end of ()-group */
45 I32 code; /* template code (!<>) */
46 I32 length; /* length/repeat count */
47 howlen_t howlen; /* how length is given */
48 int level; /* () nesting level */
49 U32 flags; /* /=4, comma=2, pack=1 */
50 /* and group modifiers */
51 STRLEN strbeg; /* offset of group start */
52 struct tempsym *previous; /* previous group */
55 #define TEMPSYM_INIT(symptr, p, e, f) \
57 (symptr)->patptr = (p); \
58 (symptr)->patend = (e); \
59 (symptr)->grpbeg = NULL; \
60 (symptr)->grpend = NULL; \
61 (symptr)->grpend = NULL; \
63 (symptr)->length = 0; \
64 (symptr)->howlen = e_no_len; \
65 (symptr)->level = 0; \
66 (symptr)->flags = (f); \
67 (symptr)->strbeg = 0; \
68 (symptr)->previous = NULL; \
72 # define PERL_PACK_CAN_BYTEORDER
73 # define PERL_PACK_CAN_SHRIEKSIGN
79 /* Maximum number of bytes to which a byte can grow due to upgrade */
83 * Offset for integer pack/unpack.
85 * On architectures where I16 and I32 aren't really 16 and 32 bits,
86 * which for now are all Crays, pack and unpack have to play games.
90 * These values are required for portability of pack() output.
91 * If they're not right on your machine, then pack() and unpack()
92 * wouldn't work right anyway; you'll need to apply the Cray hack.
93 * (I'd like to check them with #if, but you can't use sizeof() in
94 * the preprocessor.) --???
97 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
98 defines are now in config.h. --Andy Dougherty April 1998
103 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
107 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
108 # define OFF16(p) ((char*)(p))
109 # define OFF32(p) ((char*)(p))
111 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
112 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
113 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115 ++++ bad cray byte order
119 # define OFF16(p) ((char *) (p))
120 # define OFF32(p) ((char *) (p))
123 /* Only to be used inside a loop (see the break) */
124 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 Copy(s, OFF16(p), SIZE16, char); \
133 /* Only to be used inside a loop (see the break) */
134 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 Copy(s, OFF32(p), SIZE32, char); \
143 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
144 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
150 if (!uni_to_bytes(aTHX_ &s, strend, \
151 (char *) &var, sizeof(var), datumtype)) break;\
153 Copy(s, (char *) &var, sizeof(var), char); \
158 #define PUSH_VAR(utf8, aptr, var) \
159 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
161 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
162 #define MAX_SUB_TEMPLATE_LEVEL 100
164 /* flags (note that type modifiers can also be used as flags!) */
165 #define FLAG_WAS_UTF8 0x40
166 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
167 #define FLAG_UNPACK_ONLY_ONE 0x10
168 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
169 #define FLAG_SLASH 0x04
170 #define FLAG_COMMA 0x02
171 #define FLAG_PACK 0x01
174 S_mul128(pTHX_ SV *sv, U8 m)
177 char *s = SvPV(sv, len);
180 PERL_ARGS_ASSERT_MUL128;
182 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
183 SV * const tmpNew = newSVpvs("0000000000");
185 sv_catsv(tmpNew, sv);
186 SvREFCNT_dec(sv); /* free old sv */
191 while (!*t) /* trailing '\0'? */
194 const U32 i = ((*t - '0') << 7) + m;
195 *(t--) = '0' + (char)(i % 10);
201 /* Explosives and implosives. */
203 #if 'I' == 73 && 'J' == 74
204 /* On an ASCII/ISO kind of system */
205 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208 Some other sort of character set - use memchr() so we don't match
211 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
215 #define TYPE_IS_SHRIEKING 0x100
216 #define TYPE_IS_BIG_ENDIAN 0x200
217 #define TYPE_IS_LITTLE_ENDIAN 0x400
218 #define TYPE_IS_PACK 0x800
219 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
220 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
221 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
223 #ifdef PERL_PACK_CAN_SHRIEKSIGN
224 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
229 #ifndef PERL_PACK_CAN_BYTEORDER
230 /* Put "can't" first because it is shorter */
231 # define TYPE_ENDIANNESS(t) 0
232 # define TYPE_NO_ENDIANNESS(t) (t)
234 # define ENDIANNESS_ALLOWED_TYPES ""
236 # define DO_BO_UNPACK(var, type)
237 # define DO_BO_PACK(var, type)
238 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
240 # define DO_BO_UNPACK_N(var, type)
241 # define DO_BO_PACK_N(var, type)
242 # define DO_BO_UNPACK_P(var)
243 # define DO_BO_PACK_P(var)
244 # define DO_BO_UNPACK_PC(var)
245 # define DO_BO_PACK_PC(var)
247 #else /* PERL_PACK_CAN_BYTEORDER */
249 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
250 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
252 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
254 # define DO_BO_UNPACK(var, type) \
256 switch (TYPE_ENDIANNESS(datumtype)) { \
257 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
258 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
263 # define DO_BO_PACK(var, type) \
265 switch (TYPE_ENDIANNESS(datumtype)) { \
266 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
267 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
272 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
274 switch (TYPE_ENDIANNESS(datumtype)) { \
275 case TYPE_IS_BIG_ENDIAN: \
276 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
278 case TYPE_IS_LITTLE_ENDIAN: \
279 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
286 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
289 case TYPE_IS_BIG_ENDIAN: \
290 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
292 case TYPE_IS_LITTLE_ENDIAN: \
293 var = (post_cast *) my_htole ## type ((pre_cast) var); \
300 # define BO_CANT_DOIT(action, type) \
302 switch (TYPE_ENDIANNESS(datumtype)) { \
303 case TYPE_IS_BIG_ENDIAN: \
304 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
305 "platform", #action, #type); \
307 case TYPE_IS_LITTLE_ENDIAN: \
308 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
309 "platform", #action, #type); \
316 # if PTRSIZE == INTSIZE
317 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
318 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
319 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
320 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
321 # elif PTRSIZE == LONGSIZE
322 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
323 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
324 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
325 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
326 # elif PTRSIZE == IVSIZE
327 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
328 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
329 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
330 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
332 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
333 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
334 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
335 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
338 # if defined(my_htolen) && defined(my_letohn) && \
339 defined(my_htoben) && defined(my_betohn)
340 # define DO_BO_UNPACK_N(var, type) \
342 switch (TYPE_ENDIANNESS(datumtype)) { \
343 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
344 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
349 # define DO_BO_PACK_N(var, type) \
351 switch (TYPE_ENDIANNESS(datumtype)) { \
352 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
353 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
358 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
359 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
362 #endif /* PERL_PACK_CAN_BYTEORDER */
364 #define PACK_SIZE_CANNOT_CSUM 0x80
365 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
366 #define PACK_SIZE_MASK 0x3F
368 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
369 in). You're unlikely ever to need to regenerate them. */
371 #if TYPE_IS_SHRIEKING != 0x100
372 ++++shriek offset should be 256
375 typedef U8 packprops_t;
378 STATIC const packprops_t packprops[512] = {
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 /* C */ sizeof(unsigned char),
386 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
387 /* D */ LONG_DOUBLESIZE,
394 /* I */ sizeof(unsigned int),
401 #if defined(HAS_QUAD)
402 /* Q */ sizeof(Uquad_t),
409 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
411 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
412 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
413 /* c */ sizeof(char),
414 /* d */ sizeof(double),
416 /* f */ sizeof(float),
425 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
426 #if defined(HAS_QUAD)
427 /* q */ sizeof(Quad_t),
435 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 /* I */ sizeof(unsigned int),
453 /* L */ sizeof(unsigned long),
455 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
461 /* S */ sizeof(unsigned short),
463 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
468 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 /* l */ sizeof(long),
474 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
480 /* s */ sizeof(short),
482 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0
498 /* EBCDIC (or bust) */
499 STATIC const packprops_t packprops[512] = {
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 /* c */ sizeof(char),
511 /* d */ sizeof(double),
513 /* f */ sizeof(float),
523 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
524 #if defined(HAS_QUAD)
525 /* q */ sizeof(Quad_t),
529 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
536 /* C */ sizeof(unsigned char),
537 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
538 /* D */ LONG_DOUBLESIZE,
545 /* I */ sizeof(unsigned int),
553 #if defined(HAS_QUAD)
554 /* Q */ sizeof(Uquad_t),
558 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
563 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* l */ sizeof(long),
580 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 /* s */ sizeof(short),
588 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* I */ sizeof(unsigned int),
597 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 /* L */ sizeof(unsigned long),
600 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
605 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
606 /* S */ sizeof(unsigned short),
608 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
619 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
622 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
623 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
624 /* We try to process malformed UTF-8 as much as possible (preferrably with
625 warnings), but these two mean we make no progress in the string and
626 might enter an infinite loop */
627 if (retlen == (STRLEN) -1 || retlen == 0)
628 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
629 (int) TYPE_NO_MODIFIERS(datumtype));
631 if (ckWARN(WARN_UNPACK))
632 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
633 "Character in '%c' format wrapped in unpack",
634 (int) TYPE_NO_MODIFIERS(datumtype));
641 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
642 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
646 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
650 const char *from = *s;
652 const U32 flags = ckWARN(WARN_UTF8) ?
653 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
654 for (;buf_len > 0; buf_len--) {
655 if (from >= end) return FALSE;
656 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
657 if (retlen == (STRLEN) -1 || retlen == 0) {
658 from += UTF8SKIP(from);
660 } else from += retlen;
665 *(U8 *)buf++ = (U8)val;
667 /* We have enough characters for the buffer. Did we have problems ? */
670 /* Rewalk the string fragment while warning */
672 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
673 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
674 if (ptr >= end) break;
675 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
677 if (from > end) from = end;
679 if ((bad & 2) && ckWARN(WARN_UNPACK))
680 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
681 WARN_PACK : WARN_UNPACK),
682 "Character(s) in '%c' format wrapped in %s",
683 (int) TYPE_NO_MODIFIERS(datumtype),
684 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
691 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
695 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
696 if (val >= 0x100 || !ISUUCHAR(val) ||
697 retlen == (STRLEN) -1 || retlen == 0) {
701 *out = PL_uudmap[val] & 077;
707 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
708 const U8 * const end = start + len;
710 PERL_ARGS_ASSERT_BYTES_TO_UNI;
712 while (start < end) {
713 const UV uv = NATIVE_TO_ASCII(*start);
714 if (UNI_IS_INVARIANT(uv))
715 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
717 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
718 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
725 #define PUSH_BYTES(utf8, cur, buf, len) \
728 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
730 Copy(buf, cur, len, char); \
735 #define GROWING(utf8, cat, start, cur, in_len) \
737 STRLEN glen = (in_len); \
738 if (utf8) glen *= UTF8_EXPAND; \
739 if ((cur) + glen >= (start) + SvLEN(cat)) { \
740 (start) = sv_exp_grow(cat, glen); \
741 (cur) = (start) + SvCUR(cat); \
745 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
747 const STRLEN glen = (in_len); \
749 if (utf8) gl *= UTF8_EXPAND; \
750 if ((cur) + gl >= (start) + SvLEN(cat)) { \
752 SvCUR_set((cat), (cur) - (start)); \
753 (start) = sv_exp_grow(cat, gl); \
754 (cur) = (start) + SvCUR(cat); \
756 PUSH_BYTES(utf8, cur, buf, glen); \
759 #define PUSH_BYTE(utf8, s, byte) \
762 const U8 au8 = (byte); \
763 (s) = bytes_to_uni(&au8, 1, (s)); \
764 } else *(U8 *)(s)++ = (byte); \
767 /* Only to be used inside a loop (see the break) */
768 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
771 if (str >= end) break; \
772 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
773 if (retlen == (STRLEN) -1 || retlen == 0) { \
775 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
780 static const char *_action( const tempsym_t* symptr )
782 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
785 /* Returns the sizeof() struct described by pat */
787 S_measure_struct(pTHX_ tempsym_t* symptr)
791 PERL_ARGS_ASSERT_MEASURE_STRUCT;
793 while (next_symbol(symptr)) {
797 switch (symptr->howlen) {
799 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
803 /* e_no_len and e_number */
804 len = symptr->length;
808 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
811 /* endianness doesn't influence the size of a type */
812 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
814 Perl_croak(aTHX_ "Invalid type '%c' in %s",
815 (int)TYPE_NO_MODIFIERS(symptr->code),
817 #ifdef PERL_PACK_CAN_SHRIEKSIGN
818 case '.' | TYPE_IS_SHRIEKING:
819 case '@' | TYPE_IS_SHRIEKING:
824 case 'U': /* XXXX Is it correct? */
827 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
828 (int) TYPE_NO_MODIFIERS(symptr->code),
835 tempsym_t savsym = *symptr;
836 symptr->patptr = savsym.grpbeg;
837 symptr->patend = savsym.grpend;
838 /* XXXX Theoretically, we need to measure many times at
839 different positions, since the subexpression may contain
840 alignment commands, but be not of aligned length.
841 Need to detect this and croak(). */
842 size = measure_struct(symptr);
846 case 'X' | TYPE_IS_SHRIEKING:
847 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
849 if (!len) /* Avoid division by 0 */
851 len = total % len; /* Assumed: the start is aligned. */
856 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
858 case 'x' | TYPE_IS_SHRIEKING:
859 if (!len) /* Avoid division by 0 */
861 star = total % len; /* Assumed: the start is aligned. */
862 if (star) /* Other portable ways? */
886 size = sizeof(char*);
896 /* locate matching closing parenthesis or bracket
897 * returns char pointer to char after match, or NULL
900 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
902 PERL_ARGS_ASSERT_GROUP_END;
904 while (patptr < patend) {
905 const char c = *patptr++;
912 while (patptr < patend && *patptr != '\n')
916 patptr = group_end(patptr, patend, ')') + 1;
918 patptr = group_end(patptr, patend, ']') + 1;
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
926 /* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
931 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
933 I32 len = *patptr++ - '0';
935 PERL_ARGS_ASSERT_GET_NUM;
937 while (isDIGIT(*patptr)) {
938 if (len >= 0x7FFFFFFF/10)
939 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
940 len = (len * 10) + (*patptr++ - '0');
946 /* The marvellous template parsing routine: Using state stored in *symptr,
947 * locates next template code and count
950 S_next_symbol(pTHX_ tempsym_t* symptr )
952 const char* patptr = symptr->patptr;
953 const char* const patend = symptr->patend;
955 PERL_ARGS_ASSERT_NEXT_SYMBOL;
957 symptr->flags &= ~FLAG_SLASH;
959 while (patptr < patend) {
960 if (isSPACE(*patptr))
962 else if (*patptr == '#') {
964 while (patptr < patend && *patptr != '\n')
969 /* We should have found a template code */
970 I32 code = *patptr++ & 0xFF;
971 U32 inherited_modifiers = 0;
973 if (code == ','){ /* grandfather in commas but with a warning */
974 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
975 symptr->flags |= FLAG_COMMA;
976 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
977 "Invalid type ',' in %s", _action( symptr ) );
982 /* for '(', skip to ')' */
984 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
985 Perl_croak(aTHX_ "()-group starts with a count in %s",
987 symptr->grpbeg = patptr;
988 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
989 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
990 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
994 /* look for group modifiers to inherit */
995 if (TYPE_ENDIANNESS(symptr->flags)) {
996 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
997 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1000 /* look for modifiers */
1001 while (patptr < patend) {
1002 const char *allowed;
1006 modifier = TYPE_IS_SHRIEKING;
1007 allowed = SHRIEKING_ALLOWED_TYPES;
1009 #ifdef PERL_PACK_CAN_BYTEORDER
1011 modifier = TYPE_IS_BIG_ENDIAN;
1012 allowed = ENDIANNESS_ALLOWED_TYPES;
1015 modifier = TYPE_IS_LITTLE_ENDIAN;
1016 allowed = ENDIANNESS_ALLOWED_TYPES;
1018 #endif /* PERL_PACK_CAN_BYTEORDER */
1028 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1029 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1030 allowed, _action( symptr ) );
1032 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1033 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1034 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1035 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1036 TYPE_ENDIANNESS_MASK)
1037 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1038 *patptr, _action( symptr ) );
1040 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1041 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1042 "Duplicate modifier '%c' after '%c' in %s",
1043 *patptr, (int) TYPE_NO_MODIFIERS(code),
1044 _action( symptr ) );
1051 /* inherit modifiers */
1052 code |= inherited_modifiers;
1054 /* look for count and/or / */
1055 if (patptr < patend) {
1056 if (isDIGIT(*patptr)) {
1057 patptr = get_num( patptr, &symptr->length );
1058 symptr->howlen = e_number;
1060 } else if (*patptr == '*') {
1062 symptr->howlen = e_star;
1064 } else if (*patptr == '[') {
1065 const char* lenptr = ++patptr;
1066 symptr->howlen = e_number;
1067 patptr = group_end( patptr, patend, ']' ) + 1;
1068 /* what kind of [] is it? */
1069 if (isDIGIT(*lenptr)) {
1070 lenptr = get_num( lenptr, &symptr->length );
1071 if( *lenptr != ']' )
1072 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1073 _action( symptr ) );
1075 tempsym_t savsym = *symptr;
1076 symptr->patend = patptr-1;
1077 symptr->patptr = lenptr;
1078 savsym.length = measure_struct(symptr);
1082 symptr->howlen = e_no_len;
1087 while (patptr < patend) {
1088 if (isSPACE(*patptr))
1090 else if (*patptr == '#') {
1092 while (patptr < patend && *patptr != '\n')
1094 if (patptr < patend)
1097 if (*patptr == '/') {
1098 symptr->flags |= FLAG_SLASH;
1100 if (patptr < patend &&
1101 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1102 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1103 _action( symptr ) );
1109 /* at end - no count, no / */
1110 symptr->howlen = e_no_len;
1114 symptr->code = code;
1115 symptr->patptr = patptr;
1119 symptr->patptr = patptr;
1124 There is no way to cleanly handle the case where we should process the
1125 string per byte in its upgraded form while it's really in downgraded form
1126 (e.g. estimates like strend-s as an upper bound for the number of
1127 characters left wouldn't work). So if we foresee the need of this
1128 (pattern starts with U or contains U0), we want to work on the encoded
1129 version of the string. Users are advised to upgrade their pack string
1130 themselves if they need to do a lot of unpacks like this on it
1133 need_utf8(const char *pat, const char *patend)
1137 PERL_ARGS_ASSERT_NEED_UTF8;
1139 while (pat < patend) {
1140 if (pat[0] == '#') {
1142 pat = (const char *) memchr(pat, '\n', patend-pat);
1143 if (!pat) return FALSE;
1144 } else if (pat[0] == 'U') {
1145 if (first || pat[1] == '0') return TRUE;
1146 } else first = FALSE;
1153 first_symbol(const char *pat, const char *patend) {
1154 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1156 while (pat < patend) {
1157 if (pat[0] != '#') return pat[0];
1159 pat = (const char *) memchr(pat, '\n', patend-pat);
1167 =for apidoc unpackstring
1169 The engine implementing unpack() Perl function. C<unpackstring> puts the
1170 extracted list items on the stack and returns the number of elements.
1171 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1176 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1180 PERL_ARGS_ASSERT_UNPACKSTRING;
1182 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1183 else if (need_utf8(pat, patend)) {
1184 /* We probably should try to avoid this in case a scalar context call
1185 wouldn't get to the "U0" */
1186 STRLEN len = strend - s;
1187 s = (char *) bytes_to_utf8((U8 *) s, &len);
1190 flags |= FLAG_DO_UTF8;
1193 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1194 flags |= FLAG_PARSE_UTF8;
1196 TEMPSYM_INIT(&sym, pat, patend, flags);
1198 return unpack_rec(&sym, s, s, strend, NULL );
1202 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1206 const I32 start_sp_offset = SP - PL_stack_base;
1211 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1212 bool beyond = FALSE;
1213 bool explicit_length;
1214 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1215 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1217 PERL_ARGS_ASSERT_UNPACK_REC;
1219 symptr->strbeg = s - strbeg;
1221 while (next_symbol(symptr)) {
1224 I32 datumtype = symptr->code;
1225 /* do first one only unless in list context
1226 / is implemented by unpacking the count, then popping it from the
1227 stack, so must check that we're not in the middle of a / */
1228 if ( unpack_only_one
1229 && (SP - PL_stack_base == start_sp_offset + 1)
1230 && (datumtype != '/') ) /* XXX can this be omitted */
1233 switch (howlen = symptr->howlen) {
1235 len = strend - strbeg; /* long enough */
1238 /* e_no_len and e_number */
1239 len = symptr->length;
1243 explicit_length = TRUE;
1245 beyond = s >= strend;
1247 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1249 /* props nonzero means we can process this letter. */
1250 const long size = props & PACK_SIZE_MASK;
1251 const long howmany = (strend - s) / size;
1255 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1256 if (len && unpack_only_one) len = 1;
1262 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1264 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1267 if (howlen == e_no_len)
1268 len = 16; /* len is not specified */
1276 tempsym_t savsym = *symptr;
1277 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1278 symptr->flags |= group_modifiers;
1279 symptr->patend = savsym.grpend;
1280 symptr->previous = &savsym;
1283 if (len && unpack_only_one) len = 1;
1285 symptr->patptr = savsym.grpbeg;
1286 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1287 else symptr->flags &= ~FLAG_PARSE_UTF8;
1288 unpack_rec(symptr, s, strbeg, strend, &s);
1289 if (s == strend && savsym.howlen == e_star)
1290 break; /* No way to continue */
1293 savsym.flags = symptr->flags & ~group_modifiers;
1297 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1298 case '.' | TYPE_IS_SHRIEKING:
1303 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1304 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1305 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1306 const bool u8 = utf8;
1308 if (howlen == e_star) from = strbeg;
1309 else if (len <= 0) from = s;
1311 tempsym_t *group = symptr;
1313 while (--len && group) group = group->previous;
1314 from = group ? strbeg + group->strbeg : strbeg;
1317 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1318 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1322 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1323 case '@' | TYPE_IS_SHRIEKING:
1326 s = strbeg + symptr->strbeg;
1327 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1328 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1329 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1335 Perl_croak(aTHX_ "'@' outside of string in unpack");
1340 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1343 Perl_croak(aTHX_ "'@' outside of string in unpack");
1347 case 'X' | TYPE_IS_SHRIEKING:
1348 if (!len) /* Avoid division by 0 */
1351 const char *hop, *last;
1353 hop = last = strbeg;
1355 hop += UTF8SKIP(hop);
1362 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1366 len = (s - strbeg) % len;
1372 Perl_croak(aTHX_ "'X' outside of string in unpack");
1373 while (--s, UTF8_IS_CONTINUATION(*s)) {
1375 Perl_croak(aTHX_ "'X' outside of string in unpack");
1380 if (len > s - strbeg)
1381 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1385 case 'x' | TYPE_IS_SHRIEKING: {
1387 if (!len) /* Avoid division by 0 */
1389 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1390 else ai32 = (s - strbeg) % len;
1391 if (ai32 == 0) break;
1399 Perl_croak(aTHX_ "'x' outside of string in unpack");
1404 if (len > strend - s)
1405 Perl_croak(aTHX_ "'x' outside of string in unpack");
1410 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1416 /* Preliminary length estimate is assumed done in 'W' */
1417 if (len > strend - s) len = strend - s;
1423 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1424 if (hop >= strend) {
1426 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1431 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 } else if (len > strend - s)
1436 if (datumtype == 'Z') {
1437 /* 'Z' strips stuff after first null */
1438 const char *ptr, *end;
1440 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1441 sv = newSVpvn(s, ptr-s);
1442 if (howlen == e_star) /* exact for 'Z*' */
1443 len = ptr-s + (ptr != strend ? 1 : 0);
1444 } else if (datumtype == 'A') {
1445 /* 'A' strips both nulls and spaces */
1447 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1448 for (ptr = s+len-1; ptr >= s; ptr--)
1449 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1450 !is_utf8_space((U8 *) ptr)) break;
1451 if (ptr >= s) ptr += UTF8SKIP(ptr);
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1456 for (ptr = s+len-1; ptr >= s; ptr--)
1457 if (*ptr != 0 && !isSPACE(*ptr)) break;
1460 sv = newSVpvn(s, ptr-s);
1461 } else sv = newSVpvn(s, len);
1465 /* Undo any upgrade done due to need_utf8() */
1466 if (!(symptr->flags & FLAG_WAS_UTF8))
1467 sv_utf8_downgrade(sv, 0);
1475 if (howlen == e_star || len > (strend - s) * 8)
1476 len = (strend - s) * 8;
1480 Newxz(PL_bitcount, 256, char);
1481 for (bits = 1; bits < 256; bits++) {
1482 if (bits & 1) PL_bitcount[bits]++;
1483 if (bits & 2) PL_bitcount[bits]++;
1484 if (bits & 4) PL_bitcount[bits]++;
1485 if (bits & 8) PL_bitcount[bits]++;
1486 if (bits & 16) PL_bitcount[bits]++;
1487 if (bits & 32) PL_bitcount[bits]++;
1488 if (bits & 64) PL_bitcount[bits]++;
1489 if (bits & 128) PL_bitcount[bits]++;
1493 while (len >= 8 && s < strend) {
1494 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1499 cuv += PL_bitcount[*(U8 *)s++];
1502 if (len && s < strend) {
1504 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1505 if (datumtype == 'b')
1507 if (bits & 1) cuv++;
1512 if (bits & 0x80) cuv++;
1519 sv = sv_2mortal(newSV(len ? len : 1));
1522 if (datumtype == 'b') {
1524 const I32 ai32 = len;
1525 for (len = 0; len < ai32; len++) {
1526 if (len & 7) bits >>= 1;
1528 if (s >= strend) break;
1529 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1530 } else bits = *(U8 *) s++;
1531 *str++ = bits & 1 ? '1' : '0';
1535 const I32 ai32 = len;
1536 for (len = 0; len < ai32; len++) {
1537 if (len & 7) bits <<= 1;
1539 if (s >= strend) break;
1540 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1541 } else bits = *(U8 *) s++;
1542 *str++ = bits & 0x80 ? '1' : '0';
1546 SvCUR_set(sv, str - SvPVX_const(sv));
1553 /* Preliminary length estimate, acceptable for utf8 too */
1554 if (howlen == e_star || len > (strend - s) * 2)
1555 len = (strend - s) * 2;
1556 sv = sv_2mortal(newSV(len ? len : 1));
1559 if (datumtype == 'h') {
1562 for (len = 0; len < ai32; len++) {
1563 if (len & 1) bits >>= 4;
1565 if (s >= strend) break;
1566 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1567 } else bits = * (U8 *) s++;
1568 *str++ = PL_hexdigit[bits & 15];
1572 const I32 ai32 = len;
1573 for (len = 0; len < ai32; len++) {
1574 if (len & 1) bits <<= 4;
1576 if (s >= strend) break;
1577 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1578 } else bits = *(U8 *) s++;
1579 *str++ = PL_hexdigit[(bits >> 4) & 15];
1583 SvCUR_set(sv, str - SvPVX_const(sv));
1589 if (explicit_length)
1590 /* Switch to "character" mode */
1591 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1596 while (len-- > 0 && s < strend) {
1601 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1602 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1608 aint = *(U8 *)(s)++;
1609 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1613 else if (checksum > bits_in_uv)
1614 cdouble += (NV)aint;
1622 while (len-- > 0 && s < strend) {
1624 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1626 if (retlen == (STRLEN) -1 || retlen == 0)
1627 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1631 else if (checksum > bits_in_uv)
1632 cdouble += (NV) val;
1636 } else if (!checksum)
1638 const U8 ch = *(U8 *) s++;
1641 else if (checksum > bits_in_uv)
1642 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1644 while (len-- > 0) cuv += *(U8 *) s++;
1648 if (explicit_length) {
1649 /* Switch to "bytes in UTF-8" mode */
1650 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1652 /* Should be impossible due to the need_utf8() test */
1653 Perl_croak(aTHX_ "U0 mode on a byte string");
1657 if (len > strend - s) len = strend - s;
1659 if (len && unpack_only_one) len = 1;
1663 while (len-- > 0 && s < strend) {
1667 U8 result[UTF8_MAXLEN];
1668 const char *ptr = s;
1670 /* Bug: warns about bad utf8 even if we are short on bytes
1671 and will break out of the loop */
1672 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1675 len = UTF8SKIP(result);
1676 if (!uni_to_bytes(aTHX_ &ptr, strend,
1677 (char *) &result[1], len-1, 'U')) break;
1678 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1681 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1682 if (retlen == (STRLEN) -1 || retlen == 0)
1683 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1688 else if (checksum > bits_in_uv)
1689 cdouble += (NV) auv;
1694 case 's' | TYPE_IS_SHRIEKING:
1695 #if SHORTSIZE != SIZE16
1698 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1699 DO_BO_UNPACK(ashort, s);
1702 else if (checksum > bits_in_uv)
1703 cdouble += (NV)ashort;
1715 #if U16SIZE > SIZE16
1718 SHIFT16(utf8, s, strend, &ai16, datumtype);
1719 DO_BO_UNPACK(ai16, 16);
1720 #if U16SIZE > SIZE16
1726 else if (checksum > bits_in_uv)
1727 cdouble += (NV)ai16;
1732 case 'S' | TYPE_IS_SHRIEKING:
1733 #if SHORTSIZE != SIZE16
1735 unsigned short aushort;
1736 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1737 DO_BO_UNPACK(aushort, s);
1740 else if (checksum > bits_in_uv)
1741 cdouble += (NV)aushort;
1754 #if U16SIZE > SIZE16
1757 SHIFT16(utf8, s, strend, &au16, datumtype);
1758 DO_BO_UNPACK(au16, 16);
1760 if (datumtype == 'n')
1761 au16 = PerlSock_ntohs(au16);
1764 if (datumtype == 'v')
1769 else if (checksum > bits_in_uv)
1770 cdouble += (NV) au16;
1775 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1776 case 'v' | TYPE_IS_SHRIEKING:
1777 case 'n' | TYPE_IS_SHRIEKING:
1780 # if U16SIZE > SIZE16
1783 SHIFT16(utf8, s, strend, &ai16, datumtype);
1785 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1786 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1787 # endif /* HAS_NTOHS */
1789 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1790 ai16 = (I16) vtohs((U16) ai16);
1791 # endif /* HAS_VTOHS */
1794 else if (checksum > bits_in_uv)
1795 cdouble += (NV) ai16;
1800 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1802 case 'i' | TYPE_IS_SHRIEKING:
1805 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1806 DO_BO_UNPACK(aint, i);
1809 else if (checksum > bits_in_uv)
1810 cdouble += (NV)aint;
1816 case 'I' | TYPE_IS_SHRIEKING:
1819 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1820 DO_BO_UNPACK(auint, i);
1823 else if (checksum > bits_in_uv)
1824 cdouble += (NV)auint;
1832 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1833 #if IVSIZE == INTSIZE
1834 DO_BO_UNPACK(aiv, i);
1835 #elif IVSIZE == LONGSIZE
1836 DO_BO_UNPACK(aiv, l);
1837 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1838 DO_BO_UNPACK(aiv, 64);
1840 Perl_croak(aTHX_ "'j' not supported on this platform");
1844 else if (checksum > bits_in_uv)
1853 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1854 #if IVSIZE == INTSIZE
1855 DO_BO_UNPACK(auv, i);
1856 #elif IVSIZE == LONGSIZE
1857 DO_BO_UNPACK(auv, l);
1858 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1859 DO_BO_UNPACK(auv, 64);
1861 Perl_croak(aTHX_ "'J' not supported on this platform");
1865 else if (checksum > bits_in_uv)
1871 case 'l' | TYPE_IS_SHRIEKING:
1872 #if LONGSIZE != SIZE32
1875 SHIFT_VAR(utf8, s, strend, along, datumtype);
1876 DO_BO_UNPACK(along, l);
1879 else if (checksum > bits_in_uv)
1880 cdouble += (NV)along;
1891 #if U32SIZE > SIZE32
1894 SHIFT32(utf8, s, strend, &ai32, datumtype);
1895 DO_BO_UNPACK(ai32, 32);
1896 #if U32SIZE > SIZE32
1897 if (ai32 > 2147483647) ai32 -= 4294967296;
1901 else if (checksum > bits_in_uv)
1902 cdouble += (NV)ai32;
1907 case 'L' | TYPE_IS_SHRIEKING:
1908 #if LONGSIZE != SIZE32
1910 unsigned long aulong;
1911 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1912 DO_BO_UNPACK(aulong, l);
1915 else if (checksum > bits_in_uv)
1916 cdouble += (NV)aulong;
1929 #if U32SIZE > SIZE32
1932 SHIFT32(utf8, s, strend, &au32, datumtype);
1933 DO_BO_UNPACK(au32, 32);
1935 if (datumtype == 'N')
1936 au32 = PerlSock_ntohl(au32);
1939 if (datumtype == 'V')
1944 else if (checksum > bits_in_uv)
1945 cdouble += (NV)au32;
1950 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1951 case 'V' | TYPE_IS_SHRIEKING:
1952 case 'N' | TYPE_IS_SHRIEKING:
1955 # if U32SIZE > SIZE32
1958 SHIFT32(utf8, s, strend, &ai32, datumtype);
1960 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1961 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1964 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1965 ai32 = (I32)vtohl((U32)ai32);
1969 else if (checksum > bits_in_uv)
1970 cdouble += (NV)ai32;
1975 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1979 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1980 DO_BO_UNPACK_PC(aptr);
1981 /* newSVpv generates undef if aptr is NULL */
1982 mPUSHs(newSVpv(aptr, 0));
1990 while (len > 0 && s < strend) {
1992 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1993 auv = (auv << 7) | (ch & 0x7f);
1994 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2002 if (++bytes >= sizeof(UV)) { /* promote to string */
2005 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2006 while (s < strend) {
2007 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2008 sv = mul128(sv, (U8)(ch & 0x7f));
2014 t = SvPV_nolen_const(sv);
2023 if ((s >= strend) && bytes)
2024 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2028 if (symptr->howlen == e_star)
2029 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2031 if (s + sizeof(char*) <= strend) {
2033 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2034 DO_BO_UNPACK_PC(aptr);
2035 /* newSVpvn generates undef if aptr is NULL */
2036 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2043 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2044 DO_BO_UNPACK(aquad, 64);
2046 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2047 newSViv((IV)aquad) : newSVnv((NV)aquad));
2048 else if (checksum > bits_in_uv)
2049 cdouble += (NV)aquad;
2057 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2058 DO_BO_UNPACK(auquad, 64);
2060 mPUSHs(auquad <= UV_MAX ?
2061 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2062 else if (checksum > bits_in_uv)
2063 cdouble += (NV)auquad;
2068 #endif /* HAS_QUAD */
2069 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2073 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2074 DO_BO_UNPACK_N(afloat, float);
2084 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2085 DO_BO_UNPACK_N(adouble, double);
2095 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2096 DO_BO_UNPACK_N(anv, NV);
2103 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2106 long double aldouble;
2107 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2108 DO_BO_UNPACK_N(aldouble, long double);
2112 cdouble += aldouble;
2118 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2119 sv = sv_2mortal(newSV(l));
2120 if (l) SvPOK_on(sv);
2123 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2128 next_uni_uu(aTHX_ &s, strend, &a);
2129 next_uni_uu(aTHX_ &s, strend, &b);
2130 next_uni_uu(aTHX_ &s, strend, &c);
2131 next_uni_uu(aTHX_ &s, strend, &d);
2132 hunk[0] = (char)((a << 2) | (b >> 4));
2133 hunk[1] = (char)((b << 4) | (c >> 2));
2134 hunk[2] = (char)((c << 6) | d);
2135 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2143 /* possible checksum byte */
2144 const char *skip = s+UTF8SKIP(s);
2145 if (skip < strend && *skip == '\n')
2151 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2155 len = PL_uudmap[*(U8*)s++] & 077;
2157 if (s < strend && ISUUCHAR(*s))
2158 a = PL_uudmap[*(U8*)s++] & 077;
2161 if (s < strend && ISUUCHAR(*s))
2162 b = PL_uudmap[*(U8*)s++] & 077;
2165 if (s < strend && ISUUCHAR(*s))
2166 c = PL_uudmap[*(U8*)s++] & 077;
2169 if (s < strend && ISUUCHAR(*s))
2170 d = PL_uudmap[*(U8*)s++] & 077;
2173 hunk[0] = (char)((a << 2) | (b >> 4));
2174 hunk[1] = (char)((b << 4) | (c >> 2));
2175 hunk[2] = (char)((c << 6) | d);
2176 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2181 else /* possible checksum byte */
2182 if (s + 1 < strend && s[1] == '\n')
2191 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2192 (checksum > bits_in_uv &&
2193 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2196 anv = (NV) (1 << (checksum & 15));
2197 while (checksum >= 16) {
2201 while (cdouble < 0.0)
2203 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2204 sv = newSVnv(cdouble);
2207 if (checksum < bits_in_uv) {
2208 UV mask = ((UV)1 << checksum) - 1;
2217 if (symptr->flags & FLAG_SLASH){
2218 if (SP - PL_stack_base - start_sp_offset <= 0)
2219 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2220 if( next_symbol(symptr) ){
2221 if( symptr->howlen == e_number )
2222 Perl_croak(aTHX_ "Count after length/code in unpack" );
2224 /* ...end of char buffer then no decent length available */
2225 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2227 /* take top of stack (hope it's numeric) */
2230 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2233 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2235 datumtype = symptr->code;
2236 explicit_length = FALSE;
2244 return SP - PL_stack_base - start_sp_offset;
2252 I32 gimme = GIMME_V;
2255 const char *pat = SvPV_const(left, llen);
2256 const char *s = SvPV_const(right, rlen);
2257 const char *strend = s + rlen;
2258 const char *patend = pat + llen;
2262 cnt = unpackstring(pat, patend, s, strend,
2263 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2264 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2267 if ( !cnt && gimme == G_SCALAR )
2268 PUSHs(&PL_sv_undef);
2273 doencodes(U8 *h, const char *s, I32 len)
2275 *h++ = PL_uuemap[len];
2277 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2278 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2279 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2280 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2285 const char r = (len > 1 ? s[1] : '\0');
2286 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2287 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2288 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2289 *h++ = PL_uuemap[0];
2296 S_is_an_int(pTHX_ const char *s, STRLEN l)
2298 SV *result = newSVpvn(s, l);
2299 char *const result_c = SvPV_nolen(result); /* convenience */
2300 char *out = result_c;
2304 PERL_ARGS_ASSERT_IS_AN_INT;
2312 SvREFCNT_dec(result);
2335 SvREFCNT_dec(result);
2341 SvCUR_set(result, out - result_c);
2345 /* pnum must be '\0' terminated */
2347 S_div128(pTHX_ SV *pnum, bool *done)
2350 char * const s = SvPV(pnum, len);
2354 PERL_ARGS_ASSERT_DIV128;
2358 const int i = m * 10 + (*t - '0');
2359 const int r = (i >> 7); /* r < 10 */
2367 SvCUR_set(pnum, (STRLEN) (t - s));
2372 =for apidoc packlist
2374 The engine implementing pack() Perl function.
2380 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2385 PERL_ARGS_ASSERT_PACKLIST;
2387 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2389 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2390 Also make sure any UTF8 flag is loaded */
2391 SvPV_force_nolen(cat);
2393 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2395 (void)pack_rec( cat, &sym, beglist, endlist );
2398 /* like sv_utf8_upgrade, but also repoint the group start markers */
2400 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2403 const char *from_ptr, *from_start, *from_end, **marks, **m;
2404 char *to_start, *to_ptr;
2406 if (SvUTF8(sv)) return;
2408 from_start = SvPVX_const(sv);
2409 from_end = from_start + SvCUR(sv);
2410 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2411 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2412 if (from_ptr == from_end) {
2413 /* Simple case: no character needs to be changed */
2418 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2419 Newx(to_start, len, char);
2420 Copy(from_start, to_start, from_ptr-from_start, char);
2421 to_ptr = to_start + (from_ptr-from_start);
2423 Newx(marks, sym_ptr->level+2, const char *);
2424 for (group=sym_ptr; group; group = group->previous)
2425 marks[group->level] = from_start + group->strbeg;
2426 marks[sym_ptr->level+1] = from_end+1;
2427 for (m = marks; *m < from_ptr; m++)
2428 *m = to_start + (*m-from_start);
2430 for (;from_ptr < from_end; from_ptr++) {
2431 while (*m == from_ptr) *m++ = to_ptr;
2432 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2436 while (*m == from_ptr) *m++ = to_ptr;
2437 if (m != marks + sym_ptr->level+1) {
2440 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2442 for (group=sym_ptr; group; group = group->previous)
2443 group->strbeg = marks[group->level] - to_start;
2448 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2449 from_start -= SvIVX(sv);
2452 SvFLAGS(sv) &= ~SVf_OOK;
2455 Safefree(from_start);
2456 SvPV_set(sv, to_start);
2457 SvCUR_set(sv, to_ptr - to_start);
2462 /* Exponential string grower. Makes string extension effectively O(n)
2463 needed says how many extra bytes we need (not counting the final '\0')
2464 Only grows the string if there is an actual lack of space
2467 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2468 const STRLEN cur = SvCUR(sv);
2469 const STRLEN len = SvLEN(sv);
2472 PERL_ARGS_ASSERT_SV_EXP_GROW;
2474 if (len - cur > needed) return SvPVX(sv);
2475 extend = needed > len ? needed : len;
2476 return SvGROW(sv, len+extend+1);
2481 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2484 tempsym_t lookahead;
2485 I32 items = endlist - beglist;
2486 bool found = next_symbol(symptr);
2487 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2488 bool warn_utf8 = ckWARN(WARN_UTF8);
2490 PERL_ARGS_ASSERT_PACK_REC;
2492 if (symptr->level == 0 && found && symptr->code == 'U') {
2493 marked_upgrade(aTHX_ cat, symptr);
2494 symptr->flags |= FLAG_DO_UTF8;
2497 symptr->strbeg = SvCUR(cat);
2503 SV *lengthcode = NULL;
2504 I32 datumtype = symptr->code;
2505 howlen_t howlen = symptr->howlen;
2506 char *start = SvPVX(cat);
2507 char *cur = start + SvCUR(cat);
2509 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2513 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2517 /* e_no_len and e_number */
2518 len = symptr->length;
2523 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2525 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2526 /* We can process this letter. */
2527 STRLEN size = props & PACK_SIZE_MASK;
2528 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2532 /* Look ahead for next symbol. Do we have code/code? */
2533 lookahead = *symptr;
2534 found = next_symbol(&lookahead);
2535 if (symptr->flags & FLAG_SLASH) {
2537 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2538 if (strchr("aAZ", lookahead.code)) {
2539 if (lookahead.howlen == e_number) count = lookahead.length;
2542 if (SvGAMAGIC(*beglist)) {
2543 /* Avoid reading the active data more than once
2544 by copying it to a temporary. */
2546 const char *const pv = SvPV_const(*beglist, len);
2548 = newSVpvn_flags(pv, len,
2549 SVs_TEMP | SvUTF8(*beglist));
2552 count = DO_UTF8(*beglist) ?
2553 sv_len_utf8(*beglist) : sv_len(*beglist);
2556 if (lookahead.code == 'Z') count++;
2559 if (lookahead.howlen == e_number && lookahead.length < items)
2560 count = lookahead.length;
2563 lookahead.howlen = e_number;
2564 lookahead.length = count;
2565 lengthcode = sv_2mortal(newSViv(count));
2568 /* Code inside the switch must take care to properly update
2569 cat (CUR length and '\0' termination) if it updated *cur and
2570 doesn't simply leave using break */
2571 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2573 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2574 (int) TYPE_NO_MODIFIERS(datumtype));
2576 Perl_croak(aTHX_ "'%%' may not be used in pack");
2579 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2580 case '.' | TYPE_IS_SHRIEKING:
2583 if (howlen == e_star) from = start;
2584 else if (len == 0) from = cur;
2586 tempsym_t *group = symptr;
2588 while (--len && group) group = group->previous;
2589 from = group ? start + group->strbeg : start;
2592 len = SvIV(fromstr);
2594 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2595 case '@' | TYPE_IS_SHRIEKING:
2598 from = start + symptr->strbeg;
2600 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2602 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2606 while (len && from < cur) {
2607 from += UTF8SKIP(from);
2611 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2613 /* Here we know from == cur */
2615 GROWING(0, cat, start, cur, len);
2616 Zero(cur, len, char);
2618 } else if (from < cur) {
2621 } else goto no_change;
2629 if (len > 0) goto grow;
2630 if (len == 0) goto no_change;
2637 tempsym_t savsym = *symptr;
2638 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2639 symptr->flags |= group_modifiers;
2640 symptr->patend = savsym.grpend;
2642 symptr->previous = &lookahead;
2645 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2646 else symptr->flags &= ~FLAG_PARSE_UTF8;
2647 was_utf8 = SvUTF8(cat);
2648 symptr->patptr = savsym.grpbeg;
2649 beglist = pack_rec(cat, symptr, beglist, endlist);
2650 if (SvUTF8(cat) != was_utf8)
2651 /* This had better be an upgrade while in utf8==0 mode */
2654 if (savsym.howlen == e_star && beglist == endlist)
2655 break; /* No way to continue */
2657 items = endlist - beglist;
2658 lookahead.flags = symptr->flags & ~group_modifiers;
2661 case 'X' | TYPE_IS_SHRIEKING:
2662 if (!len) /* Avoid division by 0 */
2669 hop += UTF8SKIP(hop);
2676 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2680 len = (cur-start) % len;
2684 if (len < 1) goto no_change;
2688 Perl_croak(aTHX_ "'%c' outside of string in pack",
2689 (int) TYPE_NO_MODIFIERS(datumtype));
2690 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2692 Perl_croak(aTHX_ "'%c' outside of string in pack",
2693 (int) TYPE_NO_MODIFIERS(datumtype));
2699 if (cur - start < len)
2700 Perl_croak(aTHX_ "'%c' outside of string in pack",
2701 (int) TYPE_NO_MODIFIERS(datumtype));
2704 if (cur < start+symptr->strbeg) {
2705 /* Make sure group starts don't point into the void */
2707 const STRLEN length = cur-start;
2708 for (group = symptr;
2709 group && length < group->strbeg;
2710 group = group->previous) group->strbeg = length;
2711 lookahead.strbeg = length;
2714 case 'x' | TYPE_IS_SHRIEKING: {
2716 if (!len) /* Avoid division by 0 */
2718 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2719 else ai32 = (cur - start) % len;
2720 if (ai32 == 0) goto no_change;
2732 aptr = SvPV_const(fromstr, fromlen);
2733 if (DO_UTF8(fromstr)) {
2734 const char *end, *s;
2736 if (!utf8 && !SvUTF8(cat)) {
2737 marked_upgrade(aTHX_ cat, symptr);
2738 lookahead.flags |= FLAG_DO_UTF8;
2739 lookahead.strbeg = symptr->strbeg;
2742 cur = start + SvCUR(cat);
2744 if (howlen == e_star) {
2745 if (utf8) goto string_copy;
2749 end = aptr + fromlen;
2750 fromlen = datumtype == 'Z' ? len-1 : len;
2751 while ((I32) fromlen > 0 && s < end) {
2756 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2759 if (datumtype == 'Z') len++;
2765 fromlen = len - fromlen;
2766 if (datumtype == 'Z') fromlen--;
2767 if (howlen == e_star) {
2769 if (datumtype == 'Z') len++;
2771 GROWING(0, cat, start, cur, len);
2772 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2773 datumtype | TYPE_IS_PACK))
2774 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2778 if (howlen == e_star) {
2780 if (datumtype == 'Z') len++;
2782 if (len <= (I32) fromlen) {
2784 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2786 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2788 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2789 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2791 while (fromlen > 0) {
2792 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2798 if (howlen == e_star) {
2800 if (datumtype == 'Z') len++;
2802 if (len <= (I32) fromlen) {
2804 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2806 GROWING(0, cat, start, cur, len);
2807 Copy(aptr, cur, fromlen, char);
2811 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2817 const char *str, *end;
2824 str = SvPV_const(fromstr, fromlen);
2825 end = str + fromlen;
2826 if (DO_UTF8(fromstr)) {
2828 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2830 utf8_source = FALSE;
2831 utf8_flags = 0; /* Unused, but keep compilers happy */
2833 if (howlen == e_star) len = fromlen;
2834 field_len = (len+7)/8;
2835 GROWING(utf8, cat, start, cur, field_len);
2836 if (len > (I32)fromlen) len = fromlen;
2839 if (datumtype == 'B')
2843 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2845 } else bits |= *str++ & 1;
2846 if (l & 7) bits <<= 1;
2848 PUSH_BYTE(utf8, cur, bits);
2853 /* datumtype == 'b' */
2857 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2858 if (val & 1) bits |= 0x80;
2859 } else if (*str++ & 1)
2861 if (l & 7) bits >>= 1;
2863 PUSH_BYTE(utf8, cur, bits);
2869 if (datumtype == 'B')
2870 bits <<= 7 - (l & 7);
2872 bits >>= 7 - (l & 7);
2873 PUSH_BYTE(utf8, cur, bits);
2876 /* Determine how many chars are left in the requested field */
2878 if (howlen == e_star) field_len = 0;
2879 else field_len -= l;
2880 Zero(cur, field_len, char);
2886 const char *str, *end;
2893 str = SvPV_const(fromstr, fromlen);
2894 end = str + fromlen;
2895 if (DO_UTF8(fromstr)) {
2897 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2899 utf8_source = FALSE;
2900 utf8_flags = 0; /* Unused, but keep compilers happy */
2902 if (howlen == e_star) len = fromlen;
2903 field_len = (len+1)/2;
2904 GROWING(utf8, cat, start, cur, field_len);
2905 if (!utf8 && len > (I32)fromlen) len = fromlen;
2908 if (datumtype == 'H')
2912 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2913 if (val < 256 && isALPHA(val))
2914 bits |= (val + 9) & 0xf;
2917 } else if (isALPHA(*str))
2918 bits |= (*str++ + 9) & 0xf;
2920 bits |= *str++ & 0xf;
2921 if (l & 1) bits <<= 4;
2923 PUSH_BYTE(utf8, cur, bits);
2931 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2932 if (val < 256 && isALPHA(val))
2933 bits |= ((val + 9) & 0xf) << 4;
2935 bits |= (val & 0xf) << 4;
2936 } else if (isALPHA(*str))
2937 bits |= ((*str++ + 9) & 0xf) << 4;
2939 bits |= (*str++ & 0xf) << 4;
2940 if (l & 1) bits >>= 4;
2942 PUSH_BYTE(utf8, cur, bits);
2948 PUSH_BYTE(utf8, cur, bits);
2951 /* Determine how many chars are left in the requested field */
2953 if (howlen == e_star) field_len = 0;
2954 else field_len -= l;
2955 Zero(cur, field_len, char);
2963 aiv = SvIV(fromstr);
2964 if ((-128 > aiv || aiv > 127) &&
2966 Perl_warner(aTHX_ packWARN(WARN_PACK),
2967 "Character in 'c' format wrapped in pack");
2968 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2973 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2979 aiv = SvIV(fromstr);
2980 if ((0 > aiv || aiv > 0xff) &&
2982 Perl_warner(aTHX_ packWARN(WARN_PACK),
2983 "Character in 'C' format wrapped in pack");
2984 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2989 U8 in_bytes = (U8)IN_BYTES;
2991 end = start+SvLEN(cat)-1;
2992 if (utf8) end -= UTF8_MAXLEN-1;
2996 auv = SvUV(fromstr);
2997 if (in_bytes) auv = auv % 0x100;
3002 SvCUR_set(cat, cur - start);
3004 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3005 end = start+SvLEN(cat)-UTF8_MAXLEN;
3007 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3010 0 : UNICODE_ALLOW_ANY);
3015 SvCUR_set(cat, cur - start);
3016 marked_upgrade(aTHX_ cat, symptr);
3017 lookahead.flags |= FLAG_DO_UTF8;
3018 lookahead.strbeg = symptr->strbeg;
3021 cur = start + SvCUR(cat);
3022 end = start+SvLEN(cat)-UTF8_MAXLEN;
3025 if (ckWARN(WARN_PACK))
3026 Perl_warner(aTHX_ packWARN(WARN_PACK),
3027 "Character in 'W' format wrapped in pack");
3032 SvCUR_set(cat, cur - start);
3033 GROWING(0, cat, start, cur, len+1);
3034 end = start+SvLEN(cat)-1;
3036 *(U8 *) cur++ = (U8)auv;
3045 if (!(symptr->flags & FLAG_DO_UTF8)) {
3046 marked_upgrade(aTHX_ cat, symptr);
3047 lookahead.flags |= FLAG_DO_UTF8;
3048 lookahead.strbeg = symptr->strbeg;
3054 end = start+SvLEN(cat);
3055 if (!utf8) end -= UTF8_MAXLEN;
3059 auv = SvUV(fromstr);
3061 U8 buffer[UTF8_MAXLEN], *endb;
3062 endb = uvuni_to_utf8_flags(buffer, auv,
3064 0 : UNICODE_ALLOW_ANY);
3065 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3067 SvCUR_set(cat, cur - start);
3068 GROWING(0, cat, start, cur,
3069 len+(endb-buffer)*UTF8_EXPAND);
3070 end = start+SvLEN(cat);
3072 cur = bytes_to_uni(buffer, endb-buffer, cur);
3076 SvCUR_set(cat, cur - start);
3077 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3078 end = start+SvLEN(cat)-UTF8_MAXLEN;
3080 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3082 0 : UNICODE_ALLOW_ANY);
3087 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3093 anv = SvNV(fromstr);
3095 /* VOS does not automatically map a floating-point overflow
3096 during conversion from double to float into infinity, so we
3097 do it by hand. This code should either be generalized for
3098 any OS that needs it, or removed if and when VOS implements
3099 posix-976 (suggestion to support mapping to infinity).
3100 Paul.Green@stratus.com 02-04-02. */
3102 extern const float _float_constants[];
3104 afloat = _float_constants[0]; /* single prec. inf. */
3105 else if (anv < -FLT_MAX)
3106 afloat = _float_constants[0]; /* single prec. inf. */
3107 else afloat = (float) anv;
3110 # if defined(VMS) && !defined(__IEEE_FP)
3111 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3112 * on Alpha; fake it if we don't have them.
3116 else if (anv < -FLT_MAX)
3118 else afloat = (float)anv;
3120 afloat = (float)anv;
3122 #endif /* __VOS__ */
3123 DO_BO_PACK_N(afloat, float);
3124 PUSH_VAR(utf8, cur, afloat);
3132 anv = SvNV(fromstr);
3134 /* VOS does not automatically map a floating-point overflow
3135 during conversion from long double to double into infinity,
3136 so we do it by hand. This code should either be generalized
3137 for any OS that needs it, or removed if and when VOS
3138 implements posix-976 (suggestion to support mapping to
3139 infinity). Paul.Green@stratus.com 02-04-02. */
3141 extern const double _double_constants[];
3143 adouble = _double_constants[0]; /* double prec. inf. */
3144 else if (anv < -DBL_MAX)
3145 adouble = _double_constants[0]; /* double prec. inf. */
3146 else adouble = (double) anv;
3149 # if defined(VMS) && !defined(__IEEE_FP)
3150 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3151 * on Alpha; fake it if we don't have them.
3155 else if (anv < -DBL_MAX)
3157 else adouble = (double)anv;
3159 adouble = (double)anv;
3161 #endif /* __VOS__ */
3162 DO_BO_PACK_N(adouble, double);
3163 PUSH_VAR(utf8, cur, adouble);
3168 Zero(&anv, 1, NV); /* can be long double with unused bits */
3171 anv = SvNV(fromstr);
3172 DO_BO_PACK_N(anv, NV);
3173 PUSH_VAR(utf8, cur, anv);
3177 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3179 long double aldouble;
3180 /* long doubles can have unused bits, which may be nonzero */
3181 Zero(&aldouble, 1, long double);
3184 aldouble = (long double)SvNV(fromstr);
3185 DO_BO_PACK_N(aldouble, long double);
3186 PUSH_VAR(utf8, cur, aldouble);
3191 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3192 case 'n' | TYPE_IS_SHRIEKING:
3198 ai16 = (I16)SvIV(fromstr);
3200 ai16 = PerlSock_htons(ai16);
3202 PUSH16(utf8, cur, &ai16);
3205 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3206 case 'v' | TYPE_IS_SHRIEKING:
3212 ai16 = (I16)SvIV(fromstr);
3216 PUSH16(utf8, cur, &ai16);
3219 case 'S' | TYPE_IS_SHRIEKING:
3220 #if SHORTSIZE != SIZE16
3222 unsigned short aushort;
3224 aushort = SvUV(fromstr);
3225 DO_BO_PACK(aushort, s);
3226 PUSH_VAR(utf8, cur, aushort);
3236 au16 = (U16)SvUV(fromstr);
3237 DO_BO_PACK(au16, 16);
3238 PUSH16(utf8, cur, &au16);
3241 case 's' | TYPE_IS_SHRIEKING:
3242 #if SHORTSIZE != SIZE16
3246 ashort = SvIV(fromstr);
3247 DO_BO_PACK(ashort, s);
3248 PUSH_VAR(utf8, cur, ashort);
3258 ai16 = (I16)SvIV(fromstr);
3259 DO_BO_PACK(ai16, 16);
3260 PUSH16(utf8, cur, &ai16);
3264 case 'I' | TYPE_IS_SHRIEKING:
3268 auint = SvUV(fromstr);
3269 DO_BO_PACK(auint, i);
3270 PUSH_VAR(utf8, cur, auint);
3277 aiv = SvIV(fromstr);
3278 #if IVSIZE == INTSIZE
3280 #elif IVSIZE == LONGSIZE
3282 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3283 DO_BO_PACK(aiv, 64);
3285 Perl_croak(aTHX_ "'j' not supported on this platform");
3287 PUSH_VAR(utf8, cur, aiv);
3294 auv = SvUV(fromstr);
3295 #if UVSIZE == INTSIZE
3297 #elif UVSIZE == LONGSIZE
3299 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3300 DO_BO_PACK(auv, 64);
3302 Perl_croak(aTHX_ "'J' not supported on this platform");
3304 PUSH_VAR(utf8, cur, auv);
3311 anv = SvNV(fromstr);
3315 SvCUR_set(cat, cur - start);
3316 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3319 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3320 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3321 any negative IVs will have already been got by the croak()
3322 above. IOK is untrue for fractions, so we test them
3323 against UV_MAX_P1. */
3324 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3325 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3326 char *in = buf + sizeof(buf);
3327 UV auv = SvUV(fromstr);
3330 *--in = (char)((auv & 0x7f) | 0x80);
3333 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3334 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3335 in, (buf + sizeof(buf)) - in);
3336 } else if (SvPOKp(fromstr))
3338 else if (SvNOKp(fromstr)) {
3339 /* 10**NV_MAX_10_EXP is the largest power of 10
3340 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3341 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3342 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3343 And with that many bytes only Inf can overflow.
3344 Some C compilers are strict about integral constant
3345 expressions so we conservatively divide by a slightly
3346 smaller integer instead of multiplying by the exact
3347 floating-point value.
3349 #ifdef NV_MAX_10_EXP
3350 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3351 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3353 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3354 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3356 char *in = buf + sizeof(buf);
3358 anv = Perl_floor(anv);
3360 const NV next = Perl_floor(anv / 128);
3361 if (in <= buf) /* this cannot happen ;-) */
3362 Perl_croak(aTHX_ "Cannot compress integer in pack");
3363 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3366 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3367 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3368 in, (buf + sizeof(buf)) - in);
3377 /* Copy string and check for compliance */
3378 from = SvPV_const(fromstr, len);
3379 if ((norm = is_an_int(from, len)) == NULL)
3380 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3382 Newx(result, len, char);
3385 while (!done) *--in = div128(norm, &done) | 0x80;
3386 result[len - 1] &= 0x7F; /* clear continue bit */
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (result + len) - in);
3390 SvREFCNT_dec(norm); /* free norm */
3395 case 'i' | TYPE_IS_SHRIEKING:
3399 aint = SvIV(fromstr);
3400 DO_BO_PACK(aint, i);
3401 PUSH_VAR(utf8, cur, aint);
3404 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3405 case 'N' | TYPE_IS_SHRIEKING:
3411 au32 = SvUV(fromstr);
3413 au32 = PerlSock_htonl(au32);
3415 PUSH32(utf8, cur, &au32);
3418 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3419 case 'V' | TYPE_IS_SHRIEKING:
3425 au32 = SvUV(fromstr);
3429 PUSH32(utf8, cur, &au32);
3432 case 'L' | TYPE_IS_SHRIEKING:
3433 #if LONGSIZE != SIZE32
3435 unsigned long aulong;
3437 aulong = SvUV(fromstr);
3438 DO_BO_PACK(aulong, l);
3439 PUSH_VAR(utf8, cur, aulong);
3449 au32 = SvUV(fromstr);
3450 DO_BO_PACK(au32, 32);
3451 PUSH32(utf8, cur, &au32);
3454 case 'l' | TYPE_IS_SHRIEKING:
3455 #if LONGSIZE != SIZE32
3459 along = SvIV(fromstr);
3460 DO_BO_PACK(along, l);
3461 PUSH_VAR(utf8, cur, along);
3471 ai32 = SvIV(fromstr);
3472 DO_BO_PACK(ai32, 32);
3473 PUSH32(utf8, cur, &ai32);
3481 auquad = (Uquad_t) SvUV(fromstr);
3482 DO_BO_PACK(auquad, 64);
3483 PUSH_VAR(utf8, cur, auquad);
3490 aquad = (Quad_t)SvIV(fromstr);
3491 DO_BO_PACK(aquad, 64);
3492 PUSH_VAR(utf8, cur, aquad);
3495 #endif /* HAS_QUAD */
3497 len = 1; /* assume SV is correct length */
3498 GROWING(utf8, cat, start, cur, sizeof(char *));
3505 SvGETMAGIC(fromstr);
3506 if (!SvOK(fromstr)) aptr = NULL;
3508 /* XXX better yet, could spirit away the string to
3509 * a safe spot and hang on to it until the result
3510 * of pack() (and all copies of the result) are
3513 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3514 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3515 Perl_warner(aTHX_ packWARN(WARN_PACK),
3516 "Attempt to pack pointer to temporary value");
3518 if (SvPOK(fromstr) || SvNIOK(fromstr))
3519 aptr = SvPV_nomg_const_nolen(fromstr);
3521 aptr = SvPV_force_flags_nolen(fromstr, 0);
3523 DO_BO_PACK_PC(aptr);
3524 PUSH_VAR(utf8, cur, aptr);
3528 const char *aptr, *aend;
3532 if (len <= 2) len = 45;
3533 else len = len / 3 * 3;
3535 if (ckWARN(WARN_PACK))
3536 Perl_warner(aTHX_ packWARN(WARN_PACK),
3537 "Field too wide in 'u' format in pack");
3540 aptr = SvPV_const(fromstr, fromlen);
3541 from_utf8 = DO_UTF8(fromstr);
3543 aend = aptr + fromlen;
3544 fromlen = sv_len_utf8(fromstr);
3545 } else aend = NULL; /* Unused, but keep compilers happy */
3546 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3547 while (fromlen > 0) {
3550 U8 hunk[1+63/3*4+1];
3552 if ((I32)fromlen > len)
3558 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3559 'u' | TYPE_IS_PACK)) {
3561 SvCUR_set(cat, cur - start);
3562 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3564 end = doencodes(hunk, buffer, todo);
3566 end = doencodes(hunk, aptr, todo);
3569 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3576 SvCUR_set(cat, cur - start);
3578 *symptr = lookahead;
3587 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3588 register SV *cat = TARG;
3590 SV *pat_sv = *++MARK;
3591 register const char *pat = SvPV_const(pat_sv, fromlen);
3592 register const char *patend = pat + fromlen;
3595 sv_setpvn(cat, "", 0);
3598 packlist(cat, pat, patend, MARK, SP + 1);
3608 * c-indentation-style: bsd
3610 * indent-tabs-mode: t
3613 * ex: set ts=8 sts=4 sw=4 noet: