3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
74 # define PERL_PACK_CAN_BYTEORDER
75 # define PERL_PACK_CAN_SHRIEKSIGN
81 /* Maximum number of bytes to which a byte can grow due to upgrade */
85 * Offset for integer pack/unpack.
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
109 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
110 # define OFF16(p) ((char*)(p))
111 # define OFF32(p) ((char*)(p))
113 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
114 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
117 ++++ bad cray byte order
121 # define OFF16(p) ((char *) (p))
122 # define OFF32(p) ((char *) (p))
125 /* Only to be used inside a loop (see the break) */
126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
130 Copy(s, OFF16(p), SIZE16, char); \
135 /* Only to be used inside a loop (see the break) */
136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
140 Copy(s, OFF32(p), SIZE32, char); \
145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
155 Copy(s, (char *) &var, sizeof(var), char); \
160 #define PUSH_VAR(utf8, aptr, var) \
161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164 #define MAX_SUB_TEMPLATE_LEVEL 100
166 /* flags (note that type modifiers can also be used as flags!) */
167 #define FLAG_WAS_UTF8 0x40
168 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
169 #define FLAG_UNPACK_ONLY_ONE 0x10
170 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
171 #define FLAG_SLASH 0x04
172 #define FLAG_COMMA 0x02
173 #define FLAG_PACK 0x01
176 S_mul128(pTHX_ SV *sv, U8 m)
179 char *s = SvPV(sv, len);
182 PERL_ARGS_ASSERT_MUL128;
184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
185 SV * const tmpNew = newSVpvs("0000000000");
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
193 while (!*t) /* trailing '\0'? */
196 const U32 i = ((*t - '0') << 7) + m;
197 *(t--) = '0' + (char)(i % 10);
203 /* Explosives and implosives. */
205 #if 'I' == 73 && 'J' == 74
206 /* On an ASCII/ISO kind of system */
207 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
210 Some other sort of character set - use memchr() so we don't match
213 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
217 #define TYPE_IS_SHRIEKING 0x100
218 #define TYPE_IS_BIG_ENDIAN 0x200
219 #define TYPE_IS_LITTLE_ENDIAN 0x400
220 #define TYPE_IS_PACK 0x800
221 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
223 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
231 #ifndef PERL_PACK_CAN_BYTEORDER
232 /* Put "can't" first because it is shorter */
233 # define TYPE_ENDIANNESS(t) 0
234 # define TYPE_NO_ENDIANNESS(t) (t)
236 # define ENDIANNESS_ALLOWED_TYPES ""
238 # define DO_BO_UNPACK(var, type)
239 # define DO_BO_PACK(var, type)
240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242 # define DO_BO_UNPACK_N(var, type)
243 # define DO_BO_PACK_N(var, type)
244 # define DO_BO_UNPACK_P(var)
245 # define DO_BO_PACK_P(var)
246 # define DO_BO_UNPACK_PC(var)
247 # define DO_BO_PACK_PC(var)
249 #else /* PERL_PACK_CAN_BYTEORDER */
251 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
254 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
256 # define DO_BO_UNPACK(var, type) \
258 switch (TYPE_ENDIANNESS(datumtype)) { \
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 # define DO_BO_PACK(var, type) \
267 switch (TYPE_ENDIANNESS(datumtype)) { \
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
276 switch (TYPE_ENDIANNESS(datumtype)) { \
277 case TYPE_IS_BIG_ENDIAN: \
278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
280 case TYPE_IS_LITTLE_ENDIAN: \
281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
290 switch (TYPE_ENDIANNESS(datumtype)) { \
291 case TYPE_IS_BIG_ENDIAN: \
292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
294 case TYPE_IS_LITTLE_ENDIAN: \
295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
302 # define BO_CANT_DOIT(action, type) \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
318 # if PTRSIZE == INTSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
323 # elif PTRSIZE == LONGSIZE
324 # if LONGSIZE < IVSIZE && IVSIZE == 8
325 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
326 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
327 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
328 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
330 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
331 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
332 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
333 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
335 # elif PTRSIZE == IVSIZE
336 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
337 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
338 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
339 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
341 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
342 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
343 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
344 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
347 # if defined(my_htolen) && defined(my_letohn) && \
348 defined(my_htoben) && defined(my_betohn)
349 # define DO_BO_UNPACK_N(var, type) \
351 switch (TYPE_ENDIANNESS(datumtype)) { \
352 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
353 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
358 # define DO_BO_PACK_N(var, type) \
360 switch (TYPE_ENDIANNESS(datumtype)) { \
361 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
362 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
367 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
368 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
371 #endif /* PERL_PACK_CAN_BYTEORDER */
373 #define PACK_SIZE_CANNOT_CSUM 0x80
374 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
375 #define PACK_SIZE_MASK 0x3F
377 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
378 in). You're unlikely ever to need to regenerate them. */
380 #if TYPE_IS_SHRIEKING != 0x100
381 ++++shriek offset should be 256
384 typedef U8 packprops_t;
387 STATIC const packprops_t packprops[512] = {
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
394 /* C */ sizeof(unsigned char),
395 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
396 /* D */ LONG_DOUBLESIZE,
403 /* I */ sizeof(unsigned int),
410 #if defined(HAS_QUAD)
411 /* Q */ sizeof(Uquad_t),
418 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
420 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
421 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
422 /* c */ sizeof(char),
423 /* d */ sizeof(double),
425 /* f */ sizeof(float),
434 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
435 #if defined(HAS_QUAD)
436 /* q */ sizeof(Quad_t),
444 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 /* I */ sizeof(unsigned int),
462 /* L */ sizeof(unsigned long),
464 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 /* S */ sizeof(unsigned short),
472 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 /* l */ sizeof(long),
483 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
489 /* s */ sizeof(short),
491 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0
507 /* EBCDIC (or bust) */
508 STATIC const packprops_t packprops[512] = {
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 /* c */ sizeof(char),
520 /* d */ sizeof(double),
522 /* f */ sizeof(float),
532 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
533 #if defined(HAS_QUAD)
534 /* q */ sizeof(Quad_t),
538 0, 0, 0, 0, 0, 0, 0, 0, 0,
542 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 /* C */ sizeof(unsigned char),
546 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
547 /* D */ LONG_DOUBLESIZE,
554 /* I */ sizeof(unsigned int),
562 #if defined(HAS_QUAD)
563 /* Q */ sizeof(Uquad_t),
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
572 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 /* l */ sizeof(long),
589 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 /* s */ sizeof(short),
597 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
602 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* I */ sizeof(unsigned int),
606 0, 0, 0, 0, 0, 0, 0, 0, 0,
607 /* L */ sizeof(unsigned long),
609 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
615 /* S */ sizeof(unsigned short),
617 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
622 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
623 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
628 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
631 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
632 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
633 /* We try to process malformed UTF-8 as much as possible (preferrably with
634 warnings), but these two mean we make no progress in the string and
635 might enter an infinite loop */
636 if (retlen == (STRLEN) -1 || retlen == 0)
637 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
638 (int) TYPE_NO_MODIFIERS(datumtype));
640 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
641 "Character in '%c' format wrapped in unpack",
642 (int) TYPE_NO_MODIFIERS(datumtype));
649 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
650 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
654 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
658 const char *from = *s;
660 const U32 flags = ckWARN(WARN_UTF8) ?
661 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
662 for (;buf_len > 0; buf_len--) {
663 if (from >= end) return FALSE;
664 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
665 if (retlen == (STRLEN) -1 || retlen == 0) {
666 from += UTF8SKIP(from);
668 } else from += retlen;
673 *(U8 *)buf++ = (U8)val;
675 /* We have enough characters for the buffer. Did we have problems ? */
678 /* Rewalk the string fragment while warning */
680 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
681 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
682 if (ptr >= end) break;
683 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
685 if (from > end) from = end;
688 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
689 WARN_PACK : WARN_UNPACK),
690 "Character(s) in '%c' format wrapped in %s",
691 (int) TYPE_NO_MODIFIERS(datumtype),
692 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
699 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
703 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
704 if (val >= 0x100 || !ISUUCHAR(val) ||
705 retlen == (STRLEN) -1 || retlen == 0) {
709 *out = PL_uudmap[val] & 077;
715 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
716 const U8 * const end = start + len;
718 PERL_ARGS_ASSERT_BYTES_TO_UNI;
720 while (start < end) {
721 const UV uv = NATIVE_TO_ASCII(*start);
722 if (UNI_IS_INVARIANT(uv))
723 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
725 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
726 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
733 #define PUSH_BYTES(utf8, cur, buf, len) \
736 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
738 Copy(buf, cur, len, char); \
743 #define GROWING(utf8, cat, start, cur, in_len) \
745 STRLEN glen = (in_len); \
746 if (utf8) glen *= UTF8_EXPAND; \
747 if ((cur) + glen >= (start) + SvLEN(cat)) { \
748 (start) = sv_exp_grow(cat, glen); \
749 (cur) = (start) + SvCUR(cat); \
753 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
755 const STRLEN glen = (in_len); \
757 if (utf8) gl *= UTF8_EXPAND; \
758 if ((cur) + gl >= (start) + SvLEN(cat)) { \
760 SvCUR_set((cat), (cur) - (start)); \
761 (start) = sv_exp_grow(cat, gl); \
762 (cur) = (start) + SvCUR(cat); \
764 PUSH_BYTES(utf8, cur, buf, glen); \
767 #define PUSH_BYTE(utf8, s, byte) \
770 const U8 au8 = (byte); \
771 (s) = bytes_to_uni(&au8, 1, (s)); \
772 } else *(U8 *)(s)++ = (byte); \
775 /* Only to be used inside a loop (see the break) */
776 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
779 if (str >= end) break; \
780 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
781 if (retlen == (STRLEN) -1 || retlen == 0) { \
783 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
788 static const char *_action( const tempsym_t* symptr )
790 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
793 /* Returns the sizeof() struct described by pat */
795 S_measure_struct(pTHX_ tempsym_t* symptr)
799 PERL_ARGS_ASSERT_MEASURE_STRUCT;
801 while (next_symbol(symptr)) {
805 switch (symptr->howlen) {
807 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
811 /* e_no_len and e_number */
812 len = symptr->length;
816 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
819 /* endianness doesn't influence the size of a type */
820 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
822 Perl_croak(aTHX_ "Invalid type '%c' in %s",
823 (int)TYPE_NO_MODIFIERS(symptr->code),
825 #ifdef PERL_PACK_CAN_SHRIEKSIGN
826 case '.' | TYPE_IS_SHRIEKING:
827 case '@' | TYPE_IS_SHRIEKING:
832 case 'U': /* XXXX Is it correct? */
835 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
836 (int) TYPE_NO_MODIFIERS(symptr->code),
843 tempsym_t savsym = *symptr;
844 symptr->patptr = savsym.grpbeg;
845 symptr->patend = savsym.grpend;
846 /* XXXX Theoretically, we need to measure many times at
847 different positions, since the subexpression may contain
848 alignment commands, but be not of aligned length.
849 Need to detect this and croak(). */
850 size = measure_struct(symptr);
854 case 'X' | TYPE_IS_SHRIEKING:
855 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
857 if (!len) /* Avoid division by 0 */
859 len = total % len; /* Assumed: the start is aligned. */
864 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
866 case 'x' | TYPE_IS_SHRIEKING:
867 if (!len) /* Avoid division by 0 */
869 star = total % len; /* Assumed: the start is aligned. */
870 if (star) /* Other portable ways? */
894 size = sizeof(char*);
904 /* locate matching closing parenthesis or bracket
905 * returns char pointer to char after match, or NULL
908 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
910 PERL_ARGS_ASSERT_GROUP_END;
912 while (patptr < patend) {
913 const char c = *patptr++;
920 while (patptr < patend && *patptr != '\n')
924 patptr = group_end(patptr, patend, ')') + 1;
926 patptr = group_end(patptr, patend, ']') + 1;
928 Perl_croak(aTHX_ "No group ending character '%c' found in template",
934 /* Convert unsigned decimal number to binary.
935 * Expects a pointer to the first digit and address of length variable
936 * Advances char pointer to 1st non-digit char and returns number
939 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
941 I32 len = *patptr++ - '0';
943 PERL_ARGS_ASSERT_GET_NUM;
945 while (isDIGIT(*patptr)) {
946 if (len >= 0x7FFFFFFF/10)
947 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
948 len = (len * 10) + (*patptr++ - '0');
954 /* The marvellous template parsing routine: Using state stored in *symptr,
955 * locates next template code and count
958 S_next_symbol(pTHX_ tempsym_t* symptr )
960 const char* patptr = symptr->patptr;
961 const char* const patend = symptr->patend;
963 PERL_ARGS_ASSERT_NEXT_SYMBOL;
965 symptr->flags &= ~FLAG_SLASH;
967 while (patptr < patend) {
968 if (isSPACE(*patptr))
970 else if (*patptr == '#') {
972 while (patptr < patend && *patptr != '\n')
977 /* We should have found a template code */
978 I32 code = *patptr++ & 0xFF;
979 U32 inherited_modifiers = 0;
981 if (code == ','){ /* grandfather in commas but with a warning */
982 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
983 symptr->flags |= FLAG_COMMA;
984 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
985 "Invalid type ',' in %s", _action( symptr ) );
990 /* for '(', skip to ')' */
992 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
993 Perl_croak(aTHX_ "()-group starts with a count in %s",
995 symptr->grpbeg = patptr;
996 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
997 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
998 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1002 /* look for group modifiers to inherit */
1003 if (TYPE_ENDIANNESS(symptr->flags)) {
1004 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1005 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1008 /* look for modifiers */
1009 while (patptr < patend) {
1010 const char *allowed;
1014 modifier = TYPE_IS_SHRIEKING;
1015 allowed = SHRIEKING_ALLOWED_TYPES;
1017 #ifdef PERL_PACK_CAN_BYTEORDER
1019 modifier = TYPE_IS_BIG_ENDIAN;
1020 allowed = ENDIANNESS_ALLOWED_TYPES;
1023 modifier = TYPE_IS_LITTLE_ENDIAN;
1024 allowed = ENDIANNESS_ALLOWED_TYPES;
1026 #endif /* PERL_PACK_CAN_BYTEORDER */
1036 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1037 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1038 allowed, _action( symptr ) );
1040 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1041 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1042 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1043 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1044 TYPE_ENDIANNESS_MASK)
1045 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1046 *patptr, _action( symptr ) );
1048 if ((code & modifier)) {
1049 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1050 "Duplicate modifier '%c' after '%c' in %s",
1051 *patptr, (int) TYPE_NO_MODIFIERS(code),
1052 _action( symptr ) );
1059 /* inherit modifiers */
1060 code |= inherited_modifiers;
1062 /* look for count and/or / */
1063 if (patptr < patend) {
1064 if (isDIGIT(*patptr)) {
1065 patptr = get_num( patptr, &symptr->length );
1066 symptr->howlen = e_number;
1068 } else if (*patptr == '*') {
1070 symptr->howlen = e_star;
1072 } else if (*patptr == '[') {
1073 const char* lenptr = ++patptr;
1074 symptr->howlen = e_number;
1075 patptr = group_end( patptr, patend, ']' ) + 1;
1076 /* what kind of [] is it? */
1077 if (isDIGIT(*lenptr)) {
1078 lenptr = get_num( lenptr, &symptr->length );
1079 if( *lenptr != ']' )
1080 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1081 _action( symptr ) );
1083 tempsym_t savsym = *symptr;
1084 symptr->patend = patptr-1;
1085 symptr->patptr = lenptr;
1086 savsym.length = measure_struct(symptr);
1090 symptr->howlen = e_no_len;
1095 while (patptr < patend) {
1096 if (isSPACE(*patptr))
1098 else if (*patptr == '#') {
1100 while (patptr < patend && *patptr != '\n')
1102 if (patptr < patend)
1105 if (*patptr == '/') {
1106 symptr->flags |= FLAG_SLASH;
1108 if (patptr < patend &&
1109 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1110 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1111 _action( symptr ) );
1117 /* at end - no count, no / */
1118 symptr->howlen = e_no_len;
1122 symptr->code = code;
1123 symptr->patptr = patptr;
1127 symptr->patptr = patptr;
1132 There is no way to cleanly handle the case where we should process the
1133 string per byte in its upgraded form while it's really in downgraded form
1134 (e.g. estimates like strend-s as an upper bound for the number of
1135 characters left wouldn't work). So if we foresee the need of this
1136 (pattern starts with U or contains U0), we want to work on the encoded
1137 version of the string. Users are advised to upgrade their pack string
1138 themselves if they need to do a lot of unpacks like this on it
1141 need_utf8(const char *pat, const char *patend)
1145 PERL_ARGS_ASSERT_NEED_UTF8;
1147 while (pat < patend) {
1148 if (pat[0] == '#') {
1150 pat = (const char *) memchr(pat, '\n', patend-pat);
1151 if (!pat) return FALSE;
1152 } else if (pat[0] == 'U') {
1153 if (first || pat[1] == '0') return TRUE;
1154 } else first = FALSE;
1161 first_symbol(const char *pat, const char *patend) {
1162 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1164 while (pat < patend) {
1165 if (pat[0] != '#') return pat[0];
1167 pat = (const char *) memchr(pat, '\n', patend-pat);
1175 =for apidoc unpackstring
1177 The engine implementing unpack() Perl function. C<unpackstring> puts the
1178 extracted list items on the stack and returns the number of elements.
1179 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1184 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1188 PERL_ARGS_ASSERT_UNPACKSTRING;
1190 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1191 else if (need_utf8(pat, patend)) {
1192 /* We probably should try to avoid this in case a scalar context call
1193 wouldn't get to the "U0" */
1194 STRLEN len = strend - s;
1195 s = (char *) bytes_to_utf8((U8 *) s, &len);
1198 flags |= FLAG_DO_UTF8;
1201 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1202 flags |= FLAG_PARSE_UTF8;
1204 TEMPSYM_INIT(&sym, pat, patend, flags);
1206 return unpack_rec(&sym, s, s, strend, NULL );
1210 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1214 const I32 start_sp_offset = SP - PL_stack_base;
1219 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1220 bool beyond = FALSE;
1221 bool explicit_length;
1222 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1223 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1225 PERL_ARGS_ASSERT_UNPACK_REC;
1227 symptr->strbeg = s - strbeg;
1229 while (next_symbol(symptr)) {
1232 I32 datumtype = symptr->code;
1233 /* do first one only unless in list context
1234 / is implemented by unpacking the count, then popping it from the
1235 stack, so must check that we're not in the middle of a / */
1236 if ( unpack_only_one
1237 && (SP - PL_stack_base == start_sp_offset + 1)
1238 && (datumtype != '/') ) /* XXX can this be omitted */
1241 switch (howlen = symptr->howlen) {
1243 len = strend - strbeg; /* long enough */
1246 /* e_no_len and e_number */
1247 len = symptr->length;
1251 explicit_length = TRUE;
1253 beyond = s >= strend;
1255 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1257 /* props nonzero means we can process this letter. */
1258 const long size = props & PACK_SIZE_MASK;
1259 const long howmany = (strend - s) / size;
1263 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1264 if (len && unpack_only_one) len = 1;
1270 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1272 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1275 if (howlen == e_no_len)
1276 len = 16; /* len is not specified */
1284 tempsym_t savsym = *symptr;
1285 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1286 symptr->flags |= group_modifiers;
1287 symptr->patend = savsym.grpend;
1288 symptr->previous = &savsym;
1291 if (len && unpack_only_one) len = 1;
1293 symptr->patptr = savsym.grpbeg;
1294 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1295 else symptr->flags &= ~FLAG_PARSE_UTF8;
1296 unpack_rec(symptr, s, strbeg, strend, &s);
1297 if (s == strend && savsym.howlen == e_star)
1298 break; /* No way to continue */
1301 savsym.flags = symptr->flags & ~group_modifiers;
1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 case '.' | TYPE_IS_SHRIEKING:
1311 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1312 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1313 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1314 const bool u8 = utf8;
1316 if (howlen == e_star) from = strbeg;
1317 else if (len <= 0) from = s;
1319 tempsym_t *group = symptr;
1321 while (--len && group) group = group->previous;
1322 from = group ? strbeg + group->strbeg : strbeg;
1325 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1326 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1330 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1331 case '@' | TYPE_IS_SHRIEKING:
1334 s = strbeg + symptr->strbeg;
1335 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1336 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1337 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1343 Perl_croak(aTHX_ "'@' outside of string in unpack");
1348 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1351 Perl_croak(aTHX_ "'@' outside of string in unpack");
1355 case 'X' | TYPE_IS_SHRIEKING:
1356 if (!len) /* Avoid division by 0 */
1359 const char *hop, *last;
1361 hop = last = strbeg;
1363 hop += UTF8SKIP(hop);
1370 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1374 len = (s - strbeg) % len;
1380 Perl_croak(aTHX_ "'X' outside of string in unpack");
1381 while (--s, UTF8_IS_CONTINUATION(*s)) {
1383 Perl_croak(aTHX_ "'X' outside of string in unpack");
1388 if (len > s - strbeg)
1389 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1393 case 'x' | TYPE_IS_SHRIEKING: {
1395 if (!len) /* Avoid division by 0 */
1397 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1398 else ai32 = (s - strbeg) % len;
1399 if (ai32 == 0) break;
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1412 if (len > strend - s)
1413 Perl_croak(aTHX_ "'x' outside of string in unpack");
1418 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1424 /* Preliminary length estimate is assumed done in 'W' */
1425 if (len > strend - s) len = strend - s;
1431 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1432 if (hop >= strend) {
1434 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1439 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1441 } else if (len > strend - s)
1444 if (datumtype == 'Z') {
1445 /* 'Z' strips stuff after first null */
1446 const char *ptr, *end;
1448 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1449 sv = newSVpvn(s, ptr-s);
1450 if (howlen == e_star) /* exact for 'Z*' */
1451 len = ptr-s + (ptr != strend ? 1 : 0);
1452 } else if (datumtype == 'A') {
1453 /* 'A' strips both nulls and spaces */
1455 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1456 for (ptr = s+len-1; ptr >= s; ptr--)
1457 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1458 !is_utf8_space((U8 *) ptr)) break;
1459 if (ptr >= s) ptr += UTF8SKIP(ptr);
1462 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1464 for (ptr = s+len-1; ptr >= s; ptr--)
1465 if (*ptr != 0 && !isSPACE(*ptr)) break;
1468 sv = newSVpvn(s, ptr-s);
1469 } else sv = newSVpvn(s, len);
1473 /* Undo any upgrade done due to need_utf8() */
1474 if (!(symptr->flags & FLAG_WAS_UTF8))
1475 sv_utf8_downgrade(sv, 0);
1483 if (howlen == e_star || len > (strend - s) * 8)
1484 len = (strend - s) * 8;
1487 while (len >= 8 && s < strend) {
1488 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1493 cuv += PL_bitcount[*(U8 *)s++];
1496 if (len && s < strend) {
1498 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1499 if (datumtype == 'b')
1501 if (bits & 1) cuv++;
1506 if (bits & 0x80) cuv++;
1513 sv = sv_2mortal(newSV(len ? len : 1));
1516 if (datumtype == 'b') {
1518 const I32 ai32 = len;
1519 for (len = 0; len < ai32; len++) {
1520 if (len & 7) bits >>= 1;
1522 if (s >= strend) break;
1523 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1524 } else bits = *(U8 *) s++;
1525 *str++ = bits & 1 ? '1' : '0';
1529 const I32 ai32 = len;
1530 for (len = 0; len < ai32; len++) {
1531 if (len & 7) bits <<= 1;
1533 if (s >= strend) break;
1534 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1535 } else bits = *(U8 *) s++;
1536 *str++ = bits & 0x80 ? '1' : '0';
1540 SvCUR_set(sv, str - SvPVX_const(sv));
1547 /* Preliminary length estimate, acceptable for utf8 too */
1548 if (howlen == e_star || len > (strend - s) * 2)
1549 len = (strend - s) * 2;
1550 sv = sv_2mortal(newSV(len ? len : 1));
1553 if (datumtype == 'h') {
1556 for (len = 0; len < ai32; len++) {
1557 if (len & 1) bits >>= 4;
1559 if (s >= strend) break;
1560 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1561 } else bits = * (U8 *) s++;
1562 *str++ = PL_hexdigit[bits & 15];
1566 const I32 ai32 = len;
1567 for (len = 0; len < ai32; len++) {
1568 if (len & 1) bits <<= 4;
1570 if (s >= strend) break;
1571 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1572 } else bits = *(U8 *) s++;
1573 *str++ = PL_hexdigit[(bits >> 4) & 15];
1577 SvCUR_set(sv, str - SvPVX_const(sv));
1583 if (explicit_length)
1584 /* Switch to "character" mode */
1585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1590 while (len-- > 0 && s < strend) {
1595 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1596 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1597 if (retlen == (STRLEN) -1 || retlen == 0)
1598 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1602 aint = *(U8 *)(s)++;
1603 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV)aint;
1616 while (len-- > 0 && s < strend) {
1618 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1619 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1620 if (retlen == (STRLEN) -1 || retlen == 0)
1621 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1625 else if (checksum > bits_in_uv)
1626 cdouble += (NV) val;
1630 } else if (!checksum)
1632 const U8 ch = *(U8 *) s++;
1635 else if (checksum > bits_in_uv)
1636 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1638 while (len-- > 0) cuv += *(U8 *) s++;
1642 if (explicit_length) {
1643 /* Switch to "bytes in UTF-8" mode */
1644 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1646 /* Should be impossible due to the need_utf8() test */
1647 Perl_croak(aTHX_ "U0 mode on a byte string");
1651 if (len > strend - s) len = strend - s;
1653 if (len && unpack_only_one) len = 1;
1657 while (len-- > 0 && s < strend) {
1661 U8 result[UTF8_MAXLEN];
1662 const char *ptr = s;
1664 /* Bug: warns about bad utf8 even if we are short on bytes
1665 and will break out of the loop */
1666 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1669 len = UTF8SKIP(result);
1670 if (!uni_to_bytes(aTHX_ &ptr, strend,
1671 (char *) &result[1], len-1, 'U')) break;
1672 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1675 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1676 if (retlen == (STRLEN) -1 || retlen == 0)
1677 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1682 else if (checksum > bits_in_uv)
1683 cdouble += (NV) auv;
1688 case 's' | TYPE_IS_SHRIEKING:
1689 #if SHORTSIZE != SIZE16
1692 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1693 DO_BO_UNPACK(ashort, s);
1696 else if (checksum > bits_in_uv)
1697 cdouble += (NV)ashort;
1709 #if U16SIZE > SIZE16
1712 SHIFT16(utf8, s, strend, &ai16, datumtype);
1713 DO_BO_UNPACK(ai16, 16);
1714 #if U16SIZE > SIZE16
1720 else if (checksum > bits_in_uv)
1721 cdouble += (NV)ai16;
1726 case 'S' | TYPE_IS_SHRIEKING:
1727 #if SHORTSIZE != SIZE16
1729 unsigned short aushort;
1730 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1731 DO_BO_UNPACK(aushort, s);
1734 else if (checksum > bits_in_uv)
1735 cdouble += (NV)aushort;
1748 #if U16SIZE > SIZE16
1751 SHIFT16(utf8, s, strend, &au16, datumtype);
1752 DO_BO_UNPACK(au16, 16);
1754 if (datumtype == 'n')
1755 au16 = PerlSock_ntohs(au16);
1758 if (datumtype == 'v')
1763 else if (checksum > bits_in_uv)
1764 cdouble += (NV) au16;
1769 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1770 case 'v' | TYPE_IS_SHRIEKING:
1771 case 'n' | TYPE_IS_SHRIEKING:
1774 # if U16SIZE > SIZE16
1777 SHIFT16(utf8, s, strend, &ai16, datumtype);
1779 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1780 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1781 # endif /* HAS_NTOHS */
1783 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1784 ai16 = (I16) vtohs((U16) ai16);
1785 # endif /* HAS_VTOHS */
1788 else if (checksum > bits_in_uv)
1789 cdouble += (NV) ai16;
1794 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1796 case 'i' | TYPE_IS_SHRIEKING:
1799 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1800 DO_BO_UNPACK(aint, i);
1803 else if (checksum > bits_in_uv)
1804 cdouble += (NV)aint;
1810 case 'I' | TYPE_IS_SHRIEKING:
1813 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1814 DO_BO_UNPACK(auint, i);
1817 else if (checksum > bits_in_uv)
1818 cdouble += (NV)auint;
1826 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1827 #if IVSIZE == INTSIZE
1828 DO_BO_UNPACK(aiv, i);
1829 #elif IVSIZE == LONGSIZE
1830 DO_BO_UNPACK(aiv, l);
1831 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1832 DO_BO_UNPACK(aiv, 64);
1834 Perl_croak(aTHX_ "'j' not supported on this platform");
1838 else if (checksum > bits_in_uv)
1847 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1848 #if IVSIZE == INTSIZE
1849 DO_BO_UNPACK(auv, i);
1850 #elif IVSIZE == LONGSIZE
1851 DO_BO_UNPACK(auv, l);
1852 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1853 DO_BO_UNPACK(auv, 64);
1855 Perl_croak(aTHX_ "'J' not supported on this platform");
1859 else if (checksum > bits_in_uv)
1865 case 'l' | TYPE_IS_SHRIEKING:
1866 #if LONGSIZE != SIZE32
1869 SHIFT_VAR(utf8, s, strend, along, datumtype);
1870 DO_BO_UNPACK(along, l);
1873 else if (checksum > bits_in_uv)
1874 cdouble += (NV)along;
1885 #if U32SIZE > SIZE32
1888 SHIFT32(utf8, s, strend, &ai32, datumtype);
1889 DO_BO_UNPACK(ai32, 32);
1890 #if U32SIZE > SIZE32
1891 if (ai32 > 2147483647) ai32 -= 4294967296;
1895 else if (checksum > bits_in_uv)
1896 cdouble += (NV)ai32;
1901 case 'L' | TYPE_IS_SHRIEKING:
1902 #if LONGSIZE != SIZE32
1904 unsigned long aulong;
1905 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1906 DO_BO_UNPACK(aulong, l);
1909 else if (checksum > bits_in_uv)
1910 cdouble += (NV)aulong;
1923 #if U32SIZE > SIZE32
1926 SHIFT32(utf8, s, strend, &au32, datumtype);
1927 DO_BO_UNPACK(au32, 32);
1929 if (datumtype == 'N')
1930 au32 = PerlSock_ntohl(au32);
1933 if (datumtype == 'V')
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)au32;
1944 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1945 case 'V' | TYPE_IS_SHRIEKING:
1946 case 'N' | TYPE_IS_SHRIEKING:
1949 # if U32SIZE > SIZE32
1952 SHIFT32(utf8, s, strend, &ai32, datumtype);
1954 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1955 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1958 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1959 ai32 = (I32)vtohl((U32)ai32);
1963 else if (checksum > bits_in_uv)
1964 cdouble += (NV)ai32;
1969 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1973 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1974 DO_BO_UNPACK_PC(aptr);
1975 /* newSVpv generates undef if aptr is NULL */
1976 mPUSHs(newSVpv(aptr, 0));
1984 while (len > 0 && s < strend) {
1986 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1987 auv = (auv << 7) | (ch & 0x7f);
1988 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1996 if (++bytes >= sizeof(UV)) { /* promote to string */
1999 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2000 while (s < strend) {
2001 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2002 sv = mul128(sv, (U8)(ch & 0x7f));
2008 t = SvPV_nolen_const(sv);
2017 if ((s >= strend) && bytes)
2018 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2022 if (symptr->howlen == e_star)
2023 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2025 if (s + sizeof(char*) <= strend) {
2027 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2028 DO_BO_UNPACK_PC(aptr);
2029 /* newSVpvn generates undef if aptr is NULL */
2030 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2037 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2038 DO_BO_UNPACK(aquad, 64);
2040 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2041 newSViv((IV)aquad) : newSVnv((NV)aquad));
2042 else if (checksum > bits_in_uv)
2043 cdouble += (NV)aquad;
2051 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2052 DO_BO_UNPACK(auquad, 64);
2054 mPUSHs(auquad <= UV_MAX ?
2055 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2056 else if (checksum > bits_in_uv)
2057 cdouble += (NV)auquad;
2062 #endif /* HAS_QUAD */
2063 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2067 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2068 DO_BO_UNPACK_N(afloat, float);
2078 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2079 DO_BO_UNPACK_N(adouble, double);
2089 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2090 DO_BO_UNPACK_N(anv, NV);
2097 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2100 long double aldouble;
2101 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2102 DO_BO_UNPACK_N(aldouble, long double);
2106 cdouble += aldouble;
2112 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2113 sv = sv_2mortal(newSV(l));
2114 if (l) SvPOK_on(sv);
2117 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2122 next_uni_uu(aTHX_ &s, strend, &a);
2123 next_uni_uu(aTHX_ &s, strend, &b);
2124 next_uni_uu(aTHX_ &s, strend, &c);
2125 next_uni_uu(aTHX_ &s, strend, &d);
2126 hunk[0] = (char)((a << 2) | (b >> 4));
2127 hunk[1] = (char)((b << 4) | (c >> 2));
2128 hunk[2] = (char)((c << 6) | d);
2129 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2137 /* possible checksum byte */
2138 const char *skip = s+UTF8SKIP(s);
2139 if (skip < strend && *skip == '\n')
2145 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2149 len = PL_uudmap[*(U8*)s++] & 077;
2151 if (s < strend && ISUUCHAR(*s))
2152 a = PL_uudmap[*(U8*)s++] & 077;
2155 if (s < strend && ISUUCHAR(*s))
2156 b = PL_uudmap[*(U8*)s++] & 077;
2159 if (s < strend && ISUUCHAR(*s))
2160 c = PL_uudmap[*(U8*)s++] & 077;
2163 if (s < strend && ISUUCHAR(*s))
2164 d = PL_uudmap[*(U8*)s++] & 077;
2167 hunk[0] = (char)((a << 2) | (b >> 4));
2168 hunk[1] = (char)((b << 4) | (c >> 2));
2169 hunk[2] = (char)((c << 6) | d);
2170 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2175 else /* possible checksum byte */
2176 if (s + 1 < strend && s[1] == '\n')
2185 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2186 (checksum > bits_in_uv &&
2187 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2190 anv = (NV) (1 << (checksum & 15));
2191 while (checksum >= 16) {
2195 while (cdouble < 0.0)
2197 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2198 sv = newSVnv(cdouble);
2201 if (checksum < bits_in_uv) {
2202 UV mask = ((UV)1 << checksum) - 1;
2211 if (symptr->flags & FLAG_SLASH){
2212 if (SP - PL_stack_base - start_sp_offset <= 0)
2213 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2214 if( next_symbol(symptr) ){
2215 if( symptr->howlen == e_number )
2216 Perl_croak(aTHX_ "Count after length/code in unpack" );
2218 /* ...end of char buffer then no decent length available */
2219 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2221 /* take top of stack (hope it's numeric) */
2224 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2227 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2229 datumtype = symptr->code;
2230 explicit_length = FALSE;
2238 return SP - PL_stack_base - start_sp_offset;
2246 I32 gimme = GIMME_V;
2249 const char *pat = SvPV_const(left, llen);
2250 const char *s = SvPV_const(right, rlen);
2251 const char *strend = s + rlen;
2252 const char *patend = pat + llen;
2256 cnt = unpackstring(pat, patend, s, strend,
2257 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2258 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2261 if ( !cnt && gimme == G_SCALAR )
2262 PUSHs(&PL_sv_undef);
2267 doencodes(U8 *h, const char *s, I32 len)
2269 *h++ = PL_uuemap[len];
2271 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2272 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2273 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2274 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2279 const char r = (len > 1 ? s[1] : '\0');
2280 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2281 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2282 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2283 *h++ = PL_uuemap[0];
2290 S_is_an_int(pTHX_ const char *s, STRLEN l)
2292 SV *result = newSVpvn(s, l);
2293 char *const result_c = SvPV_nolen(result); /* convenience */
2294 char *out = result_c;
2298 PERL_ARGS_ASSERT_IS_AN_INT;
2306 SvREFCNT_dec(result);
2329 SvREFCNT_dec(result);
2335 SvCUR_set(result, out - result_c);
2339 /* pnum must be '\0' terminated */
2341 S_div128(pTHX_ SV *pnum, bool *done)
2344 char * const s = SvPV(pnum, len);
2348 PERL_ARGS_ASSERT_DIV128;
2352 const int i = m * 10 + (*t - '0');
2353 const int r = (i >> 7); /* r < 10 */
2361 SvCUR_set(pnum, (STRLEN) (t - s));
2366 =for apidoc packlist
2368 The engine implementing pack() Perl function.
2374 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2379 PERL_ARGS_ASSERT_PACKLIST;
2381 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2383 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2384 Also make sure any UTF8 flag is loaded */
2385 SvPV_force_nolen(cat);
2387 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2389 (void)pack_rec( cat, &sym, beglist, endlist );
2392 /* like sv_utf8_upgrade, but also repoint the group start markers */
2394 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2397 const char *from_ptr, *from_start, *from_end, **marks, **m;
2398 char *to_start, *to_ptr;
2400 if (SvUTF8(sv)) return;
2402 from_start = SvPVX_const(sv);
2403 from_end = from_start + SvCUR(sv);
2404 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2405 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2406 if (from_ptr == from_end) {
2407 /* Simple case: no character needs to be changed */
2412 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2413 Newx(to_start, len, char);
2414 Copy(from_start, to_start, from_ptr-from_start, char);
2415 to_ptr = to_start + (from_ptr-from_start);
2417 Newx(marks, sym_ptr->level+2, const char *);
2418 for (group=sym_ptr; group; group = group->previous)
2419 marks[group->level] = from_start + group->strbeg;
2420 marks[sym_ptr->level+1] = from_end+1;
2421 for (m = marks; *m < from_ptr; m++)
2422 *m = to_start + (*m-from_start);
2424 for (;from_ptr < from_end; from_ptr++) {
2425 while (*m == from_ptr) *m++ = to_ptr;
2426 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2430 while (*m == from_ptr) *m++ = to_ptr;
2431 if (m != marks + sym_ptr->level+1) {
2434 Perl_croak(aTHX_ "panic: marks beyond string end");
2436 for (group=sym_ptr; group; group = group->previous)
2437 group->strbeg = marks[group->level] - to_start;
2442 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2443 from_start -= SvIVX(sv);
2446 SvFLAGS(sv) &= ~SVf_OOK;
2449 Safefree(from_start);
2450 SvPV_set(sv, to_start);
2451 SvCUR_set(sv, to_ptr - to_start);
2456 /* Exponential string grower. Makes string extension effectively O(n)
2457 needed says how many extra bytes we need (not counting the final '\0')
2458 Only grows the string if there is an actual lack of space
2461 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2462 const STRLEN cur = SvCUR(sv);
2463 const STRLEN len = SvLEN(sv);
2466 PERL_ARGS_ASSERT_SV_EXP_GROW;
2468 if (len - cur > needed) return SvPVX(sv);
2469 extend = needed > len ? needed : len;
2470 return SvGROW(sv, len+extend+1);
2475 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2478 tempsym_t lookahead;
2479 I32 items = endlist - beglist;
2480 bool found = next_symbol(symptr);
2481 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2482 bool warn_utf8 = ckWARN(WARN_UTF8);
2484 PERL_ARGS_ASSERT_PACK_REC;
2486 if (symptr->level == 0 && found && symptr->code == 'U') {
2487 marked_upgrade(aTHX_ cat, symptr);
2488 symptr->flags |= FLAG_DO_UTF8;
2491 symptr->strbeg = SvCUR(cat);
2497 SV *lengthcode = NULL;
2498 I32 datumtype = symptr->code;
2499 howlen_t howlen = symptr->howlen;
2500 char *start = SvPVX(cat);
2501 char *cur = start + SvCUR(cat);
2503 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2507 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2511 /* e_no_len and e_number */
2512 len = symptr->length;
2517 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2519 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2520 /* We can process this letter. */
2521 STRLEN size = props & PACK_SIZE_MASK;
2522 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2526 /* Look ahead for next symbol. Do we have code/code? */
2527 lookahead = *symptr;
2528 found = next_symbol(&lookahead);
2529 if (symptr->flags & FLAG_SLASH) {
2531 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2532 if (strchr("aAZ", lookahead.code)) {
2533 if (lookahead.howlen == e_number) count = lookahead.length;
2536 if (SvGAMAGIC(*beglist)) {
2537 /* Avoid reading the active data more than once
2538 by copying it to a temporary. */
2540 const char *const pv = SvPV_const(*beglist, len);
2542 = newSVpvn_flags(pv, len,
2543 SVs_TEMP | SvUTF8(*beglist));
2546 count = DO_UTF8(*beglist) ?
2547 sv_len_utf8(*beglist) : sv_len(*beglist);
2550 if (lookahead.code == 'Z') count++;
2553 if (lookahead.howlen == e_number && lookahead.length < items)
2554 count = lookahead.length;
2557 lookahead.howlen = e_number;
2558 lookahead.length = count;
2559 lengthcode = sv_2mortal(newSViv(count));
2562 /* Code inside the switch must take care to properly update
2563 cat (CUR length and '\0' termination) if it updated *cur and
2564 doesn't simply leave using break */
2565 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2567 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2568 (int) TYPE_NO_MODIFIERS(datumtype));
2570 Perl_croak(aTHX_ "'%%' may not be used in pack");
2573 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2574 case '.' | TYPE_IS_SHRIEKING:
2577 if (howlen == e_star) from = start;
2578 else if (len == 0) from = cur;
2580 tempsym_t *group = symptr;
2582 while (--len && group) group = group->previous;
2583 from = group ? start + group->strbeg : start;
2586 len = SvIV(fromstr);
2588 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 case '@' | TYPE_IS_SHRIEKING:
2592 from = start + symptr->strbeg;
2594 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2595 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2596 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2600 while (len && from < cur) {
2601 from += UTF8SKIP(from);
2605 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2607 /* Here we know from == cur */
2609 GROWING(0, cat, start, cur, len);
2610 Zero(cur, len, char);
2612 } else if (from < cur) {
2615 } else goto no_change;
2623 if (len > 0) goto grow;
2624 if (len == 0) goto no_change;
2631 tempsym_t savsym = *symptr;
2632 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2633 symptr->flags |= group_modifiers;
2634 symptr->patend = savsym.grpend;
2636 symptr->previous = &lookahead;
2639 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2640 else symptr->flags &= ~FLAG_PARSE_UTF8;
2641 was_utf8 = SvUTF8(cat);
2642 symptr->patptr = savsym.grpbeg;
2643 beglist = pack_rec(cat, symptr, beglist, endlist);
2644 if (SvUTF8(cat) != was_utf8)
2645 /* This had better be an upgrade while in utf8==0 mode */
2648 if (savsym.howlen == e_star && beglist == endlist)
2649 break; /* No way to continue */
2651 items = endlist - beglist;
2652 lookahead.flags = symptr->flags & ~group_modifiers;
2655 case 'X' | TYPE_IS_SHRIEKING:
2656 if (!len) /* Avoid division by 0 */
2663 hop += UTF8SKIP(hop);
2670 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2674 len = (cur-start) % len;
2678 if (len < 1) goto no_change;
2682 Perl_croak(aTHX_ "'%c' outside of string in pack",
2683 (int) TYPE_NO_MODIFIERS(datumtype));
2684 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2686 Perl_croak(aTHX_ "'%c' outside of string in pack",
2687 (int) TYPE_NO_MODIFIERS(datumtype));
2693 if (cur - start < len)
2694 Perl_croak(aTHX_ "'%c' outside of string in pack",
2695 (int) TYPE_NO_MODIFIERS(datumtype));
2698 if (cur < start+symptr->strbeg) {
2699 /* Make sure group starts don't point into the void */
2701 const STRLEN length = cur-start;
2702 for (group = symptr;
2703 group && length < group->strbeg;
2704 group = group->previous) group->strbeg = length;
2705 lookahead.strbeg = length;
2708 case 'x' | TYPE_IS_SHRIEKING: {
2710 if (!len) /* Avoid division by 0 */
2712 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2713 else ai32 = (cur - start) % len;
2714 if (ai32 == 0) goto no_change;
2726 aptr = SvPV_const(fromstr, fromlen);
2727 if (DO_UTF8(fromstr)) {
2728 const char *end, *s;
2730 if (!utf8 && !SvUTF8(cat)) {
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2736 cur = start + SvCUR(cat);
2738 if (howlen == e_star) {
2739 if (utf8) goto string_copy;
2743 end = aptr + fromlen;
2744 fromlen = datumtype == 'Z' ? len-1 : len;
2745 while ((I32) fromlen > 0 && s < end) {
2750 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2753 if (datumtype == 'Z') len++;
2759 fromlen = len - fromlen;
2760 if (datumtype == 'Z') fromlen--;
2761 if (howlen == e_star) {
2763 if (datumtype == 'Z') len++;
2765 GROWING(0, cat, start, cur, len);
2766 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2767 datumtype | TYPE_IS_PACK))
2768 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
2772 if (howlen == e_star) {
2774 if (datumtype == 'Z') len++;
2776 if (len <= (I32) fromlen) {
2778 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2780 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2782 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2783 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2785 while (fromlen > 0) {
2786 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2792 if (howlen == e_star) {
2794 if (datumtype == 'Z') len++;
2796 if (len <= (I32) fromlen) {
2798 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2800 GROWING(0, cat, start, cur, len);
2801 Copy(aptr, cur, fromlen, char);
2805 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2812 const char *str, *end;
2819 str = SvPV_const(fromstr, fromlen);
2820 end = str + fromlen;
2821 if (DO_UTF8(fromstr)) {
2823 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2825 utf8_source = FALSE;
2826 utf8_flags = 0; /* Unused, but keep compilers happy */
2828 if (howlen == e_star) len = fromlen;
2829 field_len = (len+7)/8;
2830 GROWING(utf8, cat, start, cur, field_len);
2831 if (len > (I32)fromlen) len = fromlen;
2834 if (datumtype == 'B')
2838 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2840 } else bits |= *str++ & 1;
2841 if (l & 7) bits <<= 1;
2843 PUSH_BYTE(utf8, cur, bits);
2848 /* datumtype == 'b' */
2852 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2853 if (val & 1) bits |= 0x80;
2854 } else if (*str++ & 1)
2856 if (l & 7) bits >>= 1;
2858 PUSH_BYTE(utf8, cur, bits);
2864 if (datumtype == 'B')
2865 bits <<= 7 - (l & 7);
2867 bits >>= 7 - (l & 7);
2868 PUSH_BYTE(utf8, cur, bits);
2871 /* Determine how many chars are left in the requested field */
2873 if (howlen == e_star) field_len = 0;
2874 else field_len -= l;
2875 Zero(cur, field_len, char);
2881 const char *str, *end;
2888 str = SvPV_const(fromstr, fromlen);
2889 end = str + fromlen;
2890 if (DO_UTF8(fromstr)) {
2892 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2894 utf8_source = FALSE;
2895 utf8_flags = 0; /* Unused, but keep compilers happy */
2897 if (howlen == e_star) len = fromlen;
2898 field_len = (len+1)/2;
2899 GROWING(utf8, cat, start, cur, field_len);
2900 if (!utf8 && len > (I32)fromlen) len = fromlen;
2903 if (datumtype == 'H')
2907 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2908 if (val < 256 && isALPHA(val))
2909 bits |= (val + 9) & 0xf;
2912 } else if (isALPHA(*str))
2913 bits |= (*str++ + 9) & 0xf;
2915 bits |= *str++ & 0xf;
2916 if (l & 1) bits <<= 4;
2918 PUSH_BYTE(utf8, cur, bits);
2926 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2927 if (val < 256 && isALPHA(val))
2928 bits |= ((val + 9) & 0xf) << 4;
2930 bits |= (val & 0xf) << 4;
2931 } else if (isALPHA(*str))
2932 bits |= ((*str++ + 9) & 0xf) << 4;
2934 bits |= (*str++ & 0xf) << 4;
2935 if (l & 1) bits >>= 4;
2937 PUSH_BYTE(utf8, cur, bits);
2943 PUSH_BYTE(utf8, cur, bits);
2946 /* Determine how many chars are left in the requested field */
2948 if (howlen == e_star) field_len = 0;
2949 else field_len -= l;
2950 Zero(cur, field_len, char);
2958 aiv = SvIV(fromstr);
2959 if ((-128 > aiv || aiv > 127))
2960 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2961 "Character in 'c' format wrapped in pack");
2962 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2967 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2973 aiv = SvIV(fromstr);
2974 if ((0 > aiv || aiv > 0xff))
2975 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2976 "Character in 'C' format wrapped in pack");
2977 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2982 U8 in_bytes = (U8)IN_BYTES;
2984 end = start+SvLEN(cat)-1;
2985 if (utf8) end -= UTF8_MAXLEN-1;
2989 auv = SvUV(fromstr);
2990 if (in_bytes) auv = auv % 0x100;
2995 SvCUR_set(cat, cur - start);
2997 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2998 end = start+SvLEN(cat)-UTF8_MAXLEN;
3000 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3003 0 : UNICODE_ALLOW_ANY);
3008 SvCUR_set(cat, cur - start);
3009 marked_upgrade(aTHX_ cat, symptr);
3010 lookahead.flags |= FLAG_DO_UTF8;
3011 lookahead.strbeg = symptr->strbeg;
3014 cur = start + SvCUR(cat);
3015 end = start+SvLEN(cat)-UTF8_MAXLEN;
3018 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3019 "Character in 'W' format wrapped in pack");
3024 SvCUR_set(cat, cur - start);
3025 GROWING(0, cat, start, cur, len+1);
3026 end = start+SvLEN(cat)-1;
3028 *(U8 *) cur++ = (U8)auv;
3037 if (!(symptr->flags & FLAG_DO_UTF8)) {
3038 marked_upgrade(aTHX_ cat, symptr);
3039 lookahead.flags |= FLAG_DO_UTF8;
3040 lookahead.strbeg = symptr->strbeg;
3046 end = start+SvLEN(cat);
3047 if (!utf8) end -= UTF8_MAXLEN;
3051 auv = SvUV(fromstr);
3053 U8 buffer[UTF8_MAXLEN], *endb;
3054 endb = uvuni_to_utf8_flags(buffer, auv,
3056 0 : UNICODE_ALLOW_ANY);
3057 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3059 SvCUR_set(cat, cur - start);
3060 GROWING(0, cat, start, cur,
3061 len+(endb-buffer)*UTF8_EXPAND);
3062 end = start+SvLEN(cat);
3064 cur = bytes_to_uni(buffer, endb-buffer, cur);
3068 SvCUR_set(cat, cur - start);
3069 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3070 end = start+SvLEN(cat)-UTF8_MAXLEN;
3072 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3074 0 : UNICODE_ALLOW_ANY);
3079 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3085 anv = SvNV(fromstr);
3087 /* VOS does not automatically map a floating-point overflow
3088 during conversion from double to float into infinity, so we
3089 do it by hand. This code should either be generalized for
3090 any OS that needs it, or removed if and when VOS implements
3091 posix-976 (suggestion to support mapping to infinity).
3092 Paul.Green@stratus.com 02-04-02. */
3094 extern const float _float_constants[];
3096 afloat = _float_constants[0]; /* single prec. inf. */
3097 else if (anv < -FLT_MAX)
3098 afloat = _float_constants[0]; /* single prec. inf. */
3099 else afloat = (float) anv;
3102 # if defined(VMS) && !defined(__IEEE_FP)
3103 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3104 * on Alpha; fake it if we don't have them.
3108 else if (anv < -FLT_MAX)
3110 else afloat = (float)anv;
3112 afloat = (float)anv;
3114 #endif /* __VOS__ */
3115 DO_BO_PACK_N(afloat, float);
3116 PUSH_VAR(utf8, cur, afloat);
3124 anv = SvNV(fromstr);
3126 /* VOS does not automatically map a floating-point overflow
3127 during conversion from long double to double into infinity,
3128 so we do it by hand. This code should either be generalized
3129 for any OS that needs it, or removed if and when VOS
3130 implements posix-976 (suggestion to support mapping to
3131 infinity). Paul.Green@stratus.com 02-04-02. */
3133 extern const double _double_constants[];
3135 adouble = _double_constants[0]; /* double prec. inf. */
3136 else if (anv < -DBL_MAX)
3137 adouble = _double_constants[0]; /* double prec. inf. */
3138 else adouble = (double) anv;
3141 # if defined(VMS) && !defined(__IEEE_FP)
3142 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3143 * on Alpha; fake it if we don't have them.
3147 else if (anv < -DBL_MAX)
3149 else adouble = (double)anv;
3151 adouble = (double)anv;
3153 #endif /* __VOS__ */
3154 DO_BO_PACK_N(adouble, double);
3155 PUSH_VAR(utf8, cur, adouble);
3160 Zero(&anv, 1, NV); /* can be long double with unused bits */
3163 anv = SvNV(fromstr);
3164 DO_BO_PACK_N(anv, NV);
3165 PUSH_VAR(utf8, cur, anv);
3169 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3171 long double aldouble;
3172 /* long doubles can have unused bits, which may be nonzero */
3173 Zero(&aldouble, 1, long double);
3176 aldouble = (long double)SvNV(fromstr);
3177 DO_BO_PACK_N(aldouble, long double);
3178 PUSH_VAR(utf8, cur, aldouble);
3183 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3184 case 'n' | TYPE_IS_SHRIEKING:
3190 ai16 = (I16)SvIV(fromstr);
3192 ai16 = PerlSock_htons(ai16);
3194 PUSH16(utf8, cur, &ai16);
3197 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3198 case 'v' | TYPE_IS_SHRIEKING:
3204 ai16 = (I16)SvIV(fromstr);
3208 PUSH16(utf8, cur, &ai16);
3211 case 'S' | TYPE_IS_SHRIEKING:
3212 #if SHORTSIZE != SIZE16
3214 unsigned short aushort;
3216 aushort = SvUV(fromstr);
3217 DO_BO_PACK(aushort, s);
3218 PUSH_VAR(utf8, cur, aushort);
3228 au16 = (U16)SvUV(fromstr);
3229 DO_BO_PACK(au16, 16);
3230 PUSH16(utf8, cur, &au16);
3233 case 's' | TYPE_IS_SHRIEKING:
3234 #if SHORTSIZE != SIZE16
3238 ashort = SvIV(fromstr);
3239 DO_BO_PACK(ashort, s);
3240 PUSH_VAR(utf8, cur, ashort);
3250 ai16 = (I16)SvIV(fromstr);
3251 DO_BO_PACK(ai16, 16);
3252 PUSH16(utf8, cur, &ai16);
3256 case 'I' | TYPE_IS_SHRIEKING:
3260 auint = SvUV(fromstr);
3261 DO_BO_PACK(auint, i);
3262 PUSH_VAR(utf8, cur, auint);
3269 aiv = SvIV(fromstr);
3270 #if IVSIZE == INTSIZE
3272 #elif IVSIZE == LONGSIZE
3274 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3275 DO_BO_PACK(aiv, 64);
3277 Perl_croak(aTHX_ "'j' not supported on this platform");
3279 PUSH_VAR(utf8, cur, aiv);
3286 auv = SvUV(fromstr);
3287 #if UVSIZE == INTSIZE
3289 #elif UVSIZE == LONGSIZE
3291 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3292 DO_BO_PACK(auv, 64);
3294 Perl_croak(aTHX_ "'J' not supported on this platform");
3296 PUSH_VAR(utf8, cur, auv);
3303 anv = SvNV(fromstr);
3307 SvCUR_set(cat, cur - start);
3308 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3311 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3312 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3313 any negative IVs will have already been got by the croak()
3314 above. IOK is untrue for fractions, so we test them
3315 against UV_MAX_P1. */
3316 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3317 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3318 char *in = buf + sizeof(buf);
3319 UV auv = SvUV(fromstr);
3322 *--in = (char)((auv & 0x7f) | 0x80);
3325 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3326 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3327 in, (buf + sizeof(buf)) - in);
3328 } else if (SvPOKp(fromstr))
3330 else if (SvNOKp(fromstr)) {
3331 /* 10**NV_MAX_10_EXP is the largest power of 10
3332 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3333 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3334 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3335 And with that many bytes only Inf can overflow.
3336 Some C compilers are strict about integral constant
3337 expressions so we conservatively divide by a slightly
3338 smaller integer instead of multiplying by the exact
3339 floating-point value.
3341 #ifdef NV_MAX_10_EXP
3342 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3343 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3345 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3346 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3348 char *in = buf + sizeof(buf);
3350 anv = Perl_floor(anv);
3352 const NV next = Perl_floor(anv / 128);
3353 if (in <= buf) /* this cannot happen ;-) */
3354 Perl_croak(aTHX_ "Cannot compress integer in pack");
3355 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3358 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3359 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3360 in, (buf + sizeof(buf)) - in);
3369 /* Copy string and check for compliance */
3370 from = SvPV_const(fromstr, len);
3371 if ((norm = is_an_int(from, len)) == NULL)
3372 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3374 Newx(result, len, char);
3377 while (!done) *--in = div128(norm, &done) | 0x80;
3378 result[len - 1] &= 0x7F; /* clear continue bit */
3379 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3380 in, (result + len) - in);
3382 SvREFCNT_dec(norm); /* free norm */
3387 case 'i' | TYPE_IS_SHRIEKING:
3391 aint = SvIV(fromstr);
3392 DO_BO_PACK(aint, i);
3393 PUSH_VAR(utf8, cur, aint);
3396 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3397 case 'N' | TYPE_IS_SHRIEKING:
3403 au32 = SvUV(fromstr);
3405 au32 = PerlSock_htonl(au32);
3407 PUSH32(utf8, cur, &au32);
3410 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3411 case 'V' | TYPE_IS_SHRIEKING:
3417 au32 = SvUV(fromstr);
3421 PUSH32(utf8, cur, &au32);
3424 case 'L' | TYPE_IS_SHRIEKING:
3425 #if LONGSIZE != SIZE32
3427 unsigned long aulong;
3429 aulong = SvUV(fromstr);
3430 DO_BO_PACK(aulong, l);
3431 PUSH_VAR(utf8, cur, aulong);
3441 au32 = SvUV(fromstr);
3442 DO_BO_PACK(au32, 32);
3443 PUSH32(utf8, cur, &au32);
3446 case 'l' | TYPE_IS_SHRIEKING:
3447 #if LONGSIZE != SIZE32
3451 along = SvIV(fromstr);
3452 DO_BO_PACK(along, l);
3453 PUSH_VAR(utf8, cur, along);
3463 ai32 = SvIV(fromstr);
3464 DO_BO_PACK(ai32, 32);
3465 PUSH32(utf8, cur, &ai32);
3473 auquad = (Uquad_t) SvUV(fromstr);
3474 DO_BO_PACK(auquad, 64);
3475 PUSH_VAR(utf8, cur, auquad);
3482 aquad = (Quad_t)SvIV(fromstr);
3483 DO_BO_PACK(aquad, 64);
3484 PUSH_VAR(utf8, cur, aquad);
3487 #endif /* HAS_QUAD */
3489 len = 1; /* assume SV is correct length */
3490 GROWING(utf8, cat, start, cur, sizeof(char *));
3497 SvGETMAGIC(fromstr);
3498 if (!SvOK(fromstr)) aptr = NULL;
3500 /* XXX better yet, could spirit away the string to
3501 * a safe spot and hang on to it until the result
3502 * of pack() (and all copies of the result) are
3505 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3506 !SvREADONLY(fromstr)))) {
3507 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3508 "Attempt to pack pointer to temporary value");
3510 if (SvPOK(fromstr) || SvNIOK(fromstr))
3511 aptr = SvPV_nomg_const_nolen(fromstr);
3513 aptr = SvPV_force_flags_nolen(fromstr, 0);
3515 DO_BO_PACK_PC(aptr);
3516 PUSH_VAR(utf8, cur, aptr);
3520 const char *aptr, *aend;
3524 if (len <= 2) len = 45;
3525 else len = len / 3 * 3;
3527 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3528 "Field too wide in 'u' format in pack");
3531 aptr = SvPV_const(fromstr, fromlen);
3532 from_utf8 = DO_UTF8(fromstr);
3534 aend = aptr + fromlen;
3535 fromlen = sv_len_utf8(fromstr);
3536 } else aend = NULL; /* Unused, but keep compilers happy */
3537 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3538 while (fromlen > 0) {
3541 U8 hunk[1+63/3*4+1];
3543 if ((I32)fromlen > len)
3549 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3550 'u' | TYPE_IS_PACK)) {
3552 SvCUR_set(cat, cur - start);
3553 Perl_croak(aTHX_ "panic: string is shorter than advertised");
3555 end = doencodes(hunk, buffer, todo);
3557 end = doencodes(hunk, aptr, todo);
3560 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3567 SvCUR_set(cat, cur - start);
3569 *symptr = lookahead;
3578 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3579 register SV *cat = TARG;
3581 SV *pat_sv = *++MARK;
3582 register const char *pat = SvPV_const(pat_sv, fromlen);
3583 register const char *patend = pat + fromlen;
3589 packlist(cat, pat, patend, MARK, SP + 1);
3599 * c-indentation-style: bsd
3601 * indent-tabs-mode: t
3604 * ex: set ts=8 sts=4 sw=4 noet: