3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
74 # define PERL_PACK_CAN_BYTEORDER
75 # define PERL_PACK_CAN_SHRIEKSIGN
81 /* Maximum number of bytes to which a byte can grow due to upgrade */
85 * Offset for integer pack/unpack.
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
109 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
110 # define OFF16(p) ((char*)(p))
111 # define OFF32(p) ((char*)(p))
113 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
114 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
117 ++++ bad cray byte order
121 # define OFF16(p) ((char *) (p))
122 # define OFF32(p) ((char *) (p))
125 /* Only to be used inside a loop (see the break) */
126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
130 Copy(s, OFF16(p), SIZE16, char); \
135 /* Only to be used inside a loop (see the break) */
136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
140 Copy(s, OFF32(p), SIZE32, char); \
145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
155 Copy(s, (char *) &var, sizeof(var), char); \
160 #define PUSH_VAR(utf8, aptr, var) \
161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164 #define MAX_SUB_TEMPLATE_LEVEL 100
166 /* flags (note that type modifiers can also be used as flags!) */
167 #define FLAG_WAS_UTF8 0x40
168 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
169 #define FLAG_UNPACK_ONLY_ONE 0x10
170 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
171 #define FLAG_SLASH 0x04
172 #define FLAG_COMMA 0x02
173 #define FLAG_PACK 0x01
176 S_mul128(pTHX_ SV *sv, U8 m)
179 char *s = SvPV(sv, len);
182 PERL_ARGS_ASSERT_MUL128;
184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
185 SV * const tmpNew = newSVpvs("0000000000");
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
193 while (!*t) /* trailing '\0'? */
196 const U32 i = ((*t - '0') << 7) + m;
197 *(t--) = '0' + (char)(i % 10);
203 /* Explosives and implosives. */
205 #if 'I' == 73 && 'J' == 74
206 /* On an ASCII/ISO kind of system */
207 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
210 Some other sort of character set - use memchr() so we don't match
213 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
217 #define TYPE_IS_SHRIEKING 0x100
218 #define TYPE_IS_BIG_ENDIAN 0x200
219 #define TYPE_IS_LITTLE_ENDIAN 0x400
220 #define TYPE_IS_PACK 0x800
221 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
223 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
231 #ifndef PERL_PACK_CAN_BYTEORDER
232 /* Put "can't" first because it is shorter */
233 # define TYPE_ENDIANNESS(t) 0
234 # define TYPE_NO_ENDIANNESS(t) (t)
236 # define ENDIANNESS_ALLOWED_TYPES ""
238 # define DO_BO_UNPACK(var, type)
239 # define DO_BO_PACK(var, type)
240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242 # define DO_BO_UNPACK_N(var, type)
243 # define DO_BO_PACK_N(var, type)
244 # define DO_BO_UNPACK_P(var)
245 # define DO_BO_PACK_P(var)
246 # define DO_BO_UNPACK_PC(var)
247 # define DO_BO_PACK_PC(var)
249 #else /* PERL_PACK_CAN_BYTEORDER */
251 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
254 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
256 # define DO_BO_UNPACK(var, type) \
258 switch (TYPE_ENDIANNESS(datumtype)) { \
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 # define DO_BO_PACK(var, type) \
267 switch (TYPE_ENDIANNESS(datumtype)) { \
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
276 switch (TYPE_ENDIANNESS(datumtype)) { \
277 case TYPE_IS_BIG_ENDIAN: \
278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
280 case TYPE_IS_LITTLE_ENDIAN: \
281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
290 switch (TYPE_ENDIANNESS(datumtype)) { \
291 case TYPE_IS_BIG_ENDIAN: \
292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
294 case TYPE_IS_LITTLE_ENDIAN: \
295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
302 # define BO_CANT_DOIT(action, type) \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
318 # if PTRSIZE == INTSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
323 # elif PTRSIZE == LONGSIZE
324 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
328 # elif PTRSIZE == IVSIZE
329 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
334 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
336 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
340 # if defined(my_htolen) && defined(my_letohn) && \
341 defined(my_htoben) && defined(my_betohn)
342 # define DO_BO_UNPACK_N(var, type) \
344 switch (TYPE_ENDIANNESS(datumtype)) { \
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
351 # define DO_BO_PACK_N(var, type) \
353 switch (TYPE_ENDIANNESS(datumtype)) { \
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
360 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
364 #endif /* PERL_PACK_CAN_BYTEORDER */
366 #define PACK_SIZE_CANNOT_CSUM 0x80
367 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
368 #define PACK_SIZE_MASK 0x3F
370 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
373 #if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
377 typedef U8 packprops_t;
380 STATIC const packprops_t packprops[512] = {
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 /* C */ sizeof(unsigned char),
388 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
389 /* D */ LONG_DOUBLESIZE,
396 /* I */ sizeof(unsigned int),
403 #if defined(HAS_QUAD)
404 /* Q */ sizeof(Uquad_t),
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
418 /* f */ sizeof(float),
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
428 #if defined(HAS_QUAD)
429 /* q */ sizeof(Quad_t),
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
455 /* L */ sizeof(unsigned long),
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
463 /* S */ sizeof(unsigned short),
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 /* l */ sizeof(long),
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
482 /* s */ sizeof(short),
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
500 /* EBCDIC (or bust) */
501 STATIC const packprops_t packprops[512] = {
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
515 /* f */ sizeof(float),
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
526 #if defined(HAS_QUAD)
527 /* q */ sizeof(Quad_t),
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 /* C */ sizeof(unsigned char),
539 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
540 /* D */ LONG_DOUBLESIZE,
547 /* I */ sizeof(unsigned int),
555 #if defined(HAS_QUAD)
556 /* Q */ sizeof(Uquad_t),
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
610 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
621 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
633 if (ckWARN(WARN_UNPACK))
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
636 (int) TYPE_NO_MODIFIERS(datumtype));
643 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
648 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
652 const char *from = *s;
654 const U32 flags = ckWARN(WARN_UTF8) ?
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
662 } else from += retlen;
667 *(U8 *)buf++ = (U8)val;
669 /* We have enough characters for the buffer. Did we have problems ? */
672 /* Rewalk the string fragment while warning */
674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
679 if (from > end) from = end;
681 if ((bad & 2) && ckWARN(WARN_UNPACK))
682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
683 WARN_PACK : WARN_UNPACK),
684 "Character(s) in '%c' format wrapped in %s",
685 (int) TYPE_NO_MODIFIERS(datumtype),
686 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
693 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
698 if (val >= 0x100 || !ISUUCHAR(val) ||
699 retlen == (STRLEN) -1 || retlen == 0) {
703 *out = PL_uudmap[val] & 077;
709 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
710 const U8 * const end = start + len;
712 PERL_ARGS_ASSERT_BYTES_TO_UNI;
714 while (start < end) {
715 const UV uv = NATIVE_TO_ASCII(*start);
716 if (UNI_IS_INVARIANT(uv))
717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
727 #define PUSH_BYTES(utf8, cur, buf, len) \
730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
732 Copy(buf, cur, len, char); \
737 #define GROWING(utf8, cat, start, cur, in_len) \
739 STRLEN glen = (in_len); \
740 if (utf8) glen *= UTF8_EXPAND; \
741 if ((cur) + glen >= (start) + SvLEN(cat)) { \
742 (start) = sv_exp_grow(cat, glen); \
743 (cur) = (start) + SvCUR(cat); \
747 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
749 const STRLEN glen = (in_len); \
751 if (utf8) gl *= UTF8_EXPAND; \
752 if ((cur) + gl >= (start) + SvLEN(cat)) { \
754 SvCUR_set((cat), (cur) - (start)); \
755 (start) = sv_exp_grow(cat, gl); \
756 (cur) = (start) + SvCUR(cat); \
758 PUSH_BYTES(utf8, cur, buf, glen); \
761 #define PUSH_BYTE(utf8, s, byte) \
764 const U8 au8 = (byte); \
765 (s) = bytes_to_uni(&au8, 1, (s)); \
766 } else *(U8 *)(s)++ = (byte); \
769 /* Only to be used inside a loop (see the break) */
770 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773 if (str >= end) break; \
774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
775 if (retlen == (STRLEN) -1 || retlen == 0) { \
777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
782 static const char *_action( const tempsym_t* symptr )
784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
787 /* Returns the sizeof() struct described by pat */
789 S_measure_struct(pTHX_ tempsym_t* symptr)
793 PERL_ARGS_ASSERT_MEASURE_STRUCT;
795 while (next_symbol(symptr)) {
799 switch (symptr->howlen) {
801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
805 /* e_no_len and e_number */
806 len = symptr->length;
810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
819 #ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
826 case 'U': /* XXXX Is it correct? */
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 (int) TYPE_NO_MODIFIERS(symptr->code),
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 if (!len) /* Avoid division by 0 */
853 len = total % len; /* Assumed: the start is aligned. */
858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
888 size = sizeof(char*);
898 /* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
904 PERL_ARGS_ASSERT_GROUP_END;
906 while (patptr < patend) {
907 const char c = *patptr++;
914 while (patptr < patend && *patptr != '\n')
918 patptr = group_end(patptr, patend, ')') + 1;
920 patptr = group_end(patptr, patend, ']') + 1;
922 Perl_croak(aTHX_ "No group ending character '%c' found in template",
928 /* Convert unsigned decimal number to binary.
929 * Expects a pointer to the first digit and address of length variable
930 * Advances char pointer to 1st non-digit char and returns number
933 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
935 I32 len = *patptr++ - '0';
937 PERL_ARGS_ASSERT_GET_NUM;
939 while (isDIGIT(*patptr)) {
940 if (len >= 0x7FFFFFFF/10)
941 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942 len = (len * 10) + (*patptr++ - '0');
948 /* The marvellous template parsing routine: Using state stored in *symptr,
949 * locates next template code and count
952 S_next_symbol(pTHX_ tempsym_t* symptr )
954 const char* patptr = symptr->patptr;
955 const char* const patend = symptr->patend;
957 PERL_ARGS_ASSERT_NEXT_SYMBOL;
959 symptr->flags &= ~FLAG_SLASH;
961 while (patptr < patend) {
962 if (isSPACE(*patptr))
964 else if (*patptr == '#') {
966 while (patptr < patend && *patptr != '\n')
971 /* We should have found a template code */
972 I32 code = *patptr++ & 0xFF;
973 U32 inherited_modifiers = 0;
975 if (code == ','){ /* grandfather in commas but with a warning */
976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977 symptr->flags |= FLAG_COMMA;
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Invalid type ',' in %s", _action( symptr ) );
984 /* for '(', skip to ')' */
986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987 Perl_croak(aTHX_ "()-group starts with a count in %s",
989 symptr->grpbeg = patptr;
990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
996 /* look for group modifiers to inherit */
997 if (TYPE_ENDIANNESS(symptr->flags)) {
998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1002 /* look for modifiers */
1003 while (patptr < patend) {
1004 const char *allowed;
1008 modifier = TYPE_IS_SHRIEKING;
1009 allowed = SHRIEKING_ALLOWED_TYPES;
1011 #ifdef PERL_PACK_CAN_BYTEORDER
1013 modifier = TYPE_IS_BIG_ENDIAN;
1014 allowed = ENDIANNESS_ALLOWED_TYPES;
1017 modifier = TYPE_IS_LITTLE_ENDIAN;
1018 allowed = ENDIANNESS_ALLOWED_TYPES;
1020 #endif /* PERL_PACK_CAN_BYTEORDER */
1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1032 allowed, _action( symptr ) );
1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038 TYPE_ENDIANNESS_MASK)
1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1040 *patptr, _action( symptr ) );
1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044 "Duplicate modifier '%c' after '%c' in %s",
1045 *patptr, (int) TYPE_NO_MODIFIERS(code),
1046 _action( symptr ) );
1053 /* inherit modifiers */
1054 code |= inherited_modifiers;
1056 /* look for count and/or / */
1057 if (patptr < patend) {
1058 if (isDIGIT(*patptr)) {
1059 patptr = get_num( patptr, &symptr->length );
1060 symptr->howlen = e_number;
1062 } else if (*patptr == '*') {
1064 symptr->howlen = e_star;
1066 } else if (*patptr == '[') {
1067 const char* lenptr = ++patptr;
1068 symptr->howlen = e_number;
1069 patptr = group_end( patptr, patend, ']' ) + 1;
1070 /* what kind of [] is it? */
1071 if (isDIGIT(*lenptr)) {
1072 lenptr = get_num( lenptr, &symptr->length );
1073 if( *lenptr != ']' )
1074 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1075 _action( symptr ) );
1077 tempsym_t savsym = *symptr;
1078 symptr->patend = patptr-1;
1079 symptr->patptr = lenptr;
1080 savsym.length = measure_struct(symptr);
1084 symptr->howlen = e_no_len;
1089 while (patptr < patend) {
1090 if (isSPACE(*patptr))
1092 else if (*patptr == '#') {
1094 while (patptr < patend && *patptr != '\n')
1096 if (patptr < patend)
1099 if (*patptr == '/') {
1100 symptr->flags |= FLAG_SLASH;
1102 if (patptr < patend &&
1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1105 _action( symptr ) );
1111 /* at end - no count, no / */
1112 symptr->howlen = e_no_len;
1116 symptr->code = code;
1117 symptr->patptr = patptr;
1121 symptr->patptr = patptr;
1126 There is no way to cleanly handle the case where we should process the
1127 string per byte in its upgraded form while it's really in downgraded form
1128 (e.g. estimates like strend-s as an upper bound for the number of
1129 characters left wouldn't work). So if we foresee the need of this
1130 (pattern starts with U or contains U0), we want to work on the encoded
1131 version of the string. Users are advised to upgrade their pack string
1132 themselves if they need to do a lot of unpacks like this on it
1135 need_utf8(const char *pat, const char *patend)
1139 PERL_ARGS_ASSERT_NEED_UTF8;
1141 while (pat < patend) {
1142 if (pat[0] == '#') {
1144 pat = (const char *) memchr(pat, '\n', patend-pat);
1145 if (!pat) return FALSE;
1146 } else if (pat[0] == 'U') {
1147 if (first || pat[1] == '0') return TRUE;
1148 } else first = FALSE;
1155 first_symbol(const char *pat, const char *patend) {
1156 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1158 while (pat < patend) {
1159 if (pat[0] != '#') return pat[0];
1161 pat = (const char *) memchr(pat, '\n', patend-pat);
1169 =for apidoc unpackstring
1171 The engine implementing unpack() Perl function. C<unpackstring> puts the
1172 extracted list items on the stack and returns the number of elements.
1173 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1178 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1182 PERL_ARGS_ASSERT_UNPACKSTRING;
1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1185 else if (need_utf8(pat, patend)) {
1186 /* We probably should try to avoid this in case a scalar context call
1187 wouldn't get to the "U0" */
1188 STRLEN len = strend - s;
1189 s = (char *) bytes_to_utf8((U8 *) s, &len);
1192 flags |= FLAG_DO_UTF8;
1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 flags |= FLAG_PARSE_UTF8;
1198 TEMPSYM_INIT(&sym, pat, patend, flags);
1200 return unpack_rec(&sym, s, s, strend, NULL );
1204 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1208 const I32 start_sp_offset = SP - PL_stack_base;
1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1214 bool beyond = FALSE;
1215 bool explicit_length;
1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1219 PERL_ARGS_ASSERT_UNPACK_REC;
1221 symptr->strbeg = s - strbeg;
1223 while (next_symbol(symptr)) {
1226 I32 datumtype = symptr->code;
1227 /* do first one only unless in list context
1228 / is implemented by unpacking the count, then popping it from the
1229 stack, so must check that we're not in the middle of a / */
1230 if ( unpack_only_one
1231 && (SP - PL_stack_base == start_sp_offset + 1)
1232 && (datumtype != '/') ) /* XXX can this be omitted */
1235 switch (howlen = symptr->howlen) {
1237 len = strend - strbeg; /* long enough */
1240 /* e_no_len and e_number */
1241 len = symptr->length;
1245 explicit_length = TRUE;
1247 beyond = s >= strend;
1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1251 /* props nonzero means we can process this letter. */
1252 const long size = props & PACK_SIZE_MASK;
1253 const long howmany = (strend - s) / size;
1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 if (len && unpack_only_one) len = 1;
1264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1269 if (howlen == e_no_len)
1270 len = 16; /* len is not specified */
1278 tempsym_t savsym = *symptr;
1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1280 symptr->flags |= group_modifiers;
1281 symptr->patend = savsym.grpend;
1282 symptr->previous = &savsym;
1285 if (len && unpack_only_one) len = 1;
1287 symptr->patptr = savsym.grpbeg;
1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1289 else symptr->flags &= ~FLAG_PARSE_UTF8;
1290 unpack_rec(symptr, s, strbeg, strend, &s);
1291 if (s == strend && savsym.howlen == e_star)
1292 break; /* No way to continue */
1295 savsym.flags = symptr->flags & ~group_modifiers;
1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '.' | TYPE_IS_SHRIEKING:
1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1307 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 const bool u8 = utf8;
1310 if (howlen == e_star) from = strbeg;
1311 else if (len <= 0) from = s;
1313 tempsym_t *group = symptr;
1315 while (--len && group) group = group->previous;
1316 from = group ? strbeg + group->strbeg : strbeg;
1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1324 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 case '@' | TYPE_IS_SHRIEKING:
1328 s = strbeg + symptr->strbeg;
1329 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1331 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1337 Perl_croak(aTHX_ "'@' outside of string in unpack");
1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
1349 case 'X' | TYPE_IS_SHRIEKING:
1350 if (!len) /* Avoid division by 0 */
1353 const char *hop, *last;
1355 hop = last = strbeg;
1357 hop += UTF8SKIP(hop);
1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1368 len = (s - strbeg) % len;
1374 Perl_croak(aTHX_ "'X' outside of string in unpack");
1375 while (--s, UTF8_IS_CONTINUATION(*s)) {
1377 Perl_croak(aTHX_ "'X' outside of string in unpack");
1382 if (len > s - strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1387 case 'x' | TYPE_IS_SHRIEKING: {
1389 if (!len) /* Avoid division by 0 */
1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 else ai32 = (s - strbeg) % len;
1393 if (ai32 == 0) break;
1401 Perl_croak(aTHX_ "'x' outside of string in unpack");
1406 if (len > strend - s)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1418 /* Preliminary length estimate is assumed done in 'W' */
1419 if (len > strend - s) len = strend - s;
1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 if (hop >= strend) {
1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1435 } else if (len > strend - s)
1438 if (datumtype == 'Z') {
1439 /* 'Z' strips stuff after first null */
1440 const char *ptr, *end;
1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1443 sv = newSVpvn(s, ptr-s);
1444 if (howlen == e_star) /* exact for 'Z*' */
1445 len = ptr-s + (ptr != strend ? 1 : 0);
1446 } else if (datumtype == 'A') {
1447 /* 'A' strips both nulls and spaces */
1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 for (ptr = s+len-1; ptr >= s; ptr--)
1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1452 !is_utf8_space((U8 *) ptr)) break;
1453 if (ptr >= s) ptr += UTF8SKIP(ptr);
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1458 for (ptr = s+len-1; ptr >= s; ptr--)
1459 if (*ptr != 0 && !isSPACE(*ptr)) break;
1462 sv = newSVpvn(s, ptr-s);
1463 } else sv = newSVpvn(s, len);
1467 /* Undo any upgrade done due to need_utf8() */
1468 if (!(symptr->flags & FLAG_WAS_UTF8))
1469 sv_utf8_downgrade(sv, 0);
1477 if (howlen == e_star || len > (strend - s) * 8)
1478 len = (strend - s) * 8;
1482 Newxz(PL_bitcount, 256, char);
1483 for (bits = 1; bits < 256; bits++) {
1484 if (bits & 1) PL_bitcount[bits]++;
1485 if (bits & 2) PL_bitcount[bits]++;
1486 if (bits & 4) PL_bitcount[bits]++;
1487 if (bits & 8) PL_bitcount[bits]++;
1488 if (bits & 16) PL_bitcount[bits]++;
1489 if (bits & 32) PL_bitcount[bits]++;
1490 if (bits & 64) PL_bitcount[bits]++;
1491 if (bits & 128) PL_bitcount[bits]++;
1495 while (len >= 8 && s < strend) {
1496 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1501 cuv += PL_bitcount[*(U8 *)s++];
1504 if (len && s < strend) {
1506 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1507 if (datumtype == 'b')
1509 if (bits & 1) cuv++;
1514 if (bits & 0x80) cuv++;
1521 sv = sv_2mortal(newSV(len ? len : 1));
1524 if (datumtype == 'b') {
1526 const I32 ai32 = len;
1527 for (len = 0; len < ai32; len++) {
1528 if (len & 7) bits >>= 1;
1530 if (s >= strend) break;
1531 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1532 } else bits = *(U8 *) s++;
1533 *str++ = bits & 1 ? '1' : '0';
1537 const I32 ai32 = len;
1538 for (len = 0; len < ai32; len++) {
1539 if (len & 7) bits <<= 1;
1541 if (s >= strend) break;
1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1543 } else bits = *(U8 *) s++;
1544 *str++ = bits & 0x80 ? '1' : '0';
1548 SvCUR_set(sv, str - SvPVX_const(sv));
1555 /* Preliminary length estimate, acceptable for utf8 too */
1556 if (howlen == e_star || len > (strend - s) * 2)
1557 len = (strend - s) * 2;
1558 sv = sv_2mortal(newSV(len ? len : 1));
1561 if (datumtype == 'h') {
1564 for (len = 0; len < ai32; len++) {
1565 if (len & 1) bits >>= 4;
1567 if (s >= strend) break;
1568 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1569 } else bits = * (U8 *) s++;
1570 *str++ = PL_hexdigit[bits & 15];
1574 const I32 ai32 = len;
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits <<= 4;
1578 if (s >= strend) break;
1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1580 } else bits = *(U8 *) s++;
1581 *str++ = PL_hexdigit[(bits >> 4) & 15];
1585 SvCUR_set(sv, str - SvPVX_const(sv));
1591 if (explicit_length)
1592 /* Switch to "character" mode */
1593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1598 while (len-- > 0 && s < strend) {
1603 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1604 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1605 if (retlen == (STRLEN) -1 || retlen == 0)
1606 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1610 aint = *(U8 *)(s)++;
1611 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1615 else if (checksum > bits_in_uv)
1616 cdouble += (NV)aint;
1624 while (len-- > 0 && s < strend) {
1626 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1627 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1628 if (retlen == (STRLEN) -1 || retlen == 0)
1629 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV) val;
1638 } else if (!checksum)
1640 const U8 ch = *(U8 *) s++;
1643 else if (checksum > bits_in_uv)
1644 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1646 while (len-- > 0) cuv += *(U8 *) s++;
1650 if (explicit_length) {
1651 /* Switch to "bytes in UTF-8" mode */
1652 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1654 /* Should be impossible due to the need_utf8() test */
1655 Perl_croak(aTHX_ "U0 mode on a byte string");
1659 if (len > strend - s) len = strend - s;
1661 if (len && unpack_only_one) len = 1;
1665 while (len-- > 0 && s < strend) {
1669 U8 result[UTF8_MAXLEN];
1670 const char *ptr = s;
1672 /* Bug: warns about bad utf8 even if we are short on bytes
1673 and will break out of the loop */
1674 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1677 len = UTF8SKIP(result);
1678 if (!uni_to_bytes(aTHX_ &ptr, strend,
1679 (char *) &result[1], len-1, 'U')) break;
1680 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1683 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 if (retlen == (STRLEN) -1 || retlen == 0)
1685 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1690 else if (checksum > bits_in_uv)
1691 cdouble += (NV) auv;
1696 case 's' | TYPE_IS_SHRIEKING:
1697 #if SHORTSIZE != SIZE16
1700 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1701 DO_BO_UNPACK(ashort, s);
1704 else if (checksum > bits_in_uv)
1705 cdouble += (NV)ashort;
1717 #if U16SIZE > SIZE16
1720 SHIFT16(utf8, s, strend, &ai16, datumtype);
1721 DO_BO_UNPACK(ai16, 16);
1722 #if U16SIZE > SIZE16
1728 else if (checksum > bits_in_uv)
1729 cdouble += (NV)ai16;
1734 case 'S' | TYPE_IS_SHRIEKING:
1735 #if SHORTSIZE != SIZE16
1737 unsigned short aushort;
1738 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1739 DO_BO_UNPACK(aushort, s);
1742 else if (checksum > bits_in_uv)
1743 cdouble += (NV)aushort;
1756 #if U16SIZE > SIZE16
1759 SHIFT16(utf8, s, strend, &au16, datumtype);
1760 DO_BO_UNPACK(au16, 16);
1762 if (datumtype == 'n')
1763 au16 = PerlSock_ntohs(au16);
1766 if (datumtype == 'v')
1771 else if (checksum > bits_in_uv)
1772 cdouble += (NV) au16;
1777 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1778 case 'v' | TYPE_IS_SHRIEKING:
1779 case 'n' | TYPE_IS_SHRIEKING:
1782 # if U16SIZE > SIZE16
1785 SHIFT16(utf8, s, strend, &ai16, datumtype);
1787 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1788 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1789 # endif /* HAS_NTOHS */
1791 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1792 ai16 = (I16) vtohs((U16) ai16);
1793 # endif /* HAS_VTOHS */
1796 else if (checksum > bits_in_uv)
1797 cdouble += (NV) ai16;
1802 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1804 case 'i' | TYPE_IS_SHRIEKING:
1807 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1808 DO_BO_UNPACK(aint, i);
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)aint;
1818 case 'I' | TYPE_IS_SHRIEKING:
1821 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1822 DO_BO_UNPACK(auint, i);
1825 else if (checksum > bits_in_uv)
1826 cdouble += (NV)auint;
1834 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1835 #if IVSIZE == INTSIZE
1836 DO_BO_UNPACK(aiv, i);
1837 #elif IVSIZE == LONGSIZE
1838 DO_BO_UNPACK(aiv, l);
1839 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1840 DO_BO_UNPACK(aiv, 64);
1842 Perl_croak(aTHX_ "'j' not supported on this platform");
1846 else if (checksum > bits_in_uv)
1855 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1856 #if IVSIZE == INTSIZE
1857 DO_BO_UNPACK(auv, i);
1858 #elif IVSIZE == LONGSIZE
1859 DO_BO_UNPACK(auv, l);
1860 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1861 DO_BO_UNPACK(auv, 64);
1863 Perl_croak(aTHX_ "'J' not supported on this platform");
1867 else if (checksum > bits_in_uv)
1873 case 'l' | TYPE_IS_SHRIEKING:
1874 #if LONGSIZE != SIZE32
1877 SHIFT_VAR(utf8, s, strend, along, datumtype);
1878 DO_BO_UNPACK(along, l);
1881 else if (checksum > bits_in_uv)
1882 cdouble += (NV)along;
1893 #if U32SIZE > SIZE32
1896 SHIFT32(utf8, s, strend, &ai32, datumtype);
1897 DO_BO_UNPACK(ai32, 32);
1898 #if U32SIZE > SIZE32
1899 if (ai32 > 2147483647) ai32 -= 4294967296;
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)ai32;
1909 case 'L' | TYPE_IS_SHRIEKING:
1910 #if LONGSIZE != SIZE32
1912 unsigned long aulong;
1913 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1914 DO_BO_UNPACK(aulong, l);
1917 else if (checksum > bits_in_uv)
1918 cdouble += (NV)aulong;
1931 #if U32SIZE > SIZE32
1934 SHIFT32(utf8, s, strend, &au32, datumtype);
1935 DO_BO_UNPACK(au32, 32);
1937 if (datumtype == 'N')
1938 au32 = PerlSock_ntohl(au32);
1941 if (datumtype == 'V')
1946 else if (checksum > bits_in_uv)
1947 cdouble += (NV)au32;
1952 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1953 case 'V' | TYPE_IS_SHRIEKING:
1954 case 'N' | TYPE_IS_SHRIEKING:
1957 # if U32SIZE > SIZE32
1960 SHIFT32(utf8, s, strend, &ai32, datumtype);
1962 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1963 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1966 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1967 ai32 = (I32)vtohl((U32)ai32);
1971 else if (checksum > bits_in_uv)
1972 cdouble += (NV)ai32;
1977 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1981 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1982 DO_BO_UNPACK_PC(aptr);
1983 /* newSVpv generates undef if aptr is NULL */
1984 mPUSHs(newSVpv(aptr, 0));
1992 while (len > 0 && s < strend) {
1994 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1995 auv = (auv << 7) | (ch & 0x7f);
1996 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2004 if (++bytes >= sizeof(UV)) { /* promote to string */
2007 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2008 while (s < strend) {
2009 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2010 sv = mul128(sv, (U8)(ch & 0x7f));
2016 t = SvPV_nolen_const(sv);
2025 if ((s >= strend) && bytes)
2026 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2030 if (symptr->howlen == e_star)
2031 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2033 if (s + sizeof(char*) <= strend) {
2035 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2036 DO_BO_UNPACK_PC(aptr);
2037 /* newSVpvn generates undef if aptr is NULL */
2038 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2045 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2046 DO_BO_UNPACK(aquad, 64);
2048 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2049 newSViv((IV)aquad) : newSVnv((NV)aquad));
2050 else if (checksum > bits_in_uv)
2051 cdouble += (NV)aquad;
2059 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2060 DO_BO_UNPACK(auquad, 64);
2062 mPUSHs(auquad <= UV_MAX ?
2063 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2064 else if (checksum > bits_in_uv)
2065 cdouble += (NV)auquad;
2070 #endif /* HAS_QUAD */
2071 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2075 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2076 DO_BO_UNPACK_N(afloat, float);
2086 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2087 DO_BO_UNPACK_N(adouble, double);
2097 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2098 DO_BO_UNPACK_N(anv, NV);
2105 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2108 long double aldouble;
2109 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2110 DO_BO_UNPACK_N(aldouble, long double);
2114 cdouble += aldouble;
2120 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2121 sv = sv_2mortal(newSV(l));
2122 if (l) SvPOK_on(sv);
2125 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2130 next_uni_uu(aTHX_ &s, strend, &a);
2131 next_uni_uu(aTHX_ &s, strend, &b);
2132 next_uni_uu(aTHX_ &s, strend, &c);
2133 next_uni_uu(aTHX_ &s, strend, &d);
2134 hunk[0] = (char)((a << 2) | (b >> 4));
2135 hunk[1] = (char)((b << 4) | (c >> 2));
2136 hunk[2] = (char)((c << 6) | d);
2137 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2145 /* possible checksum byte */
2146 const char *skip = s+UTF8SKIP(s);
2147 if (skip < strend && *skip == '\n')
2153 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2157 len = PL_uudmap[*(U8*)s++] & 077;
2159 if (s < strend && ISUUCHAR(*s))
2160 a = PL_uudmap[*(U8*)s++] & 077;
2163 if (s < strend && ISUUCHAR(*s))
2164 b = PL_uudmap[*(U8*)s++] & 077;
2167 if (s < strend && ISUUCHAR(*s))
2168 c = PL_uudmap[*(U8*)s++] & 077;
2171 if (s < strend && ISUUCHAR(*s))
2172 d = PL_uudmap[*(U8*)s++] & 077;
2175 hunk[0] = (char)((a << 2) | (b >> 4));
2176 hunk[1] = (char)((b << 4) | (c >> 2));
2177 hunk[2] = (char)((c << 6) | d);
2178 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2183 else /* possible checksum byte */
2184 if (s + 1 < strend && s[1] == '\n')
2193 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2194 (checksum > bits_in_uv &&
2195 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2198 anv = (NV) (1 << (checksum & 15));
2199 while (checksum >= 16) {
2203 while (cdouble < 0.0)
2205 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2206 sv = newSVnv(cdouble);
2209 if (checksum < bits_in_uv) {
2210 UV mask = ((UV)1 << checksum) - 1;
2219 if (symptr->flags & FLAG_SLASH){
2220 if (SP - PL_stack_base - start_sp_offset <= 0)
2221 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2222 if( next_symbol(symptr) ){
2223 if( symptr->howlen == e_number )
2224 Perl_croak(aTHX_ "Count after length/code in unpack" );
2226 /* ...end of char buffer then no decent length available */
2227 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2229 /* take top of stack (hope it's numeric) */
2232 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2235 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2237 datumtype = symptr->code;
2238 explicit_length = FALSE;
2246 return SP - PL_stack_base - start_sp_offset;
2254 I32 gimme = GIMME_V;
2257 const char *pat = SvPV_const(left, llen);
2258 const char *s = SvPV_const(right, rlen);
2259 const char *strend = s + rlen;
2260 const char *patend = pat + llen;
2264 cnt = unpackstring(pat, patend, s, strend,
2265 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2266 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2269 if ( !cnt && gimme == G_SCALAR )
2270 PUSHs(&PL_sv_undef);
2275 doencodes(U8 *h, const char *s, I32 len)
2277 *h++ = PL_uuemap[len];
2279 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2280 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2281 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2282 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2287 const char r = (len > 1 ? s[1] : '\0');
2288 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2289 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2290 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2291 *h++ = PL_uuemap[0];
2298 S_is_an_int(pTHX_ const char *s, STRLEN l)
2300 SV *result = newSVpvn(s, l);
2301 char *const result_c = SvPV_nolen(result); /* convenience */
2302 char *out = result_c;
2306 PERL_ARGS_ASSERT_IS_AN_INT;
2314 SvREFCNT_dec(result);
2337 SvREFCNT_dec(result);
2343 SvCUR_set(result, out - result_c);
2347 /* pnum must be '\0' terminated */
2349 S_div128(pTHX_ SV *pnum, bool *done)
2352 char * const s = SvPV(pnum, len);
2356 PERL_ARGS_ASSERT_DIV128;
2360 const int i = m * 10 + (*t - '0');
2361 const int r = (i >> 7); /* r < 10 */
2369 SvCUR_set(pnum, (STRLEN) (t - s));
2374 =for apidoc packlist
2376 The engine implementing pack() Perl function.
2382 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2387 PERL_ARGS_ASSERT_PACKLIST;
2389 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2391 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2392 Also make sure any UTF8 flag is loaded */
2393 SvPV_force_nolen(cat);
2395 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2397 (void)pack_rec( cat, &sym, beglist, endlist );
2400 /* like sv_utf8_upgrade, but also repoint the group start markers */
2402 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2405 const char *from_ptr, *from_start, *from_end, **marks, **m;
2406 char *to_start, *to_ptr;
2408 if (SvUTF8(sv)) return;
2410 from_start = SvPVX_const(sv);
2411 from_end = from_start + SvCUR(sv);
2412 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2413 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2414 if (from_ptr == from_end) {
2415 /* Simple case: no character needs to be changed */
2420 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2421 Newx(to_start, len, char);
2422 Copy(from_start, to_start, from_ptr-from_start, char);
2423 to_ptr = to_start + (from_ptr-from_start);
2425 Newx(marks, sym_ptr->level+2, const char *);
2426 for (group=sym_ptr; group; group = group->previous)
2427 marks[group->level] = from_start + group->strbeg;
2428 marks[sym_ptr->level+1] = from_end+1;
2429 for (m = marks; *m < from_ptr; m++)
2430 *m = to_start + (*m-from_start);
2432 for (;from_ptr < from_end; from_ptr++) {
2433 while (*m == from_ptr) *m++ = to_ptr;
2434 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2438 while (*m == from_ptr) *m++ = to_ptr;
2439 if (m != marks + sym_ptr->level+1) {
2442 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2444 for (group=sym_ptr; group; group = group->previous)
2445 group->strbeg = marks[group->level] - to_start;
2450 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2451 from_start -= SvIVX(sv);
2454 SvFLAGS(sv) &= ~SVf_OOK;
2457 Safefree(from_start);
2458 SvPV_set(sv, to_start);
2459 SvCUR_set(sv, to_ptr - to_start);
2464 /* Exponential string grower. Makes string extension effectively O(n)
2465 needed says how many extra bytes we need (not counting the final '\0')
2466 Only grows the string if there is an actual lack of space
2469 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2470 const STRLEN cur = SvCUR(sv);
2471 const STRLEN len = SvLEN(sv);
2474 PERL_ARGS_ASSERT_SV_EXP_GROW;
2476 if (len - cur > needed) return SvPVX(sv);
2477 extend = needed > len ? needed : len;
2478 return SvGROW(sv, len+extend+1);
2483 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2486 tempsym_t lookahead;
2487 I32 items = endlist - beglist;
2488 bool found = next_symbol(symptr);
2489 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2490 bool warn_utf8 = ckWARN(WARN_UTF8);
2492 PERL_ARGS_ASSERT_PACK_REC;
2494 if (symptr->level == 0 && found && symptr->code == 'U') {
2495 marked_upgrade(aTHX_ cat, symptr);
2496 symptr->flags |= FLAG_DO_UTF8;
2499 symptr->strbeg = SvCUR(cat);
2505 SV *lengthcode = NULL;
2506 I32 datumtype = symptr->code;
2507 howlen_t howlen = symptr->howlen;
2508 char *start = SvPVX(cat);
2509 char *cur = start + SvCUR(cat);
2511 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2515 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2519 /* e_no_len and e_number */
2520 len = symptr->length;
2525 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2527 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2528 /* We can process this letter. */
2529 STRLEN size = props & PACK_SIZE_MASK;
2530 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2534 /* Look ahead for next symbol. Do we have code/code? */
2535 lookahead = *symptr;
2536 found = next_symbol(&lookahead);
2537 if (symptr->flags & FLAG_SLASH) {
2539 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2540 if (strchr("aAZ", lookahead.code)) {
2541 if (lookahead.howlen == e_number) count = lookahead.length;
2544 if (SvGAMAGIC(*beglist)) {
2545 /* Avoid reading the active data more than once
2546 by copying it to a temporary. */
2548 const char *const pv = SvPV_const(*beglist, len);
2550 = newSVpvn_flags(pv, len,
2551 SVs_TEMP | SvUTF8(*beglist));
2554 count = DO_UTF8(*beglist) ?
2555 sv_len_utf8(*beglist) : sv_len(*beglist);
2558 if (lookahead.code == 'Z') count++;
2561 if (lookahead.howlen == e_number && lookahead.length < items)
2562 count = lookahead.length;
2565 lookahead.howlen = e_number;
2566 lookahead.length = count;
2567 lengthcode = sv_2mortal(newSViv(count));
2570 /* Code inside the switch must take care to properly update
2571 cat (CUR length and '\0' termination) if it updated *cur and
2572 doesn't simply leave using break */
2573 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2575 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2576 (int) TYPE_NO_MODIFIERS(datumtype));
2578 Perl_croak(aTHX_ "'%%' may not be used in pack");
2581 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2582 case '.' | TYPE_IS_SHRIEKING:
2585 if (howlen == e_star) from = start;
2586 else if (len == 0) from = cur;
2588 tempsym_t *group = symptr;
2590 while (--len && group) group = group->previous;
2591 from = group ? start + group->strbeg : start;
2594 len = SvIV(fromstr);
2596 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2597 case '@' | TYPE_IS_SHRIEKING:
2600 from = start + symptr->strbeg;
2602 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2603 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2604 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2608 while (len && from < cur) {
2609 from += UTF8SKIP(from);
2613 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2615 /* Here we know from == cur */
2617 GROWING(0, cat, start, cur, len);
2618 Zero(cur, len, char);
2620 } else if (from < cur) {
2623 } else goto no_change;
2631 if (len > 0) goto grow;
2632 if (len == 0) goto no_change;
2639 tempsym_t savsym = *symptr;
2640 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2641 symptr->flags |= group_modifiers;
2642 symptr->patend = savsym.grpend;
2644 symptr->previous = &lookahead;
2647 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2648 else symptr->flags &= ~FLAG_PARSE_UTF8;
2649 was_utf8 = SvUTF8(cat);
2650 symptr->patptr = savsym.grpbeg;
2651 beglist = pack_rec(cat, symptr, beglist, endlist);
2652 if (SvUTF8(cat) != was_utf8)
2653 /* This had better be an upgrade while in utf8==0 mode */
2656 if (savsym.howlen == e_star && beglist == endlist)
2657 break; /* No way to continue */
2659 items = endlist - beglist;
2660 lookahead.flags = symptr->flags & ~group_modifiers;
2663 case 'X' | TYPE_IS_SHRIEKING:
2664 if (!len) /* Avoid division by 0 */
2671 hop += UTF8SKIP(hop);
2678 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2682 len = (cur-start) % len;
2686 if (len < 1) goto no_change;
2690 Perl_croak(aTHX_ "'%c' outside of string in pack",
2691 (int) TYPE_NO_MODIFIERS(datumtype));
2692 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2694 Perl_croak(aTHX_ "'%c' outside of string in pack",
2695 (int) TYPE_NO_MODIFIERS(datumtype));
2701 if (cur - start < len)
2702 Perl_croak(aTHX_ "'%c' outside of string in pack",
2703 (int) TYPE_NO_MODIFIERS(datumtype));
2706 if (cur < start+symptr->strbeg) {
2707 /* Make sure group starts don't point into the void */
2709 const STRLEN length = cur-start;
2710 for (group = symptr;
2711 group && length < group->strbeg;
2712 group = group->previous) group->strbeg = length;
2713 lookahead.strbeg = length;
2716 case 'x' | TYPE_IS_SHRIEKING: {
2718 if (!len) /* Avoid division by 0 */
2720 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2721 else ai32 = (cur - start) % len;
2722 if (ai32 == 0) goto no_change;
2734 aptr = SvPV_const(fromstr, fromlen);
2735 if (DO_UTF8(fromstr)) {
2736 const char *end, *s;
2738 if (!utf8 && !SvUTF8(cat)) {
2739 marked_upgrade(aTHX_ cat, symptr);
2740 lookahead.flags |= FLAG_DO_UTF8;
2741 lookahead.strbeg = symptr->strbeg;
2744 cur = start + SvCUR(cat);
2746 if (howlen == e_star) {
2747 if (utf8) goto string_copy;
2751 end = aptr + fromlen;
2752 fromlen = datumtype == 'Z' ? len-1 : len;
2753 while ((I32) fromlen > 0 && s < end) {
2758 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2761 if (datumtype == 'Z') len++;
2767 fromlen = len - fromlen;
2768 if (datumtype == 'Z') fromlen--;
2769 if (howlen == e_star) {
2771 if (datumtype == 'Z') len++;
2773 GROWING(0, cat, start, cur, len);
2774 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2775 datumtype | TYPE_IS_PACK))
2776 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2780 if (howlen == e_star) {
2782 if (datumtype == 'Z') len++;
2784 if (len <= (I32) fromlen) {
2786 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2788 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2790 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2791 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2793 while (fromlen > 0) {
2794 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2800 if (howlen == e_star) {
2802 if (datumtype == 'Z') len++;
2804 if (len <= (I32) fromlen) {
2806 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2808 GROWING(0, cat, start, cur, len);
2809 Copy(aptr, cur, fromlen, char);
2813 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2819 const char *str, *end;
2826 str = SvPV_const(fromstr, fromlen);
2827 end = str + fromlen;
2828 if (DO_UTF8(fromstr)) {
2830 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2832 utf8_source = FALSE;
2833 utf8_flags = 0; /* Unused, but keep compilers happy */
2835 if (howlen == e_star) len = fromlen;
2836 field_len = (len+7)/8;
2837 GROWING(utf8, cat, start, cur, field_len);
2838 if (len > (I32)fromlen) len = fromlen;
2841 if (datumtype == 'B')
2845 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2847 } else bits |= *str++ & 1;
2848 if (l & 7) bits <<= 1;
2850 PUSH_BYTE(utf8, cur, bits);
2855 /* datumtype == 'b' */
2859 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2860 if (val & 1) bits |= 0x80;
2861 } else if (*str++ & 1)
2863 if (l & 7) bits >>= 1;
2865 PUSH_BYTE(utf8, cur, bits);
2871 if (datumtype == 'B')
2872 bits <<= 7 - (l & 7);
2874 bits >>= 7 - (l & 7);
2875 PUSH_BYTE(utf8, cur, bits);
2878 /* Determine how many chars are left in the requested field */
2880 if (howlen == e_star) field_len = 0;
2881 else field_len -= l;
2882 Zero(cur, field_len, char);
2888 const char *str, *end;
2895 str = SvPV_const(fromstr, fromlen);
2896 end = str + fromlen;
2897 if (DO_UTF8(fromstr)) {
2899 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2901 utf8_source = FALSE;
2902 utf8_flags = 0; /* Unused, but keep compilers happy */
2904 if (howlen == e_star) len = fromlen;
2905 field_len = (len+1)/2;
2906 GROWING(utf8, cat, start, cur, field_len);
2907 if (!utf8 && len > (I32)fromlen) len = fromlen;
2910 if (datumtype == 'H')
2914 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2915 if (val < 256 && isALPHA(val))
2916 bits |= (val + 9) & 0xf;
2919 } else if (isALPHA(*str))
2920 bits |= (*str++ + 9) & 0xf;
2922 bits |= *str++ & 0xf;
2923 if (l & 1) bits <<= 4;
2925 PUSH_BYTE(utf8, cur, bits);
2933 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2934 if (val < 256 && isALPHA(val))
2935 bits |= ((val + 9) & 0xf) << 4;
2937 bits |= (val & 0xf) << 4;
2938 } else if (isALPHA(*str))
2939 bits |= ((*str++ + 9) & 0xf) << 4;
2941 bits |= (*str++ & 0xf) << 4;
2942 if (l & 1) bits >>= 4;
2944 PUSH_BYTE(utf8, cur, bits);
2950 PUSH_BYTE(utf8, cur, bits);
2953 /* Determine how many chars are left in the requested field */
2955 if (howlen == e_star) field_len = 0;
2956 else field_len -= l;
2957 Zero(cur, field_len, char);
2965 aiv = SvIV(fromstr);
2966 if ((-128 > aiv || aiv > 127) &&
2968 Perl_warner(aTHX_ packWARN(WARN_PACK),
2969 "Character in 'c' format wrapped in pack");
2970 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2975 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2981 aiv = SvIV(fromstr);
2982 if ((0 > aiv || aiv > 0xff) &&
2984 Perl_warner(aTHX_ packWARN(WARN_PACK),
2985 "Character in 'C' format wrapped in pack");
2986 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2991 U8 in_bytes = (U8)IN_BYTES;
2993 end = start+SvLEN(cat)-1;
2994 if (utf8) end -= UTF8_MAXLEN-1;
2998 auv = SvUV(fromstr);
2999 if (in_bytes) auv = auv % 0x100;
3004 SvCUR_set(cat, cur - start);
3006 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3007 end = start+SvLEN(cat)-UTF8_MAXLEN;
3009 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3012 0 : UNICODE_ALLOW_ANY);
3017 SvCUR_set(cat, cur - start);
3018 marked_upgrade(aTHX_ cat, symptr);
3019 lookahead.flags |= FLAG_DO_UTF8;
3020 lookahead.strbeg = symptr->strbeg;
3023 cur = start + SvCUR(cat);
3024 end = start+SvLEN(cat)-UTF8_MAXLEN;
3027 if (ckWARN(WARN_PACK))
3028 Perl_warner(aTHX_ packWARN(WARN_PACK),
3029 "Character in 'W' format wrapped in pack");
3034 SvCUR_set(cat, cur - start);
3035 GROWING(0, cat, start, cur, len+1);
3036 end = start+SvLEN(cat)-1;
3038 *(U8 *) cur++ = (U8)auv;
3047 if (!(symptr->flags & FLAG_DO_UTF8)) {
3048 marked_upgrade(aTHX_ cat, symptr);
3049 lookahead.flags |= FLAG_DO_UTF8;
3050 lookahead.strbeg = symptr->strbeg;
3056 end = start+SvLEN(cat);
3057 if (!utf8) end -= UTF8_MAXLEN;
3061 auv = SvUV(fromstr);
3063 U8 buffer[UTF8_MAXLEN], *endb;
3064 endb = uvuni_to_utf8_flags(buffer, auv,
3066 0 : UNICODE_ALLOW_ANY);
3067 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3069 SvCUR_set(cat, cur - start);
3070 GROWING(0, cat, start, cur,
3071 len+(endb-buffer)*UTF8_EXPAND);
3072 end = start+SvLEN(cat);
3074 cur = bytes_to_uni(buffer, endb-buffer, cur);
3078 SvCUR_set(cat, cur - start);
3079 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3080 end = start+SvLEN(cat)-UTF8_MAXLEN;
3082 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3084 0 : UNICODE_ALLOW_ANY);
3089 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3095 anv = SvNV(fromstr);
3097 /* VOS does not automatically map a floating-point overflow
3098 during conversion from double to float into infinity, so we
3099 do it by hand. This code should either be generalized for
3100 any OS that needs it, or removed if and when VOS implements
3101 posix-976 (suggestion to support mapping to infinity).
3102 Paul.Green@stratus.com 02-04-02. */
3104 extern const float _float_constants[];
3106 afloat = _float_constants[0]; /* single prec. inf. */
3107 else if (anv < -FLT_MAX)
3108 afloat = _float_constants[0]; /* single prec. inf. */
3109 else afloat = (float) anv;
3112 # if defined(VMS) && !defined(__IEEE_FP)
3113 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3114 * on Alpha; fake it if we don't have them.
3118 else if (anv < -FLT_MAX)
3120 else afloat = (float)anv;
3122 afloat = (float)anv;
3124 #endif /* __VOS__ */
3125 DO_BO_PACK_N(afloat, float);
3126 PUSH_VAR(utf8, cur, afloat);
3134 anv = SvNV(fromstr);
3136 /* VOS does not automatically map a floating-point overflow
3137 during conversion from long double to double into infinity,
3138 so we do it by hand. This code should either be generalized
3139 for any OS that needs it, or removed if and when VOS
3140 implements posix-976 (suggestion to support mapping to
3141 infinity). Paul.Green@stratus.com 02-04-02. */
3143 extern const double _double_constants[];
3145 adouble = _double_constants[0]; /* double prec. inf. */
3146 else if (anv < -DBL_MAX)
3147 adouble = _double_constants[0]; /* double prec. inf. */
3148 else adouble = (double) anv;
3151 # if defined(VMS) && !defined(__IEEE_FP)
3152 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3153 * on Alpha; fake it if we don't have them.
3157 else if (anv < -DBL_MAX)
3159 else adouble = (double)anv;
3161 adouble = (double)anv;
3163 #endif /* __VOS__ */
3164 DO_BO_PACK_N(adouble, double);
3165 PUSH_VAR(utf8, cur, adouble);
3170 Zero(&anv, 1, NV); /* can be long double with unused bits */
3173 anv = SvNV(fromstr);
3174 DO_BO_PACK_N(anv, NV);
3175 PUSH_VAR(utf8, cur, anv);
3179 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3181 long double aldouble;
3182 /* long doubles can have unused bits, which may be nonzero */
3183 Zero(&aldouble, 1, long double);
3186 aldouble = (long double)SvNV(fromstr);
3187 DO_BO_PACK_N(aldouble, long double);
3188 PUSH_VAR(utf8, cur, aldouble);
3193 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3194 case 'n' | TYPE_IS_SHRIEKING:
3200 ai16 = (I16)SvIV(fromstr);
3202 ai16 = PerlSock_htons(ai16);
3204 PUSH16(utf8, cur, &ai16);
3207 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3208 case 'v' | TYPE_IS_SHRIEKING:
3214 ai16 = (I16)SvIV(fromstr);
3218 PUSH16(utf8, cur, &ai16);
3221 case 'S' | TYPE_IS_SHRIEKING:
3222 #if SHORTSIZE != SIZE16
3224 unsigned short aushort;
3226 aushort = SvUV(fromstr);
3227 DO_BO_PACK(aushort, s);
3228 PUSH_VAR(utf8, cur, aushort);
3238 au16 = (U16)SvUV(fromstr);
3239 DO_BO_PACK(au16, 16);
3240 PUSH16(utf8, cur, &au16);
3243 case 's' | TYPE_IS_SHRIEKING:
3244 #if SHORTSIZE != SIZE16
3248 ashort = SvIV(fromstr);
3249 DO_BO_PACK(ashort, s);
3250 PUSH_VAR(utf8, cur, ashort);
3260 ai16 = (I16)SvIV(fromstr);
3261 DO_BO_PACK(ai16, 16);
3262 PUSH16(utf8, cur, &ai16);
3266 case 'I' | TYPE_IS_SHRIEKING:
3270 auint = SvUV(fromstr);
3271 DO_BO_PACK(auint, i);
3272 PUSH_VAR(utf8, cur, auint);
3279 aiv = SvIV(fromstr);
3280 #if IVSIZE == INTSIZE
3282 #elif IVSIZE == LONGSIZE
3284 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3285 DO_BO_PACK(aiv, 64);
3287 Perl_croak(aTHX_ "'j' not supported on this platform");
3289 PUSH_VAR(utf8, cur, aiv);
3296 auv = SvUV(fromstr);
3297 #if UVSIZE == INTSIZE
3299 #elif UVSIZE == LONGSIZE
3301 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3302 DO_BO_PACK(auv, 64);
3304 Perl_croak(aTHX_ "'J' not supported on this platform");
3306 PUSH_VAR(utf8, cur, auv);
3313 anv = SvNV(fromstr);
3317 SvCUR_set(cat, cur - start);
3318 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3321 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3322 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3323 any negative IVs will have already been got by the croak()
3324 above. IOK is untrue for fractions, so we test them
3325 against UV_MAX_P1. */
3326 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3327 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3328 char *in = buf + sizeof(buf);
3329 UV auv = SvUV(fromstr);
3332 *--in = (char)((auv & 0x7f) | 0x80);
3335 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3336 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3337 in, (buf + sizeof(buf)) - in);
3338 } else if (SvPOKp(fromstr))
3340 else if (SvNOKp(fromstr)) {
3341 /* 10**NV_MAX_10_EXP is the largest power of 10
3342 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3343 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3344 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3345 And with that many bytes only Inf can overflow.
3346 Some C compilers are strict about integral constant
3347 expressions so we conservatively divide by a slightly
3348 smaller integer instead of multiplying by the exact
3349 floating-point value.
3351 #ifdef NV_MAX_10_EXP
3352 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3353 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3355 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3356 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3358 char *in = buf + sizeof(buf);
3360 anv = Perl_floor(anv);
3362 const NV next = Perl_floor(anv / 128);
3363 if (in <= buf) /* this cannot happen ;-) */
3364 Perl_croak(aTHX_ "Cannot compress integer in pack");
3365 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3368 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3369 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3370 in, (buf + sizeof(buf)) - in);
3379 /* Copy string and check for compliance */
3380 from = SvPV_const(fromstr, len);
3381 if ((norm = is_an_int(from, len)) == NULL)
3382 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3384 Newx(result, len, char);
3387 while (!done) *--in = div128(norm, &done) | 0x80;
3388 result[len - 1] &= 0x7F; /* clear continue bit */
3389 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3390 in, (result + len) - in);
3392 SvREFCNT_dec(norm); /* free norm */
3397 case 'i' | TYPE_IS_SHRIEKING:
3401 aint = SvIV(fromstr);
3402 DO_BO_PACK(aint, i);
3403 PUSH_VAR(utf8, cur, aint);
3406 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3407 case 'N' | TYPE_IS_SHRIEKING:
3413 au32 = SvUV(fromstr);
3415 au32 = PerlSock_htonl(au32);
3417 PUSH32(utf8, cur, &au32);
3420 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3421 case 'V' | TYPE_IS_SHRIEKING:
3427 au32 = SvUV(fromstr);
3431 PUSH32(utf8, cur, &au32);
3434 case 'L' | TYPE_IS_SHRIEKING:
3435 #if LONGSIZE != SIZE32
3437 unsigned long aulong;
3439 aulong = SvUV(fromstr);
3440 DO_BO_PACK(aulong, l);
3441 PUSH_VAR(utf8, cur, aulong);
3451 au32 = SvUV(fromstr);
3452 DO_BO_PACK(au32, 32);
3453 PUSH32(utf8, cur, &au32);
3456 case 'l' | TYPE_IS_SHRIEKING:
3457 #if LONGSIZE != SIZE32
3461 along = SvIV(fromstr);
3462 DO_BO_PACK(along, l);
3463 PUSH_VAR(utf8, cur, along);
3473 ai32 = SvIV(fromstr);
3474 DO_BO_PACK(ai32, 32);
3475 PUSH32(utf8, cur, &ai32);
3483 auquad = (Uquad_t) SvUV(fromstr);
3484 DO_BO_PACK(auquad, 64);
3485 PUSH_VAR(utf8, cur, auquad);
3492 aquad = (Quad_t)SvIV(fromstr);
3493 DO_BO_PACK(aquad, 64);
3494 PUSH_VAR(utf8, cur, aquad);
3497 #endif /* HAS_QUAD */
3499 len = 1; /* assume SV is correct length */
3500 GROWING(utf8, cat, start, cur, sizeof(char *));
3507 SvGETMAGIC(fromstr);
3508 if (!SvOK(fromstr)) aptr = NULL;
3510 /* XXX better yet, could spirit away the string to
3511 * a safe spot and hang on to it until the result
3512 * of pack() (and all copies of the result) are
3515 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3516 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3517 Perl_warner(aTHX_ packWARN(WARN_PACK),
3518 "Attempt to pack pointer to temporary value");
3520 if (SvPOK(fromstr) || SvNIOK(fromstr))
3521 aptr = SvPV_nomg_const_nolen(fromstr);
3523 aptr = SvPV_force_flags_nolen(fromstr, 0);
3525 DO_BO_PACK_PC(aptr);
3526 PUSH_VAR(utf8, cur, aptr);
3530 const char *aptr, *aend;
3534 if (len <= 2) len = 45;
3535 else len = len / 3 * 3;
3537 if (ckWARN(WARN_PACK))
3538 Perl_warner(aTHX_ packWARN(WARN_PACK),
3539 "Field too wide in 'u' format in pack");
3542 aptr = SvPV_const(fromstr, fromlen);
3543 from_utf8 = DO_UTF8(fromstr);
3545 aend = aptr + fromlen;
3546 fromlen = sv_len_utf8(fromstr);
3547 } else aend = NULL; /* Unused, but keep compilers happy */
3548 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3549 while (fromlen > 0) {
3552 U8 hunk[1+63/3*4+1];
3554 if ((I32)fromlen > len)
3560 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3561 'u' | TYPE_IS_PACK)) {
3563 SvCUR_set(cat, cur - start);
3564 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3566 end = doencodes(hunk, buffer, todo);
3568 end = doencodes(hunk, aptr, todo);
3571 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3578 SvCUR_set(cat, cur - start);
3580 *symptr = lookahead;
3589 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3590 register SV *cat = TARG;
3592 SV *pat_sv = *++MARK;
3593 register const char *pat = SvPV_const(pat_sv, fromlen);
3594 register const char *patend = pat + fromlen;
3600 packlist(cat, pat, patend, MARK, SP + 1);
3610 * c-indentation-style: bsd
3612 * indent-tabs-mode: t
3615 * ex: set ts=8 sts=4 sw=4 noet: