3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
74 # define PERL_PACK_CAN_BYTEORDER
75 # define PERL_PACK_CAN_SHRIEKSIGN
81 /* Maximum number of bytes to which a byte can grow due to upgrade */
85 * Offset for integer pack/unpack.
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
109 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
110 # define OFF16(p) ((char*)(p))
111 # define OFF32(p) ((char*)(p))
113 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
114 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
117 ++++ bad cray byte order
121 # define OFF16(p) ((char *) (p))
122 # define OFF32(p) ((char *) (p))
125 /* Only to be used inside a loop (see the break) */
126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
130 Copy(s, OFF16(p), SIZE16, char); \
135 /* Only to be used inside a loop (see the break) */
136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
140 Copy(s, OFF32(p), SIZE32, char); \
145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
155 Copy(s, (char *) &var, sizeof(var), char); \
160 #define PUSH_VAR(utf8, aptr, var) \
161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164 #define MAX_SUB_TEMPLATE_LEVEL 100
166 /* flags (note that type modifiers can also be used as flags!) */
167 #define FLAG_WAS_UTF8 0x40
168 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
169 #define FLAG_UNPACK_ONLY_ONE 0x10
170 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
171 #define FLAG_SLASH 0x04
172 #define FLAG_COMMA 0x02
173 #define FLAG_PACK 0x01
176 S_mul128(pTHX_ SV *sv, U8 m)
179 char *s = SvPV(sv, len);
182 PERL_ARGS_ASSERT_MUL128;
184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
185 SV * const tmpNew = newSVpvs("0000000000");
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
193 while (!*t) /* trailing '\0'? */
196 const U32 i = ((*t - '0') << 7) + m;
197 *(t--) = '0' + (char)(i % 10);
203 /* Explosives and implosives. */
205 #if 'I' == 73 && 'J' == 74
206 /* On an ASCII/ISO kind of system */
207 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
210 Some other sort of character set - use memchr() so we don't match
213 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
217 #define TYPE_IS_SHRIEKING 0x100
218 #define TYPE_IS_BIG_ENDIAN 0x200
219 #define TYPE_IS_LITTLE_ENDIAN 0x400
220 #define TYPE_IS_PACK 0x800
221 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
223 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
231 #ifndef PERL_PACK_CAN_BYTEORDER
232 /* Put "can't" first because it is shorter */
233 # define TYPE_ENDIANNESS(t) 0
234 # define TYPE_NO_ENDIANNESS(t) (t)
236 # define ENDIANNESS_ALLOWED_TYPES ""
238 # define DO_BO_UNPACK(var, type)
239 # define DO_BO_PACK(var, type)
240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242 # define DO_BO_UNPACK_N(var, type)
243 # define DO_BO_PACK_N(var, type)
244 # define DO_BO_UNPACK_P(var)
245 # define DO_BO_PACK_P(var)
246 # define DO_BO_UNPACK_PC(var)
247 # define DO_BO_PACK_PC(var)
249 #else /* PERL_PACK_CAN_BYTEORDER */
251 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
254 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
256 # define DO_BO_UNPACK(var, type) \
258 switch (TYPE_ENDIANNESS(datumtype)) { \
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 # define DO_BO_PACK(var, type) \
267 switch (TYPE_ENDIANNESS(datumtype)) { \
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
276 switch (TYPE_ENDIANNESS(datumtype)) { \
277 case TYPE_IS_BIG_ENDIAN: \
278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
280 case TYPE_IS_LITTLE_ENDIAN: \
281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
290 switch (TYPE_ENDIANNESS(datumtype)) { \
291 case TYPE_IS_BIG_ENDIAN: \
292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
294 case TYPE_IS_LITTLE_ENDIAN: \
295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
302 # define BO_CANT_DOIT(action, type) \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
318 # if PTRSIZE == INTSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
323 # elif PTRSIZE == LONGSIZE
324 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
328 # elif PTRSIZE == IVSIZE
329 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
334 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
336 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
340 # if defined(my_htolen) && defined(my_letohn) && \
341 defined(my_htoben) && defined(my_betohn)
342 # define DO_BO_UNPACK_N(var, type) \
344 switch (TYPE_ENDIANNESS(datumtype)) { \
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
351 # define DO_BO_PACK_N(var, type) \
353 switch (TYPE_ENDIANNESS(datumtype)) { \
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
360 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
364 #endif /* PERL_PACK_CAN_BYTEORDER */
366 #define PACK_SIZE_CANNOT_CSUM 0x80
367 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
368 #define PACK_SIZE_MASK 0x3F
370 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
373 #if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
377 typedef U8 packprops_t;
380 STATIC const packprops_t packprops[512] = {
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 /* C */ sizeof(unsigned char),
388 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
389 /* D */ LONG_DOUBLESIZE,
396 /* I */ sizeof(unsigned int),
403 #if defined(HAS_QUAD)
404 /* Q */ sizeof(Uquad_t),
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
418 /* f */ sizeof(float),
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
428 #if defined(HAS_QUAD)
429 /* q */ sizeof(Quad_t),
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
455 /* L */ sizeof(unsigned long),
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
463 /* S */ sizeof(unsigned short),
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 /* l */ sizeof(long),
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
482 /* s */ sizeof(short),
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
500 /* EBCDIC (or bust) */
501 STATIC const packprops_t packprops[512] = {
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
515 /* f */ sizeof(float),
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
526 #if defined(HAS_QUAD)
527 /* q */ sizeof(Quad_t),
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 /* C */ sizeof(unsigned char),
539 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
540 /* D */ LONG_DOUBLESIZE,
547 /* I */ sizeof(unsigned int),
555 #if defined(HAS_QUAD)
556 /* Q */ sizeof(Uquad_t),
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
610 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
621 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
633 if (ckWARN(WARN_UNPACK))
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
636 (int) TYPE_NO_MODIFIERS(datumtype));
643 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
648 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
652 const char *from = *s;
654 const U32 flags = ckWARN(WARN_UTF8) ?
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
662 } else from += retlen;
667 *(U8 *)buf++ = (U8)val;
669 /* We have enough characters for the buffer. Did we have problems ? */
672 /* Rewalk the string fragment while warning */
674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
679 if (from > end) from = end;
681 if ((bad & 2) && ((datumtype & TYPE_IS_PACK)
682 ? ckWARN(WARN_PACK) : ckWARN(WARN_UNPACK)))
683 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
684 WARN_PACK : WARN_UNPACK),
685 "Character(s) in '%c' format wrapped in %s",
686 (int) TYPE_NO_MODIFIERS(datumtype),
687 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
694 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
698 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
699 if (val >= 0x100 || !ISUUCHAR(val) ||
700 retlen == (STRLEN) -1 || retlen == 0) {
704 *out = PL_uudmap[val] & 077;
710 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
711 const U8 * const end = start + len;
713 PERL_ARGS_ASSERT_BYTES_TO_UNI;
715 while (start < end) {
716 const UV uv = NATIVE_TO_ASCII(*start);
717 if (UNI_IS_INVARIANT(uv))
718 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
721 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
728 #define PUSH_BYTES(utf8, cur, buf, len) \
731 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
733 Copy(buf, cur, len, char); \
738 #define GROWING(utf8, cat, start, cur, in_len) \
740 STRLEN glen = (in_len); \
741 if (utf8) glen *= UTF8_EXPAND; \
742 if ((cur) + glen >= (start) + SvLEN(cat)) { \
743 (start) = sv_exp_grow(cat, glen); \
744 (cur) = (start) + SvCUR(cat); \
748 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
750 const STRLEN glen = (in_len); \
752 if (utf8) gl *= UTF8_EXPAND; \
753 if ((cur) + gl >= (start) + SvLEN(cat)) { \
755 SvCUR_set((cat), (cur) - (start)); \
756 (start) = sv_exp_grow(cat, gl); \
757 (cur) = (start) + SvCUR(cat); \
759 PUSH_BYTES(utf8, cur, buf, glen); \
762 #define PUSH_BYTE(utf8, s, byte) \
765 const U8 au8 = (byte); \
766 (s) = bytes_to_uni(&au8, 1, (s)); \
767 } else *(U8 *)(s)++ = (byte); \
770 /* Only to be used inside a loop (see the break) */
771 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
774 if (str >= end) break; \
775 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
776 if (retlen == (STRLEN) -1 || retlen == 0) { \
778 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
783 static const char *_action( const tempsym_t* symptr )
785 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
788 /* Returns the sizeof() struct described by pat */
790 S_measure_struct(pTHX_ tempsym_t* symptr)
794 PERL_ARGS_ASSERT_MEASURE_STRUCT;
796 while (next_symbol(symptr)) {
800 switch (symptr->howlen) {
802 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
806 /* e_no_len and e_number */
807 len = symptr->length;
811 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
814 /* endianness doesn't influence the size of a type */
815 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
817 Perl_croak(aTHX_ "Invalid type '%c' in %s",
818 (int)TYPE_NO_MODIFIERS(symptr->code),
820 #ifdef PERL_PACK_CAN_SHRIEKSIGN
821 case '.' | TYPE_IS_SHRIEKING:
822 case '@' | TYPE_IS_SHRIEKING:
827 case 'U': /* XXXX Is it correct? */
830 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
831 (int) TYPE_NO_MODIFIERS(symptr->code),
838 tempsym_t savsym = *symptr;
839 symptr->patptr = savsym.grpbeg;
840 symptr->patend = savsym.grpend;
841 /* XXXX Theoretically, we need to measure many times at
842 different positions, since the subexpression may contain
843 alignment commands, but be not of aligned length.
844 Need to detect this and croak(). */
845 size = measure_struct(symptr);
849 case 'X' | TYPE_IS_SHRIEKING:
850 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
852 if (!len) /* Avoid division by 0 */
854 len = total % len; /* Assumed: the start is aligned. */
859 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
861 case 'x' | TYPE_IS_SHRIEKING:
862 if (!len) /* Avoid division by 0 */
864 star = total % len; /* Assumed: the start is aligned. */
865 if (star) /* Other portable ways? */
889 size = sizeof(char*);
899 /* locate matching closing parenthesis or bracket
900 * returns char pointer to char after match, or NULL
903 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
905 PERL_ARGS_ASSERT_GROUP_END;
907 while (patptr < patend) {
908 const char c = *patptr++;
915 while (patptr < patend && *patptr != '\n')
919 patptr = group_end(patptr, patend, ')') + 1;
921 patptr = group_end(patptr, patend, ']') + 1;
923 Perl_croak(aTHX_ "No group ending character '%c' found in template",
929 /* Convert unsigned decimal number to binary.
930 * Expects a pointer to the first digit and address of length variable
931 * Advances char pointer to 1st non-digit char and returns number
934 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
936 I32 len = *patptr++ - '0';
938 PERL_ARGS_ASSERT_GET_NUM;
940 while (isDIGIT(*patptr)) {
941 if (len >= 0x7FFFFFFF/10)
942 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
943 len = (len * 10) + (*patptr++ - '0');
949 /* The marvellous template parsing routine: Using state stored in *symptr,
950 * locates next template code and count
953 S_next_symbol(pTHX_ tempsym_t* symptr )
955 const char* patptr = symptr->patptr;
956 const char* const patend = symptr->patend;
958 PERL_ARGS_ASSERT_NEXT_SYMBOL;
960 symptr->flags &= ~FLAG_SLASH;
962 while (patptr < patend) {
963 if (isSPACE(*patptr))
965 else if (*patptr == '#') {
967 while (patptr < patend && *patptr != '\n')
972 /* We should have found a template code */
973 I32 code = *patptr++ & 0xFF;
974 U32 inherited_modifiers = 0;
976 if (code == ','){ /* grandfather in commas but with a warning */
977 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
978 symptr->flags |= FLAG_COMMA;
979 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
980 "Invalid type ',' in %s", _action( symptr ) );
985 /* for '(', skip to ')' */
987 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
988 Perl_croak(aTHX_ "()-group starts with a count in %s",
990 symptr->grpbeg = patptr;
991 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
992 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
993 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
997 /* look for group modifiers to inherit */
998 if (TYPE_ENDIANNESS(symptr->flags)) {
999 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1000 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1003 /* look for modifiers */
1004 while (patptr < patend) {
1005 const char *allowed;
1009 modifier = TYPE_IS_SHRIEKING;
1010 allowed = SHRIEKING_ALLOWED_TYPES;
1012 #ifdef PERL_PACK_CAN_BYTEORDER
1014 modifier = TYPE_IS_BIG_ENDIAN;
1015 allowed = ENDIANNESS_ALLOWED_TYPES;
1018 modifier = TYPE_IS_LITTLE_ENDIAN;
1019 allowed = ENDIANNESS_ALLOWED_TYPES;
1021 #endif /* PERL_PACK_CAN_BYTEORDER */
1031 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1032 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1033 allowed, _action( symptr ) );
1035 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1036 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1037 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1038 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1039 TYPE_ENDIANNESS_MASK)
1040 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1041 *patptr, _action( symptr ) );
1043 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1044 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1045 "Duplicate modifier '%c' after '%c' in %s",
1046 *patptr, (int) TYPE_NO_MODIFIERS(code),
1047 _action( symptr ) );
1054 /* inherit modifiers */
1055 code |= inherited_modifiers;
1057 /* look for count and/or / */
1058 if (patptr < patend) {
1059 if (isDIGIT(*patptr)) {
1060 patptr = get_num( patptr, &symptr->length );
1061 symptr->howlen = e_number;
1063 } else if (*patptr == '*') {
1065 symptr->howlen = e_star;
1067 } else if (*patptr == '[') {
1068 const char* lenptr = ++patptr;
1069 symptr->howlen = e_number;
1070 patptr = group_end( patptr, patend, ']' ) + 1;
1071 /* what kind of [] is it? */
1072 if (isDIGIT(*lenptr)) {
1073 lenptr = get_num( lenptr, &symptr->length );
1074 if( *lenptr != ']' )
1075 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1076 _action( symptr ) );
1078 tempsym_t savsym = *symptr;
1079 symptr->patend = patptr-1;
1080 symptr->patptr = lenptr;
1081 savsym.length = measure_struct(symptr);
1085 symptr->howlen = e_no_len;
1090 while (patptr < patend) {
1091 if (isSPACE(*patptr))
1093 else if (*patptr == '#') {
1095 while (patptr < patend && *patptr != '\n')
1097 if (patptr < patend)
1100 if (*patptr == '/') {
1101 symptr->flags |= FLAG_SLASH;
1103 if (patptr < patend &&
1104 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1105 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1106 _action( symptr ) );
1112 /* at end - no count, no / */
1113 symptr->howlen = e_no_len;
1117 symptr->code = code;
1118 symptr->patptr = patptr;
1122 symptr->patptr = patptr;
1127 There is no way to cleanly handle the case where we should process the
1128 string per byte in its upgraded form while it's really in downgraded form
1129 (e.g. estimates like strend-s as an upper bound for the number of
1130 characters left wouldn't work). So if we foresee the need of this
1131 (pattern starts with U or contains U0), we want to work on the encoded
1132 version of the string. Users are advised to upgrade their pack string
1133 themselves if they need to do a lot of unpacks like this on it
1136 need_utf8(const char *pat, const char *patend)
1140 PERL_ARGS_ASSERT_NEED_UTF8;
1142 while (pat < patend) {
1143 if (pat[0] == '#') {
1145 pat = (const char *) memchr(pat, '\n', patend-pat);
1146 if (!pat) return FALSE;
1147 } else if (pat[0] == 'U') {
1148 if (first || pat[1] == '0') return TRUE;
1149 } else first = FALSE;
1156 first_symbol(const char *pat, const char *patend) {
1157 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1159 while (pat < patend) {
1160 if (pat[0] != '#') return pat[0];
1162 pat = (const char *) memchr(pat, '\n', patend-pat);
1170 =for apidoc unpackstring
1172 The engine implementing unpack() Perl function. C<unpackstring> puts the
1173 extracted list items on the stack and returns the number of elements.
1174 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1179 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1183 PERL_ARGS_ASSERT_UNPACKSTRING;
1185 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1186 else if (need_utf8(pat, patend)) {
1187 /* We probably should try to avoid this in case a scalar context call
1188 wouldn't get to the "U0" */
1189 STRLEN len = strend - s;
1190 s = (char *) bytes_to_utf8((U8 *) s, &len);
1193 flags |= FLAG_DO_UTF8;
1196 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1197 flags |= FLAG_PARSE_UTF8;
1199 TEMPSYM_INIT(&sym, pat, patend, flags);
1201 return unpack_rec(&sym, s, s, strend, NULL );
1205 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1209 const I32 start_sp_offset = SP - PL_stack_base;
1214 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1215 bool beyond = FALSE;
1216 bool explicit_length;
1217 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1218 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1220 PERL_ARGS_ASSERT_UNPACK_REC;
1222 symptr->strbeg = s - strbeg;
1224 while (next_symbol(symptr)) {
1227 I32 datumtype = symptr->code;
1228 /* do first one only unless in list context
1229 / is implemented by unpacking the count, then popping it from the
1230 stack, so must check that we're not in the middle of a / */
1231 if ( unpack_only_one
1232 && (SP - PL_stack_base == start_sp_offset + 1)
1233 && (datumtype != '/') ) /* XXX can this be omitted */
1236 switch (howlen = symptr->howlen) {
1238 len = strend - strbeg; /* long enough */
1241 /* e_no_len and e_number */
1242 len = symptr->length;
1246 explicit_length = TRUE;
1248 beyond = s >= strend;
1250 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1252 /* props nonzero means we can process this letter. */
1253 const long size = props & PACK_SIZE_MASK;
1254 const long howmany = (strend - s) / size;
1258 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1259 if (len && unpack_only_one) len = 1;
1265 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1267 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1270 if (howlen == e_no_len)
1271 len = 16; /* len is not specified */
1279 tempsym_t savsym = *symptr;
1280 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1281 symptr->flags |= group_modifiers;
1282 symptr->patend = savsym.grpend;
1283 symptr->previous = &savsym;
1286 if (len && unpack_only_one) len = 1;
1288 symptr->patptr = savsym.grpbeg;
1289 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1290 else symptr->flags &= ~FLAG_PARSE_UTF8;
1291 unpack_rec(symptr, s, strbeg, strend, &s);
1292 if (s == strend && savsym.howlen == e_star)
1293 break; /* No way to continue */
1296 savsym.flags = symptr->flags & ~group_modifiers;
1300 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 case '.' | TYPE_IS_SHRIEKING:
1306 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1307 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1308 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1309 const bool u8 = utf8;
1311 if (howlen == e_star) from = strbeg;
1312 else if (len <= 0) from = s;
1314 tempsym_t *group = symptr;
1316 while (--len && group) group = group->previous;
1317 from = group ? strbeg + group->strbeg : strbeg;
1320 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1321 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1325 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1326 case '@' | TYPE_IS_SHRIEKING:
1329 s = strbeg + symptr->strbeg;
1330 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1331 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1332 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1338 Perl_croak(aTHX_ "'@' outside of string in unpack");
1343 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1346 Perl_croak(aTHX_ "'@' outside of string in unpack");
1350 case 'X' | TYPE_IS_SHRIEKING:
1351 if (!len) /* Avoid division by 0 */
1354 const char *hop, *last;
1356 hop = last = strbeg;
1358 hop += UTF8SKIP(hop);
1365 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1369 len = (s - strbeg) % len;
1375 Perl_croak(aTHX_ "'X' outside of string in unpack");
1376 while (--s, UTF8_IS_CONTINUATION(*s)) {
1378 Perl_croak(aTHX_ "'X' outside of string in unpack");
1383 if (len > s - strbeg)
1384 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1388 case 'x' | TYPE_IS_SHRIEKING: {
1390 if (!len) /* Avoid division by 0 */
1392 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1393 else ai32 = (s - strbeg) % len;
1394 if (ai32 == 0) break;
1402 Perl_croak(aTHX_ "'x' outside of string in unpack");
1407 if (len > strend - s)
1408 Perl_croak(aTHX_ "'x' outside of string in unpack");
1413 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1419 /* Preliminary length estimate is assumed done in 'W' */
1420 if (len > strend - s) len = strend - s;
1426 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1427 if (hop >= strend) {
1429 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1436 } else if (len > strend - s)
1439 if (datumtype == 'Z') {
1440 /* 'Z' strips stuff after first null */
1441 const char *ptr, *end;
1443 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1444 sv = newSVpvn(s, ptr-s);
1445 if (howlen == e_star) /* exact for 'Z*' */
1446 len = ptr-s + (ptr != strend ? 1 : 0);
1447 } else if (datumtype == 'A') {
1448 /* 'A' strips both nulls and spaces */
1450 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1451 for (ptr = s+len-1; ptr >= s; ptr--)
1452 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1453 !is_utf8_space((U8 *) ptr)) break;
1454 if (ptr >= s) ptr += UTF8SKIP(ptr);
1457 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1459 for (ptr = s+len-1; ptr >= s; ptr--)
1460 if (*ptr != 0 && !isSPACE(*ptr)) break;
1463 sv = newSVpvn(s, ptr-s);
1464 } else sv = newSVpvn(s, len);
1468 /* Undo any upgrade done due to need_utf8() */
1469 if (!(symptr->flags & FLAG_WAS_UTF8))
1470 sv_utf8_downgrade(sv, 0);
1478 if (howlen == e_star || len > (strend - s) * 8)
1479 len = (strend - s) * 8;
1482 while (len >= 8 && s < strend) {
1483 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1488 cuv += PL_bitcount[*(U8 *)s++];
1491 if (len && s < strend) {
1493 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1494 if (datumtype == 'b')
1496 if (bits & 1) cuv++;
1501 if (bits & 0x80) cuv++;
1508 sv = sv_2mortal(newSV(len ? len : 1));
1511 if (datumtype == 'b') {
1513 const I32 ai32 = len;
1514 for (len = 0; len < ai32; len++) {
1515 if (len & 7) bits >>= 1;
1517 if (s >= strend) break;
1518 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1519 } else bits = *(U8 *) s++;
1520 *str++ = bits & 1 ? '1' : '0';
1524 const I32 ai32 = len;
1525 for (len = 0; len < ai32; len++) {
1526 if (len & 7) bits <<= 1;
1528 if (s >= strend) break;
1529 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1530 } else bits = *(U8 *) s++;
1531 *str++ = bits & 0x80 ? '1' : '0';
1535 SvCUR_set(sv, str - SvPVX_const(sv));
1542 /* Preliminary length estimate, acceptable for utf8 too */
1543 if (howlen == e_star || len > (strend - s) * 2)
1544 len = (strend - s) * 2;
1545 sv = sv_2mortal(newSV(len ? len : 1));
1548 if (datumtype == 'h') {
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 1) bits >>= 4;
1554 if (s >= strend) break;
1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1556 } else bits = * (U8 *) s++;
1557 *str++ = PL_hexdigit[bits & 15];
1561 const I32 ai32 = len;
1562 for (len = 0; len < ai32; len++) {
1563 if (len & 1) bits <<= 4;
1565 if (s >= strend) break;
1566 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1567 } else bits = *(U8 *) s++;
1568 *str++ = PL_hexdigit[(bits >> 4) & 15];
1572 SvCUR_set(sv, str - SvPVX_const(sv));
1578 if (explicit_length)
1579 /* Switch to "character" mode */
1580 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1585 while (len-- > 0 && s < strend) {
1590 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1591 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1592 if (retlen == (STRLEN) -1 || retlen == 0)
1593 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1597 aint = *(U8 *)(s)++;
1598 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1602 else if (checksum > bits_in_uv)
1603 cdouble += (NV)aint;
1611 while (len-- > 0 && s < strend) {
1613 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1615 if (retlen == (STRLEN) -1 || retlen == 0)
1616 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1620 else if (checksum > bits_in_uv)
1621 cdouble += (NV) val;
1625 } else if (!checksum)
1627 const U8 ch = *(U8 *) s++;
1630 else if (checksum > bits_in_uv)
1631 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1633 while (len-- > 0) cuv += *(U8 *) s++;
1637 if (explicit_length) {
1638 /* Switch to "bytes in UTF-8" mode */
1639 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1641 /* Should be impossible due to the need_utf8() test */
1642 Perl_croak(aTHX_ "U0 mode on a byte string");
1646 if (len > strend - s) len = strend - s;
1648 if (len && unpack_only_one) len = 1;
1652 while (len-- > 0 && s < strend) {
1656 U8 result[UTF8_MAXLEN];
1657 const char *ptr = s;
1659 /* Bug: warns about bad utf8 even if we are short on bytes
1660 and will break out of the loop */
1661 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1664 len = UTF8SKIP(result);
1665 if (!uni_to_bytes(aTHX_ &ptr, strend,
1666 (char *) &result[1], len-1, 'U')) break;
1667 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1670 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1671 if (retlen == (STRLEN) -1 || retlen == 0)
1672 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1677 else if (checksum > bits_in_uv)
1678 cdouble += (NV) auv;
1683 case 's' | TYPE_IS_SHRIEKING:
1684 #if SHORTSIZE != SIZE16
1687 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1688 DO_BO_UNPACK(ashort, s);
1691 else if (checksum > bits_in_uv)
1692 cdouble += (NV)ashort;
1704 #if U16SIZE > SIZE16
1707 SHIFT16(utf8, s, strend, &ai16, datumtype);
1708 DO_BO_UNPACK(ai16, 16);
1709 #if U16SIZE > SIZE16
1715 else if (checksum > bits_in_uv)
1716 cdouble += (NV)ai16;
1721 case 'S' | TYPE_IS_SHRIEKING:
1722 #if SHORTSIZE != SIZE16
1724 unsigned short aushort;
1725 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1726 DO_BO_UNPACK(aushort, s);
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)aushort;
1743 #if U16SIZE > SIZE16
1746 SHIFT16(utf8, s, strend, &au16, datumtype);
1747 DO_BO_UNPACK(au16, 16);
1749 if (datumtype == 'n')
1750 au16 = PerlSock_ntohs(au16);
1753 if (datumtype == 'v')
1758 else if (checksum > bits_in_uv)
1759 cdouble += (NV) au16;
1764 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1765 case 'v' | TYPE_IS_SHRIEKING:
1766 case 'n' | TYPE_IS_SHRIEKING:
1769 # if U16SIZE > SIZE16
1772 SHIFT16(utf8, s, strend, &ai16, datumtype);
1774 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1775 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1776 # endif /* HAS_NTOHS */
1778 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1779 ai16 = (I16) vtohs((U16) ai16);
1780 # endif /* HAS_VTOHS */
1783 else if (checksum > bits_in_uv)
1784 cdouble += (NV) ai16;
1789 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1791 case 'i' | TYPE_IS_SHRIEKING:
1794 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1795 DO_BO_UNPACK(aint, i);
1798 else if (checksum > bits_in_uv)
1799 cdouble += (NV)aint;
1805 case 'I' | TYPE_IS_SHRIEKING:
1808 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1809 DO_BO_UNPACK(auint, i);
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)auint;
1821 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1822 #if IVSIZE == INTSIZE
1823 DO_BO_UNPACK(aiv, i);
1824 #elif IVSIZE == LONGSIZE
1825 DO_BO_UNPACK(aiv, l);
1826 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1827 DO_BO_UNPACK(aiv, 64);
1829 Perl_croak(aTHX_ "'j' not supported on this platform");
1833 else if (checksum > bits_in_uv)
1842 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1843 #if IVSIZE == INTSIZE
1844 DO_BO_UNPACK(auv, i);
1845 #elif IVSIZE == LONGSIZE
1846 DO_BO_UNPACK(auv, l);
1847 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1848 DO_BO_UNPACK(auv, 64);
1850 Perl_croak(aTHX_ "'J' not supported on this platform");
1854 else if (checksum > bits_in_uv)
1860 case 'l' | TYPE_IS_SHRIEKING:
1861 #if LONGSIZE != SIZE32
1864 SHIFT_VAR(utf8, s, strend, along, datumtype);
1865 DO_BO_UNPACK(along, l);
1868 else if (checksum > bits_in_uv)
1869 cdouble += (NV)along;
1880 #if U32SIZE > SIZE32
1883 SHIFT32(utf8, s, strend, &ai32, datumtype);
1884 DO_BO_UNPACK(ai32, 32);
1885 #if U32SIZE > SIZE32
1886 if (ai32 > 2147483647) ai32 -= 4294967296;
1890 else if (checksum > bits_in_uv)
1891 cdouble += (NV)ai32;
1896 case 'L' | TYPE_IS_SHRIEKING:
1897 #if LONGSIZE != SIZE32
1899 unsigned long aulong;
1900 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1901 DO_BO_UNPACK(aulong, l);
1904 else if (checksum > bits_in_uv)
1905 cdouble += (NV)aulong;
1918 #if U32SIZE > SIZE32
1921 SHIFT32(utf8, s, strend, &au32, datumtype);
1922 DO_BO_UNPACK(au32, 32);
1924 if (datumtype == 'N')
1925 au32 = PerlSock_ntohl(au32);
1928 if (datumtype == 'V')
1933 else if (checksum > bits_in_uv)
1934 cdouble += (NV)au32;
1939 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1940 case 'V' | TYPE_IS_SHRIEKING:
1941 case 'N' | TYPE_IS_SHRIEKING:
1944 # if U32SIZE > SIZE32
1947 SHIFT32(utf8, s, strend, &ai32, datumtype);
1949 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1950 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1953 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1954 ai32 = (I32)vtohl((U32)ai32);
1958 else if (checksum > bits_in_uv)
1959 cdouble += (NV)ai32;
1964 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1968 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1969 DO_BO_UNPACK_PC(aptr);
1970 /* newSVpv generates undef if aptr is NULL */
1971 mPUSHs(newSVpv(aptr, 0));
1979 while (len > 0 && s < strend) {
1981 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1982 auv = (auv << 7) | (ch & 0x7f);
1983 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1991 if (++bytes >= sizeof(UV)) { /* promote to string */
1994 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1995 while (s < strend) {
1996 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1997 sv = mul128(sv, (U8)(ch & 0x7f));
2003 t = SvPV_nolen_const(sv);
2012 if ((s >= strend) && bytes)
2013 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2017 if (symptr->howlen == e_star)
2018 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2020 if (s + sizeof(char*) <= strend) {
2022 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2023 DO_BO_UNPACK_PC(aptr);
2024 /* newSVpvn generates undef if aptr is NULL */
2025 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2032 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2033 DO_BO_UNPACK(aquad, 64);
2035 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2036 newSViv((IV)aquad) : newSVnv((NV)aquad));
2037 else if (checksum > bits_in_uv)
2038 cdouble += (NV)aquad;
2046 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2047 DO_BO_UNPACK(auquad, 64);
2049 mPUSHs(auquad <= UV_MAX ?
2050 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2051 else if (checksum > bits_in_uv)
2052 cdouble += (NV)auquad;
2057 #endif /* HAS_QUAD */
2058 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2062 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2063 DO_BO_UNPACK_N(afloat, float);
2073 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2074 DO_BO_UNPACK_N(adouble, double);
2084 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2085 DO_BO_UNPACK_N(anv, NV);
2092 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2095 long double aldouble;
2096 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2097 DO_BO_UNPACK_N(aldouble, long double);
2101 cdouble += aldouble;
2107 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2108 sv = sv_2mortal(newSV(l));
2109 if (l) SvPOK_on(sv);
2112 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2117 next_uni_uu(aTHX_ &s, strend, &a);
2118 next_uni_uu(aTHX_ &s, strend, &b);
2119 next_uni_uu(aTHX_ &s, strend, &c);
2120 next_uni_uu(aTHX_ &s, strend, &d);
2121 hunk[0] = (char)((a << 2) | (b >> 4));
2122 hunk[1] = (char)((b << 4) | (c >> 2));
2123 hunk[2] = (char)((c << 6) | d);
2124 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2132 /* possible checksum byte */
2133 const char *skip = s+UTF8SKIP(s);
2134 if (skip < strend && *skip == '\n')
2140 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2144 len = PL_uudmap[*(U8*)s++] & 077;
2146 if (s < strend && ISUUCHAR(*s))
2147 a = PL_uudmap[*(U8*)s++] & 077;
2150 if (s < strend && ISUUCHAR(*s))
2151 b = PL_uudmap[*(U8*)s++] & 077;
2154 if (s < strend && ISUUCHAR(*s))
2155 c = PL_uudmap[*(U8*)s++] & 077;
2158 if (s < strend && ISUUCHAR(*s))
2159 d = PL_uudmap[*(U8*)s++] & 077;
2162 hunk[0] = (char)((a << 2) | (b >> 4));
2163 hunk[1] = (char)((b << 4) | (c >> 2));
2164 hunk[2] = (char)((c << 6) | d);
2165 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2170 else /* possible checksum byte */
2171 if (s + 1 < strend && s[1] == '\n')
2180 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2181 (checksum > bits_in_uv &&
2182 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2185 anv = (NV) (1 << (checksum & 15));
2186 while (checksum >= 16) {
2190 while (cdouble < 0.0)
2192 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2193 sv = newSVnv(cdouble);
2196 if (checksum < bits_in_uv) {
2197 UV mask = ((UV)1 << checksum) - 1;
2206 if (symptr->flags & FLAG_SLASH){
2207 if (SP - PL_stack_base - start_sp_offset <= 0)
2208 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2209 if( next_symbol(symptr) ){
2210 if( symptr->howlen == e_number )
2211 Perl_croak(aTHX_ "Count after length/code in unpack" );
2213 /* ...end of char buffer then no decent length available */
2214 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2216 /* take top of stack (hope it's numeric) */
2219 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2222 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2224 datumtype = symptr->code;
2225 explicit_length = FALSE;
2233 return SP - PL_stack_base - start_sp_offset;
2241 I32 gimme = GIMME_V;
2244 const char *pat = SvPV_const(left, llen);
2245 const char *s = SvPV_const(right, rlen);
2246 const char *strend = s + rlen;
2247 const char *patend = pat + llen;
2251 cnt = unpackstring(pat, patend, s, strend,
2252 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2253 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2256 if ( !cnt && gimme == G_SCALAR )
2257 PUSHs(&PL_sv_undef);
2262 doencodes(U8 *h, const char *s, I32 len)
2264 *h++ = PL_uuemap[len];
2266 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2267 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2268 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2269 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2274 const char r = (len > 1 ? s[1] : '\0');
2275 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2276 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2277 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2278 *h++ = PL_uuemap[0];
2285 S_is_an_int(pTHX_ const char *s, STRLEN l)
2287 SV *result = newSVpvn(s, l);
2288 char *const result_c = SvPV_nolen(result); /* convenience */
2289 char *out = result_c;
2293 PERL_ARGS_ASSERT_IS_AN_INT;
2301 SvREFCNT_dec(result);
2324 SvREFCNT_dec(result);
2330 SvCUR_set(result, out - result_c);
2334 /* pnum must be '\0' terminated */
2336 S_div128(pTHX_ SV *pnum, bool *done)
2339 char * const s = SvPV(pnum, len);
2343 PERL_ARGS_ASSERT_DIV128;
2347 const int i = m * 10 + (*t - '0');
2348 const int r = (i >> 7); /* r < 10 */
2356 SvCUR_set(pnum, (STRLEN) (t - s));
2361 =for apidoc packlist
2363 The engine implementing pack() Perl function.
2369 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2374 PERL_ARGS_ASSERT_PACKLIST;
2376 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2378 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2379 Also make sure any UTF8 flag is loaded */
2380 SvPV_force_nolen(cat);
2382 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2384 (void)pack_rec( cat, &sym, beglist, endlist );
2387 /* like sv_utf8_upgrade, but also repoint the group start markers */
2389 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2392 const char *from_ptr, *from_start, *from_end, **marks, **m;
2393 char *to_start, *to_ptr;
2395 if (SvUTF8(sv)) return;
2397 from_start = SvPVX_const(sv);
2398 from_end = from_start + SvCUR(sv);
2399 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2400 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2401 if (from_ptr == from_end) {
2402 /* Simple case: no character needs to be changed */
2407 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2408 Newx(to_start, len, char);
2409 Copy(from_start, to_start, from_ptr-from_start, char);
2410 to_ptr = to_start + (from_ptr-from_start);
2412 Newx(marks, sym_ptr->level+2, const char *);
2413 for (group=sym_ptr; group; group = group->previous)
2414 marks[group->level] = from_start + group->strbeg;
2415 marks[sym_ptr->level+1] = from_end+1;
2416 for (m = marks; *m < from_ptr; m++)
2417 *m = to_start + (*m-from_start);
2419 for (;from_ptr < from_end; from_ptr++) {
2420 while (*m == from_ptr) *m++ = to_ptr;
2421 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2425 while (*m == from_ptr) *m++ = to_ptr;
2426 if (m != marks + sym_ptr->level+1) {
2429 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2431 for (group=sym_ptr; group; group = group->previous)
2432 group->strbeg = marks[group->level] - to_start;
2437 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2438 from_start -= SvIVX(sv);
2441 SvFLAGS(sv) &= ~SVf_OOK;
2444 Safefree(from_start);
2445 SvPV_set(sv, to_start);
2446 SvCUR_set(sv, to_ptr - to_start);
2451 /* Exponential string grower. Makes string extension effectively O(n)
2452 needed says how many extra bytes we need (not counting the final '\0')
2453 Only grows the string if there is an actual lack of space
2456 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2457 const STRLEN cur = SvCUR(sv);
2458 const STRLEN len = SvLEN(sv);
2461 PERL_ARGS_ASSERT_SV_EXP_GROW;
2463 if (len - cur > needed) return SvPVX(sv);
2464 extend = needed > len ? needed : len;
2465 return SvGROW(sv, len+extend+1);
2470 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2473 tempsym_t lookahead;
2474 I32 items = endlist - beglist;
2475 bool found = next_symbol(symptr);
2476 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2477 bool warn_utf8 = ckWARN(WARN_UTF8);
2479 PERL_ARGS_ASSERT_PACK_REC;
2481 if (symptr->level == 0 && found && symptr->code == 'U') {
2482 marked_upgrade(aTHX_ cat, symptr);
2483 symptr->flags |= FLAG_DO_UTF8;
2486 symptr->strbeg = SvCUR(cat);
2492 SV *lengthcode = NULL;
2493 I32 datumtype = symptr->code;
2494 howlen_t howlen = symptr->howlen;
2495 char *start = SvPVX(cat);
2496 char *cur = start + SvCUR(cat);
2498 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2502 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2506 /* e_no_len and e_number */
2507 len = symptr->length;
2512 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2514 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2515 /* We can process this letter. */
2516 STRLEN size = props & PACK_SIZE_MASK;
2517 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2521 /* Look ahead for next symbol. Do we have code/code? */
2522 lookahead = *symptr;
2523 found = next_symbol(&lookahead);
2524 if (symptr->flags & FLAG_SLASH) {
2526 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2527 if (strchr("aAZ", lookahead.code)) {
2528 if (lookahead.howlen == e_number) count = lookahead.length;
2531 if (SvGAMAGIC(*beglist)) {
2532 /* Avoid reading the active data more than once
2533 by copying it to a temporary. */
2535 const char *const pv = SvPV_const(*beglist, len);
2537 = newSVpvn_flags(pv, len,
2538 SVs_TEMP | SvUTF8(*beglist));
2541 count = DO_UTF8(*beglist) ?
2542 sv_len_utf8(*beglist) : sv_len(*beglist);
2545 if (lookahead.code == 'Z') count++;
2548 if (lookahead.howlen == e_number && lookahead.length < items)
2549 count = lookahead.length;
2552 lookahead.howlen = e_number;
2553 lookahead.length = count;
2554 lengthcode = sv_2mortal(newSViv(count));
2557 /* Code inside the switch must take care to properly update
2558 cat (CUR length and '\0' termination) if it updated *cur and
2559 doesn't simply leave using break */
2560 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2562 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2563 (int) TYPE_NO_MODIFIERS(datumtype));
2565 Perl_croak(aTHX_ "'%%' may not be used in pack");
2568 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2569 case '.' | TYPE_IS_SHRIEKING:
2572 if (howlen == e_star) from = start;
2573 else if (len == 0) from = cur;
2575 tempsym_t *group = symptr;
2577 while (--len && group) group = group->previous;
2578 from = group ? start + group->strbeg : start;
2581 len = SvIV(fromstr);
2583 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2584 case '@' | TYPE_IS_SHRIEKING:
2587 from = start + symptr->strbeg;
2589 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2590 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2591 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2595 while (len && from < cur) {
2596 from += UTF8SKIP(from);
2600 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2602 /* Here we know from == cur */
2604 GROWING(0, cat, start, cur, len);
2605 Zero(cur, len, char);
2607 } else if (from < cur) {
2610 } else goto no_change;
2618 if (len > 0) goto grow;
2619 if (len == 0) goto no_change;
2626 tempsym_t savsym = *symptr;
2627 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2628 symptr->flags |= group_modifiers;
2629 symptr->patend = savsym.grpend;
2631 symptr->previous = &lookahead;
2634 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2635 else symptr->flags &= ~FLAG_PARSE_UTF8;
2636 was_utf8 = SvUTF8(cat);
2637 symptr->patptr = savsym.grpbeg;
2638 beglist = pack_rec(cat, symptr, beglist, endlist);
2639 if (SvUTF8(cat) != was_utf8)
2640 /* This had better be an upgrade while in utf8==0 mode */
2643 if (savsym.howlen == e_star && beglist == endlist)
2644 break; /* No way to continue */
2646 items = endlist - beglist;
2647 lookahead.flags = symptr->flags & ~group_modifiers;
2650 case 'X' | TYPE_IS_SHRIEKING:
2651 if (!len) /* Avoid division by 0 */
2658 hop += UTF8SKIP(hop);
2665 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2669 len = (cur-start) % len;
2673 if (len < 1) goto no_change;
2677 Perl_croak(aTHX_ "'%c' outside of string in pack",
2678 (int) TYPE_NO_MODIFIERS(datumtype));
2679 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2681 Perl_croak(aTHX_ "'%c' outside of string in pack",
2682 (int) TYPE_NO_MODIFIERS(datumtype));
2688 if (cur - start < len)
2689 Perl_croak(aTHX_ "'%c' outside of string in pack",
2690 (int) TYPE_NO_MODIFIERS(datumtype));
2693 if (cur < start+symptr->strbeg) {
2694 /* Make sure group starts don't point into the void */
2696 const STRLEN length = cur-start;
2697 for (group = symptr;
2698 group && length < group->strbeg;
2699 group = group->previous) group->strbeg = length;
2700 lookahead.strbeg = length;
2703 case 'x' | TYPE_IS_SHRIEKING: {
2705 if (!len) /* Avoid division by 0 */
2707 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2708 else ai32 = (cur - start) % len;
2709 if (ai32 == 0) goto no_change;
2721 aptr = SvPV_const(fromstr, fromlen);
2722 if (DO_UTF8(fromstr)) {
2723 const char *end, *s;
2725 if (!utf8 && !SvUTF8(cat)) {
2726 marked_upgrade(aTHX_ cat, symptr);
2727 lookahead.flags |= FLAG_DO_UTF8;
2728 lookahead.strbeg = symptr->strbeg;
2731 cur = start + SvCUR(cat);
2733 if (howlen == e_star) {
2734 if (utf8) goto string_copy;
2738 end = aptr + fromlen;
2739 fromlen = datumtype == 'Z' ? len-1 : len;
2740 while ((I32) fromlen > 0 && s < end) {
2745 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2748 if (datumtype == 'Z') len++;
2754 fromlen = len - fromlen;
2755 if (datumtype == 'Z') fromlen--;
2756 if (howlen == e_star) {
2758 if (datumtype == 'Z') len++;
2760 GROWING(0, cat, start, cur, len);
2761 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2762 datumtype | TYPE_IS_PACK))
2763 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2767 if (howlen == e_star) {
2769 if (datumtype == 'Z') len++;
2771 if (len <= (I32) fromlen) {
2773 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2775 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2777 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2778 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2780 while (fromlen > 0) {
2781 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2787 if (howlen == e_star) {
2789 if (datumtype == 'Z') len++;
2791 if (len <= (I32) fromlen) {
2793 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2795 GROWING(0, cat, start, cur, len);
2796 Copy(aptr, cur, fromlen, char);
2800 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2807 const char *str, *end;
2814 str = SvPV_const(fromstr, fromlen);
2815 end = str + fromlen;
2816 if (DO_UTF8(fromstr)) {
2818 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2820 utf8_source = FALSE;
2821 utf8_flags = 0; /* Unused, but keep compilers happy */
2823 if (howlen == e_star) len = fromlen;
2824 field_len = (len+7)/8;
2825 GROWING(utf8, cat, start, cur, field_len);
2826 if (len > (I32)fromlen) len = fromlen;
2829 if (datumtype == 'B')
2833 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2835 } else bits |= *str++ & 1;
2836 if (l & 7) bits <<= 1;
2838 PUSH_BYTE(utf8, cur, bits);
2843 /* datumtype == 'b' */
2847 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2848 if (val & 1) bits |= 0x80;
2849 } else if (*str++ & 1)
2851 if (l & 7) bits >>= 1;
2853 PUSH_BYTE(utf8, cur, bits);
2859 if (datumtype == 'B')
2860 bits <<= 7 - (l & 7);
2862 bits >>= 7 - (l & 7);
2863 PUSH_BYTE(utf8, cur, bits);
2866 /* Determine how many chars are left in the requested field */
2868 if (howlen == e_star) field_len = 0;
2869 else field_len -= l;
2870 Zero(cur, field_len, char);
2876 const char *str, *end;
2883 str = SvPV_const(fromstr, fromlen);
2884 end = str + fromlen;
2885 if (DO_UTF8(fromstr)) {
2887 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2889 utf8_source = FALSE;
2890 utf8_flags = 0; /* Unused, but keep compilers happy */
2892 if (howlen == e_star) len = fromlen;
2893 field_len = (len+1)/2;
2894 GROWING(utf8, cat, start, cur, field_len);
2895 if (!utf8 && len > (I32)fromlen) len = fromlen;
2898 if (datumtype == 'H')
2902 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2903 if (val < 256 && isALPHA(val))
2904 bits |= (val + 9) & 0xf;
2907 } else if (isALPHA(*str))
2908 bits |= (*str++ + 9) & 0xf;
2910 bits |= *str++ & 0xf;
2911 if (l & 1) bits <<= 4;
2913 PUSH_BYTE(utf8, cur, bits);
2921 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2922 if (val < 256 && isALPHA(val))
2923 bits |= ((val + 9) & 0xf) << 4;
2925 bits |= (val & 0xf) << 4;
2926 } else if (isALPHA(*str))
2927 bits |= ((*str++ + 9) & 0xf) << 4;
2929 bits |= (*str++ & 0xf) << 4;
2930 if (l & 1) bits >>= 4;
2932 PUSH_BYTE(utf8, cur, bits);
2938 PUSH_BYTE(utf8, cur, bits);
2941 /* Determine how many chars are left in the requested field */
2943 if (howlen == e_star) field_len = 0;
2944 else field_len -= l;
2945 Zero(cur, field_len, char);
2953 aiv = SvIV(fromstr);
2954 if ((-128 > aiv || aiv > 127) &&
2956 Perl_warner(aTHX_ packWARN(WARN_PACK),
2957 "Character in 'c' format wrapped in pack");
2958 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2963 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2969 aiv = SvIV(fromstr);
2970 if ((0 > aiv || aiv > 0xff) &&
2972 Perl_warner(aTHX_ packWARN(WARN_PACK),
2973 "Character in 'C' format wrapped in pack");
2974 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2979 U8 in_bytes = (U8)IN_BYTES;
2981 end = start+SvLEN(cat)-1;
2982 if (utf8) end -= UTF8_MAXLEN-1;
2986 auv = SvUV(fromstr);
2987 if (in_bytes) auv = auv % 0x100;
2992 SvCUR_set(cat, cur - start);
2994 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2995 end = start+SvLEN(cat)-UTF8_MAXLEN;
2997 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3000 0 : UNICODE_ALLOW_ANY);
3005 SvCUR_set(cat, cur - start);
3006 marked_upgrade(aTHX_ cat, symptr);
3007 lookahead.flags |= FLAG_DO_UTF8;
3008 lookahead.strbeg = symptr->strbeg;
3011 cur = start + SvCUR(cat);
3012 end = start+SvLEN(cat)-UTF8_MAXLEN;
3015 if (ckWARN(WARN_PACK))
3016 Perl_warner(aTHX_ packWARN(WARN_PACK),
3017 "Character in 'W' format wrapped in pack");
3022 SvCUR_set(cat, cur - start);
3023 GROWING(0, cat, start, cur, len+1);
3024 end = start+SvLEN(cat)-1;
3026 *(U8 *) cur++ = (U8)auv;
3035 if (!(symptr->flags & FLAG_DO_UTF8)) {
3036 marked_upgrade(aTHX_ cat, symptr);
3037 lookahead.flags |= FLAG_DO_UTF8;
3038 lookahead.strbeg = symptr->strbeg;
3044 end = start+SvLEN(cat);
3045 if (!utf8) end -= UTF8_MAXLEN;
3049 auv = SvUV(fromstr);
3051 U8 buffer[UTF8_MAXLEN], *endb;
3052 endb = uvuni_to_utf8_flags(buffer, auv,
3054 0 : UNICODE_ALLOW_ANY);
3055 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3057 SvCUR_set(cat, cur - start);
3058 GROWING(0, cat, start, cur,
3059 len+(endb-buffer)*UTF8_EXPAND);
3060 end = start+SvLEN(cat);
3062 cur = bytes_to_uni(buffer, endb-buffer, cur);
3066 SvCUR_set(cat, cur - start);
3067 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3068 end = start+SvLEN(cat)-UTF8_MAXLEN;
3070 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3072 0 : UNICODE_ALLOW_ANY);
3077 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3083 anv = SvNV(fromstr);
3085 /* VOS does not automatically map a floating-point overflow
3086 during conversion from double to float into infinity, so we
3087 do it by hand. This code should either be generalized for
3088 any OS that needs it, or removed if and when VOS implements
3089 posix-976 (suggestion to support mapping to infinity).
3090 Paul.Green@stratus.com 02-04-02. */
3092 extern const float _float_constants[];
3094 afloat = _float_constants[0]; /* single prec. inf. */
3095 else if (anv < -FLT_MAX)
3096 afloat = _float_constants[0]; /* single prec. inf. */
3097 else afloat = (float) anv;
3100 # if defined(VMS) && !defined(__IEEE_FP)
3101 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3102 * on Alpha; fake it if we don't have them.
3106 else if (anv < -FLT_MAX)
3108 else afloat = (float)anv;
3110 afloat = (float)anv;
3112 #endif /* __VOS__ */
3113 DO_BO_PACK_N(afloat, float);
3114 PUSH_VAR(utf8, cur, afloat);
3122 anv = SvNV(fromstr);
3124 /* VOS does not automatically map a floating-point overflow
3125 during conversion from long double to double into infinity,
3126 so we do it by hand. This code should either be generalized
3127 for any OS that needs it, or removed if and when VOS
3128 implements posix-976 (suggestion to support mapping to
3129 infinity). Paul.Green@stratus.com 02-04-02. */
3131 extern const double _double_constants[];
3133 adouble = _double_constants[0]; /* double prec. inf. */
3134 else if (anv < -DBL_MAX)
3135 adouble = _double_constants[0]; /* double prec. inf. */
3136 else adouble = (double) anv;
3139 # if defined(VMS) && !defined(__IEEE_FP)
3140 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3141 * on Alpha; fake it if we don't have them.
3145 else if (anv < -DBL_MAX)
3147 else adouble = (double)anv;
3149 adouble = (double)anv;
3151 #endif /* __VOS__ */
3152 DO_BO_PACK_N(adouble, double);
3153 PUSH_VAR(utf8, cur, adouble);
3158 Zero(&anv, 1, NV); /* can be long double with unused bits */
3161 anv = SvNV(fromstr);
3162 DO_BO_PACK_N(anv, NV);
3163 PUSH_VAR(utf8, cur, anv);
3167 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3169 long double aldouble;
3170 /* long doubles can have unused bits, which may be nonzero */
3171 Zero(&aldouble, 1, long double);
3174 aldouble = (long double)SvNV(fromstr);
3175 DO_BO_PACK_N(aldouble, long double);
3176 PUSH_VAR(utf8, cur, aldouble);
3181 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3182 case 'n' | TYPE_IS_SHRIEKING:
3188 ai16 = (I16)SvIV(fromstr);
3190 ai16 = PerlSock_htons(ai16);
3192 PUSH16(utf8, cur, &ai16);
3195 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3196 case 'v' | TYPE_IS_SHRIEKING:
3202 ai16 = (I16)SvIV(fromstr);
3206 PUSH16(utf8, cur, &ai16);
3209 case 'S' | TYPE_IS_SHRIEKING:
3210 #if SHORTSIZE != SIZE16
3212 unsigned short aushort;
3214 aushort = SvUV(fromstr);
3215 DO_BO_PACK(aushort, s);
3216 PUSH_VAR(utf8, cur, aushort);
3226 au16 = (U16)SvUV(fromstr);
3227 DO_BO_PACK(au16, 16);
3228 PUSH16(utf8, cur, &au16);
3231 case 's' | TYPE_IS_SHRIEKING:
3232 #if SHORTSIZE != SIZE16
3236 ashort = SvIV(fromstr);
3237 DO_BO_PACK(ashort, s);
3238 PUSH_VAR(utf8, cur, ashort);
3248 ai16 = (I16)SvIV(fromstr);
3249 DO_BO_PACK(ai16, 16);
3250 PUSH16(utf8, cur, &ai16);
3254 case 'I' | TYPE_IS_SHRIEKING:
3258 auint = SvUV(fromstr);
3259 DO_BO_PACK(auint, i);
3260 PUSH_VAR(utf8, cur, auint);
3267 aiv = SvIV(fromstr);
3268 #if IVSIZE == INTSIZE
3270 #elif IVSIZE == LONGSIZE
3272 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3273 DO_BO_PACK(aiv, 64);
3275 Perl_croak(aTHX_ "'j' not supported on this platform");
3277 PUSH_VAR(utf8, cur, aiv);
3284 auv = SvUV(fromstr);
3285 #if UVSIZE == INTSIZE
3287 #elif UVSIZE == LONGSIZE
3289 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3290 DO_BO_PACK(auv, 64);
3292 Perl_croak(aTHX_ "'J' not supported on this platform");
3294 PUSH_VAR(utf8, cur, auv);
3301 anv = SvNV(fromstr);
3305 SvCUR_set(cat, cur - start);
3306 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3309 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3310 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3311 any negative IVs will have already been got by the croak()
3312 above. IOK is untrue for fractions, so we test them
3313 against UV_MAX_P1. */
3314 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3315 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3316 char *in = buf + sizeof(buf);
3317 UV auv = SvUV(fromstr);
3320 *--in = (char)((auv & 0x7f) | 0x80);
3323 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3324 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3325 in, (buf + sizeof(buf)) - in);
3326 } else if (SvPOKp(fromstr))
3328 else if (SvNOKp(fromstr)) {
3329 /* 10**NV_MAX_10_EXP is the largest power of 10
3330 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3331 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3332 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3333 And with that many bytes only Inf can overflow.
3334 Some C compilers are strict about integral constant
3335 expressions so we conservatively divide by a slightly
3336 smaller integer instead of multiplying by the exact
3337 floating-point value.
3339 #ifdef NV_MAX_10_EXP
3340 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3341 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3343 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3344 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3346 char *in = buf + sizeof(buf);
3348 anv = Perl_floor(anv);
3350 const NV next = Perl_floor(anv / 128);
3351 if (in <= buf) /* this cannot happen ;-) */
3352 Perl_croak(aTHX_ "Cannot compress integer in pack");
3353 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3356 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3357 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3358 in, (buf + sizeof(buf)) - in);
3367 /* Copy string and check for compliance */
3368 from = SvPV_const(fromstr, len);
3369 if ((norm = is_an_int(from, len)) == NULL)
3370 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3372 Newx(result, len, char);
3375 while (!done) *--in = div128(norm, &done) | 0x80;
3376 result[len - 1] &= 0x7F; /* clear continue bit */
3377 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3378 in, (result + len) - in);
3380 SvREFCNT_dec(norm); /* free norm */
3385 case 'i' | TYPE_IS_SHRIEKING:
3389 aint = SvIV(fromstr);
3390 DO_BO_PACK(aint, i);
3391 PUSH_VAR(utf8, cur, aint);
3394 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3395 case 'N' | TYPE_IS_SHRIEKING:
3401 au32 = SvUV(fromstr);
3403 au32 = PerlSock_htonl(au32);
3405 PUSH32(utf8, cur, &au32);
3408 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3409 case 'V' | TYPE_IS_SHRIEKING:
3415 au32 = SvUV(fromstr);
3419 PUSH32(utf8, cur, &au32);
3422 case 'L' | TYPE_IS_SHRIEKING:
3423 #if LONGSIZE != SIZE32
3425 unsigned long aulong;
3427 aulong = SvUV(fromstr);
3428 DO_BO_PACK(aulong, l);
3429 PUSH_VAR(utf8, cur, aulong);
3439 au32 = SvUV(fromstr);
3440 DO_BO_PACK(au32, 32);
3441 PUSH32(utf8, cur, &au32);
3444 case 'l' | TYPE_IS_SHRIEKING:
3445 #if LONGSIZE != SIZE32
3449 along = SvIV(fromstr);
3450 DO_BO_PACK(along, l);
3451 PUSH_VAR(utf8, cur, along);
3461 ai32 = SvIV(fromstr);
3462 DO_BO_PACK(ai32, 32);
3463 PUSH32(utf8, cur, &ai32);
3471 auquad = (Uquad_t) SvUV(fromstr);
3472 DO_BO_PACK(auquad, 64);
3473 PUSH_VAR(utf8, cur, auquad);
3480 aquad = (Quad_t)SvIV(fromstr);
3481 DO_BO_PACK(aquad, 64);
3482 PUSH_VAR(utf8, cur, aquad);
3485 #endif /* HAS_QUAD */
3487 len = 1; /* assume SV is correct length */
3488 GROWING(utf8, cat, start, cur, sizeof(char *));
3495 SvGETMAGIC(fromstr);
3496 if (!SvOK(fromstr)) aptr = NULL;
3498 /* XXX better yet, could spirit away the string to
3499 * a safe spot and hang on to it until the result
3500 * of pack() (and all copies of the result) are
3503 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3504 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3505 Perl_warner(aTHX_ packWARN(WARN_PACK),
3506 "Attempt to pack pointer to temporary value");
3508 if (SvPOK(fromstr) || SvNIOK(fromstr))
3509 aptr = SvPV_nomg_const_nolen(fromstr);
3511 aptr = SvPV_force_flags_nolen(fromstr, 0);
3513 DO_BO_PACK_PC(aptr);
3514 PUSH_VAR(utf8, cur, aptr);
3518 const char *aptr, *aend;
3522 if (len <= 2) len = 45;
3523 else len = len / 3 * 3;
3525 if (ckWARN(WARN_PACK))
3526 Perl_warner(aTHX_ packWARN(WARN_PACK),
3527 "Field too wide in 'u' format in pack");
3530 aptr = SvPV_const(fromstr, fromlen);
3531 from_utf8 = DO_UTF8(fromstr);
3533 aend = aptr + fromlen;
3534 fromlen = sv_len_utf8(fromstr);
3535 } else aend = NULL; /* Unused, but keep compilers happy */
3536 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3537 while (fromlen > 0) {
3540 U8 hunk[1+63/3*4+1];
3542 if ((I32)fromlen > len)
3548 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3549 'u' | TYPE_IS_PACK)) {
3551 SvCUR_set(cat, cur - start);
3552 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3554 end = doencodes(hunk, buffer, todo);
3556 end = doencodes(hunk, aptr, todo);
3559 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3566 SvCUR_set(cat, cur - start);
3568 *symptr = lookahead;
3577 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3578 register SV *cat = TARG;
3580 SV *pat_sv = *++MARK;
3581 register const char *pat = SvPV_const(pat_sv, fromlen);
3582 register const char *patend = pat + fromlen;
3588 packlist(cat, pat, patend, MARK, SP + 1);
3598 * c-indentation-style: bsd
3600 * indent-tabs-mode: t
3603 * ex: set ts=8 sts=4 sw=4 noet: