3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
74 # define PERL_PACK_CAN_BYTEORDER
75 # define PERL_PACK_CAN_SHRIEKSIGN
81 /* Maximum number of bytes to which a byte can grow due to upgrade */
85 * Offset for integer pack/unpack.
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
109 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
110 # define OFF16(p) ((char*)(p))
111 # define OFF32(p) ((char*)(p))
113 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
114 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
117 ++++ bad cray byte order
121 # define OFF16(p) ((char *) (p))
122 # define OFF32(p) ((char *) (p))
125 /* Only to be used inside a loop (see the break) */
126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
130 Copy(s, OFF16(p), SIZE16, char); \
135 /* Only to be used inside a loop (see the break) */
136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
140 Copy(s, OFF32(p), SIZE32, char); \
145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
155 Copy(s, (char *) &var, sizeof(var), char); \
160 #define PUSH_VAR(utf8, aptr, var) \
161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164 #define MAX_SUB_TEMPLATE_LEVEL 100
166 /* flags (note that type modifiers can also be used as flags!) */
167 #define FLAG_WAS_UTF8 0x40
168 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
169 #define FLAG_UNPACK_ONLY_ONE 0x10
170 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
171 #define FLAG_SLASH 0x04
172 #define FLAG_COMMA 0x02
173 #define FLAG_PACK 0x01
176 S_mul128(pTHX_ SV *sv, U8 m)
179 char *s = SvPV(sv, len);
182 PERL_ARGS_ASSERT_MUL128;
184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
185 SV * const tmpNew = newSVpvs("0000000000");
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
193 while (!*t) /* trailing '\0'? */
196 const U32 i = ((*t - '0') << 7) + m;
197 *(t--) = '0' + (char)(i % 10);
203 /* Explosives and implosives. */
205 #if 'I' == 73 && 'J' == 74
206 /* On an ASCII/ISO kind of system */
207 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
210 Some other sort of character set - use memchr() so we don't match
213 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
217 #define TYPE_IS_SHRIEKING 0x100
218 #define TYPE_IS_BIG_ENDIAN 0x200
219 #define TYPE_IS_LITTLE_ENDIAN 0x400
220 #define TYPE_IS_PACK 0x800
221 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
223 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
231 #ifndef PERL_PACK_CAN_BYTEORDER
232 /* Put "can't" first because it is shorter */
233 # define TYPE_ENDIANNESS(t) 0
234 # define TYPE_NO_ENDIANNESS(t) (t)
236 # define ENDIANNESS_ALLOWED_TYPES ""
238 # define DO_BO_UNPACK(var, type)
239 # define DO_BO_PACK(var, type)
240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242 # define DO_BO_UNPACK_N(var, type)
243 # define DO_BO_PACK_N(var, type)
244 # define DO_BO_UNPACK_P(var)
245 # define DO_BO_PACK_P(var)
246 # define DO_BO_UNPACK_PC(var)
247 # define DO_BO_PACK_PC(var)
249 #else /* PERL_PACK_CAN_BYTEORDER */
251 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
254 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
256 # define DO_BO_UNPACK(var, type) \
258 switch (TYPE_ENDIANNESS(datumtype)) { \
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 # define DO_BO_PACK(var, type) \
267 switch (TYPE_ENDIANNESS(datumtype)) { \
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
276 switch (TYPE_ENDIANNESS(datumtype)) { \
277 case TYPE_IS_BIG_ENDIAN: \
278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
280 case TYPE_IS_LITTLE_ENDIAN: \
281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
290 switch (TYPE_ENDIANNESS(datumtype)) { \
291 case TYPE_IS_BIG_ENDIAN: \
292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
294 case TYPE_IS_LITTLE_ENDIAN: \
295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
302 # define BO_CANT_DOIT(action, type) \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
318 # if PTRSIZE == INTSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
323 # elif PTRSIZE == LONGSIZE
324 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
328 # elif PTRSIZE == IVSIZE
329 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
334 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
336 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
340 # if defined(my_htolen) && defined(my_letohn) && \
341 defined(my_htoben) && defined(my_betohn)
342 # define DO_BO_UNPACK_N(var, type) \
344 switch (TYPE_ENDIANNESS(datumtype)) { \
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
351 # define DO_BO_PACK_N(var, type) \
353 switch (TYPE_ENDIANNESS(datumtype)) { \
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
360 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
364 #endif /* PERL_PACK_CAN_BYTEORDER */
366 #define PACK_SIZE_CANNOT_CSUM 0x80
367 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
368 #define PACK_SIZE_MASK 0x3F
370 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
373 #if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
377 typedef U8 packprops_t;
380 STATIC const packprops_t packprops[512] = {
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 /* C */ sizeof(unsigned char),
388 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
389 /* D */ LONG_DOUBLESIZE,
396 /* I */ sizeof(unsigned int),
403 #if defined(HAS_QUAD)
404 /* Q */ sizeof(Uquad_t),
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
418 /* f */ sizeof(float),
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
428 #if defined(HAS_QUAD)
429 /* q */ sizeof(Quad_t),
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
455 /* L */ sizeof(unsigned long),
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
463 /* S */ sizeof(unsigned short),
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 /* l */ sizeof(long),
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
482 /* s */ sizeof(short),
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
500 /* EBCDIC (or bust) */
501 STATIC const packprops_t packprops[512] = {
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
515 /* f */ sizeof(float),
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
526 #if defined(HAS_QUAD)
527 /* q */ sizeof(Quad_t),
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 /* C */ sizeof(unsigned char),
539 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
540 /* D */ LONG_DOUBLESIZE,
547 /* I */ sizeof(unsigned int),
555 #if defined(HAS_QUAD)
556 /* Q */ sizeof(Uquad_t),
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
610 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
621 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
633 if (ckWARN(WARN_UNPACK))
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
636 (int) TYPE_NO_MODIFIERS(datumtype));
643 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
648 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
652 const char *from = *s;
654 const U32 flags = ckWARN(WARN_UTF8) ?
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
662 } else from += retlen;
667 *(U8 *)buf++ = (U8)val;
669 /* We have enough characters for the buffer. Did we have problems ? */
672 /* Rewalk the string fragment while warning */
674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
679 if (from > end) from = end;
681 if ((bad & 2) && ckWARN(WARN_UNPACK))
682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
683 WARN_PACK : WARN_UNPACK),
684 "Character(s) in '%c' format wrapped in %s",
685 (int) TYPE_NO_MODIFIERS(datumtype),
686 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
693 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
698 if (val >= 0x100 || !ISUUCHAR(val) ||
699 retlen == (STRLEN) -1 || retlen == 0) {
703 *out = PL_uudmap[val] & 077;
709 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
710 const U8 * const end = start + len;
712 PERL_ARGS_ASSERT_BYTES_TO_UNI;
714 while (start < end) {
715 const UV uv = NATIVE_TO_ASCII(*start);
716 if (UNI_IS_INVARIANT(uv))
717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
727 #define PUSH_BYTES(utf8, cur, buf, len) \
730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
732 Copy(buf, cur, len, char); \
737 #define GROWING(utf8, cat, start, cur, in_len) \
739 STRLEN glen = (in_len); \
740 if (utf8) glen *= UTF8_EXPAND; \
741 if ((cur) + glen >= (start) + SvLEN(cat)) { \
742 (start) = sv_exp_grow(cat, glen); \
743 (cur) = (start) + SvCUR(cat); \
747 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
749 const STRLEN glen = (in_len); \
751 if (utf8) gl *= UTF8_EXPAND; \
752 if ((cur) + gl >= (start) + SvLEN(cat)) { \
754 SvCUR_set((cat), (cur) - (start)); \
755 (start) = sv_exp_grow(cat, gl); \
756 (cur) = (start) + SvCUR(cat); \
758 PUSH_BYTES(utf8, cur, buf, glen); \
761 #define PUSH_BYTE(utf8, s, byte) \
764 const U8 au8 = (byte); \
765 (s) = bytes_to_uni(&au8, 1, (s)); \
766 } else *(U8 *)(s)++ = (byte); \
769 /* Only to be used inside a loop (see the break) */
770 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773 if (str >= end) break; \
774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
775 if (retlen == (STRLEN) -1 || retlen == 0) { \
777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
782 static const char *_action( const tempsym_t* symptr )
784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
787 /* Returns the sizeof() struct described by pat */
789 S_measure_struct(pTHX_ tempsym_t* symptr)
793 PERL_ARGS_ASSERT_MEASURE_STRUCT;
795 while (next_symbol(symptr)) {
799 switch (symptr->howlen) {
801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
805 /* e_no_len and e_number */
806 len = symptr->length;
810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
819 #ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
826 case 'U': /* XXXX Is it correct? */
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 (int) TYPE_NO_MODIFIERS(symptr->code),
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 if (!len) /* Avoid division by 0 */
853 len = total % len; /* Assumed: the start is aligned. */
858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
888 size = sizeof(char*);
898 /* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
904 PERL_ARGS_ASSERT_GROUP_END;
906 while (patptr < patend) {
907 const char c = *patptr++;
914 while (patptr < patend && *patptr != '\n')
918 patptr = group_end(patptr, patend, ')') + 1;
920 patptr = group_end(patptr, patend, ']') + 1;
922 Perl_croak(aTHX_ "No group ending character '%c' found in template",
928 /* Convert unsigned decimal number to binary.
929 * Expects a pointer to the first digit and address of length variable
930 * Advances char pointer to 1st non-digit char and returns number
933 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
935 I32 len = *patptr++ - '0';
937 PERL_ARGS_ASSERT_GET_NUM;
939 while (isDIGIT(*patptr)) {
940 if (len >= 0x7FFFFFFF/10)
941 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942 len = (len * 10) + (*patptr++ - '0');
948 /* The marvellous template parsing routine: Using state stored in *symptr,
949 * locates next template code and count
952 S_next_symbol(pTHX_ tempsym_t* symptr )
954 const char* patptr = symptr->patptr;
955 const char* const patend = symptr->patend;
957 PERL_ARGS_ASSERT_NEXT_SYMBOL;
959 symptr->flags &= ~FLAG_SLASH;
961 while (patptr < patend) {
962 if (isSPACE(*patptr))
964 else if (*patptr == '#') {
966 while (patptr < patend && *patptr != '\n')
971 /* We should have found a template code */
972 I32 code = *patptr++ & 0xFF;
973 U32 inherited_modifiers = 0;
975 if (code == ','){ /* grandfather in commas but with a warning */
976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977 symptr->flags |= FLAG_COMMA;
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Invalid type ',' in %s", _action( symptr ) );
984 /* for '(', skip to ')' */
986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987 Perl_croak(aTHX_ "()-group starts with a count in %s",
989 symptr->grpbeg = patptr;
990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
996 /* look for group modifiers to inherit */
997 if (TYPE_ENDIANNESS(symptr->flags)) {
998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1002 /* look for modifiers */
1003 while (patptr < patend) {
1004 const char *allowed;
1008 modifier = TYPE_IS_SHRIEKING;
1009 allowed = SHRIEKING_ALLOWED_TYPES;
1011 #ifdef PERL_PACK_CAN_BYTEORDER
1013 modifier = TYPE_IS_BIG_ENDIAN;
1014 allowed = ENDIANNESS_ALLOWED_TYPES;
1017 modifier = TYPE_IS_LITTLE_ENDIAN;
1018 allowed = ENDIANNESS_ALLOWED_TYPES;
1020 #endif /* PERL_PACK_CAN_BYTEORDER */
1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1032 allowed, _action( symptr ) );
1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038 TYPE_ENDIANNESS_MASK)
1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1040 *patptr, _action( symptr ) );
1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044 "Duplicate modifier '%c' after '%c' in %s",
1045 *patptr, (int) TYPE_NO_MODIFIERS(code),
1046 _action( symptr ) );
1053 /* inherit modifiers */
1054 code |= inherited_modifiers;
1056 /* look for count and/or / */
1057 if (patptr < patend) {
1058 if (isDIGIT(*patptr)) {
1059 patptr = get_num( patptr, &symptr->length );
1060 symptr->howlen = e_number;
1062 } else if (*patptr == '*') {
1064 symptr->howlen = e_star;
1066 } else if (*patptr == '[') {
1067 const char* lenptr = ++patptr;
1068 symptr->howlen = e_number;
1069 patptr = group_end( patptr, patend, ']' ) + 1;
1070 /* what kind of [] is it? */
1071 if (isDIGIT(*lenptr)) {
1072 lenptr = get_num( lenptr, &symptr->length );
1073 if( *lenptr != ']' )
1074 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1075 _action( symptr ) );
1077 tempsym_t savsym = *symptr;
1078 symptr->patend = patptr-1;
1079 symptr->patptr = lenptr;
1080 savsym.length = measure_struct(symptr);
1084 symptr->howlen = e_no_len;
1089 while (patptr < patend) {
1090 if (isSPACE(*patptr))
1092 else if (*patptr == '#') {
1094 while (patptr < patend && *patptr != '\n')
1096 if (patptr < patend)
1099 if (*patptr == '/') {
1100 symptr->flags |= FLAG_SLASH;
1102 if (patptr < patend &&
1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1105 _action( symptr ) );
1111 /* at end - no count, no / */
1112 symptr->howlen = e_no_len;
1116 symptr->code = code;
1117 symptr->patptr = patptr;
1121 symptr->patptr = patptr;
1126 There is no way to cleanly handle the case where we should process the
1127 string per byte in its upgraded form while it's really in downgraded form
1128 (e.g. estimates like strend-s as an upper bound for the number of
1129 characters left wouldn't work). So if we foresee the need of this
1130 (pattern starts with U or contains U0), we want to work on the encoded
1131 version of the string. Users are advised to upgrade their pack string
1132 themselves if they need to do a lot of unpacks like this on it
1135 need_utf8(const char *pat, const char *patend)
1139 PERL_ARGS_ASSERT_NEED_UTF8;
1141 while (pat < patend) {
1142 if (pat[0] == '#') {
1144 pat = (const char *) memchr(pat, '\n', patend-pat);
1145 if (!pat) return FALSE;
1146 } else if (pat[0] == 'U') {
1147 if (first || pat[1] == '0') return TRUE;
1148 } else first = FALSE;
1155 first_symbol(const char *pat, const char *patend) {
1156 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1158 while (pat < patend) {
1159 if (pat[0] != '#') return pat[0];
1161 pat = (const char *) memchr(pat, '\n', patend-pat);
1169 =for apidoc unpackstring
1171 The engine implementing unpack() Perl function. C<unpackstring> puts the
1172 extracted list items on the stack and returns the number of elements.
1173 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1178 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1182 PERL_ARGS_ASSERT_UNPACKSTRING;
1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1185 else if (need_utf8(pat, patend)) {
1186 /* We probably should try to avoid this in case a scalar context call
1187 wouldn't get to the "U0" */
1188 STRLEN len = strend - s;
1189 s = (char *) bytes_to_utf8((U8 *) s, &len);
1192 flags |= FLAG_DO_UTF8;
1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 flags |= FLAG_PARSE_UTF8;
1198 TEMPSYM_INIT(&sym, pat, patend, flags);
1200 return unpack_rec(&sym, s, s, strend, NULL );
1204 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1208 const I32 start_sp_offset = SP - PL_stack_base;
1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1214 bool beyond = FALSE;
1215 bool explicit_length;
1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1219 PERL_ARGS_ASSERT_UNPACK_REC;
1221 symptr->strbeg = s - strbeg;
1223 while (next_symbol(symptr)) {
1226 I32 datumtype = symptr->code;
1227 /* do first one only unless in list context
1228 / is implemented by unpacking the count, then popping it from the
1229 stack, so must check that we're not in the middle of a / */
1230 if ( unpack_only_one
1231 && (SP - PL_stack_base == start_sp_offset + 1)
1232 && (datumtype != '/') ) /* XXX can this be omitted */
1235 switch (howlen = symptr->howlen) {
1237 len = strend - strbeg; /* long enough */
1240 /* e_no_len and e_number */
1241 len = symptr->length;
1245 explicit_length = TRUE;
1247 beyond = s >= strend;
1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1251 /* props nonzero means we can process this letter. */
1252 const long size = props & PACK_SIZE_MASK;
1253 const long howmany = (strend - s) / size;
1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 if (len && unpack_only_one) len = 1;
1264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1269 if (howlen == e_no_len)
1270 len = 16; /* len is not specified */
1278 tempsym_t savsym = *symptr;
1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1280 symptr->flags |= group_modifiers;
1281 symptr->patend = savsym.grpend;
1282 symptr->previous = &savsym;
1285 if (len && unpack_only_one) len = 1;
1287 symptr->patptr = savsym.grpbeg;
1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1289 else symptr->flags &= ~FLAG_PARSE_UTF8;
1290 unpack_rec(symptr, s, strbeg, strend, &s);
1291 if (s == strend && savsym.howlen == e_star)
1292 break; /* No way to continue */
1295 savsym.flags = symptr->flags & ~group_modifiers;
1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '.' | TYPE_IS_SHRIEKING:
1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1307 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 const bool u8 = utf8;
1310 if (howlen == e_star) from = strbeg;
1311 else if (len <= 0) from = s;
1313 tempsym_t *group = symptr;
1315 while (--len && group) group = group->previous;
1316 from = group ? strbeg + group->strbeg : strbeg;
1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1324 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 case '@' | TYPE_IS_SHRIEKING:
1328 s = strbeg + symptr->strbeg;
1329 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1331 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1337 Perl_croak(aTHX_ "'@' outside of string in unpack");
1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
1349 case 'X' | TYPE_IS_SHRIEKING:
1350 if (!len) /* Avoid division by 0 */
1353 const char *hop, *last;
1355 hop = last = strbeg;
1357 hop += UTF8SKIP(hop);
1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1368 len = (s - strbeg) % len;
1374 Perl_croak(aTHX_ "'X' outside of string in unpack");
1375 while (--s, UTF8_IS_CONTINUATION(*s)) {
1377 Perl_croak(aTHX_ "'X' outside of string in unpack");
1382 if (len > s - strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1387 case 'x' | TYPE_IS_SHRIEKING: {
1389 if (!len) /* Avoid division by 0 */
1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 else ai32 = (s - strbeg) % len;
1393 if (ai32 == 0) break;
1401 Perl_croak(aTHX_ "'x' outside of string in unpack");
1406 if (len > strend - s)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1418 /* Preliminary length estimate is assumed done in 'W' */
1419 if (len > strend - s) len = strend - s;
1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 if (hop >= strend) {
1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1435 } else if (len > strend - s)
1438 if (datumtype == 'Z') {
1439 /* 'Z' strips stuff after first null */
1440 const char *ptr, *end;
1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1443 sv = newSVpvn(s, ptr-s);
1444 if (howlen == e_star) /* exact for 'Z*' */
1445 len = ptr-s + (ptr != strend ? 1 : 0);
1446 } else if (datumtype == 'A') {
1447 /* 'A' strips both nulls and spaces */
1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 for (ptr = s+len-1; ptr >= s; ptr--)
1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1452 !is_utf8_space((U8 *) ptr)) break;
1453 if (ptr >= s) ptr += UTF8SKIP(ptr);
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1458 for (ptr = s+len-1; ptr >= s; ptr--)
1459 if (*ptr != 0 && !isSPACE(*ptr)) break;
1462 sv = newSVpvn(s, ptr-s);
1463 } else sv = newSVpvn(s, len);
1467 /* Undo any upgrade done due to need_utf8() */
1468 if (!(symptr->flags & FLAG_WAS_UTF8))
1469 sv_utf8_downgrade(sv, 0);
1477 if (howlen == e_star || len > (strend - s) * 8)
1478 len = (strend - s) * 8;
1481 while (len >= 8 && s < strend) {
1482 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1487 cuv += PL_bitcount[*(U8 *)s++];
1490 if (len && s < strend) {
1492 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1493 if (datumtype == 'b')
1495 if (bits & 1) cuv++;
1500 if (bits & 0x80) cuv++;
1507 sv = sv_2mortal(newSV(len ? len : 1));
1510 if (datumtype == 'b') {
1512 const I32 ai32 = len;
1513 for (len = 0; len < ai32; len++) {
1514 if (len & 7) bits >>= 1;
1516 if (s >= strend) break;
1517 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1518 } else bits = *(U8 *) s++;
1519 *str++ = bits & 1 ? '1' : '0';
1523 const I32 ai32 = len;
1524 for (len = 0; len < ai32; len++) {
1525 if (len & 7) bits <<= 1;
1527 if (s >= strend) break;
1528 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1529 } else bits = *(U8 *) s++;
1530 *str++ = bits & 0x80 ? '1' : '0';
1534 SvCUR_set(sv, str - SvPVX_const(sv));
1541 /* Preliminary length estimate, acceptable for utf8 too */
1542 if (howlen == e_star || len > (strend - s) * 2)
1543 len = (strend - s) * 2;
1544 sv = sv_2mortal(newSV(len ? len : 1));
1547 if (datumtype == 'h') {
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 1) bits >>= 4;
1553 if (s >= strend) break;
1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1555 } else bits = * (U8 *) s++;
1556 *str++ = PL_hexdigit[bits & 15];
1560 const I32 ai32 = len;
1561 for (len = 0; len < ai32; len++) {
1562 if (len & 1) bits <<= 4;
1564 if (s >= strend) break;
1565 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1566 } else bits = *(U8 *) s++;
1567 *str++ = PL_hexdigit[(bits >> 4) & 15];
1571 SvCUR_set(sv, str - SvPVX_const(sv));
1577 if (explicit_length)
1578 /* Switch to "character" mode */
1579 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1584 while (len-- > 0 && s < strend) {
1589 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1590 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1591 if (retlen == (STRLEN) -1 || retlen == 0)
1592 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1596 aint = *(U8 *)(s)++;
1597 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1601 else if (checksum > bits_in_uv)
1602 cdouble += (NV)aint;
1610 while (len-- > 0 && s < strend) {
1612 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1613 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1614 if (retlen == (STRLEN) -1 || retlen == 0)
1615 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV) val;
1624 } else if (!checksum)
1626 const U8 ch = *(U8 *) s++;
1629 else if (checksum > bits_in_uv)
1630 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1632 while (len-- > 0) cuv += *(U8 *) s++;
1636 if (explicit_length) {
1637 /* Switch to "bytes in UTF-8" mode */
1638 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1640 /* Should be impossible due to the need_utf8() test */
1641 Perl_croak(aTHX_ "U0 mode on a byte string");
1645 if (len > strend - s) len = strend - s;
1647 if (len && unpack_only_one) len = 1;
1651 while (len-- > 0 && s < strend) {
1655 U8 result[UTF8_MAXLEN];
1656 const char *ptr = s;
1658 /* Bug: warns about bad utf8 even if we are short on bytes
1659 and will break out of the loop */
1660 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1663 len = UTF8SKIP(result);
1664 if (!uni_to_bytes(aTHX_ &ptr, strend,
1665 (char *) &result[1], len-1, 'U')) break;
1666 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1669 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1670 if (retlen == (STRLEN) -1 || retlen == 0)
1671 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1676 else if (checksum > bits_in_uv)
1677 cdouble += (NV) auv;
1682 case 's' | TYPE_IS_SHRIEKING:
1683 #if SHORTSIZE != SIZE16
1686 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1687 DO_BO_UNPACK(ashort, s);
1690 else if (checksum > bits_in_uv)
1691 cdouble += (NV)ashort;
1703 #if U16SIZE > SIZE16
1706 SHIFT16(utf8, s, strend, &ai16, datumtype);
1707 DO_BO_UNPACK(ai16, 16);
1708 #if U16SIZE > SIZE16
1714 else if (checksum > bits_in_uv)
1715 cdouble += (NV)ai16;
1720 case 'S' | TYPE_IS_SHRIEKING:
1721 #if SHORTSIZE != SIZE16
1723 unsigned short aushort;
1724 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1725 DO_BO_UNPACK(aushort, s);
1728 else if (checksum > bits_in_uv)
1729 cdouble += (NV)aushort;
1742 #if U16SIZE > SIZE16
1745 SHIFT16(utf8, s, strend, &au16, datumtype);
1746 DO_BO_UNPACK(au16, 16);
1748 if (datumtype == 'n')
1749 au16 = PerlSock_ntohs(au16);
1752 if (datumtype == 'v')
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV) au16;
1763 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1764 case 'v' | TYPE_IS_SHRIEKING:
1765 case 'n' | TYPE_IS_SHRIEKING:
1768 # if U16SIZE > SIZE16
1771 SHIFT16(utf8, s, strend, &ai16, datumtype);
1773 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1774 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1775 # endif /* HAS_NTOHS */
1777 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1778 ai16 = (I16) vtohs((U16) ai16);
1779 # endif /* HAS_VTOHS */
1782 else if (checksum > bits_in_uv)
1783 cdouble += (NV) ai16;
1788 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1790 case 'i' | TYPE_IS_SHRIEKING:
1793 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1794 DO_BO_UNPACK(aint, i);
1797 else if (checksum > bits_in_uv)
1798 cdouble += (NV)aint;
1804 case 'I' | TYPE_IS_SHRIEKING:
1807 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1808 DO_BO_UNPACK(auint, i);
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)auint;
1820 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1821 #if IVSIZE == INTSIZE
1822 DO_BO_UNPACK(aiv, i);
1823 #elif IVSIZE == LONGSIZE
1824 DO_BO_UNPACK(aiv, l);
1825 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1826 DO_BO_UNPACK(aiv, 64);
1828 Perl_croak(aTHX_ "'j' not supported on this platform");
1832 else if (checksum > bits_in_uv)
1841 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1842 #if IVSIZE == INTSIZE
1843 DO_BO_UNPACK(auv, i);
1844 #elif IVSIZE == LONGSIZE
1845 DO_BO_UNPACK(auv, l);
1846 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1847 DO_BO_UNPACK(auv, 64);
1849 Perl_croak(aTHX_ "'J' not supported on this platform");
1853 else if (checksum > bits_in_uv)
1859 case 'l' | TYPE_IS_SHRIEKING:
1860 #if LONGSIZE != SIZE32
1863 SHIFT_VAR(utf8, s, strend, along, datumtype);
1864 DO_BO_UNPACK(along, l);
1867 else if (checksum > bits_in_uv)
1868 cdouble += (NV)along;
1879 #if U32SIZE > SIZE32
1882 SHIFT32(utf8, s, strend, &ai32, datumtype);
1883 DO_BO_UNPACK(ai32, 32);
1884 #if U32SIZE > SIZE32
1885 if (ai32 > 2147483647) ai32 -= 4294967296;
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)ai32;
1895 case 'L' | TYPE_IS_SHRIEKING:
1896 #if LONGSIZE != SIZE32
1898 unsigned long aulong;
1899 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1900 DO_BO_UNPACK(aulong, l);
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)aulong;
1917 #if U32SIZE > SIZE32
1920 SHIFT32(utf8, s, strend, &au32, datumtype);
1921 DO_BO_UNPACK(au32, 32);
1923 if (datumtype == 'N')
1924 au32 = PerlSock_ntohl(au32);
1927 if (datumtype == 'V')
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)au32;
1938 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1939 case 'V' | TYPE_IS_SHRIEKING:
1940 case 'N' | TYPE_IS_SHRIEKING:
1943 # if U32SIZE > SIZE32
1946 SHIFT32(utf8, s, strend, &ai32, datumtype);
1948 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1949 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1952 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1953 ai32 = (I32)vtohl((U32)ai32);
1957 else if (checksum > bits_in_uv)
1958 cdouble += (NV)ai32;
1963 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1967 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1968 DO_BO_UNPACK_PC(aptr);
1969 /* newSVpv generates undef if aptr is NULL */
1970 mPUSHs(newSVpv(aptr, 0));
1978 while (len > 0 && s < strend) {
1980 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1981 auv = (auv << 7) | (ch & 0x7f);
1982 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1990 if (++bytes >= sizeof(UV)) { /* promote to string */
1993 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1994 while (s < strend) {
1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1996 sv = mul128(sv, (U8)(ch & 0x7f));
2002 t = SvPV_nolen_const(sv);
2011 if ((s >= strend) && bytes)
2012 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2016 if (symptr->howlen == e_star)
2017 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2019 if (s + sizeof(char*) <= strend) {
2021 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2022 DO_BO_UNPACK_PC(aptr);
2023 /* newSVpvn generates undef if aptr is NULL */
2024 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2031 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2032 DO_BO_UNPACK(aquad, 64);
2034 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2035 newSViv((IV)aquad) : newSVnv((NV)aquad));
2036 else if (checksum > bits_in_uv)
2037 cdouble += (NV)aquad;
2045 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2046 DO_BO_UNPACK(auquad, 64);
2048 mPUSHs(auquad <= UV_MAX ?
2049 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2050 else if (checksum > bits_in_uv)
2051 cdouble += (NV)auquad;
2056 #endif /* HAS_QUAD */
2057 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2061 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2062 DO_BO_UNPACK_N(afloat, float);
2072 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2073 DO_BO_UNPACK_N(adouble, double);
2083 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2084 DO_BO_UNPACK_N(anv, NV);
2091 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2094 long double aldouble;
2095 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2096 DO_BO_UNPACK_N(aldouble, long double);
2100 cdouble += aldouble;
2106 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2107 sv = sv_2mortal(newSV(l));
2108 if (l) SvPOK_on(sv);
2111 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2116 next_uni_uu(aTHX_ &s, strend, &a);
2117 next_uni_uu(aTHX_ &s, strend, &b);
2118 next_uni_uu(aTHX_ &s, strend, &c);
2119 next_uni_uu(aTHX_ &s, strend, &d);
2120 hunk[0] = (char)((a << 2) | (b >> 4));
2121 hunk[1] = (char)((b << 4) | (c >> 2));
2122 hunk[2] = (char)((c << 6) | d);
2123 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2131 /* possible checksum byte */
2132 const char *skip = s+UTF8SKIP(s);
2133 if (skip < strend && *skip == '\n')
2139 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2143 len = PL_uudmap[*(U8*)s++] & 077;
2145 if (s < strend && ISUUCHAR(*s))
2146 a = PL_uudmap[*(U8*)s++] & 077;
2149 if (s < strend && ISUUCHAR(*s))
2150 b = PL_uudmap[*(U8*)s++] & 077;
2153 if (s < strend && ISUUCHAR(*s))
2154 c = PL_uudmap[*(U8*)s++] & 077;
2157 if (s < strend && ISUUCHAR(*s))
2158 d = PL_uudmap[*(U8*)s++] & 077;
2161 hunk[0] = (char)((a << 2) | (b >> 4));
2162 hunk[1] = (char)((b << 4) | (c >> 2));
2163 hunk[2] = (char)((c << 6) | d);
2164 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2169 else /* possible checksum byte */
2170 if (s + 1 < strend && s[1] == '\n')
2179 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2180 (checksum > bits_in_uv &&
2181 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2184 anv = (NV) (1 << (checksum & 15));
2185 while (checksum >= 16) {
2189 while (cdouble < 0.0)
2191 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2192 sv = newSVnv(cdouble);
2195 if (checksum < bits_in_uv) {
2196 UV mask = ((UV)1 << checksum) - 1;
2205 if (symptr->flags & FLAG_SLASH){
2206 if (SP - PL_stack_base - start_sp_offset <= 0)
2207 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2208 if( next_symbol(symptr) ){
2209 if( symptr->howlen == e_number )
2210 Perl_croak(aTHX_ "Count after length/code in unpack" );
2212 /* ...end of char buffer then no decent length available */
2213 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2215 /* take top of stack (hope it's numeric) */
2218 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2221 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2223 datumtype = symptr->code;
2224 explicit_length = FALSE;
2232 return SP - PL_stack_base - start_sp_offset;
2240 I32 gimme = GIMME_V;
2243 const char *pat = SvPV_const(left, llen);
2244 const char *s = SvPV_const(right, rlen);
2245 const char *strend = s + rlen;
2246 const char *patend = pat + llen;
2250 cnt = unpackstring(pat, patend, s, strend,
2251 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2252 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2255 if ( !cnt && gimme == G_SCALAR )
2256 PUSHs(&PL_sv_undef);
2261 doencodes(U8 *h, const char *s, I32 len)
2263 *h++ = PL_uuemap[len];
2265 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2266 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2267 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2268 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2273 const char r = (len > 1 ? s[1] : '\0');
2274 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2275 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2276 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2277 *h++ = PL_uuemap[0];
2284 S_is_an_int(pTHX_ const char *s, STRLEN l)
2286 SV *result = newSVpvn(s, l);
2287 char *const result_c = SvPV_nolen(result); /* convenience */
2288 char *out = result_c;
2292 PERL_ARGS_ASSERT_IS_AN_INT;
2300 SvREFCNT_dec(result);
2323 SvREFCNT_dec(result);
2329 SvCUR_set(result, out - result_c);
2333 /* pnum must be '\0' terminated */
2335 S_div128(pTHX_ SV *pnum, bool *done)
2338 char * const s = SvPV(pnum, len);
2342 PERL_ARGS_ASSERT_DIV128;
2346 const int i = m * 10 + (*t - '0');
2347 const int r = (i >> 7); /* r < 10 */
2355 SvCUR_set(pnum, (STRLEN) (t - s));
2360 =for apidoc packlist
2362 The engine implementing pack() Perl function.
2368 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2373 PERL_ARGS_ASSERT_PACKLIST;
2375 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2377 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2378 Also make sure any UTF8 flag is loaded */
2379 SvPV_force_nolen(cat);
2381 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2383 (void)pack_rec( cat, &sym, beglist, endlist );
2386 /* like sv_utf8_upgrade, but also repoint the group start markers */
2388 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2391 const char *from_ptr, *from_start, *from_end, **marks, **m;
2392 char *to_start, *to_ptr;
2394 if (SvUTF8(sv)) return;
2396 from_start = SvPVX_const(sv);
2397 from_end = from_start + SvCUR(sv);
2398 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2399 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2400 if (from_ptr == from_end) {
2401 /* Simple case: no character needs to be changed */
2406 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2407 Newx(to_start, len, char);
2408 Copy(from_start, to_start, from_ptr-from_start, char);
2409 to_ptr = to_start + (from_ptr-from_start);
2411 Newx(marks, sym_ptr->level+2, const char *);
2412 for (group=sym_ptr; group; group = group->previous)
2413 marks[group->level] = from_start + group->strbeg;
2414 marks[sym_ptr->level+1] = from_end+1;
2415 for (m = marks; *m < from_ptr; m++)
2416 *m = to_start + (*m-from_start);
2418 for (;from_ptr < from_end; from_ptr++) {
2419 while (*m == from_ptr) *m++ = to_ptr;
2420 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2424 while (*m == from_ptr) *m++ = to_ptr;
2425 if (m != marks + sym_ptr->level+1) {
2428 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2430 for (group=sym_ptr; group; group = group->previous)
2431 group->strbeg = marks[group->level] - to_start;
2436 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2437 from_start -= SvIVX(sv);
2440 SvFLAGS(sv) &= ~SVf_OOK;
2443 Safefree(from_start);
2444 SvPV_set(sv, to_start);
2445 SvCUR_set(sv, to_ptr - to_start);
2450 /* Exponential string grower. Makes string extension effectively O(n)
2451 needed says how many extra bytes we need (not counting the final '\0')
2452 Only grows the string if there is an actual lack of space
2455 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2456 const STRLEN cur = SvCUR(sv);
2457 const STRLEN len = SvLEN(sv);
2460 PERL_ARGS_ASSERT_SV_EXP_GROW;
2462 if (len - cur > needed) return SvPVX(sv);
2463 extend = needed > len ? needed : len;
2464 return SvGROW(sv, len+extend+1);
2469 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2472 tempsym_t lookahead;
2473 I32 items = endlist - beglist;
2474 bool found = next_symbol(symptr);
2475 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2476 bool warn_utf8 = ckWARN(WARN_UTF8);
2478 PERL_ARGS_ASSERT_PACK_REC;
2480 if (symptr->level == 0 && found && symptr->code == 'U') {
2481 marked_upgrade(aTHX_ cat, symptr);
2482 symptr->flags |= FLAG_DO_UTF8;
2485 symptr->strbeg = SvCUR(cat);
2491 SV *lengthcode = NULL;
2492 I32 datumtype = symptr->code;
2493 howlen_t howlen = symptr->howlen;
2494 char *start = SvPVX(cat);
2495 char *cur = start + SvCUR(cat);
2497 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2501 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2505 /* e_no_len and e_number */
2506 len = symptr->length;
2511 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2513 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2514 /* We can process this letter. */
2515 STRLEN size = props & PACK_SIZE_MASK;
2516 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2520 /* Look ahead for next symbol. Do we have code/code? */
2521 lookahead = *symptr;
2522 found = next_symbol(&lookahead);
2523 if (symptr->flags & FLAG_SLASH) {
2525 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2526 if (strchr("aAZ", lookahead.code)) {
2527 if (lookahead.howlen == e_number) count = lookahead.length;
2530 if (SvGAMAGIC(*beglist)) {
2531 /* Avoid reading the active data more than once
2532 by copying it to a temporary. */
2534 const char *const pv = SvPV_const(*beglist, len);
2536 = newSVpvn_flags(pv, len,
2537 SVs_TEMP | SvUTF8(*beglist));
2540 count = DO_UTF8(*beglist) ?
2541 sv_len_utf8(*beglist) : sv_len(*beglist);
2544 if (lookahead.code == 'Z') count++;
2547 if (lookahead.howlen == e_number && lookahead.length < items)
2548 count = lookahead.length;
2551 lookahead.howlen = e_number;
2552 lookahead.length = count;
2553 lengthcode = sv_2mortal(newSViv(count));
2556 /* Code inside the switch must take care to properly update
2557 cat (CUR length and '\0' termination) if it updated *cur and
2558 doesn't simply leave using break */
2559 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2561 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2562 (int) TYPE_NO_MODIFIERS(datumtype));
2564 Perl_croak(aTHX_ "'%%' may not be used in pack");
2567 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2568 case '.' | TYPE_IS_SHRIEKING:
2571 if (howlen == e_star) from = start;
2572 else if (len == 0) from = cur;
2574 tempsym_t *group = symptr;
2576 while (--len && group) group = group->previous;
2577 from = group ? start + group->strbeg : start;
2580 len = SvIV(fromstr);
2582 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2583 case '@' | TYPE_IS_SHRIEKING:
2586 from = start + symptr->strbeg;
2588 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2590 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2594 while (len && from < cur) {
2595 from += UTF8SKIP(from);
2599 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2601 /* Here we know from == cur */
2603 GROWING(0, cat, start, cur, len);
2604 Zero(cur, len, char);
2606 } else if (from < cur) {
2609 } else goto no_change;
2617 if (len > 0) goto grow;
2618 if (len == 0) goto no_change;
2625 tempsym_t savsym = *symptr;
2626 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2627 symptr->flags |= group_modifiers;
2628 symptr->patend = savsym.grpend;
2630 symptr->previous = &lookahead;
2633 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2634 else symptr->flags &= ~FLAG_PARSE_UTF8;
2635 was_utf8 = SvUTF8(cat);
2636 symptr->patptr = savsym.grpbeg;
2637 beglist = pack_rec(cat, symptr, beglist, endlist);
2638 if (SvUTF8(cat) != was_utf8)
2639 /* This had better be an upgrade while in utf8==0 mode */
2642 if (savsym.howlen == e_star && beglist == endlist)
2643 break; /* No way to continue */
2645 items = endlist - beglist;
2646 lookahead.flags = symptr->flags & ~group_modifiers;
2649 case 'X' | TYPE_IS_SHRIEKING:
2650 if (!len) /* Avoid division by 0 */
2657 hop += UTF8SKIP(hop);
2664 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2668 len = (cur-start) % len;
2672 if (len < 1) goto no_change;
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
2678 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2680 Perl_croak(aTHX_ "'%c' outside of string in pack",
2681 (int) TYPE_NO_MODIFIERS(datumtype));
2687 if (cur - start < len)
2688 Perl_croak(aTHX_ "'%c' outside of string in pack",
2689 (int) TYPE_NO_MODIFIERS(datumtype));
2692 if (cur < start+symptr->strbeg) {
2693 /* Make sure group starts don't point into the void */
2695 const STRLEN length = cur-start;
2696 for (group = symptr;
2697 group && length < group->strbeg;
2698 group = group->previous) group->strbeg = length;
2699 lookahead.strbeg = length;
2702 case 'x' | TYPE_IS_SHRIEKING: {
2704 if (!len) /* Avoid division by 0 */
2706 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2707 else ai32 = (cur - start) % len;
2708 if (ai32 == 0) goto no_change;
2720 aptr = SvPV_const(fromstr, fromlen);
2721 if (DO_UTF8(fromstr)) {
2722 const char *end, *s;
2724 if (!utf8 && !SvUTF8(cat)) {
2725 marked_upgrade(aTHX_ cat, symptr);
2726 lookahead.flags |= FLAG_DO_UTF8;
2727 lookahead.strbeg = symptr->strbeg;
2730 cur = start + SvCUR(cat);
2732 if (howlen == e_star) {
2733 if (utf8) goto string_copy;
2737 end = aptr + fromlen;
2738 fromlen = datumtype == 'Z' ? len-1 : len;
2739 while ((I32) fromlen > 0 && s < end) {
2744 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2747 if (datumtype == 'Z') len++;
2753 fromlen = len - fromlen;
2754 if (datumtype == 'Z') fromlen--;
2755 if (howlen == e_star) {
2757 if (datumtype == 'Z') len++;
2759 GROWING(0, cat, start, cur, len);
2760 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2761 datumtype | TYPE_IS_PACK))
2762 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2766 if (howlen == e_star) {
2768 if (datumtype == 'Z') len++;
2770 if (len <= (I32) fromlen) {
2772 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2774 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2776 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2777 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2779 while (fromlen > 0) {
2780 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2786 if (howlen == e_star) {
2788 if (datumtype == 'Z') len++;
2790 if (len <= (I32) fromlen) {
2792 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2794 GROWING(0, cat, start, cur, len);
2795 Copy(aptr, cur, fromlen, char);
2799 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2805 const char *str, *end;
2812 str = SvPV_const(fromstr, fromlen);
2813 end = str + fromlen;
2814 if (DO_UTF8(fromstr)) {
2816 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2818 utf8_source = FALSE;
2819 utf8_flags = 0; /* Unused, but keep compilers happy */
2821 if (howlen == e_star) len = fromlen;
2822 field_len = (len+7)/8;
2823 GROWING(utf8, cat, start, cur, field_len);
2824 if (len > (I32)fromlen) len = fromlen;
2827 if (datumtype == 'B')
2831 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2833 } else bits |= *str++ & 1;
2834 if (l & 7) bits <<= 1;
2836 PUSH_BYTE(utf8, cur, bits);
2841 /* datumtype == 'b' */
2845 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2846 if (val & 1) bits |= 0x80;
2847 } else if (*str++ & 1)
2849 if (l & 7) bits >>= 1;
2851 PUSH_BYTE(utf8, cur, bits);
2857 if (datumtype == 'B')
2858 bits <<= 7 - (l & 7);
2860 bits >>= 7 - (l & 7);
2861 PUSH_BYTE(utf8, cur, bits);
2864 /* Determine how many chars are left in the requested field */
2866 if (howlen == e_star) field_len = 0;
2867 else field_len -= l;
2868 Zero(cur, field_len, char);
2874 const char *str, *end;
2881 str = SvPV_const(fromstr, fromlen);
2882 end = str + fromlen;
2883 if (DO_UTF8(fromstr)) {
2885 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2887 utf8_source = FALSE;
2888 utf8_flags = 0; /* Unused, but keep compilers happy */
2890 if (howlen == e_star) len = fromlen;
2891 field_len = (len+1)/2;
2892 GROWING(utf8, cat, start, cur, field_len);
2893 if (!utf8 && len > (I32)fromlen) len = fromlen;
2896 if (datumtype == 'H')
2900 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2901 if (val < 256 && isALPHA(val))
2902 bits |= (val + 9) & 0xf;
2905 } else if (isALPHA(*str))
2906 bits |= (*str++ + 9) & 0xf;
2908 bits |= *str++ & 0xf;
2909 if (l & 1) bits <<= 4;
2911 PUSH_BYTE(utf8, cur, bits);
2919 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2920 if (val < 256 && isALPHA(val))
2921 bits |= ((val + 9) & 0xf) << 4;
2923 bits |= (val & 0xf) << 4;
2924 } else if (isALPHA(*str))
2925 bits |= ((*str++ + 9) & 0xf) << 4;
2927 bits |= (*str++ & 0xf) << 4;
2928 if (l & 1) bits >>= 4;
2930 PUSH_BYTE(utf8, cur, bits);
2936 PUSH_BYTE(utf8, cur, bits);
2939 /* Determine how many chars are left in the requested field */
2941 if (howlen == e_star) field_len = 0;
2942 else field_len -= l;
2943 Zero(cur, field_len, char);
2951 aiv = SvIV(fromstr);
2952 if ((-128 > aiv || aiv > 127) &&
2954 Perl_warner(aTHX_ packWARN(WARN_PACK),
2955 "Character in 'c' format wrapped in pack");
2956 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2961 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2967 aiv = SvIV(fromstr);
2968 if ((0 > aiv || aiv > 0xff) &&
2970 Perl_warner(aTHX_ packWARN(WARN_PACK),
2971 "Character in 'C' format wrapped in pack");
2972 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2977 U8 in_bytes = (U8)IN_BYTES;
2979 end = start+SvLEN(cat)-1;
2980 if (utf8) end -= UTF8_MAXLEN-1;
2984 auv = SvUV(fromstr);
2985 if (in_bytes) auv = auv % 0x100;
2990 SvCUR_set(cat, cur - start);
2992 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2993 end = start+SvLEN(cat)-UTF8_MAXLEN;
2995 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2998 0 : UNICODE_ALLOW_ANY);
3003 SvCUR_set(cat, cur - start);
3004 marked_upgrade(aTHX_ cat, symptr);
3005 lookahead.flags |= FLAG_DO_UTF8;
3006 lookahead.strbeg = symptr->strbeg;
3009 cur = start + SvCUR(cat);
3010 end = start+SvLEN(cat)-UTF8_MAXLEN;
3013 if (ckWARN(WARN_PACK))
3014 Perl_warner(aTHX_ packWARN(WARN_PACK),
3015 "Character in 'W' format wrapped in pack");
3020 SvCUR_set(cat, cur - start);
3021 GROWING(0, cat, start, cur, len+1);
3022 end = start+SvLEN(cat)-1;
3024 *(U8 *) cur++ = (U8)auv;
3033 if (!(symptr->flags & FLAG_DO_UTF8)) {
3034 marked_upgrade(aTHX_ cat, symptr);
3035 lookahead.flags |= FLAG_DO_UTF8;
3036 lookahead.strbeg = symptr->strbeg;
3042 end = start+SvLEN(cat);
3043 if (!utf8) end -= UTF8_MAXLEN;
3047 auv = SvUV(fromstr);
3049 U8 buffer[UTF8_MAXLEN], *endb;
3050 endb = uvuni_to_utf8_flags(buffer, auv,
3052 0 : UNICODE_ALLOW_ANY);
3053 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3055 SvCUR_set(cat, cur - start);
3056 GROWING(0, cat, start, cur,
3057 len+(endb-buffer)*UTF8_EXPAND);
3058 end = start+SvLEN(cat);
3060 cur = bytes_to_uni(buffer, endb-buffer, cur);
3064 SvCUR_set(cat, cur - start);
3065 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3066 end = start+SvLEN(cat)-UTF8_MAXLEN;
3068 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3070 0 : UNICODE_ALLOW_ANY);
3075 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3081 anv = SvNV(fromstr);
3083 /* VOS does not automatically map a floating-point overflow
3084 during conversion from double to float into infinity, so we
3085 do it by hand. This code should either be generalized for
3086 any OS that needs it, or removed if and when VOS implements
3087 posix-976 (suggestion to support mapping to infinity).
3088 Paul.Green@stratus.com 02-04-02. */
3090 extern const float _float_constants[];
3092 afloat = _float_constants[0]; /* single prec. inf. */
3093 else if (anv < -FLT_MAX)
3094 afloat = _float_constants[0]; /* single prec. inf. */
3095 else afloat = (float) anv;
3098 # if defined(VMS) && !defined(__IEEE_FP)
3099 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3100 * on Alpha; fake it if we don't have them.
3104 else if (anv < -FLT_MAX)
3106 else afloat = (float)anv;
3108 afloat = (float)anv;
3110 #endif /* __VOS__ */
3111 DO_BO_PACK_N(afloat, float);
3112 PUSH_VAR(utf8, cur, afloat);
3120 anv = SvNV(fromstr);
3122 /* VOS does not automatically map a floating-point overflow
3123 during conversion from long double to double into infinity,
3124 so we do it by hand. This code should either be generalized
3125 for any OS that needs it, or removed if and when VOS
3126 implements posix-976 (suggestion to support mapping to
3127 infinity). Paul.Green@stratus.com 02-04-02. */
3129 extern const double _double_constants[];
3131 adouble = _double_constants[0]; /* double prec. inf. */
3132 else if (anv < -DBL_MAX)
3133 adouble = _double_constants[0]; /* double prec. inf. */
3134 else adouble = (double) anv;
3137 # if defined(VMS) && !defined(__IEEE_FP)
3138 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3139 * on Alpha; fake it if we don't have them.
3143 else if (anv < -DBL_MAX)
3145 else adouble = (double)anv;
3147 adouble = (double)anv;
3149 #endif /* __VOS__ */
3150 DO_BO_PACK_N(adouble, double);
3151 PUSH_VAR(utf8, cur, adouble);
3156 Zero(&anv, 1, NV); /* can be long double with unused bits */
3159 anv = SvNV(fromstr);
3160 DO_BO_PACK_N(anv, NV);
3161 PUSH_VAR(utf8, cur, anv);
3165 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3167 long double aldouble;
3168 /* long doubles can have unused bits, which may be nonzero */
3169 Zero(&aldouble, 1, long double);
3172 aldouble = (long double)SvNV(fromstr);
3173 DO_BO_PACK_N(aldouble, long double);
3174 PUSH_VAR(utf8, cur, aldouble);
3179 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3180 case 'n' | TYPE_IS_SHRIEKING:
3186 ai16 = (I16)SvIV(fromstr);
3188 ai16 = PerlSock_htons(ai16);
3190 PUSH16(utf8, cur, &ai16);
3193 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3194 case 'v' | TYPE_IS_SHRIEKING:
3200 ai16 = (I16)SvIV(fromstr);
3204 PUSH16(utf8, cur, &ai16);
3207 case 'S' | TYPE_IS_SHRIEKING:
3208 #if SHORTSIZE != SIZE16
3210 unsigned short aushort;
3212 aushort = SvUV(fromstr);
3213 DO_BO_PACK(aushort, s);
3214 PUSH_VAR(utf8, cur, aushort);
3224 au16 = (U16)SvUV(fromstr);
3225 DO_BO_PACK(au16, 16);
3226 PUSH16(utf8, cur, &au16);
3229 case 's' | TYPE_IS_SHRIEKING:
3230 #if SHORTSIZE != SIZE16
3234 ashort = SvIV(fromstr);
3235 DO_BO_PACK(ashort, s);
3236 PUSH_VAR(utf8, cur, ashort);
3246 ai16 = (I16)SvIV(fromstr);
3247 DO_BO_PACK(ai16, 16);
3248 PUSH16(utf8, cur, &ai16);
3252 case 'I' | TYPE_IS_SHRIEKING:
3256 auint = SvUV(fromstr);
3257 DO_BO_PACK(auint, i);
3258 PUSH_VAR(utf8, cur, auint);
3265 aiv = SvIV(fromstr);
3266 #if IVSIZE == INTSIZE
3268 #elif IVSIZE == LONGSIZE
3270 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3271 DO_BO_PACK(aiv, 64);
3273 Perl_croak(aTHX_ "'j' not supported on this platform");
3275 PUSH_VAR(utf8, cur, aiv);
3282 auv = SvUV(fromstr);
3283 #if UVSIZE == INTSIZE
3285 #elif UVSIZE == LONGSIZE
3287 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3288 DO_BO_PACK(auv, 64);
3290 Perl_croak(aTHX_ "'J' not supported on this platform");
3292 PUSH_VAR(utf8, cur, auv);
3299 anv = SvNV(fromstr);
3303 SvCUR_set(cat, cur - start);
3304 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3307 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3308 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3309 any negative IVs will have already been got by the croak()
3310 above. IOK is untrue for fractions, so we test them
3311 against UV_MAX_P1. */
3312 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3313 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3314 char *in = buf + sizeof(buf);
3315 UV auv = SvUV(fromstr);
3318 *--in = (char)((auv & 0x7f) | 0x80);
3321 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3322 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3323 in, (buf + sizeof(buf)) - in);
3324 } else if (SvPOKp(fromstr))
3326 else if (SvNOKp(fromstr)) {
3327 /* 10**NV_MAX_10_EXP is the largest power of 10
3328 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3329 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3330 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3331 And with that many bytes only Inf can overflow.
3332 Some C compilers are strict about integral constant
3333 expressions so we conservatively divide by a slightly
3334 smaller integer instead of multiplying by the exact
3335 floating-point value.
3337 #ifdef NV_MAX_10_EXP
3338 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3339 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3341 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3342 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3344 char *in = buf + sizeof(buf);
3346 anv = Perl_floor(anv);
3348 const NV next = Perl_floor(anv / 128);
3349 if (in <= buf) /* this cannot happen ;-) */
3350 Perl_croak(aTHX_ "Cannot compress integer in pack");
3351 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3354 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3355 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3356 in, (buf + sizeof(buf)) - in);
3365 /* Copy string and check for compliance */
3366 from = SvPV_const(fromstr, len);
3367 if ((norm = is_an_int(from, len)) == NULL)
3368 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3370 Newx(result, len, char);
3373 while (!done) *--in = div128(norm, &done) | 0x80;
3374 result[len - 1] &= 0x7F; /* clear continue bit */
3375 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3376 in, (result + len) - in);
3378 SvREFCNT_dec(norm); /* free norm */
3383 case 'i' | TYPE_IS_SHRIEKING:
3387 aint = SvIV(fromstr);
3388 DO_BO_PACK(aint, i);
3389 PUSH_VAR(utf8, cur, aint);
3392 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3393 case 'N' | TYPE_IS_SHRIEKING:
3399 au32 = SvUV(fromstr);
3401 au32 = PerlSock_htonl(au32);
3403 PUSH32(utf8, cur, &au32);
3406 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3407 case 'V' | TYPE_IS_SHRIEKING:
3413 au32 = SvUV(fromstr);
3417 PUSH32(utf8, cur, &au32);
3420 case 'L' | TYPE_IS_SHRIEKING:
3421 #if LONGSIZE != SIZE32
3423 unsigned long aulong;
3425 aulong = SvUV(fromstr);
3426 DO_BO_PACK(aulong, l);
3427 PUSH_VAR(utf8, cur, aulong);
3437 au32 = SvUV(fromstr);
3438 DO_BO_PACK(au32, 32);
3439 PUSH32(utf8, cur, &au32);
3442 case 'l' | TYPE_IS_SHRIEKING:
3443 #if LONGSIZE != SIZE32
3447 along = SvIV(fromstr);
3448 DO_BO_PACK(along, l);
3449 PUSH_VAR(utf8, cur, along);
3459 ai32 = SvIV(fromstr);
3460 DO_BO_PACK(ai32, 32);
3461 PUSH32(utf8, cur, &ai32);
3469 auquad = (Uquad_t) SvUV(fromstr);
3470 DO_BO_PACK(auquad, 64);
3471 PUSH_VAR(utf8, cur, auquad);
3478 aquad = (Quad_t)SvIV(fromstr);
3479 DO_BO_PACK(aquad, 64);
3480 PUSH_VAR(utf8, cur, aquad);
3483 #endif /* HAS_QUAD */
3485 len = 1; /* assume SV is correct length */
3486 GROWING(utf8, cat, start, cur, sizeof(char *));
3493 SvGETMAGIC(fromstr);
3494 if (!SvOK(fromstr)) aptr = NULL;
3496 /* XXX better yet, could spirit away the string to
3497 * a safe spot and hang on to it until the result
3498 * of pack() (and all copies of the result) are
3501 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3502 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3503 Perl_warner(aTHX_ packWARN(WARN_PACK),
3504 "Attempt to pack pointer to temporary value");
3506 if (SvPOK(fromstr) || SvNIOK(fromstr))
3507 aptr = SvPV_nomg_const_nolen(fromstr);
3509 aptr = SvPV_force_flags_nolen(fromstr, 0);
3511 DO_BO_PACK_PC(aptr);
3512 PUSH_VAR(utf8, cur, aptr);
3516 const char *aptr, *aend;
3520 if (len <= 2) len = 45;
3521 else len = len / 3 * 3;
3523 if (ckWARN(WARN_PACK))
3524 Perl_warner(aTHX_ packWARN(WARN_PACK),
3525 "Field too wide in 'u' format in pack");
3528 aptr = SvPV_const(fromstr, fromlen);
3529 from_utf8 = DO_UTF8(fromstr);
3531 aend = aptr + fromlen;
3532 fromlen = sv_len_utf8(fromstr);
3533 } else aend = NULL; /* Unused, but keep compilers happy */
3534 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3535 while (fromlen > 0) {
3538 U8 hunk[1+63/3*4+1];
3540 if ((I32)fromlen > len)
3546 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3547 'u' | TYPE_IS_PACK)) {
3549 SvCUR_set(cat, cur - start);
3550 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3552 end = doencodes(hunk, buffer, todo);
3554 end = doencodes(hunk, aptr, todo);
3557 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3564 SvCUR_set(cat, cur - start);
3566 *symptr = lookahead;
3575 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3576 register SV *cat = TARG;
3578 SV *pat_sv = *++MARK;
3579 register const char *pat = SvPV_const(pat_sv, fromlen);
3580 register const char *patend = pat + fromlen;
3586 packlist(cat, pat, patend, MARK, SP + 1);
3596 * c-indentation-style: bsd
3598 * indent-tabs-mode: t
3601 * ex: set ts=8 sts=4 sw=4 noet: