3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
74 # define PERL_PACK_CAN_BYTEORDER
75 # define PERL_PACK_CAN_SHRIEKSIGN
81 /* Maximum number of bytes to which a byte can grow due to upgrade */
85 * Offset for integer pack/unpack.
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
109 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
110 # define OFF16(p) ((char*)(p))
111 # define OFF32(p) ((char*)(p))
113 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
114 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
117 ++++ bad cray byte order
121 # define OFF16(p) ((char *) (p))
122 # define OFF32(p) ((char *) (p))
125 /* Only to be used inside a loop (see the break) */
126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
130 Copy(s, OFF16(p), SIZE16, char); \
135 /* Only to be used inside a loop (see the break) */
136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
140 Copy(s, OFF32(p), SIZE32, char); \
145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
155 Copy(s, (char *) &var, sizeof(var), char); \
160 #define PUSH_VAR(utf8, aptr, var) \
161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164 #define MAX_SUB_TEMPLATE_LEVEL 100
166 /* flags (note that type modifiers can also be used as flags!) */
167 #define FLAG_WAS_UTF8 0x40
168 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
169 #define FLAG_UNPACK_ONLY_ONE 0x10
170 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
171 #define FLAG_SLASH 0x04
172 #define FLAG_COMMA 0x02
173 #define FLAG_PACK 0x01
176 S_mul128(pTHX_ SV *sv, U8 m)
179 char *s = SvPV(sv, len);
182 PERL_ARGS_ASSERT_MUL128;
184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
185 SV * const tmpNew = newSVpvs("0000000000");
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
193 while (!*t) /* trailing '\0'? */
196 const U32 i = ((*t - '0') << 7) + m;
197 *(t--) = '0' + (char)(i % 10);
203 /* Explosives and implosives. */
205 #if 'I' == 73 && 'J' == 74
206 /* On an ASCII/ISO kind of system */
207 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
210 Some other sort of character set - use memchr() so we don't match
213 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
217 #define TYPE_IS_SHRIEKING 0x100
218 #define TYPE_IS_BIG_ENDIAN 0x200
219 #define TYPE_IS_LITTLE_ENDIAN 0x400
220 #define TYPE_IS_PACK 0x800
221 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
223 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
231 #ifndef PERL_PACK_CAN_BYTEORDER
232 /* Put "can't" first because it is shorter */
233 # define TYPE_ENDIANNESS(t) 0
234 # define TYPE_NO_ENDIANNESS(t) (t)
236 # define ENDIANNESS_ALLOWED_TYPES ""
238 # define DO_BO_UNPACK(var, type)
239 # define DO_BO_PACK(var, type)
240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242 # define DO_BO_UNPACK_N(var, type)
243 # define DO_BO_PACK_N(var, type)
244 # define DO_BO_UNPACK_P(var)
245 # define DO_BO_PACK_P(var)
246 # define DO_BO_UNPACK_PC(var)
247 # define DO_BO_PACK_PC(var)
249 #else /* PERL_PACK_CAN_BYTEORDER */
251 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
254 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
256 # define DO_BO_UNPACK(var, type) \
258 switch (TYPE_ENDIANNESS(datumtype)) { \
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
265 # define DO_BO_PACK(var, type) \
267 switch (TYPE_ENDIANNESS(datumtype)) { \
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
276 switch (TYPE_ENDIANNESS(datumtype)) { \
277 case TYPE_IS_BIG_ENDIAN: \
278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
280 case TYPE_IS_LITTLE_ENDIAN: \
281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
290 switch (TYPE_ENDIANNESS(datumtype)) { \
291 case TYPE_IS_BIG_ENDIAN: \
292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
294 case TYPE_IS_LITTLE_ENDIAN: \
295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
302 # define BO_CANT_DOIT(action, type) \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
318 # if PTRSIZE == INTSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
323 # elif PTRSIZE == LONGSIZE
324 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
328 # elif PTRSIZE == IVSIZE
329 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
334 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
336 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
340 # if defined(my_htolen) && defined(my_letohn) && \
341 defined(my_htoben) && defined(my_betohn)
342 # define DO_BO_UNPACK_N(var, type) \
344 switch (TYPE_ENDIANNESS(datumtype)) { \
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
351 # define DO_BO_PACK_N(var, type) \
353 switch (TYPE_ENDIANNESS(datumtype)) { \
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
360 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
364 #endif /* PERL_PACK_CAN_BYTEORDER */
366 #define PACK_SIZE_CANNOT_CSUM 0x80
367 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
368 #define PACK_SIZE_MASK 0x3F
370 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
373 #if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
377 typedef U8 packprops_t;
380 STATIC const packprops_t packprops[512] = {
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 /* C */ sizeof(unsigned char),
388 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
389 /* D */ LONG_DOUBLESIZE,
396 /* I */ sizeof(unsigned int),
403 #if defined(HAS_QUAD)
404 /* Q */ sizeof(Uquad_t),
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
418 /* f */ sizeof(float),
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
428 #if defined(HAS_QUAD)
429 /* q */ sizeof(Quad_t),
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
455 /* L */ sizeof(unsigned long),
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
463 /* S */ sizeof(unsigned short),
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 /* l */ sizeof(long),
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
482 /* s */ sizeof(short),
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
500 /* EBCDIC (or bust) */
501 STATIC const packprops_t packprops[512] = {
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
515 /* f */ sizeof(float),
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
526 #if defined(HAS_QUAD)
527 /* q */ sizeof(Quad_t),
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 /* C */ sizeof(unsigned char),
539 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
540 /* D */ LONG_DOUBLESIZE,
547 /* I */ sizeof(unsigned int),
555 #if defined(HAS_QUAD)
556 /* Q */ sizeof(Uquad_t),
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
610 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
621 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
633 if (ckWARN(WARN_UNPACK))
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
636 (int) TYPE_NO_MODIFIERS(datumtype));
643 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
648 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
652 const char *from = *s;
654 const U32 flags = ckWARN(WARN_UTF8) ?
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
662 } else from += retlen;
667 *(U8 *)buf++ = (U8)val;
669 /* We have enough characters for the buffer. Did we have problems ? */
672 /* Rewalk the string fragment while warning */
674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
679 if (from > end) from = end;
681 if ((bad & 2) && ckWARN(WARN_UNPACK))
682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
683 WARN_PACK : WARN_UNPACK),
684 "Character(s) in '%c' format wrapped in %s",
685 (int) TYPE_NO_MODIFIERS(datumtype),
686 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
693 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
698 if (val >= 0x100 || !ISUUCHAR(val) ||
699 retlen == (STRLEN) -1 || retlen == 0) {
703 *out = PL_uudmap[val] & 077;
709 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
710 const U8 * const end = start + len;
712 PERL_ARGS_ASSERT_BYTES_TO_UNI;
714 while (start < end) {
715 const UV uv = NATIVE_TO_ASCII(*start);
716 if (UNI_IS_INVARIANT(uv))
717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
727 #define PUSH_BYTES(utf8, cur, buf, len) \
730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
732 Copy(buf, cur, len, char); \
737 #define GROWING(utf8, cat, start, cur, in_len) \
739 STRLEN glen = (in_len); \
740 if (utf8) glen *= UTF8_EXPAND; \
741 if ((cur) + glen >= (start) + SvLEN(cat)) { \
742 (start) = sv_exp_grow(cat, glen); \
743 (cur) = (start) + SvCUR(cat); \
747 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
749 const STRLEN glen = (in_len); \
751 if (utf8) gl *= UTF8_EXPAND; \
752 if ((cur) + gl >= (start) + SvLEN(cat)) { \
754 SvCUR_set((cat), (cur) - (start)); \
755 (start) = sv_exp_grow(cat, gl); \
756 (cur) = (start) + SvCUR(cat); \
758 PUSH_BYTES(utf8, cur, buf, glen); \
761 #define PUSH_BYTE(utf8, s, byte) \
764 const U8 au8 = (byte); \
765 (s) = bytes_to_uni(&au8, 1, (s)); \
766 } else *(U8 *)(s)++ = (byte); \
769 /* Only to be used inside a loop (see the break) */
770 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773 if (str >= end) break; \
774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
775 if (retlen == (STRLEN) -1 || retlen == 0) { \
777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
782 static const char *_action( const tempsym_t* symptr )
784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
787 /* Returns the sizeof() struct described by pat */
789 S_measure_struct(pTHX_ tempsym_t* symptr)
793 PERL_ARGS_ASSERT_MEASURE_STRUCT;
795 while (next_symbol(symptr)) {
799 switch (symptr->howlen) {
801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
805 /* e_no_len and e_number */
806 len = symptr->length;
810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
819 #ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
826 case 'U': /* XXXX Is it correct? */
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 (int) TYPE_NO_MODIFIERS(symptr->code),
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 if (!len) /* Avoid division by 0 */
853 len = total % len; /* Assumed: the start is aligned. */
858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
888 size = sizeof(char*);
898 /* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
904 PERL_ARGS_ASSERT_GROUP_END;
906 while (patptr < patend) {
907 const char c = *patptr++;
914 while (patptr < patend && *patptr != '\n')
918 patptr = group_end(patptr, patend, ')') + 1;
920 patptr = group_end(patptr, patend, ']') + 1;
922 Perl_croak(aTHX_ "No group ending character '%c' found in template",
928 /* Convert unsigned decimal number to binary.
929 * Expects a pointer to the first digit and address of length variable
930 * Advances char pointer to 1st non-digit char and returns number
933 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
935 I32 len = *patptr++ - '0';
937 PERL_ARGS_ASSERT_GET_NUM;
939 while (isDIGIT(*patptr)) {
940 if (len >= 0x7FFFFFFF/10)
941 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942 len = (len * 10) + (*patptr++ - '0');
948 /* The marvellous template parsing routine: Using state stored in *symptr,
949 * locates next template code and count
952 S_next_symbol(pTHX_ tempsym_t* symptr )
954 const char* patptr = symptr->patptr;
955 const char* const patend = symptr->patend;
957 PERL_ARGS_ASSERT_NEXT_SYMBOL;
959 symptr->flags &= ~FLAG_SLASH;
961 while (patptr < patend) {
962 if (isSPACE(*patptr))
964 else if (*patptr == '#') {
966 while (patptr < patend && *patptr != '\n')
971 /* We should have found a template code */
972 I32 code = *patptr++ & 0xFF;
973 U32 inherited_modifiers = 0;
975 if (code == ','){ /* grandfather in commas but with a warning */
976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977 symptr->flags |= FLAG_COMMA;
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Invalid type ',' in %s", _action( symptr ) );
984 /* for '(', skip to ')' */
986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987 Perl_croak(aTHX_ "()-group starts with a count in %s",
989 symptr->grpbeg = patptr;
990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
996 /* look for group modifiers to inherit */
997 if (TYPE_ENDIANNESS(symptr->flags)) {
998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1002 /* look for modifiers */
1003 while (patptr < patend) {
1004 const char *allowed;
1008 modifier = TYPE_IS_SHRIEKING;
1009 allowed = SHRIEKING_ALLOWED_TYPES;
1011 #ifdef PERL_PACK_CAN_BYTEORDER
1013 modifier = TYPE_IS_BIG_ENDIAN;
1014 allowed = ENDIANNESS_ALLOWED_TYPES;
1017 modifier = TYPE_IS_LITTLE_ENDIAN;
1018 allowed = ENDIANNESS_ALLOWED_TYPES;
1020 #endif /* PERL_PACK_CAN_BYTEORDER */
1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1032 allowed, _action( symptr ) );
1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038 TYPE_ENDIANNESS_MASK)
1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1040 *patptr, _action( symptr ) );
1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044 "Duplicate modifier '%c' after '%c' in %s",
1045 *patptr, (int) TYPE_NO_MODIFIERS(code),
1046 _action( symptr ) );
1053 /* inherit modifiers */
1054 code |= inherited_modifiers;
1056 /* look for count and/or / */
1057 if (patptr < patend) {
1058 if (isDIGIT(*patptr)) {
1059 patptr = get_num( patptr, &symptr->length );
1060 symptr->howlen = e_number;
1062 } else if (*patptr == '*') {
1064 symptr->howlen = e_star;
1066 } else if (*patptr == '[') {
1067 const char* lenptr = ++patptr;
1068 symptr->howlen = e_number;
1069 patptr = group_end( patptr, patend, ']' ) + 1;
1070 /* what kind of [] is it? */
1071 if (isDIGIT(*lenptr)) {
1072 lenptr = get_num( lenptr, &symptr->length );
1073 if( *lenptr != ']' )
1074 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1075 _action( symptr ) );
1077 tempsym_t savsym = *symptr;
1078 symptr->patend = patptr-1;
1079 symptr->patptr = lenptr;
1080 savsym.length = measure_struct(symptr);
1084 symptr->howlen = e_no_len;
1089 while (patptr < patend) {
1090 if (isSPACE(*patptr))
1092 else if (*patptr == '#') {
1094 while (patptr < patend && *patptr != '\n')
1096 if (patptr < patend)
1099 if (*patptr == '/') {
1100 symptr->flags |= FLAG_SLASH;
1102 if (patptr < patend &&
1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1105 _action( symptr ) );
1111 /* at end - no count, no / */
1112 symptr->howlen = e_no_len;
1116 symptr->code = code;
1117 symptr->patptr = patptr;
1121 symptr->patptr = patptr;
1126 There is no way to cleanly handle the case where we should process the
1127 string per byte in its upgraded form while it's really in downgraded form
1128 (e.g. estimates like strend-s as an upper bound for the number of
1129 characters left wouldn't work). So if we foresee the need of this
1130 (pattern starts with U or contains U0), we want to work on the encoded
1131 version of the string. Users are advised to upgrade their pack string
1132 themselves if they need to do a lot of unpacks like this on it
1135 need_utf8(const char *pat, const char *patend)
1139 PERL_ARGS_ASSERT_NEED_UTF8;
1141 while (pat < patend) {
1142 if (pat[0] == '#') {
1144 pat = (const char *) memchr(pat, '\n', patend-pat);
1145 if (!pat) return FALSE;
1146 } else if (pat[0] == 'U') {
1147 if (first || pat[1] == '0') return TRUE;
1148 } else first = FALSE;
1155 first_symbol(const char *pat, const char *patend) {
1156 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1158 while (pat < patend) {
1159 if (pat[0] != '#') return pat[0];
1161 pat = (const char *) memchr(pat, '\n', patend-pat);
1169 =for apidoc unpackstring
1171 The engine implementing unpack() Perl function. C<unpackstring> puts the
1172 extracted list items on the stack and returns the number of elements.
1173 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1178 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1182 PERL_ARGS_ASSERT_UNPACKSTRING;
1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1185 else if (need_utf8(pat, patend)) {
1186 /* We probably should try to avoid this in case a scalar context call
1187 wouldn't get to the "U0" */
1188 STRLEN len = strend - s;
1189 s = (char *) bytes_to_utf8((U8 *) s, &len);
1192 flags |= FLAG_DO_UTF8;
1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 flags |= FLAG_PARSE_UTF8;
1198 TEMPSYM_INIT(&sym, pat, patend, flags);
1200 return unpack_rec(&sym, s, s, strend, NULL );
1204 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1208 const I32 start_sp_offset = SP - PL_stack_base;
1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1214 bool beyond = FALSE;
1215 bool explicit_length;
1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1219 PERL_ARGS_ASSERT_UNPACK_REC;
1221 symptr->strbeg = s - strbeg;
1223 while (next_symbol(symptr)) {
1226 I32 datumtype = symptr->code;
1227 /* do first one only unless in list context
1228 / is implemented by unpacking the count, then popping it from the
1229 stack, so must check that we're not in the middle of a / */
1230 if ( unpack_only_one
1231 && (SP - PL_stack_base == start_sp_offset + 1)
1232 && (datumtype != '/') ) /* XXX can this be omitted */
1235 switch (howlen = symptr->howlen) {
1237 len = strend - strbeg; /* long enough */
1240 /* e_no_len and e_number */
1241 len = symptr->length;
1245 explicit_length = TRUE;
1247 beyond = s >= strend;
1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1251 /* props nonzero means we can process this letter. */
1252 const long size = props & PACK_SIZE_MASK;
1253 const long howmany = (strend - s) / size;
1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 if (len && unpack_only_one) len = 1;
1264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1269 if (howlen == e_no_len)
1270 len = 16; /* len is not specified */
1278 tempsym_t savsym = *symptr;
1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1280 symptr->flags |= group_modifiers;
1281 symptr->patend = savsym.grpend;
1282 symptr->previous = &savsym;
1285 if (len && unpack_only_one) len = 1;
1287 symptr->patptr = savsym.grpbeg;
1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1289 else symptr->flags &= ~FLAG_PARSE_UTF8;
1290 unpack_rec(symptr, s, strbeg, strend, &s);
1291 if (s == strend && savsym.howlen == e_star)
1292 break; /* No way to continue */
1295 savsym.flags = symptr->flags & ~group_modifiers;
1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '.' | TYPE_IS_SHRIEKING:
1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1307 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 const bool u8 = utf8;
1310 if (howlen == e_star) from = strbeg;
1311 else if (len <= 0) from = s;
1313 tempsym_t *group = symptr;
1315 while (--len && group) group = group->previous;
1316 from = group ? strbeg + group->strbeg : strbeg;
1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1324 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 case '@' | TYPE_IS_SHRIEKING:
1328 s = strbeg + symptr->strbeg;
1329 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1331 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1337 Perl_croak(aTHX_ "'@' outside of string in unpack");
1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
1349 case 'X' | TYPE_IS_SHRIEKING:
1350 if (!len) /* Avoid division by 0 */
1353 const char *hop, *last;
1355 hop = last = strbeg;
1357 hop += UTF8SKIP(hop);
1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1368 len = (s - strbeg) % len;
1374 Perl_croak(aTHX_ "'X' outside of string in unpack");
1375 while (--s, UTF8_IS_CONTINUATION(*s)) {
1377 Perl_croak(aTHX_ "'X' outside of string in unpack");
1382 if (len > s - strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1387 case 'x' | TYPE_IS_SHRIEKING: {
1389 if (!len) /* Avoid division by 0 */
1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 else ai32 = (s - strbeg) % len;
1393 if (ai32 == 0) break;
1401 Perl_croak(aTHX_ "'x' outside of string in unpack");
1406 if (len > strend - s)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1418 /* Preliminary length estimate is assumed done in 'W' */
1419 if (len > strend - s) len = strend - s;
1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 if (hop >= strend) {
1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1435 } else if (len > strend - s)
1438 if (datumtype == 'Z') {
1439 /* 'Z' strips stuff after first null */
1440 const char *ptr, *end;
1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1443 sv = newSVpvn(s, ptr-s);
1444 if (howlen == e_star) /* exact for 'Z*' */
1445 len = ptr-s + (ptr != strend ? 1 : 0);
1446 } else if (datumtype == 'A') {
1447 /* 'A' strips both nulls and spaces */
1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 for (ptr = s+len-1; ptr >= s; ptr--)
1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1452 !is_utf8_space((U8 *) ptr)) break;
1453 if (ptr >= s) ptr += UTF8SKIP(ptr);
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1458 for (ptr = s+len-1; ptr >= s; ptr--)
1459 if (*ptr != 0 && !isSPACE(*ptr)) break;
1462 sv = newSVpvn(s, ptr-s);
1463 } else sv = newSVpvn(s, len);
1467 /* Undo any upgrade done due to need_utf8() */
1468 if (!(symptr->flags & FLAG_WAS_UTF8))
1469 sv_utf8_downgrade(sv, 0);
1477 if (howlen == e_star || len > (strend - s) * 8)
1478 len = (strend - s) * 8;
1481 while (len >= 8 && s < strend) {
1482 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1487 cuv += PL_bitcount[*(U8 *)s++];
1490 if (len && s < strend) {
1492 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1493 if (datumtype == 'b')
1495 if (bits & 1) cuv++;
1500 if (bits & 0x80) cuv++;
1507 sv = sv_2mortal(newSV(len ? len : 1));
1510 if (datumtype == 'b') {
1512 const I32 ai32 = len;
1513 for (len = 0; len < ai32; len++) {
1514 if (len & 7) bits >>= 1;
1516 if (s >= strend) break;
1517 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1518 } else bits = *(U8 *) s++;
1519 *str++ = bits & 1 ? '1' : '0';
1523 const I32 ai32 = len;
1524 for (len = 0; len < ai32; len++) {
1525 if (len & 7) bits <<= 1;
1527 if (s >= strend) break;
1528 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1529 } else bits = *(U8 *) s++;
1530 *str++ = bits & 0x80 ? '1' : '0';
1534 SvCUR_set(sv, str - SvPVX_const(sv));
1541 /* Preliminary length estimate, acceptable for utf8 too */
1542 if (howlen == e_star || len > (strend - s) * 2)
1543 len = (strend - s) * 2;
1544 sv = sv_2mortal(newSV(len ? len : 1));
1547 if (datumtype == 'h') {
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 1) bits >>= 4;
1553 if (s >= strend) break;
1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1555 } else bits = * (U8 *) s++;
1556 *str++ = PL_hexdigit[bits & 15];
1560 const I32 ai32 = len;
1561 for (len = 0; len < ai32; len++) {
1562 if (len & 1) bits <<= 4;
1564 if (s >= strend) break;
1565 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1566 } else bits = *(U8 *) s++;
1567 *str++ = PL_hexdigit[(bits >> 4) & 15];
1571 SvCUR_set(sv, str - SvPVX_const(sv));
1577 if (explicit_length)
1578 /* Switch to "character" mode */
1579 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1584 while (len-- > 0 && s < strend) {
1589 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1590 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1591 if (retlen == (STRLEN) -1 || retlen == 0)
1592 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1596 aint = *(U8 *)(s)++;
1597 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1601 else if (checksum > bits_in_uv)
1602 cdouble += (NV)aint;
1610 while (len-- > 0 && s < strend) {
1612 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1613 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1614 if (retlen == (STRLEN) -1 || retlen == 0)
1615 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV) val;
1624 } else if (!checksum)
1626 const U8 ch = *(U8 *) s++;
1629 else if (checksum > bits_in_uv)
1630 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1632 while (len-- > 0) cuv += *(U8 *) s++;
1636 if (explicit_length) {
1637 /* Switch to "bytes in UTF-8" mode */
1638 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1640 /* Should be impossible due to the need_utf8() test */
1641 Perl_croak(aTHX_ "U0 mode on a byte string");
1645 if (len > strend - s) len = strend - s;
1647 if (len && unpack_only_one) len = 1;
1651 while (len-- > 0 && s < strend) {
1655 U8 result[UTF8_MAXLEN];
1656 const char *ptr = s;
1658 /* Bug: warns about bad utf8 even if we are short on bytes
1659 and will break out of the loop */
1660 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1663 len = UTF8SKIP(result);
1664 if (!uni_to_bytes(aTHX_ &ptr, strend,
1665 (char *) &result[1], len-1, 'U')) break;
1666 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1669 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1670 if (retlen == (STRLEN) -1 || retlen == 0)
1671 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1676 else if (checksum > bits_in_uv)
1677 cdouble += (NV) auv;
1682 case 's' | TYPE_IS_SHRIEKING:
1683 #if SHORTSIZE != SIZE16
1686 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1687 DO_BO_UNPACK(ashort, s);
1690 else if (checksum > bits_in_uv)
1691 cdouble += (NV)ashort;
1703 #if U16SIZE > SIZE16
1706 SHIFT16(utf8, s, strend, &ai16, datumtype);
1707 DO_BO_UNPACK(ai16, 16);
1708 #if U16SIZE > SIZE16
1714 else if (checksum > bits_in_uv)
1715 cdouble += (NV)ai16;
1720 case 'S' | TYPE_IS_SHRIEKING:
1721 #if SHORTSIZE != SIZE16
1723 unsigned short aushort;
1724 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1725 DO_BO_UNPACK(aushort, s);
1728 else if (checksum > bits_in_uv)
1729 cdouble += (NV)aushort;
1742 #if U16SIZE > SIZE16
1745 SHIFT16(utf8, s, strend, &au16, datumtype);
1746 DO_BO_UNPACK(au16, 16);
1748 if (datumtype == 'n')
1749 au16 = PerlSock_ntohs(au16);
1752 if (datumtype == 'v')
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV) au16;
1763 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1764 case 'v' | TYPE_IS_SHRIEKING:
1765 case 'n' | TYPE_IS_SHRIEKING:
1768 # if U16SIZE > SIZE16
1771 SHIFT16(utf8, s, strend, &ai16, datumtype);
1773 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1774 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1775 # endif /* HAS_NTOHS */
1777 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1778 ai16 = (I16) vtohs((U16) ai16);
1779 # endif /* HAS_VTOHS */
1782 else if (checksum > bits_in_uv)
1783 cdouble += (NV) ai16;
1788 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1790 case 'i' | TYPE_IS_SHRIEKING:
1793 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1794 DO_BO_UNPACK(aint, i);
1797 else if (checksum > bits_in_uv)
1798 cdouble += (NV)aint;
1804 case 'I' | TYPE_IS_SHRIEKING:
1807 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1808 DO_BO_UNPACK(auint, i);
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)auint;
1820 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1821 #if IVSIZE == INTSIZE
1822 DO_BO_UNPACK(aiv, i);
1823 #elif IVSIZE == LONGSIZE
1824 DO_BO_UNPACK(aiv, l);
1825 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1826 DO_BO_UNPACK(aiv, 64);
1828 Perl_croak(aTHX_ "'j' not supported on this platform");
1832 else if (checksum > bits_in_uv)
1841 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1842 #if IVSIZE == INTSIZE
1843 DO_BO_UNPACK(auv, i);
1844 #elif IVSIZE == LONGSIZE
1845 DO_BO_UNPACK(auv, l);
1846 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1847 DO_BO_UNPACK(auv, 64);
1849 Perl_croak(aTHX_ "'J' not supported on this platform");
1853 else if (checksum > bits_in_uv)
1859 case 'l' | TYPE_IS_SHRIEKING:
1860 #if LONGSIZE != SIZE32
1863 SHIFT_VAR(utf8, s, strend, along, datumtype);
1864 DO_BO_UNPACK(along, l);
1867 else if (checksum > bits_in_uv)
1868 cdouble += (NV)along;
1879 #if U32SIZE > SIZE32
1882 SHIFT32(utf8, s, strend, &ai32, datumtype);
1883 DO_BO_UNPACK(ai32, 32);
1884 #if U32SIZE > SIZE32
1885 if (ai32 > 2147483647) ai32 -= 4294967296;
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)ai32;
1895 case 'L' | TYPE_IS_SHRIEKING:
1896 #if LONGSIZE != SIZE32
1898 unsigned long aulong;
1899 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1900 DO_BO_UNPACK(aulong, l);
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)aulong;
1917 #if U32SIZE > SIZE32
1920 SHIFT32(utf8, s, strend, &au32, datumtype);
1921 DO_BO_UNPACK(au32, 32);
1923 if (datumtype == 'N')
1924 au32 = PerlSock_ntohl(au32);
1927 if (datumtype == 'V')
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)au32;
1938 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1939 case 'V' | TYPE_IS_SHRIEKING:
1940 case 'N' | TYPE_IS_SHRIEKING:
1943 # if U32SIZE > SIZE32
1946 SHIFT32(utf8, s, strend, &ai32, datumtype);
1948 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1949 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1952 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1953 ai32 = (I32)vtohl((U32)ai32);
1957 else if (checksum > bits_in_uv)
1958 cdouble += (NV)ai32;
1963 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1967 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1968 DO_BO_UNPACK_PC(aptr);
1969 /* newSVpv generates undef if aptr is NULL */
1970 mPUSHs(newSVpv(aptr, 0));
1978 while (len > 0 && s < strend) {
1980 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1981 auv = (auv << 7) | (ch & 0x7f);
1982 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1990 if (++bytes >= sizeof(UV)) { /* promote to string */
1993 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1994 while (s < strend) {
1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1996 sv = mul128(sv, (U8)(ch & 0x7f));
2002 t = SvPV_nolen_const(sv);
2011 if ((s >= strend) && bytes)
2012 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2016 if (symptr->howlen == e_star)
2017 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2019 if (s + sizeof(char*) <= strend) {
2021 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2022 DO_BO_UNPACK_PC(aptr);
2023 /* newSVpvn generates undef if aptr is NULL */
2024 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2031 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2032 DO_BO_UNPACK(aquad, 64);
2034 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2035 newSViv((IV)aquad) : newSVnv((NV)aquad));
2036 else if (checksum > bits_in_uv)
2037 cdouble += (NV)aquad;
2045 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2046 DO_BO_UNPACK(auquad, 64);
2048 mPUSHs(auquad <= UV_MAX ?
2049 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2050 else if (checksum > bits_in_uv)
2051 cdouble += (NV)auquad;
2056 #endif /* HAS_QUAD */
2057 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2061 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2062 DO_BO_UNPACK_N(afloat, float);
2072 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2073 DO_BO_UNPACK_N(adouble, double);
2083 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2084 DO_BO_UNPACK_N(anv, NV);
2091 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2094 long double aldouble;
2095 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2096 DO_BO_UNPACK_N(aldouble, long double);
2100 cdouble += aldouble;
2106 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2107 sv = sv_2mortal(newSV(l));
2108 if (l) SvPOK_on(sv);
2111 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2116 next_uni_uu(aTHX_ &s, strend, &a);
2117 next_uni_uu(aTHX_ &s, strend, &b);
2118 next_uni_uu(aTHX_ &s, strend, &c);
2119 next_uni_uu(aTHX_ &s, strend, &d);
2120 hunk[0] = (char)((a << 2) | (b >> 4));
2121 hunk[1] = (char)((b << 4) | (c >> 2));
2122 hunk[2] = (char)((c << 6) | d);
2123 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2131 /* possible checksum byte */
2132 const char *skip = s+UTF8SKIP(s);
2133 if (skip < strend && *skip == '\n')
2139 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2143 len = PL_uudmap[*(U8*)s++] & 077;
2145 if (s < strend && ISUUCHAR(*s))
2146 a = PL_uudmap[*(U8*)s++] & 077;
2149 if (s < strend && ISUUCHAR(*s))
2150 b = PL_uudmap[*(U8*)s++] & 077;
2153 if (s < strend && ISUUCHAR(*s))
2154 c = PL_uudmap[*(U8*)s++] & 077;
2157 if (s < strend && ISUUCHAR(*s))
2158 d = PL_uudmap[*(U8*)s++] & 077;
2161 hunk[0] = (char)((a << 2) | (b >> 4));
2162 hunk[1] = (char)((b << 4) | (c >> 2));
2163 hunk[2] = (char)((c << 6) | d);
2164 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2169 else /* possible checksum byte */
2170 if (s + 1 < strend && s[1] == '\n')
2179 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2180 (checksum > bits_in_uv &&
2181 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2184 anv = (NV) (1 << (checksum & 15));
2185 while (checksum >= 16) {
2189 while (cdouble < 0.0)
2191 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2192 sv = newSVnv(cdouble);
2195 if (checksum < bits_in_uv) {
2196 UV mask = ((UV)1 << checksum) - 1;
2205 if (symptr->flags & FLAG_SLASH){
2206 if (SP - PL_stack_base - start_sp_offset <= 0)
2207 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2208 if( next_symbol(symptr) ){
2209 if( symptr->howlen == e_number )
2210 Perl_croak(aTHX_ "Count after length/code in unpack" );
2212 /* ...end of char buffer then no decent length available */
2213 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2215 /* take top of stack (hope it's numeric) */
2218 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2221 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2223 datumtype = symptr->code;
2224 explicit_length = FALSE;
2232 return SP - PL_stack_base - start_sp_offset;
2240 I32 gimme = GIMME_V;
2243 const char *pat = SvPV_const(left, llen);
2244 const char *s = SvPV_const(right, rlen);
2245 const char *strend = s + rlen;
2246 const char *patend = pat + llen;
2250 cnt = unpackstring(pat, patend, s, strend,
2251 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2252 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2255 if ( !cnt && gimme == G_SCALAR )
2256 PUSHs(&PL_sv_undef);
2261 doencodes(U8 *h, const char *s, I32 len)
2263 *h++ = PL_uuemap[len];
2265 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2266 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2267 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2268 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2273 const char r = (len > 1 ? s[1] : '\0');
2274 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2275 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2276 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2277 *h++ = PL_uuemap[0];
2284 S_is_an_int(pTHX_ const char *s, STRLEN l)
2286 SV *result = newSVpvn(s, l);
2287 char *const result_c = SvPV_nolen(result); /* convenience */
2288 char *out = result_c;
2292 PERL_ARGS_ASSERT_IS_AN_INT;
2300 SvREFCNT_dec(result);
2323 SvREFCNT_dec(result);
2329 SvCUR_set(result, out - result_c);
2333 /* pnum must be '\0' terminated */
2335 S_div128(pTHX_ SV *pnum, bool *done)
2338 char * const s = SvPV(pnum, len);
2342 PERL_ARGS_ASSERT_DIV128;
2346 const int i = m * 10 + (*t - '0');
2347 const int r = (i >> 7); /* r < 10 */
2355 SvCUR_set(pnum, (STRLEN) (t - s));
2360 =for apidoc packlist
2362 The engine implementing pack() Perl function.
2368 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2373 PERL_ARGS_ASSERT_PACKLIST;
2375 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2377 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2378 Also make sure any UTF8 flag is loaded */
2379 SvPV_force_nolen(cat);
2381 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2383 (void)pack_rec( cat, &sym, beglist, endlist );
2386 /* like sv_utf8_upgrade, but also repoint the group start markers */
2388 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2391 const char *from_ptr, *from_start, *from_end, **marks, **m;
2392 char *to_start, *to_ptr;
2394 if (SvUTF8(sv)) return;
2396 from_start = SvPVX_const(sv);
2397 from_end = from_start + SvCUR(sv);
2398 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2399 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2400 if (from_ptr == from_end) {
2401 /* Simple case: no character needs to be changed */
2406 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2407 Newx(to_start, len, char);
2408 Copy(from_start, to_start, from_ptr-from_start, char);
2409 to_ptr = to_start + (from_ptr-from_start);
2411 Newx(marks, sym_ptr->level+2, const char *);
2412 for (group=sym_ptr; group; group = group->previous)
2413 marks[group->level] = from_start + group->strbeg;
2414 marks[sym_ptr->level+1] = from_end+1;
2415 for (m = marks; *m < from_ptr; m++)
2416 *m = to_start + (*m-from_start);
2418 for (;from_ptr < from_end; from_ptr++) {
2419 while (*m == from_ptr) *m++ = to_ptr;
2420 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2424 while (*m == from_ptr) *m++ = to_ptr;
2425 if (m != marks + sym_ptr->level+1) {
2428 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2430 for (group=sym_ptr; group; group = group->previous)
2431 group->strbeg = marks[group->level] - to_start;
2436 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2437 from_start -= SvIVX(sv);
2440 SvFLAGS(sv) &= ~SVf_OOK;
2443 Safefree(from_start);
2444 SvPV_set(sv, to_start);
2445 SvCUR_set(sv, to_ptr - to_start);
2450 /* Exponential string grower. Makes string extension effectively O(n)
2451 needed says how many extra bytes we need (not counting the final '\0')
2452 Only grows the string if there is an actual lack of space
2455 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2456 const STRLEN cur = SvCUR(sv);
2457 const STRLEN len = SvLEN(sv);
2460 PERL_ARGS_ASSERT_SV_EXP_GROW;
2462 if (len - cur > needed) return SvPVX(sv);
2463 extend = needed > len ? needed : len;
2464 return SvGROW(sv, len+extend+1);
2469 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2472 tempsym_t lookahead;
2473 I32 items = endlist - beglist;
2474 bool found = next_symbol(symptr);
2475 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2476 bool warn_utf8 = ckWARN(WARN_UTF8);
2478 PERL_ARGS_ASSERT_PACK_REC;
2480 if (symptr->level == 0 && found && symptr->code == 'U') {
2481 marked_upgrade(aTHX_ cat, symptr);
2482 symptr->flags |= FLAG_DO_UTF8;
2485 symptr->strbeg = SvCUR(cat);
2491 SV *lengthcode = NULL;
2492 I32 datumtype = symptr->code;
2493 howlen_t howlen = symptr->howlen;
2494 char *start = SvPVX(cat);
2495 char *cur = start + SvCUR(cat);
2497 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2501 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2505 /* e_no_len and e_number */
2506 len = symptr->length;
2511 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2513 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2514 /* We can process this letter. */
2515 STRLEN size = props & PACK_SIZE_MASK;
2516 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2520 /* Look ahead for next symbol. Do we have code/code? */
2521 lookahead = *symptr;
2522 found = next_symbol(&lookahead);
2523 if (symptr->flags & FLAG_SLASH) {
2525 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2526 if (strchr("aAZ", lookahead.code)) {
2527 if (lookahead.howlen == e_number) count = lookahead.length;
2530 if (SvGAMAGIC(*beglist)) {
2531 /* Avoid reading the active data more than once
2532 by copying it to a temporary. */
2534 const char *const pv = SvPV_const(*beglist, len);
2536 = newSVpvn_flags(pv, len,
2537 SVs_TEMP | SvUTF8(*beglist));
2540 count = DO_UTF8(*beglist) ?
2541 sv_len_utf8(*beglist) : sv_len(*beglist);
2544 if (lookahead.code == 'Z') count++;
2547 if (lookahead.howlen == e_number && lookahead.length < items)
2548 count = lookahead.length;
2551 lookahead.howlen = e_number;
2552 lookahead.length = count;
2553 lengthcode = sv_2mortal(newSViv(count));
2556 /* Code inside the switch must take care to properly update
2557 cat (CUR length and '\0' termination) if it updated *cur and
2558 doesn't simply leave using break */
2559 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2561 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2562 (int) TYPE_NO_MODIFIERS(datumtype));
2564 Perl_croak(aTHX_ "'%%' may not be used in pack");
2567 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2568 case '.' | TYPE_IS_SHRIEKING:
2571 if (howlen == e_star) from = start;
2572 else if (len == 0) from = cur;
2574 tempsym_t *group = symptr;
2576 while (--len && group) group = group->previous;
2577 from = group ? start + group->strbeg : start;
2580 len = SvIV(fromstr);
2582 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2583 case '@' | TYPE_IS_SHRIEKING:
2586 from = start + symptr->strbeg;
2588 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2590 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2594 while (len && from < cur) {
2595 from += UTF8SKIP(from);
2599 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2601 /* Here we know from == cur */
2603 GROWING(0, cat, start, cur, len);
2604 Zero(cur, len, char);
2606 } else if (from < cur) {
2609 } else goto no_change;
2617 if (len > 0) goto grow;
2618 if (len == 0) goto no_change;
2625 tempsym_t savsym = *symptr;
2626 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2627 symptr->flags |= group_modifiers;
2628 symptr->patend = savsym.grpend;
2630 symptr->previous = &lookahead;
2633 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2634 else symptr->flags &= ~FLAG_PARSE_UTF8;
2635 was_utf8 = SvUTF8(cat);
2636 symptr->patptr = savsym.grpbeg;
2637 beglist = pack_rec(cat, symptr, beglist, endlist);
2638 if (SvUTF8(cat) != was_utf8)
2639 /* This had better be an upgrade while in utf8==0 mode */
2642 if (savsym.howlen == e_star && beglist == endlist)
2643 break; /* No way to continue */
2645 items = endlist - beglist;
2646 lookahead.flags = symptr->flags & ~group_modifiers;
2649 case 'X' | TYPE_IS_SHRIEKING:
2650 if (!len) /* Avoid division by 0 */
2657 hop += UTF8SKIP(hop);
2664 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2668 len = (cur-start) % len;
2672 if (len < 1) goto no_change;
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
2678 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2680 Perl_croak(aTHX_ "'%c' outside of string in pack",
2681 (int) TYPE_NO_MODIFIERS(datumtype));
2687 if (cur - start < len)
2688 Perl_croak(aTHX_ "'%c' outside of string in pack",
2689 (int) TYPE_NO_MODIFIERS(datumtype));
2692 if (cur < start+symptr->strbeg) {
2693 /* Make sure group starts don't point into the void */
2695 const STRLEN length = cur-start;
2696 for (group = symptr;
2697 group && length < group->strbeg;
2698 group = group->previous) group->strbeg = length;
2699 lookahead.strbeg = length;
2702 case 'x' | TYPE_IS_SHRIEKING: {
2704 if (!len) /* Avoid division by 0 */
2706 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2707 else ai32 = (cur - start) % len;
2708 if (ai32 == 0) goto no_change;
2720 aptr = SvPV_const(fromstr, fromlen);
2721 if (DO_UTF8(fromstr)) {
2722 const char *end, *s;
2724 if (!utf8 && !SvUTF8(cat)) {
2725 marked_upgrade(aTHX_ cat, symptr);
2726 lookahead.flags |= FLAG_DO_UTF8;
2727 lookahead.strbeg = symptr->strbeg;
2730 cur = start + SvCUR(cat);
2732 if (howlen == e_star) {
2733 if (utf8) goto string_copy;
2737 end = aptr + fromlen;
2738 fromlen = datumtype == 'Z' ? len-1 : len;
2739 while ((I32) fromlen > 0 && s < end) {
2744 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2747 if (datumtype == 'Z') len++;
2753 fromlen = len - fromlen;
2754 if (datumtype == 'Z') fromlen--;
2755 if (howlen == e_star) {
2757 if (datumtype == 'Z') len++;
2759 GROWING(0, cat, start, cur, len);
2760 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2761 datumtype | TYPE_IS_PACK))
2762 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2766 if (howlen == e_star) {
2768 if (datumtype == 'Z') len++;
2770 if (len <= (I32) fromlen) {
2772 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2774 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2776 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2777 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2779 while (fromlen > 0) {
2780 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2786 if (howlen == e_star) {
2788 if (datumtype == 'Z') len++;
2790 if (len <= (I32) fromlen) {
2792 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2794 GROWING(0, cat, start, cur, len);
2795 Copy(aptr, cur, fromlen, char);
2799 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2806 const char *str, *end;
2813 str = SvPV_const(fromstr, fromlen);
2814 end = str + fromlen;
2815 if (DO_UTF8(fromstr)) {
2817 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2819 utf8_source = FALSE;
2820 utf8_flags = 0; /* Unused, but keep compilers happy */
2822 if (howlen == e_star) len = fromlen;
2823 field_len = (len+7)/8;
2824 GROWING(utf8, cat, start, cur, field_len);
2825 if (len > (I32)fromlen) len = fromlen;
2828 if (datumtype == 'B')
2832 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2834 } else bits |= *str++ & 1;
2835 if (l & 7) bits <<= 1;
2837 PUSH_BYTE(utf8, cur, bits);
2842 /* datumtype == 'b' */
2846 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2847 if (val & 1) bits |= 0x80;
2848 } else if (*str++ & 1)
2850 if (l & 7) bits >>= 1;
2852 PUSH_BYTE(utf8, cur, bits);
2858 if (datumtype == 'B')
2859 bits <<= 7 - (l & 7);
2861 bits >>= 7 - (l & 7);
2862 PUSH_BYTE(utf8, cur, bits);
2865 /* Determine how many chars are left in the requested field */
2867 if (howlen == e_star) field_len = 0;
2868 else field_len -= l;
2869 Zero(cur, field_len, char);
2875 const char *str, *end;
2882 str = SvPV_const(fromstr, fromlen);
2883 end = str + fromlen;
2884 if (DO_UTF8(fromstr)) {
2886 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2888 utf8_source = FALSE;
2889 utf8_flags = 0; /* Unused, but keep compilers happy */
2891 if (howlen == e_star) len = fromlen;
2892 field_len = (len+1)/2;
2893 GROWING(utf8, cat, start, cur, field_len);
2894 if (!utf8 && len > (I32)fromlen) len = fromlen;
2897 if (datumtype == 'H')
2901 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2902 if (val < 256 && isALPHA(val))
2903 bits |= (val + 9) & 0xf;
2906 } else if (isALPHA(*str))
2907 bits |= (*str++ + 9) & 0xf;
2909 bits |= *str++ & 0xf;
2910 if (l & 1) bits <<= 4;
2912 PUSH_BYTE(utf8, cur, bits);
2920 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2921 if (val < 256 && isALPHA(val))
2922 bits |= ((val + 9) & 0xf) << 4;
2924 bits |= (val & 0xf) << 4;
2925 } else if (isALPHA(*str))
2926 bits |= ((*str++ + 9) & 0xf) << 4;
2928 bits |= (*str++ & 0xf) << 4;
2929 if (l & 1) bits >>= 4;
2931 PUSH_BYTE(utf8, cur, bits);
2937 PUSH_BYTE(utf8, cur, bits);
2940 /* Determine how many chars are left in the requested field */
2942 if (howlen == e_star) field_len = 0;
2943 else field_len -= l;
2944 Zero(cur, field_len, char);
2952 aiv = SvIV(fromstr);
2953 if ((-128 > aiv || aiv > 127) &&
2955 Perl_warner(aTHX_ packWARN(WARN_PACK),
2956 "Character in 'c' format wrapped in pack");
2957 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2962 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2968 aiv = SvIV(fromstr);
2969 if ((0 > aiv || aiv > 0xff) &&
2971 Perl_warner(aTHX_ packWARN(WARN_PACK),
2972 "Character in 'C' format wrapped in pack");
2973 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2978 U8 in_bytes = (U8)IN_BYTES;
2980 end = start+SvLEN(cat)-1;
2981 if (utf8) end -= UTF8_MAXLEN-1;
2985 auv = SvUV(fromstr);
2986 if (in_bytes) auv = auv % 0x100;
2991 SvCUR_set(cat, cur - start);
2993 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2994 end = start+SvLEN(cat)-UTF8_MAXLEN;
2996 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2999 0 : UNICODE_ALLOW_ANY);
3004 SvCUR_set(cat, cur - start);
3005 marked_upgrade(aTHX_ cat, symptr);
3006 lookahead.flags |= FLAG_DO_UTF8;
3007 lookahead.strbeg = symptr->strbeg;
3010 cur = start + SvCUR(cat);
3011 end = start+SvLEN(cat)-UTF8_MAXLEN;
3014 if (ckWARN(WARN_PACK))
3015 Perl_warner(aTHX_ packWARN(WARN_PACK),
3016 "Character in 'W' format wrapped in pack");
3021 SvCUR_set(cat, cur - start);
3022 GROWING(0, cat, start, cur, len+1);
3023 end = start+SvLEN(cat)-1;
3025 *(U8 *) cur++ = (U8)auv;
3034 if (!(symptr->flags & FLAG_DO_UTF8)) {
3035 marked_upgrade(aTHX_ cat, symptr);
3036 lookahead.flags |= FLAG_DO_UTF8;
3037 lookahead.strbeg = symptr->strbeg;
3043 end = start+SvLEN(cat);
3044 if (!utf8) end -= UTF8_MAXLEN;
3048 auv = SvUV(fromstr);
3050 U8 buffer[UTF8_MAXLEN], *endb;
3051 endb = uvuni_to_utf8_flags(buffer, auv,
3053 0 : UNICODE_ALLOW_ANY);
3054 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3056 SvCUR_set(cat, cur - start);
3057 GROWING(0, cat, start, cur,
3058 len+(endb-buffer)*UTF8_EXPAND);
3059 end = start+SvLEN(cat);
3061 cur = bytes_to_uni(buffer, endb-buffer, cur);
3065 SvCUR_set(cat, cur - start);
3066 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3067 end = start+SvLEN(cat)-UTF8_MAXLEN;
3069 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3071 0 : UNICODE_ALLOW_ANY);
3076 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3082 anv = SvNV(fromstr);
3084 /* VOS does not automatically map a floating-point overflow
3085 during conversion from double to float into infinity, so we
3086 do it by hand. This code should either be generalized for
3087 any OS that needs it, or removed if and when VOS implements
3088 posix-976 (suggestion to support mapping to infinity).
3089 Paul.Green@stratus.com 02-04-02. */
3091 extern const float _float_constants[];
3093 afloat = _float_constants[0]; /* single prec. inf. */
3094 else if (anv < -FLT_MAX)
3095 afloat = _float_constants[0]; /* single prec. inf. */
3096 else afloat = (float) anv;
3099 # if defined(VMS) && !defined(__IEEE_FP)
3100 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3101 * on Alpha; fake it if we don't have them.
3105 else if (anv < -FLT_MAX)
3107 else afloat = (float)anv;
3109 afloat = (float)anv;
3111 #endif /* __VOS__ */
3112 DO_BO_PACK_N(afloat, float);
3113 PUSH_VAR(utf8, cur, afloat);
3121 anv = SvNV(fromstr);
3123 /* VOS does not automatically map a floating-point overflow
3124 during conversion from long double to double into infinity,
3125 so we do it by hand. This code should either be generalized
3126 for any OS that needs it, or removed if and when VOS
3127 implements posix-976 (suggestion to support mapping to
3128 infinity). Paul.Green@stratus.com 02-04-02. */
3130 extern const double _double_constants[];
3132 adouble = _double_constants[0]; /* double prec. inf. */
3133 else if (anv < -DBL_MAX)
3134 adouble = _double_constants[0]; /* double prec. inf. */
3135 else adouble = (double) anv;
3138 # if defined(VMS) && !defined(__IEEE_FP)
3139 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3140 * on Alpha; fake it if we don't have them.
3144 else if (anv < -DBL_MAX)
3146 else adouble = (double)anv;
3148 adouble = (double)anv;
3150 #endif /* __VOS__ */
3151 DO_BO_PACK_N(adouble, double);
3152 PUSH_VAR(utf8, cur, adouble);
3157 Zero(&anv, 1, NV); /* can be long double with unused bits */
3160 anv = SvNV(fromstr);
3161 DO_BO_PACK_N(anv, NV);
3162 PUSH_VAR(utf8, cur, anv);
3166 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3168 long double aldouble;
3169 /* long doubles can have unused bits, which may be nonzero */
3170 Zero(&aldouble, 1, long double);
3173 aldouble = (long double)SvNV(fromstr);
3174 DO_BO_PACK_N(aldouble, long double);
3175 PUSH_VAR(utf8, cur, aldouble);
3180 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3181 case 'n' | TYPE_IS_SHRIEKING:
3187 ai16 = (I16)SvIV(fromstr);
3189 ai16 = PerlSock_htons(ai16);
3191 PUSH16(utf8, cur, &ai16);
3194 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3195 case 'v' | TYPE_IS_SHRIEKING:
3201 ai16 = (I16)SvIV(fromstr);
3205 PUSH16(utf8, cur, &ai16);
3208 case 'S' | TYPE_IS_SHRIEKING:
3209 #if SHORTSIZE != SIZE16
3211 unsigned short aushort;
3213 aushort = SvUV(fromstr);
3214 DO_BO_PACK(aushort, s);
3215 PUSH_VAR(utf8, cur, aushort);
3225 au16 = (U16)SvUV(fromstr);
3226 DO_BO_PACK(au16, 16);
3227 PUSH16(utf8, cur, &au16);
3230 case 's' | TYPE_IS_SHRIEKING:
3231 #if SHORTSIZE != SIZE16
3235 ashort = SvIV(fromstr);
3236 DO_BO_PACK(ashort, s);
3237 PUSH_VAR(utf8, cur, ashort);
3247 ai16 = (I16)SvIV(fromstr);
3248 DO_BO_PACK(ai16, 16);
3249 PUSH16(utf8, cur, &ai16);
3253 case 'I' | TYPE_IS_SHRIEKING:
3257 auint = SvUV(fromstr);
3258 DO_BO_PACK(auint, i);
3259 PUSH_VAR(utf8, cur, auint);
3266 aiv = SvIV(fromstr);
3267 #if IVSIZE == INTSIZE
3269 #elif IVSIZE == LONGSIZE
3271 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3272 DO_BO_PACK(aiv, 64);
3274 Perl_croak(aTHX_ "'j' not supported on this platform");
3276 PUSH_VAR(utf8, cur, aiv);
3283 auv = SvUV(fromstr);
3284 #if UVSIZE == INTSIZE
3286 #elif UVSIZE == LONGSIZE
3288 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3289 DO_BO_PACK(auv, 64);
3291 Perl_croak(aTHX_ "'J' not supported on this platform");
3293 PUSH_VAR(utf8, cur, auv);
3300 anv = SvNV(fromstr);
3304 SvCUR_set(cat, cur - start);
3305 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3308 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3309 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3310 any negative IVs will have already been got by the croak()
3311 above. IOK is untrue for fractions, so we test them
3312 against UV_MAX_P1. */
3313 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3314 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3315 char *in = buf + sizeof(buf);
3316 UV auv = SvUV(fromstr);
3319 *--in = (char)((auv & 0x7f) | 0x80);
3322 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3323 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3324 in, (buf + sizeof(buf)) - in);
3325 } else if (SvPOKp(fromstr))
3327 else if (SvNOKp(fromstr)) {
3328 /* 10**NV_MAX_10_EXP is the largest power of 10
3329 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3330 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3331 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3332 And with that many bytes only Inf can overflow.
3333 Some C compilers are strict about integral constant
3334 expressions so we conservatively divide by a slightly
3335 smaller integer instead of multiplying by the exact
3336 floating-point value.
3338 #ifdef NV_MAX_10_EXP
3339 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3340 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3342 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3343 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3345 char *in = buf + sizeof(buf);
3347 anv = Perl_floor(anv);
3349 const NV next = Perl_floor(anv / 128);
3350 if (in <= buf) /* this cannot happen ;-) */
3351 Perl_croak(aTHX_ "Cannot compress integer in pack");
3352 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3355 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3356 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3357 in, (buf + sizeof(buf)) - in);
3366 /* Copy string and check for compliance */
3367 from = SvPV_const(fromstr, len);
3368 if ((norm = is_an_int(from, len)) == NULL)
3369 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3371 Newx(result, len, char);
3374 while (!done) *--in = div128(norm, &done) | 0x80;
3375 result[len - 1] &= 0x7F; /* clear continue bit */
3376 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3377 in, (result + len) - in);
3379 SvREFCNT_dec(norm); /* free norm */
3384 case 'i' | TYPE_IS_SHRIEKING:
3388 aint = SvIV(fromstr);
3389 DO_BO_PACK(aint, i);
3390 PUSH_VAR(utf8, cur, aint);
3393 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3394 case 'N' | TYPE_IS_SHRIEKING:
3400 au32 = SvUV(fromstr);
3402 au32 = PerlSock_htonl(au32);
3404 PUSH32(utf8, cur, &au32);
3407 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3408 case 'V' | TYPE_IS_SHRIEKING:
3414 au32 = SvUV(fromstr);
3418 PUSH32(utf8, cur, &au32);
3421 case 'L' | TYPE_IS_SHRIEKING:
3422 #if LONGSIZE != SIZE32
3424 unsigned long aulong;
3426 aulong = SvUV(fromstr);
3427 DO_BO_PACK(aulong, l);
3428 PUSH_VAR(utf8, cur, aulong);
3438 au32 = SvUV(fromstr);
3439 DO_BO_PACK(au32, 32);
3440 PUSH32(utf8, cur, &au32);
3443 case 'l' | TYPE_IS_SHRIEKING:
3444 #if LONGSIZE != SIZE32
3448 along = SvIV(fromstr);
3449 DO_BO_PACK(along, l);
3450 PUSH_VAR(utf8, cur, along);
3460 ai32 = SvIV(fromstr);
3461 DO_BO_PACK(ai32, 32);
3462 PUSH32(utf8, cur, &ai32);
3470 auquad = (Uquad_t) SvUV(fromstr);
3471 DO_BO_PACK(auquad, 64);
3472 PUSH_VAR(utf8, cur, auquad);
3479 aquad = (Quad_t)SvIV(fromstr);
3480 DO_BO_PACK(aquad, 64);
3481 PUSH_VAR(utf8, cur, aquad);
3484 #endif /* HAS_QUAD */
3486 len = 1; /* assume SV is correct length */
3487 GROWING(utf8, cat, start, cur, sizeof(char *));
3494 SvGETMAGIC(fromstr);
3495 if (!SvOK(fromstr)) aptr = NULL;
3497 /* XXX better yet, could spirit away the string to
3498 * a safe spot and hang on to it until the result
3499 * of pack() (and all copies of the result) are
3502 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3503 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3504 Perl_warner(aTHX_ packWARN(WARN_PACK),
3505 "Attempt to pack pointer to temporary value");
3507 if (SvPOK(fromstr) || SvNIOK(fromstr))
3508 aptr = SvPV_nomg_const_nolen(fromstr);
3510 aptr = SvPV_force_flags_nolen(fromstr, 0);
3512 DO_BO_PACK_PC(aptr);
3513 PUSH_VAR(utf8, cur, aptr);
3517 const char *aptr, *aend;
3521 if (len <= 2) len = 45;
3522 else len = len / 3 * 3;
3524 if (ckWARN(WARN_PACK))
3525 Perl_warner(aTHX_ packWARN(WARN_PACK),
3526 "Field too wide in 'u' format in pack");
3529 aptr = SvPV_const(fromstr, fromlen);
3530 from_utf8 = DO_UTF8(fromstr);
3532 aend = aptr + fromlen;
3533 fromlen = sv_len_utf8(fromstr);
3534 } else aend = NULL; /* Unused, but keep compilers happy */
3535 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3536 while (fromlen > 0) {
3539 U8 hunk[1+63/3*4+1];
3541 if ((I32)fromlen > len)
3547 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3548 'u' | TYPE_IS_PACK)) {
3550 SvCUR_set(cat, cur - start);
3551 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3553 end = doencodes(hunk, buffer, todo);
3555 end = doencodes(hunk, aptr, todo);
3558 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3565 SvCUR_set(cat, cur - start);
3567 *symptr = lookahead;
3576 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3577 register SV *cat = TARG;
3579 SV *pat_sv = *++MARK;
3580 register const char *pat = SvPV_const(pat_sv, fromlen);
3581 register const char *patend = pat + fromlen;
3587 packlist(cat, pat, patend, MARK, SP + 1);
3597 * c-indentation-style: bsd
3599 * indent-tabs-mode: t
3602 * ex: set ts=8 sts=4 sw=4 noet: