3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvn("0000000000", 10);
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
326 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
327 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
328 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
329 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
332 # if defined(my_htolen) && defined(my_letohn) && \
333 defined(my_htoben) && defined(my_betohn)
334 # define DO_BO_UNPACK_N(var, type) \
336 switch (TYPE_ENDIANNESS(datumtype)) { \
337 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
338 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
343 # define DO_BO_PACK_N(var, type) \
345 switch (TYPE_ENDIANNESS(datumtype)) { \
346 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
347 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
352 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
353 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
356 #endif /* PERL_PACK_CAN_BYTEORDER */
358 #define PACK_SIZE_CANNOT_CSUM 0x80
359 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
360 #define PACK_SIZE_MASK 0x3F
362 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
363 in). You're unlikely ever to need to regenerate them. */
365 #if TYPE_IS_SHRIEKING != 0x100
366 ++++shriek offset should be 256
369 typedef U8 packprops_t;
372 STATIC const packprops_t packprops[512] = {
374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
375 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
380 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
381 /* D */ LONG_DOUBLESIZE,
388 /* I */ sizeof(unsigned int),
395 #if defined(HAS_QUAD)
396 /* Q */ sizeof(Uquad_t),
403 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
405 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 /* c */ sizeof(char),
408 /* d */ sizeof(double),
410 /* f */ sizeof(float),
419 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
420 #if defined(HAS_QUAD)
421 /* q */ sizeof(Quad_t),
429 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 /* I */ sizeof(unsigned int),
447 /* L */ sizeof(unsigned long),
449 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
455 /* S */ sizeof(unsigned short),
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 /* l */ sizeof(long),
468 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
474 /* s */ sizeof(short),
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0
492 /* EBCDIC (or bust) */
493 STATIC const packprops_t packprops[512] = {
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 /* c */ sizeof(char),
505 /* d */ sizeof(double),
507 /* f */ sizeof(float),
517 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
518 #if defined(HAS_QUAD)
519 /* q */ sizeof(Quad_t),
523 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
531 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
532 /* D */ LONG_DOUBLESIZE,
539 /* I */ sizeof(unsigned int),
547 #if defined(HAS_QUAD)
548 /* Q */ sizeof(Uquad_t),
552 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
557 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 /* l */ sizeof(long),
574 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* s */ sizeof(short),
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* I */ sizeof(unsigned int),
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* L */ sizeof(unsigned long),
594 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* S */ sizeof(unsigned short),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
613 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
616 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
617 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
618 /* We try to process malformed UTF-8 as much as possible (preferrably with
619 warnings), but these two mean we make no progress in the string and
620 might enter an infinite loop */
621 if (retlen == (STRLEN) -1 || retlen == 0)
622 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
623 (int) TYPE_NO_MODIFIERS(datumtype));
625 if (ckWARN(WARN_UNPACK))
626 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
627 "Character in '%c' format wrapped in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
635 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
636 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
640 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
644 const char *from = *s;
646 const U32 flags = ckWARN(WARN_UTF8) ?
647 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
648 for (;buf_len > 0; buf_len--) {
649 if (from >= end) return FALSE;
650 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
651 if (retlen == (STRLEN) -1 || retlen == 0) {
652 from += UTF8SKIP(from);
654 } else from += retlen;
659 *(U8 *)buf++ = (U8)val;
661 /* We have enough characters for the buffer. Did we have problems ? */
664 /* Rewalk the string fragment while warning */
666 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
667 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
668 if (ptr >= end) break;
669 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
671 if (from > end) from = end;
673 if ((bad & 2) && ckWARN(WARN_UNPACK))
674 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
675 WARN_PACK : WARN_UNPACK),
676 "Character(s) in '%c' format wrapped in %s",
677 (int) TYPE_NO_MODIFIERS(datumtype),
678 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
685 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
688 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
689 if (val >= 0x100 || !ISUUCHAR(val) ||
690 retlen == (STRLEN) -1 || retlen == 0) {
694 *out = PL_uudmap[val] & 077;
700 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
701 U8 buffer[UTF8_MAXLEN];
702 const U8 * const end = start + len;
704 while (start < end) {
706 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
716 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
724 #define PUSH_BYTES(utf8, cur, buf, len) \
726 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
728 Copy(buf, cur, len, char); \
733 #define GROWING(utf8, cat, start, cur, in_len) \
735 STRLEN glen = (in_len); \
736 if (utf8) glen *= UTF8_EXPAND; \
737 if ((cur) + glen >= (start) + SvLEN(cat)) { \
738 (start) = sv_exp_grow(aTHX_ cat, glen); \
739 (cur) = (start) + SvCUR(cat); \
743 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
745 const STRLEN glen = (in_len); \
747 if (utf8) gl *= UTF8_EXPAND; \
748 if ((cur) + gl >= (start) + SvLEN(cat)) { \
750 SvCUR_set((cat), (cur) - (start)); \
751 (start) = sv_exp_grow(aTHX_ cat, gl); \
752 (cur) = (start) + SvCUR(cat); \
754 PUSH_BYTES(utf8, cur, buf, glen); \
757 #define PUSH_BYTE(utf8, s, byte) \
760 const U8 au8 = (byte); \
761 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
762 } else *(U8 *)(s)++ = (byte); \
765 /* Only to be used inside a loop (see the break) */
766 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
769 if (str >= end) break; \
770 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
771 if (retlen == (STRLEN) -1 || retlen == 0) { \
773 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
778 static const char *_action( const tempsym_t* symptr )
780 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
783 /* Returns the sizeof() struct described by pat */
785 S_measure_struct(pTHX_ tempsym_t* symptr)
789 while (next_symbol(symptr)) {
793 switch (symptr->howlen) {
795 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
799 /* e_no_len and e_number */
800 len = symptr->length;
804 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
807 /* endianness doesn't influence the size of a type */
808 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
810 Perl_croak(aTHX_ "Invalid type '%c' in %s",
811 (int)TYPE_NO_MODIFIERS(symptr->code),
813 #ifdef PERL_PACK_CAN_SHRIEKSIGN
814 case '.' | TYPE_IS_SHRIEKING:
815 case '@' | TYPE_IS_SHRIEKING:
820 case 'U': /* XXXX Is it correct? */
823 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
824 (int) TYPE_NO_MODIFIERS(symptr->code),
831 tempsym_t savsym = *symptr;
832 symptr->patptr = savsym.grpbeg;
833 symptr->patend = savsym.grpend;
834 /* XXXX Theoretically, we need to measure many times at
835 different positions, since the subexpression may contain
836 alignment commands, but be not of aligned length.
837 Need to detect this and croak(). */
838 size = measure_struct(symptr);
842 case 'X' | TYPE_IS_SHRIEKING:
843 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
845 if (!len) /* Avoid division by 0 */
847 len = total % len; /* Assumed: the start is aligned. */
852 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
854 case 'x' | TYPE_IS_SHRIEKING:
855 if (!len) /* Avoid division by 0 */
857 star = total % len; /* Assumed: the start is aligned. */
858 if (star) /* Other portable ways? */
882 size = sizeof(char*);
892 /* locate matching closing parenthesis or bracket
893 * returns char pointer to char after match, or NULL
896 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
898 while (patptr < patend) {
899 const char c = *patptr++;
906 while (patptr < patend && *patptr != '\n')
910 patptr = group_end(patptr, patend, ')') + 1;
912 patptr = group_end(patptr, patend, ']') + 1;
914 Perl_croak(aTHX_ "No group ending character '%c' found in template",
920 /* Convert unsigned decimal number to binary.
921 * Expects a pointer to the first digit and address of length variable
922 * Advances char pointer to 1st non-digit char and returns number
925 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
927 I32 len = *patptr++ - '0';
928 while (isDIGIT(*patptr)) {
929 if (len >= 0x7FFFFFFF/10)
930 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
931 len = (len * 10) + (*patptr++ - '0');
937 /* The marvellous template parsing routine: Using state stored in *symptr,
938 * locates next template code and count
941 S_next_symbol(pTHX_ tempsym_t* symptr )
943 const char* patptr = symptr->patptr;
944 const char* const patend = symptr->patend;
946 symptr->flags &= ~FLAG_SLASH;
948 while (patptr < patend) {
949 if (isSPACE(*patptr))
951 else if (*patptr == '#') {
953 while (patptr < patend && *patptr != '\n')
958 /* We should have found a template code */
959 I32 code = *patptr++ & 0xFF;
960 U32 inherited_modifiers = 0;
962 if (code == ','){ /* grandfather in commas but with a warning */
963 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
964 symptr->flags |= FLAG_COMMA;
965 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
966 "Invalid type ',' in %s", _action( symptr ) );
971 /* for '(', skip to ')' */
973 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
974 Perl_croak(aTHX_ "()-group starts with a count in %s",
976 symptr->grpbeg = patptr;
977 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
978 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
979 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
983 /* look for group modifiers to inherit */
984 if (TYPE_ENDIANNESS(symptr->flags)) {
985 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
986 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
989 /* look for modifiers */
990 while (patptr < patend) {
995 modifier = TYPE_IS_SHRIEKING;
996 allowed = SHRIEKING_ALLOWED_TYPES;
998 #ifdef PERL_PACK_CAN_BYTEORDER
1000 modifier = TYPE_IS_BIG_ENDIAN;
1001 allowed = ENDIANNESS_ALLOWED_TYPES;
1004 modifier = TYPE_IS_LITTLE_ENDIAN;
1005 allowed = ENDIANNESS_ALLOWED_TYPES;
1007 #endif /* PERL_PACK_CAN_BYTEORDER */
1017 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1018 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1019 allowed, _action( symptr ) );
1021 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1022 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1023 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1024 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1025 TYPE_ENDIANNESS_MASK)
1026 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1027 *patptr, _action( symptr ) );
1029 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1030 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1031 "Duplicate modifier '%c' after '%c' in %s",
1032 *patptr, (int) TYPE_NO_MODIFIERS(code),
1033 _action( symptr ) );
1040 /* inherit modifiers */
1041 code |= inherited_modifiers;
1043 /* look for count and/or / */
1044 if (patptr < patend) {
1045 if (isDIGIT(*patptr)) {
1046 patptr = get_num( patptr, &symptr->length );
1047 symptr->howlen = e_number;
1049 } else if (*patptr == '*') {
1051 symptr->howlen = e_star;
1053 } else if (*patptr == '[') {
1054 const char* lenptr = ++patptr;
1055 symptr->howlen = e_number;
1056 patptr = group_end( patptr, patend, ']' ) + 1;
1057 /* what kind of [] is it? */
1058 if (isDIGIT(*lenptr)) {
1059 lenptr = get_num( lenptr, &symptr->length );
1060 if( *lenptr != ']' )
1061 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1062 _action( symptr ) );
1064 tempsym_t savsym = *symptr;
1065 symptr->patend = patptr-1;
1066 symptr->patptr = lenptr;
1067 savsym.length = measure_struct(symptr);
1071 symptr->howlen = e_no_len;
1076 while (patptr < patend) {
1077 if (isSPACE(*patptr))
1079 else if (*patptr == '#') {
1081 while (patptr < patend && *patptr != '\n')
1083 if (patptr < patend)
1086 if (*patptr == '/') {
1087 symptr->flags |= FLAG_SLASH;
1089 if (patptr < patend &&
1090 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1091 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1092 _action( symptr ) );
1098 /* at end - no count, no / */
1099 symptr->howlen = e_no_len;
1103 symptr->code = code;
1104 symptr->patptr = patptr;
1108 symptr->patptr = patptr;
1113 There is no way to cleanly handle the case where we should process the
1114 string per byte in its upgraded form while it's really in downgraded form
1115 (e.g. estimates like strend-s as an upper bound for the number of
1116 characters left wouldn't work). So if we foresee the need of this
1117 (pattern starts with U or contains U0), we want to work on the encoded
1118 version of the string. Users are advised to upgrade their pack string
1119 themselves if they need to do a lot of unpacks like this on it
1122 need_utf8(const char *pat, const char *patend)
1125 while (pat < patend) {
1126 if (pat[0] == '#') {
1128 pat = (const char *) memchr(pat, '\n', patend-pat);
1129 if (!pat) return FALSE;
1130 } else if (pat[0] == 'U') {
1131 if (first || pat[1] == '0') return TRUE;
1132 } else first = FALSE;
1139 first_symbol(const char *pat, const char *patend) {
1140 while (pat < patend) {
1141 if (pat[0] != '#') return pat[0];
1143 pat = (const char *) memchr(pat, '\n', patend-pat);
1151 =for apidoc unpack_str
1153 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1154 and ocnt are not used. This call should not be used, use unpackstring instead.
1159 Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1162 PERL_UNUSED_ARG(strbeg);
1163 PERL_UNUSED_ARG(new_s);
1164 PERL_UNUSED_ARG(ocnt);
1166 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1167 else if (need_utf8(pat, patend)) {
1168 /* We probably should try to avoid this in case a scalar context call
1169 wouldn't get to the "U0" */
1170 STRLEN len = strend - s;
1171 s = (char *) bytes_to_utf8((U8 *) s, &len);
1174 flags |= FLAG_DO_UTF8;
1177 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1178 flags |= FLAG_PARSE_UTF8;
1180 TEMPSYM_INIT(&sym, pat, patend, flags);
1182 return unpack_rec(&sym, s, s, strend, NULL );
1186 =for apidoc unpackstring
1188 The engine implementing unpack() Perl function. C<unpackstring> puts the
1189 extracted list items on the stack and returns the number of elements.
1190 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1195 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1199 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1200 else if (need_utf8(pat, patend)) {
1201 /* We probably should try to avoid this in case a scalar context call
1202 wouldn't get to the "U0" */
1203 STRLEN len = strend - s;
1204 s = (char *) bytes_to_utf8((U8 *) s, &len);
1207 flags |= FLAG_DO_UTF8;
1210 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1211 flags |= FLAG_PARSE_UTF8;
1213 TEMPSYM_INIT(&sym, pat, patend, flags);
1215 return unpack_rec(&sym, s, s, strend, NULL );
1220 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1224 const I32 start_sp_offset = SP - PL_stack_base;
1230 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1231 bool beyond = FALSE;
1232 bool explicit_length;
1233 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1234 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1235 symptr->strbeg = s - strbeg;
1237 while (next_symbol(symptr)) {
1240 I32 datumtype = symptr->code;
1241 /* do first one only unless in list context
1242 / is implemented by unpacking the count, then popping it from the
1243 stack, so must check that we're not in the middle of a / */
1244 if ( unpack_only_one
1245 && (SP - PL_stack_base == start_sp_offset + 1)
1246 && (datumtype != '/') ) /* XXX can this be omitted */
1249 switch (howlen = symptr->howlen) {
1251 len = strend - strbeg; /* long enough */
1254 /* e_no_len and e_number */
1255 len = symptr->length;
1259 explicit_length = TRUE;
1261 beyond = s >= strend;
1263 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1265 /* props nonzero means we can process this letter. */
1266 const long size = props & PACK_SIZE_MASK;
1267 const long howmany = (strend - s) / size;
1271 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1272 if (len && unpack_only_one) len = 1;
1278 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1280 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1283 if (howlen == e_no_len)
1284 len = 16; /* len is not specified */
1292 tempsym_t savsym = *symptr;
1293 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1294 symptr->flags |= group_modifiers;
1295 symptr->patend = savsym.grpend;
1296 symptr->previous = &savsym;
1300 symptr->patptr = savsym.grpbeg;
1301 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1302 else symptr->flags &= ~FLAG_PARSE_UTF8;
1303 unpack_rec(symptr, s, strbeg, strend, &s);
1304 if (s == strend && savsym.howlen == e_star)
1305 break; /* No way to continue */
1308 savsym.flags = symptr->flags & ~group_modifiers;
1312 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1313 case '.' | TYPE_IS_SHRIEKING:
1318 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1319 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1320 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1321 const bool u8 = utf8;
1323 if (howlen == e_star) from = strbeg;
1324 else if (len <= 0) from = s;
1326 tempsym_t *group = symptr;
1328 while (--len && group) group = group->previous;
1329 from = group ? strbeg + group->strbeg : strbeg;
1332 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1333 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1334 XPUSHs(sv_2mortal(sv));
1337 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1338 case '@' | TYPE_IS_SHRIEKING:
1341 s = strbeg + symptr->strbeg;
1342 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1343 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1344 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1350 Perl_croak(aTHX_ "'@' outside of string in unpack");
1355 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1362 case 'X' | TYPE_IS_SHRIEKING:
1363 if (!len) /* Avoid division by 0 */
1366 const char *hop, *last;
1368 hop = last = strbeg;
1370 hop += UTF8SKIP(hop);
1377 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1381 len = (s - strbeg) % len;
1387 Perl_croak(aTHX_ "'X' outside of string in unpack");
1388 while (--s, UTF8_IS_CONTINUATION(*s)) {
1390 Perl_croak(aTHX_ "'X' outside of string in unpack");
1395 if (len > s - strbeg)
1396 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1400 case 'x' | TYPE_IS_SHRIEKING: {
1402 if (!len) /* Avoid division by 0 */
1404 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1405 else ai32 = (s - strbeg) % len;
1406 if (ai32 == 0) break;
1414 Perl_croak(aTHX_ "'x' outside of string in unpack");
1419 if (len > strend - s)
1420 Perl_croak(aTHX_ "'x' outside of string in unpack");
1425 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1431 /* Preliminary length estimate is assumed done in 'W' */
1432 if (len > strend - s) len = strend - s;
1438 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1439 if (hop >= strend) {
1441 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1446 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1448 } else if (len > strend - s)
1451 if (datumtype == 'Z') {
1452 /* 'Z' strips stuff after first null */
1453 const char *ptr, *end;
1455 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1456 sv = newSVpvn(s, ptr-s);
1457 if (howlen == e_star) /* exact for 'Z*' */
1458 len = ptr-s + (ptr != strend ? 1 : 0);
1459 } else if (datumtype == 'A') {
1460 /* 'A' strips both nulls and spaces */
1462 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1463 for (ptr = s+len-1; ptr >= s; ptr--)
1464 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1465 !is_utf8_space((U8 *) ptr)) break;
1466 if (ptr >= s) ptr += UTF8SKIP(ptr);
1469 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !isSPACE(*ptr)) break;
1475 sv = newSVpvn(s, ptr-s);
1476 } else sv = newSVpvn(s, len);
1480 /* Undo any upgrade done due to need_utf8() */
1481 if (!(symptr->flags & FLAG_WAS_UTF8))
1482 sv_utf8_downgrade(sv, 0);
1484 XPUSHs(sv_2mortal(sv));
1490 if (howlen == e_star || len > (strend - s) * 8)
1491 len = (strend - s) * 8;
1495 Newxz(PL_bitcount, 256, char);
1496 for (bits = 1; bits < 256; bits++) {
1497 if (bits & 1) PL_bitcount[bits]++;
1498 if (bits & 2) PL_bitcount[bits]++;
1499 if (bits & 4) PL_bitcount[bits]++;
1500 if (bits & 8) PL_bitcount[bits]++;
1501 if (bits & 16) PL_bitcount[bits]++;
1502 if (bits & 32) PL_bitcount[bits]++;
1503 if (bits & 64) PL_bitcount[bits]++;
1504 if (bits & 128) PL_bitcount[bits]++;
1508 while (len >= 8 && s < strend) {
1509 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1514 cuv += PL_bitcount[*(U8 *)s++];
1517 if (len && s < strend) {
1519 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1520 if (datumtype == 'b')
1522 if (bits & 1) cuv++;
1527 if (bits & 0x80) cuv++;
1534 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1537 if (datumtype == 'b') {
1539 const I32 ai32 = len;
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 7) bits >>= 1;
1543 if (s >= strend) break;
1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1545 } else bits = *(U8 *) s++;
1546 *str++ = bits & 1 ? '1' : '0';
1550 const I32 ai32 = len;
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 7) bits <<= 1;
1554 if (s >= strend) break;
1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1556 } else bits = *(U8 *) s++;
1557 *str++ = bits & 0x80 ? '1' : '0';
1561 SvCUR_set(sv, str - SvPVX_const(sv));
1568 /* Preliminary length estimate, acceptable for utf8 too */
1569 if (howlen == e_star || len > (strend - s) * 2)
1570 len = (strend - s) * 2;
1571 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1574 if (datumtype == 'h') {
1577 for (len = 0; len < ai32; len++) {
1578 if (len & 1) bits >>= 4;
1580 if (s >= strend) break;
1581 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1582 } else bits = * (U8 *) s++;
1583 *str++ = PL_hexdigit[bits & 15];
1587 const I32 ai32 = len;
1588 for (len = 0; len < ai32; len++) {
1589 if (len & 1) bits <<= 4;
1591 if (s >= strend) break;
1592 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1593 } else bits = *(U8 *) s++;
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
1598 SvCUR_set(sv, str - SvPVX_const(sv));
1604 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1605 if (aint >= 128) /* fake up signed chars */
1608 PUSHs(sv_2mortal(newSViv((IV)aint)));
1609 else if (checksum > bits_in_uv)
1610 cdouble += (NV)aint;
1619 if (explicit_length && datumtype == 'C')
1620 /* Switch to "character" mode */
1621 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1624 if (datumtype == 'C' ?
1625 (symptr->flags & FLAG_DO_UTF8) &&
1626 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1627 while (len-- > 0 && s < strend) {
1629 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1630 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1631 if (retlen == (STRLEN) -1 || retlen == 0)
1632 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1635 PUSHs(sv_2mortal(newSVuv((UV) val)));
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV) val;
1641 } else if (!checksum)
1643 const U8 ch = *(U8 *) s++;
1644 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1646 else if (checksum > bits_in_uv)
1647 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1649 while (len-- > 0) cuv += *(U8 *) s++;
1653 if (explicit_length) {
1654 /* Switch to "bytes in UTF-8" mode */
1655 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1657 /* Should be impossible due to the need_utf8() test */
1658 Perl_croak(aTHX_ "U0 mode on a byte string");
1662 if (len > strend - s) len = strend - s;
1664 if (len && unpack_only_one) len = 1;
1668 while (len-- > 0 && s < strend) {
1672 U8 result[UTF8_MAXLEN];
1673 const char *ptr = s;
1675 /* Bug: warns about bad utf8 even if we are short on bytes
1676 and will break out of the loop */
1677 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1680 len = UTF8SKIP(result);
1681 if (!uni_to_bytes(aTHX_ &ptr, strend,
1682 (char *) &result[1], len-1, 'U')) break;
1683 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1686 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1687 if (retlen == (STRLEN) -1 || retlen == 0)
1688 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1692 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1693 else if (checksum > bits_in_uv)
1694 cdouble += (NV) auv;
1699 case 's' | TYPE_IS_SHRIEKING:
1700 #if SHORTSIZE != SIZE16
1703 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1704 DO_BO_UNPACK(ashort, s);
1706 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1707 else if (checksum > bits_in_uv)
1708 cdouble += (NV)ashort;
1720 #if U16SIZE > SIZE16
1723 SHIFT16(utf8, s, strend, &ai16, datumtype);
1724 DO_BO_UNPACK(ai16, 16);
1725 #if U16SIZE > SIZE16
1730 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1731 else if (checksum > bits_in_uv)
1732 cdouble += (NV)ai16;
1737 case 'S' | TYPE_IS_SHRIEKING:
1738 #if SHORTSIZE != SIZE16
1740 unsigned short aushort;
1741 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1742 DO_BO_UNPACK(aushort, s);
1744 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1745 else if (checksum > bits_in_uv)
1746 cdouble += (NV)aushort;
1759 #if U16SIZE > SIZE16
1762 SHIFT16(utf8, s, strend, &au16, datumtype);
1763 DO_BO_UNPACK(au16, 16);
1765 if (datumtype == 'n')
1766 au16 = PerlSock_ntohs(au16);
1769 if (datumtype == 'v')
1773 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1774 else if (checksum > bits_in_uv)
1775 cdouble += (NV) au16;
1780 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1781 case 'v' | TYPE_IS_SHRIEKING:
1782 case 'n' | TYPE_IS_SHRIEKING:
1785 # if U16SIZE > SIZE16
1788 SHIFT16(utf8, s, strend, &ai16, datumtype);
1790 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1791 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1792 # endif /* HAS_NTOHS */
1794 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1795 ai16 = (I16) vtohs((U16) ai16);
1796 # endif /* HAS_VTOHS */
1798 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV) ai16;
1805 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1807 case 'i' | TYPE_IS_SHRIEKING:
1810 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1811 DO_BO_UNPACK(aint, i);
1813 PUSHs(sv_2mortal(newSViv((IV)aint)));
1814 else if (checksum > bits_in_uv)
1815 cdouble += (NV)aint;
1821 case 'I' | TYPE_IS_SHRIEKING:
1824 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1825 DO_BO_UNPACK(auint, i);
1827 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV)auint;
1837 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1838 #if IVSIZE == INTSIZE
1839 DO_BO_UNPACK(aiv, i);
1840 #elif IVSIZE == LONGSIZE
1841 DO_BO_UNPACK(aiv, l);
1842 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1843 DO_BO_UNPACK(aiv, 64);
1845 Perl_croak(aTHX_ "'j' not supported on this platform");
1848 PUSHs(sv_2mortal(newSViv(aiv)));
1849 else if (checksum > bits_in_uv)
1858 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1859 #if IVSIZE == INTSIZE
1860 DO_BO_UNPACK(auv, i);
1861 #elif IVSIZE == LONGSIZE
1862 DO_BO_UNPACK(auv, l);
1863 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1864 DO_BO_UNPACK(auv, 64);
1866 Perl_croak(aTHX_ "'J' not supported on this platform");
1869 PUSHs(sv_2mortal(newSVuv(auv)));
1870 else if (checksum > bits_in_uv)
1876 case 'l' | TYPE_IS_SHRIEKING:
1877 #if LONGSIZE != SIZE32
1880 SHIFT_VAR(utf8, s, strend, along, datumtype);
1881 DO_BO_UNPACK(along, l);
1883 PUSHs(sv_2mortal(newSViv((IV)along)));
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)along;
1896 #if U32SIZE > SIZE32
1899 SHIFT32(utf8, s, strend, &ai32, datumtype);
1900 DO_BO_UNPACK(ai32, 32);
1901 #if U32SIZE > SIZE32
1902 if (ai32 > 2147483647) ai32 -= 4294967296;
1905 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1906 else if (checksum > bits_in_uv)
1907 cdouble += (NV)ai32;
1912 case 'L' | TYPE_IS_SHRIEKING:
1913 #if LONGSIZE != SIZE32
1915 unsigned long aulong;
1916 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1917 DO_BO_UNPACK(aulong, l);
1919 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)aulong;
1934 #if U32SIZE > SIZE32
1937 SHIFT32(utf8, s, strend, &au32, datumtype);
1938 DO_BO_UNPACK(au32, 32);
1940 if (datumtype == 'N')
1941 au32 = PerlSock_ntohl(au32);
1944 if (datumtype == 'V')
1948 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1949 else if (checksum > bits_in_uv)
1950 cdouble += (NV)au32;
1955 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1956 case 'V' | TYPE_IS_SHRIEKING:
1957 case 'N' | TYPE_IS_SHRIEKING:
1960 # if U32SIZE > SIZE32
1963 SHIFT32(utf8, s, strend, &ai32, datumtype);
1965 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1966 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1969 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1970 ai32 = (I32)vtohl((U32)ai32);
1973 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1974 else if (checksum > bits_in_uv)
1975 cdouble += (NV)ai32;
1980 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1984 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1985 DO_BO_UNPACK_PC(aptr);
1986 /* newSVpv generates undef if aptr is NULL */
1987 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1995 while (len > 0 && s < strend) {
1997 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1998 auv = (auv << 7) | (ch & 0x7f);
1999 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2002 PUSHs(sv_2mortal(newSVuv(auv)));
2007 if (++bytes >= sizeof(UV)) { /* promote to string */
2010 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2011 while (s < strend) {
2012 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2013 sv = mul128(sv, (U8)(ch & 0x7f));
2019 t = SvPV_nolen_const(sv);
2023 PUSHs(sv_2mortal(sv));
2028 if ((s >= strend) && bytes)
2029 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2033 if (symptr->howlen == e_star)
2034 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2036 if (sizeof(char*) <= strend - s) {
2038 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2039 DO_BO_UNPACK_PC(aptr);
2040 /* newSVpvn generates undef if aptr is NULL */
2041 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2048 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2049 DO_BO_UNPACK(aquad, 64);
2051 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2052 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2053 else if (checksum > bits_in_uv)
2054 cdouble += (NV)aquad;
2062 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2063 DO_BO_UNPACK(auquad, 64);
2065 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2066 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2067 else if (checksum > bits_in_uv)
2068 cdouble += (NV)auquad;
2073 #endif /* HAS_QUAD */
2074 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2078 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2079 DO_BO_UNPACK_N(afloat, float);
2081 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2089 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2090 DO_BO_UNPACK_N(adouble, double);
2092 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2100 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2101 DO_BO_UNPACK_N(anv, NV);
2103 PUSHs(sv_2mortal(newSVnv(anv)));
2108 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2111 long double aldouble;
2112 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2113 DO_BO_UNPACK_N(aldouble, long double);
2115 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2117 cdouble += aldouble;
2123 * Initialise the decode mapping. By using a table driven
2124 * algorithm, the code will be character-set independent
2125 * (and just as fast as doing character arithmetic)
2127 if (PL_uudmap['M'] == 0) {
2130 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2131 PL_uudmap[(U8)PL_uuemap[i]] = i;
2133 * Because ' ' and '`' map to the same value,
2134 * we need to decode them both the same.
2139 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2140 sv = sv_2mortal(NEWSV(42, l));
2141 if (l) SvPOK_on(sv);
2144 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2150 next_uni_uu(aTHX_ &s, strend, &a);
2151 next_uni_uu(aTHX_ &s, strend, &b);
2152 next_uni_uu(aTHX_ &s, strend, &c);
2153 next_uni_uu(aTHX_ &s, strend, &d);
2154 hunk[0] = (char)((a << 2) | (b >> 4));
2155 hunk[1] = (char)((b << 4) | (c >> 2));
2156 hunk[2] = (char)((c << 6) | d);
2157 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2165 /* possible checksum byte */
2166 const char *skip = s+UTF8SKIP(s);
2167 if (skip < strend && *skip == '\n')
2173 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2178 len = PL_uudmap[*(U8*)s++] & 077;
2180 if (s < strend && ISUUCHAR(*s))
2181 a = PL_uudmap[*(U8*)s++] & 077;
2184 if (s < strend && ISUUCHAR(*s))
2185 b = PL_uudmap[*(U8*)s++] & 077;
2188 if (s < strend && ISUUCHAR(*s))
2189 c = PL_uudmap[*(U8*)s++] & 077;
2192 if (s < strend && ISUUCHAR(*s))
2193 d = PL_uudmap[*(U8*)s++] & 077;
2196 hunk[0] = (char)((a << 2) | (b >> 4));
2197 hunk[1] = (char)((b << 4) | (c >> 2));
2198 hunk[2] = (char)((c << 6) | d);
2199 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2204 else /* possible checksum byte */
2205 if (s + 1 < strend && s[1] == '\n')
2214 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2215 (checksum > bits_in_uv &&
2216 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2219 anv = (NV) (1 << (checksum & 15));
2220 while (checksum >= 16) {
2224 while (cdouble < 0.0)
2226 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2227 sv = newSVnv(cdouble);
2230 if (checksum < bits_in_uv) {
2231 UV mask = ((UV)1 << checksum) - 1;
2236 XPUSHs(sv_2mortal(sv));
2240 if (symptr->flags & FLAG_SLASH){
2241 if (SP - PL_stack_base - start_sp_offset <= 0)
2242 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2243 if( next_symbol(symptr) ){
2244 if( symptr->howlen == e_number )
2245 Perl_croak(aTHX_ "Count after length/code in unpack" );
2247 /* ...end of char buffer then no decent length available */
2248 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2250 /* take top of stack (hope it's numeric) */
2253 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2256 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2258 datumtype = symptr->code;
2259 explicit_length = FALSE;
2267 return SP - PL_stack_base - start_sp_offset;
2274 I32 gimme = GIMME_V;
2277 const char *pat = SvPV_const(left, llen);
2278 const char *s = SvPV_const(right, rlen);
2279 const char *strend = s + rlen;
2280 const char *patend = pat + llen;
2284 cnt = unpackstring(pat, patend, s, strend,
2285 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2286 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2289 if ( !cnt && gimme == G_SCALAR )
2290 PUSHs(&PL_sv_undef);
2295 doencodes(U8 *h, const char *s, I32 len)
2297 *h++ = PL_uuemap[len];
2299 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2300 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2301 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2302 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2307 const char r = (len > 1 ? s[1] : '\0');
2308 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2309 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2310 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2311 *h++ = PL_uuemap[0];
2318 S_is_an_int(pTHX_ const char *s, STRLEN l)
2320 SV *result = newSVpvn(s, l);
2321 char *const result_c = SvPV_nolen(result); /* convenience */
2322 char *out = result_c;
2332 SvREFCNT_dec(result);
2355 SvREFCNT_dec(result);
2361 SvCUR_set(result, out - result_c);
2365 /* pnum must be '\0' terminated */
2367 S_div128(pTHX_ SV *pnum, bool *done)
2370 char * const s = SvPV(pnum, len);
2376 const int i = m * 10 + (*t - '0');
2377 const int r = (i >> 7); /* r < 10 */
2385 SvCUR_set(pnum, (STRLEN) (t - s));
2390 =for apidoc pack_cat
2392 The engine implementing pack() Perl function. Note: parameters next_in_list and
2393 flags are not used. This call should not be used; use packlist instead.
2399 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2402 PERL_UNUSED_ARG(next_in_list);
2403 PERL_UNUSED_ARG(flags);
2405 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2407 (void)pack_rec( cat, &sym, beglist, endlist );
2412 =for apidoc packlist
2414 The engine implementing pack() Perl function.
2420 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2425 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2427 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2428 Also make sure any UTF8 flag is loaded */
2429 SvPV_force(cat, no_len);
2431 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2433 (void)pack_rec( cat, &sym, beglist, endlist );
2436 /* like sv_utf8_upgrade, but also repoint the group start markers */
2438 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2441 const char *from_ptr, *from_start, *from_end, **marks, **m;
2442 char *to_start, *to_ptr;
2444 if (SvUTF8(sv)) return;
2446 from_start = SvPVX_const(sv);
2447 from_end = from_start + SvCUR(sv);
2448 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2449 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2450 if (from_ptr == from_end) {
2451 /* Simple case: no character needs to be changed */
2456 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2457 Newx(to_start, len, char);
2458 Copy(from_start, to_start, from_ptr-from_start, char);
2459 to_ptr = to_start + (from_ptr-from_start);
2461 Newx(marks, sym_ptr->level+2, const char *);
2462 for (group=sym_ptr; group; group = group->previous)
2463 marks[group->level] = from_start + group->strbeg;
2464 marks[sym_ptr->level+1] = from_end+1;
2465 for (m = marks; *m < from_ptr; m++)
2466 *m = to_start + (*m-from_start);
2468 for (;from_ptr < from_end; from_ptr++) {
2469 while (*m == from_ptr) *m++ = to_ptr;
2470 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2474 while (*m == from_ptr) *m++ = to_ptr;
2475 if (m != marks + sym_ptr->level+1) {
2478 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2480 for (group=sym_ptr; group; group = group->previous)
2481 group->strbeg = marks[group->level] - to_start;
2486 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2487 from_start -= SvIVX(sv);
2490 SvFLAGS(sv) &= ~SVf_OOK;
2493 Safefree(from_start);
2494 SvPV_set(sv, to_start);
2495 SvCUR_set(sv, to_ptr - to_start);
2500 /* Exponential string grower. Makes string extension effectively O(n)
2501 needed says how many extra bytes we need (not counting the final '\0')
2502 Only grows the string if there is an actual lack of space
2505 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2506 const STRLEN cur = SvCUR(sv);
2507 const STRLEN len = SvLEN(sv);
2509 if (len - cur > needed) return SvPVX(sv);
2510 extend = needed > len ? needed : len;
2511 return SvGROW(sv, len+extend+1);
2516 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2518 tempsym_t lookahead;
2519 I32 items = endlist - beglist;
2520 bool found = next_symbol(symptr);
2521 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2522 bool warn_utf8 = ckWARN(WARN_UTF8);
2524 if (symptr->level == 0 && found && symptr->code == 'U') {
2525 marked_upgrade(aTHX_ cat, symptr);
2526 symptr->flags |= FLAG_DO_UTF8;
2529 symptr->strbeg = SvCUR(cat);
2535 SV *lengthcode = Nullsv;
2536 I32 datumtype = symptr->code;
2537 howlen_t howlen = symptr->howlen;
2538 char *start = SvPVX(cat);
2539 char *cur = start + SvCUR(cat);
2541 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2545 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2549 /* e_no_len and e_number */
2550 len = symptr->length;
2555 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2557 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2558 /* We can process this letter. */
2559 STRLEN size = props & PACK_SIZE_MASK;
2560 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2564 /* Look ahead for next symbol. Do we have code/code? */
2565 lookahead = *symptr;
2566 found = next_symbol(&lookahead);
2567 if (symptr->flags & FLAG_SLASH) {
2569 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2570 if (strchr("aAZ", lookahead.code)) {
2571 if (lookahead.howlen == e_number) count = lookahead.length;
2574 count = DO_UTF8(*beglist) ?
2575 sv_len_utf8(*beglist) : sv_len(*beglist);
2577 if (lookahead.code == 'Z') count++;
2580 if (lookahead.howlen == e_number && lookahead.length < items)
2581 count = lookahead.length;
2584 lookahead.howlen = e_number;
2585 lookahead.length = count;
2586 lengthcode = sv_2mortal(newSViv(count));
2589 /* Code inside the switch must take care to properly update
2590 cat (CUR length and '\0' termination) if it updated *cur and
2591 doesn't simply leave using break */
2592 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2594 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2595 (int) TYPE_NO_MODIFIERS(datumtype));
2597 Perl_croak(aTHX_ "'%%' may not be used in pack");
2600 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 case '.' | TYPE_IS_SHRIEKING:
2604 if (howlen == e_star) from = start;
2605 else if (len == 0) from = cur;
2607 tempsym_t *group = symptr;
2609 while (--len && group) group = group->previous;
2610 from = group ? start + group->strbeg : start;
2613 len = SvIV(fromstr);
2615 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2616 case '@' | TYPE_IS_SHRIEKING:
2619 from = start + symptr->strbeg;
2621 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2622 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2623 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2627 while (len && from < cur) {
2628 from += UTF8SKIP(from);
2632 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2634 /* Here we know from == cur */
2636 GROWING(0, cat, start, cur, len);
2637 Zero(cur, len, char);
2639 } else if (from < cur) {
2642 } else goto no_change;
2650 if (len > 0) goto grow;
2651 if (len == 0) goto no_change;
2658 tempsym_t savsym = *symptr;
2659 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2660 symptr->flags |= group_modifiers;
2661 symptr->patend = savsym.grpend;
2663 symptr->previous = &lookahead;
2666 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2667 else symptr->flags &= ~FLAG_PARSE_UTF8;
2668 was_utf8 = SvUTF8(cat);
2669 symptr->patptr = savsym.grpbeg;
2670 beglist = pack_rec(cat, symptr, beglist, endlist);
2671 if (SvUTF8(cat) != was_utf8)
2672 /* This had better be an upgrade while in utf8==0 mode */
2675 if (savsym.howlen == e_star && beglist == endlist)
2676 break; /* No way to continue */
2678 lookahead.flags = symptr->flags & ~group_modifiers;
2681 case 'X' | TYPE_IS_SHRIEKING:
2682 if (!len) /* Avoid division by 0 */
2689 hop += UTF8SKIP(hop);
2696 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2700 len = (cur-start) % len;
2704 if (len < 1) goto no_change;
2708 Perl_croak(aTHX_ "'%c' outside of string in pack",
2709 (int) TYPE_NO_MODIFIERS(datumtype));
2710 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2712 Perl_croak(aTHX_ "'%c' outside of string in pack",
2713 (int) TYPE_NO_MODIFIERS(datumtype));
2719 if (cur - start < len)
2720 Perl_croak(aTHX_ "'%c' outside of string in pack",
2721 (int) TYPE_NO_MODIFIERS(datumtype));
2724 if (cur < start+symptr->strbeg) {
2725 /* Make sure group starts don't point into the void */
2727 const STRLEN length = cur-start;
2728 for (group = symptr;
2729 group && length < group->strbeg;
2730 group = group->previous) group->strbeg = length;
2731 lookahead.strbeg = length;
2734 case 'x' | TYPE_IS_SHRIEKING: {
2736 if (!len) /* Avoid division by 0 */
2738 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2739 else ai32 = (cur - start) % len;
2740 if (ai32 == 0) goto no_change;
2752 aptr = SvPV_const(fromstr, fromlen);
2753 if (DO_UTF8(fromstr)) {
2754 const char *end, *s;
2756 if (!utf8 && !SvUTF8(cat)) {
2757 marked_upgrade(aTHX_ cat, symptr);
2758 lookahead.flags |= FLAG_DO_UTF8;
2759 lookahead.strbeg = symptr->strbeg;
2762 cur = start + SvCUR(cat);
2764 if (howlen == e_star) {
2765 if (utf8) goto string_copy;
2769 end = aptr + fromlen;
2770 fromlen = datumtype == 'Z' ? len-1 : len;
2771 while ((I32) fromlen > 0 && s < end) {
2776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2779 if (datumtype == 'Z') len++;
2785 fromlen = len - fromlen;
2786 if (datumtype == 'Z') fromlen--;
2787 if (howlen == e_star) {
2789 if (datumtype == 'Z') len++;
2791 GROWING(0, cat, start, cur, len);
2792 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2793 datumtype | TYPE_IS_PACK))
2794 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2798 if (howlen == e_star) {
2800 if (datumtype == 'Z') len++;
2802 if (len <= (I32) fromlen) {
2804 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2806 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2808 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2809 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2811 while (fromlen > 0) {
2812 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2818 if (howlen == e_star) {
2820 if (datumtype == 'Z') len++;
2822 if (len <= (I32) fromlen) {
2824 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2826 GROWING(0, cat, start, cur, len);
2827 Copy(aptr, cur, fromlen, char);
2831 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2837 const char *str, *end;
2844 str = SvPV_const(fromstr, fromlen);
2845 end = str + fromlen;
2846 if (DO_UTF8(fromstr)) {
2848 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2850 utf8_source = FALSE;
2851 utf8_flags = 0; /* Unused, but keep compilers happy */
2853 if (howlen == e_star) len = fromlen;
2854 field_len = (len+7)/8;
2855 GROWING(utf8, cat, start, cur, field_len);
2856 if (len > (I32)fromlen) len = fromlen;
2859 if (datumtype == 'B')
2863 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2865 } else bits |= *str++ & 1;
2866 if (l & 7) bits <<= 1;
2868 PUSH_BYTE(utf8, cur, bits);
2873 /* datumtype == 'b' */
2877 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2878 if (val & 1) bits |= 0x80;
2879 } else if (*str++ & 1)
2881 if (l & 7) bits >>= 1;
2883 PUSH_BYTE(utf8, cur, bits);
2889 if (datumtype == 'B')
2890 bits <<= 7 - (l & 7);
2892 bits >>= 7 - (l & 7);
2893 PUSH_BYTE(utf8, cur, bits);
2896 /* Determine how many chars are left in the requested field */
2898 if (howlen == e_star) field_len = 0;
2899 else field_len -= l;
2900 Zero(cur, field_len, char);
2906 const char *str, *end;
2913 str = SvPV_const(fromstr, fromlen);
2914 end = str + fromlen;
2915 if (DO_UTF8(fromstr)) {
2917 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2919 utf8_source = FALSE;
2920 utf8_flags = 0; /* Unused, but keep compilers happy */
2922 if (howlen == e_star) len = fromlen;
2923 field_len = (len+1)/2;
2924 GROWING(utf8, cat, start, cur, field_len);
2925 if (!utf8 && len > (I32)fromlen) len = fromlen;
2928 if (datumtype == 'H')
2932 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2933 if (val < 256 && isALPHA(val))
2934 bits |= (val + 9) & 0xf;
2937 } else if (isALPHA(*str))
2938 bits |= (*str++ + 9) & 0xf;
2940 bits |= *str++ & 0xf;
2941 if (l & 1) bits <<= 4;
2943 PUSH_BYTE(utf8, cur, bits);
2951 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2952 if (val < 256 && isALPHA(val))
2953 bits |= ((val + 9) & 0xf) << 4;
2955 bits |= (val & 0xf) << 4;
2956 } else if (isALPHA(*str))
2957 bits |= ((*str++ + 9) & 0xf) << 4;
2959 bits |= (*str++ & 0xf) << 4;
2960 if (l & 1) bits >>= 4;
2962 PUSH_BYTE(utf8, cur, bits);
2968 PUSH_BYTE(utf8, cur, bits);
2971 /* Determine how many chars are left in the requested field */
2973 if (howlen == e_star) field_len = 0;
2974 else field_len -= l;
2975 Zero(cur, field_len, char);
2983 aiv = SvIV(fromstr);
2984 if ((-128 > aiv || aiv > 127) &&
2986 Perl_warner(aTHX_ packWARN(WARN_PACK),
2987 "Character in 'c' format wrapped in pack");
2988 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2993 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2996 GROWING(0, cat, start, cur, len);
3000 aiv = SvIV(fromstr);
3001 if ((0 > aiv || aiv > 0xff) &&
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'C' format wrapped in pack");
3005 *cur++ = (char)(aiv & 0xff);
3010 U8 in_bytes = IN_BYTES;
3012 end = start+SvLEN(cat)-1;
3013 if (utf8) end -= UTF8_MAXLEN-1;
3017 auv = SvUV(fromstr);
3018 if (in_bytes) auv = auv % 0x100;
3023 SvCUR_set(cat, cur - start);
3025 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3026 end = start+SvLEN(cat)-UTF8_MAXLEN;
3028 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3031 0 : UNICODE_ALLOW_ANY);
3036 SvCUR_set(cat, cur - start);
3037 marked_upgrade(aTHX_ cat, symptr);
3038 lookahead.flags |= FLAG_DO_UTF8;
3039 lookahead.strbeg = symptr->strbeg;
3042 cur = start + SvCUR(cat);
3043 end = start+SvLEN(cat)-UTF8_MAXLEN;
3046 if (ckWARN(WARN_PACK))
3047 Perl_warner(aTHX_ packWARN(WARN_PACK),
3048 "Character in 'W' format wrapped in pack");
3053 SvCUR_set(cat, cur - start);
3054 GROWING(0, cat, start, cur, len+1);
3055 end = start+SvLEN(cat)-1;
3057 *(U8 *) cur++ = (U8)auv;
3066 if (!(symptr->flags & FLAG_DO_UTF8)) {
3067 marked_upgrade(aTHX_ cat, symptr);
3068 lookahead.flags |= FLAG_DO_UTF8;
3069 lookahead.strbeg = symptr->strbeg;
3075 end = start+SvLEN(cat);
3076 if (!utf8) end -= UTF8_MAXLEN;
3080 auv = SvUV(fromstr);
3082 U8 buffer[UTF8_MAXLEN], *endb;
3083 endb = uvuni_to_utf8_flags(buffer, auv,
3085 0 : UNICODE_ALLOW_ANY);
3086 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3088 SvCUR_set(cat, cur - start);
3089 GROWING(0, cat, start, cur,
3090 len+(endb-buffer)*UTF8_EXPAND);
3091 end = start+SvLEN(cat);
3093 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3097 SvCUR_set(cat, cur - start);
3098 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3099 end = start+SvLEN(cat)-UTF8_MAXLEN;
3101 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3103 0 : UNICODE_ALLOW_ANY);
3108 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3114 anv = SvNV(fromstr);
3116 /* VOS does not automatically map a floating-point overflow
3117 during conversion from double to float into infinity, so we
3118 do it by hand. This code should either be generalized for
3119 any OS that needs it, or removed if and when VOS implements
3120 posix-976 (suggestion to support mapping to infinity).
3121 Paul.Green@stratus.com 02-04-02. */
3123 afloat = _float_constants[0]; /* single prec. inf. */
3124 else if (anv < -FLT_MAX)
3125 afloat = _float_constants[0]; /* single prec. inf. */
3126 else afloat = (float) anv;
3128 # if defined(VMS) && !defined(__IEEE_FP)
3129 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3130 * on Alpha; fake it if we don't have them.
3134 else if (anv < -FLT_MAX)
3136 else afloat = (float)anv;
3138 afloat = (float)anv;
3140 #endif /* __VOS__ */
3141 DO_BO_PACK_N(afloat, float);
3142 PUSH_VAR(utf8, cur, afloat);
3150 anv = SvNV(fromstr);
3152 /* VOS does not automatically map a floating-point overflow
3153 during conversion from long double to double into infinity,
3154 so we do it by hand. This code should either be generalized
3155 for any OS that needs it, or removed if and when VOS
3156 implements posix-976 (suggestion to support mapping to
3157 infinity). Paul.Green@stratus.com 02-04-02. */
3159 adouble = _double_constants[0]; /* double prec. inf. */
3160 else if (anv < -DBL_MAX)
3161 adouble = _double_constants[0]; /* double prec. inf. */
3162 else adouble = (double) anv;
3164 # if defined(VMS) && !defined(__IEEE_FP)
3165 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3166 * on Alpha; fake it if we don't have them.
3170 else if (anv < -DBL_MAX)
3172 else adouble = (double)anv;
3174 adouble = (double)anv;
3176 #endif /* __VOS__ */
3177 DO_BO_PACK_N(adouble, double);
3178 PUSH_VAR(utf8, cur, adouble);
3183 Zero(&anv, 1, NV); /* can be long double with unused bits */
3186 anv = SvNV(fromstr);
3187 DO_BO_PACK_N(anv, NV);
3188 PUSH_VAR(utf8, cur, anv);
3192 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3194 long double aldouble;
3195 /* long doubles can have unused bits, which may be nonzero */
3196 Zero(&aldouble, 1, long double);
3199 aldouble = (long double)SvNV(fromstr);
3200 DO_BO_PACK_N(aldouble, long double);
3201 PUSH_VAR(utf8, cur, aldouble);
3206 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3207 case 'n' | TYPE_IS_SHRIEKING:
3213 ai16 = (I16)SvIV(fromstr);
3215 ai16 = PerlSock_htons(ai16);
3217 PUSH16(utf8, cur, &ai16);
3220 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3221 case 'v' | TYPE_IS_SHRIEKING:
3227 ai16 = (I16)SvIV(fromstr);
3231 PUSH16(utf8, cur, &ai16);
3234 case 'S' | TYPE_IS_SHRIEKING:
3235 #if SHORTSIZE != SIZE16
3237 unsigned short aushort;
3239 aushort = SvUV(fromstr);
3240 DO_BO_PACK(aushort, s);
3241 PUSH_VAR(utf8, cur, aushort);
3251 au16 = (U16)SvUV(fromstr);
3252 DO_BO_PACK(au16, 16);
3253 PUSH16(utf8, cur, &au16);
3256 case 's' | TYPE_IS_SHRIEKING:
3257 #if SHORTSIZE != SIZE16
3261 ashort = SvIV(fromstr);
3262 DO_BO_PACK(ashort, s);
3263 PUSH_VAR(utf8, cur, ashort);
3273 ai16 = (I16)SvIV(fromstr);
3274 DO_BO_PACK(ai16, 16);
3275 PUSH16(utf8, cur, &ai16);
3279 case 'I' | TYPE_IS_SHRIEKING:
3283 auint = SvUV(fromstr);
3284 DO_BO_PACK(auint, i);
3285 PUSH_VAR(utf8, cur, auint);
3292 aiv = SvIV(fromstr);
3293 #if IVSIZE == INTSIZE
3295 #elif IVSIZE == LONGSIZE
3297 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3298 DO_BO_PACK(aiv, 64);
3300 Perl_croak(aTHX_ "'j' not supported on this platform");
3302 PUSH_VAR(utf8, cur, aiv);
3309 auv = SvUV(fromstr);
3310 #if UVSIZE == INTSIZE
3312 #elif UVSIZE == LONGSIZE
3314 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3315 DO_BO_PACK(auv, 64);
3317 Perl_croak(aTHX_ "'J' not supported on this platform");
3319 PUSH_VAR(utf8, cur, auv);
3326 anv = SvNV(fromstr);
3330 SvCUR_set(cat, cur - start);
3331 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3334 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3335 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3336 any negative IVs will have already been got by the croak()
3337 above. IOK is untrue for fractions, so we test them
3338 against UV_MAX_P1. */
3339 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3340 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3341 char *in = buf + sizeof(buf);
3342 UV auv = SvUV(fromstr);
3345 *--in = (char)((auv & 0x7f) | 0x80);
3348 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3349 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3350 in, (buf + sizeof(buf)) - in);
3351 } else if (SvPOKp(fromstr))
3353 else if (SvNOKp(fromstr)) {
3354 /* 10**NV_MAX_10_EXP is the largest power of 10
3355 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3356 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3357 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3358 And with that many bytes only Inf can overflow.
3359 Some C compilers are strict about integral constant
3360 expressions so we conservatively divide by a slightly
3361 smaller integer instead of multiplying by the exact
3362 floating-point value.
3364 #ifdef NV_MAX_10_EXP
3365 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3366 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3368 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3369 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3371 char *in = buf + sizeof(buf);
3373 anv = Perl_floor(anv);
3375 const NV next = Perl_floor(anv / 128);
3376 if (in <= buf) /* this cannot happen ;-) */
3377 Perl_croak(aTHX_ "Cannot compress integer in pack");
3378 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3381 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3382 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3383 in, (buf + sizeof(buf)) - in);
3392 /* Copy string and check for compliance */
3393 from = SvPV_const(fromstr, len);
3394 if ((norm = is_an_int(from, len)) == NULL)
3395 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3397 Newx(result, len, char);
3400 while (!done) *--in = div128(norm, &done) | 0x80;
3401 result[len - 1] &= 0x7F; /* clear continue bit */
3402 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3403 in, (result + len) - in);
3405 SvREFCNT_dec(norm); /* free norm */
3410 case 'i' | TYPE_IS_SHRIEKING:
3414 aint = SvIV(fromstr);
3415 DO_BO_PACK(aint, i);
3416 PUSH_VAR(utf8, cur, aint);
3419 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3420 case 'N' | TYPE_IS_SHRIEKING:
3426 au32 = SvUV(fromstr);
3428 au32 = PerlSock_htonl(au32);
3430 PUSH32(utf8, cur, &au32);
3433 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3434 case 'V' | TYPE_IS_SHRIEKING:
3440 au32 = SvUV(fromstr);
3444 PUSH32(utf8, cur, &au32);
3447 case 'L' | TYPE_IS_SHRIEKING:
3448 #if LONGSIZE != SIZE32
3450 unsigned long aulong;
3452 aulong = SvUV(fromstr);
3453 DO_BO_PACK(aulong, l);
3454 PUSH_VAR(utf8, cur, aulong);
3464 au32 = SvUV(fromstr);
3465 DO_BO_PACK(au32, 32);
3466 PUSH32(utf8, cur, &au32);
3469 case 'l' | TYPE_IS_SHRIEKING:
3470 #if LONGSIZE != SIZE32
3474 along = SvIV(fromstr);
3475 DO_BO_PACK(along, l);
3476 PUSH_VAR(utf8, cur, along);
3486 ai32 = SvIV(fromstr);
3487 DO_BO_PACK(ai32, 32);
3488 PUSH32(utf8, cur, &ai32);
3496 auquad = (Uquad_t) SvUV(fromstr);
3497 DO_BO_PACK(auquad, 64);
3498 PUSH_VAR(utf8, cur, auquad);
3505 aquad = (Quad_t)SvIV(fromstr);
3506 DO_BO_PACK(aquad, 64);
3507 PUSH_VAR(utf8, cur, aquad);
3510 #endif /* HAS_QUAD */
3512 len = 1; /* assume SV is correct length */
3513 GROWING(utf8, cat, start, cur, sizeof(char *));
3520 SvGETMAGIC(fromstr);
3521 if (!SvOK(fromstr)) aptr = NULL;
3523 /* XXX better yet, could spirit away the string to
3524 * a safe spot and hang on to it until the result
3525 * of pack() (and all copies of the result) are
3528 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3529 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3530 Perl_warner(aTHX_ packWARN(WARN_PACK),
3531 "Attempt to pack pointer to temporary value");
3533 if (SvPOK(fromstr) || SvNIOK(fromstr))
3534 aptr = SvPV_nomg_const_nolen(fromstr);
3536 aptr = SvPV_force_flags_nolen(fromstr, 0);
3538 DO_BO_PACK_PC(aptr);
3539 PUSH_VAR(utf8, cur, aptr);
3543 const char *aptr, *aend;
3547 if (len <= 2) len = 45;
3548 else len = len / 3 * 3;
3550 if (ckWARN(WARN_PACK))
3551 Perl_warner(aTHX_ packWARN(WARN_PACK),
3552 "Field too wide in 'u' format in pack");
3555 aptr = SvPV_const(fromstr, fromlen);
3556 from_utf8 = DO_UTF8(fromstr);
3558 aend = aptr + fromlen;
3559 fromlen = sv_len_utf8(fromstr);
3560 } else aend = NULL; /* Unused, but keep compilers happy */
3561 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3562 while (fromlen > 0) {
3565 U8 hunk[1+63/3*4+1];
3567 if ((I32)fromlen > len)
3573 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3574 'u' | TYPE_IS_PACK)) {
3576 SvCUR_set(cat, cur - start);
3577 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3579 end = doencodes(hunk, buffer, todo);
3581 end = doencodes(hunk, aptr, todo);
3584 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3591 SvCUR_set(cat, cur - start);
3593 *symptr = lookahead;
3602 dSP; dMARK; dORIGMARK; dTARGET;
3603 register SV *cat = TARG;
3605 SV *pat_sv = *++MARK;
3606 register const char *pat = SvPV_const(pat_sv, fromlen);
3607 register const char *patend = pat + fromlen;
3610 sv_setpvn(cat, "", 0);
3613 packlist(cat, pat, patend, MARK, SP + 1);
3623 * c-indentation-style: bsd
3625 * indent-tabs-mode: t
3628 * ex: set ts=8 sts=4 sw=4 noet: