3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
243 # define DO_BO_UNPACK_PC(var)
244 # define DO_BO_PACK_PC(var)
246 #else /* PERL_PACK_CAN_BYTEORDER */
248 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
251 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
253 # define DO_BO_UNPACK(var, type) \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
262 # define DO_BO_PACK(var, type) \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
271 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
285 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
299 # define BO_CANT_DOIT(action, type) \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
315 # if PTRSIZE == INTSIZE
316 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320 # elif PTRSIZE == LONGSIZE
321 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
326 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
327 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
328 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
329 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
332 # if defined(my_htolen) && defined(my_letohn) && \
333 defined(my_htoben) && defined(my_betohn)
334 # define DO_BO_UNPACK_N(var, type) \
336 switch (TYPE_ENDIANNESS(datumtype)) { \
337 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
338 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
343 # define DO_BO_PACK_N(var, type) \
345 switch (TYPE_ENDIANNESS(datumtype)) { \
346 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
347 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
352 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
353 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
356 #endif /* PERL_PACK_CAN_BYTEORDER */
358 #define PACK_SIZE_CANNOT_CSUM 0x80
359 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
360 #define PACK_SIZE_MASK 0x3F
362 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
363 in). You're unlikely ever to need to regenerate them. */
365 #if TYPE_IS_SHRIEKING != 0x100
366 ++++shriek offset should be 256
369 typedef U8 packprops_t;
372 const packprops_t packprops[512] = {
374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
375 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
380 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
381 /* D */ LONG_DOUBLESIZE,
388 /* I */ sizeof(unsigned int),
395 #if defined(HAS_QUAD)
396 /* Q */ sizeof(Uquad_t),
403 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
405 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 /* c */ sizeof(char),
408 /* d */ sizeof(double),
410 /* f */ sizeof(float),
419 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
420 #if defined(HAS_QUAD)
421 /* q */ sizeof(Quad_t),
429 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 /* I */ sizeof(unsigned int),
447 /* L */ sizeof(unsigned long),
449 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
455 /* S */ sizeof(unsigned short),
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 /* l */ sizeof(long),
468 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
474 /* s */ sizeof(short),
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0
492 /* EBCDIC (or bust) */
493 const packprops_t packprops[512] = {
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 /* c */ sizeof(char),
505 /* d */ sizeof(double),
507 /* f */ sizeof(float),
517 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
518 #if defined(HAS_QUAD)
519 /* q */ sizeof(Quad_t),
523 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
531 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
532 /* D */ LONG_DOUBLESIZE,
539 /* I */ sizeof(unsigned int),
547 #if defined(HAS_QUAD)
548 /* Q */ sizeof(Uquad_t),
552 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
557 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 /* l */ sizeof(long),
574 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* s */ sizeof(short),
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* I */ sizeof(unsigned int),
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* L */ sizeof(unsigned long),
594 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* S */ sizeof(unsigned short),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
613 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
617 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
618 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
619 /* We try to process malformed UTF-8 as much as possible (preferrably with
620 warnings), but these two mean we make no progress in the string and
621 might enter an infinite loop */
622 if (retlen == (STRLEN) -1 || retlen == 0)
623 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
624 (int) TYPE_NO_MODIFIERS(datumtype));
626 if (ckWARN(WARN_UNPACK))
627 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
628 "Character in '%c' format wrapped in unpack",
629 (int) TYPE_NO_MODIFIERS(datumtype));
636 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
637 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
641 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
645 const char *from = *s;
647 const U32 flags = ckWARN(WARN_UTF8) ?
648 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
649 for (;buf_len > 0; buf_len--) {
650 if (from >= end) return FALSE;
651 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
652 if (retlen == (STRLEN) -1 || retlen == 0) {
653 from += UTF8SKIP(from);
655 } else from += retlen;
660 *(U8 *)buf++ = (U8)val;
662 /* We have enough characters for the buffer. Did we have problems ? */
665 /* Rewalk the string fragment while warning */
667 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
668 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
669 if (ptr >= end) break;
670 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
672 if (from > end) from = end;
674 if ((bad & 2) && ckWARN(WARN_UNPACK))
675 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
676 WARN_PACK : WARN_UNPACK),
677 "Character(s) in '%c' format wrapped in %s",
678 (int) TYPE_NO_MODIFIERS(datumtype),
679 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
686 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
690 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
691 if (val >= 0x100 || !ISUUCHAR(val) ||
692 retlen == (STRLEN) -1 || retlen == 0) {
696 *out = PL_uudmap[val] & 077;
702 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
703 U8 buffer[UTF8_MAXLEN];
704 const U8 *end = start + len;
706 while (start < end) {
708 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
718 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
726 #define PUSH_BYTES(utf8, cur, buf, len) \
728 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
730 Copy(buf, cur, len, char); \
735 #define GROWING(utf8, cat, start, cur, in_len) \
737 STRLEN glen = (in_len); \
738 if (utf8) glen *= UTF8_EXPAND; \
739 if ((cur) + glen >= (start) + SvLEN(cat)) { \
740 (start) = sv_exp_grow(aTHX_ cat, glen); \
741 (cur) = (start) + SvCUR(cat); \
745 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
747 const STRLEN glen = (in_len); \
749 if (utf8) gl *= UTF8_EXPAND; \
750 if ((cur) + gl >= (start) + SvLEN(cat)) { \
752 SvCUR_set((cat), (cur) - (start)); \
753 (start) = sv_exp_grow(aTHX_ cat, gl); \
754 (cur) = (start) + SvCUR(cat); \
756 PUSH_BYTES(utf8, cur, buf, glen); \
759 #define PUSH_BYTE(utf8, s, byte) \
762 const U8 au8 = (byte); \
763 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
764 } else *(U8 *)(s)++ = (byte); \
767 /* Only to be used inside a loop (see the break) */
768 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
771 if (str >= end) break; \
772 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
773 if (retlen == (STRLEN) -1 || retlen == 0) { \
775 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
780 static const char *_action( const tempsym_t* symptr )
782 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
785 /* Returns the sizeof() struct described by pat */
787 S_measure_struct(pTHX_ tempsym_t* symptr)
791 while (next_symbol(symptr)) {
795 switch (symptr->howlen) {
797 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
801 /* e_no_len and e_number */
802 len = symptr->length;
806 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
809 /* endianness doesn't influence the size of a type */
810 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
812 Perl_croak(aTHX_ "Invalid type '%c' in %s",
813 (int)TYPE_NO_MODIFIERS(symptr->code),
815 #ifdef PERL_PACK_CAN_SHRIEKSIGN
816 case '.' | TYPE_IS_SHRIEKING:
817 case '@' | TYPE_IS_SHRIEKING:
822 case 'U': /* XXXX Is it correct? */
825 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
826 (int) TYPE_NO_MODIFIERS(symptr->code),
833 tempsym_t savsym = *symptr;
834 symptr->patptr = savsym.grpbeg;
835 symptr->patend = savsym.grpend;
836 /* XXXX Theoretically, we need to measure many times at
837 different positions, since the subexpression may contain
838 alignment commands, but be not of aligned length.
839 Need to detect this and croak(). */
840 size = measure_struct(symptr);
844 case 'X' | TYPE_IS_SHRIEKING:
845 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
847 if (!len) /* Avoid division by 0 */
849 len = total % len; /* Assumed: the start is aligned. */
854 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
856 case 'x' | TYPE_IS_SHRIEKING:
857 if (!len) /* Avoid division by 0 */
859 star = total % len; /* Assumed: the start is aligned. */
860 if (star) /* Other portable ways? */
884 size = sizeof(char*);
894 /* locate matching closing parenthesis or bracket
895 * returns char pointer to char after match, or NULL
898 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
900 while (patptr < patend) {
901 const char c = *patptr++;
908 while (patptr < patend && *patptr != '\n')
912 patptr = group_end(patptr, patend, ')') + 1;
914 patptr = group_end(patptr, patend, ']') + 1;
916 Perl_croak(aTHX_ "No group ending character '%c' found in template",
922 /* Convert unsigned decimal number to binary.
923 * Expects a pointer to the first digit and address of length variable
924 * Advances char pointer to 1st non-digit char and returns number
927 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
929 I32 len = *patptr++ - '0';
930 while (isDIGIT(*patptr)) {
931 if (len >= 0x7FFFFFFF/10)
932 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
933 len = (len * 10) + (*patptr++ - '0');
939 /* The marvellous template parsing routine: Using state stored in *symptr,
940 * locates next template code and count
943 S_next_symbol(pTHX_ tempsym_t* symptr )
945 const char* patptr = symptr->patptr;
946 const char* patend = symptr->patend;
948 symptr->flags &= ~FLAG_SLASH;
950 while (patptr < patend) {
951 if (isSPACE(*patptr))
953 else if (*patptr == '#') {
955 while (patptr < patend && *patptr != '\n')
960 /* We should have found a template code */
961 I32 code = *patptr++ & 0xFF;
962 U32 inherited_modifiers = 0;
964 if (code == ','){ /* grandfather in commas but with a warning */
965 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
966 symptr->flags |= FLAG_COMMA;
967 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
968 "Invalid type ',' in %s", _action( symptr ) );
973 /* for '(', skip to ')' */
975 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
976 Perl_croak(aTHX_ "()-group starts with a count in %s",
978 symptr->grpbeg = patptr;
979 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
980 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
981 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
985 /* look for group modifiers to inherit */
986 if (TYPE_ENDIANNESS(symptr->flags)) {
987 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
988 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
991 /* look for modifiers */
992 while (patptr < patend) {
997 modifier = TYPE_IS_SHRIEKING;
998 allowed = SHRIEKING_ALLOWED_TYPES;
1000 #ifdef PERL_PACK_CAN_BYTEORDER
1002 modifier = TYPE_IS_BIG_ENDIAN;
1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1006 modifier = TYPE_IS_LITTLE_ENDIAN;
1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1009 #endif /* PERL_PACK_CAN_BYTEORDER */
1019 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1020 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1021 allowed, _action( symptr ) );
1023 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1025 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1026 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1027 TYPE_ENDIANNESS_MASK)
1028 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1029 *patptr, _action( symptr ) );
1031 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1032 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1033 "Duplicate modifier '%c' after '%c' in %s",
1034 *patptr, (int) TYPE_NO_MODIFIERS(code),
1035 _action( symptr ) );
1042 /* inherit modifiers */
1043 code |= inherited_modifiers;
1045 /* look for count and/or / */
1046 if (patptr < patend) {
1047 if (isDIGIT(*patptr)) {
1048 patptr = get_num( patptr, &symptr->length );
1049 symptr->howlen = e_number;
1051 } else if (*patptr == '*') {
1053 symptr->howlen = e_star;
1055 } else if (*patptr == '[') {
1056 const char* lenptr = ++patptr;
1057 symptr->howlen = e_number;
1058 patptr = group_end( patptr, patend, ']' ) + 1;
1059 /* what kind of [] is it? */
1060 if (isDIGIT(*lenptr)) {
1061 lenptr = get_num( lenptr, &symptr->length );
1062 if( *lenptr != ']' )
1063 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1064 _action( symptr ) );
1066 tempsym_t savsym = *symptr;
1067 symptr->patend = patptr-1;
1068 symptr->patptr = lenptr;
1069 savsym.length = measure_struct(symptr);
1073 symptr->howlen = e_no_len;
1078 while (patptr < patend) {
1079 if (isSPACE(*patptr))
1081 else if (*patptr == '#') {
1083 while (patptr < patend && *patptr != '\n')
1085 if (patptr < patend)
1088 if (*patptr == '/') {
1089 symptr->flags |= FLAG_SLASH;
1091 if (patptr < patend &&
1092 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1093 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1094 _action( symptr ) );
1100 /* at end - no count, no / */
1101 symptr->howlen = e_no_len;
1105 symptr->code = code;
1106 symptr->patptr = patptr;
1110 symptr->patptr = patptr;
1115 There is no way to cleanly handle the case where we should process the
1116 string per byte in its upgraded form while it's really in downgraded form
1117 (e.g. estimates like strend-s as an upper bound for the number of
1118 characters left wouldn't work). So if we foresee the need of this
1119 (pattern starts with U or contains U0), we want to work on the encoded
1120 version of the string. Users are advised to upgrade their pack string
1121 themselves if they need to do a lot of unpacks like this on it
1124 need_utf8(const char *pat, const char *patend)
1127 while (pat < patend) {
1128 if (pat[0] == '#') {
1130 pat = (const char *) memchr(pat, '\n', patend-pat);
1131 if (!pat) return FALSE;
1132 } else if (pat[0] == 'U') {
1133 if (first || pat[1] == '0') return TRUE;
1134 } else first = FALSE;
1141 first_symbol(const char *pat, const char *patend) {
1142 while (pat < patend) {
1143 if (pat[0] != '#') return pat[0];
1145 pat = (const char *) memchr(pat, '\n', patend-pat);
1153 =for apidoc unpack_str
1155 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1156 and ocnt are not used. This call should not be used, use unpackstring instead.
1161 Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1164 PERL_UNUSED_ARG(strbeg);
1165 PERL_UNUSED_ARG(new_s);
1166 PERL_UNUSED_ARG(ocnt);
1168 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1169 else if (need_utf8(pat, patend)) {
1170 /* We probably should try to avoid this in case a scalar context call
1171 wouldn't get to the "U0" */
1172 STRLEN len = strend - s;
1173 s = (char *) bytes_to_utf8((U8 *) s, &len);
1176 flags |= FLAG_DO_UTF8;
1179 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1180 flags |= FLAG_PARSE_UTF8;
1182 TEMPSYM_INIT(&sym, pat, patend, flags);
1184 return unpack_rec(&sym, s, s, strend, NULL );
1188 =for apidoc unpackstring
1190 The engine implementing unpack() Perl function. C<unpackstring> puts the
1191 extracted list items on the stack and returns the number of elements.
1192 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1197 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1201 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1202 else if (need_utf8(pat, patend)) {
1203 /* We probably should try to avoid this in case a scalar context call
1204 wouldn't get to the "U0" */
1205 STRLEN len = strend - s;
1206 s = (char *) bytes_to_utf8((U8 *) s, &len);
1209 flags |= FLAG_DO_UTF8;
1212 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1213 flags |= FLAG_PARSE_UTF8;
1215 TEMPSYM_INIT(&sym, pat, patend, flags);
1217 return unpack_rec(&sym, s, s, strend, NULL );
1222 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1226 const I32 start_sp_offset = SP - PL_stack_base;
1232 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1233 bool beyond = FALSE;
1234 bool explicit_length;
1235 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1236 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1237 symptr->strbeg = s - strbeg;
1239 while (next_symbol(symptr)) {
1242 I32 datumtype = symptr->code;
1243 /* do first one only unless in list context
1244 / is implemented by unpacking the count, then popping it from the
1245 stack, so must check that we're not in the middle of a / */
1246 if ( unpack_only_one
1247 && (SP - PL_stack_base == start_sp_offset + 1)
1248 && (datumtype != '/') ) /* XXX can this be omitted */
1251 switch (howlen = symptr->howlen) {
1253 len = strend - strbeg; /* long enough */
1256 /* e_no_len and e_number */
1257 len = symptr->length;
1261 explicit_length = TRUE;
1263 beyond = s >= strend;
1265 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1267 /* props nonzero means we can process this letter. */
1268 const long size = props & PACK_SIZE_MASK;
1269 const long howmany = (strend - s) / size;
1273 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1274 if (len && unpack_only_one) len = 1;
1280 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1282 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1285 if (howlen == e_no_len)
1286 len = 16; /* len is not specified */
1294 tempsym_t savsym = *symptr;
1295 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1296 symptr->flags |= group_modifiers;
1297 symptr->patend = savsym.grpend;
1298 symptr->previous = &savsym;
1302 symptr->patptr = savsym.grpbeg;
1303 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1304 else symptr->flags &= ~FLAG_PARSE_UTF8;
1305 unpack_rec(symptr, s, strbeg, strend, &s);
1306 if (s == strend && savsym.howlen == e_star)
1307 break; /* No way to continue */
1310 savsym.flags = symptr->flags & ~group_modifiers;
1314 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1315 case '.' | TYPE_IS_SHRIEKING:
1320 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1322 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1323 const bool u8 = utf8;
1325 if (howlen == e_star) from = strbeg;
1326 else if (len <= 0) from = s;
1328 tempsym_t *group = symptr;
1330 while (--len && group) group = group->previous;
1331 from = group ? strbeg + group->strbeg : strbeg;
1334 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1335 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1336 XPUSHs(sv_2mortal(sv));
1339 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1340 case '@' | TYPE_IS_SHRIEKING:
1343 s = strbeg + symptr->strbeg;
1344 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1345 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1346 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1352 Perl_croak(aTHX_ "'@' outside of string in unpack");
1357 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1360 Perl_croak(aTHX_ "'@' outside of string in unpack");
1364 case 'X' | TYPE_IS_SHRIEKING:
1365 if (!len) /* Avoid division by 0 */
1368 const char *hop, *last;
1370 hop = last = strbeg;
1372 hop += UTF8SKIP(hop);
1379 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1383 len = (s - strbeg) % len;
1389 Perl_croak(aTHX_ "'X' outside of string in unpack");
1390 while (--s, UTF8_IS_CONTINUATION(*s)) {
1392 Perl_croak(aTHX_ "'X' outside of string in unpack");
1397 if (len > s - strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1402 case 'x' | TYPE_IS_SHRIEKING: {
1404 if (!len) /* Avoid division by 0 */
1406 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1407 else ai32 = (s - strbeg) % len;
1408 if (ai32 == 0) break;
1416 Perl_croak(aTHX_ "'x' outside of string in unpack");
1421 if (len > strend - s)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1427 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1433 /* Preliminary length estimate is assumed done in 'W' */
1434 if (len > strend - s) len = strend - s;
1440 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1441 if (hop >= strend) {
1443 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1448 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 } else if (len > strend - s)
1453 if (datumtype == 'Z') {
1454 /* 'Z' strips stuff after first null */
1455 const char *ptr, *end;
1457 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1458 sv = newSVpvn(s, ptr-s);
1459 if (howlen == e_star) /* exact for 'Z*' */
1460 len = ptr-s + (ptr != strend ? 1 : 0);
1461 } else if (datumtype == 'A') {
1462 /* 'A' strips both nulls and spaces */
1464 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1465 for (ptr = s+len-1; ptr >= s; ptr--)
1466 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1467 !is_utf8_space((U8 *) ptr)) break;
1468 if (ptr >= s) ptr += UTF8SKIP(ptr);
1471 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1473 for (ptr = s+len-1; ptr >= s; ptr--)
1474 if (*ptr != 0 && !isSPACE(*ptr)) break;
1477 sv = newSVpvn(s, ptr-s);
1478 } else sv = newSVpvn(s, len);
1482 /* Undo any upgrade done due to need_utf8() */
1483 if (!(symptr->flags & FLAG_WAS_UTF8))
1484 sv_utf8_downgrade(sv, 0);
1486 XPUSHs(sv_2mortal(sv));
1492 if (howlen == e_star || len > (strend - s) * 8)
1493 len = (strend - s) * 8;
1497 Newxz(PL_bitcount, 256, char);
1498 for (bits = 1; bits < 256; bits++) {
1499 if (bits & 1) PL_bitcount[bits]++;
1500 if (bits & 2) PL_bitcount[bits]++;
1501 if (bits & 4) PL_bitcount[bits]++;
1502 if (bits & 8) PL_bitcount[bits]++;
1503 if (bits & 16) PL_bitcount[bits]++;
1504 if (bits & 32) PL_bitcount[bits]++;
1505 if (bits & 64) PL_bitcount[bits]++;
1506 if (bits & 128) PL_bitcount[bits]++;
1510 while (len >= 8 && s < strend) {
1511 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1516 cuv += PL_bitcount[*(U8 *)s++];
1519 if (len && s < strend) {
1521 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1522 if (datumtype == 'b')
1524 if (bits & 1) cuv++;
1529 if (bits & 0x80) cuv++;
1536 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1539 if (datumtype == 'b') {
1541 const I32 ai32 = len;
1542 for (len = 0; len < ai32; len++) {
1543 if (len & 7) bits >>= 1;
1545 if (s >= strend) break;
1546 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1547 } else bits = *(U8 *) s++;
1548 *str++ = bits & 1 ? '1' : '0';
1552 const I32 ai32 = len;
1553 for (len = 0; len < ai32; len++) {
1554 if (len & 7) bits <<= 1;
1556 if (s >= strend) break;
1557 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1558 } else bits = *(U8 *) s++;
1559 *str++ = bits & 0x80 ? '1' : '0';
1563 SvCUR_set(sv, str - SvPVX_const(sv));
1570 /* Preliminary length estimate, acceptable for utf8 too */
1571 if (howlen == e_star || len > (strend - s) * 2)
1572 len = (strend - s) * 2;
1573 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1576 if (datumtype == 'h') {
1579 for (len = 0; len < ai32; len++) {
1580 if (len & 1) bits >>= 4;
1582 if (s >= strend) break;
1583 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1584 } else bits = * (U8 *) s++;
1585 *str++ = PL_hexdigit[bits & 15];
1589 const I32 ai32 = len;
1590 for (len = 0; len < ai32; len++) {
1591 if (len & 1) bits <<= 4;
1593 if (s >= strend) break;
1594 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1595 } else bits = *(U8 *) s++;
1596 *str++ = PL_hexdigit[(bits >> 4) & 15];
1600 SvCUR_set(sv, str - SvPVX_const(sv));
1606 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1607 if (aint >= 128) /* fake up signed chars */
1610 PUSHs(sv_2mortal(newSViv((IV)aint)));
1611 else if (checksum > bits_in_uv)
1612 cdouble += (NV)aint;
1621 if (explicit_length && datumtype == 'C')
1622 /* Switch to "character" mode */
1623 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1626 if (datumtype == 'C' ?
1627 (symptr->flags & FLAG_DO_UTF8) &&
1628 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1629 while (len-- > 0 && s < strend) {
1631 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1632 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1633 if (retlen == (STRLEN) -1 || retlen == 0)
1634 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1637 PUSHs(sv_2mortal(newSVuv((UV) val)));
1638 else if (checksum > bits_in_uv)
1639 cdouble += (NV) val;
1643 } else if (!checksum)
1645 const U8 ch = *(U8 *) s++;
1646 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1648 else if (checksum > bits_in_uv)
1649 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1651 while (len-- > 0) cuv += *(U8 *) s++;
1655 if (explicit_length) {
1656 /* Switch to "bytes in UTF-8" mode */
1657 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1659 /* Should be impossible due to the need_utf8() test */
1660 Perl_croak(aTHX_ "U0 mode on a byte string");
1664 if (len > strend - s) len = strend - s;
1666 if (len && unpack_only_one) len = 1;
1670 while (len-- > 0 && s < strend) {
1674 U8 result[UTF8_MAXLEN];
1675 const char *ptr = s;
1677 /* Bug: warns about bad utf8 even if we are short on bytes
1678 and will break out of the loop */
1679 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1682 len = UTF8SKIP(result);
1683 if (!uni_to_bytes(aTHX_ &ptr, strend,
1684 (char *) &result[1], len-1, 'U')) break;
1685 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1688 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1689 if (retlen == (STRLEN) -1 || retlen == 0)
1690 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1694 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV) auv;
1701 case 's' | TYPE_IS_SHRIEKING:
1702 #if SHORTSIZE != SIZE16
1705 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1706 DO_BO_UNPACK(ashort, s);
1708 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)ashort;
1722 #if U16SIZE > SIZE16
1725 SHIFT16(utf8, s, strend, &ai16, datumtype);
1726 DO_BO_UNPACK(ai16, 16);
1727 #if U16SIZE > SIZE16
1732 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1733 else if (checksum > bits_in_uv)
1734 cdouble += (NV)ai16;
1739 case 'S' | TYPE_IS_SHRIEKING:
1740 #if SHORTSIZE != SIZE16
1742 unsigned short aushort;
1743 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1744 DO_BO_UNPACK(aushort, s);
1746 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1747 else if (checksum > bits_in_uv)
1748 cdouble += (NV)aushort;
1761 #if U16SIZE > SIZE16
1764 SHIFT16(utf8, s, strend, &au16, datumtype);
1765 DO_BO_UNPACK(au16, 16);
1767 if (datumtype == 'n')
1768 au16 = PerlSock_ntohs(au16);
1771 if (datumtype == 'v')
1775 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1776 else if (checksum > bits_in_uv)
1777 cdouble += (NV) au16;
1782 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1783 case 'v' | TYPE_IS_SHRIEKING:
1784 case 'n' | TYPE_IS_SHRIEKING:
1787 # if U16SIZE > SIZE16
1790 SHIFT16(utf8, s, strend, &ai16, datumtype);
1792 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1793 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1794 # endif /* HAS_NTOHS */
1796 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1797 ai16 = (I16) vtohs((U16) ai16);
1798 # endif /* HAS_VTOHS */
1800 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1801 else if (checksum > bits_in_uv)
1802 cdouble += (NV) ai16;
1807 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1809 case 'i' | TYPE_IS_SHRIEKING:
1812 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1813 DO_BO_UNPACK(aint, i);
1815 PUSHs(sv_2mortal(newSViv((IV)aint)));
1816 else if (checksum > bits_in_uv)
1817 cdouble += (NV)aint;
1823 case 'I' | TYPE_IS_SHRIEKING:
1826 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1827 DO_BO_UNPACK(auint, i);
1829 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1830 else if (checksum > bits_in_uv)
1831 cdouble += (NV)auint;
1839 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1840 #if IVSIZE == INTSIZE
1841 DO_BO_UNPACK(aiv, i);
1842 #elif IVSIZE == LONGSIZE
1843 DO_BO_UNPACK(aiv, l);
1844 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1845 DO_BO_UNPACK(aiv, 64);
1847 Perl_croak(aTHX_ "'j' not supported on this platform");
1850 PUSHs(sv_2mortal(newSViv(aiv)));
1851 else if (checksum > bits_in_uv)
1860 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1861 #if IVSIZE == INTSIZE
1862 DO_BO_UNPACK(auv, i);
1863 #elif IVSIZE == LONGSIZE
1864 DO_BO_UNPACK(auv, l);
1865 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1866 DO_BO_UNPACK(auv, 64);
1868 Perl_croak(aTHX_ "'J' not supported on this platform");
1871 PUSHs(sv_2mortal(newSVuv(auv)));
1872 else if (checksum > bits_in_uv)
1878 case 'l' | TYPE_IS_SHRIEKING:
1879 #if LONGSIZE != SIZE32
1882 SHIFT_VAR(utf8, s, strend, along, datumtype);
1883 DO_BO_UNPACK(along, l);
1885 PUSHs(sv_2mortal(newSViv((IV)along)));
1886 else if (checksum > bits_in_uv)
1887 cdouble += (NV)along;
1898 #if U32SIZE > SIZE32
1901 SHIFT32(utf8, s, strend, &ai32, datumtype);
1902 DO_BO_UNPACK(ai32, 32);
1903 #if U32SIZE > SIZE32
1904 if (ai32 > 2147483647) ai32 -= 4294967296;
1907 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1908 else if (checksum > bits_in_uv)
1909 cdouble += (NV)ai32;
1914 case 'L' | TYPE_IS_SHRIEKING:
1915 #if LONGSIZE != SIZE32
1917 unsigned long aulong;
1918 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1919 DO_BO_UNPACK(aulong, l);
1921 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1922 else if (checksum > bits_in_uv)
1923 cdouble += (NV)aulong;
1936 #if U32SIZE > SIZE32
1939 SHIFT32(utf8, s, strend, &au32, datumtype);
1940 DO_BO_UNPACK(au32, 32);
1942 if (datumtype == 'N')
1943 au32 = PerlSock_ntohl(au32);
1946 if (datumtype == 'V')
1950 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1951 else if (checksum > bits_in_uv)
1952 cdouble += (NV)au32;
1957 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1958 case 'V' | TYPE_IS_SHRIEKING:
1959 case 'N' | TYPE_IS_SHRIEKING:
1962 # if U32SIZE > SIZE32
1965 SHIFT32(utf8, s, strend, &ai32, datumtype);
1967 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1968 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1971 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1972 ai32 = (I32)vtohl((U32)ai32);
1975 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1976 else if (checksum > bits_in_uv)
1977 cdouble += (NV)ai32;
1982 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1986 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1987 DO_BO_UNPACK_PC(aptr);
1988 /* newSVpv generates undef if aptr is NULL */
1989 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1997 while (len > 0 && s < strend) {
1999 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2000 auv = (auv << 7) | (ch & 0x7f);
2001 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2004 PUSHs(sv_2mortal(newSVuv(auv)));
2009 if (++bytes >= sizeof(UV)) { /* promote to string */
2012 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2013 while (s < strend) {
2014 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2015 sv = mul128(sv, (U8)(ch & 0x7f));
2021 t = SvPV_nolen_const(sv);
2025 PUSHs(sv_2mortal(sv));
2030 if ((s >= strend) && bytes)
2031 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2035 if (symptr->howlen == e_star)
2036 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2038 if (sizeof(char*) <= strend - s) {
2040 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2041 DO_BO_UNPACK_PC(aptr);
2042 /* newSVpvn generates undef if aptr is NULL */
2043 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2050 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2051 DO_BO_UNPACK(aquad, 64);
2053 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2054 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2055 else if (checksum > bits_in_uv)
2056 cdouble += (NV)aquad;
2064 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2065 DO_BO_UNPACK(auquad, 64);
2067 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2068 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2069 else if (checksum > bits_in_uv)
2070 cdouble += (NV)auquad;
2075 #endif /* HAS_QUAD */
2076 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2080 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2081 DO_BO_UNPACK_N(afloat, float);
2083 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2091 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2092 DO_BO_UNPACK_N(adouble, double);
2094 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2102 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2103 DO_BO_UNPACK_N(anv, NV);
2105 PUSHs(sv_2mortal(newSVnv(anv)));
2110 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2113 long double aldouble;
2114 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2115 DO_BO_UNPACK_N(aldouble, long double);
2117 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2119 cdouble += aldouble;
2125 * Initialise the decode mapping. By using a table driven
2126 * algorithm, the code will be character-set independent
2127 * (and just as fast as doing character arithmetic)
2129 if (PL_uudmap['M'] == 0) {
2132 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2133 PL_uudmap[(U8)PL_uuemap[i]] = i;
2135 * Because ' ' and '`' map to the same value,
2136 * we need to decode them both the same.
2141 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2142 sv = sv_2mortal(NEWSV(42, l));
2143 if (l) SvPOK_on(sv);
2146 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2152 next_uni_uu(aTHX_ &s, strend, &a);
2153 next_uni_uu(aTHX_ &s, strend, &b);
2154 next_uni_uu(aTHX_ &s, strend, &c);
2155 next_uni_uu(aTHX_ &s, strend, &d);
2156 hunk[0] = (char)((a << 2) | (b >> 4));
2157 hunk[1] = (char)((b << 4) | (c >> 2));
2158 hunk[2] = (char)((c << 6) | d);
2159 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2167 /* possible checksum byte */
2168 const char *skip = s+UTF8SKIP(s);
2169 if (skip < strend && *skip == '\n')
2175 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2180 len = PL_uudmap[*(U8*)s++] & 077;
2182 if (s < strend && ISUUCHAR(*s))
2183 a = PL_uudmap[*(U8*)s++] & 077;
2186 if (s < strend && ISUUCHAR(*s))
2187 b = PL_uudmap[*(U8*)s++] & 077;
2190 if (s < strend && ISUUCHAR(*s))
2191 c = PL_uudmap[*(U8*)s++] & 077;
2194 if (s < strend && ISUUCHAR(*s))
2195 d = PL_uudmap[*(U8*)s++] & 077;
2198 hunk[0] = (char)((a << 2) | (b >> 4));
2199 hunk[1] = (char)((b << 4) | (c >> 2));
2200 hunk[2] = (char)((c << 6) | d);
2201 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2206 else /* possible checksum byte */
2207 if (s + 1 < strend && s[1] == '\n')
2216 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2217 (checksum > bits_in_uv &&
2218 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2221 anv = (NV) (1 << (checksum & 15));
2222 while (checksum >= 16) {
2226 while (cdouble < 0.0)
2228 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2229 sv = newSVnv(cdouble);
2232 if (checksum < bits_in_uv) {
2233 UV mask = ((UV)1 << checksum) - 1;
2238 XPUSHs(sv_2mortal(sv));
2242 if (symptr->flags & FLAG_SLASH){
2243 if (SP - PL_stack_base - start_sp_offset <= 0)
2244 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2245 if( next_symbol(symptr) ){
2246 if( symptr->howlen == e_number )
2247 Perl_croak(aTHX_ "Count after length/code in unpack" );
2249 /* ...end of char buffer then no decent length available */
2250 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2252 /* take top of stack (hope it's numeric) */
2255 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2258 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2260 datumtype = symptr->code;
2261 explicit_length = FALSE;
2269 return SP - PL_stack_base - start_sp_offset;
2276 I32 gimme = GIMME_V;
2279 const char *pat = SvPV_const(left, llen);
2280 const char *s = SvPV_const(right, rlen);
2281 const char *strend = s + rlen;
2282 const char *patend = pat + llen;
2286 cnt = unpackstring(pat, patend, s, strend,
2287 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2288 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2291 if ( !cnt && gimme == G_SCALAR )
2292 PUSHs(&PL_sv_undef);
2297 doencodes(U8 *h, const char *s, I32 len)
2299 *h++ = PL_uuemap[len];
2301 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2302 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2303 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2304 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2309 const char r = (len > 1 ? s[1] : '\0');
2310 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2311 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2312 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2313 *h++ = PL_uuemap[0];
2320 S_is_an_int(pTHX_ const char *s, STRLEN l)
2322 SV *result = newSVpvn(s, l);
2323 char *const result_c = SvPV_nolen(result); /* convenience */
2324 char *out = result_c;
2334 SvREFCNT_dec(result);
2357 SvREFCNT_dec(result);
2363 SvCUR_set(result, out - result_c);
2367 /* pnum must be '\0' terminated */
2369 S_div128(pTHX_ SV *pnum, bool *done)
2372 char * const s = SvPV(pnum, len);
2378 const int i = m * 10 + (*t - '0');
2379 const int r = (i >> 7); /* r < 10 */
2387 SvCUR_set(pnum, (STRLEN) (t - s));
2392 =for apidoc pack_cat
2394 The engine implementing pack() Perl function. Note: parameters next_in_list and
2395 flags are not used. This call should not be used; use packlist instead.
2401 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2404 PERL_UNUSED_ARG(next_in_list);
2405 PERL_UNUSED_ARG(flags);
2407 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2409 (void)pack_rec( cat, &sym, beglist, endlist );
2414 =for apidoc packlist
2416 The engine implementing pack() Perl function.
2422 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2427 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2429 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2430 Also make sure any UTF8 flag is loaded */
2431 SvPV_force(cat, no_len);
2433 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2435 (void)pack_rec( cat, &sym, beglist, endlist );
2438 /* like sv_utf8_upgrade, but also repoint the group start markers */
2440 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2443 const char *from_ptr, *from_start, *from_end, **marks, **m;
2444 char *to_start, *to_ptr;
2446 if (SvUTF8(sv)) return;
2448 from_start = SvPVX_const(sv);
2449 from_end = from_start + SvCUR(sv);
2450 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2451 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2452 if (from_ptr == from_end) {
2453 /* Simple case: no character needs to be changed */
2458 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2459 Newx(to_start, len, char);
2460 Copy(from_start, to_start, from_ptr-from_start, char);
2461 to_ptr = to_start + (from_ptr-from_start);
2463 Newx(marks, sym_ptr->level+2, const char *);
2464 for (group=sym_ptr; group; group = group->previous)
2465 marks[group->level] = from_start + group->strbeg;
2466 marks[sym_ptr->level+1] = from_end+1;
2467 for (m = marks; *m < from_ptr; m++)
2468 *m = to_start + (*m-from_start);
2470 for (;from_ptr < from_end; from_ptr++) {
2471 while (*m == from_ptr) *m++ = to_ptr;
2472 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2476 while (*m == from_ptr) *m++ = to_ptr;
2477 if (m != marks + sym_ptr->level+1) {
2480 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2482 for (group=sym_ptr; group; group = group->previous)
2483 group->strbeg = marks[group->level] - to_start;
2488 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2489 from_start -= SvIVX(sv);
2492 SvFLAGS(sv) &= ~SVf_OOK;
2495 Safefree(from_start);
2496 SvPV_set(sv, to_start);
2497 SvCUR_set(sv, to_ptr - to_start);
2502 /* Exponential string grower. Makes string extension effectively O(n)
2503 needed says how many extra bytes we need (not counting the final '\0')
2504 Only grows the string if there is an actual lack of space
2507 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2508 const STRLEN cur = SvCUR(sv);
2509 const STRLEN len = SvLEN(sv);
2511 if (len - cur > needed) return SvPVX(sv);
2512 extend = needed > len ? needed : len;
2513 return SvGROW(sv, len+extend+1);
2518 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2520 tempsym_t lookahead;
2521 I32 items = endlist - beglist;
2522 bool found = next_symbol(symptr);
2523 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2524 bool warn_utf8 = ckWARN(WARN_UTF8);
2526 if (symptr->level == 0 && found && symptr->code == 'U') {
2527 marked_upgrade(aTHX_ cat, symptr);
2528 symptr->flags |= FLAG_DO_UTF8;
2531 symptr->strbeg = SvCUR(cat);
2537 SV *lengthcode = Nullsv;
2538 I32 datumtype = symptr->code;
2539 howlen_t howlen = symptr->howlen;
2540 char *start = SvPVX(cat);
2541 char *cur = start + SvCUR(cat);
2543 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2547 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2551 /* e_no_len and e_number */
2552 len = symptr->length;
2557 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2559 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2560 /* We can process this letter. */
2561 STRLEN size = props & PACK_SIZE_MASK;
2562 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2566 /* Look ahead for next symbol. Do we have code/code? */
2567 lookahead = *symptr;
2568 found = next_symbol(&lookahead);
2569 if (symptr->flags & FLAG_SLASH) {
2571 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2572 if (strchr("aAZ", lookahead.code)) {
2573 if (lookahead.howlen == e_number) count = lookahead.length;
2576 count = DO_UTF8(*beglist) ?
2577 sv_len_utf8(*beglist) : sv_len(*beglist);
2579 if (lookahead.code == 'Z') count++;
2582 if (lookahead.howlen == e_number && lookahead.length < items)
2583 count = lookahead.length;
2586 lookahead.howlen = e_number;
2587 lookahead.length = count;
2588 lengthcode = sv_2mortal(newSViv(count));
2591 /* Code inside the switch must take care to properly update
2592 cat (CUR length and '\0' termination) if it updated *cur and
2593 doesn't simply leave using break */
2594 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2596 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2597 (int) TYPE_NO_MODIFIERS(datumtype));
2599 Perl_croak(aTHX_ "'%%' may not be used in pack");
2602 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2603 case '.' | TYPE_IS_SHRIEKING:
2606 if (howlen == e_star) from = start;
2607 else if (len == 0) from = cur;
2609 tempsym_t *group = symptr;
2611 while (--len && group) group = group->previous;
2612 from = group ? start + group->strbeg : start;
2615 len = SvIV(fromstr);
2617 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2618 case '@' | TYPE_IS_SHRIEKING:
2621 from = start + symptr->strbeg;
2623 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2624 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2625 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2629 while (len && from < cur) {
2630 from += UTF8SKIP(from);
2634 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2636 /* Here we know from == cur */
2638 GROWING(0, cat, start, cur, len);
2639 Zero(cur, len, char);
2641 } else if (from < cur) {
2644 } else goto no_change;
2652 if (len > 0) goto grow;
2653 if (len == 0) goto no_change;
2660 tempsym_t savsym = *symptr;
2661 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2662 symptr->flags |= group_modifiers;
2663 symptr->patend = savsym.grpend;
2665 symptr->previous = &lookahead;
2668 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2669 else symptr->flags &= ~FLAG_PARSE_UTF8;
2670 was_utf8 = SvUTF8(cat);
2671 symptr->patptr = savsym.grpbeg;
2672 beglist = pack_rec(cat, symptr, beglist, endlist);
2673 if (SvUTF8(cat) != was_utf8)
2674 /* This had better be an upgrade while in utf8==0 mode */
2677 if (savsym.howlen == e_star && beglist == endlist)
2678 break; /* No way to continue */
2680 lookahead.flags = symptr->flags & ~group_modifiers;
2683 case 'X' | TYPE_IS_SHRIEKING:
2684 if (!len) /* Avoid division by 0 */
2691 hop += UTF8SKIP(hop);
2698 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2702 len = (cur-start) % len;
2706 if (len < 1) goto no_change;
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
2712 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2714 Perl_croak(aTHX_ "'%c' outside of string in pack",
2715 (int) TYPE_NO_MODIFIERS(datumtype));
2721 if (cur - start < len)
2722 Perl_croak(aTHX_ "'%c' outside of string in pack",
2723 (int) TYPE_NO_MODIFIERS(datumtype));
2726 if (cur < start+symptr->strbeg) {
2727 /* Make sure group starts don't point into the void */
2729 const STRLEN length = cur-start;
2730 for (group = symptr;
2731 group && length < group->strbeg;
2732 group = group->previous) group->strbeg = length;
2733 lookahead.strbeg = length;
2736 case 'x' | TYPE_IS_SHRIEKING: {
2738 if (!len) /* Avoid division by 0 */
2740 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2741 else ai32 = (cur - start) % len;
2742 if (ai32 == 0) goto no_change;
2754 aptr = SvPV_const(fromstr, fromlen);
2755 if (DO_UTF8(fromstr)) {
2756 const char *end, *s;
2758 if (!utf8 && !SvUTF8(cat)) {
2759 marked_upgrade(aTHX_ cat, symptr);
2760 lookahead.flags |= FLAG_DO_UTF8;
2761 lookahead.strbeg = symptr->strbeg;
2764 cur = start + SvCUR(cat);
2766 if (howlen == e_star) {
2767 if (utf8) goto string_copy;
2771 end = aptr + fromlen;
2772 fromlen = datumtype == 'Z' ? len-1 : len;
2773 while ((I32) fromlen > 0 && s < end) {
2778 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2781 if (datumtype == 'Z') len++;
2787 fromlen = len - fromlen;
2788 if (datumtype == 'Z') fromlen--;
2789 if (howlen == e_star) {
2791 if (datumtype == 'Z') len++;
2793 GROWING(0, cat, start, cur, len);
2794 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2795 datumtype | TYPE_IS_PACK))
2796 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2800 if (howlen == e_star) {
2802 if (datumtype == 'Z') len++;
2804 if (len <= (I32) fromlen) {
2806 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2808 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2810 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2811 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2813 while (fromlen > 0) {
2814 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2820 if (howlen == e_star) {
2822 if (datumtype == 'Z') len++;
2824 if (len <= (I32) fromlen) {
2826 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2828 GROWING(0, cat, start, cur, len);
2829 Copy(aptr, cur, fromlen, char);
2833 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2839 const char *str, *end;
2846 str = SvPV_const(fromstr, fromlen);
2847 end = str + fromlen;
2848 if (DO_UTF8(fromstr)) {
2850 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2852 utf8_source = FALSE;
2853 utf8_flags = 0; /* Unused, but keep compilers happy */
2855 if (howlen == e_star) len = fromlen;
2856 field_len = (len+7)/8;
2857 GROWING(utf8, cat, start, cur, field_len);
2858 if (len > (I32)fromlen) len = fromlen;
2861 if (datumtype == 'B')
2865 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2867 } else bits |= *str++ & 1;
2868 if (l & 7) bits <<= 1;
2870 PUSH_BYTE(utf8, cur, bits);
2875 /* datumtype == 'b' */
2879 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2880 if (val & 1) bits |= 0x80;
2881 } else if (*str++ & 1)
2883 if (l & 7) bits >>= 1;
2885 PUSH_BYTE(utf8, cur, bits);
2891 if (datumtype == 'B')
2892 bits <<= 7 - (l & 7);
2894 bits >>= 7 - (l & 7);
2895 PUSH_BYTE(utf8, cur, bits);
2898 /* Determine how many chars are left in the requested field */
2900 if (howlen == e_star) field_len = 0;
2901 else field_len -= l;
2902 Zero(cur, field_len, char);
2908 const char *str, *end;
2915 str = SvPV_const(fromstr, fromlen);
2916 end = str + fromlen;
2917 if (DO_UTF8(fromstr)) {
2919 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2921 utf8_source = FALSE;
2922 utf8_flags = 0; /* Unused, but keep compilers happy */
2924 if (howlen == e_star) len = fromlen;
2925 field_len = (len+1)/2;
2926 GROWING(utf8, cat, start, cur, field_len);
2927 if (!utf8 && len > (I32)fromlen) len = fromlen;
2930 if (datumtype == 'H')
2934 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2935 if (val < 256 && isALPHA(val))
2936 bits |= (val + 9) & 0xf;
2939 } else if (isALPHA(*str))
2940 bits |= (*str++ + 9) & 0xf;
2942 bits |= *str++ & 0xf;
2943 if (l & 1) bits <<= 4;
2945 PUSH_BYTE(utf8, cur, bits);
2953 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2954 if (val < 256 && isALPHA(val))
2955 bits |= ((val + 9) & 0xf) << 4;
2957 bits |= (val & 0xf) << 4;
2958 } else if (isALPHA(*str))
2959 bits |= ((*str++ + 9) & 0xf) << 4;
2961 bits |= (*str++ & 0xf) << 4;
2962 if (l & 1) bits >>= 4;
2964 PUSH_BYTE(utf8, cur, bits);
2970 PUSH_BYTE(utf8, cur, bits);
2973 /* Determine how many chars are left in the requested field */
2975 if (howlen == e_star) field_len = 0;
2976 else field_len -= l;
2977 Zero(cur, field_len, char);
2985 aiv = SvIV(fromstr);
2986 if ((-128 > aiv || aiv > 127) &&
2988 Perl_warner(aTHX_ packWARN(WARN_PACK),
2989 "Character in 'c' format wrapped in pack");
2990 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2995 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2998 GROWING(0, cat, start, cur, len);
3002 aiv = SvIV(fromstr);
3003 if ((0 > aiv || aiv > 0xff) &&
3005 Perl_warner(aTHX_ packWARN(WARN_PACK),
3006 "Character in 'C' format wrapped in pack");
3007 *cur++ = (char)(aiv & 0xff);
3012 U8 in_bytes = IN_BYTES;
3014 end = start+SvLEN(cat)-1;
3015 if (utf8) end -= UTF8_MAXLEN-1;
3019 auv = SvUV(fromstr);
3020 if (in_bytes) auv = auv % 0x100;
3025 SvCUR_set(cat, cur - start);
3027 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3028 end = start+SvLEN(cat)-UTF8_MAXLEN;
3030 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3033 0 : UNICODE_ALLOW_ANY);
3038 SvCUR_set(cat, cur - start);
3039 marked_upgrade(aTHX_ cat, symptr);
3040 lookahead.flags |= FLAG_DO_UTF8;
3041 lookahead.strbeg = symptr->strbeg;
3044 cur = start + SvCUR(cat);
3045 end = start+SvLEN(cat)-UTF8_MAXLEN;
3048 if (ckWARN(WARN_PACK))
3049 Perl_warner(aTHX_ packWARN(WARN_PACK),
3050 "Character in 'W' format wrapped in pack");
3055 SvCUR_set(cat, cur - start);
3056 GROWING(0, cat, start, cur, len+1);
3057 end = start+SvLEN(cat)-1;
3059 *(U8 *) cur++ = (U8)auv;
3068 if (!(symptr->flags & FLAG_DO_UTF8)) {
3069 marked_upgrade(aTHX_ cat, symptr);
3070 lookahead.flags |= FLAG_DO_UTF8;
3071 lookahead.strbeg = symptr->strbeg;
3077 end = start+SvLEN(cat);
3078 if (!utf8) end -= UTF8_MAXLEN;
3082 auv = SvUV(fromstr);
3084 U8 buffer[UTF8_MAXLEN], *endb;
3085 endb = uvuni_to_utf8_flags(buffer, auv,
3087 0 : UNICODE_ALLOW_ANY);
3088 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3090 SvCUR_set(cat, cur - start);
3091 GROWING(0, cat, start, cur,
3092 len+(endb-buffer)*UTF8_EXPAND);
3093 end = start+SvLEN(cat);
3095 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3099 SvCUR_set(cat, cur - start);
3100 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3101 end = start+SvLEN(cat)-UTF8_MAXLEN;
3103 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3105 0 : UNICODE_ALLOW_ANY);
3110 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3116 anv = SvNV(fromstr);
3118 /* VOS does not automatically map a floating-point overflow
3119 during conversion from double to float into infinity, so we
3120 do it by hand. This code should either be generalized for
3121 any OS that needs it, or removed if and when VOS implements
3122 posix-976 (suggestion to support mapping to infinity).
3123 Paul.Green@stratus.com 02-04-02. */
3125 afloat = _float_constants[0]; /* single prec. inf. */
3126 else if (anv < -FLT_MAX)
3127 afloat = _float_constants[0]; /* single prec. inf. */
3128 else afloat = (float) anv;
3130 # if defined(VMS) && !defined(__IEEE_FP)
3131 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3132 * on Alpha; fake it if we don't have them.
3136 else if (anv < -FLT_MAX)
3138 else afloat = (float)anv;
3140 afloat = (float)anv;
3142 #endif /* __VOS__ */
3143 DO_BO_PACK_N(afloat, float);
3144 PUSH_VAR(utf8, cur, afloat);
3152 anv = SvNV(fromstr);
3154 /* VOS does not automatically map a floating-point overflow
3155 during conversion from long double to double into infinity,
3156 so we do it by hand. This code should either be generalized
3157 for any OS that needs it, or removed if and when VOS
3158 implements posix-976 (suggestion to support mapping to
3159 infinity). Paul.Green@stratus.com 02-04-02. */
3161 adouble = _double_constants[0]; /* double prec. inf. */
3162 else if (anv < -DBL_MAX)
3163 adouble = _double_constants[0]; /* double prec. inf. */
3164 else adouble = (double) anv;
3166 # if defined(VMS) && !defined(__IEEE_FP)
3167 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3168 * on Alpha; fake it if we don't have them.
3172 else if (anv < -DBL_MAX)
3174 else adouble = (double)anv;
3176 adouble = (double)anv;
3178 #endif /* __VOS__ */
3179 DO_BO_PACK_N(adouble, double);
3180 PUSH_VAR(utf8, cur, adouble);
3185 Zero(&anv, 1, NV); /* can be long double with unused bits */
3188 anv = SvNV(fromstr);
3189 DO_BO_PACK_N(anv, NV);
3190 PUSH_VAR(utf8, cur, anv);
3194 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3196 long double aldouble;
3197 /* long doubles can have unused bits, which may be nonzero */
3198 Zero(&aldouble, 1, long double);
3201 aldouble = (long double)SvNV(fromstr);
3202 DO_BO_PACK_N(aldouble, long double);
3203 PUSH_VAR(utf8, cur, aldouble);
3208 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3209 case 'n' | TYPE_IS_SHRIEKING:
3215 ai16 = (I16)SvIV(fromstr);
3217 ai16 = PerlSock_htons(ai16);
3219 PUSH16(utf8, cur, &ai16);
3222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3223 case 'v' | TYPE_IS_SHRIEKING:
3229 ai16 = (I16)SvIV(fromstr);
3233 PUSH16(utf8, cur, &ai16);
3236 case 'S' | TYPE_IS_SHRIEKING:
3237 #if SHORTSIZE != SIZE16
3239 unsigned short aushort;
3241 aushort = SvUV(fromstr);
3242 DO_BO_PACK(aushort, s);
3243 PUSH_VAR(utf8, cur, aushort);
3253 au16 = (U16)SvUV(fromstr);
3254 DO_BO_PACK(au16, 16);
3255 PUSH16(utf8, cur, &au16);
3258 case 's' | TYPE_IS_SHRIEKING:
3259 #if SHORTSIZE != SIZE16
3263 ashort = SvIV(fromstr);
3264 DO_BO_PACK(ashort, s);
3265 PUSH_VAR(utf8, cur, ashort);
3275 ai16 = (I16)SvIV(fromstr);
3276 DO_BO_PACK(ai16, 16);
3277 PUSH16(utf8, cur, &ai16);
3281 case 'I' | TYPE_IS_SHRIEKING:
3285 auint = SvUV(fromstr);
3286 DO_BO_PACK(auint, i);
3287 PUSH_VAR(utf8, cur, auint);
3294 aiv = SvIV(fromstr);
3295 #if IVSIZE == INTSIZE
3297 #elif IVSIZE == LONGSIZE
3299 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3300 DO_BO_PACK(aiv, 64);
3302 Perl_croak(aTHX_ "'j' not supported on this platform");
3304 PUSH_VAR(utf8, cur, aiv);
3311 auv = SvUV(fromstr);
3312 #if UVSIZE == INTSIZE
3314 #elif UVSIZE == LONGSIZE
3316 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3317 DO_BO_PACK(auv, 64);
3319 Perl_croak(aTHX_ "'J' not supported on this platform");
3321 PUSH_VAR(utf8, cur, auv);
3328 anv = SvNV(fromstr);
3332 SvCUR_set(cat, cur - start);
3333 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3336 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3337 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3338 any negative IVs will have already been got by the croak()
3339 above. IOK is untrue for fractions, so we test them
3340 against UV_MAX_P1. */
3341 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3342 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3343 char *in = buf + sizeof(buf);
3344 UV auv = SvUV(fromstr);
3347 *--in = (char)((auv & 0x7f) | 0x80);
3350 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3351 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3352 in, (buf + sizeof(buf)) - in);
3353 } else if (SvPOKp(fromstr))
3355 else if (SvNOKp(fromstr)) {
3356 /* 10**NV_MAX_10_EXP is the largest power of 10
3357 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3358 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3359 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3360 And with that many bytes only Inf can overflow.
3361 Some C compilers are strict about integral constant
3362 expressions so we conservatively divide by a slightly
3363 smaller integer instead of multiplying by the exact
3364 floating-point value.
3366 #ifdef NV_MAX_10_EXP
3367 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3368 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3370 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3371 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3373 char *in = buf + sizeof(buf);
3375 anv = Perl_floor(anv);
3377 const NV next = Perl_floor(anv / 128);
3378 if (in <= buf) /* this cannot happen ;-) */
3379 Perl_croak(aTHX_ "Cannot compress integer in pack");
3380 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3383 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3384 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3385 in, (buf + sizeof(buf)) - in);
3394 /* Copy string and check for compliance */
3395 from = SvPV_const(fromstr, len);
3396 if ((norm = is_an_int(from, len)) == NULL)
3397 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3399 Newx(result, len, char);
3402 while (!done) *--in = div128(norm, &done) | 0x80;
3403 result[len - 1] &= 0x7F; /* clear continue bit */
3404 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3405 in, (result + len) - in);
3407 SvREFCNT_dec(norm); /* free norm */
3412 case 'i' | TYPE_IS_SHRIEKING:
3416 aint = SvIV(fromstr);
3417 DO_BO_PACK(aint, i);
3418 PUSH_VAR(utf8, cur, aint);
3421 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3422 case 'N' | TYPE_IS_SHRIEKING:
3428 au32 = SvUV(fromstr);
3430 au32 = PerlSock_htonl(au32);
3432 PUSH32(utf8, cur, &au32);
3435 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3436 case 'V' | TYPE_IS_SHRIEKING:
3442 au32 = SvUV(fromstr);
3446 PUSH32(utf8, cur, &au32);
3449 case 'L' | TYPE_IS_SHRIEKING:
3450 #if LONGSIZE != SIZE32
3452 unsigned long aulong;
3454 aulong = SvUV(fromstr);
3455 DO_BO_PACK(aulong, l);
3456 PUSH_VAR(utf8, cur, aulong);
3466 au32 = SvUV(fromstr);
3467 DO_BO_PACK(au32, 32);
3468 PUSH32(utf8, cur, &au32);
3471 case 'l' | TYPE_IS_SHRIEKING:
3472 #if LONGSIZE != SIZE32
3476 along = SvIV(fromstr);
3477 DO_BO_PACK(along, l);
3478 PUSH_VAR(utf8, cur, along);
3488 ai32 = SvIV(fromstr);
3489 DO_BO_PACK(ai32, 32);
3490 PUSH32(utf8, cur, &ai32);
3498 auquad = (Uquad_t) SvUV(fromstr);
3499 DO_BO_PACK(auquad, 64);
3500 PUSH_VAR(utf8, cur, auquad);
3507 aquad = (Quad_t)SvIV(fromstr);
3508 DO_BO_PACK(aquad, 64);
3509 PUSH_VAR(utf8, cur, aquad);
3512 #endif /* HAS_QUAD */
3514 len = 1; /* assume SV is correct length */
3515 GROWING(utf8, cat, start, cur, sizeof(char *));
3522 SvGETMAGIC(fromstr);
3523 if (!SvOK(fromstr)) aptr = NULL;
3525 /* XXX better yet, could spirit away the string to
3526 * a safe spot and hang on to it until the result
3527 * of pack() (and all copies of the result) are
3530 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3531 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3532 Perl_warner(aTHX_ packWARN(WARN_PACK),
3533 "Attempt to pack pointer to temporary value");
3535 if (SvPOK(fromstr) || SvNIOK(fromstr))
3536 aptr = SvPV_nomg_const_nolen(fromstr);
3538 aptr = SvPV_force_flags_nolen(fromstr, 0);
3540 DO_BO_PACK_PC(aptr);
3541 PUSH_VAR(utf8, cur, aptr);
3545 const char *aptr, *aend;
3549 if (len <= 2) len = 45;
3550 else len = len / 3 * 3;
3552 Perl_warner(aTHX_ packWARN(WARN_PACK),
3553 "Field too wide in 'u' format in pack");
3556 aptr = SvPV_const(fromstr, fromlen);
3557 from_utf8 = DO_UTF8(fromstr);
3559 aend = aptr + fromlen;
3560 fromlen = sv_len_utf8(fromstr);
3561 } else aend = NULL; /* Unused, but keep compilers happy */
3562 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3563 while (fromlen > 0) {
3566 U8 hunk[1+63/3*4+1];
3568 if ((I32)fromlen > len)
3574 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3575 'u' | TYPE_IS_PACK)) {
3577 SvCUR_set(cat, cur - start);
3578 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3580 end = doencodes(hunk, buffer, todo);
3582 end = doencodes(hunk, aptr, todo);
3585 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3592 SvCUR_set(cat, cur - start);
3594 *symptr = lookahead;
3603 dSP; dMARK; dORIGMARK; dTARGET;
3604 register SV *cat = TARG;
3606 SV *pat_sv = *++MARK;
3607 register const char *pat = SvPV_const(pat_sv, fromlen);
3608 register const char *patend = pat + fromlen;
3611 sv_setpvn(cat, "", 0);
3614 packlist(cat, pat, patend, MARK, SP + 1);
3624 * c-indentation-style: bsd
3626 * indent-tabs-mode: t
3629 * ex: set ts=8 sts=4 sw=4 noet: