3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
35 # define PERL_PACK_CAN_BYTEORDER
36 # define PERL_PACK_CAN_SHRIEKSIGN
42 /* Maximum number of bytes to which a byte can grow due to upgrade */
46 * Offset for integer pack/unpack.
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
66 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
69 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
70 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
71 # define OFF16(p) ((char*)(p))
72 # define OFF32(p) ((char*)(p))
74 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
75 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
78 ++++ bad cray byte order
82 # define OFF16(p) ((char *) (p))
83 # define OFF32(p) ((char *) (p))
86 /* Only to be used inside a loop (see the break) */
87 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
91 Copy(s, OFF16(p), SIZE16, char); \
96 /* Only to be used inside a loop (see the break) */
97 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
101 Copy(s, OFF32(p), SIZE32, char); \
106 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
109 /* Only to be used inside a loop (see the break) */
110 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
116 Copy(s, (char *) &var, sizeof(var), char); \
121 #define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, (char *) &(var), sizeof(var))
124 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125 #define MAX_SUB_TEMPLATE_LEVEL 100
127 /* flags (note that type modifiers can also be used as flags!) */
128 #define FLAG_WAS_UTF8 0x40
129 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
130 #define FLAG_UNPACK_ONLY_ONE 0x10
131 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
132 #define FLAG_SLASH 0x04
133 #define FLAG_COMMA 0x02
134 #define FLAG_PACK 0x01
137 S_mul128(pTHX_ SV *sv, U8 m)
140 char *s = SvPV(sv, len);
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
153 while (!*t) /* trailing '\0'? */
156 i = ((*t - '0') << 7) + m;
157 *(t--) = '0' + (char)(i % 10);
163 /* Explosives and implosives. */
165 #if 'I' == 73 && 'J' == 74
166 /* On an ASCII/ISO kind of system */
167 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
170 Some other sort of character set - use memchr() so we don't match
173 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
177 #define TYPE_IS_SHRIEKING 0x100
178 #define TYPE_IS_BIG_ENDIAN 0x200
179 #define TYPE_IS_LITTLE_ENDIAN 0x400
180 #define TYPE_IS_PACK 0x800
181 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
182 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
183 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
185 #ifdef PERL_PACK_CAN_SHRIEKSIGN
186 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
188 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
191 #ifndef PERL_PACK_CAN_BYTEORDER
192 /* Put "can't" first because it is shorter */
193 # define TYPE_ENDIANNESS(t) 0
194 # define TYPE_NO_ENDIANNESS(t) (t)
196 # define ENDIANNESS_ALLOWED_TYPES ""
198 # define DO_BO_UNPACK(var, type)
199 # define DO_BO_PACK(var, type)
200 # define DO_BO_UNPACK_PTR(var, type, pre_cast)
201 # define DO_BO_PACK_PTR(var, type, pre_cast)
202 # define DO_BO_UNPACK_N(var, type)
203 # define DO_BO_PACK_N(var, type)
204 # define DO_BO_UNPACK_P(var)
205 # define DO_BO_PACK_P(var)
207 #else /* PERL_PACK_CAN_BYTEORDER */
209 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
212 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
214 # define DO_BO_UNPACK(var, type) \
216 switch (TYPE_ENDIANNESS(datumtype)) { \
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
223 # define DO_BO_PACK(var, type) \
225 switch (TYPE_ENDIANNESS(datumtype)) { \
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
232 # define DO_BO_UNPACK_PTR(var, type, pre_cast) \
234 switch (TYPE_ENDIANNESS(datumtype)) { \
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
246 # define DO_BO_PACK_PTR(var, type, pre_cast) \
248 switch (TYPE_ENDIANNESS(datumtype)) { \
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
260 # define BO_CANT_DOIT(action, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
276 # if PTRSIZE == INTSIZE
277 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279 # elif PTRSIZE == LONGSIZE
280 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
283 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
287 # if defined(my_htolen) && defined(my_letohn) && \
288 defined(my_htoben) && defined(my_betohn)
289 # define DO_BO_UNPACK_N(var, type) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
298 # define DO_BO_PACK_N(var, type) \
300 switch (TYPE_ENDIANNESS(datumtype)) { \
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
307 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
311 #endif /* PERL_PACK_CAN_BYTEORDER */
313 #define PACK_SIZE_CANNOT_CSUM 0x80
314 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
315 #define PACK_SIZE_MASK 0x3F
317 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
320 #if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
324 typedef U8 packprops_t;
327 const packprops_t packprops[512] = {
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
335 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
336 /* D */ LONG_DOUBLESIZE,
343 /* I */ sizeof(unsigned int),
350 #if defined(HAS_QUAD)
351 /* Q */ sizeof(Uquad_t),
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
365 /* f */ sizeof(float),
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
375 #if defined(HAS_QUAD)
376 /* q */ sizeof(Quad_t),
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
402 /* L */ sizeof(unsigned long),
404 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
410 /* S */ sizeof(unsigned short),
412 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
421 /* l */ sizeof(long),
423 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
429 /* s */ sizeof(short),
431 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
447 /* EBCDIC (or bust) */
448 const packprops_t packprops[512] = {
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
462 /* f */ sizeof(float),
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
473 #if defined(HAS_QUAD)
474 /* q */ sizeof(Quad_t),
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
486 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
487 /* D */ LONG_DOUBLESIZE,
494 /* I */ sizeof(unsigned int),
502 #if defined(HAS_QUAD)
503 /* Q */ sizeof(Uquad_t),
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
529 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
537 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
549 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
557 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
568 uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
581 if (ckWARN(WARN_UNPACK))
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
584 (int) TYPE_NO_MODIFIERS(datumtype));
591 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
596 uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
610 } else from += retlen;
617 /* We have enough characters for the buffer. Did we have problems ? */
620 /* Rewalk the string fragment while warning */
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
627 if (from > end) from = end;
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
633 (int) TYPE_NO_MODIFIERS(datumtype),
634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
641 next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
651 *out = PL_uudmap[val] & 077;
657 bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
661 while (start < end) {
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
681 #define PUSH_BYTES(utf8, cur, buf, len) \
683 if (utf8) bytes_to_uni(aTHX_ buf, len, &(cur)); \
685 Copy(buf, cur, len, char); \
690 #define GROWING(utf8, cat, start, cur, in_len) \
692 STRLEN glen = (in_len); \
693 if (utf8) glen *= UTF8_EXPAND; \
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
700 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
702 STRLEN glen = (in_len); \
704 if (utf8) gl *= UTF8_EXPAND; \
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
711 PUSH_BYTES(utf8, cur, buf, glen); \
714 #define PUSH_BYTE(utf8, s, byte) \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
722 /* Only to be used inside a loop (see the break) */
723 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
735 /* Returns the sizeof() struct described by pat */
737 S_measure_struct(pTHX_ tempsym_t* symptr)
741 while (next_symbol(symptr)) {
745 switch (symptr->howlen) {
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
751 /* e_no_len and e_number */
752 len = symptr->length;
756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
766 case 'U': /* XXXX Is it correct? */
769 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
771 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
777 tempsym_t savsym = *symptr;
778 symptr->patptr = savsym.grpbeg;
779 symptr->patend = savsym.grpend;
780 /* XXXX Theoretically, we need to measure many times at
781 different positions, since the subexpression may contain
782 alignment commands, but be not of aligned length.
783 Need to detect this and croak(). */
784 size = measure_struct(symptr);
788 case 'X' | TYPE_IS_SHRIEKING:
789 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
791 if (!len) /* Avoid division by 0 */
793 len = total % len; /* Assumed: the start is aligned. */
798 Perl_croak(aTHX_ "'X' outside of string in %s",
799 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
801 case 'x' | TYPE_IS_SHRIEKING:
802 if (!len) /* Avoid division by 0 */
804 star = total % len; /* Assumed: the start is aligned. */
805 if (star) /* Other portable ways? */
829 size = sizeof(char*);
839 /* locate matching closing parenthesis or bracket
840 * returns char pointer to char after match, or NULL
843 S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
845 while (patptr < patend) {
853 while (patptr < patend && *patptr != '\n')
857 patptr = group_end(patptr, patend, ')') + 1;
859 patptr = group_end(patptr, patend, ']') + 1;
861 Perl_croak(aTHX_ "No group ending character '%c' found in template",
867 /* Convert unsigned decimal number to binary.
868 * Expects a pointer to the first digit and address of length variable
869 * Advances char pointer to 1st non-digit char and returns number
872 S_get_num(pTHX_ register char *patptr, I32 *lenptr )
874 I32 len = *patptr++ - '0';
875 while (isDIGIT(*patptr)) {
876 if (len >= 0x7FFFFFFF/10)
877 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
878 len = (len * 10) + (*patptr++ - '0');
884 /* The marvellous template parsing routine: Using state stored in *symptr,
885 * locates next template code and count
888 S_next_symbol(pTHX_ tempsym_t* symptr )
890 char* patptr = symptr->patptr;
891 char* patend = symptr->patend;
892 const char *allowed = "";
894 symptr->flags &= ~FLAG_SLASH;
896 while (patptr < patend) {
897 if (isSPACE(*patptr))
899 else if (*patptr == '#') {
901 while (patptr < patend && *patptr != '\n')
906 /* We should have found a template code */
907 I32 code = *patptr++ & 0xFF;
908 U32 inherited_modifiers = 0;
910 if (code == ','){ /* grandfather in commas but with a warning */
911 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
912 symptr->flags |= FLAG_COMMA;
913 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
914 "Invalid type ',' in %s",
915 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
920 /* for '(', skip to ')' */
922 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
923 Perl_croak(aTHX_ "()-group starts with a count in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 symptr->grpbeg = patptr;
926 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
927 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
928 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
932 /* look for group modifiers to inherit */
933 if (TYPE_ENDIANNESS(symptr->flags)) {
934 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
935 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
938 /* look for modifiers */
939 while (patptr < patend) {
943 modifier = TYPE_IS_SHRIEKING;
944 allowed = SHRIEKING_ALLOWED_TYPES;
946 #ifdef PERL_PACK_CAN_BYTEORDER
948 modifier = TYPE_IS_BIG_ENDIAN;
949 allowed = ENDIANNESS_ALLOWED_TYPES;
952 modifier = TYPE_IS_LITTLE_ENDIAN;
953 allowed = ENDIANNESS_ALLOWED_TYPES;
955 #endif /* PERL_PACK_CAN_BYTEORDER */
963 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
964 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
965 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
967 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
968 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
969 (int) TYPE_NO_MODIFIERS(code),
970 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
971 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
972 TYPE_ENDIANNESS_MASK)
973 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
974 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
976 if (ckWARN(WARN_UNPACK)) {
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Duplicate modifier '%c' after '%c' in %s",
980 *patptr, (int) TYPE_NO_MODIFIERS(code),
981 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
988 /* inherit modifiers */
989 code |= inherited_modifiers;
991 /* look for count and/or / */
992 if (patptr < patend) {
993 if (isDIGIT(*patptr)) {
994 patptr = get_num( patptr, &symptr->length );
995 symptr->howlen = e_number;
997 } else if (*patptr == '*') {
999 symptr->howlen = e_star;
1001 } else if (*patptr == '[') {
1002 char* lenptr = ++patptr;
1003 symptr->howlen = e_number;
1004 patptr = group_end( patptr, patend, ']' ) + 1;
1005 /* what kind of [] is it? */
1006 if (isDIGIT(*lenptr)) {
1007 lenptr = get_num( lenptr, &symptr->length );
1008 if( *lenptr != ']' )
1009 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1010 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1012 tempsym_t savsym = *symptr;
1013 symptr->patend = patptr-1;
1014 symptr->patptr = lenptr;
1015 savsym.length = measure_struct(symptr);
1019 symptr->howlen = e_no_len;
1024 while (patptr < patend) {
1025 if (isSPACE(*patptr))
1027 else if (*patptr == '#') {
1029 while (patptr < patend && *patptr != '\n')
1031 if (patptr < patend)
1034 if (*patptr == '/') {
1035 symptr->flags |= FLAG_SLASH;
1037 if (patptr < patend &&
1038 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1039 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1040 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1046 /* at end - no count, no / */
1047 symptr->howlen = e_no_len;
1051 symptr->code = code;
1052 symptr->patptr = patptr;
1056 symptr->patptr = patptr;
1061 There is no way to cleanly handle the case where we should process the
1062 string per byte in its upgraded form while it's really in downgraded form
1063 (e.g. estimates like strend-s as an upper bound for the number of
1064 characters left wouldn't work). So if we foresee the need of this
1065 (pattern starts with U or contains U0), we want to work on the encoded
1066 version of the string. Users are advised to upgrade their pack string
1067 themselves if they need to do a lot of unpacks like this on it
1070 need_utf8(const char *pat, const char *patend)
1073 while (pat < patend) {
1074 if (pat[0] == '#') {
1076 pat = memchr(pat, '\n', patend-pat);
1077 if (!pat) return FALSE;
1078 } else if (pat[0] == 'U') {
1079 if (first || pat[1] == '0') return TRUE;
1080 } else first = FALSE;
1087 first_symbol(const char *pat, const char *patend) {
1088 while (pat < patend) {
1089 if (pat[0] != '#') return pat[0];
1091 pat = memchr(pat, '\n', patend-pat);
1099 =for apidoc unpack_str
1101 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1102 and ocnt are not used. This call should not be used, use unpackstring instead.
1107 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
1109 tempsym_t sym = { 0 };
1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
1116 s = (char *) bytes_to_utf8(s, &len);
1119 flags |= FLAG_DO_UTF8;
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
1126 sym.patend = patend;
1129 return unpack_rec(&sym, s, s, strend, NULL );
1133 =for apidoc unpackstring
1135 The engine implementing unpack() Perl function. C<unpackstring> puts the
1136 extracted list items on the stack and returns the number of elements.
1137 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1142 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
1144 tempsym_t sym = { 0 };
1146 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1147 else if (need_utf8(pat, patend)) {
1148 /* We probably should try to avoid this in case a scalar context call
1149 wouldn't get to the "U0" */
1150 STRLEN len = strend - s;
1151 s = (char *) bytes_to_utf8(s, &len);
1154 flags |= FLAG_DO_UTF8;
1157 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1158 flags |= FLAG_PARSE_UTF8;
1161 sym.patend = patend;
1164 return unpack_rec(&sym, s, s, strend, NULL );
1169 S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
1173 I32 start_sp_offset = SP - PL_stack_base;
1179 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1180 char* strrelbeg = s;
1181 bool beyond = FALSE;
1182 bool explicit_length;
1183 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1184 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1186 while (next_symbol(symptr)) {
1189 I32 datumtype = symptr->code;
1190 /* do first one only unless in list context
1191 / is implemented by unpacking the count, then popping it from the
1192 stack, so must check that we're not in the middle of a / */
1193 if ( unpack_only_one
1194 && (SP - PL_stack_base == start_sp_offset + 1)
1195 && (datumtype != '/') ) /* XXX can this be omitted */
1198 switch (howlen = symptr->howlen) {
1200 len = strend - strbeg; /* long enough */
1203 /* e_no_len and e_number */
1204 len = symptr->length;
1208 explicit_length = TRUE;
1210 beyond = s >= strend;
1212 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1214 /* props nonzero means we can process this letter. */
1215 long size = props & PACK_SIZE_MASK;
1216 long howmany = (strend - s) / size;
1220 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1221 if (len && unpack_only_one) len = 1;
1227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1229 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1232 if (howlen == e_no_len)
1233 len = 16; /* len is not specified */
1241 tempsym_t savsym = *symptr;
1242 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1243 symptr->flags |= group_modifiers;
1244 symptr->patend = savsym.grpend;
1248 symptr->patptr = savsym.grpbeg;
1249 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1250 else symptr->flags &= ~FLAG_PARSE_UTF8;
1251 unpack_rec(symptr, s, strbeg, strend, &s);
1252 if (s == strend && savsym.howlen == e_star)
1253 break; /* No way to continue */
1256 symptr->flags &= ~group_modifiers;
1257 savsym.flags = symptr->flags;
1266 Perl_croak(aTHX_ "'@' outside of string in unpack");
1271 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1273 if (len > strend - strrelbeg)
1274 Perl_croak(aTHX_ "'@' outside of string in unpack");
1275 s = strrelbeg + len;
1278 case 'X' | TYPE_IS_SHRIEKING:
1279 if (!len) /* Avoid division by 0 */
1284 hop = last = strbeg;
1286 hop += UTF8SKIP(hop);
1293 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1297 len = (s - strbeg) % len;
1303 Perl_croak(aTHX_ "'X' outside of string in unpack");
1304 while (--s, UTF8_IS_CONTINUATION(*s)) {
1306 Perl_croak(aTHX_ "'X' outside of string in unpack");
1311 if (len > s - strbeg)
1312 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1316 case 'x' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1319 if (utf8) ai32 = utf8_length(strbeg, s) % len;
1320 else ai32 = (s - strbeg) % len;
1321 if (ai32 == 0) break;
1328 Perl_croak(aTHX_ "'x' outside of string in unpack");
1333 if (len > strend - s)
1334 Perl_croak(aTHX_ "'x' outside of string in unpack");
1339 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1345 /* Preliminary length estimate is assumed done in 'W' */
1346 if (len > strend - s) len = strend - s;
1352 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1353 if (hop >= strend) {
1355 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1360 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1362 } else if (len > strend - s)
1365 if (datumtype == 'Z') {
1366 /* 'Z' strips stuff after first null */
1369 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1370 sv = newSVpvn(s, ptr-s);
1371 if (howlen == e_star) /* exact for 'Z*' */
1372 len = ptr-s + (ptr != strend ? 1 : 0);
1373 } else if (datumtype == 'A') {
1374 /* 'A' strips both nulls and spaces */
1376 for (ptr = s+len-1; ptr >= s; ptr--)
1377 if (*ptr != 0 && !isSPACE(*ptr)) break;
1379 sv = newSVpvn(s, ptr-s);
1380 } else sv = newSVpvn(s, len);
1384 /* Undo any upgrade done due to need_utf8() */
1385 if (!(symptr->flags & FLAG_WAS_UTF8))
1386 sv_utf8_downgrade(sv, 0);
1388 XPUSHs(sv_2mortal(sv));
1394 if (howlen == e_star || len > (strend - s) * 8)
1395 len = (strend - s) * 8;
1399 Newz(601, PL_bitcount, 256, char);
1400 for (bits = 1; bits < 256; bits++) {
1401 if (bits & 1) PL_bitcount[bits]++;
1402 if (bits & 2) PL_bitcount[bits]++;
1403 if (bits & 4) PL_bitcount[bits]++;
1404 if (bits & 8) PL_bitcount[bits]++;
1405 if (bits & 16) PL_bitcount[bits]++;
1406 if (bits & 32) PL_bitcount[bits]++;
1407 if (bits & 64) PL_bitcount[bits]++;
1408 if (bits & 128) PL_bitcount[bits]++;
1412 while (len >= 8 && s < strend) {
1413 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1418 cuv += PL_bitcount[*(U8 *)s++];
1421 if (len && s < strend) {
1423 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1424 if (datumtype == 'b')
1426 if (bits & 1) cuv++;
1431 if (bits & 0x80) cuv++;
1438 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1441 if (datumtype == 'b') {
1444 for (len = 0; len < ai32; len++) {
1445 if (len & 7) bits >>= 1;
1447 if (s >= strend) break;
1448 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1449 } else bits = *(U8 *) s++;
1450 *str++ = bits & 1 ? '1' : '0';
1455 for (len = 0; len < ai32; len++) {
1456 if (len & 7) bits <<= 1;
1458 if (s >= strend) break;
1459 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1460 } else bits = *(U8 *) s++;
1461 *str++ = bits & 0x80 ? '1' : '0';
1465 SvCUR_set(sv, str - SvPVX(sv));
1472 /* Preliminary length estimate, acceptable for utf8 too */
1473 if (howlen == e_star || len > (strend - s) * 2)
1474 len = (strend - s) * 2;
1475 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1478 if (datumtype == 'h') {
1481 for (len = 0; len < ai32; len++) {
1482 if (len & 1) bits >>= 4;
1484 if (s >= strend) break;
1485 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1486 } else bits = * (U8 *) s++;
1487 *str++ = PL_hexdigit[bits & 15];
1492 for (len = 0; len < ai32; len++) {
1493 if (len & 1) bits <<= 4;
1495 if (s >= strend) break;
1496 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1497 } else bits = *(U8 *) s++;
1498 *str++ = PL_hexdigit[(bits >> 4) & 15];
1502 SvCUR_set(sv, str - SvPVX(sv));
1508 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1509 if (aint >= 128) /* fake up signed chars */
1512 PUSHs(sv_2mortal(newSViv((IV)aint)));
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV)aint;
1523 if (explicit_length && datumtype == 'C')
1524 /* Switch to "character" mode */
1525 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1528 if (datumtype == 'C' ?
1529 (symptr->flags & FLAG_DO_UTF8) &&
1530 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1531 while (len-- > 0 && s < strend) {
1534 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1535 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1536 if (retlen == (STRLEN) -1 || retlen == 0)
1537 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1540 PUSHs(sv_2mortal(newSVuv((UV) val)));
1541 else if (checksum > bits_in_uv)
1542 cdouble += (NV) val;
1546 } else if (!checksum)
1548 U8 ch = *(U8 *) s++;
1549 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1551 else if (checksum > bits_in_uv)
1552 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1554 while (len-- > 0) cuv += *(U8 *) s++;
1558 if (explicit_length) {
1559 /* Switch to "bytes in UTF-8" mode */
1560 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1562 /* Should be impossible due to the need_utf8() test */
1563 Perl_croak(aTHX_ "U0 mode on a byte string");
1567 if (len > strend - s) len = strend - s;
1569 if (len && unpack_only_one) len = 1;
1573 while (len-- > 0 && s < strend) {
1577 U8 result[UTF8_MAXLEN];
1581 /* Bug: warns about bad utf8 even if we are short on bytes
1582 and will break out of the loop */
1583 if (!uni_to_bytes(aTHX_ &ptr, strend, result, 1, 'U'))
1585 len = UTF8SKIP(result);
1586 if (!uni_to_bytes(aTHX_ &ptr, strend,
1587 &result[1], len-1, 'U')) break;
1588 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1591 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1592 if (retlen == (STRLEN) -1 || retlen == 0)
1593 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1597 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1598 else if (checksum > bits_in_uv)
1599 cdouble += (NV) auv;
1604 case 's' | TYPE_IS_SHRIEKING:
1605 #if SHORTSIZE != SIZE16
1608 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1609 DO_BO_UNPACK(ashort, s);
1611 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1612 else if (checksum > bits_in_uv)
1613 cdouble += (NV)ashort;
1625 #if U16SIZE > SIZE16
1628 SHIFT16(utf8, s, strend, &ai16, datumtype);
1629 DO_BO_UNPACK(ai16, 16);
1630 #if U16SIZE > SIZE16
1635 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV)ai16;
1642 case 'S' | TYPE_IS_SHRIEKING:
1643 #if SHORTSIZE != SIZE16
1645 unsigned short aushort;
1646 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1647 DO_BO_UNPACK(aushort, s);
1649 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aushort;
1664 #if U16SIZE > SIZE16
1667 SHIFT16(utf8, s, strend, &au16, datumtype);
1668 DO_BO_UNPACK(au16, 16);
1670 if (datumtype == 'n')
1671 au16 = PerlSock_ntohs(au16);
1674 if (datumtype == 'v')
1678 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV) au16;
1685 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1686 case 'v' | TYPE_IS_SHRIEKING:
1687 case 'n' | TYPE_IS_SHRIEKING:
1690 # if U16SIZE > SIZE16
1693 SHIFT16(utf8, s, strend, &ai16, datumtype);
1695 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1696 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1697 # endif /* HAS_NTOHS */
1699 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1700 ai16 = (I16) vtohs((U16) ai16);
1701 # endif /* HAS_VTOHS */
1703 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1704 else if (checksum > bits_in_uv)
1705 cdouble += (NV) ai16;
1710 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1712 case 'i' | TYPE_IS_SHRIEKING:
1715 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1716 DO_BO_UNPACK(aint, i);
1718 PUSHs(sv_2mortal(newSViv((IV)aint)));
1719 else if (checksum > bits_in_uv)
1720 cdouble += (NV)aint;
1726 case 'I' | TYPE_IS_SHRIEKING:
1729 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1730 DO_BO_UNPACK(auint, i);
1732 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1733 else if (checksum > bits_in_uv)
1734 cdouble += (NV)auint;
1742 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1743 #if IVSIZE == INTSIZE
1744 DO_BO_UNPACK(aiv, i);
1745 #elif IVSIZE == LONGSIZE
1746 DO_BO_UNPACK(aiv, l);
1747 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1748 DO_BO_UNPACK(aiv, 64);
1750 Perl_croak(aTHX_ "'j' not supported on this platform");
1753 PUSHs(sv_2mortal(newSViv(aiv)));
1754 else if (checksum > bits_in_uv)
1763 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1764 #if IVSIZE == INTSIZE
1765 DO_BO_UNPACK(auv, i);
1766 #elif IVSIZE == LONGSIZE
1767 DO_BO_UNPACK(auv, l);
1768 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1769 DO_BO_UNPACK(auv, 64);
1771 Perl_croak(aTHX_ "'J' not supported on this platform");
1774 PUSHs(sv_2mortal(newSVuv(auv)));
1775 else if (checksum > bits_in_uv)
1781 case 'l' | TYPE_IS_SHRIEKING:
1782 #if LONGSIZE != SIZE32
1785 SHIFT_VAR(utf8, s, strend, along, datumtype);
1786 DO_BO_UNPACK(along, l);
1788 PUSHs(sv_2mortal(newSViv((IV)along)));
1789 else if (checksum > bits_in_uv)
1790 cdouble += (NV)along;
1801 #if U32SIZE > SIZE32
1804 SHIFT32(utf8, s, strend, &ai32, datumtype);
1805 DO_BO_UNPACK(ai32, 32);
1806 #if U32SIZE > SIZE32
1807 if (ai32 > 2147483647) ai32 -= 4294967296;
1810 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)ai32;
1817 case 'L' | TYPE_IS_SHRIEKING:
1818 #if LONGSIZE != SIZE32
1820 unsigned long aulong;
1821 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1822 DO_BO_UNPACK(aulong, l);
1824 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1825 else if (checksum > bits_in_uv)
1826 cdouble += (NV)aulong;
1839 #if U32SIZE > SIZE32
1842 SHIFT32(utf8, s, strend, &au32, datumtype);
1843 DO_BO_UNPACK(au32, 32);
1845 if (datumtype == 'N')
1846 au32 = PerlSock_ntohl(au32);
1849 if (datumtype == 'V')
1853 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1854 else if (checksum > bits_in_uv)
1855 cdouble += (NV)au32;
1860 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1861 case 'V' | TYPE_IS_SHRIEKING:
1862 case 'N' | TYPE_IS_SHRIEKING:
1865 # if U32SIZE > SIZE32
1868 SHIFT32(utf8, s, strend, &ai32, datumtype);
1870 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1871 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1874 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1875 ai32 = (I32)vtohl((U32)ai32);
1878 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1879 else if (checksum > bits_in_uv)
1880 cdouble += (NV)ai32;
1885 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1889 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1890 DO_BO_UNPACK_P(aptr);
1891 /* newSVpv generates undef if aptr is NULL */
1892 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1900 while (len > 0 && s < strend) {
1902 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1903 auv = (auv << 7) | (ch & 0x7f);
1904 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1907 PUSHs(sv_2mortal(newSVuv(auv)));
1912 if (++bytes >= sizeof(UV)) { /* promote to string */
1916 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1917 while (s < strend) {
1918 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1919 sv = mul128(sv, (U8)(ch & 0x7f));
1929 PUSHs(sv_2mortal(sv));
1934 if ((s >= strend) && bytes)
1935 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1939 if (symptr->howlen == e_star)
1940 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1942 if (sizeof(char*) <= strend - s) {
1944 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1945 DO_BO_UNPACK_P(aptr);
1946 /* newSVpvn generates undef if aptr is NULL */
1947 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
1954 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1955 DO_BO_UNPACK(aquad, 64);
1957 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
1958 newSViv((IV)aquad) : newSVnv((NV)aquad)));
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)aquad;
1968 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1969 DO_BO_UNPACK(auquad, 64);
1971 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1972 newSVuv((UV)auquad):newSVnv((NV)auquad)));
1973 else if (checksum > bits_in_uv)
1974 cdouble += (NV)auquad;
1979 #endif /* HAS_QUAD */
1980 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1984 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1985 DO_BO_UNPACK_N(afloat, float);
1987 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
1995 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1996 DO_BO_UNPACK_N(adouble, double);
1998 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2006 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2007 DO_BO_UNPACK_N(anv, NV);
2009 PUSHs(sv_2mortal(newSVnv(anv)));
2014 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2017 long double aldouble;
2018 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2019 DO_BO_UNPACK_N(aldouble, long double);
2021 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2023 cdouble += aldouble;
2029 * Initialise the decode mapping. By using a table driven
2030 * algorithm, the code will be character-set independent
2031 * (and just as fast as doing character arithmetic)
2033 if (PL_uudmap['M'] == 0) {
2036 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2037 PL_uudmap[(U8)PL_uuemap[i]] = i;
2039 * Because ' ' and '`' map to the same value,
2040 * we need to decode them both the same.
2045 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2046 sv = sv_2mortal(NEWSV(42, l));
2047 if (l) SvPOK_on(sv);
2050 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2056 next_uni_uu(aTHX_ &s, strend, &a);
2057 next_uni_uu(aTHX_ &s, strend, &b);
2058 next_uni_uu(aTHX_ &s, strend, &c);
2059 next_uni_uu(aTHX_ &s, strend, &d);
2060 hunk[0] = (char)((a << 2) | (b >> 4));
2061 hunk[1] = (char)((b << 4) | (c >> 2));
2062 hunk[2] = (char)((c << 6) | d);
2063 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2067 if (*s == '\n') s++;
2069 /* possible checksum byte */
2070 char *skip = s+UTF8SKIP(s);
2071 if (skip < strend && *skip == '\n') s = skip+1;
2076 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2081 len = PL_uudmap[*(U8*)s++] & 077;
2083 if (s < strend && ISUUCHAR(*s))
2084 a = PL_uudmap[*(U8*)s++] & 077;
2087 if (s < strend && ISUUCHAR(*s))
2088 b = PL_uudmap[*(U8*)s++] & 077;
2091 if (s < strend && ISUUCHAR(*s))
2092 c = PL_uudmap[*(U8*)s++] & 077;
2095 if (s < strend && ISUUCHAR(*s))
2096 d = PL_uudmap[*(U8*)s++] & 077;
2099 hunk[0] = (char)((a << 2) | (b >> 4));
2100 hunk[1] = (char)((b << 4) | (c >> 2));
2101 hunk[2] = (char)((c << 6) | d);
2102 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2107 else /* possible checksum byte */
2108 if (s + 1 < strend && s[1] == '\n')
2117 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2118 (checksum > bits_in_uv &&
2119 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2122 anv = (NV) (1 << (checksum & 15));
2123 while (checksum >= 16) {
2127 while (cdouble < 0.0)
2129 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2130 sv = newSVnv(cdouble);
2133 if (checksum < bits_in_uv) {
2134 UV mask = ((UV)1 << checksum) - 1;
2139 XPUSHs(sv_2mortal(sv));
2143 if (symptr->flags & FLAG_SLASH){
2144 if (SP - PL_stack_base - start_sp_offset <= 0)
2145 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2146 if( next_symbol(symptr) ){
2147 if( symptr->howlen == e_number )
2148 Perl_croak(aTHX_ "Count after length/code in unpack" );
2150 /* ...end of char buffer then no decent length available */
2151 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2153 /* take top of stack (hope it's numeric) */
2156 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2159 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2161 datumtype = symptr->code;
2162 explicit_length = FALSE;
2170 return SP - PL_stack_base - start_sp_offset;
2177 I32 gimme = GIMME_V;
2180 char *pat = SvPV(left, llen);
2181 char *s = SvPV(right, rlen);
2182 char *strend = s + rlen;
2183 char *patend = pat + llen;
2187 cnt = unpackstring(pat, patend, s, strend,
2188 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2189 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2192 if ( !cnt && gimme == G_SCALAR )
2193 PUSHs(&PL_sv_undef);
2198 doencodes(U8 *h, char *s, I32 len)
2200 *h++ = PL_uuemap[len];
2202 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2203 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2204 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2205 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2210 char r = (len > 1 ? s[1] : '\0');
2211 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2212 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2213 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2214 *h++ = PL_uuemap[0];
2221 S_is_an_int(pTHX_ char *s, STRLEN l)
2224 SV *result = newSVpvn(s, l);
2225 char *result_c = SvPV(result, n_a); /* convenience */
2226 char *out = result_c;
2236 SvREFCNT_dec(result);
2259 SvREFCNT_dec(result);
2265 SvCUR_set(result, out - result_c);
2269 /* pnum must be '\0' terminated */
2271 S_div128(pTHX_ SV *pnum, bool *done)
2274 char *s = SvPV(pnum, len);
2283 i = m * 10 + (*t - '0');
2285 r = (i >> 7); /* r < 10 */
2292 SvCUR_set(pnum, (STRLEN) (t - s));
2299 =for apidoc pack_cat
2301 The engine implementing pack() Perl function. Note: parameters next_in_list and
2302 flags are not used. This call should not be used; use packlist instead.
2308 Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2310 tempsym_t sym = { 0 };
2312 sym.patend = patend;
2313 sym.flags = FLAG_PACK;
2315 (void)pack_rec( cat, &sym, beglist, endlist );
2320 =for apidoc packlist
2322 The engine implementing pack() Perl function.
2328 Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2331 tempsym_t sym = { 0 };
2334 sym.patend = patend;
2335 sym.flags = FLAG_PACK;
2337 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2338 Also make sure any UTF8 flag is loaded */
2339 SvPV_force(cat, no_len);
2340 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2342 (void)pack_rec( cat, &sym, beglist, endlist );
2345 /* like sv_utf8_upgrade, but also repoint the group start markers */
2347 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2350 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2352 if (SvUTF8(sv)) return;
2354 from_start = SvPVX(sv);
2355 from_end = from_start + SvCUR(sv);
2356 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2357 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2358 if (from_ptr == from_end) {
2359 /* Simple case: no character needs to be changed */
2364 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2365 New('U', to_start, len, char);
2366 Copy(from_start, to_start, from_ptr-from_start, char);
2367 to_ptr = to_start + (from_ptr-from_start);
2369 New('U', marks, sym_ptr->level+2, char *);
2370 for (group=sym_ptr; group; group = group->previous)
2371 marks[group->level] = from_start + group->strbeg;
2372 marks[sym_ptr->level+1] = from_end+1;
2373 for (m = marks; *m < from_ptr; m++)
2374 *m = to_start + (*m-from_start);
2376 for (;from_ptr < from_end; from_ptr++) {
2377 while (*m == from_ptr) *m++ = to_ptr;
2378 to_ptr = uvchr_to_utf8(to_ptr, *(U8 *) from_ptr);
2382 while (*m == from_ptr) *m++ = to_ptr;
2383 if (m != marks + sym_ptr->level+1) {
2386 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2388 for (group=sym_ptr; group; group = group->previous)
2389 group->strbeg = marks[group->level] - to_start;
2394 SvLEN(sv) += SvIVX(sv);
2395 from_start -= SvIVX(sv);
2398 SvFLAGS(sv) &= ~SVf_OOK;
2401 Safefree(from_start);
2402 SvPVX(sv) = to_start;
2403 SvCUR(sv) = to_ptr - to_start;
2408 /* Exponential string grower. Makes string extension effectively O(n)
2409 needed says how many extra bytes we need (not counting the final '\0')
2410 Only grows the string if there is an actual lack of space
2413 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2414 STRLEN cur = SvCUR(sv);
2415 STRLEN len = SvLEN(sv);
2417 if (len - cur > needed) return SvPVX(sv);
2418 extend = needed > len ? needed : len;
2419 return SvGROW(sv, len+extend+1);
2424 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2426 tempsym_t lookahead;
2427 I32 items = endlist - beglist;
2428 bool found = next_symbol(symptr);
2429 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2431 if (symptr->level == 0 && found && symptr->code == 'U') {
2432 marked_upgrade(aTHX_ cat, symptr);
2433 symptr->flags |= FLAG_DO_UTF8;
2436 symptr->strbeg = SvCUR(cat);
2442 SV *lengthcode = Nullsv;
2443 I32 datumtype = symptr->code;
2444 howlen_t howlen = symptr->howlen;
2445 char *start = SvPVX(cat);
2446 char *cur = start + SvCUR(cat);
2448 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2452 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2456 /* e_no_len and e_number */
2457 len = symptr->length;
2462 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2464 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2465 /* We can process this letter. */
2466 STRLEN size = props & PACK_SIZE_MASK;
2467 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2471 /* Look ahead for next symbol. Do we have code/code? */
2472 lookahead = *symptr;
2473 found = next_symbol(&lookahead);
2474 if ( symptr->flags & FLAG_SLASH ) {
2475 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2476 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2477 e_star != lookahead.howlen )
2478 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
2480 sv_2mortal(newSViv((items > 0 ? DO_UTF8(*beglist) ? sv_len_utf8(*beglist) : sv_len(*beglist) : 0) + (lookahead.code == 'Z' ? 1 : 0)));
2483 /* Code inside the switch must take care to properly update
2484 cat (CUR length and '\0' termination) if it updated *cur and
2485 doesn't simply leave using break */
2486 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2488 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2489 (int) TYPE_NO_MODIFIERS(datumtype));
2491 Perl_croak(aTHX_ "'%%' may not be used in pack");
2494 char *s = start + symptr->strbeg;
2495 while (len > 0 && s < cur) {
2500 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2503 GROWING(0, cat, start, cur, len);
2504 Zero(cur, len, char);
2506 } else if (s < cur) cur = s;
2507 else goto no_change;
2509 len -= cur - (start+symptr->strbeg);
2510 if (len > 0) goto grow;
2512 if (len > 0) goto shrink;
2513 else goto no_change;
2517 tempsym_t savsym = *symptr;
2518 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2519 symptr->flags |= group_modifiers;
2520 symptr->patend = savsym.grpend;
2522 symptr->previous = &lookahead;
2525 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2526 else symptr->flags &= ~FLAG_PARSE_UTF8;
2527 was_utf8 = SvUTF8(cat);
2528 symptr->patptr = savsym.grpbeg;
2529 beglist = pack_rec(cat, symptr, beglist, endlist);
2530 if (SvUTF8(cat) != was_utf8)
2531 /* This had better be an upgrade while in utf8==0 mode */
2534 if (savsym.howlen == e_star && beglist == endlist)
2535 break; /* No way to continue */
2537 lookahead.flags = symptr->flags & ~group_modifiers;
2540 case 'X' | TYPE_IS_SHRIEKING:
2541 if (!len) /* Avoid division by 0 */
2548 hop += UTF8SKIP(hop);
2555 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2559 len = (cur-start) % len;
2563 if (len < 1) goto no_change;
2566 Perl_croak(aTHX_ "'X' outside of string in pack");
2567 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2569 Perl_croak(aTHX_ "'X' outside of string in pack");
2575 if (cur - start < len)
2576 Perl_croak(aTHX_ "'X' outside of string in pack");
2579 if (cur < start+symptr->strbeg) {
2580 /* Make sure group starts don't point into the void */
2582 STRLEN length = cur-start;
2583 for (group = symptr;
2584 group && length < group->strbeg;
2585 group = group->previous) group->strbeg = length;
2586 lookahead.strbeg = length;
2589 case 'x' | TYPE_IS_SHRIEKING: {
2591 if (!len) /* Avoid division by 0 */
2593 if (utf8) ai32 = utf8_length(start, cur) % len;
2594 else ai32 = (cur - start) % len;
2595 if (ai32 == 0) goto no_change;
2607 aptr = SvPV(fromstr, fromlen);
2608 if (DO_UTF8(fromstr)) {
2611 if (!utf8 && !SvUTF8(cat)) {
2612 marked_upgrade(aTHX_ cat, symptr);
2613 lookahead.flags |= FLAG_DO_UTF8;
2614 lookahead.strbeg = symptr->strbeg;
2617 cur = start + SvCUR(cat);
2619 if (howlen == e_star) {
2620 if (utf8) goto string_copy;
2624 end = aptr + fromlen;
2625 fromlen = datumtype == 'Z' ? len-1 : len;
2626 while ((I32) fromlen > 0 && s < end) {
2631 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2634 if (datumtype == 'Z') len++;
2640 fromlen = len - fromlen;
2641 if (datumtype == 'Z') fromlen--;
2642 if (howlen == e_star) {
2644 if (datumtype == 'Z') len++;
2646 GROWING(0, cat, start, cur, len);
2647 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2648 datumtype | TYPE_IS_PACK))
2649 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2653 if (howlen == e_star) {
2655 if (datumtype == 'Z') len++;
2657 if (len <= (I32) fromlen) {
2659 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2661 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2663 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2664 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2666 while (fromlen > 0) {
2667 cur = uvchr_to_utf8(cur, * (U8 *) aptr);
2673 if (howlen == e_star) {
2675 if (datumtype == 'Z') len++;
2677 if (len <= (I32) fromlen) {
2679 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2681 GROWING(0, cat, start, cur, len);
2682 Copy(aptr, cur, fromlen, char);
2686 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2699 str = SvPV(fromstr, fromlen);
2700 end = str + fromlen;
2701 if (DO_UTF8(fromstr)) {
2703 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2705 utf8_source = FALSE;
2706 utf8_flags = 0; /* Unused, but keep compilers happy */
2708 if (howlen == e_star) len = fromlen;
2709 field_len = (len+7)/8;
2710 GROWING(utf8, cat, start, cur, field_len);
2711 if (len > (I32)fromlen) len = fromlen;
2714 if (datumtype == 'B')
2718 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2720 } else bits |= *str++ & 1;
2721 if (l & 7) bits <<= 1;
2723 PUSH_BYTE(utf8, cur, bits);
2728 /* datumtype == 'b' */
2732 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2733 if (val & 1) bits |= 0x80;
2734 } else if (*str++ & 1)
2736 if (l & 7) bits >>= 1;
2738 PUSH_BYTE(utf8, cur, bits);
2744 if (datumtype == 'B')
2745 bits <<= 7 - (l & 7);
2747 bits >>= 7 - (l & 7);
2748 PUSH_BYTE(utf8, cur, bits);
2751 /* Determine how many chars are left in the requested field */
2753 if (howlen == e_star) field_len = 0;
2754 else field_len -= l;
2755 Zero(cur, field_len, char);
2768 str = SvPV(fromstr, fromlen);
2769 end = str + fromlen;
2770 if (DO_UTF8(fromstr)) {
2772 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2774 utf8_source = FALSE;
2775 utf8_flags = 0; /* Unused, but keep compilers happy */
2777 if (howlen == e_star) len = fromlen;
2778 field_len = (len+1)/2;
2779 GROWING(utf8, cat, start, cur, field_len);
2780 if (!utf8 && len > (I32)fromlen) len = fromlen;
2783 if (datumtype == 'H')
2787 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2788 if (val < 256 && isALPHA(val))
2789 bits |= (val + 9) & 0xf;
2792 } else if (isALPHA(*str))
2793 bits |= (*str++ + 9) & 0xf;
2795 bits |= *str++ & 0xf;
2796 if (l & 1) bits <<= 4;
2798 PUSH_BYTE(utf8, cur, bits);
2806 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2807 if (val < 256 && isALPHA(val))
2808 bits |= ((val + 9) & 0xf) << 4;
2810 bits |= (val & 0xf) << 4;
2811 } else if (isALPHA(*str))
2812 bits |= ((*str++ + 9) & 0xf) << 4;
2814 bits |= (*str++ & 0xf) << 4;
2815 if (l & 1) bits >>= 4;
2817 PUSH_BYTE(utf8, cur, bits);
2823 PUSH_BYTE(utf8, cur, bits);
2826 /* Determine how many chars are left in the requested field */
2828 if (howlen == e_star) field_len = 0;
2829 else field_len -= l;
2830 Zero(cur, field_len, char);
2838 aiv = SvIV(fromstr);
2839 if ((-128 > aiv || aiv > 127) &&
2841 Perl_warner(aTHX_ packWARN(WARN_PACK),
2842 "Character in 'c' format wrapped in pack");
2843 PUSH_BYTE(utf8, cur, aiv & 0xff);
2848 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2851 GROWING(0, cat, start, cur, len);
2855 aiv = SvIV(fromstr);
2856 if ((0 > aiv || aiv > 0xff) &&
2858 Perl_warner(aTHX_ packWARN(WARN_PACK),
2859 "Character in 'C' format wrapped in pack");
2860 *cur++ = aiv & 0xff;
2865 U8 in_bytes = IN_BYTES;
2867 end = start+SvLEN(cat)-1;
2868 if (utf8) end -= UTF8_MAXLEN-1;
2872 auv = SvUV(fromstr);
2873 if (in_bytes) auv = auv % 0x100;
2878 SvCUR(cat) = cur - start;
2880 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2881 end = start+SvLEN(cat)-UTF8_MAXLEN;
2883 cur = uvuni_to_utf8_flags(cur, NATIVE_TO_UNI(auv),
2885 0 : UNICODE_ALLOW_ANY);
2890 SvCUR(cat) = cur - start;
2891 marked_upgrade(aTHX_ cat, symptr);
2892 lookahead.flags |= FLAG_DO_UTF8;
2893 lookahead.strbeg = symptr->strbeg;
2896 cur = start + SvCUR(cat);
2897 end = start+SvLEN(cat)-UTF8_MAXLEN;
2900 if (ckWARN(WARN_PACK))
2901 Perl_warner(aTHX_ packWARN(WARN_PACK),
2902 "Character in 'W' format wrapped in pack");
2907 SvCUR(cat) = cur - start;
2908 GROWING(0, cat, start, cur, len+1);
2909 end = start+SvLEN(cat)-1;
2911 *(U8 *) cur++ = auv;
2920 if (!(symptr->flags & FLAG_DO_UTF8)) {
2921 marked_upgrade(aTHX_ cat, symptr);
2922 lookahead.flags |= FLAG_DO_UTF8;
2923 lookahead.strbeg = symptr->strbeg;
2929 end = start+SvLEN(cat);
2930 if (!utf8) end -= UTF8_MAXLEN;
2934 auv = SvUV(fromstr);
2936 char buffer[UTF8_MAXLEN], *endb;
2937 endb = uvuni_to_utf8_flags(buffer, auv,
2939 0 : UNICODE_ALLOW_ANY);
2940 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2942 SvCUR(cat) = cur - start;
2943 GROWING(0, cat, start, cur,
2944 len+(endb-buffer)*UTF8_EXPAND);
2945 end = start+SvLEN(cat);
2947 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2951 SvCUR(cat) = cur - start;
2952 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2953 end = start+SvLEN(cat)-UTF8_MAXLEN;
2955 cur = uvuni_to_utf8_flags(cur, auv,
2957 0 : UNICODE_ALLOW_ANY);
2962 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2968 anv = SvNV(fromstr);
2970 /* VOS does not automatically map a floating-point overflow
2971 during conversion from double to float into infinity, so we
2972 do it by hand. This code should either be generalized for
2973 any OS that needs it, or removed if and when VOS implements
2974 posix-976 (suggestion to support mapping to infinity).
2975 Paul.Green@stratus.com 02-04-02. */
2977 afloat = _float_constants[0]; /* single prec. inf. */
2978 else if (anv < -FLT_MAX)
2979 afloat = _float_constants[0]; /* single prec. inf. */
2980 else afloat = (float) anv;
2982 # if defined(VMS) && !defined(__IEEE_FP)
2983 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2984 * on Alpha; fake it if we don't have them.
2988 else if (anv < -FLT_MAX)
2990 else afloat = (float)anv;
2992 afloat = (float)anv;
2994 #endif /* __VOS__ */
2995 DO_BO_PACK_N(afloat, float);
2996 PUSH_VAR(utf8, cur, afloat);
3004 anv = SvNV(fromstr);
3006 /* VOS does not automatically map a floating-point overflow
3007 during conversion from long double to double into infinity,
3008 so we do it by hand. This code should either be generalized
3009 for any OS that needs it, or removed if and when VOS
3010 implements posix-976 (suggestion to support mapping to
3011 infinity). Paul.Green@stratus.com 02-04-02. */
3013 adouble = _double_constants[0]; /* double prec. inf. */
3014 else if (anv < -DBL_MAX)
3015 adouble = _double_constants[0]; /* double prec. inf. */
3016 else adouble = (double) anv;
3018 # if defined(VMS) && !defined(__IEEE_FP)
3019 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3020 * on Alpha; fake it if we don't have them.
3024 else if (anv < -DBL_MAX)
3026 else adouble = (double)anv;
3028 adouble = (double)anv;
3030 #endif /* __VOS__ */
3031 DO_BO_PACK_N(adouble, double);
3032 PUSH_VAR(utf8, cur, adouble);
3037 Zero(&anv, 1, NV); /* can be long double with unused bits */
3040 anv = SvNV(fromstr);
3041 DO_BO_PACK_N(anv, NV);
3042 PUSH_VAR(utf8, cur, anv);
3046 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3048 long double aldouble;
3049 /* long doubles can have unused bits, which may be nonzero */
3050 Zero(&aldouble, 1, long double);
3053 aldouble = (long double)SvNV(fromstr);
3054 DO_BO_PACK_N(aldouble, long double);
3055 PUSH_VAR(utf8, cur, aldouble);
3060 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3061 case 'n' | TYPE_IS_SHRIEKING:
3067 ai16 = (I16)SvIV(fromstr);
3069 ai16 = PerlSock_htons(ai16);
3071 PUSH16(utf8, cur, &ai16);
3074 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3075 case 'v' | TYPE_IS_SHRIEKING:
3081 ai16 = (I16)SvIV(fromstr);
3085 PUSH16(utf8, cur, &ai16);
3088 case 'S' | TYPE_IS_SHRIEKING:
3089 #if SHORTSIZE != SIZE16
3091 unsigned short aushort;
3093 aushort = SvUV(fromstr);
3094 DO_BO_PACK(aushort, s);
3095 PUSH_VAR(utf8, cur, aushort);
3105 au16 = (U16)SvUV(fromstr);
3106 DO_BO_PACK(au16, 16);
3107 PUSH16(utf8, cur, &au16);
3110 case 's' | TYPE_IS_SHRIEKING:
3111 #if SHORTSIZE != SIZE16
3115 ashort = SvIV(fromstr);
3116 DO_BO_PACK(ashort, s);
3117 PUSH_VAR(utf8, cur, ashort);
3127 ai16 = (I16)SvIV(fromstr);
3128 DO_BO_PACK(ai16, 16);
3129 PUSH16(utf8, cur, &ai16);
3133 case 'I' | TYPE_IS_SHRIEKING:
3137 auint = SvUV(fromstr);
3138 DO_BO_PACK(auint, i);
3139 PUSH_VAR(utf8, cur, auint);
3146 aiv = SvIV(fromstr);
3147 #if IVSIZE == INTSIZE
3149 #elif IVSIZE == LONGSIZE
3151 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3152 DO_BO_PACK(aiv, 64);
3154 Perl_croak(aTHX_ "'j' not supported on this platform");
3156 PUSH_VAR(utf8, cur, aiv);
3163 auv = SvUV(fromstr);
3164 #if UVSIZE == INTSIZE
3166 #elif UVSIZE == LONGSIZE
3168 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3169 DO_BO_PACK(auv, 64);
3171 Perl_croak(aTHX_ "'J' not supported on this platform");
3173 PUSH_VAR(utf8, cur, auv);
3180 anv = SvNV(fromstr);
3184 SvCUR(cat) = cur - start;
3185 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3188 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3189 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3190 any negative IVs will have already been got by the croak()
3191 above. IOK is untrue for fractions, so we test them
3192 against UV_MAX_P1. */
3193 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3194 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3195 char *in = buf + sizeof(buf);
3196 UV auv = SvUV(fromstr);
3199 *--in = (char)((auv & 0x7f) | 0x80);
3202 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3203 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3204 in, (buf + sizeof(buf)) - in);
3205 } else if (SvPOKp(fromstr))
3207 else if (SvNOKp(fromstr)) {
3208 /* 10**NV_MAX_10_EXP is the largest power of 10
3209 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3210 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3211 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3212 And with that many bytes only Inf can overflow.
3213 Some C compilers are strict about integral constant
3214 expressions so we conservatively divide by a slightly
3215 smaller integer instead of multiplying by the exact
3216 floating-point value.
3218 #ifdef NV_MAX_10_EXP
3219 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3220 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3222 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3223 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3225 char *in = buf + sizeof(buf);
3227 anv = Perl_floor(anv);
3229 NV next = Perl_floor(anv / 128);
3230 if (in <= buf) /* this cannot happen ;-) */
3231 Perl_croak(aTHX_ "Cannot compress integer in pack");
3232 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3235 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3236 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3237 in, (buf + sizeof(buf)) - in);
3239 char *from, *result, *in;
3245 /* Copy string and check for compliance */
3246 from = SvPV(fromstr, len);
3247 if ((norm = is_an_int(from, len)) == NULL)
3248 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3250 New('w', result, len, char);
3253 while (!done) *--in = div128(norm, &done) | 0x80;
3254 result[len - 1] &= 0x7F; /* clear continue bit */
3255 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3256 in, (result + len) - in);
3258 SvREFCNT_dec(norm); /* free norm */
3263 case 'i' | TYPE_IS_SHRIEKING:
3267 aint = SvIV(fromstr);
3268 DO_BO_PACK(aint, i);
3269 PUSH_VAR(utf8, cur, aint);
3272 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3273 case 'N' | TYPE_IS_SHRIEKING:
3279 au32 = SvUV(fromstr);
3281 au32 = PerlSock_htonl(au32);
3283 PUSH32(utf8, cur, &au32);
3286 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3287 case 'V' | TYPE_IS_SHRIEKING:
3293 au32 = SvUV(fromstr);
3297 PUSH32(utf8, cur, &au32);
3300 case 'L' | TYPE_IS_SHRIEKING:
3301 #if LONGSIZE != SIZE32
3303 unsigned long aulong;
3305 aulong = SvUV(fromstr);
3306 DO_BO_PACK(aulong, l);
3307 PUSH_VAR(utf8, cur, aulong);
3317 au32 = SvUV(fromstr);
3318 DO_BO_PACK(au32, 32);
3319 PUSH32(utf8, cur, &au32);
3322 case 'l' | TYPE_IS_SHRIEKING:
3323 #if LONGSIZE != SIZE32
3327 along = SvIV(fromstr);
3328 DO_BO_PACK(along, l);
3329 PUSH_VAR(utf8, cur, along);
3339 ai32 = SvIV(fromstr);
3340 DO_BO_PACK(ai32, 32);
3341 PUSH32(utf8, cur, &ai32);
3349 auquad = (Uquad_t) SvUV(fromstr);
3350 DO_BO_PACK(auquad, 64);
3351 PUSH_VAR(utf8, cur, auquad);
3358 aquad = (Quad_t)SvIV(fromstr);
3359 DO_BO_PACK(aquad, 64);
3360 PUSH_VAR(utf8, cur, aquad);
3363 #endif /* HAS_QUAD */
3365 len = 1; /* assume SV is correct length */
3366 GROWING(utf8, cat, start, cur, sizeof(char *));
3373 SvGETMAGIC(fromstr);
3374 if (!SvOK(fromstr)) aptr = NULL;
3377 /* XXX better yet, could spirit away the string to
3378 * a safe spot and hang on to it until the result
3379 * of pack() (and all copies of the result) are
3382 if (ckWARN(WARN_PACK) &&
3383 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3384 !SvREADONLY(fromstr)))) {
3385 Perl_warner(aTHX_ packWARN(WARN_PACK),
3386 "Attempt to pack pointer to temporary value");
3388 if (SvPOK(fromstr) || SvNIOK(fromstr))
3389 aptr = SvPV_flags(fromstr, n_a, 0);
3391 aptr = SvPV_force_flags(fromstr, n_a, 0);
3394 PUSH_VAR(utf8, cur, aptr);
3402 if (len <= 2) len = 45;
3403 else len = len / 3 * 3;
3405 Perl_warner(aTHX_ packWARN(WARN_PACK),
3406 "Field too wide in 'u' format in pack");
3409 aptr = SvPV(fromstr, fromlen);
3410 from_utf8 = DO_UTF8(fromstr);
3412 aend = aptr + fromlen;
3413 fromlen = sv_len_utf8(fromstr);
3414 } else aend = NULL; /* Unused, but keep compilers happy */
3415 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3416 while (fromlen > 0) {
3419 U8 hunk[1+63/3*4+1];
3421 if ((I32)fromlen > len)
3427 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3428 'u' | TYPE_IS_PACK)) {
3430 SvCUR(cat) = cur - start;
3431 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3433 end = doencodes(hunk, buffer, todo);
3435 end = doencodes(hunk, aptr, todo);
3438 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3445 SvCUR(cat) = cur - start;
3447 *symptr = lookahead;
3456 dSP; dMARK; dORIGMARK; dTARGET;
3457 register SV *cat = TARG;
3459 register char *pat = SvPVx(*++MARK, fromlen);
3460 register char *patend = pat + fromlen;
3463 sv_setpvn(cat, "", 0);
3466 packlist(cat, pat, patend, MARK, SP + 1);
3476 * c-indentation-style: bsd
3478 * indent-tabs-mode: t
3481 * vim: shiftwidth=4: