3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
35 # define PERL_PACK_CAN_BYTEORDER
36 # define PERL_PACK_CAN_SHRIEKSIGN
42 /* Maximum number of bytes to which a byte can grow due to upgrade */
46 * Offset for integer pack/unpack.
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
66 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
69 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
70 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
71 # define OFF16(p) ((char*)(p))
72 # define OFF32(p) ((char*)(p))
74 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
75 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
78 ++++ bad cray byte order
82 # define OFF16(p) ((char *) (p))
83 # define OFF32(p) ((char *) (p))
86 /* Only to be used inside a loop (see the break) */
87 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
91 Copy(s, OFF16(p), SIZE16, char); \
96 /* Only to be used inside a loop (see the break) */
97 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
101 Copy(s, OFF32(p), SIZE32, char); \
106 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
109 /* Only to be used inside a loop (see the break) */
110 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
116 Copy(s, (char *) &var, sizeof(var), char); \
121 #define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
124 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125 #define MAX_SUB_TEMPLATE_LEVEL 100
127 /* flags (note that type modifiers can also be used as flags!) */
128 #define FLAG_WAS_UTF8 0x40
129 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
130 #define FLAG_UNPACK_ONLY_ONE 0x10
131 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
132 #define FLAG_SLASH 0x04
133 #define FLAG_COMMA 0x02
134 #define FLAG_PACK 0x01
137 S_mul128(pTHX_ SV *sv, U8 m)
140 char *s = SvPV(sv, len);
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
153 while (!*t) /* trailing '\0'? */
156 i = ((*t - '0') << 7) + m;
157 *(t--) = '0' + (char)(i % 10);
163 /* Explosives and implosives. */
165 #if 'I' == 73 && 'J' == 74
166 /* On an ASCII/ISO kind of system */
167 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
170 Some other sort of character set - use memchr() so we don't match
173 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
177 #define TYPE_IS_SHRIEKING 0x100
178 #define TYPE_IS_BIG_ENDIAN 0x200
179 #define TYPE_IS_LITTLE_ENDIAN 0x400
180 #define TYPE_IS_PACK 0x800
181 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
182 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
183 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
185 #ifdef PERL_PACK_CAN_SHRIEKSIGN
186 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
188 #define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
191 #ifndef PERL_PACK_CAN_BYTEORDER
192 /* Put "can't" first because it is shorter */
193 # define TYPE_ENDIANNESS(t) 0
194 # define TYPE_NO_ENDIANNESS(t) (t)
196 # define ENDIANNESS_ALLOWED_TYPES ""
198 # define DO_BO_UNPACK(var, type)
199 # define DO_BO_PACK(var, type)
200 # define DO_BO_UNPACK_PTR(var, type, pre_cast)
201 # define DO_BO_PACK_PTR(var, type, pre_cast)
202 # define DO_BO_UNPACK_N(var, type)
203 # define DO_BO_PACK_N(var, type)
204 # define DO_BO_UNPACK_P(var)
205 # define DO_BO_PACK_P(var)
207 #else /* PERL_PACK_CAN_BYTEORDER */
209 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
212 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
214 # define DO_BO_UNPACK(var, type) \
216 switch (TYPE_ENDIANNESS(datumtype)) { \
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
223 # define DO_BO_PACK(var, type) \
225 switch (TYPE_ENDIANNESS(datumtype)) { \
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
232 # define DO_BO_UNPACK_PTR(var, type, pre_cast) \
234 switch (TYPE_ENDIANNESS(datumtype)) { \
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (void *) my_betoh ## type ((pre_cast) var); \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (void *) my_letoh ## type ((pre_cast) var); \
246 # define DO_BO_PACK_PTR(var, type, pre_cast) \
248 switch (TYPE_ENDIANNESS(datumtype)) { \
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (void *) my_htobe ## type ((pre_cast) var); \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (void *) my_htole ## type ((pre_cast) var); \
260 # define BO_CANT_DOIT(action, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
276 # if PTRSIZE == INTSIZE
277 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
278 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
279 # elif PTRSIZE == LONGSIZE
280 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
281 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
283 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
284 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
287 # if defined(my_htolen) && defined(my_letohn) && \
288 defined(my_htoben) && defined(my_betohn)
289 # define DO_BO_UNPACK_N(var, type) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
293 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
298 # define DO_BO_PACK_N(var, type) \
300 switch (TYPE_ENDIANNESS(datumtype)) { \
301 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
302 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
307 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
308 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
311 #endif /* PERL_PACK_CAN_BYTEORDER */
313 #define PACK_SIZE_CANNOT_CSUM 0x80
314 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
315 #define PACK_SIZE_MASK 0x3F
317 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
318 in). You're unlikely ever to need to regenerate them. */
320 #if TYPE_IS_SHRIEKING != 0x100
321 ++++shriek offset should be 256
324 typedef U8 packprops_t;
327 const packprops_t packprops[512] = {
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
335 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
336 /* D */ LONG_DOUBLESIZE,
343 /* I */ sizeof(unsigned int),
350 #if defined(HAS_QUAD)
351 /* Q */ sizeof(Uquad_t),
358 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
360 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 /* c */ sizeof(char),
363 /* d */ sizeof(double),
365 /* f */ sizeof(float),
374 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
375 #if defined(HAS_QUAD)
376 /* q */ sizeof(Quad_t),
384 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0,
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
398 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 /* I */ sizeof(unsigned int),
402 /* L */ sizeof(unsigned long),
404 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
410 /* S */ sizeof(unsigned short),
412 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
417 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
421 /* l */ sizeof(long),
423 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
429 /* s */ sizeof(short),
431 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0
447 /* EBCDIC (or bust) */
448 const packprops_t packprops[512] = {
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 /* c */ sizeof(char),
460 /* d */ sizeof(double),
462 /* f */ sizeof(float),
472 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
473 #if defined(HAS_QUAD)
474 /* q */ sizeof(Quad_t),
478 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
486 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
487 /* D */ LONG_DOUBLESIZE,
494 /* I */ sizeof(unsigned int),
502 #if defined(HAS_QUAD)
503 /* Q */ sizeof(Uquad_t),
507 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
512 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 /* l */ sizeof(long),
529 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* s */ sizeof(short),
537 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 /* I */ sizeof(unsigned int),
546 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 /* L */ sizeof(unsigned long),
549 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 /* S */ sizeof(unsigned short),
557 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
568 uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
572 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
573 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
574 /* We try to process malformed UTF-8 as much as possible (preferrably with
575 warnings), but these two mean we make no progress in the string and
576 might enter an infinite loop */
577 if (retlen == (STRLEN) -1 || retlen == 0)
578 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
579 (int) TYPE_NO_MODIFIERS(datumtype));
581 if (ckWARN(WARN_UNPACK))
582 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
583 "Character in '%c' format wrapped in unpack",
584 (int) TYPE_NO_MODIFIERS(datumtype));
591 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
592 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
596 uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
602 U32 flags = ckWARN(WARN_UTF8) ?
603 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
604 for (;buf_len > 0; buf_len--) {
605 if (from >= end) return FALSE;
606 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
607 if (retlen == (STRLEN) -1 || retlen == 0) {
608 from += UTF8SKIP(from);
610 } else from += retlen;
615 *(U8 *)buf++ = (U8)val;
617 /* We have enough characters for the buffer. Did we have problems ? */
620 /* Rewalk the string fragment while warning */
622 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
623 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
624 if (ptr >= end) break;
625 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
627 if (from > end) from = end;
629 if ((bad & 2) && ckWARN(WARN_UNPACK))
630 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
631 WARN_PACK : WARN_UNPACK),
632 "Character(s) in '%c' format wrapped in %s",
633 (int) TYPE_NO_MODIFIERS(datumtype),
634 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
641 next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
645 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
646 if (val >= 0x100 || !ISUUCHAR(val) ||
647 retlen == (STRLEN) -1 || retlen == 0) {
651 *out = PL_uudmap[val] & 077;
657 bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
658 U8 buffer[UTF8_MAXLEN];
659 U8 *end = start + len;
661 while (start < end) {
663 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
673 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
681 #define PUSH_BYTES(utf8, cur, buf, len) \
683 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
685 Copy(buf, cur, len, char); \
690 #define GROWING(utf8, cat, start, cur, in_len) \
692 STRLEN glen = (in_len); \
693 if (utf8) glen *= UTF8_EXPAND; \
694 if ((cur) + glen >= (start) + SvLEN(cat)) { \
695 (start) = sv_exp_grow(aTHX_ cat, glen); \
696 (cur) = (start) + SvCUR(cat); \
700 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
702 STRLEN glen = (in_len); \
704 if (utf8) gl *= UTF8_EXPAND; \
705 if ((cur) + gl >= (start) + SvLEN(cat)) { \
707 SvCUR(cat) = (cur) - (start); \
708 (start) = sv_exp_grow(aTHX_ cat, gl); \
709 (cur) = (start) + SvCUR(cat); \
711 PUSH_BYTES(utf8, cur, buf, glen); \
714 #define PUSH_BYTE(utf8, s, byte) \
718 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
719 } else *(U8 *)(s)++ = (byte); \
722 /* Only to be used inside a loop (see the break) */
723 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
726 if (str >= end) break; \
727 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
728 if (retlen == (STRLEN) -1 || retlen == 0) { \
730 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
735 /* Returns the sizeof() struct described by pat */
737 S_measure_struct(pTHX_ tempsym_t* symptr)
741 while (next_symbol(symptr)) {
745 switch (symptr->howlen) {
747 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
748 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
751 /* e_no_len and e_number */
752 len = symptr->length;
756 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
758 /* endianness doesn't influence the size of a type */
759 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
761 Perl_croak(aTHX_ "Invalid type '%c' in %s",
762 (int)TYPE_NO_MODIFIERS(symptr->code),
763 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
766 case 'U': /* XXXX Is it correct? */
769 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
771 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
777 tempsym_t savsym = *symptr;
778 symptr->patptr = savsym.grpbeg;
779 symptr->patend = savsym.grpend;
780 /* XXXX Theoretically, we need to measure many times at
781 different positions, since the subexpression may contain
782 alignment commands, but be not of aligned length.
783 Need to detect this and croak(). */
784 size = measure_struct(symptr);
788 case 'X' | TYPE_IS_SHRIEKING:
789 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
791 if (!len) /* Avoid division by 0 */
793 len = total % len; /* Assumed: the start is aligned. */
798 Perl_croak(aTHX_ "'X' outside of string in %s",
799 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
801 case 'x' | TYPE_IS_SHRIEKING:
802 if (!len) /* Avoid division by 0 */
804 star = total % len; /* Assumed: the start is aligned. */
805 if (star) /* Other portable ways? */
829 size = sizeof(char*);
839 /* locate matching closing parenthesis or bracket
840 * returns char pointer to char after match, or NULL
843 S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
845 while (patptr < patend) {
853 while (patptr < patend && *patptr != '\n')
857 patptr = group_end(patptr, patend, ')') + 1;
859 patptr = group_end(patptr, patend, ']') + 1;
861 Perl_croak(aTHX_ "No group ending character '%c' found in template",
867 /* Convert unsigned decimal number to binary.
868 * Expects a pointer to the first digit and address of length variable
869 * Advances char pointer to 1st non-digit char and returns number
872 S_get_num(pTHX_ register char *patptr, I32 *lenptr )
874 I32 len = *patptr++ - '0';
875 while (isDIGIT(*patptr)) {
876 if (len >= 0x7FFFFFFF/10)
877 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
878 len = (len * 10) + (*patptr++ - '0');
884 /* The marvellous template parsing routine: Using state stored in *symptr,
885 * locates next template code and count
888 S_next_symbol(pTHX_ tempsym_t* symptr )
890 char* patptr = symptr->patptr;
891 char* patend = symptr->patend;
892 const char *allowed = "";
894 symptr->flags &= ~FLAG_SLASH;
896 while (patptr < patend) {
897 if (isSPACE(*patptr))
899 else if (*patptr == '#') {
901 while (patptr < patend && *patptr != '\n')
906 /* We should have found a template code */
907 I32 code = *patptr++ & 0xFF;
908 U32 inherited_modifiers = 0;
910 if (code == ','){ /* grandfather in commas but with a warning */
911 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
912 symptr->flags |= FLAG_COMMA;
913 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
914 "Invalid type ',' in %s",
915 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
920 /* for '(', skip to ')' */
922 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
923 Perl_croak(aTHX_ "()-group starts with a count in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
925 symptr->grpbeg = patptr;
926 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
927 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
928 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
929 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
932 /* look for group modifiers to inherit */
933 if (TYPE_ENDIANNESS(symptr->flags)) {
934 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
935 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
938 /* look for modifiers */
939 while (patptr < patend) {
943 modifier = TYPE_IS_SHRIEKING;
944 allowed = SHRIEKING_ALLOWED_TYPES;
946 #ifdef PERL_PACK_CAN_BYTEORDER
948 modifier = TYPE_IS_BIG_ENDIAN;
949 allowed = ENDIANNESS_ALLOWED_TYPES;
952 modifier = TYPE_IS_LITTLE_ENDIAN;
953 allowed = ENDIANNESS_ALLOWED_TYPES;
955 #endif /* PERL_PACK_CAN_BYTEORDER */
963 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
964 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
965 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
967 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
968 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
969 (int) TYPE_NO_MODIFIERS(code),
970 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
971 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
972 TYPE_ENDIANNESS_MASK)
973 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
974 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
976 if (ckWARN(WARN_UNPACK)) {
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Duplicate modifier '%c' after '%c' in %s",
980 *patptr, (int) TYPE_NO_MODIFIERS(code),
981 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
988 /* inherit modifiers */
989 code |= inherited_modifiers;
991 /* look for count and/or / */
992 if (patptr < patend) {
993 if (isDIGIT(*patptr)) {
994 patptr = get_num( patptr, &symptr->length );
995 symptr->howlen = e_number;
997 } else if (*patptr == '*') {
999 symptr->howlen = e_star;
1001 } else if (*patptr == '[') {
1002 char* lenptr = ++patptr;
1003 symptr->howlen = e_number;
1004 patptr = group_end( patptr, patend, ']' ) + 1;
1005 /* what kind of [] is it? */
1006 if (isDIGIT(*lenptr)) {
1007 lenptr = get_num( lenptr, &symptr->length );
1008 if( *lenptr != ']' )
1009 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1010 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1012 tempsym_t savsym = *symptr;
1013 symptr->patend = patptr-1;
1014 symptr->patptr = lenptr;
1015 savsym.length = measure_struct(symptr);
1019 symptr->howlen = e_no_len;
1024 while (patptr < patend) {
1025 if (isSPACE(*patptr))
1027 else if (*patptr == '#') {
1029 while (patptr < patend && *patptr != '\n')
1031 if (patptr < patend)
1034 if (*patptr == '/') {
1035 symptr->flags |= FLAG_SLASH;
1037 if (patptr < patend &&
1038 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1039 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1040 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1046 /* at end - no count, no / */
1047 symptr->howlen = e_no_len;
1051 symptr->code = code;
1052 symptr->patptr = patptr;
1056 symptr->patptr = patptr;
1061 There is no way to cleanly handle the case where we should process the
1062 string per byte in its upgraded form while it's really in downgraded form
1063 (e.g. estimates like strend-s as an upper bound for the number of
1064 characters left wouldn't work). So if we foresee the need of this
1065 (pattern starts with U or contains U0), we want to work on the encoded
1066 version of the string. Users are advised to upgrade their pack string
1067 themselves if they need to do a lot of unpacks like this on it
1070 need_utf8(const char *pat, const char *patend)
1073 while (pat < patend) {
1074 if (pat[0] == '#') {
1076 pat = memchr(pat, '\n', patend-pat);
1077 if (!pat) return FALSE;
1078 } else if (pat[0] == 'U') {
1079 if (first || pat[1] == '0') return TRUE;
1080 } else first = FALSE;
1087 first_symbol(const char *pat, const char *patend) {
1088 while (pat < patend) {
1089 if (pat[0] != '#') return pat[0];
1091 pat = memchr(pat, '\n', patend-pat);
1099 =for apidoc unpack_str
1101 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1102 and ocnt are not used. This call should not be used, use unpackstring instead.
1107 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
1109 tempsym_t sym = { 0 };
1111 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1112 else if (need_utf8(pat, patend)) {
1113 /* We probably should try to avoid this in case a scalar context call
1114 wouldn't get to the "U0" */
1115 STRLEN len = strend - s;
1116 s = (char *) bytes_to_utf8((U8 *) s, &len);
1119 flags |= FLAG_DO_UTF8;
1122 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1123 flags |= FLAG_PARSE_UTF8;
1126 sym.patend = patend;
1129 return unpack_rec(&sym, s, s, strend, NULL );
1133 =for apidoc unpackstring
1135 The engine implementing unpack() Perl function. C<unpackstring> puts the
1136 extracted list items on the stack and returns the number of elements.
1137 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1142 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
1144 tempsym_t sym = { 0 };
1146 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1147 else if (need_utf8(pat, patend)) {
1148 /* We probably should try to avoid this in case a scalar context call
1149 wouldn't get to the "U0" */
1150 STRLEN len = strend - s;
1151 s = (char *) bytes_to_utf8((U8 *) s, &len);
1154 flags |= FLAG_DO_UTF8;
1157 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1158 flags |= FLAG_PARSE_UTF8;
1161 sym.patend = patend;
1164 return unpack_rec(&sym, s, s, strend, NULL );
1169 S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
1173 I32 start_sp_offset = SP - PL_stack_base;
1179 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1180 char* strrelbeg = s;
1181 bool beyond = FALSE;
1182 bool explicit_length;
1183 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1184 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1186 while (next_symbol(symptr)) {
1189 I32 datumtype = symptr->code;
1190 /* do first one only unless in list context
1191 / is implemented by unpacking the count, then popping it from the
1192 stack, so must check that we're not in the middle of a / */
1193 if ( unpack_only_one
1194 && (SP - PL_stack_base == start_sp_offset + 1)
1195 && (datumtype != '/') ) /* XXX can this be omitted */
1198 switch (howlen = symptr->howlen) {
1200 len = strend - strbeg; /* long enough */
1203 /* e_no_len and e_number */
1204 len = symptr->length;
1208 explicit_length = TRUE;
1210 beyond = s >= strend;
1212 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1214 /* props nonzero means we can process this letter. */
1215 long size = props & PACK_SIZE_MASK;
1216 long howmany = (strend - s) / size;
1220 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1221 if (len && unpack_only_one) len = 1;
1227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1229 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1232 if (howlen == e_no_len)
1233 len = 16; /* len is not specified */
1241 tempsym_t savsym = *symptr;
1242 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1243 symptr->flags |= group_modifiers;
1244 symptr->patend = savsym.grpend;
1248 symptr->patptr = savsym.grpbeg;
1249 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1250 else symptr->flags &= ~FLAG_PARSE_UTF8;
1251 unpack_rec(symptr, s, strbeg, strend, &s);
1252 if (s == strend && savsym.howlen == e_star)
1253 break; /* No way to continue */
1256 symptr->flags &= ~group_modifiers;
1257 savsym.flags = symptr->flags;
1266 Perl_croak(aTHX_ "'@' outside of string in unpack");
1271 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1273 if (len > strend - strrelbeg)
1274 Perl_croak(aTHX_ "'@' outside of string in unpack");
1275 s = strrelbeg + len;
1278 case 'X' | TYPE_IS_SHRIEKING:
1279 if (!len) /* Avoid division by 0 */
1284 hop = last = strbeg;
1286 hop += UTF8SKIP(hop);
1293 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1297 len = (s - strbeg) % len;
1303 Perl_croak(aTHX_ "'X' outside of string in unpack");
1304 while (--s, UTF8_IS_CONTINUATION(*s)) {
1306 Perl_croak(aTHX_ "'X' outside of string in unpack");
1311 if (len > s - strbeg)
1312 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1316 case 'x' | TYPE_IS_SHRIEKING:
1317 if (!len) /* Avoid division by 0 */
1319 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1320 else ai32 = (s - strbeg) % len;
1321 if (ai32 == 0) break;
1328 Perl_croak(aTHX_ "'x' outside of string in unpack");
1333 if (len > strend - s)
1334 Perl_croak(aTHX_ "'x' outside of string in unpack");
1339 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1345 /* Preliminary length estimate is assumed done in 'W' */
1346 if (len > strend - s) len = strend - s;
1352 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1353 if (hop >= strend) {
1355 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1360 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1362 } else if (len > strend - s)
1365 if (datumtype == 'Z') {
1366 /* 'Z' strips stuff after first null */
1369 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1370 sv = newSVpvn(s, ptr-s);
1371 if (howlen == e_star) /* exact for 'Z*' */
1372 len = ptr-s + (ptr != strend ? 1 : 0);
1373 } else if (datumtype == 'A') {
1374 /* 'A' strips both nulls and spaces */
1376 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1377 for (ptr = s+len-1; ptr >= s; ptr--)
1378 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1379 !is_utf8_space((U8 *) ptr)) break;
1380 if (ptr >= s) ptr += UTF8SKIP(ptr);
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1385 for (ptr = s+len-1; ptr >= s; ptr--)
1386 if (*ptr != 0 && !isSPACE(*ptr)) break;
1389 sv = newSVpvn(s, ptr-s);
1390 } else sv = newSVpvn(s, len);
1394 /* Undo any upgrade done due to need_utf8() */
1395 if (!(symptr->flags & FLAG_WAS_UTF8))
1396 sv_utf8_downgrade(sv, 0);
1398 XPUSHs(sv_2mortal(sv));
1404 if (howlen == e_star || len > (strend - s) * 8)
1405 len = (strend - s) * 8;
1409 Newz(601, PL_bitcount, 256, char);
1410 for (bits = 1; bits < 256; bits++) {
1411 if (bits & 1) PL_bitcount[bits]++;
1412 if (bits & 2) PL_bitcount[bits]++;
1413 if (bits & 4) PL_bitcount[bits]++;
1414 if (bits & 8) PL_bitcount[bits]++;
1415 if (bits & 16) PL_bitcount[bits]++;
1416 if (bits & 32) PL_bitcount[bits]++;
1417 if (bits & 64) PL_bitcount[bits]++;
1418 if (bits & 128) PL_bitcount[bits]++;
1422 while (len >= 8 && s < strend) {
1423 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1428 cuv += PL_bitcount[*(U8 *)s++];
1431 if (len && s < strend) {
1433 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1434 if (datumtype == 'b')
1436 if (bits & 1) cuv++;
1441 if (bits & 0x80) cuv++;
1448 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1451 if (datumtype == 'b') {
1454 for (len = 0; len < ai32; len++) {
1455 if (len & 7) bits >>= 1;
1457 if (s >= strend) break;
1458 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1459 } else bits = *(U8 *) s++;
1460 *str++ = bits & 1 ? '1' : '0';
1465 for (len = 0; len < ai32; len++) {
1466 if (len & 7) bits <<= 1;
1468 if (s >= strend) break;
1469 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1470 } else bits = *(U8 *) s++;
1471 *str++ = bits & 0x80 ? '1' : '0';
1475 SvCUR_set(sv, str - SvPVX(sv));
1482 /* Preliminary length estimate, acceptable for utf8 too */
1483 if (howlen == e_star || len > (strend - s) * 2)
1484 len = (strend - s) * 2;
1485 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1488 if (datumtype == 'h') {
1491 for (len = 0; len < ai32; len++) {
1492 if (len & 1) bits >>= 4;
1494 if (s >= strend) break;
1495 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1496 } else bits = * (U8 *) s++;
1497 *str++ = PL_hexdigit[bits & 15];
1502 for (len = 0; len < ai32; len++) {
1503 if (len & 1) bits <<= 4;
1505 if (s >= strend) break;
1506 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1507 } else bits = *(U8 *) s++;
1508 *str++ = PL_hexdigit[(bits >> 4) & 15];
1512 SvCUR_set(sv, str - SvPVX(sv));
1518 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1519 if (aint >= 128) /* fake up signed chars */
1522 PUSHs(sv_2mortal(newSViv((IV)aint)));
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV)aint;
1533 if (explicit_length && datumtype == 'C')
1534 /* Switch to "character" mode */
1535 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1538 if (datumtype == 'C' ?
1539 (symptr->flags & FLAG_DO_UTF8) &&
1540 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1541 while (len-- > 0 && s < strend) {
1544 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1545 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1546 if (retlen == (STRLEN) -1 || retlen == 0)
1547 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1550 PUSHs(sv_2mortal(newSVuv((UV) val)));
1551 else if (checksum > bits_in_uv)
1552 cdouble += (NV) val;
1556 } else if (!checksum)
1558 U8 ch = *(U8 *) s++;
1559 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1561 else if (checksum > bits_in_uv)
1562 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1564 while (len-- > 0) cuv += *(U8 *) s++;
1568 if (explicit_length) {
1569 /* Switch to "bytes in UTF-8" mode */
1570 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1572 /* Should be impossible due to the need_utf8() test */
1573 Perl_croak(aTHX_ "U0 mode on a byte string");
1577 if (len > strend - s) len = strend - s;
1579 if (len && unpack_only_one) len = 1;
1583 while (len-- > 0 && s < strend) {
1587 U8 result[UTF8_MAXLEN];
1591 /* Bug: warns about bad utf8 even if we are short on bytes
1592 and will break out of the loop */
1593 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1596 len = UTF8SKIP(result);
1597 if (!uni_to_bytes(aTHX_ &ptr, strend,
1598 (char *) &result[1], len-1, 'U')) break;
1599 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1602 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1608 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1609 else if (checksum > bits_in_uv)
1610 cdouble += (NV) auv;
1615 case 's' | TYPE_IS_SHRIEKING:
1616 #if SHORTSIZE != SIZE16
1619 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1620 DO_BO_UNPACK(ashort, s);
1622 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1623 else if (checksum > bits_in_uv)
1624 cdouble += (NV)ashort;
1636 #if U16SIZE > SIZE16
1639 SHIFT16(utf8, s, strend, &ai16, datumtype);
1640 DO_BO_UNPACK(ai16, 16);
1641 #if U16SIZE > SIZE16
1646 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1647 else if (checksum > bits_in_uv)
1648 cdouble += (NV)ai16;
1653 case 'S' | TYPE_IS_SHRIEKING:
1654 #if SHORTSIZE != SIZE16
1656 unsigned short aushort;
1657 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1658 DO_BO_UNPACK(aushort, s);
1660 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1661 else if (checksum > bits_in_uv)
1662 cdouble += (NV)aushort;
1675 #if U16SIZE > SIZE16
1678 SHIFT16(utf8, s, strend, &au16, datumtype);
1679 DO_BO_UNPACK(au16, 16);
1681 if (datumtype == 'n')
1682 au16 = PerlSock_ntohs(au16);
1685 if (datumtype == 'v')
1689 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1690 else if (checksum > bits_in_uv)
1691 cdouble += (NV) au16;
1696 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1697 case 'v' | TYPE_IS_SHRIEKING:
1698 case 'n' | TYPE_IS_SHRIEKING:
1701 # if U16SIZE > SIZE16
1704 SHIFT16(utf8, s, strend, &ai16, datumtype);
1706 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1707 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1708 # endif /* HAS_NTOHS */
1710 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1711 ai16 = (I16) vtohs((U16) ai16);
1712 # endif /* HAS_VTOHS */
1714 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1715 else if (checksum > bits_in_uv)
1716 cdouble += (NV) ai16;
1721 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1723 case 'i' | TYPE_IS_SHRIEKING:
1726 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1727 DO_BO_UNPACK(aint, i);
1729 PUSHs(sv_2mortal(newSViv((IV)aint)));
1730 else if (checksum > bits_in_uv)
1731 cdouble += (NV)aint;
1737 case 'I' | TYPE_IS_SHRIEKING:
1740 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1741 DO_BO_UNPACK(auint, i);
1743 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1744 else if (checksum > bits_in_uv)
1745 cdouble += (NV)auint;
1753 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1754 #if IVSIZE == INTSIZE
1755 DO_BO_UNPACK(aiv, i);
1756 #elif IVSIZE == LONGSIZE
1757 DO_BO_UNPACK(aiv, l);
1758 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1759 DO_BO_UNPACK(aiv, 64);
1761 Perl_croak(aTHX_ "'j' not supported on this platform");
1764 PUSHs(sv_2mortal(newSViv(aiv)));
1765 else if (checksum > bits_in_uv)
1774 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1775 #if IVSIZE == INTSIZE
1776 DO_BO_UNPACK(auv, i);
1777 #elif IVSIZE == LONGSIZE
1778 DO_BO_UNPACK(auv, l);
1779 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1780 DO_BO_UNPACK(auv, 64);
1782 Perl_croak(aTHX_ "'J' not supported on this platform");
1785 PUSHs(sv_2mortal(newSVuv(auv)));
1786 else if (checksum > bits_in_uv)
1792 case 'l' | TYPE_IS_SHRIEKING:
1793 #if LONGSIZE != SIZE32
1796 SHIFT_VAR(utf8, s, strend, along, datumtype);
1797 DO_BO_UNPACK(along, l);
1799 PUSHs(sv_2mortal(newSViv((IV)along)));
1800 else if (checksum > bits_in_uv)
1801 cdouble += (NV)along;
1812 #if U32SIZE > SIZE32
1815 SHIFT32(utf8, s, strend, &ai32, datumtype);
1816 DO_BO_UNPACK(ai32, 32);
1817 #if U32SIZE > SIZE32
1818 if (ai32 > 2147483647) ai32 -= 4294967296;
1821 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1822 else if (checksum > bits_in_uv)
1823 cdouble += (NV)ai32;
1828 case 'L' | TYPE_IS_SHRIEKING:
1829 #if LONGSIZE != SIZE32
1831 unsigned long aulong;
1832 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1833 DO_BO_UNPACK(aulong, l);
1835 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1836 else if (checksum > bits_in_uv)
1837 cdouble += (NV)aulong;
1850 #if U32SIZE > SIZE32
1853 SHIFT32(utf8, s, strend, &au32, datumtype);
1854 DO_BO_UNPACK(au32, 32);
1856 if (datumtype == 'N')
1857 au32 = PerlSock_ntohl(au32);
1860 if (datumtype == 'V')
1864 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1865 else if (checksum > bits_in_uv)
1866 cdouble += (NV)au32;
1871 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1872 case 'V' | TYPE_IS_SHRIEKING:
1873 case 'N' | TYPE_IS_SHRIEKING:
1876 # if U32SIZE > SIZE32
1879 SHIFT32(utf8, s, strend, &ai32, datumtype);
1881 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1882 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1885 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1886 ai32 = (I32)vtohl((U32)ai32);
1889 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1890 else if (checksum > bits_in_uv)
1891 cdouble += (NV)ai32;
1896 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1900 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1901 DO_BO_UNPACK_P(aptr);
1902 /* newSVpv generates undef if aptr is NULL */
1903 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1911 while (len > 0 && s < strend) {
1913 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1914 auv = (auv << 7) | (ch & 0x7f);
1915 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1918 PUSHs(sv_2mortal(newSVuv(auv)));
1923 if (++bytes >= sizeof(UV)) { /* promote to string */
1927 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1928 while (s < strend) {
1929 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1930 sv = mul128(sv, (U8)(ch & 0x7f));
1940 PUSHs(sv_2mortal(sv));
1945 if ((s >= strend) && bytes)
1946 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1950 if (symptr->howlen == e_star)
1951 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1953 if (sizeof(char*) <= strend - s) {
1955 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1956 DO_BO_UNPACK_P(aptr);
1957 /* newSVpvn generates undef if aptr is NULL */
1958 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
1965 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1966 DO_BO_UNPACK(aquad, 64);
1968 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
1969 newSViv((IV)aquad) : newSVnv((NV)aquad)));
1970 else if (checksum > bits_in_uv)
1971 cdouble += (NV)aquad;
1979 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1980 DO_BO_UNPACK(auquad, 64);
1982 PUSHs(sv_2mortal(auquad <= UV_MAX ?
1983 newSVuv((UV)auquad):newSVnv((NV)auquad)));
1984 else if (checksum > bits_in_uv)
1985 cdouble += (NV)auquad;
1990 #endif /* HAS_QUAD */
1991 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1995 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1996 DO_BO_UNPACK_N(afloat, float);
1998 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2006 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2007 DO_BO_UNPACK_N(adouble, double);
2009 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2017 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2018 DO_BO_UNPACK_N(anv, NV);
2020 PUSHs(sv_2mortal(newSVnv(anv)));
2025 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2028 long double aldouble;
2029 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2030 DO_BO_UNPACK_N(aldouble, long double);
2032 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2034 cdouble += aldouble;
2040 * Initialise the decode mapping. By using a table driven
2041 * algorithm, the code will be character-set independent
2042 * (and just as fast as doing character arithmetic)
2044 if (PL_uudmap['M'] == 0) {
2047 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2048 PL_uudmap[(U8)PL_uuemap[i]] = i;
2050 * Because ' ' and '`' map to the same value,
2051 * we need to decode them both the same.
2056 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2057 sv = sv_2mortal(NEWSV(42, l));
2058 if (l) SvPOK_on(sv);
2061 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2067 next_uni_uu(aTHX_ &s, strend, &a);
2068 next_uni_uu(aTHX_ &s, strend, &b);
2069 next_uni_uu(aTHX_ &s, strend, &c);
2070 next_uni_uu(aTHX_ &s, strend, &d);
2071 hunk[0] = (char)((a << 2) | (b >> 4));
2072 hunk[1] = (char)((b << 4) | (c >> 2));
2073 hunk[2] = (char)((c << 6) | d);
2074 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2078 if (*s == '\n') s++;
2080 /* possible checksum byte */
2081 char *skip = s+UTF8SKIP(s);
2082 if (skip < strend && *skip == '\n') s = skip+1;
2087 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2092 len = PL_uudmap[*(U8*)s++] & 077;
2094 if (s < strend && ISUUCHAR(*s))
2095 a = PL_uudmap[*(U8*)s++] & 077;
2098 if (s < strend && ISUUCHAR(*s))
2099 b = PL_uudmap[*(U8*)s++] & 077;
2102 if (s < strend && ISUUCHAR(*s))
2103 c = PL_uudmap[*(U8*)s++] & 077;
2106 if (s < strend && ISUUCHAR(*s))
2107 d = PL_uudmap[*(U8*)s++] & 077;
2110 hunk[0] = (char)((a << 2) | (b >> 4));
2111 hunk[1] = (char)((b << 4) | (c >> 2));
2112 hunk[2] = (char)((c << 6) | d);
2113 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2118 else /* possible checksum byte */
2119 if (s + 1 < strend && s[1] == '\n')
2128 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2129 (checksum > bits_in_uv &&
2130 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2133 anv = (NV) (1 << (checksum & 15));
2134 while (checksum >= 16) {
2138 while (cdouble < 0.0)
2140 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2141 sv = newSVnv(cdouble);
2144 if (checksum < bits_in_uv) {
2145 UV mask = ((UV)1 << checksum) - 1;
2150 XPUSHs(sv_2mortal(sv));
2154 if (symptr->flags & FLAG_SLASH){
2155 if (SP - PL_stack_base - start_sp_offset <= 0)
2156 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2157 if( next_symbol(symptr) ){
2158 if( symptr->howlen == e_number )
2159 Perl_croak(aTHX_ "Count after length/code in unpack" );
2161 /* ...end of char buffer then no decent length available */
2162 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2164 /* take top of stack (hope it's numeric) */
2167 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2170 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2172 datumtype = symptr->code;
2173 explicit_length = FALSE;
2181 return SP - PL_stack_base - start_sp_offset;
2188 I32 gimme = GIMME_V;
2191 char *pat = SvPV(left, llen);
2192 char *s = SvPV(right, rlen);
2193 char *strend = s + rlen;
2194 char *patend = pat + llen;
2198 cnt = unpackstring(pat, patend, s, strend,
2199 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2200 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2203 if ( !cnt && gimme == G_SCALAR )
2204 PUSHs(&PL_sv_undef);
2209 doencodes(U8 *h, char *s, I32 len)
2211 *h++ = PL_uuemap[len];
2213 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2214 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2215 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2216 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2221 char r = (len > 1 ? s[1] : '\0');
2222 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2223 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2224 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2225 *h++ = PL_uuemap[0];
2232 S_is_an_int(pTHX_ char *s, STRLEN l)
2235 SV *result = newSVpvn(s, l);
2236 char *result_c = SvPV(result, n_a); /* convenience */
2237 char *out = result_c;
2247 SvREFCNT_dec(result);
2270 SvREFCNT_dec(result);
2276 SvCUR_set(result, out - result_c);
2280 /* pnum must be '\0' terminated */
2282 S_div128(pTHX_ SV *pnum, bool *done)
2285 char *s = SvPV(pnum, len);
2294 i = m * 10 + (*t - '0');
2296 r = (i >> 7); /* r < 10 */
2303 SvCUR_set(pnum, (STRLEN) (t - s));
2310 =for apidoc pack_cat
2312 The engine implementing pack() Perl function. Note: parameters next_in_list and
2313 flags are not used. This call should not be used; use packlist instead.
2319 Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2321 tempsym_t sym = { 0 };
2323 sym.patend = patend;
2324 sym.flags = FLAG_PACK;
2326 (void)pack_rec( cat, &sym, beglist, endlist );
2331 =for apidoc packlist
2333 The engine implementing pack() Perl function.
2339 Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2342 tempsym_t sym = { 0 };
2345 sym.patend = patend;
2346 sym.flags = FLAG_PACK;
2348 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2349 Also make sure any UTF8 flag is loaded */
2350 SvPV_force(cat, no_len);
2351 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2353 (void)pack_rec( cat, &sym, beglist, endlist );
2356 /* like sv_utf8_upgrade, but also repoint the group start markers */
2358 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2361 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2363 if (SvUTF8(sv)) return;
2365 from_start = SvPVX(sv);
2366 from_end = from_start + SvCUR(sv);
2367 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2368 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2369 if (from_ptr == from_end) {
2370 /* Simple case: no character needs to be changed */
2375 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2376 New('U', to_start, len, char);
2377 Copy(from_start, to_start, from_ptr-from_start, char);
2378 to_ptr = to_start + (from_ptr-from_start);
2380 New('U', marks, sym_ptr->level+2, char *);
2381 for (group=sym_ptr; group; group = group->previous)
2382 marks[group->level] = from_start + group->strbeg;
2383 marks[sym_ptr->level+1] = from_end+1;
2384 for (m = marks; *m < from_ptr; m++)
2385 *m = to_start + (*m-from_start);
2387 for (;from_ptr < from_end; from_ptr++) {
2388 while (*m == from_ptr) *m++ = to_ptr;
2389 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2393 while (*m == from_ptr) *m++ = to_ptr;
2394 if (m != marks + sym_ptr->level+1) {
2397 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2399 for (group=sym_ptr; group; group = group->previous)
2400 group->strbeg = marks[group->level] - to_start;
2405 SvLEN(sv) += SvIVX(sv);
2406 from_start -= SvIVX(sv);
2409 SvFLAGS(sv) &= ~SVf_OOK;
2412 Safefree(from_start);
2413 SvPVX(sv) = to_start;
2414 SvCUR(sv) = to_ptr - to_start;
2419 /* Exponential string grower. Makes string extension effectively O(n)
2420 needed says how many extra bytes we need (not counting the final '\0')
2421 Only grows the string if there is an actual lack of space
2424 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2425 STRLEN cur = SvCUR(sv);
2426 STRLEN len = SvLEN(sv);
2428 if (len - cur > needed) return SvPVX(sv);
2429 extend = needed > len ? needed : len;
2430 return SvGROW(sv, len+extend+1);
2435 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2437 tempsym_t lookahead;
2438 I32 items = endlist - beglist;
2439 bool found = next_symbol(symptr);
2440 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2442 if (symptr->level == 0 && found && symptr->code == 'U') {
2443 marked_upgrade(aTHX_ cat, symptr);
2444 symptr->flags |= FLAG_DO_UTF8;
2447 symptr->strbeg = SvCUR(cat);
2453 SV *lengthcode = Nullsv;
2454 I32 datumtype = symptr->code;
2455 howlen_t howlen = symptr->howlen;
2456 char *start = SvPVX(cat);
2457 char *cur = start + SvCUR(cat);
2459 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2463 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2467 /* e_no_len and e_number */
2468 len = symptr->length;
2473 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2475 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2476 /* We can process this letter. */
2477 STRLEN size = props & PACK_SIZE_MASK;
2478 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2482 /* Look ahead for next symbol. Do we have code/code? */
2483 lookahead = *symptr;
2484 found = next_symbol(&lookahead);
2485 if (symptr->flags & FLAG_SLASH) {
2487 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2488 if (strchr("aAZ", lookahead.code)) {
2489 if (lookahead.howlen == e_number) count = lookahead.length;
2492 count = DO_UTF8(*beglist) ?
2493 sv_len_utf8(*beglist) : sv_len(*beglist);
2495 if (lookahead.code == 'Z') count++;
2498 if (lookahead.howlen == e_number && lookahead.length < items)
2499 count = lookahead.length;
2502 lookahead.howlen = e_number;
2503 lookahead.length = count;
2504 lengthcode = sv_2mortal(newSViv(count));
2507 /* Code inside the switch must take care to properly update
2508 cat (CUR length and '\0' termination) if it updated *cur and
2509 doesn't simply leave using break */
2510 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2512 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2513 (int) TYPE_NO_MODIFIERS(datumtype));
2515 Perl_croak(aTHX_ "'%%' may not be used in pack");
2518 char *s = start + symptr->strbeg;
2519 while (len > 0 && s < cur) {
2524 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2527 GROWING(0, cat, start, cur, len);
2528 Zero(cur, len, char);
2530 } else if (s < cur) cur = s;
2531 else goto no_change;
2533 len -= cur - (start+symptr->strbeg);
2534 if (len > 0) goto grow;
2536 if (len > 0) goto shrink;
2537 else goto no_change;
2541 tempsym_t savsym = *symptr;
2542 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2543 symptr->flags |= group_modifiers;
2544 symptr->patend = savsym.grpend;
2546 symptr->previous = &lookahead;
2549 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2550 else symptr->flags &= ~FLAG_PARSE_UTF8;
2551 was_utf8 = SvUTF8(cat);
2552 symptr->patptr = savsym.grpbeg;
2553 beglist = pack_rec(cat, symptr, beglist, endlist);
2554 if (SvUTF8(cat) != was_utf8)
2555 /* This had better be an upgrade while in utf8==0 mode */
2558 if (savsym.howlen == e_star && beglist == endlist)
2559 break; /* No way to continue */
2561 lookahead.flags = symptr->flags & ~group_modifiers;
2564 case 'X' | TYPE_IS_SHRIEKING:
2565 if (!len) /* Avoid division by 0 */
2572 hop += UTF8SKIP(hop);
2579 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2583 len = (cur-start) % len;
2587 if (len < 1) goto no_change;
2590 Perl_croak(aTHX_ "'X' outside of string in pack");
2591 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2593 Perl_croak(aTHX_ "'X' outside of string in pack");
2599 if (cur - start < len)
2600 Perl_croak(aTHX_ "'X' outside of string in pack");
2603 if (cur < start+symptr->strbeg) {
2604 /* Make sure group starts don't point into the void */
2606 STRLEN length = cur-start;
2607 for (group = symptr;
2608 group && length < group->strbeg;
2609 group = group->previous) group->strbeg = length;
2610 lookahead.strbeg = length;
2613 case 'x' | TYPE_IS_SHRIEKING: {
2615 if (!len) /* Avoid division by 0 */
2617 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2618 else ai32 = (cur - start) % len;
2619 if (ai32 == 0) goto no_change;
2631 aptr = SvPV(fromstr, fromlen);
2632 if (DO_UTF8(fromstr)) {
2635 if (!utf8 && !SvUTF8(cat)) {
2636 marked_upgrade(aTHX_ cat, symptr);
2637 lookahead.flags |= FLAG_DO_UTF8;
2638 lookahead.strbeg = symptr->strbeg;
2641 cur = start + SvCUR(cat);
2643 if (howlen == e_star) {
2644 if (utf8) goto string_copy;
2648 end = aptr + fromlen;
2649 fromlen = datumtype == 'Z' ? len-1 : len;
2650 while ((I32) fromlen > 0 && s < end) {
2655 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2658 if (datumtype == 'Z') len++;
2664 fromlen = len - fromlen;
2665 if (datumtype == 'Z') fromlen--;
2666 if (howlen == e_star) {
2668 if (datumtype == 'Z') len++;
2670 GROWING(0, cat, start, cur, len);
2671 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2672 datumtype | TYPE_IS_PACK))
2673 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2677 if (howlen == e_star) {
2679 if (datumtype == 'Z') len++;
2681 if (len <= (I32) fromlen) {
2683 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2685 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2687 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2688 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2690 while (fromlen > 0) {
2691 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2697 if (howlen == e_star) {
2699 if (datumtype == 'Z') len++;
2701 if (len <= (I32) fromlen) {
2703 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2705 GROWING(0, cat, start, cur, len);
2706 Copy(aptr, cur, fromlen, char);
2710 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2723 str = SvPV(fromstr, fromlen);
2724 end = str + fromlen;
2725 if (DO_UTF8(fromstr)) {
2727 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2729 utf8_source = FALSE;
2730 utf8_flags = 0; /* Unused, but keep compilers happy */
2732 if (howlen == e_star) len = fromlen;
2733 field_len = (len+7)/8;
2734 GROWING(utf8, cat, start, cur, field_len);
2735 if (len > (I32)fromlen) len = fromlen;
2738 if (datumtype == 'B')
2742 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2744 } else bits |= *str++ & 1;
2745 if (l & 7) bits <<= 1;
2747 PUSH_BYTE(utf8, cur, bits);
2752 /* datumtype == 'b' */
2756 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2757 if (val & 1) bits |= 0x80;
2758 } else if (*str++ & 1)
2760 if (l & 7) bits >>= 1;
2762 PUSH_BYTE(utf8, cur, bits);
2768 if (datumtype == 'B')
2769 bits <<= 7 - (l & 7);
2771 bits >>= 7 - (l & 7);
2772 PUSH_BYTE(utf8, cur, bits);
2775 /* Determine how many chars are left in the requested field */
2777 if (howlen == e_star) field_len = 0;
2778 else field_len -= l;
2779 Zero(cur, field_len, char);
2792 str = SvPV(fromstr, fromlen);
2793 end = str + fromlen;
2794 if (DO_UTF8(fromstr)) {
2796 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2798 utf8_source = FALSE;
2799 utf8_flags = 0; /* Unused, but keep compilers happy */
2801 if (howlen == e_star) len = fromlen;
2802 field_len = (len+1)/2;
2803 GROWING(utf8, cat, start, cur, field_len);
2804 if (!utf8 && len > (I32)fromlen) len = fromlen;
2807 if (datumtype == 'H')
2811 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2812 if (val < 256 && isALPHA(val))
2813 bits |= (val + 9) & 0xf;
2816 } else if (isALPHA(*str))
2817 bits |= (*str++ + 9) & 0xf;
2819 bits |= *str++ & 0xf;
2820 if (l & 1) bits <<= 4;
2822 PUSH_BYTE(utf8, cur, bits);
2830 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2831 if (val < 256 && isALPHA(val))
2832 bits |= ((val + 9) & 0xf) << 4;
2834 bits |= (val & 0xf) << 4;
2835 } else if (isALPHA(*str))
2836 bits |= ((*str++ + 9) & 0xf) << 4;
2838 bits |= (*str++ & 0xf) << 4;
2839 if (l & 1) bits >>= 4;
2841 PUSH_BYTE(utf8, cur, bits);
2847 PUSH_BYTE(utf8, cur, bits);
2850 /* Determine how many chars are left in the requested field */
2852 if (howlen == e_star) field_len = 0;
2853 else field_len -= l;
2854 Zero(cur, field_len, char);
2862 aiv = SvIV(fromstr);
2863 if ((-128 > aiv || aiv > 127) &&
2865 Perl_warner(aTHX_ packWARN(WARN_PACK),
2866 "Character in 'c' format wrapped in pack");
2867 PUSH_BYTE(utf8, cur, aiv & 0xff);
2872 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2875 GROWING(0, cat, start, cur, len);
2879 aiv = SvIV(fromstr);
2880 if ((0 > aiv || aiv > 0xff) &&
2882 Perl_warner(aTHX_ packWARN(WARN_PACK),
2883 "Character in 'C' format wrapped in pack");
2884 *cur++ = aiv & 0xff;
2889 U8 in_bytes = IN_BYTES;
2891 end = start+SvLEN(cat)-1;
2892 if (utf8) end -= UTF8_MAXLEN-1;
2896 auv = SvUV(fromstr);
2897 if (in_bytes) auv = auv % 0x100;
2902 SvCUR(cat) = cur - start;
2904 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2905 end = start+SvLEN(cat)-UTF8_MAXLEN;
2907 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2910 0 : UNICODE_ALLOW_ANY);
2915 SvCUR(cat) = cur - start;
2916 marked_upgrade(aTHX_ cat, symptr);
2917 lookahead.flags |= FLAG_DO_UTF8;
2918 lookahead.strbeg = symptr->strbeg;
2921 cur = start + SvCUR(cat);
2922 end = start+SvLEN(cat)-UTF8_MAXLEN;
2925 if (ckWARN(WARN_PACK))
2926 Perl_warner(aTHX_ packWARN(WARN_PACK),
2927 "Character in 'W' format wrapped in pack");
2932 SvCUR(cat) = cur - start;
2933 GROWING(0, cat, start, cur, len+1);
2934 end = start+SvLEN(cat)-1;
2936 *(U8 *) cur++ = (U8)auv;
2945 if (!(symptr->flags & FLAG_DO_UTF8)) {
2946 marked_upgrade(aTHX_ cat, symptr);
2947 lookahead.flags |= FLAG_DO_UTF8;
2948 lookahead.strbeg = symptr->strbeg;
2954 end = start+SvLEN(cat);
2955 if (!utf8) end -= UTF8_MAXLEN;
2959 auv = SvUV(fromstr);
2961 U8 buffer[UTF8_MAXLEN], *endb;
2962 endb = uvuni_to_utf8_flags(buffer, auv,
2964 0 : UNICODE_ALLOW_ANY);
2965 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2967 SvCUR(cat) = cur - start;
2968 GROWING(0, cat, start, cur,
2969 len+(endb-buffer)*UTF8_EXPAND);
2970 end = start+SvLEN(cat);
2972 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
2976 SvCUR(cat) = cur - start;
2977 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2978 end = start+SvLEN(cat)-UTF8_MAXLEN;
2980 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2982 0 : UNICODE_ALLOW_ANY);
2987 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2993 anv = SvNV(fromstr);
2995 /* VOS does not automatically map a floating-point overflow
2996 during conversion from double to float into infinity, so we
2997 do it by hand. This code should either be generalized for
2998 any OS that needs it, or removed if and when VOS implements
2999 posix-976 (suggestion to support mapping to infinity).
3000 Paul.Green@stratus.com 02-04-02. */
3002 afloat = _float_constants[0]; /* single prec. inf. */
3003 else if (anv < -FLT_MAX)
3004 afloat = _float_constants[0]; /* single prec. inf. */
3005 else afloat = (float) anv;
3007 # if defined(VMS) && !defined(__IEEE_FP)
3008 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3009 * on Alpha; fake it if we don't have them.
3013 else if (anv < -FLT_MAX)
3015 else afloat = (float)anv;
3017 afloat = (float)anv;
3019 #endif /* __VOS__ */
3020 DO_BO_PACK_N(afloat, float);
3021 PUSH_VAR(utf8, cur, afloat);
3029 anv = SvNV(fromstr);
3031 /* VOS does not automatically map a floating-point overflow
3032 during conversion from long double to double into infinity,
3033 so we do it by hand. This code should either be generalized
3034 for any OS that needs it, or removed if and when VOS
3035 implements posix-976 (suggestion to support mapping to
3036 infinity). Paul.Green@stratus.com 02-04-02. */
3038 adouble = _double_constants[0]; /* double prec. inf. */
3039 else if (anv < -DBL_MAX)
3040 adouble = _double_constants[0]; /* double prec. inf. */
3041 else adouble = (double) anv;
3043 # if defined(VMS) && !defined(__IEEE_FP)
3044 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3045 * on Alpha; fake it if we don't have them.
3049 else if (anv < -DBL_MAX)
3051 else adouble = (double)anv;
3053 adouble = (double)anv;
3055 #endif /* __VOS__ */
3056 DO_BO_PACK_N(adouble, double);
3057 PUSH_VAR(utf8, cur, adouble);
3062 Zero(&anv, 1, NV); /* can be long double with unused bits */
3065 anv = SvNV(fromstr);
3066 DO_BO_PACK_N(anv, NV);
3067 PUSH_VAR(utf8, cur, anv);
3071 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3073 long double aldouble;
3074 /* long doubles can have unused bits, which may be nonzero */
3075 Zero(&aldouble, 1, long double);
3078 aldouble = (long double)SvNV(fromstr);
3079 DO_BO_PACK_N(aldouble, long double);
3080 PUSH_VAR(utf8, cur, aldouble);
3085 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3086 case 'n' | TYPE_IS_SHRIEKING:
3092 ai16 = (I16)SvIV(fromstr);
3094 ai16 = PerlSock_htons(ai16);
3096 PUSH16(utf8, cur, &ai16);
3099 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3100 case 'v' | TYPE_IS_SHRIEKING:
3106 ai16 = (I16)SvIV(fromstr);
3110 PUSH16(utf8, cur, &ai16);
3113 case 'S' | TYPE_IS_SHRIEKING:
3114 #if SHORTSIZE != SIZE16
3116 unsigned short aushort;
3118 aushort = SvUV(fromstr);
3119 DO_BO_PACK(aushort, s);
3120 PUSH_VAR(utf8, cur, aushort);
3130 au16 = (U16)SvUV(fromstr);
3131 DO_BO_PACK(au16, 16);
3132 PUSH16(utf8, cur, &au16);
3135 case 's' | TYPE_IS_SHRIEKING:
3136 #if SHORTSIZE != SIZE16
3140 ashort = SvIV(fromstr);
3141 DO_BO_PACK(ashort, s);
3142 PUSH_VAR(utf8, cur, ashort);
3152 ai16 = (I16)SvIV(fromstr);
3153 DO_BO_PACK(ai16, 16);
3154 PUSH16(utf8, cur, &ai16);
3158 case 'I' | TYPE_IS_SHRIEKING:
3162 auint = SvUV(fromstr);
3163 DO_BO_PACK(auint, i);
3164 PUSH_VAR(utf8, cur, auint);
3171 aiv = SvIV(fromstr);
3172 #if IVSIZE == INTSIZE
3174 #elif IVSIZE == LONGSIZE
3176 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3177 DO_BO_PACK(aiv, 64);
3179 Perl_croak(aTHX_ "'j' not supported on this platform");
3181 PUSH_VAR(utf8, cur, aiv);
3188 auv = SvUV(fromstr);
3189 #if UVSIZE == INTSIZE
3191 #elif UVSIZE == LONGSIZE
3193 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3194 DO_BO_PACK(auv, 64);
3196 Perl_croak(aTHX_ "'J' not supported on this platform");
3198 PUSH_VAR(utf8, cur, auv);
3205 anv = SvNV(fromstr);
3209 SvCUR(cat) = cur - start;
3210 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3213 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3214 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3215 any negative IVs will have already been got by the croak()
3216 above. IOK is untrue for fractions, so we test them
3217 against UV_MAX_P1. */
3218 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3219 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3220 char *in = buf + sizeof(buf);
3221 UV auv = SvUV(fromstr);
3224 *--in = (char)((auv & 0x7f) | 0x80);
3227 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3228 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3229 in, (buf + sizeof(buf)) - in);
3230 } else if (SvPOKp(fromstr))
3232 else if (SvNOKp(fromstr)) {
3233 /* 10**NV_MAX_10_EXP is the largest power of 10
3234 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3235 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3236 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3237 And with that many bytes only Inf can overflow.
3238 Some C compilers are strict about integral constant
3239 expressions so we conservatively divide by a slightly
3240 smaller integer instead of multiplying by the exact
3241 floating-point value.
3243 #ifdef NV_MAX_10_EXP
3244 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3245 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3247 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3248 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3250 char *in = buf + sizeof(buf);
3252 anv = Perl_floor(anv);
3254 NV next = Perl_floor(anv / 128);
3255 if (in <= buf) /* this cannot happen ;-) */
3256 Perl_croak(aTHX_ "Cannot compress integer in pack");
3257 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3260 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3261 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3262 in, (buf + sizeof(buf)) - in);
3264 char *from, *result, *in;
3270 /* Copy string and check for compliance */
3271 from = SvPV(fromstr, len);
3272 if ((norm = is_an_int(from, len)) == NULL)
3273 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3275 New('w', result, len, char);
3278 while (!done) *--in = div128(norm, &done) | 0x80;
3279 result[len - 1] &= 0x7F; /* clear continue bit */
3280 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3281 in, (result + len) - in);
3283 SvREFCNT_dec(norm); /* free norm */
3288 case 'i' | TYPE_IS_SHRIEKING:
3292 aint = SvIV(fromstr);
3293 DO_BO_PACK(aint, i);
3294 PUSH_VAR(utf8, cur, aint);
3297 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3298 case 'N' | TYPE_IS_SHRIEKING:
3304 au32 = SvUV(fromstr);
3306 au32 = PerlSock_htonl(au32);
3308 PUSH32(utf8, cur, &au32);
3311 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3312 case 'V' | TYPE_IS_SHRIEKING:
3318 au32 = SvUV(fromstr);
3322 PUSH32(utf8, cur, &au32);
3325 case 'L' | TYPE_IS_SHRIEKING:
3326 #if LONGSIZE != SIZE32
3328 unsigned long aulong;
3330 aulong = SvUV(fromstr);
3331 DO_BO_PACK(aulong, l);
3332 PUSH_VAR(utf8, cur, aulong);
3342 au32 = SvUV(fromstr);
3343 DO_BO_PACK(au32, 32);
3344 PUSH32(utf8, cur, &au32);
3347 case 'l' | TYPE_IS_SHRIEKING:
3348 #if LONGSIZE != SIZE32
3352 along = SvIV(fromstr);
3353 DO_BO_PACK(along, l);
3354 PUSH_VAR(utf8, cur, along);
3364 ai32 = SvIV(fromstr);
3365 DO_BO_PACK(ai32, 32);
3366 PUSH32(utf8, cur, &ai32);
3374 auquad = (Uquad_t) SvUV(fromstr);
3375 DO_BO_PACK(auquad, 64);
3376 PUSH_VAR(utf8, cur, auquad);
3383 aquad = (Quad_t)SvIV(fromstr);
3384 DO_BO_PACK(aquad, 64);
3385 PUSH_VAR(utf8, cur, aquad);
3388 #endif /* HAS_QUAD */
3390 len = 1; /* assume SV is correct length */
3391 GROWING(utf8, cat, start, cur, sizeof(char *));
3398 SvGETMAGIC(fromstr);
3399 if (!SvOK(fromstr)) aptr = NULL;
3402 /* XXX better yet, could spirit away the string to
3403 * a safe spot and hang on to it until the result
3404 * of pack() (and all copies of the result) are
3407 if (ckWARN(WARN_PACK) &&
3408 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3409 !SvREADONLY(fromstr)))) {
3410 Perl_warner(aTHX_ packWARN(WARN_PACK),
3411 "Attempt to pack pointer to temporary value");
3413 if (SvPOK(fromstr) || SvNIOK(fromstr))
3414 aptr = SvPV_flags(fromstr, n_a, 0);
3416 aptr = SvPV_force_flags(fromstr, n_a, 0);
3419 PUSH_VAR(utf8, cur, aptr);
3427 if (len <= 2) len = 45;
3428 else len = len / 3 * 3;
3430 Perl_warner(aTHX_ packWARN(WARN_PACK),
3431 "Field too wide in 'u' format in pack");
3434 aptr = SvPV(fromstr, fromlen);
3435 from_utf8 = DO_UTF8(fromstr);
3437 aend = aptr + fromlen;
3438 fromlen = sv_len_utf8(fromstr);
3439 } else aend = NULL; /* Unused, but keep compilers happy */
3440 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3441 while (fromlen > 0) {
3444 U8 hunk[1+63/3*4+1];
3446 if ((I32)fromlen > len)
3452 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3453 'u' | TYPE_IS_PACK)) {
3455 SvCUR(cat) = cur - start;
3456 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3458 end = doencodes(hunk, buffer, todo);
3460 end = doencodes(hunk, aptr, todo);
3463 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3470 SvCUR(cat) = cur - start;
3472 *symptr = lookahead;
3481 dSP; dMARK; dORIGMARK; dTARGET;
3482 register SV *cat = TARG;
3484 register char *pat = SvPVx(*++MARK, fromlen);
3485 register char *patend = pat + fromlen;
3488 sv_setpvn(cat, "", 0);
3491 packlist(cat, pat, patend, MARK, SP + 1);
3501 * c-indentation-style: bsd
3503 * indent-tabs-mode: t
3506 * vim: shiftwidth=4: