3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
35 # define PERL_PACK_CAN_BYTEORDER
36 # define PERL_PACK_CAN_SHRIEKSIGN
42 /* Maximum number of bytes to which a byte can grow due to upgrade */
46 * Offset for integer pack/unpack.
48 * On architectures where I16 and I32 aren't really 16 and 32 bits,
49 * which for now are all Crays, pack and unpack have to play games.
53 * These values are required for portability of pack() output.
54 * If they're not right on your machine, then pack() and unpack()
55 * wouldn't work right anyway; you'll need to apply the Cray hack.
56 * (I'd like to check them with #if, but you can't use sizeof() in
57 * the preprocessor.) --???
60 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
61 defines are now in config.h. --Andy Dougherty April 1998
66 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
69 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
70 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
71 # define OFF16(p) ((char*)(p))
72 # define OFF32(p) ((char*)(p))
74 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
75 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
76 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
78 ++++ bad cray byte order
82 # define OFF16(p) ((char *) (p))
83 # define OFF32(p) ((char *) (p))
86 /* Only to be used inside a loop (see the break) */
87 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
89 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
91 Copy(s, OFF16(p), SIZE16, char); \
96 /* Only to be used inside a loop (see the break) */
97 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
99 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
101 Copy(s, OFF32(p), SIZE32, char); \
106 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
107 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
109 /* Only to be used inside a loop (see the break) */
110 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
113 if (!uni_to_bytes(aTHX_ &s, strend, \
114 (char *) &var, sizeof(var), datumtype)) break;\
116 Copy(s, (char *) &var, sizeof(var), char); \
121 #define PUSH_VAR(utf8, aptr, var) \
122 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
124 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
125 #define MAX_SUB_TEMPLATE_LEVEL 100
127 /* flags (note that type modifiers can also be used as flags!) */
128 #define FLAG_WAS_UTF8 0x40
129 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
130 #define FLAG_UNPACK_ONLY_ONE 0x10
131 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
132 #define FLAG_SLASH 0x04
133 #define FLAG_COMMA 0x02
134 #define FLAG_PACK 0x01
137 S_mul128(pTHX_ SV *sv, U8 m)
140 char *s = SvPV(sv, len);
144 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
145 SV *tmpNew = newSVpvn("0000000000", 10);
147 sv_catsv(tmpNew, sv);
148 SvREFCNT_dec(sv); /* free old sv */
153 while (!*t) /* trailing '\0'? */
156 i = ((*t - '0') << 7) + m;
157 *(t--) = '0' + (char)(i % 10);
163 /* Explosives and implosives. */
165 #if 'I' == 73 && 'J' == 74
166 /* On an ASCII/ISO kind of system */
167 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
170 Some other sort of character set - use memchr() so we don't match
173 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
177 #define TYPE_IS_SHRIEKING 0x100
178 #define TYPE_IS_BIG_ENDIAN 0x200
179 #define TYPE_IS_LITTLE_ENDIAN 0x400
180 #define TYPE_IS_PACK 0x800
181 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
182 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
183 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
185 #ifdef PERL_PACK_CAN_SHRIEKSIGN
186 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
188 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
191 #ifndef PERL_PACK_CAN_BYTEORDER
192 /* Put "can't" first because it is shorter */
193 # define TYPE_ENDIANNESS(t) 0
194 # define TYPE_NO_ENDIANNESS(t) (t)
196 # define ENDIANNESS_ALLOWED_TYPES ""
198 # define DO_BO_UNPACK(var, type)
199 # define DO_BO_PACK(var, type)
200 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
201 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
202 # define DO_BO_UNPACK_N(var, type)
203 # define DO_BO_PACK_N(var, type)
204 # define DO_BO_UNPACK_P(var)
205 # define DO_BO_PACK_P(var)
207 #else /* PERL_PACK_CAN_BYTEORDER */
209 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
210 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
212 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
214 # define DO_BO_UNPACK(var, type) \
216 switch (TYPE_ENDIANNESS(datumtype)) { \
217 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
218 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
223 # define DO_BO_PACK(var, type) \
225 switch (TYPE_ENDIANNESS(datumtype)) { \
226 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
227 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
232 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
234 switch (TYPE_ENDIANNESS(datumtype)) { \
235 case TYPE_IS_BIG_ENDIAN: \
236 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
238 case TYPE_IS_LITTLE_ENDIAN: \
239 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
246 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
248 switch (TYPE_ENDIANNESS(datumtype)) { \
249 case TYPE_IS_BIG_ENDIAN: \
250 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
252 case TYPE_IS_LITTLE_ENDIAN: \
253 var = (post_cast *) my_htole ## type ((pre_cast) var); \
260 # define BO_CANT_DOIT(action, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: \
264 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
265 "platform", #action, #type); \
267 case TYPE_IS_LITTLE_ENDIAN: \
268 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
269 "platform", #action, #type); \
276 # if PTRSIZE == INTSIZE
277 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
278 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
279 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
280 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
281 # elif PTRSIZE == LONGSIZE
282 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
283 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
284 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
285 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
287 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
288 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
291 # if defined(my_htolen) && defined(my_letohn) && \
292 defined(my_htoben) && defined(my_betohn)
293 # define DO_BO_UNPACK_N(var, type) \
295 switch (TYPE_ENDIANNESS(datumtype)) { \
296 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
297 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
302 # define DO_BO_PACK_N(var, type) \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
306 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
311 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
312 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
315 #endif /* PERL_PACK_CAN_BYTEORDER */
317 #define PACK_SIZE_CANNOT_CSUM 0x80
318 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
319 #define PACK_SIZE_MASK 0x3F
321 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
322 in). You're unlikely ever to need to regenerate them. */
324 #if TYPE_IS_SHRIEKING != 0x100
325 ++++shriek offset should be 256
328 typedef U8 packprops_t;
331 const packprops_t packprops[512] = {
333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
338 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
339 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
340 /* D */ LONG_DOUBLESIZE,
347 /* I */ sizeof(unsigned int),
354 #if defined(HAS_QUAD)
355 /* Q */ sizeof(Uquad_t),
362 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
364 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 /* c */ sizeof(char),
367 /* d */ sizeof(double),
369 /* f */ sizeof(float),
378 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
379 #if defined(HAS_QUAD)
380 /* q */ sizeof(Quad_t),
388 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
393 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 0, 0, 0, 0, 0, 0, 0, 0,
399 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
400 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
401 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 0, 0, 0, 0, 0, 0, 0, 0, 0,
404 /* I */ sizeof(unsigned int),
406 /* L */ sizeof(unsigned long),
408 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
414 /* S */ sizeof(unsigned short),
416 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
421 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
425 /* l */ sizeof(long),
427 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
433 /* s */ sizeof(short),
435 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0
451 /* EBCDIC (or bust) */
452 const packprops_t packprops[512] = {
454 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
455 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 /* c */ sizeof(char),
464 /* d */ sizeof(double),
466 /* f */ sizeof(float),
476 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
477 #if defined(HAS_QUAD)
478 /* q */ sizeof(Quad_t),
482 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
490 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
491 /* D */ LONG_DOUBLESIZE,
498 /* I */ sizeof(unsigned int),
506 #if defined(HAS_QUAD)
507 /* Q */ sizeof(Uquad_t),
511 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
516 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 /* l */ sizeof(long),
533 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 /* s */ sizeof(short),
541 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 /* I */ sizeof(unsigned int),
550 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 /* L */ sizeof(unsigned long),
553 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 /* S */ sizeof(unsigned short),
561 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
572 uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
576 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
577 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
578 /* We try to process malformed UTF-8 as much as possible (preferrably with
579 warnings), but these two mean we make no progress in the string and
580 might enter an infinite loop */
581 if (retlen == (STRLEN) -1 || retlen == 0)
582 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
583 (int) TYPE_NO_MODIFIERS(datumtype));
585 if (ckWARN(WARN_UNPACK))
586 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
587 "Character in '%c' format wrapped in unpack",
588 (int) TYPE_NO_MODIFIERS(datumtype));
595 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
596 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
600 uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
606 U32 flags = ckWARN(WARN_UTF8) ?
607 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
608 for (;buf_len > 0; buf_len--) {
609 if (from >= end) return FALSE;
610 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
611 if (retlen == (STRLEN) -1 || retlen == 0) {
612 from += UTF8SKIP(from);
614 } else from += retlen;
619 *(U8 *)buf++ = (U8)val;
621 /* We have enough characters for the buffer. Did we have problems ? */
624 /* Rewalk the string fragment while warning */
626 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
627 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
628 if (ptr >= end) break;
629 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
631 if (from > end) from = end;
633 if ((bad & 2) && ckWARN(WARN_UNPACK))
634 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
635 WARN_PACK : WARN_UNPACK),
636 "Character(s) in '%c' format wrapped in %s",
637 (int) TYPE_NO_MODIFIERS(datumtype),
638 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
645 next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
649 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
650 if (val >= 0x100 || !ISUUCHAR(val) ||
651 retlen == (STRLEN) -1 || retlen == 0) {
655 *out = PL_uudmap[val] & 077;
661 bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest) {
662 U8 buffer[UTF8_MAXLEN];
663 U8 *end = start + len;
665 while (start < end) {
667 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
677 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
685 #define PUSH_BYTES(utf8, cur, buf, len) \
687 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
689 Copy(buf, cur, len, char); \
694 #define GROWING(utf8, cat, start, cur, in_len) \
696 STRLEN glen = (in_len); \
697 if (utf8) glen *= UTF8_EXPAND; \
698 if ((cur) + glen >= (start) + SvLEN(cat)) { \
699 (start) = sv_exp_grow(aTHX_ cat, glen); \
700 (cur) = (start) + SvCUR(cat); \
704 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
706 STRLEN glen = (in_len); \
708 if (utf8) gl *= UTF8_EXPAND; \
709 if ((cur) + gl >= (start) + SvLEN(cat)) { \
711 SvCUR_set((cat), (cur) - (start)); \
712 (start) = sv_exp_grow(aTHX_ cat, gl); \
713 (cur) = (start) + SvCUR(cat); \
715 PUSH_BYTES(utf8, cur, buf, glen); \
718 #define PUSH_BYTE(utf8, s, byte) \
722 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
723 } else *(U8 *)(s)++ = (byte); \
726 /* Only to be used inside a loop (see the break) */
727 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
730 if (str >= end) break; \
731 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
732 if (retlen == (STRLEN) -1 || retlen == 0) { \
734 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
739 /* Returns the sizeof() struct described by pat */
741 S_measure_struct(pTHX_ tempsym_t* symptr)
745 while (next_symbol(symptr)) {
749 switch (symptr->howlen) {
751 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
752 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
755 /* e_no_len and e_number */
756 len = symptr->length;
760 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
762 /* endianness doesn't influence the size of a type */
763 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
765 Perl_croak(aTHX_ "Invalid type '%c' in %s",
766 (int)TYPE_NO_MODIFIERS(symptr->code),
767 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
768 #ifdef PERL_PACK_CAN_SHRIEKSIGN
769 case '.' | TYPE_IS_SHRIEKING:
770 case '@' | TYPE_IS_SHRIEKING:
775 case 'U': /* XXXX Is it correct? */
778 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
779 (int) TYPE_NO_MODIFIERS(symptr->code),
780 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
786 tempsym_t savsym = *symptr;
787 symptr->patptr = savsym.grpbeg;
788 symptr->patend = savsym.grpend;
789 /* XXXX Theoretically, we need to measure many times at
790 different positions, since the subexpression may contain
791 alignment commands, but be not of aligned length.
792 Need to detect this and croak(). */
793 size = measure_struct(symptr);
797 case 'X' | TYPE_IS_SHRIEKING:
798 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
800 if (!len) /* Avoid division by 0 */
802 len = total % len; /* Assumed: the start is aligned. */
807 Perl_croak(aTHX_ "'X' outside of string in %s",
808 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
810 case 'x' | TYPE_IS_SHRIEKING:
811 if (!len) /* Avoid division by 0 */
813 star = total % len; /* Assumed: the start is aligned. */
814 if (star) /* Other portable ways? */
838 size = sizeof(char*);
848 /* locate matching closing parenthesis or bracket
849 * returns char pointer to char after match, or NULL
852 S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
854 while (patptr < patend) {
862 while (patptr < patend && *patptr != '\n')
866 patptr = group_end(patptr, patend, ')') + 1;
868 patptr = group_end(patptr, patend, ']') + 1;
870 Perl_croak(aTHX_ "No group ending character '%c' found in template",
876 /* Convert unsigned decimal number to binary.
877 * Expects a pointer to the first digit and address of length variable
878 * Advances char pointer to 1st non-digit char and returns number
881 S_get_num(pTHX_ register char *patptr, I32 *lenptr )
883 I32 len = *patptr++ - '0';
884 while (isDIGIT(*patptr)) {
885 if (len >= 0x7FFFFFFF/10)
886 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
887 len = (len * 10) + (*patptr++ - '0');
893 /* The marvellous template parsing routine: Using state stored in *symptr,
894 * locates next template code and count
897 S_next_symbol(pTHX_ tempsym_t* symptr )
899 char* patptr = symptr->patptr;
900 char* patend = symptr->patend;
901 const char *allowed = "";
903 symptr->flags &= ~FLAG_SLASH;
905 while (patptr < patend) {
906 if (isSPACE(*patptr))
908 else if (*patptr == '#') {
910 while (patptr < patend && *patptr != '\n')
915 /* We should have found a template code */
916 I32 code = *patptr++ & 0xFF;
917 U32 inherited_modifiers = 0;
919 if (code == ','){ /* grandfather in commas but with a warning */
920 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
921 symptr->flags |= FLAG_COMMA;
922 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
923 "Invalid type ',' in %s",
924 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
929 /* for '(', skip to ')' */
931 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
932 Perl_croak(aTHX_ "()-group starts with a count in %s",
933 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
934 symptr->grpbeg = patptr;
935 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
936 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
937 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
938 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
941 /* look for group modifiers to inherit */
942 if (TYPE_ENDIANNESS(symptr->flags)) {
943 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
944 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
947 /* look for modifiers */
948 while (patptr < patend) {
952 modifier = TYPE_IS_SHRIEKING;
953 allowed = SHRIEKING_ALLOWED_TYPES;
955 #ifdef PERL_PACK_CAN_BYTEORDER
957 modifier = TYPE_IS_BIG_ENDIAN;
958 allowed = ENDIANNESS_ALLOWED_TYPES;
961 modifier = TYPE_IS_LITTLE_ENDIAN;
962 allowed = ENDIANNESS_ALLOWED_TYPES;
964 #endif /* PERL_PACK_CAN_BYTEORDER */
972 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
973 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
974 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
976 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
977 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
978 (int) TYPE_NO_MODIFIERS(code),
979 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
980 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
981 TYPE_ENDIANNESS_MASK)
982 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
983 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
985 if (ckWARN(WARN_UNPACK)) {
987 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
988 "Duplicate modifier '%c' after '%c' in %s",
989 *patptr, (int) TYPE_NO_MODIFIERS(code),
990 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
997 /* inherit modifiers */
998 code |= inherited_modifiers;
1000 /* look for count and/or / */
1001 if (patptr < patend) {
1002 if (isDIGIT(*patptr)) {
1003 patptr = get_num( patptr, &symptr->length );
1004 symptr->howlen = e_number;
1006 } else if (*patptr == '*') {
1008 symptr->howlen = e_star;
1010 } else if (*patptr == '[') {
1011 char* lenptr = ++patptr;
1012 symptr->howlen = e_number;
1013 patptr = group_end( patptr, patend, ']' ) + 1;
1014 /* what kind of [] is it? */
1015 if (isDIGIT(*lenptr)) {
1016 lenptr = get_num( lenptr, &symptr->length );
1017 if( *lenptr != ']' )
1018 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1019 symptr->flags & FLAG_PACK ? "pack" : "unpack");
1021 tempsym_t savsym = *symptr;
1022 symptr->patend = patptr-1;
1023 symptr->patptr = lenptr;
1024 savsym.length = measure_struct(symptr);
1028 symptr->howlen = e_no_len;
1033 while (patptr < patend) {
1034 if (isSPACE(*patptr))
1036 else if (*patptr == '#') {
1038 while (patptr < patend && *patptr != '\n')
1040 if (patptr < patend)
1043 if (*patptr == '/') {
1044 symptr->flags |= FLAG_SLASH;
1046 if (patptr < patend &&
1047 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1048 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1049 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
1055 /* at end - no count, no / */
1056 symptr->howlen = e_no_len;
1060 symptr->code = code;
1061 symptr->patptr = patptr;
1065 symptr->patptr = patptr;
1070 There is no way to cleanly handle the case where we should process the
1071 string per byte in its upgraded form while it's really in downgraded form
1072 (e.g. estimates like strend-s as an upper bound for the number of
1073 characters left wouldn't work). So if we foresee the need of this
1074 (pattern starts with U or contains U0), we want to work on the encoded
1075 version of the string. Users are advised to upgrade their pack string
1076 themselves if they need to do a lot of unpacks like this on it
1079 need_utf8(const char *pat, const char *patend)
1082 while (pat < patend) {
1083 if (pat[0] == '#') {
1085 pat = (char *) memchr(pat, '\n', patend-pat);
1086 if (!pat) return FALSE;
1087 } else if (pat[0] == 'U') {
1088 if (first || pat[1] == '0') return TRUE;
1089 } else first = FALSE;
1096 first_symbol(const char *pat, const char *patend) {
1097 while (pat < patend) {
1098 if (pat[0] != '#') return pat[0];
1100 pat = (char *) memchr(pat, '\n', patend-pat);
1108 =for apidoc unpack_str
1110 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1111 and ocnt are not used. This call should not be used, use unpackstring instead.
1116 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
1118 tempsym_t sym = { 0 };
1120 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1121 else if (need_utf8(pat, patend)) {
1122 /* We probably should try to avoid this in case a scalar context call
1123 wouldn't get to the "U0" */
1124 STRLEN len = strend - s;
1125 s = (char *) bytes_to_utf8((U8 *) s, &len);
1128 flags |= FLAG_DO_UTF8;
1131 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1132 flags |= FLAG_PARSE_UTF8;
1135 sym.patend = patend;
1138 return unpack_rec(&sym, s, s, strend, NULL );
1142 =for apidoc unpackstring
1144 The engine implementing unpack() Perl function. C<unpackstring> puts the
1145 extracted list items on the stack and returns the number of elements.
1146 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1151 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
1153 tempsym_t sym = { 0 };
1155 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1156 else if (need_utf8(pat, patend)) {
1157 /* We probably should try to avoid this in case a scalar context call
1158 wouldn't get to the "U0" */
1159 STRLEN len = strend - s;
1160 s = (char *) bytes_to_utf8((U8 *) s, &len);
1163 flags |= FLAG_DO_UTF8;
1166 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1167 flags |= FLAG_PARSE_UTF8;
1170 sym.patend = patend;
1173 return unpack_rec(&sym, s, s, strend, NULL );
1178 S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
1182 I32 start_sp_offset = SP - PL_stack_base;
1188 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1189 bool beyond = FALSE;
1190 bool explicit_length;
1191 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1192 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1193 symptr->strbeg = s - strbeg;
1195 while (next_symbol(symptr)) {
1198 I32 datumtype = symptr->code;
1199 /* do first one only unless in list context
1200 / is implemented by unpacking the count, then popping it from the
1201 stack, so must check that we're not in the middle of a / */
1202 if ( unpack_only_one
1203 && (SP - PL_stack_base == start_sp_offset + 1)
1204 && (datumtype != '/') ) /* XXX can this be omitted */
1207 switch (howlen = symptr->howlen) {
1209 len = strend - strbeg; /* long enough */
1212 /* e_no_len and e_number */
1213 len = symptr->length;
1217 explicit_length = TRUE;
1219 beyond = s >= strend;
1221 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1223 /* props nonzero means we can process this letter. */
1224 long size = props & PACK_SIZE_MASK;
1225 long howmany = (strend - s) / size;
1229 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1230 if (len && unpack_only_one) len = 1;
1236 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1238 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1241 if (howlen == e_no_len)
1242 len = 16; /* len is not specified */
1250 tempsym_t savsym = *symptr;
1251 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1252 symptr->flags |= group_modifiers;
1253 symptr->patend = savsym.grpend;
1254 symptr->previous = &savsym;
1258 symptr->patptr = savsym.grpbeg;
1259 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1260 else symptr->flags &= ~FLAG_PARSE_UTF8;
1261 unpack_rec(symptr, s, strbeg, strend, &s);
1262 if (s == strend && savsym.howlen == e_star)
1263 break; /* No way to continue */
1266 savsym.flags = symptr->flags & ~group_modifiers;
1270 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1271 case '.' | TYPE_IS_SHRIEKING:
1276 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1277 bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1278 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1281 if (howlen == e_star) from = strbeg;
1282 else if (len <= 0) from = s;
1284 tempsym_t *group = symptr;
1286 while (--len && group) group = group->previous;
1287 from = group ? strbeg + group->strbeg : strbeg;
1290 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1291 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1292 XPUSHs(sv_2mortal(sv));
1295 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1296 case '@' | TYPE_IS_SHRIEKING:
1299 s = strbeg + symptr->strbeg;
1300 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1302 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 Perl_croak(aTHX_ "'@' outside of string in unpack");
1313 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1316 Perl_croak(aTHX_ "'@' outside of string in unpack");
1320 case 'X' | TYPE_IS_SHRIEKING:
1321 if (!len) /* Avoid division by 0 */
1326 hop = last = strbeg;
1328 hop += UTF8SKIP(hop);
1335 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1339 len = (s - strbeg) % len;
1345 Perl_croak(aTHX_ "'X' outside of string in unpack");
1346 while (--s, UTF8_IS_CONTINUATION(*s)) {
1348 Perl_croak(aTHX_ "'X' outside of string in unpack");
1353 if (len > s - strbeg)
1354 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1358 case 'x' | TYPE_IS_SHRIEKING:
1359 if (!len) /* Avoid division by 0 */
1361 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1362 else ai32 = (s - strbeg) % len;
1363 if (ai32 == 0) break;
1370 Perl_croak(aTHX_ "'x' outside of string in unpack");
1375 if (len > strend - s)
1376 Perl_croak(aTHX_ "'x' outside of string in unpack");
1381 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1387 /* Preliminary length estimate is assumed done in 'W' */
1388 if (len > strend - s) len = strend - s;
1394 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1395 if (hop >= strend) {
1397 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1402 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1404 } else if (len > strend - s)
1407 if (datumtype == 'Z') {
1408 /* 'Z' strips stuff after first null */
1411 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1412 sv = newSVpvn(s, ptr-s);
1413 if (howlen == e_star) /* exact for 'Z*' */
1414 len = ptr-s + (ptr != strend ? 1 : 0);
1415 } else if (datumtype == 'A') {
1416 /* 'A' strips both nulls and spaces */
1418 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1419 for (ptr = s+len-1; ptr >= s; ptr--)
1420 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1421 !is_utf8_space((U8 *) ptr)) break;
1422 if (ptr >= s) ptr += UTF8SKIP(ptr);
1425 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1427 for (ptr = s+len-1; ptr >= s; ptr--)
1428 if (*ptr != 0 && !isSPACE(*ptr)) break;
1431 sv = newSVpvn(s, ptr-s);
1432 } else sv = newSVpvn(s, len);
1436 /* Undo any upgrade done due to need_utf8() */
1437 if (!(symptr->flags & FLAG_WAS_UTF8))
1438 sv_utf8_downgrade(sv, 0);
1440 XPUSHs(sv_2mortal(sv));
1446 if (howlen == e_star || len > (strend - s) * 8)
1447 len = (strend - s) * 8;
1451 Newz(601, PL_bitcount, 256, char);
1452 for (bits = 1; bits < 256; bits++) {
1453 if (bits & 1) PL_bitcount[bits]++;
1454 if (bits & 2) PL_bitcount[bits]++;
1455 if (bits & 4) PL_bitcount[bits]++;
1456 if (bits & 8) PL_bitcount[bits]++;
1457 if (bits & 16) PL_bitcount[bits]++;
1458 if (bits & 32) PL_bitcount[bits]++;
1459 if (bits & 64) PL_bitcount[bits]++;
1460 if (bits & 128) PL_bitcount[bits]++;
1464 while (len >= 8 && s < strend) {
1465 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1470 cuv += PL_bitcount[*(U8 *)s++];
1473 if (len && s < strend) {
1475 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1476 if (datumtype == 'b')
1478 if (bits & 1) cuv++;
1483 if (bits & 0x80) cuv++;
1490 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1493 if (datumtype == 'b') {
1496 for (len = 0; len < ai32; len++) {
1497 if (len & 7) bits >>= 1;
1499 if (s >= strend) break;
1500 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1501 } else bits = *(U8 *) s++;
1502 *str++ = bits & 1 ? '1' : '0';
1507 for (len = 0; len < ai32; len++) {
1508 if (len & 7) bits <<= 1;
1510 if (s >= strend) break;
1511 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1512 } else bits = *(U8 *) s++;
1513 *str++ = bits & 0x80 ? '1' : '0';
1517 SvCUR_set(sv, str - SvPVX(sv));
1524 /* Preliminary length estimate, acceptable for utf8 too */
1525 if (howlen == e_star || len > (strend - s) * 2)
1526 len = (strend - s) * 2;
1527 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1530 if (datumtype == 'h') {
1533 for (len = 0; len < ai32; len++) {
1534 if (len & 1) bits >>= 4;
1536 if (s >= strend) break;
1537 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1538 } else bits = * (U8 *) s++;
1539 *str++ = PL_hexdigit[bits & 15];
1544 for (len = 0; len < ai32; len++) {
1545 if (len & 1) bits <<= 4;
1547 if (s >= strend) break;
1548 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1549 } else bits = *(U8 *) s++;
1550 *str++ = PL_hexdigit[(bits >> 4) & 15];
1554 SvCUR_set(sv, str - SvPVX(sv));
1560 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1561 if (aint >= 128) /* fake up signed chars */
1564 PUSHs(sv_2mortal(newSViv((IV)aint)));
1565 else if (checksum > bits_in_uv)
1566 cdouble += (NV)aint;
1575 if (explicit_length && datumtype == 'C')
1576 /* Switch to "character" mode */
1577 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1580 if (datumtype == 'C' ?
1581 (symptr->flags & FLAG_DO_UTF8) &&
1582 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1583 while (len-- > 0 && s < strend) {
1586 val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1587 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1588 if (retlen == (STRLEN) -1 || retlen == 0)
1589 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1592 PUSHs(sv_2mortal(newSVuv((UV) val)));
1593 else if (checksum > bits_in_uv)
1594 cdouble += (NV) val;
1598 } else if (!checksum)
1600 U8 ch = *(U8 *) s++;
1601 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1603 else if (checksum > bits_in_uv)
1604 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1606 while (len-- > 0) cuv += *(U8 *) s++;
1610 if (explicit_length) {
1611 /* Switch to "bytes in UTF-8" mode */
1612 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1614 /* Should be impossible due to the need_utf8() test */
1615 Perl_croak(aTHX_ "U0 mode on a byte string");
1619 if (len > strend - s) len = strend - s;
1621 if (len && unpack_only_one) len = 1;
1625 while (len-- > 0 && s < strend) {
1629 U8 result[UTF8_MAXLEN];
1633 /* Bug: warns about bad utf8 even if we are short on bytes
1634 and will break out of the loop */
1635 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1638 len = UTF8SKIP(result);
1639 if (!uni_to_bytes(aTHX_ &ptr, strend,
1640 (char *) &result[1], len-1, 'U')) break;
1641 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1644 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1645 if (retlen == (STRLEN) -1 || retlen == 0)
1646 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1650 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1651 else if (checksum > bits_in_uv)
1652 cdouble += (NV) auv;
1657 case 's' | TYPE_IS_SHRIEKING:
1658 #if SHORTSIZE != SIZE16
1661 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1662 DO_BO_UNPACK(ashort, s);
1664 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1665 else if (checksum > bits_in_uv)
1666 cdouble += (NV)ashort;
1678 #if U16SIZE > SIZE16
1681 SHIFT16(utf8, s, strend, &ai16, datumtype);
1682 DO_BO_UNPACK(ai16, 16);
1683 #if U16SIZE > SIZE16
1688 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1689 else if (checksum > bits_in_uv)
1690 cdouble += (NV)ai16;
1695 case 'S' | TYPE_IS_SHRIEKING:
1696 #if SHORTSIZE != SIZE16
1698 unsigned short aushort;
1699 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1700 DO_BO_UNPACK(aushort, s);
1702 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV)aushort;
1717 #if U16SIZE > SIZE16
1720 SHIFT16(utf8, s, strend, &au16, datumtype);
1721 DO_BO_UNPACK(au16, 16);
1723 if (datumtype == 'n')
1724 au16 = PerlSock_ntohs(au16);
1727 if (datumtype == 'v')
1731 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1732 else if (checksum > bits_in_uv)
1733 cdouble += (NV) au16;
1738 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1739 case 'v' | TYPE_IS_SHRIEKING:
1740 case 'n' | TYPE_IS_SHRIEKING:
1743 # if U16SIZE > SIZE16
1746 SHIFT16(utf8, s, strend, &ai16, datumtype);
1748 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1749 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1750 # endif /* HAS_NTOHS */
1752 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1753 ai16 = (I16) vtohs((U16) ai16);
1754 # endif /* HAS_VTOHS */
1756 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV) ai16;
1763 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1765 case 'i' | TYPE_IS_SHRIEKING:
1768 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1769 DO_BO_UNPACK(aint, i);
1771 PUSHs(sv_2mortal(newSViv((IV)aint)));
1772 else if (checksum > bits_in_uv)
1773 cdouble += (NV)aint;
1779 case 'I' | TYPE_IS_SHRIEKING:
1782 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1783 DO_BO_UNPACK(auint, i);
1785 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)auint;
1795 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1796 #if IVSIZE == INTSIZE
1797 DO_BO_UNPACK(aiv, i);
1798 #elif IVSIZE == LONGSIZE
1799 DO_BO_UNPACK(aiv, l);
1800 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1801 DO_BO_UNPACK(aiv, 64);
1803 Perl_croak(aTHX_ "'j' not supported on this platform");
1806 PUSHs(sv_2mortal(newSViv(aiv)));
1807 else if (checksum > bits_in_uv)
1816 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1817 #if IVSIZE == INTSIZE
1818 DO_BO_UNPACK(auv, i);
1819 #elif IVSIZE == LONGSIZE
1820 DO_BO_UNPACK(auv, l);
1821 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1822 DO_BO_UNPACK(auv, 64);
1824 Perl_croak(aTHX_ "'J' not supported on this platform");
1827 PUSHs(sv_2mortal(newSVuv(auv)));
1828 else if (checksum > bits_in_uv)
1834 case 'l' | TYPE_IS_SHRIEKING:
1835 #if LONGSIZE != SIZE32
1838 SHIFT_VAR(utf8, s, strend, along, datumtype);
1839 DO_BO_UNPACK(along, l);
1841 PUSHs(sv_2mortal(newSViv((IV)along)));
1842 else if (checksum > bits_in_uv)
1843 cdouble += (NV)along;
1854 #if U32SIZE > SIZE32
1857 SHIFT32(utf8, s, strend, &ai32, datumtype);
1858 DO_BO_UNPACK(ai32, 32);
1859 #if U32SIZE > SIZE32
1860 if (ai32 > 2147483647) ai32 -= 4294967296;
1863 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1864 else if (checksum > bits_in_uv)
1865 cdouble += (NV)ai32;
1870 case 'L' | TYPE_IS_SHRIEKING:
1871 #if LONGSIZE != SIZE32
1873 unsigned long aulong;
1874 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1875 DO_BO_UNPACK(aulong, l);
1877 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1878 else if (checksum > bits_in_uv)
1879 cdouble += (NV)aulong;
1892 #if U32SIZE > SIZE32
1895 SHIFT32(utf8, s, strend, &au32, datumtype);
1896 DO_BO_UNPACK(au32, 32);
1898 if (datumtype == 'N')
1899 au32 = PerlSock_ntohl(au32);
1902 if (datumtype == 'V')
1906 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1907 else if (checksum > bits_in_uv)
1908 cdouble += (NV)au32;
1913 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1914 case 'V' | TYPE_IS_SHRIEKING:
1915 case 'N' | TYPE_IS_SHRIEKING:
1918 # if U32SIZE > SIZE32
1921 SHIFT32(utf8, s, strend, &ai32, datumtype);
1923 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1924 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1927 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1928 ai32 = (I32)vtohl((U32)ai32);
1931 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)ai32;
1938 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1942 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1943 DO_BO_UNPACK_PC(aptr);
1944 /* newSVpv generates undef if aptr is NULL */
1945 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1953 while (len > 0 && s < strend) {
1955 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1956 auv = (auv << 7) | (ch & 0x7f);
1957 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1960 PUSHs(sv_2mortal(newSVuv(auv)));
1965 if (++bytes >= sizeof(UV)) { /* promote to string */
1969 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1970 while (s < strend) {
1971 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1972 sv = mul128(sv, (U8)(ch & 0x7f));
1982 PUSHs(sv_2mortal(sv));
1987 if ((s >= strend) && bytes)
1988 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1992 if (symptr->howlen == e_star)
1993 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1995 if (sizeof(char*) <= strend - s) {
1997 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1998 DO_BO_UNPACK_PC(aptr);
1999 /* newSVpvn generates undef if aptr is NULL */
2000 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2007 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2008 DO_BO_UNPACK(aquad, 64);
2010 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2011 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2012 else if (checksum > bits_in_uv)
2013 cdouble += (NV)aquad;
2021 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2022 DO_BO_UNPACK(auquad, 64);
2024 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2025 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2026 else if (checksum > bits_in_uv)
2027 cdouble += (NV)auquad;
2032 #endif /* HAS_QUAD */
2033 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2037 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2038 DO_BO_UNPACK_N(afloat, float);
2040 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2048 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2049 DO_BO_UNPACK_N(adouble, double);
2051 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2059 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2060 DO_BO_UNPACK_N(anv, NV);
2062 PUSHs(sv_2mortal(newSVnv(anv)));
2067 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2070 long double aldouble;
2071 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2072 DO_BO_UNPACK_N(aldouble, long double);
2074 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2076 cdouble += aldouble;
2082 * Initialise the decode mapping. By using a table driven
2083 * algorithm, the code will be character-set independent
2084 * (and just as fast as doing character arithmetic)
2086 if (PL_uudmap['M'] == 0) {
2089 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2090 PL_uudmap[(U8)PL_uuemap[i]] = i;
2092 * Because ' ' and '`' map to the same value,
2093 * we need to decode them both the same.
2098 STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2099 sv = sv_2mortal(NEWSV(42, l));
2100 if (l) SvPOK_on(sv);
2103 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2109 next_uni_uu(aTHX_ &s, strend, &a);
2110 next_uni_uu(aTHX_ &s, strend, &b);
2111 next_uni_uu(aTHX_ &s, strend, &c);
2112 next_uni_uu(aTHX_ &s, strend, &d);
2113 hunk[0] = (char)((a << 2) | (b >> 4));
2114 hunk[1] = (char)((b << 4) | (c >> 2));
2115 hunk[2] = (char)((c << 6) | d);
2116 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2120 if (*s == '\n') s++;
2122 /* possible checksum byte */
2123 char *skip = s+UTF8SKIP(s);
2124 if (skip < strend && *skip == '\n') s = skip+1;
2129 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2134 len = PL_uudmap[*(U8*)s++] & 077;
2136 if (s < strend && ISUUCHAR(*s))
2137 a = PL_uudmap[*(U8*)s++] & 077;
2140 if (s < strend && ISUUCHAR(*s))
2141 b = PL_uudmap[*(U8*)s++] & 077;
2144 if (s < strend && ISUUCHAR(*s))
2145 c = PL_uudmap[*(U8*)s++] & 077;
2148 if (s < strend && ISUUCHAR(*s))
2149 d = PL_uudmap[*(U8*)s++] & 077;
2152 hunk[0] = (char)((a << 2) | (b >> 4));
2153 hunk[1] = (char)((b << 4) | (c >> 2));
2154 hunk[2] = (char)((c << 6) | d);
2155 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2160 else /* possible checksum byte */
2161 if (s + 1 < strend && s[1] == '\n')
2170 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2171 (checksum > bits_in_uv &&
2172 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2175 anv = (NV) (1 << (checksum & 15));
2176 while (checksum >= 16) {
2180 while (cdouble < 0.0)
2182 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2183 sv = newSVnv(cdouble);
2186 if (checksum < bits_in_uv) {
2187 UV mask = ((UV)1 << checksum) - 1;
2192 XPUSHs(sv_2mortal(sv));
2196 if (symptr->flags & FLAG_SLASH){
2197 if (SP - PL_stack_base - start_sp_offset <= 0)
2198 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2199 if( next_symbol(symptr) ){
2200 if( symptr->howlen == e_number )
2201 Perl_croak(aTHX_ "Count after length/code in unpack" );
2203 /* ...end of char buffer then no decent length available */
2204 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2206 /* take top of stack (hope it's numeric) */
2209 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2212 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2214 datumtype = symptr->code;
2215 explicit_length = FALSE;
2223 return SP - PL_stack_base - start_sp_offset;
2230 I32 gimme = GIMME_V;
2233 char *pat = SvPV(left, llen);
2234 char *s = SvPV(right, rlen);
2235 char *strend = s + rlen;
2236 char *patend = pat + llen;
2240 cnt = unpackstring(pat, patend, s, strend,
2241 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2242 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2245 if ( !cnt && gimme == G_SCALAR )
2246 PUSHs(&PL_sv_undef);
2251 doencodes(U8 *h, char *s, I32 len)
2253 *h++ = PL_uuemap[len];
2255 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2256 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2257 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2258 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2263 char r = (len > 1 ? s[1] : '\0');
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2267 *h++ = PL_uuemap[0];
2274 S_is_an_int(pTHX_ char *s, STRLEN l)
2277 SV *result = newSVpvn(s, l);
2278 char *result_c = SvPV(result, n_a); /* convenience */
2279 char *out = result_c;
2289 SvREFCNT_dec(result);
2312 SvREFCNT_dec(result);
2318 SvCUR_set(result, out - result_c);
2322 /* pnum must be '\0' terminated */
2324 S_div128(pTHX_ SV *pnum, bool *done)
2327 char *s = SvPV(pnum, len);
2336 i = m * 10 + (*t - '0');
2338 r = (i >> 7); /* r < 10 */
2345 SvCUR_set(pnum, (STRLEN) (t - s));
2352 =for apidoc pack_cat
2354 The engine implementing pack() Perl function. Note: parameters next_in_list and
2355 flags are not used. This call should not be used; use packlist instead.
2361 Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2363 tempsym_t sym = { 0 };
2365 sym.patend = patend;
2366 sym.flags = FLAG_PACK;
2368 (void)pack_rec( cat, &sym, beglist, endlist );
2373 =for apidoc packlist
2375 The engine implementing pack() Perl function.
2381 Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
2384 tempsym_t sym = { 0 };
2387 sym.patend = patend;
2388 sym.flags = FLAG_PACK;
2390 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2391 Also make sure any UTF8 flag is loaded */
2392 SvPV_force(cat, no_len);
2393 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2395 (void)pack_rec( cat, &sym, beglist, endlist );
2398 /* like sv_utf8_upgrade, but also repoint the group start markers */
2400 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2403 char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
2405 if (SvUTF8(sv)) return;
2407 from_start = SvPVX(sv);
2408 from_end = from_start + SvCUR(sv);
2409 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2410 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2411 if (from_ptr == from_end) {
2412 /* Simple case: no character needs to be changed */
2417 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2418 New('U', to_start, len, char);
2419 Copy(from_start, to_start, from_ptr-from_start, char);
2420 to_ptr = to_start + (from_ptr-from_start);
2422 New('U', marks, sym_ptr->level+2, char *);
2423 for (group=sym_ptr; group; group = group->previous)
2424 marks[group->level] = from_start + group->strbeg;
2425 marks[sym_ptr->level+1] = from_end+1;
2426 for (m = marks; *m < from_ptr; m++)
2427 *m = to_start + (*m-from_start);
2429 for (;from_ptr < from_end; from_ptr++) {
2430 while (*m == from_ptr) *m++ = to_ptr;
2431 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2435 while (*m == from_ptr) *m++ = to_ptr;
2436 if (m != marks + sym_ptr->level+1) {
2439 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2441 for (group=sym_ptr; group; group = group->previous)
2442 group->strbeg = marks[group->level] - to_start;
2447 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2448 from_start -= SvIVX(sv);
2451 SvFLAGS(sv) &= ~SVf_OOK;
2454 Safefree(from_start);
2455 SvPV_set(sv, to_start);
2456 SvCUR_set(sv, to_ptr - to_start);
2461 /* Exponential string grower. Makes string extension effectively O(n)
2462 needed says how many extra bytes we need (not counting the final '\0')
2463 Only grows the string if there is an actual lack of space
2466 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2467 STRLEN cur = SvCUR(sv);
2468 STRLEN len = SvLEN(sv);
2470 if (len - cur > needed) return SvPVX(sv);
2471 extend = needed > len ? needed : len;
2472 return SvGROW(sv, len+extend+1);
2477 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2479 tempsym_t lookahead;
2480 I32 items = endlist - beglist;
2481 bool found = next_symbol(symptr);
2482 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2484 if (symptr->level == 0 && found && symptr->code == 'U') {
2485 marked_upgrade(aTHX_ cat, symptr);
2486 symptr->flags |= FLAG_DO_UTF8;
2489 symptr->strbeg = SvCUR(cat);
2495 SV *lengthcode = Nullsv;
2496 I32 datumtype = symptr->code;
2497 howlen_t howlen = symptr->howlen;
2498 char *start = SvPVX(cat);
2499 char *cur = start + SvCUR(cat);
2501 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2505 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2509 /* e_no_len and e_number */
2510 len = symptr->length;
2515 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2517 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2518 /* We can process this letter. */
2519 STRLEN size = props & PACK_SIZE_MASK;
2520 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2524 /* Look ahead for next symbol. Do we have code/code? */
2525 lookahead = *symptr;
2526 found = next_symbol(&lookahead);
2527 if (symptr->flags & FLAG_SLASH) {
2529 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2530 if (strchr("aAZ", lookahead.code)) {
2531 if (lookahead.howlen == e_number) count = lookahead.length;
2534 count = DO_UTF8(*beglist) ?
2535 sv_len_utf8(*beglist) : sv_len(*beglist);
2537 if (lookahead.code == 'Z') count++;
2540 if (lookahead.howlen == e_number && lookahead.length < items)
2541 count = lookahead.length;
2544 lookahead.howlen = e_number;
2545 lookahead.length = count;
2546 lengthcode = sv_2mortal(newSViv(count));
2549 /* Code inside the switch must take care to properly update
2550 cat (CUR length and '\0' termination) if it updated *cur and
2551 doesn't simply leave using break */
2552 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2554 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2555 (int) TYPE_NO_MODIFIERS(datumtype));
2557 Perl_croak(aTHX_ "'%%' may not be used in pack");
2560 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2561 case '.' | TYPE_IS_SHRIEKING:
2564 if (howlen == e_star) from = start;
2565 else if (len == 0) from = cur;
2567 tempsym_t *group = symptr;
2569 while (--len && group) group = group->previous;
2570 from = group ? start + group->strbeg : start;
2573 len = SvIV(fromstr);
2575 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2576 case '@' | TYPE_IS_SHRIEKING:
2579 from = start + symptr->strbeg;
2581 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2582 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2583 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2587 while (len && from < cur) {
2588 from += UTF8SKIP(from);
2592 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2594 /* Here we know from == cur */
2596 GROWING(0, cat, start, cur, len);
2597 Zero(cur, len, char);
2599 } else if (from < cur) {
2602 } else goto no_change;
2610 if (len > 0) goto grow;
2611 if (len == 0) goto no_change;
2618 tempsym_t savsym = *symptr;
2619 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2620 symptr->flags |= group_modifiers;
2621 symptr->patend = savsym.grpend;
2623 symptr->previous = &lookahead;
2626 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2627 else symptr->flags &= ~FLAG_PARSE_UTF8;
2628 was_utf8 = SvUTF8(cat);
2629 symptr->patptr = savsym.grpbeg;
2630 beglist = pack_rec(cat, symptr, beglist, endlist);
2631 if (SvUTF8(cat) != was_utf8)
2632 /* This had better be an upgrade while in utf8==0 mode */
2635 if (savsym.howlen == e_star && beglist == endlist)
2636 break; /* No way to continue */
2638 lookahead.flags = symptr->flags & ~group_modifiers;
2641 case 'X' | TYPE_IS_SHRIEKING:
2642 if (!len) /* Avoid division by 0 */
2649 hop += UTF8SKIP(hop);
2656 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2660 len = (cur-start) % len;
2664 if (len < 1) goto no_change;
2668 Perl_croak(aTHX_ "'%c' outside of string in pack",
2669 (int) TYPE_NO_MODIFIERS(datumtype));
2670 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2672 Perl_croak(aTHX_ "'%c' outside of string in pack",
2673 (int) TYPE_NO_MODIFIERS(datumtype));
2679 if (cur - start < len)
2680 Perl_croak(aTHX_ "'%c' outside of string in pack",
2681 (int) TYPE_NO_MODIFIERS(datumtype));
2684 if (cur < start+symptr->strbeg) {
2685 /* Make sure group starts don't point into the void */
2687 STRLEN length = cur-start;
2688 for (group = symptr;
2689 group && length < group->strbeg;
2690 group = group->previous) group->strbeg = length;
2691 lookahead.strbeg = length;
2694 case 'x' | TYPE_IS_SHRIEKING: {
2696 if (!len) /* Avoid division by 0 */
2698 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2699 else ai32 = (cur - start) % len;
2700 if (ai32 == 0) goto no_change;
2712 aptr = SvPV(fromstr, fromlen);
2713 if (DO_UTF8(fromstr)) {
2716 if (!utf8 && !SvUTF8(cat)) {
2717 marked_upgrade(aTHX_ cat, symptr);
2718 lookahead.flags |= FLAG_DO_UTF8;
2719 lookahead.strbeg = symptr->strbeg;
2722 cur = start + SvCUR(cat);
2724 if (howlen == e_star) {
2725 if (utf8) goto string_copy;
2729 end = aptr + fromlen;
2730 fromlen = datumtype == 'Z' ? len-1 : len;
2731 while ((I32) fromlen > 0 && s < end) {
2736 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2739 if (datumtype == 'Z') len++;
2745 fromlen = len - fromlen;
2746 if (datumtype == 'Z') fromlen--;
2747 if (howlen == e_star) {
2749 if (datumtype == 'Z') len++;
2751 GROWING(0, cat, start, cur, len);
2752 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2753 datumtype | TYPE_IS_PACK))
2754 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2758 if (howlen == e_star) {
2760 if (datumtype == 'Z') len++;
2762 if (len <= (I32) fromlen) {
2764 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2766 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2768 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2769 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2771 while (fromlen > 0) {
2772 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2778 if (howlen == e_star) {
2780 if (datumtype == 'Z') len++;
2782 if (len <= (I32) fromlen) {
2784 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2786 GROWING(0, cat, start, cur, len);
2787 Copy(aptr, cur, fromlen, char);
2791 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2804 str = SvPV(fromstr, fromlen);
2805 end = str + fromlen;
2806 if (DO_UTF8(fromstr)) {
2808 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2810 utf8_source = FALSE;
2811 utf8_flags = 0; /* Unused, but keep compilers happy */
2813 if (howlen == e_star) len = fromlen;
2814 field_len = (len+7)/8;
2815 GROWING(utf8, cat, start, cur, field_len);
2816 if (len > (I32)fromlen) len = fromlen;
2819 if (datumtype == 'B')
2823 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2825 } else bits |= *str++ & 1;
2826 if (l & 7) bits <<= 1;
2828 PUSH_BYTE(utf8, cur, bits);
2833 /* datumtype == 'b' */
2837 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2838 if (val & 1) bits |= 0x80;
2839 } else if (*str++ & 1)
2841 if (l & 7) bits >>= 1;
2843 PUSH_BYTE(utf8, cur, bits);
2849 if (datumtype == 'B')
2850 bits <<= 7 - (l & 7);
2852 bits >>= 7 - (l & 7);
2853 PUSH_BYTE(utf8, cur, bits);
2856 /* Determine how many chars are left in the requested field */
2858 if (howlen == e_star) field_len = 0;
2859 else field_len -= l;
2860 Zero(cur, field_len, char);
2873 str = SvPV(fromstr, fromlen);
2874 end = str + fromlen;
2875 if (DO_UTF8(fromstr)) {
2877 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2879 utf8_source = FALSE;
2880 utf8_flags = 0; /* Unused, but keep compilers happy */
2882 if (howlen == e_star) len = fromlen;
2883 field_len = (len+1)/2;
2884 GROWING(utf8, cat, start, cur, field_len);
2885 if (!utf8 && len > (I32)fromlen) len = fromlen;
2888 if (datumtype == 'H')
2892 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2893 if (val < 256 && isALPHA(val))
2894 bits |= (val + 9) & 0xf;
2897 } else if (isALPHA(*str))
2898 bits |= (*str++ + 9) & 0xf;
2900 bits |= *str++ & 0xf;
2901 if (l & 1) bits <<= 4;
2903 PUSH_BYTE(utf8, cur, bits);
2911 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2912 if (val < 256 && isALPHA(val))
2913 bits |= ((val + 9) & 0xf) << 4;
2915 bits |= (val & 0xf) << 4;
2916 } else if (isALPHA(*str))
2917 bits |= ((*str++ + 9) & 0xf) << 4;
2919 bits |= (*str++ & 0xf) << 4;
2920 if (l & 1) bits >>= 4;
2922 PUSH_BYTE(utf8, cur, bits);
2928 PUSH_BYTE(utf8, cur, bits);
2931 /* Determine how many chars are left in the requested field */
2933 if (howlen == e_star) field_len = 0;
2934 else field_len -= l;
2935 Zero(cur, field_len, char);
2943 aiv = SvIV(fromstr);
2944 if ((-128 > aiv || aiv > 127) &&
2946 Perl_warner(aTHX_ packWARN(WARN_PACK),
2947 "Character in 'c' format wrapped in pack");
2948 PUSH_BYTE(utf8, cur, aiv & 0xff);
2953 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2956 GROWING(0, cat, start, cur, len);
2960 aiv = SvIV(fromstr);
2961 if ((0 > aiv || aiv > 0xff) &&
2963 Perl_warner(aTHX_ packWARN(WARN_PACK),
2964 "Character in 'C' format wrapped in pack");
2965 *cur++ = aiv & 0xff;
2970 U8 in_bytes = IN_BYTES;
2972 end = start+SvLEN(cat)-1;
2973 if (utf8) end -= UTF8_MAXLEN-1;
2977 auv = SvUV(fromstr);
2978 if (in_bytes) auv = auv % 0x100;
2983 SvCUR_set(cat, cur - start);
2985 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2986 end = start+SvLEN(cat)-UTF8_MAXLEN;
2988 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2991 0 : UNICODE_ALLOW_ANY);
2996 SvCUR_set(cat, cur - start);
2997 marked_upgrade(aTHX_ cat, symptr);
2998 lookahead.flags |= FLAG_DO_UTF8;
2999 lookahead.strbeg = symptr->strbeg;
3002 cur = start + SvCUR(cat);
3003 end = start+SvLEN(cat)-UTF8_MAXLEN;
3006 if (ckWARN(WARN_PACK))
3007 Perl_warner(aTHX_ packWARN(WARN_PACK),
3008 "Character in 'W' format wrapped in pack");
3013 SvCUR_set(cat, cur - start);
3014 GROWING(0, cat, start, cur, len+1);
3015 end = start+SvLEN(cat)-1;
3017 *(U8 *) cur++ = (U8)auv;
3026 if (!(symptr->flags & FLAG_DO_UTF8)) {
3027 marked_upgrade(aTHX_ cat, symptr);
3028 lookahead.flags |= FLAG_DO_UTF8;
3029 lookahead.strbeg = symptr->strbeg;
3035 end = start+SvLEN(cat);
3036 if (!utf8) end -= UTF8_MAXLEN;
3040 auv = SvUV(fromstr);
3042 U8 buffer[UTF8_MAXLEN], *endb;
3043 endb = uvuni_to_utf8_flags(buffer, auv,
3045 0 : UNICODE_ALLOW_ANY);
3046 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3048 SvCUR_set(cat, cur - start);
3049 GROWING(0, cat, start, cur,
3050 len+(endb-buffer)*UTF8_EXPAND);
3051 end = start+SvLEN(cat);
3053 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3057 SvCUR_set(cat, cur - start);
3058 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3059 end = start+SvLEN(cat)-UTF8_MAXLEN;
3061 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3063 0 : UNICODE_ALLOW_ANY);
3068 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3074 anv = SvNV(fromstr);
3076 /* VOS does not automatically map a floating-point overflow
3077 during conversion from double to float into infinity, so we
3078 do it by hand. This code should either be generalized for
3079 any OS that needs it, or removed if and when VOS implements
3080 posix-976 (suggestion to support mapping to infinity).
3081 Paul.Green@stratus.com 02-04-02. */
3083 afloat = _float_constants[0]; /* single prec. inf. */
3084 else if (anv < -FLT_MAX)
3085 afloat = _float_constants[0]; /* single prec. inf. */
3086 else afloat = (float) anv;
3088 # if defined(VMS) && !defined(__IEEE_FP)
3089 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3090 * on Alpha; fake it if we don't have them.
3094 else if (anv < -FLT_MAX)
3096 else afloat = (float)anv;
3098 afloat = (float)anv;
3100 #endif /* __VOS__ */
3101 DO_BO_PACK_N(afloat, float);
3102 PUSH_VAR(utf8, cur, afloat);
3110 anv = SvNV(fromstr);
3112 /* VOS does not automatically map a floating-point overflow
3113 during conversion from long double to double into infinity,
3114 so we do it by hand. This code should either be generalized
3115 for any OS that needs it, or removed if and when VOS
3116 implements posix-976 (suggestion to support mapping to
3117 infinity). Paul.Green@stratus.com 02-04-02. */
3119 adouble = _double_constants[0]; /* double prec. inf. */
3120 else if (anv < -DBL_MAX)
3121 adouble = _double_constants[0]; /* double prec. inf. */
3122 else adouble = (double) anv;
3124 # if defined(VMS) && !defined(__IEEE_FP)
3125 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3126 * on Alpha; fake it if we don't have them.
3130 else if (anv < -DBL_MAX)
3132 else adouble = (double)anv;
3134 adouble = (double)anv;
3136 #endif /* __VOS__ */
3137 DO_BO_PACK_N(adouble, double);
3138 PUSH_VAR(utf8, cur, adouble);
3143 Zero(&anv, 1, NV); /* can be long double with unused bits */
3146 anv = SvNV(fromstr);
3147 DO_BO_PACK_N(anv, NV);
3148 PUSH_VAR(utf8, cur, anv);
3152 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3154 long double aldouble;
3155 /* long doubles can have unused bits, which may be nonzero */
3156 Zero(&aldouble, 1, long double);
3159 aldouble = (long double)SvNV(fromstr);
3160 DO_BO_PACK_N(aldouble, long double);
3161 PUSH_VAR(utf8, cur, aldouble);
3166 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3167 case 'n' | TYPE_IS_SHRIEKING:
3173 ai16 = (I16)SvIV(fromstr);
3175 ai16 = PerlSock_htons(ai16);
3177 PUSH16(utf8, cur, &ai16);
3180 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3181 case 'v' | TYPE_IS_SHRIEKING:
3187 ai16 = (I16)SvIV(fromstr);
3191 PUSH16(utf8, cur, &ai16);
3194 case 'S' | TYPE_IS_SHRIEKING:
3195 #if SHORTSIZE != SIZE16
3197 unsigned short aushort;
3199 aushort = SvUV(fromstr);
3200 DO_BO_PACK(aushort, s);
3201 PUSH_VAR(utf8, cur, aushort);
3211 au16 = (U16)SvUV(fromstr);
3212 DO_BO_PACK(au16, 16);
3213 PUSH16(utf8, cur, &au16);
3216 case 's' | TYPE_IS_SHRIEKING:
3217 #if SHORTSIZE != SIZE16
3221 ashort = SvIV(fromstr);
3222 DO_BO_PACK(ashort, s);
3223 PUSH_VAR(utf8, cur, ashort);
3233 ai16 = (I16)SvIV(fromstr);
3234 DO_BO_PACK(ai16, 16);
3235 PUSH16(utf8, cur, &ai16);
3239 case 'I' | TYPE_IS_SHRIEKING:
3243 auint = SvUV(fromstr);
3244 DO_BO_PACK(auint, i);
3245 PUSH_VAR(utf8, cur, auint);
3252 aiv = SvIV(fromstr);
3253 #if IVSIZE == INTSIZE
3255 #elif IVSIZE == LONGSIZE
3257 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3258 DO_BO_PACK(aiv, 64);
3260 Perl_croak(aTHX_ "'j' not supported on this platform");
3262 PUSH_VAR(utf8, cur, aiv);
3269 auv = SvUV(fromstr);
3270 #if UVSIZE == INTSIZE
3272 #elif UVSIZE == LONGSIZE
3274 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3275 DO_BO_PACK(auv, 64);
3277 Perl_croak(aTHX_ "'J' not supported on this platform");
3279 PUSH_VAR(utf8, cur, auv);
3286 anv = SvNV(fromstr);
3290 SvCUR_set(cat, cur - start);
3291 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3294 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3295 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3296 any negative IVs will have already been got by the croak()
3297 above. IOK is untrue for fractions, so we test them
3298 against UV_MAX_P1. */
3299 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3300 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3301 char *in = buf + sizeof(buf);
3302 UV auv = SvUV(fromstr);
3305 *--in = (char)((auv & 0x7f) | 0x80);
3308 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3309 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3310 in, (buf + sizeof(buf)) - in);
3311 } else if (SvPOKp(fromstr))
3313 else if (SvNOKp(fromstr)) {
3314 /* 10**NV_MAX_10_EXP is the largest power of 10
3315 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3316 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3317 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3318 And with that many bytes only Inf can overflow.
3319 Some C compilers are strict about integral constant
3320 expressions so we conservatively divide by a slightly
3321 smaller integer instead of multiplying by the exact
3322 floating-point value.
3324 #ifdef NV_MAX_10_EXP
3325 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3326 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3328 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3329 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3331 char *in = buf + sizeof(buf);
3333 anv = Perl_floor(anv);
3335 NV next = Perl_floor(anv / 128);
3336 if (in <= buf) /* this cannot happen ;-) */
3337 Perl_croak(aTHX_ "Cannot compress integer in pack");
3338 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3341 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3342 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3343 in, (buf + sizeof(buf)) - in);
3345 char *from, *result, *in;
3351 /* Copy string and check for compliance */
3352 from = SvPV(fromstr, len);
3353 if ((norm = is_an_int(from, len)) == NULL)
3354 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3356 New('w', result, len, char);
3359 while (!done) *--in = div128(norm, &done) | 0x80;
3360 result[len - 1] &= 0x7F; /* clear continue bit */
3361 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3362 in, (result + len) - in);
3364 SvREFCNT_dec(norm); /* free norm */
3369 case 'i' | TYPE_IS_SHRIEKING:
3373 aint = SvIV(fromstr);
3374 DO_BO_PACK(aint, i);
3375 PUSH_VAR(utf8, cur, aint);
3378 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3379 case 'N' | TYPE_IS_SHRIEKING:
3385 au32 = SvUV(fromstr);
3387 au32 = PerlSock_htonl(au32);
3389 PUSH32(utf8, cur, &au32);
3392 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3393 case 'V' | TYPE_IS_SHRIEKING:
3399 au32 = SvUV(fromstr);
3403 PUSH32(utf8, cur, &au32);
3406 case 'L' | TYPE_IS_SHRIEKING:
3407 #if LONGSIZE != SIZE32
3409 unsigned long aulong;
3411 aulong = SvUV(fromstr);
3412 DO_BO_PACK(aulong, l);
3413 PUSH_VAR(utf8, cur, aulong);
3423 au32 = SvUV(fromstr);
3424 DO_BO_PACK(au32, 32);
3425 PUSH32(utf8, cur, &au32);
3428 case 'l' | TYPE_IS_SHRIEKING:
3429 #if LONGSIZE != SIZE32
3433 along = SvIV(fromstr);
3434 DO_BO_PACK(along, l);
3435 PUSH_VAR(utf8, cur, along);
3445 ai32 = SvIV(fromstr);
3446 DO_BO_PACK(ai32, 32);
3447 PUSH32(utf8, cur, &ai32);
3455 auquad = (Uquad_t) SvUV(fromstr);
3456 DO_BO_PACK(auquad, 64);
3457 PUSH_VAR(utf8, cur, auquad);
3464 aquad = (Quad_t)SvIV(fromstr);
3465 DO_BO_PACK(aquad, 64);
3466 PUSH_VAR(utf8, cur, aquad);
3469 #endif /* HAS_QUAD */
3471 len = 1; /* assume SV is correct length */
3472 GROWING(utf8, cat, start, cur, sizeof(char *));
3479 SvGETMAGIC(fromstr);
3480 if (!SvOK(fromstr)) aptr = NULL;
3483 /* XXX better yet, could spirit away the string to
3484 * a safe spot and hang on to it until the result
3485 * of pack() (and all copies of the result) are
3488 if (ckWARN(WARN_PACK) &&
3489 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3490 !SvREADONLY(fromstr)))) {
3491 Perl_warner(aTHX_ packWARN(WARN_PACK),
3492 "Attempt to pack pointer to temporary value");
3494 if (SvPOK(fromstr) || SvNIOK(fromstr))
3495 aptr = SvPV_flags(fromstr, n_a, 0);
3497 aptr = SvPV_force_flags(fromstr, n_a, 0);
3499 DO_BO_PACK_PC(aptr);
3500 PUSH_VAR(utf8, cur, aptr);
3508 if (len <= 2) len = 45;
3509 else len = len / 3 * 3;
3511 Perl_warner(aTHX_ packWARN(WARN_PACK),
3512 "Field too wide in 'u' format in pack");
3515 aptr = SvPV(fromstr, fromlen);
3516 from_utf8 = DO_UTF8(fromstr);
3518 aend = aptr + fromlen;
3519 fromlen = sv_len_utf8(fromstr);
3520 } else aend = NULL; /* Unused, but keep compilers happy */
3521 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3522 while (fromlen > 0) {
3525 U8 hunk[1+63/3*4+1];
3527 if ((I32)fromlen > len)
3533 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3534 'u' | TYPE_IS_PACK)) {
3536 SvCUR_set(cat, cur - start);
3537 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3539 end = doencodes(hunk, buffer, todo);
3541 end = doencodes(hunk, aptr, todo);
3544 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3551 SvCUR_set(cat, cur - start);
3553 *symptr = lookahead;
3562 dSP; dMARK; dORIGMARK; dTARGET;
3563 register SV *cat = TARG;
3565 register char *pat = SvPVx(*++MARK, fromlen);
3566 register char *patend = pat + fromlen;
3569 sv_setpvn(cat, "", 0);
3572 packlist(cat, pat, patend, MARK, SP + 1);
3582 * c-indentation-style: bsd
3584 * indent-tabs-mode: t
3587 * vim: shiftwidth=4: