Re: encoding neutral unpack
[p5sagit/p5-mst-13.2.git] / pp_pack.c
CommitLineData
a6ec74c1 1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1 5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517 11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29 19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1 30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
7212898e 34#if PERL_VERSION >= 9
35#define PERL_PACK_CAN_BYTEORDER
36#define PERL_PACK_CAN_SHRIEKSIGN
37#endif
38
a6ec74c1 39/*
a6ec74c1 40 * Offset for integer pack/unpack.
41 *
42 * On architectures where I16 and I32 aren't really 16 and 32 bits,
43 * which for now are all Crays, pack and unpack have to play games.
44 */
45
46/*
47 * These values are required for portability of pack() output.
48 * If they're not right on your machine, then pack() and unpack()
49 * wouldn't work right anyway; you'll need to apply the Cray hack.
50 * (I'd like to check them with #if, but you can't use sizeof() in
51 * the preprocessor.) --???
52 */
53/*
54 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
55 defines are now in config.h. --Andy Dougherty April 1998
56 */
57#define SIZE16 2
58#define SIZE32 4
59
60/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
61 --jhi Feb 1999 */
62
1109a392 63#if U16SIZE > SIZE16 || U32SIZE > SIZE32
64# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
a6ec74c1 65# define OFF16(p) (char*)(p)
66# define OFF32(p) (char*)(p)
67# else
1109a392 68# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1 69# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
70# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
71# else
72 }}}} bad cray byte order
73# endif
74# endif
75# define COPY16(s,p) (*(p) = 0, Copy(s, OFF16(p), SIZE16, char))
76# define COPY32(s,p) (*(p) = 0, Copy(s, OFF32(p), SIZE32, char))
77# define COPYNN(s,p,n) (*(p) = 0, Copy(s, (char *)(p), n, char))
78# define CAT16(sv,p) sv_catpvn(sv, OFF16(p), SIZE16)
79# define CAT32(sv,p) sv_catpvn(sv, OFF32(p), SIZE32)
80#else
81# define COPY16(s,p) Copy(s, p, SIZE16, char)
82# define COPY32(s,p) Copy(s, p, SIZE32, char)
83# define COPYNN(s,p,n) Copy(s, (char *)(p), n, char)
84# define CAT16(sv,p) sv_catpvn(sv, (char*)(p), SIZE16)
85# define CAT32(sv,p) sv_catpvn(sv, (char*)(p), SIZE32)
86#endif
87
49704364 88/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
89#define MAX_SUB_TEMPLATE_LEVEL 100
90
66c611c5 91/* flags (note that type modifiers can also be used as flags!) */
49704364 92#define FLAG_UNPACK_ONLY_ONE 0x10
93#define FLAG_UNPACK_DO_UTF8 0x08
94#define FLAG_SLASH 0x04
95#define FLAG_COMMA 0x02
96#define FLAG_PACK 0x01
97
a6ec74c1 98STATIC SV *
99S_mul128(pTHX_ SV *sv, U8 m)
100{
101 STRLEN len;
102 char *s = SvPV(sv, len);
103 char *t;
104 U32 i = 0;
105
106 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
107 SV *tmpNew = newSVpvn("0000000000", 10);
108
109 sv_catsv(tmpNew, sv);
110 SvREFCNT_dec(sv); /* free old sv */
111 sv = tmpNew;
112 s = SvPV(sv, len);
113 }
114 t = s + len - 1;
115 while (!*t) /* trailing '\0'? */
116 t--;
117 while (t > s) {
118 i = ((*t - '0') << 7) + m;
eb160463 119 *(t--) = '0' + (char)(i % 10);
120 m = (char)(i / 10);
a6ec74c1 121 }
122 return (sv);
123}
124
125/* Explosives and implosives. */
126
127#if 'I' == 73 && 'J' == 74
128/* On an ASCII/ISO kind of system */
129#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
130#else
131/*
132 Some other sort of character set - use memchr() so we don't match
133 the null byte.
134 */
135#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
136#endif
137
66c611c5 138/* type modifiers */
62f95557 139#define TYPE_IS_SHRIEKING 0x100
1109a392 140#define TYPE_IS_BIG_ENDIAN 0x200
141#define TYPE_IS_LITTLE_ENDIAN 0x400
142#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 143#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392 144#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
145
7212898e 146#ifdef PERL_PACK_CAN_SHRIEKSIGN
147#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
148#else
149#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
150#endif
151
152#ifndef PERL_PACK_CAN_BYTEORDER
153/* Put "can't" first because it is shorter */
154# define TYPE_ENDIANNESS(t) 0
155# define TYPE_NO_ENDIANNESS(t) (t)
156
157# define ENDIANNESS_ALLOWED_TYPES ""
158
159# define DO_BO_UNPACK(var, type)
160# define DO_BO_PACK(var, type)
161# define DO_BO_UNPACK_PTR(var, type, pre_cast)
162# define DO_BO_PACK_PTR(var, type, pre_cast)
163# define DO_BO_UNPACK_N(var, type)
164# define DO_BO_PACK_N(var, type)
165# define DO_BO_UNPACK_P(var)
166# define DO_BO_PACK_P(var)
66c611c5 167
7212898e 168#else
169
170# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
171# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
172
173# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
174
175# define DO_BO_UNPACK(var, type) \
1109a392 176 STMT_START { \
66c611c5 177 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 178 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
179 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
180 default: break; \
181 } \
182 } STMT_END
183
7212898e 184# define DO_BO_PACK(var, type) \
1109a392 185 STMT_START { \
66c611c5 186 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 187 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
188 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
189 default: break; \
190 } \
191 } STMT_END
192
7212898e 193# define DO_BO_UNPACK_PTR(var, type, pre_cast) \
1109a392 194 STMT_START { \
66c611c5 195 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 196 case TYPE_IS_BIG_ENDIAN: \
197 var = (void *) my_betoh ## type ((pre_cast) var); \
198 break; \
199 case TYPE_IS_LITTLE_ENDIAN: \
200 var = (void *) my_letoh ## type ((pre_cast) var); \
201 break; \
202 default: \
203 break; \
204 } \
205 } STMT_END
206
7212898e 207# define DO_BO_PACK_PTR(var, type, pre_cast) \
1109a392 208 STMT_START { \
66c611c5 209 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 210 case TYPE_IS_BIG_ENDIAN: \
211 var = (void *) my_htobe ## type ((pre_cast) var); \
212 break; \
213 case TYPE_IS_LITTLE_ENDIAN: \
214 var = (void *) my_htole ## type ((pre_cast) var); \
215 break; \
216 default: \
217 break; \
218 } \
219 } STMT_END
220
7212898e 221# define BO_CANT_DOIT(action, type) \
66c611c5 222 STMT_START { \
223 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 224 case TYPE_IS_BIG_ENDIAN: \
225 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
226 "platform", #action, #type); \
227 break; \
228 case TYPE_IS_LITTLE_ENDIAN: \
229 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
230 "platform", #action, #type); \
231 break; \
232 default: \
233 break; \
234 } \
235 } STMT_END
236
7212898e 237# if PTRSIZE == INTSIZE
238# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int)
239# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int)
240# elif PTRSIZE == LONGSIZE
241# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long)
242# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long)
243# else
244# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
245# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
246# endif
1109a392 247
7212898e 248# if defined(my_htolen) && defined(my_letohn) && \
1109a392 249 defined(my_htoben) && defined(my_betohn)
7212898e 250# define DO_BO_UNPACK_N(var, type) \
1109a392 251 STMT_START { \
66c611c5 252 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 253 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
254 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
255 default: break; \
256 } \
257 } STMT_END
258
7212898e 259# define DO_BO_PACK_N(var, type) \
1109a392 260 STMT_START { \
66c611c5 261 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 262 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
263 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
264 default: break; \
265 } \
266 } STMT_END
7212898e 267# else
268# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
269# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
270# endif
271
1109a392 272#endif
62f95557 273
78d46eaa 274#define PACK_SIZE_CANNOT_CSUM 0x80
a72c271b 275#define PACK_SIZE_SPARE 0x40
78d46eaa 276#define PACK_SIZE_MASK 0x3F
277
278
279struct packsize_t {
58c0bd94 280 const unsigned char *array;
78d46eaa 281 int first;
282 int size;
283};
284
285#define PACK_SIZE_NORMAL 0
286#define PACK_SIZE_SHRIEKING 1
287
288/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
289 in). You're unlikely ever to need to regenerate them. */
290#if 'J'-'I' == 1
291/* ASCII */
292unsigned char size_normal[53] = {
293 /* C */ sizeof(unsigned char),
80a13697 294#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
78d46eaa 295 /* D */ LONG_DOUBLESIZE,
80a13697 296#else
297 0,
298#endif
78d46eaa 299 0,
300 /* F */ NVSIZE,
301 0, 0,
302 /* I */ sizeof(unsigned int),
303 /* J */ UVSIZE,
304 0,
305 /* L */ SIZE32,
306 0,
307 /* N */ SIZE32,
308 0, 0,
80a13697 309#if defined(HAS_QUAD)
78d46eaa 310 /* Q */ sizeof(Uquad_t),
80a13697 311#else
312 0,
313#endif
78d46eaa 314 0,
315 /* S */ SIZE16,
316 0,
317 /* U */ sizeof(char),
318 /* V */ SIZE32,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
320 /* c */ sizeof(char),
321 /* d */ sizeof(double),
322 0,
323 /* f */ sizeof(float),
324 0, 0,
325 /* i */ sizeof(int),
326 /* j */ IVSIZE,
327 0,
328 /* l */ SIZE32,
329 0,
330 /* n */ SIZE16,
331 0,
a72c271b 332 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 333#if defined(HAS_QUAD)
78d46eaa 334 /* q */ sizeof(Quad_t),
80a13697 335#else
336 0,
337#endif
78d46eaa 338 0,
339 /* s */ SIZE16,
340 0, 0,
341 /* v */ SIZE16,
7212898e 342 /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
78d46eaa 343};
344unsigned char size_shrieking[46] = {
345 /* I */ sizeof(unsigned int),
346 0, 0,
347 /* L */ sizeof(unsigned long),
348 0,
7212898e 349#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 350 /* N */ SIZE32,
7212898e 351#else
352 0,
353#endif
78d46eaa 354 0, 0, 0, 0,
355 /* S */ sizeof(unsigned short),
356 0, 0,
7212898e 357#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 358 /* V */ SIZE32,
7212898e 359#else
360 0,
361#endif
78d46eaa 362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363 /* i */ sizeof(int),
364 0, 0,
365 /* l */ sizeof(long),
366 0,
7212898e 367#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 368 /* n */ SIZE16,
7212898e 369#else
370 0,
371#endif
78d46eaa 372 0, 0, 0, 0,
373 /* s */ sizeof(short),
374 0, 0,
7212898e 375#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 376 /* v */ SIZE16
7212898e 377#else
378 0
379#endif
78d46eaa 380};
381struct packsize_t packsize[2] = {
382 {size_normal, 67, 53},
383 {size_shrieking, 73, 46}
384};
385#else
386/* EBCDIC (or bust) */
387unsigned char size_normal[99] = {
388 /* c */ sizeof(char),
389 /* d */ sizeof(double),
390 0,
391 /* f */ sizeof(float),
392 0, 0,
393 /* i */ sizeof(int),
394 0, 0, 0, 0, 0, 0, 0,
395 /* j */ IVSIZE,
396 0,
397 /* l */ SIZE32,
398 0,
399 /* n */ SIZE16,
400 0,
a72c271b 401 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 402#if defined(HAS_QUAD)
78d46eaa 403 /* q */ sizeof(Quad_t),
80a13697 404#else
405 0,
406#endif
78d46eaa 407 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 /* s */ SIZE16,
409 0, 0,
410 /* v */ SIZE16,
411 /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
412 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
413 0, 0,
414 /* C */ sizeof(unsigned char),
80a13697 415#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
78d46eaa 416 /* D */ LONG_DOUBLESIZE,
80a13697 417#else
418 0,
419#endif
78d46eaa 420 0,
421 /* F */ NVSIZE,
422 0, 0,
423 /* I */ sizeof(unsigned int),
424 0, 0, 0, 0, 0, 0, 0,
425 /* J */ UVSIZE,
426 0,
427 /* L */ SIZE32,
428 0,
429 /* N */ SIZE32,
430 0, 0,
80a13697 431#if defined(HAS_QUAD)
78d46eaa 432 /* Q */ sizeof(Uquad_t),
80a13697 433#else
434 0,
435#endif
78d46eaa 436 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* S */ SIZE16,
438 0,
439 /* U */ sizeof(char),
7212898e 440 /* V */ SIZE32,
78d46eaa 441};
442unsigned char size_shrieking[93] = {
443 /* i */ sizeof(int),
444 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 /* l */ sizeof(long),
446 0,
7212898e 447#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 448 /* n */ SIZE16,
7212898e 449#else
450 0,
451#endif
78d46eaa 452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* s */ sizeof(short),
454 0, 0,
7212898e 455#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 456 /* v */ SIZE16,
7212898e 457#else
458 0,
459#endif
78d46eaa 460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 /* I */ sizeof(unsigned int),
463 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 /* L */ sizeof(unsigned long),
465 0,
7212898e 466#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 467 /* N */ SIZE32,
7212898e 468#else
469 0,
470#endif
78d46eaa 471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 /* S */ sizeof(unsigned short),
473 0, 0,
7212898e 474#if defined(PERL_PACK_CAN_SHRIEKSIGN)
78d46eaa 475 /* V */ SIZE32
7212898e 476#else
477 0
478#endif
78d46eaa 479};
480struct packsize_t packsize[2] = {
481 {size_normal, 131, 99},
482 {size_shrieking, 137, 93}
483};
484#endif
485
486
206947d2 487/* Returns the sizeof() struct described by pat */
028d1f6d 488STATIC I32
49704364 489S_measure_struct(pTHX_ register tempsym_t* symptr)
206947d2 490{
49704364 491 register I32 len = 0;
206947d2 492 register I32 total = 0;
49704364 493 int star;
494
206947d2 495 register int size;
496
49704364 497 while (next_symbol(symptr)) {
80a13697 498 int which = (symptr->code & TYPE_IS_SHRIEKING)
499 ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
500 int offset
501 = TYPE_NO_MODIFIERS(symptr->code) - packsize[which].first;
206947d2 502
49704364 503 switch( symptr->howlen ){
504 case e_no_len:
505 case e_number:
506 len = symptr->length;
507 break;
508 case e_star:
509 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
510 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
511 break;
512 }
513
80a13697 514 if ((offset >= 0) && (offset < packsize[which].size))
515 size = packsize[which].array[offset] & PACK_SIZE_MASK;
516 else
206947d2 517 size = 0;
78d46eaa 518
80a13697 519 if (!size) {
520 /* endianness doesn't influence the size of a type */
521 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
522 default:
523 Perl_croak(aTHX_ "Invalid type '%c' in %s",
524 (int)TYPE_NO_MODIFIERS(symptr->code),
525 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
526 case '@':
527 case '/':
528 case 'U': /* XXXX Is it correct? */
529 case 'w':
530 case 'u':
531 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
532 (int)symptr->code,
533 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
534 case '%':
535 size = 0;
536 break;
537 case '(':
538 {
539 tempsym_t savsym = *symptr;
540 symptr->patptr = savsym.grpbeg;
541 symptr->patend = savsym.grpend;
542 /* XXXX Theoretically, we need to measure many times at
543 different positions, since the subexpression may contain
544 alignment commands, but be not of aligned length.
545 Need to detect this and croak(). */
546 size = measure_struct(symptr);
547 *symptr = savsym;
548 break;
549 }
550 case 'X' | TYPE_IS_SHRIEKING:
551 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
552 */
553 if (!len) /* Avoid division by 0 */
554 len = 1;
555 len = total % len; /* Assumed: the start is aligned. */
556 /* FALL THROUGH */
557 case 'X':
558 size = -1;
559 if (total < len)
560 Perl_croak(aTHX_ "'X' outside of string in %s",
561 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
562 break;
563 case 'x' | TYPE_IS_SHRIEKING:
564 if (!len) /* Avoid division by 0 */
565 len = 1;
566 star = total % len; /* Assumed: the start is aligned. */
567 if (star) /* Other portable ways? */
568 len = len - star;
569 else
570 len = 0;
571 /* FALL THROUGH */
572 case 'x':
573 case 'A':
574 case 'Z':
575 case 'a':
576 case 'c':
577 case 'C':
578 size = 1;
579 break;
580 case 'B':
581 case 'b':
582 len = (len + 7)/8;
583 size = 1;
584 break;
585 case 'H':
586 case 'h':
587 len = (len + 1)/2;
588 size = 1;
589 break;
78d46eaa 590
80a13697 591 case 'P':
592 len = 1;
593 size = sizeof(char*);
78d46eaa 594 break;
595 }
206947d2 596 }
597 total += len * size;
598 }
599 return total;
600}
601
49704364 602
603/* locate matching closing parenthesis or bracket
604 * returns char pointer to char after match, or NULL
605 */
606STATIC char *
607S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
18529408 608{
49704364 609 while (patptr < patend) {
610 char c = *patptr++;
611
612 if (isSPACE(c))
613 continue;
614 else if (c == ender)
615 return patptr-1;
616 else if (c == '#') {
617 while (patptr < patend && *patptr != '\n')
618 patptr++;
619 continue;
620 } else if (c == '(')
621 patptr = group_end(patptr, patend, ')') + 1;
622 else if (c == '[')
623 patptr = group_end(patptr, patend, ']') + 1;
18529408 624 }
49704364 625 Perl_croak(aTHX_ "No group ending character '%c' found in template",
626 ender);
627 return 0;
18529408 628}
629
49704364 630
631/* Convert unsigned decimal number to binary.
632 * Expects a pointer to the first digit and address of length variable
633 * Advances char pointer to 1st non-digit char and returns number
634 */
18529408 635STATIC char *
49704364 636S_get_num(pTHX_ register char *patptr, I32 *lenptr )
637{
638 I32 len = *patptr++ - '0';
639 while (isDIGIT(*patptr)) {
640 if (len >= 0x7FFFFFFF/10)
641 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
642 len = (len * 10) + (*patptr++ - '0');
643 }
644 *lenptr = len;
645 return patptr;
646}
647
648/* The marvellous template parsing routine: Using state stored in *symptr,
649 * locates next template code and count
650 */
651STATIC bool
652S_next_symbol(pTHX_ register tempsym_t* symptr )
18529408 653{
49704364 654 register char* patptr = symptr->patptr;
655 register char* patend = symptr->patend;
656
657 symptr->flags &= ~FLAG_SLASH;
658
659 while (patptr < patend) {
660 if (isSPACE(*patptr))
661 patptr++;
662 else if (*patptr == '#') {
663 patptr++;
664 while (patptr < patend && *patptr != '\n')
665 patptr++;
666 if (patptr < patend)
667 patptr++;
668 } else {
669 /* We should have found a template code */
670 I32 code = *patptr++ & 0xFF;
66c611c5 671 U32 inherited_modifiers = 0;
49704364 672
673 if (code == ','){ /* grandfather in commas but with a warning */
674 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
675 symptr->flags |= FLAG_COMMA;
676 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
677 "Invalid type ',' in %s",
678 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
679 }
680 continue;
681 }
682
683 /* for '(', skip to ')' */
684 if (code == '(') {
685 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
686 Perl_croak(aTHX_ "()-group starts with a count in %s",
687 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
688 symptr->grpbeg = patptr;
689 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
690 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
691 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
692 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
693 }
694
66c611c5 695 /* look for group modifiers to inherit */
696 if (TYPE_ENDIANNESS(symptr->flags)) {
697 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
698 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
699 }
700
1109a392 701 /* look for modifiers */
702 while (patptr < patend) {
703 const char *allowed;
704 I32 modifier = 0;
705 switch (*patptr) {
706 case '!':
707 modifier = TYPE_IS_SHRIEKING;
7212898e 708 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 709 break;
7212898e 710#ifdef PERL_PACK_CAN_BYTEORDER
1109a392 711 case '>':
712 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 713 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 714 break;
715 case '<':
716 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 717 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 718 break;
7212898e 719#endif
1109a392 720 default:
721 break;
722 }
66c611c5 723
1109a392 724 if (modifier == 0)
725 break;
66c611c5 726
1109a392 727 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
728 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
729 allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5 730
731 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 732 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
733 (int) TYPE_NO_MODIFIERS(code),
734 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
66c611c5 735 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
736 TYPE_ENDIANNESS_MASK)
737 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
738 *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
739
1109a392 740 if (ckWARN(WARN_UNPACK)) {
741 if (code & modifier)
742 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
743 "Duplicate modifier '%c' after '%c' in %s",
744 *patptr, (int) TYPE_NO_MODIFIERS(code),
745 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
746 }
66c611c5 747
1109a392 748 code |= modifier;
749 patptr++;
49704364 750 }
751
66c611c5 752 /* inherit modifiers */
753 code |= inherited_modifiers;
754
49704364 755 /* look for count and/or / */
756 if (patptr < patend) {
757 if (isDIGIT(*patptr)) {
758 patptr = get_num( patptr, &symptr->length );
759 symptr->howlen = e_number;
760
761 } else if (*patptr == '*') {
762 patptr++;
763 symptr->howlen = e_star;
764
765 } else if (*patptr == '[') {
766 char* lenptr = ++patptr;
767 symptr->howlen = e_number;
768 patptr = group_end( patptr, patend, ']' ) + 1;
769 /* what kind of [] is it? */
770 if (isDIGIT(*lenptr)) {
771 lenptr = get_num( lenptr, &symptr->length );
772 if( *lenptr != ']' )
773 Perl_croak(aTHX_ "Malformed integer in [] in %s",
774 symptr->flags & FLAG_PACK ? "pack" : "unpack");
775 } else {
776 tempsym_t savsym = *symptr;
777 symptr->patend = patptr-1;
778 symptr->patptr = lenptr;
779 savsym.length = measure_struct(symptr);
780 *symptr = savsym;
781 }
782 } else {
783 symptr->howlen = e_no_len;
784 symptr->length = 1;
785 }
786
787 /* try to find / */
788 while (patptr < patend) {
789 if (isSPACE(*patptr))
790 patptr++;
791 else if (*patptr == '#') {
792 patptr++;
793 while (patptr < patend && *patptr != '\n')
794 patptr++;
795 if (patptr < patend)
796 patptr++;
797 } else {
66c611c5 798 if (*patptr == '/') {
49704364 799 symptr->flags |= FLAG_SLASH;
800 patptr++;
66c611c5 801 if (patptr < patend &&
802 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 803 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
804 symptr->flags & FLAG_PACK ? "pack" : "unpack" );
805 }
806 break;
807 }
18529408 808 }
49704364 809 } else {
810 /* at end - no count, no / */
811 symptr->howlen = e_no_len;
812 symptr->length = 1;
813 }
814
815 symptr->code = code;
816 symptr->patptr = patptr;
817 return TRUE;
18529408 818 }
49704364 819 }
820 symptr->patptr = patptr;
821 return FALSE;
18529408 822}
823
18529408 824/*
825=for apidoc unpack_str
826
7accc089 827The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
828and ocnt are not used. This call should not be used, use unpackstring instead.
18529408 829
830=cut */
831
832I32
833Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 834{
49704364 835 tempsym_t sym = { 0 };
836 sym.patptr = pat;
837 sym.patend = patend;
838 sym.flags = flags;
839
840 return unpack_rec(&sym, s, s, strend, NULL );
841}
842
7accc089 843/*
844=for apidoc unpackstring
845
608d3aed 846The engine implementing unpack() Perl function. C<unpackstring> puts the
847extracted list items on the stack and returns the number of elements.
848Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089 849
850=cut */
851
852I32
853Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char *strend, U32 flags)
854{
855 tempsym_t sym = { 0 };
856 sym.patptr = pat;
857 sym.patend = patend;
858 sym.flags = flags;
859
860 return unpack_rec(&sym, s, s, strend, NULL );
861}
862
49704364 863STATIC
864I32
865S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, char *strend, char **new_s )
866{
a6ec74c1 867 dSP;
a6ec74c1 868 I32 datumtype;
49704364 869 register I32 len = 0;
a6ec74c1 870 register I32 bits = 0;
871 register char *str;
18529408 872 SV *sv;
873 I32 start_sp_offset = SP - PL_stack_base;
49704364 874 howlen_t howlen;
a6ec74c1 875
876 /* These must not be in registers: */
ef108786 877 I16 ai16;
878 U16 au16;
879 I32 ai32;
880 U32 au32;
a6ec74c1 881#ifdef HAS_QUAD
882 Quad_t aquad;
ef108786 883 Uquad_t auquad;
884#endif
885#if SHORTSIZE != SIZE16
886 short ashort;
887 unsigned short aushort;
a6ec74c1 888#endif
ef108786 889 int aint;
a6ec74c1 890 unsigned int auint;
ef108786 891 long along;
892#if LONGSIZE != SIZE32
893 unsigned long aulong;
a6ec74c1 894#endif
895 char *aptr;
896 float afloat;
897 double adouble;
ef108786 898#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
899 long double aldouble;
900#endif
901 IV aiv;
902 UV auv;
903 NV anv;
904
a6ec74c1 905 I32 checksum = 0;
92d41999 906 UV cuv = 0;
a6ec74c1 907 NV cdouble = 0.0;
92d41999 908 const int bits_in_uv = 8 * sizeof(cuv);
49704364 909 char* strrelbeg = s;
910 bool beyond = FALSE;
911 bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
912
49704364 913 while (next_symbol(symptr)) {
914 datumtype = symptr->code;
206947d2 915 /* do first one only unless in list context
916 / is implemented by unpacking the count, then poping it from the
917 stack, so must check that we're not in the middle of a / */
49704364 918 if ( unpack_only_one
206947d2 919 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 920 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 921 break;
49704364 922
923 switch( howlen = symptr->howlen ){
924 case e_no_len:
925 case e_number:
926 len = symptr->length;
927 break;
928 case e_star:
929 len = strend - strbeg; /* long enough */
930 break;
931 }
18529408 932
a6ec74c1 933 redo_switch:
49704364 934 beyond = s >= strend;
78d46eaa 935 {
936 int which = (symptr->code & TYPE_IS_SHRIEKING)
937 ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
a72c271b 938 const int rawtype = TYPE_NO_MODIFIERS(datumtype);
939 int offset = rawtype - packsize[which].first;
78d46eaa 940
941 if (offset >= 0 && offset < packsize[which].size) {
942 /* Data about this template letter */
943 unsigned char data = packsize[which].array[offset];
944
945 if (data) {
946 /* data nonzero means we can process this letter. */
947 long size = data & PACK_SIZE_MASK;
948 long howmany = (strend - s) / size;
949 if (len > howmany)
950 len = howmany;
951
a72c271b 952 /* In the old code, 'p' was the only type without shortcut
953 code to curtail unpacking to only one. As far as I can
954 see the only point of retaining this anomaly is to make
955 code such as $_ = unpack "p2", pack "pI", "Hi", 2
956 continue to segfault. ie, it probably should be
957 construed as a bug.
958 */
959
78d46eaa 960 if (!checksum || (data & PACK_SIZE_CANNOT_CSUM)) {
961 if (len && unpack_only_one &&
a72c271b 962 rawtype != 'p')
78d46eaa 963 len = 1;
964 EXTEND(SP, len);
965 EXTEND_MORTAL(len);
966 }
967 }
968 }
969 }
1109a392 970 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 971 default:
1109a392 972 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 973
a6ec74c1 974 case '%':
49704364 975 if (howlen == e_no_len)
18529408 976 len = 16; /* len is not specified */
a6ec74c1 977 checksum = len;
92d41999 978 cuv = 0;
a6ec74c1 979 cdouble = 0;
18529408 980 continue;
a6ec74c1 981 break;
18529408 982 case '(':
983 {
18529408 984 char *ss = s; /* Move from register */
49704364 985 tempsym_t savsym = *symptr;
66c611c5 986 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
987 symptr->flags |= group_modifiers;
49704364 988 symptr->patend = savsym.grpend;
989 symptr->level++;
18529408 990 PUTBACK;
991 while (len--) {
49704364 992 symptr->patptr = savsym.grpbeg;
993 unpack_rec(symptr, ss, strbeg, strend, &ss );
0c81e54b 994 if (savsym.flags & FLAG_UNPACK_DO_UTF8)
995 symptr->flags |= FLAG_UNPACK_DO_UTF8;
996 else
997 symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
49704364 998 if (ss == strend && savsym.howlen == e_star)
999 break; /* No way to continue */
18529408 1000 }
1001 SPAGAIN;
1002 s = ss;
66c611c5 1003 symptr->flags &= ~group_modifiers;
49704364 1004 savsym.flags = symptr->flags;
1005 *symptr = savsym;
18529408 1006 break;
1007 }
a6ec74c1 1008 case '@':
49704364 1009 if (len > strend - strrelbeg)
1010 Perl_croak(aTHX_ "'@' outside of string in unpack");
1011 s = strrelbeg + len;
a6ec74c1 1012 break;
62f95557 1013 case 'X' | TYPE_IS_SHRIEKING:
1014 if (!len) /* Avoid division by 0 */
1015 len = 1;
1016 len = (s - strbeg) % len;
1017 /* FALL THROUGH */
a6ec74c1 1018 case 'X':
1019 if (len > s - strbeg)
49704364 1020 Perl_croak(aTHX_ "'X' outside of string in unpack" );
a6ec74c1 1021 s -= len;
1022 break;
62f95557 1023 case 'x' | TYPE_IS_SHRIEKING:
1024 if (!len) /* Avoid division by 0 */
1025 len = 1;
1026 aint = (s - strbeg) % len;
1027 if (aint) /* Other portable ways? */
1028 len = len - aint;
1029 else
1030 len = 0;
1031 /* FALL THROUGH */
a6ec74c1 1032 case 'x':
1033 if (len > strend - s)
49704364 1034 Perl_croak(aTHX_ "'x' outside of string in unpack");
a6ec74c1 1035 s += len;
1036 break;
1037 case '/':
49704364 1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1039 break;
a6ec74c1 1040 case 'A':
1041 case 'Z':
1042 case 'a':
1043 if (len > strend - s)
1044 len = strend - s;
1045 if (checksum)
1046 goto uchar_checksum;
c4c5f44a 1047 sv = newSVpvn(s, len);
49704364 1048 if (len > 0 && (datumtype == 'A' || datumtype == 'Z')) {
a6ec74c1 1049 aptr = s; /* borrow register */
1050 if (datumtype == 'Z') { /* 'Z' strips stuff after first null */
1051 s = SvPVX(sv);
1052 while (*s)
1053 s++;
49704364 1054 if (howlen == e_star) /* exact for 'Z*' */
d50dd4e4 1055 len = s - SvPVX(sv) + 1;
a6ec74c1 1056 }
1057 else { /* 'A' strips both nulls and spaces */
1058 s = SvPVX(sv) + len - 1;
1059 while (s >= SvPVX(sv) && (!*s || isSPACE(*s)))
1060 s--;
1061 *++s = '\0';
1062 }
1063 SvCUR_set(sv, s - SvPVX(sv));
1064 s = aptr; /* unborrow register */
1065 }
d50dd4e4 1066 s += len;
a6ec74c1 1067 XPUSHs(sv_2mortal(sv));
1068 break;
1069 case 'B':
1070 case 'b':
49704364 1071 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1 1072 len = (strend - s) * 8;
1073 if (checksum) {
1074 if (!PL_bitcount) {
1075 Newz(601, PL_bitcount, 256, char);
1076 for (bits = 1; bits < 256; bits++) {
1077 if (bits & 1) PL_bitcount[bits]++;
1078 if (bits & 2) PL_bitcount[bits]++;
1079 if (bits & 4) PL_bitcount[bits]++;
1080 if (bits & 8) PL_bitcount[bits]++;
1081 if (bits & 16) PL_bitcount[bits]++;
1082 if (bits & 32) PL_bitcount[bits]++;
1083 if (bits & 64) PL_bitcount[bits]++;
1084 if (bits & 128) PL_bitcount[bits]++;
1085 }
1086 }
1087 while (len >= 8) {
92d41999 1088 cuv += PL_bitcount[*(unsigned char*)s++];
a6ec74c1 1089 len -= 8;
1090 }
1091 if (len) {
0ed7c1bb 1092 bits = *s++;
a6ec74c1 1093 if (datumtype == 'b') {
1094 while (len-- > 0) {
92d41999 1095 if (bits & 1) cuv++;
a6ec74c1 1096 bits >>= 1;
1097 }
1098 }
1099 else {
1100 while (len-- > 0) {
92d41999 1101 if (bits & 128) cuv++;
a6ec74c1 1102 bits <<= 1;
1103 }
1104 }
1105 }
1106 break;
1107 }
1108 sv = NEWSV(35, len + 1);
1109 SvCUR_set(sv, len);
1110 SvPOK_on(sv);
1111 str = SvPVX(sv);
1112 if (datumtype == 'b') {
1113 aint = len;
1114 for (len = 0; len < aint; len++) {
1115 if (len & 7) /*SUPPRESS 595*/
1116 bits >>= 1;
1117 else
1118 bits = *s++;
1119 *str++ = '0' + (bits & 1);
1120 }
1121 }
1122 else {
1123 aint = len;
1124 for (len = 0; len < aint; len++) {
1125 if (len & 7)
1126 bits <<= 1;
1127 else
1128 bits = *s++;
1129 *str++ = '0' + ((bits & 128) != 0);
1130 }
1131 }
1132 *str = '\0';
1133 XPUSHs(sv_2mortal(sv));
1134 break;
1135 case 'H':
1136 case 'h':
49704364 1137 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1138 len = (strend - s) * 2;
1139 sv = NEWSV(35, len + 1);
1140 SvCUR_set(sv, len);
1141 SvPOK_on(sv);
1142 str = SvPVX(sv);
1143 if (datumtype == 'h') {
1144 aint = len;
1145 for (len = 0; len < aint; len++) {
1146 if (len & 1)
1147 bits >>= 4;
1148 else
1149 bits = *s++;
1150 *str++ = PL_hexdigit[bits & 15];
1151 }
1152 }
1153 else {
1154 aint = len;
1155 for (len = 0; len < aint; len++) {
1156 if (len & 1)
1157 bits <<= 4;
1158 else
1159 bits = *s++;
1160 *str++ = PL_hexdigit[(bits >> 4) & 15];
1161 }
1162 }
1163 *str = '\0';
1164 XPUSHs(sv_2mortal(sv));
1165 break;
1166 case 'c':
73cb7263 1167 while (len-- > 0) {
1168 aint = *s++;
1169 if (aint >= 128) /* fake up signed chars */
1170 aint -= 256;
1171 if (!checksum) {
ac7f3b1b 1172 PUSHs(sv_2mortal(newSViv((IV)aint)));
a6ec74c1 1173 }
73cb7263 1174 else if (checksum > bits_in_uv)
1175 cdouble += (NV)aint;
1176 else
1177 cuv += aint;
a6ec74c1 1178 }
1179 break;
1180 case 'C':
35bcd338 1181 unpack_C: /* unpack U will jump here if not UTF-8 */
1182 if (len == 0) {
49704364 1183 symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
35bcd338 1184 break;
1185 }
a6ec74c1 1186 if (checksum) {
1187 uchar_checksum:
1188 while (len-- > 0) {
1189 auint = *s++ & 255;
d6d3e8bd 1190 if (checksum > bits_in_uv)
1191 cdouble += (NV)auint;
1192 else
1193 cuv += auint;
a6ec74c1 1194 }
1195 }
1196 else {
a6ec74c1 1197 while (len-- > 0) {
1198 auint = *s++ & 255;
ac7f3b1b 1199 PUSHs(sv_2mortal(newSViv((IV)auint)));
a6ec74c1 1200 }
1201 }
1202 break;
1203 case 'U':
35bcd338 1204 if (len == 0) {
49704364 1205 symptr->flags |= FLAG_UNPACK_DO_UTF8;
35bcd338 1206 break;
1207 }
49704364 1208 if ((symptr->flags & FLAG_UNPACK_DO_UTF8) == 0)
35bcd338 1209 goto unpack_C;
73cb7263 1210 while (len-- > 0 && s < strend) {
1211 STRLEN alen;
1212 auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV));
1213 along = alen;
1214 s += along;
1215 if (!checksum) {
ac7f3b1b 1216 PUSHs(sv_2mortal(newSVuv((UV)auint)));
a6ec74c1 1217 }
73cb7263 1218 else if (checksum > bits_in_uv)
1219 cdouble += (NV)auint;
1220 else
1221 cuv += auint;
a6ec74c1 1222 }
1223 break;
49704364 1224 case 's' | TYPE_IS_SHRIEKING:
1225#if SHORTSIZE != SIZE16
73cb7263 1226 while (len-- > 0) {
1227 COPYNN(s, &ashort, sizeof(short));
1228 DO_BO_UNPACK(ashort, s);
1229 s += sizeof(short);
1230 if (!checksum) {
ac7f3b1b 1231 PUSHs(sv_2mortal(newSViv((IV)ashort)));
49704364 1232 }
73cb7263 1233 else if (checksum > bits_in_uv)
1234 cdouble += (NV)ashort;
1235 else
1236 cuv += ashort;
49704364 1237 }
1238 break;
1239#else
1240 /* Fallthrough! */
a6ec74c1 1241#endif
49704364 1242 case 's':
73cb7263 1243 while (len-- > 0) {
1244 COPY16(s, &ai16);
1245 DO_BO_UNPACK(ai16, 16);
1109a392 1246#if U16SIZE > SIZE16
73cb7263 1247 if (ai16 > 32767)
1248 ai16 -= 65536;
a6ec74c1 1249#endif
73cb7263 1250 s += SIZE16;
1251 if (!checksum) {
ac7f3b1b 1252 PUSHs(sv_2mortal(newSViv((IV)ai16)));
a6ec74c1 1253 }
73cb7263 1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)ai16;
1256 else
1257 cuv += ai16;
a6ec74c1 1258 }
1259 break;
49704364 1260 case 'S' | TYPE_IS_SHRIEKING:
1261#if SHORTSIZE != SIZE16
73cb7263 1262 while (len-- > 0) {
1263 COPYNN(s, &aushort, sizeof(unsigned short));
1264 DO_BO_UNPACK(aushort, s);
1265 s += sizeof(unsigned short);
1266 if (!checksum) {
ac7f3b1b 1267 PUSHs(sv_2mortal(newSViv((UV)aushort)));
49704364 1268 }
73cb7263 1269 else if (checksum > bits_in_uv)
1270 cdouble += (NV)aushort;
1271 else
1272 cuv += aushort;
49704364 1273 }
1274 break;
1275#else
1276 /* Fallhrough! */
1277#endif
a6ec74c1 1278 case 'v':
1279 case 'n':
1280 case 'S':
73cb7263 1281 while (len-- > 0) {
1282 COPY16(s, &au16);
1283 DO_BO_UNPACK(au16, 16);
1284 s += SIZE16;
a6ec74c1 1285#ifdef HAS_NTOHS
73cb7263 1286 if (datumtype == 'n')
1287 au16 = PerlSock_ntohs(au16);
a6ec74c1 1288#endif
1289#ifdef HAS_VTOHS
73cb7263 1290 if (datumtype == 'v')
1291 au16 = vtohs(au16);
a6ec74c1 1292#endif
73cb7263 1293 if (!checksum) {
ac7f3b1b 1294 PUSHs(sv_2mortal(newSViv((UV)au16)));
a6ec74c1 1295 }
73cb7263 1296 else if (checksum > bits_in_uv)
1297 cdouble += (NV)au16;
1298 else
1299 cuv += au16;
a6ec74c1 1300 }
1301 break;
7212898e 1302#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 1303 case 'v' | TYPE_IS_SHRIEKING:
1304 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1305 while (len-- > 0) {
1306 COPY16(s, &ai16);
1307 s += SIZE16;
068bd2e7 1308#ifdef HAS_NTOHS
73cb7263 1309 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1310 ai16 = (I16)PerlSock_ntohs((U16)ai16);
068bd2e7 1311#endif
1312#ifdef HAS_VTOHS
73cb7263 1313 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1314 ai16 = (I16)vtohs((U16)ai16);
068bd2e7 1315#endif
73cb7263 1316 if (!checksum) {
ac7f3b1b 1317 PUSHs(sv_2mortal(newSViv((IV)ai16)));
068bd2e7 1318 }
73cb7263 1319 else if (checksum > bits_in_uv)
1320 cdouble += (NV)ai16;
1321 else
1322 cuv += ai16;
068bd2e7 1323 }
1324 break;
7212898e 1325#endif
a6ec74c1 1326 case 'i':
49704364 1327 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1328 while (len-- > 0) {
1329 Copy(s, &aint, 1, int);
1330 DO_BO_UNPACK(aint, i);
1331 s += sizeof(int);
1332 if (!checksum) {
ac7f3b1b 1333 PUSHs(sv_2mortal(newSViv((IV)aint)));
a6ec74c1 1334 }
73cb7263 1335 else if (checksum > bits_in_uv)
1336 cdouble += (NV)aint;
1337 else
1338 cuv += aint;
a6ec74c1 1339 }
1340 break;
1341 case 'I':
49704364 1342 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1343 while (len-- > 0) {
1344 Copy(s, &auint, 1, unsigned int);
1345 DO_BO_UNPACK(auint, i);
1346 s += sizeof(unsigned int);
1347 if (!checksum) {
ac7f3b1b 1348 PUSHs(sv_2mortal(newSVuv((UV)auint)));
a6ec74c1 1349 }
73cb7263 1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)auint;
1352 else
1353 cuv += auint;
a6ec74c1 1354 }
1355 break;
92d41999 1356 case 'j':
73cb7263 1357 while (len-- > 0) {
1358 Copy(s, &aiv, 1, IV);
1109a392 1359#if IVSIZE == INTSIZE
73cb7263 1360 DO_BO_UNPACK(aiv, i);
1109a392 1361#elif IVSIZE == LONGSIZE
73cb7263 1362 DO_BO_UNPACK(aiv, l);
1109a392 1363#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
73cb7263 1364 DO_BO_UNPACK(aiv, 64);
1109a392 1365#endif
73cb7263 1366 s += IVSIZE;
1367 if (!checksum) {
ac7f3b1b 1368 PUSHs(sv_2mortal(newSViv(aiv)));
92d41999 1369 }
73cb7263 1370 else if (checksum > bits_in_uv)
1371 cdouble += (NV)aiv;
1372 else
1373 cuv += aiv;
92d41999 1374 }
1375 break;
1376 case 'J':
73cb7263 1377 while (len-- > 0) {
1378 Copy(s, &auv, 1, UV);
1109a392 1379#if UVSIZE == INTSIZE
73cb7263 1380 DO_BO_UNPACK(auv, i);
1109a392 1381#elif UVSIZE == LONGSIZE
73cb7263 1382 DO_BO_UNPACK(auv, l);
1109a392 1383#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
73cb7263 1384 DO_BO_UNPACK(auv, 64);
1109a392 1385#endif
73cb7263 1386 s += UVSIZE;
1387 if (!checksum) {
ac7f3b1b 1388 PUSHs(sv_2mortal(newSVuv(auv)));
92d41999 1389 }
73cb7263 1390 else if (checksum > bits_in_uv)
1391 cdouble += (NV)auv;
1392 else
1393 cuv += auv;
92d41999 1394 }
1395 break;
49704364 1396 case 'l' | TYPE_IS_SHRIEKING:
1397#if LONGSIZE != SIZE32
73cb7263 1398 while (len-- > 0) {
1399 COPYNN(s, &along, sizeof(long));
1400 DO_BO_UNPACK(along, l);
1401 s += sizeof(long);
1402 if (!checksum) {
ac7f3b1b 1403 PUSHs(sv_2mortal(newSViv((IV)along)));
49704364 1404 }
73cb7263 1405 else if (checksum > bits_in_uv)
1406 cdouble += (NV)along;
1407 else
1408 cuv += along;
49704364 1409 }
1410 break;
1411#else
1412 /* Fallthrough! */
a6ec74c1 1413#endif
49704364 1414 case 'l':
73cb7263 1415 while (len-- > 0) {
1416 COPY32(s, &ai32);
1417 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1418#if U32SIZE > SIZE32
73cb7263 1419 if (ai32 > 2147483647)
1420 ai32 -= 4294967296;
a6ec74c1 1421#endif
73cb7263 1422 s += SIZE32;
1423 if (!checksum) {
ac7f3b1b 1424 PUSHs(sv_2mortal(newSViv((IV)ai32)));
a6ec74c1 1425 }
73cb7263 1426 else if (checksum > bits_in_uv)
1427 cdouble += (NV)ai32;
1428 else
1429 cuv += ai32;
a6ec74c1 1430 }
1431 break;
49704364 1432 case 'L' | TYPE_IS_SHRIEKING:
1433#if LONGSIZE != SIZE32
73cb7263 1434 while (len-- > 0) {
1435 COPYNN(s, &aulong, sizeof(unsigned long));
1436 DO_BO_UNPACK(aulong, l);
1437 s += sizeof(unsigned long);
1438 if (!checksum) {
ac7f3b1b 1439 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
49704364 1440 }
73cb7263 1441 else if (checksum > bits_in_uv)
1442 cdouble += (NV)aulong;
1443 else
1444 cuv += aulong;
49704364 1445 }
1446 break;
1447#else
1448 /* Fall through! */
1449#endif
a6ec74c1 1450 case 'V':
1451 case 'N':
1452 case 'L':
73cb7263 1453 while (len-- > 0) {
1454 COPY32(s, &au32);
1455 DO_BO_UNPACK(au32, 32);
1456 s += SIZE32;
a6ec74c1 1457#ifdef HAS_NTOHL
73cb7263 1458 if (datumtype == 'N')
1459 au32 = PerlSock_ntohl(au32);
a6ec74c1 1460#endif
1461#ifdef HAS_VTOHL
73cb7263 1462 if (datumtype == 'V')
1463 au32 = vtohl(au32);
a6ec74c1 1464#endif
73cb7263 1465 if (!checksum) {
ac7f3b1b 1466 PUSHs(sv_2mortal(newSVuv((UV)au32)));
73cb7263 1467 }
1468 else if (checksum > bits_in_uv)
1469 cdouble += (NV)au32;
1470 else
1471 cuv += au32;
a6ec74c1 1472 }
1473 break;
7212898e 1474#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 1475 case 'V' | TYPE_IS_SHRIEKING:
1476 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1477 while (len-- > 0) {
1478 COPY32(s, &ai32);
1479 s += SIZE32;
068bd2e7 1480#ifdef HAS_NTOHL
73cb7263 1481 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1482 ai32 = (I32)PerlSock_ntohl((U32)ai32);
068bd2e7 1483#endif
1484#ifdef HAS_VTOHL
73cb7263 1485 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1486 ai32 = (I32)vtohl((U32)ai32);
068bd2e7 1487#endif
73cb7263 1488 if (!checksum) {
ac7f3b1b 1489 PUSHs(sv_2mortal(newSViv((IV)ai32)));
068bd2e7 1490 }
73cb7263 1491 else if (checksum > bits_in_uv)
1492 cdouble += (NV)ai32;
1493 else
1494 cuv += ai32;
068bd2e7 1495 }
1496 break;
7212898e 1497#endif
a6ec74c1 1498 case 'p':
a6ec74c1 1499 while (len-- > 0) {
5512a2f9 1500 assert (sizeof(char*) <= strend - s);
1501 Copy(s, &aptr, 1, char*);
1502 DO_BO_UNPACK_P(aptr);
1503 s += sizeof(char*);
c4c5f44a 1504 /* newSVpv generates undef if aptr is NULL */
1505 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1 1506 }
1507 break;
1508 case 'w':
a6ec74c1 1509 {
1510 UV auv = 0;
1511 U32 bytes = 0;
1512
1513 while ((len > 0) && (s < strend)) {
1514 auv = (auv << 7) | (*s & 0x7f);
1515 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1516 if ((U8)(*s++) < 0x80) {
1517 bytes = 0;
ac7f3b1b 1518 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1 1519 len--;
1520 auv = 0;
1521 }
1522 else if (++bytes >= sizeof(UV)) { /* promote to string */
1523 char *t;
1524 STRLEN n_a;
1525
1526 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1527 while (s < strend) {
eb160463 1528 sv = mul128(sv, (U8)(*s & 0x7f));
a6ec74c1 1529 if (!(*s++ & 0x80)) {
1530 bytes = 0;
1531 break;
1532 }
1533 }
1534 t = SvPV(sv, n_a);
1535 while (*t == '0')
1536 t++;
1537 sv_chop(sv, t);
1538 PUSHs(sv_2mortal(sv));
1539 len--;
1540 auv = 0;
1541 }
1542 }
1543 if ((s >= strend) && bytes)
49704364 1544 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1 1545 }
1546 break;
1547 case 'P':
49704364 1548 if (symptr->howlen == e_star)
1549 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1550 EXTEND(SP, 1);
1551 if (sizeof(char*) > strend - s)
1552 break;
1553 else {
1554 Copy(s, &aptr, 1, char*);
1109a392 1555 DO_BO_UNPACK_P(aptr);
a6ec74c1 1556 s += sizeof(char*);
1557 }
c4c5f44a 1558 /* newSVpvn generates undef if aptr is NULL */
1559 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
a6ec74c1 1560 break;
1561#ifdef HAS_QUAD
1562 case 'q':
73cb7263 1563 while (len-- > 0) {
c75bde89 1564 assert (s + sizeof(Quad_t) <= strend);
1565 Copy(s, &aquad, 1, Quad_t);
1566 DO_BO_UNPACK(aquad, 64);
1567 s += sizeof(Quad_t);
73cb7263 1568 if (!checksum) {
ac7f3b1b 1569 PUSHs(sv_2mortal((aquad >= IV_MIN && aquad <= IV_MAX) ?
1570 newSViv((IV)aquad) : newSVnv((NV)aquad)));
fa8ec7c1 1571 }
73cb7263 1572 else if (checksum > bits_in_uv)
1573 cdouble += (NV)aquad;
1574 else
1575 cuv += aquad;
1576 }
a6ec74c1 1577 break;
1578 case 'Q':
73cb7263 1579 while (len-- > 0) {
c75bde89 1580 assert (s + sizeof(Uquad_t) <= strend);
1581 Copy(s, &auquad, 1, Uquad_t);
1582 DO_BO_UNPACK(auquad, 64);
1583 s += sizeof(Uquad_t);
73cb7263 1584 if (!checksum) {
ac7f3b1b 1585 PUSHs(sv_2mortal((auquad <= UV_MAX) ?
1586 newSVuv((UV)auquad) : newSVnv((NV)auquad)));
a6ec74c1 1587 }
73cb7263 1588 else if (checksum > bits_in_uv)
1589 cdouble += (NV)auquad;
1590 else
1591 cuv += auquad;
a6ec74c1 1592 }
1593 break;
1594#endif
1595 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1596 case 'f':
73cb7263 1597 while (len-- > 0) {
1598 Copy(s, &afloat, 1, float);
1599 DO_BO_UNPACK_N(afloat, float);
1600 s += sizeof(float);
1601 if (!checksum) {
ac7f3b1b 1602 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
a6ec74c1 1603 }
73cb7263 1604 else {
1605 cdouble += afloat;
1606 }
a6ec74c1 1607 }
1608 break;
1609 case 'd':
73cb7263 1610 while (len-- > 0) {
1611 Copy(s, &adouble, 1, double);
1612 DO_BO_UNPACK_N(adouble, double);
1613 s += sizeof(double);
1614 if (!checksum) {
ac7f3b1b 1615 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
a6ec74c1 1616 }
73cb7263 1617 else {
1618 cdouble += adouble;
1619 }
a6ec74c1 1620 }
1621 break;
92d41999 1622 case 'F':
73cb7263 1623 while (len-- > 0) {
1624 Copy(s, &anv, 1, NV);
1625 DO_BO_UNPACK_N(anv, NV);
1626 s += NVSIZE;
1627 if (!checksum) {
ac7f3b1b 1628 PUSHs(sv_2mortal(newSVnv(anv)));
92d41999 1629 }
73cb7263 1630 else {
1631 cdouble += anv;
1632 }
92d41999 1633 }
1634 break;
1635#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1636 case 'D':
73cb7263 1637 while (len-- > 0) {
1638 Copy(s, &aldouble, 1, long double);
1639 DO_BO_UNPACK_N(aldouble, long double);
1640 s += LONG_DOUBLESIZE;
1641 if (!checksum) {
ac7f3b1b 1642 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
92d41999 1643 }
73cb7263 1644 else {cdouble += aldouble;
1645 }
92d41999 1646 }
1647 break;
1648#endif
a6ec74c1 1649 case 'u':
1650 /* MKS:
1651 * Initialise the decode mapping. By using a table driven
1652 * algorithm, the code will be character-set independent
1653 * (and just as fast as doing character arithmetic)
1654 */
1655 if (PL_uudmap['M'] == 0) {
1656 int i;
1657
1658 for (i = 0; i < sizeof(PL_uuemap); i += 1)
1659 PL_uudmap[(U8)PL_uuemap[i]] = i;
1660 /*
1661 * Because ' ' and '`' map to the same value,
1662 * we need to decode them both the same.
1663 */
1664 PL_uudmap[' '] = 0;
1665 }
1666
1667 along = (strend - s) * 3 / 4;
1668 sv = NEWSV(42, along);
1669 if (along)
1670 SvPOK_on(sv);
1671 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1672 I32 a, b, c, d;
1673 char hunk[4];
1674
1675 hunk[3] = '\0';
1676 len = PL_uudmap[*(U8*)s++] & 077;
1677 while (len > 0) {
1678 if (s < strend && ISUUCHAR(*s))
1679 a = PL_uudmap[*(U8*)s++] & 077;
1680 else
1681 a = 0;
1682 if (s < strend && ISUUCHAR(*s))
1683 b = PL_uudmap[*(U8*)s++] & 077;
1684 else
1685 b = 0;
1686 if (s < strend && ISUUCHAR(*s))
1687 c = PL_uudmap[*(U8*)s++] & 077;
1688 else
1689 c = 0;
1690 if (s < strend && ISUUCHAR(*s))
1691 d = PL_uudmap[*(U8*)s++] & 077;
1692 else
1693 d = 0;
eb160463 1694 hunk[0] = (char)((a << 2) | (b >> 4));
1695 hunk[1] = (char)((b << 4) | (c >> 2));
1696 hunk[2] = (char)((c << 6) | d);
a6ec74c1 1697 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1698 len -= 3;
1699 }
1700 if (*s == '\n')
1701 s++;
92aa5668 1702 else /* possible checksum byte */
1703 if (s + 1 < strend && s[1] == '\n')
1704 s += 2;
a6ec74c1 1705 }
1706 XPUSHs(sv_2mortal(sv));
1707 break;
1708 }
49704364 1709
a6ec74c1 1710 if (checksum) {
1109a392 1711 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 1712 (checksum > bits_in_uv &&
d6d3e8bd 1713 strchr("cCsSiIlLnNUvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
a6ec74c1 1714 NV trouble;
1715
fa8ec7c1 1716 adouble = (NV) (1 << (checksum & 15));
a6ec74c1 1717 while (checksum >= 16) {
1718 checksum -= 16;
1719 adouble *= 65536.0;
1720 }
a6ec74c1 1721 while (cdouble < 0.0)
1722 cdouble += adouble;
1723 cdouble = Perl_modf(cdouble / adouble, &trouble) * adouble;
c4c5f44a 1724 sv = newSVnv(cdouble);
a6ec74c1 1725 }
1726 else {
fa8ec7c1 1727 if (checksum < bits_in_uv) {
1728 UV mask = ((UV)1 << checksum) - 1;
92d41999 1729 cuv &= mask;
a6ec74c1 1730 }
c4c5f44a 1731 sv = newSVuv(cuv);
a6ec74c1 1732 }
1733 XPUSHs(sv_2mortal(sv));
1734 checksum = 0;
1735 }
49704364 1736
1737 if (symptr->flags & FLAG_SLASH){
1738 if (SP - PL_stack_base - start_sp_offset <= 0)
1739 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1740 if( next_symbol(symptr) ){
1741 if( symptr->howlen == e_number )
1742 Perl_croak(aTHX_ "Count after length/code in unpack" );
1743 if( beyond ){
1744 /* ...end of char buffer then no decent length available */
1745 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1746 } else {
1747 /* take top of stack (hope it's numeric) */
1748 len = POPi;
1749 if( len < 0 )
1750 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1751 }
1752 } else {
1753 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1754 }
1755 datumtype = symptr->code;
1756 goto redo_switch;
1757 }
a6ec74c1 1758 }
49704364 1759
18529408 1760 if (new_s)
1761 *new_s = s;
1762 PUTBACK;
1763 return SP - PL_stack_base - start_sp_offset;
1764}
1765
1766PP(pp_unpack)
1767{
1768 dSP;
bab9c0ac 1769 dPOPPOPssrl;
18529408 1770 I32 gimme = GIMME_V;
1771 STRLEN llen;
1772 STRLEN rlen;
1773 register char *pat = SvPV(left, llen);
1774#ifdef PACKED_IS_OCTETS
1775 /* Packed side is assumed to be octets - so force downgrade if it
1776 has been UTF-8 encoded by accident
1777 */
1778 register char *s = SvPVbyte(right, rlen);
1779#else
1780 register char *s = SvPV(right, rlen);
1781#endif
1782 char *strend = s + rlen;
1783 register char *patend = pat + llen;
1784 register I32 cnt;
1785
1786 PUTBACK;
7accc089 1787 cnt = unpackstring(pat, patend, s, strend,
49704364 1788 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1789 | (DO_UTF8(right) ? FLAG_UNPACK_DO_UTF8 : 0));
1790
18529408 1791 SPAGAIN;
1792 if ( !cnt && gimme == G_SCALAR )
1793 PUSHs(&PL_sv_undef);
a6ec74c1 1794 RETURN;
1795}
1796
1797STATIC void
1798S_doencodes(pTHX_ register SV *sv, register char *s, register I32 len)
1799{
1800 char hunk[5];
1801
1802 *hunk = PL_uuemap[len];
1803 sv_catpvn(sv, hunk, 1);
1804 hunk[4] = '\0';
1805 while (len > 2) {
1806 hunk[0] = PL_uuemap[(077 & (*s >> 2))];
1807 hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017)))];
1808 hunk[2] = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1809 hunk[3] = PL_uuemap[(077 & (s[2] & 077))];
1810 sv_catpvn(sv, hunk, 4);
1811 s += 3;
1812 len -= 3;
1813 }
1814 if (len > 0) {
1815 char r = (len > 1 ? s[1] : '\0');
1816 hunk[0] = PL_uuemap[(077 & (*s >> 2))];
1817 hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((r >> 4) & 017)))];
1818 hunk[2] = PL_uuemap[(077 & ((r << 2) & 074))];
1819 hunk[3] = PL_uuemap[0];
1820 sv_catpvn(sv, hunk, 4);
1821 }
1822 sv_catpvn(sv, "\n", 1);
1823}
1824
1825STATIC SV *
1826S_is_an_int(pTHX_ char *s, STRLEN l)
1827{
1828 STRLEN n_a;
1829 SV *result = newSVpvn(s, l);
1830 char *result_c = SvPV(result, n_a); /* convenience */
1831 char *out = result_c;
1832 bool skip = 1;
1833 bool ignore = 0;
1834
1835 while (*s) {
1836 switch (*s) {
1837 case ' ':
1838 break;
1839 case '+':
1840 if (!skip) {
1841 SvREFCNT_dec(result);
1842 return (NULL);
1843 }
1844 break;
1845 case '0':
1846 case '1':
1847 case '2':
1848 case '3':
1849 case '4':
1850 case '5':
1851 case '6':
1852 case '7':
1853 case '8':
1854 case '9':
1855 skip = 0;
1856 if (!ignore) {
1857 *(out++) = *s;
1858 }
1859 break;
1860 case '.':
1861 ignore = 1;
1862 break;
1863 default:
1864 SvREFCNT_dec(result);
1865 return (NULL);
1866 }
1867 s++;
1868 }
1869 *(out++) = '\0';
1870 SvCUR_set(result, out - result_c);
1871 return (result);
1872}
1873
1874/* pnum must be '\0' terminated */
1875STATIC int
1876S_div128(pTHX_ SV *pnum, bool *done)
1877{
1878 STRLEN len;
1879 char *s = SvPV(pnum, len);
1880 int m = 0;
1881 int r = 0;
1882 char *t = s;
1883
1884 *done = 1;
1885 while (*t) {
1886 int i;
1887
1888 i = m * 10 + (*t - '0');
1889 m = i & 0x7F;
1890 r = (i >> 7); /* r < 10 */
1891 if (r) {
1892 *done = 0;
1893 }
1894 *(t++) = '0' + r;
1895 }
1896 *(t++) = '\0';
1897 SvCUR_set(pnum, (STRLEN) (t - s));
1898 return (m);
1899}
1900
49704364 1901
a6ec74c1 1902
18529408 1903/*
1904=for apidoc pack_cat
1905
7accc089 1906The engine implementing pack() Perl function. Note: parameters next_in_list and
1907flags are not used. This call should not be used; use packlist instead.
18529408 1908
1909=cut */
1910
49704364 1911
18529408 1912void
1913Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 1914{
49704364 1915 tempsym_t sym = { 0 };
1916 sym.patptr = pat;
1917 sym.patend = patend;
7accc089 1918 sym.flags = FLAG_PACK;
1919
1920 (void)pack_rec( cat, &sym, beglist, endlist );
1921}
1922
1923
1924/*
1925=for apidoc packlist
1926
1927The engine implementing pack() Perl function.
1928
1929=cut */
1930
1931
1932void
1933Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
1934{
1935 tempsym_t sym = { 0 };
1936 sym.patptr = pat;
1937 sym.patend = patend;
1938 sym.flags = FLAG_PACK;
49704364 1939
1940 (void)pack_rec( cat, &sym, beglist, endlist );
1941}
1942
1943
1944STATIC
1945SV **
1946S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV **endlist )
1947{
a6ec74c1 1948 register I32 items;
1949 STRLEN fromlen;
49704364 1950 register I32 len = 0;
a6ec74c1 1951 SV *fromstr;
1952 /*SUPPRESS 442*/
1953 static char null10[] = {0,0,0,0,0,0,0,0,0,0};
1954 static char *space10 = " ";
49704364 1955 bool found;
a6ec74c1 1956
1957 /* These must not be in registers: */
1958 char achar;
ef108786 1959 I16 ai16;
1960 U16 au16;
1961 I32 ai32;
1962 U32 au32;
a6ec74c1 1963#ifdef HAS_QUAD
1964 Quad_t aquad;
1965 Uquad_t auquad;
1966#endif
ef108786 1967#if SHORTSIZE != SIZE16
1968 short ashort;
1969 unsigned short aushort;
1970#endif
1971 int aint;
1972 unsigned int auint;
1973#if LONGSIZE != SIZE32
1974 long along;
1975 unsigned long aulong;
1976#endif
a6ec74c1 1977 char *aptr;
1978 float afloat;
1979 double adouble;
ef108786 1980#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1981 long double aldouble;
1982#endif
1983 IV aiv;
1984 UV auv;
1985 NV anv;
1986
49704364 1987 int strrelbeg = SvCUR(cat);
1988 tempsym_t lookahead;
a6ec74c1 1989
18529408 1990 items = endlist - beglist;
49704364 1991 found = next_symbol( symptr );
1992
18529408 1993#ifndef PACKED_IS_OCTETS
49704364 1994 if (symptr->level == 0 && found && symptr->code == 'U' ){
18529408 1995 SvUTF8_on(cat);
49704364 1996 }
18529408 1997#endif
49704364 1998
1999 while (found) {
a6ec74c1 2000 SV *lengthcode = Nullsv;
18529408 2001#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
49704364 2002
2003 I32 datumtype = symptr->code;
2004 howlen_t howlen;
2005
2006 switch( howlen = symptr->howlen ){
2007 case e_no_len:
2008 case e_number:
2009 len = symptr->length;
2010 break;
2011 case e_star:
1109a392 2012 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 0 : items;
49704364 2013 break;
2014 }
2015
2016 /* Look ahead for next symbol. Do we have code/code? */
2017 lookahead = *symptr;
2018 found = next_symbol(&lookahead);
2019 if ( symptr->flags & FLAG_SLASH ) {
2020 if (found){
2021 if ( 0 == strchr( "aAZ", lookahead.code ) ||
2022 e_star != lookahead.howlen )
2023 Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
2024 lengthcode = sv_2mortal(newSViv(sv_len(items > 0
18529408 2025 ? *beglist : &PL_sv_no)
49704364 2026 + (lookahead.code == 'Z' ? 1 : 0)));
2027 } else {
2028 Perl_croak(aTHX_ "Code missing after '/' in pack");
2029 }
a6ec74c1 2030 }
49704364 2031
1109a392 2032 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2033 default:
1109a392 2034 Perl_croak(aTHX_ "Invalid type '%c' in pack", (int)TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2035 case '%':
49704364 2036 Perl_croak(aTHX_ "'%%' may not be used in pack");
a6ec74c1 2037 case '@':
49704364 2038 len += strrelbeg - SvCUR(cat);
a6ec74c1 2039 if (len > 0)
2040 goto grow;
2041 len = -len;
2042 if (len > 0)
2043 goto shrink;
2044 break;
18529408 2045 case '(':
2046 {
49704364 2047 tempsym_t savsym = *symptr;
66c611c5 2048 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2049 symptr->flags |= group_modifiers;
49704364 2050 symptr->patend = savsym.grpend;
2051 symptr->level++;
18529408 2052 while (len--) {
49704364 2053 symptr->patptr = savsym.grpbeg;
2054 beglist = pack_rec(cat, symptr, beglist, endlist );
2055 if (savsym.howlen == e_star && beglist == endlist)
18529408 2056 break; /* No way to continue */
2057 }
66c611c5 2058 symptr->flags &= ~group_modifiers;
49704364 2059 lookahead.flags = symptr->flags;
2060 *symptr = savsym;
18529408 2061 break;
2062 }
62f95557 2063 case 'X' | TYPE_IS_SHRIEKING:
2064 if (!len) /* Avoid division by 0 */
2065 len = 1;
2066 len = (SvCUR(cat)) % len;
2067 /* FALL THROUGH */
a6ec74c1 2068 case 'X':
2069 shrink:
eb160463 2070 if ((I32)SvCUR(cat) < len)
49704364 2071 Perl_croak(aTHX_ "'X' outside of string in pack");
a6ec74c1 2072 SvCUR(cat) -= len;
2073 *SvEND(cat) = '\0';
2074 break;
62f95557 2075 case 'x' | TYPE_IS_SHRIEKING:
2076 if (!len) /* Avoid division by 0 */
2077 len = 1;
2078 aint = (SvCUR(cat)) % len;
2079 if (aint) /* Other portable ways? */
2080 len = len - aint;
2081 else
2082 len = 0;
2083 /* FALL THROUGH */
49704364 2084
a6ec74c1 2085 case 'x':
2086 grow:
2087 while (len >= 10) {
2088 sv_catpvn(cat, null10, 10);
2089 len -= 10;
2090 }
2091 sv_catpvn(cat, null10, len);
2092 break;
2093 case 'A':
2094 case 'Z':
2095 case 'a':
2096 fromstr = NEXTFROM;
2097 aptr = SvPV(fromstr, fromlen);
49704364 2098 if (howlen == e_star) {
a6ec74c1 2099 len = fromlen;
2100 if (datumtype == 'Z')
2101 ++len;
2102 }
eb160463 2103 if ((I32)fromlen >= len) {
a6ec74c1 2104 sv_catpvn(cat, aptr, len);
2105 if (datumtype == 'Z')
2106 *(SvEND(cat)-1) = '\0';
2107 }
2108 else {
2109 sv_catpvn(cat, aptr, fromlen);
2110 len -= fromlen;
2111 if (datumtype == 'A') {
2112 while (len >= 10) {
2113 sv_catpvn(cat, space10, 10);
2114 len -= 10;
2115 }
2116 sv_catpvn(cat, space10, len);
2117 }
2118 else {
2119 while (len >= 10) {
2120 sv_catpvn(cat, null10, 10);
2121 len -= 10;
2122 }
2123 sv_catpvn(cat, null10, len);
2124 }
2125 }
2126 break;
2127 case 'B':
2128 case 'b':
2129 {
2130 register char *str;
2131 I32 saveitems;
2132
2133 fromstr = NEXTFROM;
2134 saveitems = items;
2135 str = SvPV(fromstr, fromlen);
49704364 2136 if (howlen == e_star)
a6ec74c1 2137 len = fromlen;
2138 aint = SvCUR(cat);
2139 SvCUR(cat) += (len+7)/8;
2140 SvGROW(cat, SvCUR(cat) + 1);
2141 aptr = SvPVX(cat) + aint;
eb160463 2142 if (len > (I32)fromlen)
a6ec74c1 2143 len = fromlen;
2144 aint = len;
2145 items = 0;
2146 if (datumtype == 'B') {
2147 for (len = 0; len++ < aint;) {
2148 items |= *str++ & 1;
2149 if (len & 7)
2150 items <<= 1;
2151 else {
2152 *aptr++ = items & 0xff;
2153 items = 0;
2154 }
2155 }
2156 }
2157 else {
2158 for (len = 0; len++ < aint;) {
2159 if (*str++ & 1)
2160 items |= 128;
2161 if (len & 7)
2162 items >>= 1;
2163 else {
2164 *aptr++ = items & 0xff;
2165 items = 0;
2166 }
2167 }
2168 }
2169 if (aint & 7) {
2170 if (datumtype == 'B')
2171 items <<= 7 - (aint & 7);
2172 else
2173 items >>= 7 - (aint & 7);
2174 *aptr++ = items & 0xff;
2175 }
2176 str = SvPVX(cat) + SvCUR(cat);
2177 while (aptr <= str)
2178 *aptr++ = '\0';
2179
2180 items = saveitems;
2181 }
2182 break;
2183 case 'H':
2184 case 'h':
2185 {
2186 register char *str;
2187 I32 saveitems;
2188
2189 fromstr = NEXTFROM;
2190 saveitems = items;
2191 str = SvPV(fromstr, fromlen);
49704364 2192 if (howlen == e_star)
a6ec74c1 2193 len = fromlen;
2194 aint = SvCUR(cat);
2195 SvCUR(cat) += (len+1)/2;
2196 SvGROW(cat, SvCUR(cat) + 1);
2197 aptr = SvPVX(cat) + aint;
eb160463 2198 if (len > (I32)fromlen)
a6ec74c1 2199 len = fromlen;
2200 aint = len;
2201 items = 0;
2202 if (datumtype == 'H') {
2203 for (len = 0; len++ < aint;) {
2204 if (isALPHA(*str))
2205 items |= ((*str++ & 15) + 9) & 15;
2206 else
2207 items |= *str++ & 15;
2208 if (len & 1)
2209 items <<= 4;
2210 else {
2211 *aptr++ = items & 0xff;
2212 items = 0;
2213 }
2214 }
2215 }
2216 else {
2217 for (len = 0; len++ < aint;) {
2218 if (isALPHA(*str))
2219 items |= (((*str++ & 15) + 9) & 15) << 4;
2220 else
2221 items |= (*str++ & 15) << 4;
2222 if (len & 1)
2223 items >>= 4;
2224 else {
2225 *aptr++ = items & 0xff;
2226 items = 0;
2227 }
2228 }
2229 }
2230 if (aint & 1)
2231 *aptr++ = items & 0xff;
2232 str = SvPVX(cat) + SvCUR(cat);
2233 while (aptr <= str)
2234 *aptr++ = '\0';
2235
2236 items = saveitems;
2237 }
2238 break;
2239 case 'C':
2240 case 'c':
2241 while (len-- > 0) {
2242 fromstr = NEXTFROM;
1109a392 2243 switch (TYPE_NO_MODIFIERS(datumtype)) {
a6ec74c1 2244 case 'C':
2245 aint = SvIV(fromstr);
2246 if ((aint < 0 || aint > 255) &&
2247 ckWARN(WARN_PACK))
9014280d 2248 Perl_warner(aTHX_ packWARN(WARN_PACK),
49704364 2249 "Character in 'C' format wrapped in pack");
a6ec74c1 2250 achar = aint & 255;
2251 sv_catpvn(cat, &achar, sizeof(char));
2252 break;
2253 case 'c':
2254 aint = SvIV(fromstr);
2255 if ((aint < -128 || aint > 127) &&
2256 ckWARN(WARN_PACK))
9014280d 2257 Perl_warner(aTHX_ packWARN(WARN_PACK),
49704364 2258 "Character in 'c' format wrapped in pack" );
a6ec74c1 2259 achar = aint & 255;
2260 sv_catpvn(cat, &achar, sizeof(char));
2261 break;
2262 }
2263 }
2264 break;
2265 case 'U':
2266 while (len-- > 0) {
2267 fromstr = NEXTFROM;
e87322b2 2268 auint = UNI_TO_NATIVE(SvUV(fromstr));
89ebb4a3 2269 SvGROW(cat, SvCUR(cat) + UTF8_MAXBYTES + 1);
52ea3e69 2270 SvCUR_set(cat,
2271 (char*)uvchr_to_utf8_flags((U8*)SvEND(cat),
2272 auint,
2273 ckWARN(WARN_UTF8) ?
2274 0 : UNICODE_ALLOW_ANY)
2275 - SvPVX(cat));
a6ec74c1 2276 }
2277 *SvEND(cat) = '\0';
2278 break;
2279 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2280 case 'f':
a6ec74c1 2281 while (len-- > 0) {
2282 fromstr = NEXTFROM;
5cdb9e01 2283#ifdef __VOS__
2284/* VOS does not automatically map a floating-point overflow
2285 during conversion from double to float into infinity, so we
2286 do it by hand. This code should either be generalized for
2287 any OS that needs it, or removed if and when VOS implements
2288 posix-976 (suggestion to support mapping to infinity).
2289 Paul.Green@stratus.com 02-04-02. */
2290 if (SvNV(fromstr) > FLT_MAX)
2291 afloat = _float_constants[0]; /* single prec. inf. */
2292 else if (SvNV(fromstr) < -FLT_MAX)
2293 afloat = _float_constants[0]; /* single prec. inf. */
2294 else afloat = (float)SvNV(fromstr);
2295#else
baf3cf9c 2296# if defined(VMS) && !defined(__IEEE_FP)
2297/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2298 * on Alpha; fake it if we don't have them.
2299 */
2300 if (SvNV(fromstr) > FLT_MAX)
2301 afloat = FLT_MAX;
2302 else if (SvNV(fromstr) < -FLT_MAX)
2303 afloat = -FLT_MAX;
2304 else afloat = (float)SvNV(fromstr);
2305# else
a6ec74c1 2306 afloat = (float)SvNV(fromstr);
baf3cf9c 2307# endif
5cdb9e01 2308#endif
1109a392 2309 DO_BO_PACK_N(afloat, float);
a6ec74c1 2310 sv_catpvn(cat, (char *)&afloat, sizeof (float));
2311 }
2312 break;
2313 case 'd':
a6ec74c1 2314 while (len-- > 0) {
2315 fromstr = NEXTFROM;
5cdb9e01 2316#ifdef __VOS__
2317/* VOS does not automatically map a floating-point overflow
2318 during conversion from long double to double into infinity,
2319 so we do it by hand. This code should either be generalized
2320 for any OS that needs it, or removed if and when VOS
2321 implements posix-976 (suggestion to support mapping to
2322 infinity). Paul.Green@stratus.com 02-04-02. */
2323 if (SvNV(fromstr) > DBL_MAX)
2324 adouble = _double_constants[0]; /* double prec. inf. */
2325 else if (SvNV(fromstr) < -DBL_MAX)
2326 adouble = _double_constants[0]; /* double prec. inf. */
2327 else adouble = (double)SvNV(fromstr);
2328#else
baf3cf9c 2329# if defined(VMS) && !defined(__IEEE_FP)
2330/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2331 * on Alpha; fake it if we don't have them.
2332 */
2333 if (SvNV(fromstr) > DBL_MAX)
2334 adouble = DBL_MAX;
2335 else if (SvNV(fromstr) < -DBL_MAX)
2336 adouble = -DBL_MAX;
2337 else adouble = (double)SvNV(fromstr);
2338# else
a6ec74c1 2339 adouble = (double)SvNV(fromstr);
baf3cf9c 2340# endif
5cdb9e01 2341#endif
1109a392 2342 DO_BO_PACK_N(adouble, double);
a6ec74c1 2343 sv_catpvn(cat, (char *)&adouble, sizeof (double));
2344 }
2345 break;
92d41999 2346 case 'F':
1109a392 2347 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999 2348 while (len-- > 0) {
2349 fromstr = NEXTFROM;
2350 anv = SvNV(fromstr);
1109a392 2351 DO_BO_PACK_N(anv, NV);
92d41999 2352 sv_catpvn(cat, (char *)&anv, NVSIZE);
2353 }
2354 break;
2355#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2356 case 'D':
1109a392 2357 /* long doubles can have unused bits, which may be nonzero */
2358 Zero(&aldouble, 1, long double);
92d41999 2359 while (len-- > 0) {
2360 fromstr = NEXTFROM;
2361 aldouble = (long double)SvNV(fromstr);
1109a392 2362 DO_BO_PACK_N(aldouble, long double);
92d41999 2363 sv_catpvn(cat, (char *)&aldouble, LONG_DOUBLESIZE);
2364 }
2365 break;
2366#endif
7212898e 2367#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2368 case 'n' | TYPE_IS_SHRIEKING:
7212898e 2369#endif
a6ec74c1 2370 case 'n':
2371 while (len-- > 0) {
2372 fromstr = NEXTFROM;
ef108786 2373 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2374#ifdef HAS_HTONS
ef108786 2375 ai16 = PerlSock_htons(ai16);
a6ec74c1 2376#endif
ef108786 2377 CAT16(cat, &ai16);
a6ec74c1 2378 }
2379 break;
7212898e 2380#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2381 case 'v' | TYPE_IS_SHRIEKING:
7212898e 2382#endif
a6ec74c1 2383 case 'v':
2384 while (len-- > 0) {
2385 fromstr = NEXTFROM;
ef108786 2386 ai16 = (I16)SvIV(fromstr);
a6ec74c1 2387#ifdef HAS_HTOVS
ef108786 2388 ai16 = htovs(ai16);
a6ec74c1 2389#endif
ef108786 2390 CAT16(cat, &ai16);
a6ec74c1 2391 }
2392 break;
49704364 2393 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 2394#if SHORTSIZE != SIZE16
49704364 2395 {
a6ec74c1 2396 while (len-- > 0) {
2397 fromstr = NEXTFROM;
2398 aushort = SvUV(fromstr);
1109a392 2399 DO_BO_PACK(aushort, s);
a6ec74c1 2400 sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short));
2401 }
49704364 2402 }
2403 break;
2404#else
2405 /* Fall through! */
a6ec74c1 2406#endif
49704364 2407 case 'S':
a6ec74c1 2408 {
a6ec74c1 2409 while (len-- > 0) {
2410 fromstr = NEXTFROM;
ef108786 2411 au16 = (U16)SvUV(fromstr);
2412 DO_BO_PACK(au16, 16);
2413 CAT16(cat, &au16);
a6ec74c1 2414 }
2415
2416 }
2417 break;
49704364 2418 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 2419#if SHORTSIZE != SIZE16
49704364 2420 {
a6ec74c1 2421 while (len-- > 0) {
2422 fromstr = NEXTFROM;
2423 ashort = SvIV(fromstr);
1109a392 2424 DO_BO_PACK(ashort, s);
a6ec74c1 2425 sv_catpvn(cat, (char *)&ashort, sizeof(short));
2426 }
2427 }
49704364 2428 break;
2429#else
2430 /* Fall through! */
a6ec74c1 2431#endif
49704364 2432 case 's':
2433 while (len-- > 0) {
2434 fromstr = NEXTFROM;
ef108786 2435 ai16 = (I16)SvIV(fromstr);
2436 DO_BO_PACK(ai16, 16);
2437 CAT16(cat, &ai16);
a6ec74c1 2438 }
2439 break;
2440 case 'I':
49704364 2441 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 2442 while (len-- > 0) {
2443 fromstr = NEXTFROM;
2444 auint = SvUV(fromstr);
1109a392 2445 DO_BO_PACK(auint, i);
a6ec74c1 2446 sv_catpvn(cat, (char*)&auint, sizeof(unsigned int));
2447 }
2448 break;
92d41999 2449 case 'j':
2450 while (len-- > 0) {
2451 fromstr = NEXTFROM;
2452 aiv = SvIV(fromstr);
1109a392 2453#if IVSIZE == INTSIZE
2454 DO_BO_PACK(aiv, i);
2455#elif IVSIZE == LONGSIZE
2456 DO_BO_PACK(aiv, l);
2457#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2458 DO_BO_PACK(aiv, 64);
2459#endif
92d41999 2460 sv_catpvn(cat, (char*)&aiv, IVSIZE);
2461 }
2462 break;
2463 case 'J':
2464 while (len-- > 0) {
2465 fromstr = NEXTFROM;
2466 auv = SvUV(fromstr);
1109a392 2467#if UVSIZE == INTSIZE
2468 DO_BO_PACK(auv, i);
2469#elif UVSIZE == LONGSIZE
2470 DO_BO_PACK(auv, l);
2471#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2472 DO_BO_PACK(auv, 64);
2473#endif
92d41999 2474 sv_catpvn(cat, (char*)&auv, UVSIZE);
2475 }
2476 break;
a6ec74c1 2477 case 'w':
2478 while (len-- > 0) {
2479 fromstr = NEXTFROM;
15e9f109 2480 anv = SvNV(fromstr);
a6ec74c1 2481
15e9f109 2482 if (anv < 0)
49704364 2483 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
a6ec74c1 2484
196b62db 2485 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2486 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2487 any negative IVs will have already been got by the croak()
2488 above. IOK is untrue for fractions, so we test them
2489 against UV_MAX_P1. */
15e9f109 2490 if (SvIOK(fromstr) || anv < UV_MAX_P1)
a6ec74c1 2491 {
7c1b502b 2492 char buf[(sizeof(UV)*8)/7+1];
a6ec74c1 2493 char *in = buf + sizeof(buf);
196b62db 2494 UV auv = SvUV(fromstr);
a6ec74c1 2495
2496 do {
eb160463 2497 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1 2498 auv >>= 7;
2499 } while (auv);
2500 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2501 sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
2502 }
2503 else if (SvPOKp(fromstr)) { /* decimal string arithmetics */
2504 char *from, *result, *in;
2505 SV *norm;
2506 STRLEN len;
2507 bool done;
2508
2509 /* Copy string and check for compliance */
2510 from = SvPV(fromstr, len);
2511 if ((norm = is_an_int(from, len)) == NULL)
49704364 2512 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
a6ec74c1 2513
2514 New('w', result, len, char);
2515 in = result + len;
2516 done = FALSE;
2517 while (!done)
2518 *--in = div128(norm, &done) | 0x80;
2519 result[len - 1] &= 0x7F; /* clear continue bit */
2520 sv_catpvn(cat, in, (result + len) - in);
2521 Safefree(result);
2522 SvREFCNT_dec(norm); /* free norm */
2523 }
2524 else if (SvNOKp(fromstr)) {
0258719b 2525 /* 10**NV_MAX_10_EXP is the largest power of 10
2526 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
2527 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2528 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2529 And with that many bytes only Inf can overflow.
8f8d40ab 2530 Some C compilers are strict about integral constant
2531 expressions so we conservatively divide by a slightly
2532 smaller integer instead of multiplying by the exact
2533 floating-point value.
0258719b 2534 */
2535#ifdef NV_MAX_10_EXP
8f8d40ab 2536/* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2537 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 2538#else
8f8d40ab 2539/* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2540 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 2541#endif
a6ec74c1 2542 char *in = buf + sizeof(buf);
2543
15e9f109 2544 anv = Perl_floor(anv);
a6ec74c1 2545 do {
15e9f109 2546 NV next = Perl_floor(anv / 128);
a6ec74c1 2547 if (in <= buf) /* this cannot happen ;-) */
49704364 2548 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 2549 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109 2550 anv = next;
2551 } while (anv > 0);
a6ec74c1 2552 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2553 sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
2554 }
735b914b 2555 else {
2556 char *from, *result, *in;
2557 SV *norm;
2558 STRLEN len;
2559 bool done;
2560
2561 /* Copy string and check for compliance */
2562 from = SvPV(fromstr, len);
2563 if ((norm = is_an_int(from, len)) == NULL)
49704364 2564 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 2565
2566 New('w', result, len, char);
2567 in = result + len;
2568 done = FALSE;
2569 while (!done)
2570 *--in = div128(norm, &done) | 0x80;
2571 result[len - 1] &= 0x7F; /* clear continue bit */
2572 sv_catpvn(cat, in, (result + len) - in);
2573 Safefree(result);
2574 SvREFCNT_dec(norm); /* free norm */
2575 }
a6ec74c1 2576 }
2577 break;
2578 case 'i':
49704364 2579 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 2580 while (len-- > 0) {
2581 fromstr = NEXTFROM;
2582 aint = SvIV(fromstr);
1109a392 2583 DO_BO_PACK(aint, i);
a6ec74c1 2584 sv_catpvn(cat, (char*)&aint, sizeof(int));
2585 }
2586 break;
7212898e 2587#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2588 case 'N' | TYPE_IS_SHRIEKING:
7212898e 2589#endif
a6ec74c1 2590 case 'N':
2591 while (len-- > 0) {
2592 fromstr = NEXTFROM;
ef108786 2593 au32 = SvUV(fromstr);
a6ec74c1 2594#ifdef HAS_HTONL
ef108786 2595 au32 = PerlSock_htonl(au32);
a6ec74c1 2596#endif
ef108786 2597 CAT32(cat, &au32);
a6ec74c1 2598 }
2599 break;
7212898e 2600#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 2601 case 'V' | TYPE_IS_SHRIEKING:
7212898e 2602#endif
a6ec74c1 2603 case 'V':
2604 while (len-- > 0) {
2605 fromstr = NEXTFROM;
ef108786 2606 au32 = SvUV(fromstr);
a6ec74c1 2607#ifdef HAS_HTOVL
ef108786 2608 au32 = htovl(au32);
a6ec74c1 2609#endif
ef108786 2610 CAT32(cat, &au32);
a6ec74c1 2611 }
2612 break;
49704364 2613 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 2614#if LONGSIZE != SIZE32
49704364 2615 {
a6ec74c1 2616 while (len-- > 0) {
2617 fromstr = NEXTFROM;
2618 aulong = SvUV(fromstr);
1109a392 2619 DO_BO_PACK(aulong, l);
a6ec74c1 2620 sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long));
2621 }
2622 }
49704364 2623 break;
2624#else
2625 /* Fall though! */
a6ec74c1 2626#endif
49704364 2627 case 'L':
a6ec74c1 2628 {
2629 while (len-- > 0) {
2630 fromstr = NEXTFROM;
ef108786 2631 au32 = SvUV(fromstr);
2632 DO_BO_PACK(au32, 32);
2633 CAT32(cat, &au32);
a6ec74c1 2634 }
2635 }
2636 break;
49704364 2637 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 2638#if LONGSIZE != SIZE32
49704364 2639 {
a6ec74c1 2640 while (len-- > 0) {
2641 fromstr = NEXTFROM;
2642 along = SvIV(fromstr);
1109a392 2643 DO_BO_PACK(along, l);
a6ec74c1 2644 sv_catpvn(cat, (char *)&along, sizeof(long));
2645 }
2646 }
49704364 2647 break;
2648#else
2649 /* Fall though! */
a6ec74c1 2650#endif
49704364 2651 case 'l':
2652 while (len-- > 0) {
2653 fromstr = NEXTFROM;
ef108786 2654 ai32 = SvIV(fromstr);
2655 DO_BO_PACK(ai32, 32);
2656 CAT32(cat, &ai32);
a6ec74c1 2657 }
2658 break;
2659#ifdef HAS_QUAD
2660 case 'Q':
2661 while (len-- > 0) {
2662 fromstr = NEXTFROM;
2663 auquad = (Uquad_t)SvUV(fromstr);
1109a392 2664 DO_BO_PACK(auquad, 64);
a6ec74c1 2665 sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t));
2666 }
2667 break;
2668 case 'q':
2669 while (len-- > 0) {
2670 fromstr = NEXTFROM;
2671 aquad = (Quad_t)SvIV(fromstr);
1109a392 2672 DO_BO_PACK(aquad, 64);
a6ec74c1 2673 sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t));
2674 }
2675 break;
2676#endif
2677 case 'P':
2678 len = 1; /* assume SV is correct length */
49704364 2679 /* Fall through! */
a6ec74c1 2680 case 'p':
2681 while (len-- > 0) {
2682 fromstr = NEXTFROM;
2683 if (fromstr == &PL_sv_undef)
2684 aptr = NULL;
2685 else {
2686 STRLEN n_a;
2687 /* XXX better yet, could spirit away the string to
2688 * a safe spot and hang on to it until the result
2689 * of pack() (and all copies of the result) are
2690 * gone.
2691 */
2692 if (ckWARN(WARN_PACK) && (SvTEMP(fromstr)
2693 || (SvPADTMP(fromstr)
2694 && !SvREADONLY(fromstr))))
2695 {
9014280d 2696 Perl_warner(aTHX_ packWARN(WARN_PACK),
a6ec74c1 2697 "Attempt to pack pointer to temporary value");
2698 }
2699 if (SvPOK(fromstr) || SvNIOK(fromstr))
2700 aptr = SvPV(fromstr,n_a);
2701 else
2702 aptr = SvPV_force(fromstr,n_a);
2703 }
1109a392 2704 DO_BO_PACK_P(aptr);
a6ec74c1 2705 sv_catpvn(cat, (char*)&aptr, sizeof(char*));
2706 }
2707 break;
2708 case 'u':
2709 fromstr = NEXTFROM;
2710 aptr = SvPV(fromstr, fromlen);
2711 SvGROW(cat, fromlen * 4 / 3);
19c9db5e 2712 if (len <= 2)
a6ec74c1 2713 len = 45;
2714 else
2715 len = len / 3 * 3;
2716 while (fromlen > 0) {
2717 I32 todo;
2718
eb160463 2719 if ((I32)fromlen > len)
a6ec74c1 2720 todo = len;
2721 else
2722 todo = fromlen;
2723 doencodes(cat, aptr, todo);
2724 fromlen -= todo;
2725 aptr += todo;
2726 }
2727 break;
2728 }
49704364 2729 *symptr = lookahead;
a6ec74c1 2730 }
49704364 2731 return beglist;
18529408 2732}
2733#undef NEXTFROM
2734
2735
2736PP(pp_pack)
2737{
2738 dSP; dMARK; dORIGMARK; dTARGET;
2739 register SV *cat = TARG;
2740 STRLEN fromlen;
2741 register char *pat = SvPVx(*++MARK, fromlen);
2742 register char *patend = pat + fromlen;
2743
2744 MARK++;
2745 sv_setpvn(cat, "", 0);
2746
7accc089 2747 packlist(cat, pat, patend, MARK, SP + 1);
18529408 2748
a6ec74c1 2749 SvSETMAGIC(cat);
2750 SP = ORIGMARK;
2751 PUSHs(cat);
2752 RETURN;
2753}
a6ec74c1 2754
73cb7263 2755/*
2756 * Local variables:
2757 * c-indentation-style: bsd
2758 * c-basic-offset: 4
2759 * indent-tabs-mode: t
2760 * End:
2761 *
edf815fd 2762 * vim: shiftwidth=4:
73cb7263 2763*/