X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pp_pack.c;h=7d7bd32736e2723f9a39b23d47f52b1fc93c8aba;hb=2ccddc9020a3f40c0b46168e45b42ae6d0c415f2;hp=1075143a7048748df6821d6c39d54c3d14ab57ec;hpb=fa8ec7c13dcb82551b3b5da77efcc0da9b1b45f5;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pp_pack.c b/pp_pack.c index 1075143..7d7bd32 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -1,12 +1,21 @@ /* pp_pack.c * - * Copyright (c) 1991-2001, Larry Wall + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * */ +/* + * He still hopefully carried some of his gear in his pack: a small tinder-box, + * two small shallow pans, the smaller fitting into the larger; inside them a + * wooden spoon, a short two-pronged fork and some skewers were stowed; and + * hidden at the bottom of the pack in a flat wooden box a dwindling treasure, + * some salt. + */ + #include "EXTERN.h" #define PERL_IN_PP_PACK_C #include "perl.h" @@ -75,6 +84,16 @@ static double UV_MAX_cxux = ((double)UV_MAX); # define CAT32(sv,p) sv_catpvn(sv, (char*)(p), SIZE32) #endif +/* Avoid stack overflow due to pathological templates. 100 should be plenty. */ +#define MAX_SUB_TEMPLATE_LEVEL 100 + +/* flags */ +#define FLAG_UNPACK_ONLY_ONE 0x10 +#define FLAG_UNPACK_DO_UTF8 0x08 +#define FLAG_SLASH 0x04 +#define FLAG_COMMA 0x02 +#define FLAG_PACK 0x01 + STATIC SV * S_mul128(pTHX_ SV *sv, U8 m) { @@ -96,8 +115,8 @@ S_mul128(pTHX_ SV *sv, U8 m) t--; while (t > s) { i = ((*t - '0') << 7) + m; - *(t--) = '0' + (i % 10); - m = i / 10; + *(t--) = '0' + (char)(i % 10); + m = (char)(i / 10); } return (sv); } @@ -115,32 +134,412 @@ S_mul128(pTHX_ SV *sv, U8 m) #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') #endif +#define TYPE_IS_SHRIEKING 0x100 -PP(pp_unpack) +/* Returns the sizeof() struct described by pat */ +STATIC I32 +S_measure_struct(pTHX_ register tempsym_t* symptr) { - dSP; - dPOPPOPssrl; - I32 start_sp_offset = SP - PL_stack_base; - I32 gimme = GIMME_V; - SV *sv; - STRLEN llen; - STRLEN rlen; - register char *pat = SvPV(left, llen); -#ifdef PACKED_IS_OCTETS - /* Packed side is assumed to be octets - so force downgrade if it - has been UTF-8 encoded by accident - */ - register char *s = SvPVbyte(right, rlen); + register I32 len = 0; + register I32 total = 0; + int star; + + register int size; + + while (next_symbol(symptr)) { + + switch( symptr->howlen ){ + case e_no_len: + case e_number: + len = symptr->length; + break; + case e_star: + Perl_croak(aTHX_ "Within []-length '*' not allowed in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + break; + } + + switch(symptr->code) { + default: + Perl_croak(aTHX_ "Invalid type '%c' in %s", + (int)symptr->code, + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + case '@': + case '/': + case 'U': /* XXXX Is it correct? */ + case 'w': + case 'u': + Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", + (int)symptr->code, + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + case '%': + size = 0; + break; + case '(': + { + tempsym_t savsym = *symptr; + symptr->patptr = savsym.grpbeg; + symptr->patend = savsym.grpend; + /* XXXX Theoretically, we need to measure many times at different + positions, since the subexpression may contain + alignment commands, but be not of aligned length. + Need to detect this and croak(). */ + size = measure_struct(symptr); + *symptr = savsym; + break; + } + case 'X' | TYPE_IS_SHRIEKING: + /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. */ + if (!len) /* Avoid division by 0 */ + len = 1; + len = total % len; /* Assumed: the start is aligned. */ + /* FALL THROUGH */ + case 'X': + size = -1; + if (total < len) + Perl_croak(aTHX_ "'X' outside of string in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + break; + case 'x' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + star = total % len; /* Assumed: the start is aligned. */ + if (star) /* Other portable ways? */ + len = len - star; + else + len = 0; + /* FALL THROUGH */ + case 'x': + case 'A': + case 'Z': + case 'a': + case 'c': + case 'C': + size = 1; + break; + case 'B': + case 'b': + len = (len + 7)/8; + size = 1; + break; + case 'H': + case 'h': + len = (len + 1)/2; + size = 1; + break; + case 's' | TYPE_IS_SHRIEKING: +#if SHORTSIZE != SIZE16 + size = sizeof(short); + break; #else - register char *s = SvPV(right, rlen); + /* FALL THROUGH */ #endif - char *strend = s + rlen; - char *strbeg = s; - register char *patend = pat + llen; + case 's': + size = SIZE16; + break; + case 'S' | TYPE_IS_SHRIEKING: +#if SHORTSIZE != SIZE16 + size = sizeof(unsigned short); + break; +#else + /* FALL THROUGH */ +#endif + case 'v': + case 'n': + case 'S': + size = SIZE16; + break; + case 'i' | TYPE_IS_SHRIEKING: + case 'i': + size = sizeof(int); + break; + case 'I' | TYPE_IS_SHRIEKING: + case 'I': + size = sizeof(unsigned int); + break; + case 'j': + size = IVSIZE; + break; + case 'J': + size = UVSIZE; + break; + case 'l' | TYPE_IS_SHRIEKING: +#if LONGSIZE != SIZE32 + size = sizeof(long); + break; +#else + /* FALL THROUGH */ +#endif + case 'l': + size = SIZE32; + break; + case 'L' | TYPE_IS_SHRIEKING: +#if LONGSIZE != SIZE32 + size = sizeof(unsigned long); + break; +#else + /* FALL THROUGH */ +#endif + case 'V': + case 'N': + case 'L': + size = SIZE32; + break; + case 'P': + len = 1; + /* FALL THROUGH */ + case 'p': + size = sizeof(char*); + break; +#ifdef HAS_QUAD + case 'q': + size = sizeof(Quad_t); + break; + case 'Q': + size = sizeof(Uquad_t); + break; +#endif + case 'f': + size = sizeof(float); + break; + case 'd': + size = sizeof(double); + break; + case 'F': + size = NVSIZE; + break; +#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) + case 'D': + size = LONG_DOUBLESIZE; + break; +#endif + } + total += len * size; + } + return total; +} + + +/* locate matching closing parenthesis or bracket + * returns char pointer to char after match, or NULL + */ +STATIC char * +S_group_end(pTHX_ register char *patptr, register char *patend, char ender) +{ + while (patptr < patend) { + char c = *patptr++; + + if (isSPACE(c)) + continue; + else if (c == ender) + return patptr-1; + else if (c == '#') { + while (patptr < patend && *patptr != '\n') + patptr++; + continue; + } else if (c == '(') + patptr = group_end(patptr, patend, ')') + 1; + else if (c == '[') + patptr = group_end(patptr, patend, ']') + 1; + } + Perl_croak(aTHX_ "No group ending character '%c' found in template", + ender); + return 0; +} + + +/* Convert unsigned decimal number to binary. + * Expects a pointer to the first digit and address of length variable + * Advances char pointer to 1st non-digit char and returns number + */ +STATIC char * +S_get_num(pTHX_ register char *patptr, I32 *lenptr ) +{ + I32 len = *patptr++ - '0'; + while (isDIGIT(*patptr)) { + if (len >= 0x7FFFFFFF/10) + Perl_croak(aTHX_ "pack/unpack repeat count overflow"); + len = (len * 10) + (*patptr++ - '0'); + } + *lenptr = len; + return patptr; +} + +/* The marvellous template parsing routine: Using state stored in *symptr, + * locates next template code and count + */ +STATIC bool +S_next_symbol(pTHX_ register tempsym_t* symptr ) +{ + register char* patptr = symptr->patptr; + register char* patend = symptr->patend; + + symptr->flags &= ~FLAG_SLASH; + + while (patptr < patend) { + if (isSPACE(*patptr)) + patptr++; + else if (*patptr == '#') { + patptr++; + while (patptr < patend && *patptr != '\n') + patptr++; + if (patptr < patend) + patptr++; + } else { + /* We should have found a template code */ + I32 code = *patptr++ & 0xFF; + + if (code == ','){ /* grandfather in commas but with a warning */ + if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ + symptr->flags |= FLAG_COMMA; + Perl_warner(aTHX_ packWARN(WARN_UNPACK), + "Invalid type ',' in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + } + continue; + } + + /* for '(', skip to ')' */ + if (code == '(') { + if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' ) + Perl_croak(aTHX_ "()-group starts with a count in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + symptr->grpbeg = patptr; + patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') ); + if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL ) + Perl_croak(aTHX_ "Too deeply nested ()-groups in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + } + + /* test for '!' modifier */ + if (patptr < patend && *patptr == '!') { + static const char natstr[] = "sSiIlLxX"; + patptr++; + if (strchr(natstr, code)) + code |= TYPE_IS_SHRIEKING; + else + Perl_croak(aTHX_ "'!' allowed only after types %s in %s", + natstr, symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + } + + /* look for count and/or / */ + if (patptr < patend) { + if (isDIGIT(*patptr)) { + patptr = get_num( patptr, &symptr->length ); + symptr->howlen = e_number; + + } else if (*patptr == '*') { + patptr++; + symptr->howlen = e_star; + + } else if (*patptr == '[') { + char* lenptr = ++patptr; + symptr->howlen = e_number; + patptr = group_end( patptr, patend, ']' ) + 1; + /* what kind of [] is it? */ + if (isDIGIT(*lenptr)) { + lenptr = get_num( lenptr, &symptr->length ); + if( *lenptr != ']' ) + Perl_croak(aTHX_ "Malformed integer in [] in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack"); + } else { + tempsym_t savsym = *symptr; + symptr->patend = patptr-1; + symptr->patptr = lenptr; + savsym.length = measure_struct(symptr); + *symptr = savsym; + } + } else { + symptr->howlen = e_no_len; + symptr->length = 1; + } + + /* try to find / */ + while (patptr < patend) { + if (isSPACE(*patptr)) + patptr++; + else if (*patptr == '#') { + patptr++; + while (patptr < patend && *patptr != '\n') + patptr++; + if (patptr < patend) + patptr++; + } else { + if( *patptr == '/' ){ + symptr->flags |= FLAG_SLASH; + patptr++; + if( patptr < patend && + (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[') ) + Perl_croak(aTHX_ "'/' does not take a repeat count in %s", + symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + } + break; + } + } + } else { + /* at end - no count, no / */ + symptr->howlen = e_no_len; + symptr->length = 1; + } + + symptr->code = code; + symptr->patptr = patptr; + return TRUE; + } + } + symptr->patptr = patptr; + return FALSE; +} + +/* +=for apidoc unpack_str + +The engine implementing unpack() Perl function. Note: parameters strbeg, new_s +and ocnt are not used. This call should not be used, use unpackstring instead. + +=cut */ + +I32 +Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags) +{ + tempsym_t sym = { 0 }; + sym.patptr = pat; + sym.patend = patend; + sym.flags = flags; + + return unpack_rec(&sym, s, s, strend, NULL ); +} + +/* +=for apidoc unpackstring + +The engine implementing unpack() Perl function. + +=cut */ + +I32 +Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char *strend, U32 flags) +{ + tempsym_t sym = { 0 }; + sym.patptr = pat; + sym.patend = patend; + sym.flags = flags; + + return unpack_rec(&sym, s, s, strend, NULL ); +} + +STATIC +I32 +S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, char *strend, char **new_s ) +{ + dSP; I32 datumtype; - register I32 len; + register I32 len = 0; register I32 bits = 0; register char *str; + SV *sv; + I32 start_sp_offset = SP - PL_stack_base; + howlen_t howlen; /* These must not be in registers: */ short ashort; @@ -159,131 +558,105 @@ PP(pp_unpack) float afloat; double adouble; I32 checksum = 0; - UV culong = 0; + UV cuv = 0; NV cdouble = 0.0; - const int bits_in_uv = 8 * sizeof(culong); - int commas = 0; - int star; -#ifdef PERL_NATINT_PACK - int natint; /* native integer */ - int unatint; /* unsigned native integer */ + const int bits_in_uv = 8 * sizeof(cuv); + char* strrelbeg = s; + bool beyond = FALSE; + bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; + + IV aiv; + UV auv; + NV anv; +#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) + long double aldouble; #endif - bool do_utf8 = DO_UTF8(right); - - if (gimme != G_ARRAY) { /* arrange to do first one only */ - /*SUPPRESS 530*/ - /* Skipping spaces will be useful later on. */ - while (isSPACE(*pat)) - pat++; - /* Give up on optimisation of only doing first if the pattern - is getting too complex to parse. */ - if (*pat != '#') { - /* This pre-parser will let through certain invalid patterns - such as rows of !s, but the nothing that would cause multiple - conversions to be attempted. */ - char *here = pat; - bool seen_percent = FALSE; - if (*here == '%') - seen_percent = TRUE; - while (!isALPHA(*here) || *here == 'x') - here++; - if (strchr("aAZbBhHP", *here) || seen_percent) { - here++; - while (isDIGIT(*here) || *here == '*' || *here == '!') - here++; - } - else - here++; - patend = here; + + while (next_symbol(symptr)) { + datumtype = symptr->code; + /* do first one only unless in list context + / is implemented by unpacking the count, then poping it from the + stack, so must check that we're not in the middle of a / */ + if ( unpack_only_one + && (SP - PL_stack_base == start_sp_offset + 1) + && (datumtype != '/') ) /* XXX can this be omitted */ + break; + + switch( howlen = symptr->howlen ){ + case e_no_len: + case e_number: + len = symptr->length; + break; + case e_star: + len = strend - strbeg; /* long enough */ + break; } - } - while (pat < patend) { - reparse: - datumtype = *pat++ & 0xFF; -#ifdef PERL_NATINT_PACK - natint = 0; -#endif - if (isSPACE(datumtype)) - continue; - if (datumtype == '#') { - while (pat < patend && *pat != '\n') - pat++; - continue; - } - if (*pat == '!') { - char *natstr = "sSiIlL"; - if (strchr(natstr, datumtype)) { -#ifdef PERL_NATINT_PACK - natint = 1; -#endif - pat++; - } - else - DIE(aTHX_ "'!' allowed only after types %s", natstr); - } - star = 0; - if (pat >= patend) - len = 1; - else if (*pat == '*') { - len = strend - strbeg; /* long enough */ - pat++; - star = 1; - } - else if (isDIGIT(*pat)) { - len = *pat++ - '0'; - while (isDIGIT(*pat)) { - len = (len * 10) + (*pat++ - '0'); - if (len < 0) - DIE(aTHX_ "Repeat count in unpack overflows"); - } - } - else - len = (datumtype != '@'); redo_switch: + beyond = s >= strend; switch(datumtype) { default: - DIE(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype); - case ',': /* grandfather in commas but with a warning */ - if (commas++ == 0 && ckWARN(WARN_UNPACK)) - Perl_warner(aTHX_ WARN_UNPACK, - "Invalid type in unpack: '%c'", (int)datumtype); - break; + Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)datumtype ); + case '%': - if (len == 1 && pat[-1] != '1') - len = 16; + if (howlen == e_no_len) + len = 16; /* len is not specified */ checksum = len; - culong = 0; + cuv = 0; cdouble = 0; - if (pat < patend) - goto reparse; + continue; + break; + case '(': + { + char *ss = s; /* Move from register */ + tempsym_t savsym = *symptr; + symptr->patend = savsym.grpend; + symptr->level++; + PUTBACK; + while (len--) { + symptr->patptr = savsym.grpbeg; + unpack_rec(symptr, ss, strbeg, strend, &ss ); + if (ss == strend && savsym.howlen == e_star) + break; /* No way to continue */ + } + SPAGAIN; + s = ss; + savsym.flags = symptr->flags; + *symptr = savsym; break; + } case '@': - if (len > strend - strbeg) - DIE(aTHX_ "@ outside of string"); - s = strbeg + len; + if (len > strend - strrelbeg) + Perl_croak(aTHX_ "'@' outside of string in unpack"); + s = strrelbeg + len; break; + case 'X' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + len = (s - strbeg) % len; + /* FALL THROUGH */ case 'X': if (len > s - strbeg) - DIE(aTHX_ "X outside of string"); + Perl_croak(aTHX_ "'X' outside of string in unpack" ); s -= len; break; + case 'x' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + aint = (s - strbeg) % len; + if (aint) /* Other portable ways? */ + len = len - aint; + else + len = 0; + /* FALL THROUGH */ case 'x': if (len > strend - s) - DIE(aTHX_ "x outside of string"); + Perl_croak(aTHX_ "'x' outside of string in unpack"); s += len; break; case '/': - if (start_sp_offset >= SP - PL_stack_base) - DIE(aTHX_ "/ must follow a numeric type"); - datumtype = *pat++; - if (*pat == '*') - pat++; /* ignore '*' for compatibility with pack */ - if (isDIGIT(*pat)) - DIE(aTHX_ "/ cannot take a count" ); - len = POPi; - star = 0; - goto redo_switch; + Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); + break; case 'A': case 'Z': case 'a': @@ -293,13 +666,14 @@ PP(pp_unpack) goto uchar_checksum; sv = NEWSV(35, len); sv_setpvn(sv, s, len); - s += len; - if (datumtype == 'A' || datumtype == 'Z') { + if (len > 0 && (datumtype == 'A' || datumtype == 'Z')) { aptr = s; /* borrow register */ if (datumtype == 'Z') { /* 'Z' strips stuff after first null */ s = SvPVX(sv); while (*s) s++; + if (howlen == e_star) /* exact for 'Z*' */ + len = s - SvPVX(sv) + 1; } else { /* 'A' strips both nulls and spaces */ s = SvPVX(sv) + len - 1; @@ -310,11 +684,12 @@ PP(pp_unpack) SvCUR_set(sv, s - SvPVX(sv)); s = aptr; /* unborrow register */ } + s += len; XPUSHs(sv_2mortal(sv)); break; case 'B': case 'b': - if (star || len > (strend - s) * 8) + if (howlen == e_star || len > (strend - s) * 8) len = (strend - s) * 8; if (checksum) { if (!PL_bitcount) { @@ -331,20 +706,20 @@ PP(pp_unpack) } } while (len >= 8) { - culong += PL_bitcount[*(unsigned char*)s++]; + cuv += PL_bitcount[*(unsigned char*)s++]; len -= 8; } if (len) { bits = *s; if (datumtype == 'b') { while (len-- > 0) { - if (bits & 1) culong++; + if (bits & 1) cuv++; bits >>= 1; } } else { while (len-- > 0) { - if (bits & 128) culong++; + if (bits & 128) cuv++; bits <<= 1; } } @@ -380,7 +755,7 @@ PP(pp_unpack) break; case 'H': case 'h': - if (star || len > (strend - s) * 2) + if (howlen == e_star || len > (strend - s) * 2) len = (strend - s) * 2; sv = NEWSV(35, len + 1); SvCUR_set(sv, len); @@ -420,10 +795,12 @@ PP(pp_unpack) if (checksum > bits_in_uv) cdouble += (NV)aint; else - culong += aint; + cuv += aint; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -439,7 +816,7 @@ PP(pp_unpack) case 'C': unpack_C: /* unpack U will jump here if not UTF-8 */ if (len == 0) { - do_utf8 = FALSE; + symptr->flags &= ~FLAG_UNPACK_DO_UTF8; break; } if (len > strend - s) @@ -448,10 +825,12 @@ PP(pp_unpack) uchar_checksum: while (len-- > 0) { auint = *s++ & 255; - culong += auint; + cuv += auint; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -464,31 +843,33 @@ PP(pp_unpack) break; case 'U': if (len == 0) { - do_utf8 = TRUE; + symptr->flags |= FLAG_UNPACK_DO_UTF8; break; } - if (!do_utf8) + if ((symptr->flags & FLAG_UNPACK_DO_UTF8) == 0) goto unpack_C; if (len > strend - s) len = strend - s; if (checksum) { while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); + auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV)); along = alen; s += along; if (checksum > bits_in_uv) cdouble += (NV)auint; else - culong += auint; + cuv += auint; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { STRLEN alen; - auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0); + auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV)); along = alen; s += along; sv = NEWSV(37, 0); @@ -497,157 +878,160 @@ PP(pp_unpack) } } break; - case 's': -#if SHORTSIZE == SIZE16 - along = (strend - s) / SIZE16; -#else - along = (strend - s) / (natint ? sizeof(short) : SIZE16); -#endif + case 's' | TYPE_IS_SHRIEKING: +#if SHORTSIZE != SIZE16 + along = (strend - s) / sizeof(short); if (len > along) len = along; if (checksum) { -#if SHORTSIZE != SIZE16 - if (natint) { - short ashort; - while (len-- > 0) { - COPYNN(s, &ashort, sizeof(short)); - s += sizeof(short); - if (checksum > bits_in_uv) - cdouble += (NV)ashort; - else - culong += ashort; + short ashort; + while (len-- > 0) { + COPYNN(s, &ashort, sizeof(short)); + s += sizeof(short); + if (checksum > bits_in_uv) + cdouble += (NV)ashort; + else + cuv += ashort; - } } - else + } + else { + short ashort; + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + COPYNN(s, &ashort, sizeof(short)); + s += sizeof(short); + sv = NEWSV(38, 0); + sv_setiv(sv, (IV)ashort); + PUSHs(sv_2mortal(sv)); + } + } + break; +#else + /* Fallthrough! */ #endif - { - while (len-- > 0) { - COPY16(s, &ashort); + case 's': + along = (strend - s) / SIZE16; + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { + COPY16(s, &ashort); #if SHORTSIZE > SIZE16 - if (ashort > 32767) - ashort -= 65536; + if (ashort > 32767) + ashort -= 65536; #endif - s += SIZE16; - if (checksum > bits_in_uv) - cdouble += (NV)ashort; - else - culong += ashort; - } + s += SIZE16; + if (checksum > bits_in_uv) + cdouble += (NV)ashort; + else + cuv += ashort; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); -#if SHORTSIZE != SIZE16 - if (natint) { - short ashort; - while (len-- > 0) { - COPYNN(s, &ashort, sizeof(short)); - s += sizeof(short); - sv = NEWSV(38, 0); - sv_setiv(sv, (IV)ashort); - PUSHs(sv_2mortal(sv)); - } - } - else -#endif - { - while (len-- > 0) { - COPY16(s, &ashort); + + while (len-- > 0) { + COPY16(s, &ashort); #if SHORTSIZE > SIZE16 - if (ashort > 32767) - ashort -= 65536; + if (ashort > 32767) + ashort -= 65536; #endif - s += SIZE16; - sv = NEWSV(38, 0); - sv_setiv(sv, (IV)ashort); - PUSHs(sv_2mortal(sv)); - } + s += SIZE16; + sv = NEWSV(38, 0); + sv_setiv(sv, (IV)ashort); + PUSHs(sv_2mortal(sv)); + } + } + break; + case 'S' | TYPE_IS_SHRIEKING: +#if SHORTSIZE != SIZE16 + along = (strend - s) / sizeof(unsigned short); + if (len > along) + len = along; + if (checksum) { + unsigned short aushort; + while (len-- > 0) { + COPYNN(s, &aushort, sizeof(unsigned short)); + s += sizeof(unsigned short); + if (checksum > bits_in_uv) + cdouble += (NV)aushort; + else + cuv += aushort; + } + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + unsigned short aushort; + COPYNN(s, &aushort, sizeof(unsigned short)); + s += sizeof(unsigned short); + sv = NEWSV(39, 0); + sv_setiv(sv, (UV)aushort); + PUSHs(sv_2mortal(sv)); } } break; +#else + /* Fallhrough! */ +#endif case 'v': case 'n': case 'S': -#if SHORTSIZE == SIZE16 along = (strend - s) / SIZE16; -#else - unatint = natint && datumtype == 'S'; - along = (strend - s) / (unatint ? sizeof(unsigned short) : SIZE16); -#endif if (len > along) len = along; if (checksum) { -#if SHORTSIZE != SIZE16 - if (unatint) { - unsigned short aushort; - while (len-- > 0) { - COPYNN(s, &aushort, sizeof(unsigned short)); - s += sizeof(unsigned short); - if (checksum > bits_in_uv) - cdouble += (NV)aushort; - else - culong += aushort; - } - } - else -#endif - { - while (len-- > 0) { - COPY16(s, &aushort); - s += SIZE16; + while (len-- > 0) { + COPY16(s, &aushort); + s += SIZE16; #ifdef HAS_NTOHS - if (datumtype == 'n') - aushort = PerlSock_ntohs(aushort); + if (datumtype == 'n') + aushort = PerlSock_ntohs(aushort); #endif #ifdef HAS_VTOHS - if (datumtype == 'v') - aushort = vtohs(aushort); + if (datumtype == 'v') + aushort = vtohs(aushort); #endif - if (checksum > bits_in_uv) - cdouble += (NV)aushort; - else - culong += aushort; - } + if (checksum > bits_in_uv) + cdouble += (NV)aushort; + else + cuv += aushort; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); -#if SHORTSIZE != SIZE16 - if (unatint) { - unsigned short aushort; - while (len-- > 0) { - COPYNN(s, &aushort, sizeof(unsigned short)); - s += sizeof(unsigned short); - sv = NEWSV(39, 0); - sv_setiv(sv, (UV)aushort); - PUSHs(sv_2mortal(sv)); - } - } - else -#endif - { - while (len-- > 0) { - COPY16(s, &aushort); - s += SIZE16; - sv = NEWSV(39, 0); + while (len-- > 0) { + COPY16(s, &aushort); + s += SIZE16; + sv = NEWSV(39, 0); #ifdef HAS_NTOHS - if (datumtype == 'n') - aushort = PerlSock_ntohs(aushort); + if (datumtype == 'n') + aushort = PerlSock_ntohs(aushort); #endif #ifdef HAS_VTOHS - if (datumtype == 'v') - aushort = vtohs(aushort); + if (datumtype == 'v') + aushort = vtohs(aushort); #endif - sv_setiv(sv, (UV)aushort); - PUSHs(sv_2mortal(sv)); - } + sv_setiv(sv, (UV)aushort); + PUSHs(sv_2mortal(sv)); } } break; case 'i': + case 'i' | TYPE_IS_SHRIEKING: along = (strend - s) / sizeof(int); if (len > along) len = along; @@ -658,10 +1042,12 @@ PP(pp_unpack) if (checksum > bits_in_uv) cdouble += (NV)aint; else - culong += aint; + cuv += aint; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -699,6 +1085,7 @@ PP(pp_unpack) } break; case 'I': + case 'I' | TYPE_IS_SHRIEKING: along = (strend - s) / sizeof(unsigned int); if (len > along) len = along; @@ -709,10 +1096,12 @@ PP(pp_unpack) if (checksum > bits_in_uv) cdouble += (NV)auint; else - culong += auint; + cuv += auint; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -731,156 +1120,213 @@ PP(pp_unpack) } } break; - case 'l': -#if LONGSIZE == SIZE32 - along = (strend - s) / SIZE32; -#else - along = (strend - s) / (natint ? sizeof(long) : SIZE32); -#endif + case 'j': + along = (strend - s) / IVSIZE; + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { + Copy(s, &aiv, 1, IV); + s += IVSIZE; + if (checksum > bits_in_uv) + cdouble += (NV)aiv; + else + cuv += aiv; + } + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + Copy(s, &aiv, 1, IV); + s += IVSIZE; + sv = NEWSV(40, 0); + sv_setiv(sv, aiv); + PUSHs(sv_2mortal(sv)); + } + } + break; + case 'J': + along = (strend - s) / UVSIZE; if (len > along) len = along; if (checksum) { + while (len-- > 0) { + Copy(s, &auv, 1, UV); + s += UVSIZE; + if (checksum > bits_in_uv) + cdouble += (NV)auv; + else + cuv += auv; + } + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + Copy(s, &auv, 1, UV); + s += UVSIZE; + sv = NEWSV(41, 0); + sv_setuv(sv, auv); + PUSHs(sv_2mortal(sv)); + } + } + break; + case 'l' | TYPE_IS_SHRIEKING: #if LONGSIZE != SIZE32 - if (natint) { - while (len-- > 0) { - COPYNN(s, &along, sizeof(long)); - s += sizeof(long); - if (checksum > bits_in_uv) - cdouble += (NV)along; - else - culong += along; - } + along = (strend - s) / sizeof(long); + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { + COPYNN(s, &along, sizeof(long)); + s += sizeof(long); + if (checksum > bits_in_uv) + cdouble += (NV)along; + else + cuv += along; } - else + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + COPYNN(s, &along, sizeof(long)); + s += sizeof(long); + sv = NEWSV(42, 0); + sv_setiv(sv, (IV)along); + PUSHs(sv_2mortal(sv)); + } + } + break; +#else + /* Fallthrough! */ #endif - { - while (len-- > 0) { + case 'l': + along = (strend - s) / SIZE32; + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { #if LONGSIZE > SIZE32 && INTSIZE == SIZE32 - I32 along; + I32 along; #endif - COPY32(s, &along); + COPY32(s, &along); #if LONGSIZE > SIZE32 - if (along > 2147483647) - along -= 4294967296; + if (along > 2147483647) + along -= 4294967296; #endif - s += SIZE32; - if (checksum > bits_in_uv) - cdouble += (NV)along; - else - culong += along; - } + s += SIZE32; + if (checksum > bits_in_uv) + cdouble += (NV)along; + else + cuv += along; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); -#if LONGSIZE != SIZE32 - if (natint) { - while (len-- > 0) { - COPYNN(s, &along, sizeof(long)); - s += sizeof(long); - sv = NEWSV(42, 0); - sv_setiv(sv, (IV)along); - PUSHs(sv_2mortal(sv)); - } - } - else -#endif - { - while (len-- > 0) { + while (len-- > 0) { #if LONGSIZE > SIZE32 && INTSIZE == SIZE32 - I32 along; + I32 along; #endif - COPY32(s, &along); + COPY32(s, &along); #if LONGSIZE > SIZE32 - if (along > 2147483647) - along -= 4294967296; + if (along > 2147483647) + along -= 4294967296; #endif - s += SIZE32; - sv = NEWSV(42, 0); - sv_setiv(sv, (IV)along); - PUSHs(sv_2mortal(sv)); - } + s += SIZE32; + sv = NEWSV(42, 0); + sv_setiv(sv, (IV)along); + PUSHs(sv_2mortal(sv)); } } break; - case 'V': - case 'N': - case 'L': -#if LONGSIZE == SIZE32 - along = (strend - s) / SIZE32; -#else - unatint = natint && datumtype == 'L'; - along = (strend - s) / (unatint ? sizeof(unsigned long) : SIZE32); -#endif + case 'L' | TYPE_IS_SHRIEKING: +#if LONGSIZE != SIZE32 + along = (strend - s) / sizeof(unsigned long); if (len > along) len = along; if (checksum) { -#if LONGSIZE != SIZE32 - if (unatint) { + while (len-- > 0) { unsigned long aulong; - while (len-- > 0) { - COPYNN(s, &aulong, sizeof(unsigned long)); - s += sizeof(unsigned long); - if (checksum > bits_in_uv) - cdouble += (NV)aulong; - else - culong += aulong; - } + COPYNN(s, &aulong, sizeof(unsigned long)); + s += sizeof(unsigned long); + if (checksum > bits_in_uv) + cdouble += (NV)aulong; + else + cuv += aulong; } - else + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + unsigned long aulong; + COPYNN(s, &aulong, sizeof(unsigned long)); + s += sizeof(unsigned long); + sv = NEWSV(43, 0); + sv_setuv(sv, (UV)aulong); + PUSHs(sv_2mortal(sv)); + } + } + break; +#else + /* Fall through! */ #endif - { - while (len-- > 0) { - COPY32(s, &aulong); - s += SIZE32; + case 'V': + case 'N': + case 'L': + along = (strend - s) / SIZE32; + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { + COPY32(s, &aulong); + s += SIZE32; #ifdef HAS_NTOHL - if (datumtype == 'N') - aulong = PerlSock_ntohl(aulong); + if (datumtype == 'N') + aulong = PerlSock_ntohl(aulong); #endif #ifdef HAS_VTOHL - if (datumtype == 'V') - aulong = vtohl(aulong); + if (datumtype == 'V') + aulong = vtohl(aulong); #endif - if (checksum > bits_in_uv) - cdouble += (NV)aulong; - else - culong += aulong; - } + if (checksum > bits_in_uv) + cdouble += (NV)aulong; + else + cuv += aulong; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); -#if LONGSIZE != SIZE32 - if (unatint) { - unsigned long aulong; - while (len-- > 0) { - COPYNN(s, &aulong, sizeof(unsigned long)); - s += sizeof(unsigned long); - sv = NEWSV(43, 0); - sv_setuv(sv, (UV)aulong); - PUSHs(sv_2mortal(sv)); - } - } - else -#endif - { - while (len-- > 0) { - COPY32(s, &aulong); - s += SIZE32; + while (len-- > 0) { + COPY32(s, &aulong); + s += SIZE32; #ifdef HAS_NTOHL - if (datumtype == 'N') - aulong = PerlSock_ntohl(aulong); + if (datumtype == 'N') + aulong = PerlSock_ntohl(aulong); #endif #ifdef HAS_VTOHL - if (datumtype == 'V') - aulong = vtohl(aulong); + if (datumtype == 'V') + aulong = vtohl(aulong); #endif - sv = NEWSV(43, 0); - sv_setuv(sv, (UV)aulong); - PUSHs(sv_2mortal(sv)); - } + sv = NEWSV(43, 0); + sv_setuv(sv, (UV)aulong); + PUSHs(sv_2mortal(sv)); } } break; @@ -904,6 +1350,8 @@ PP(pp_unpack) } break; case 'w': + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); { @@ -927,7 +1375,7 @@ PP(pp_unpack) sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv); while (s < strend) { - sv = mul128(sv, *s & 0x7f); + sv = mul128(sv, (U8)(*s & 0x7f)); if (!(*s++ & 0x80)) { bytes = 0; break; @@ -943,10 +1391,12 @@ PP(pp_unpack) } } if ((s >= strend) && bytes) - DIE(aTHX_ "Unterminated compressed integer"); + Perl_croak(aTHX_ "Unterminated compressed integer in unpack"); } break; case 'P': + if (symptr->howlen == e_star) + Perl_croak(aTHX_ "'P' must have an explicit size in unpack"); EXTEND(SP, 1); if (sizeof(char*) > strend - s) break; @@ -971,22 +1421,24 @@ PP(pp_unpack) if (checksum > bits_in_uv) cdouble += (NV)aquad; else - culong += aquad; + cuv += aquad; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { if (s + sizeof(Quad_t) > strend) aquad = 0; else { - Copy(s, &aquad, 1, Quad_t); - s += sizeof(Quad_t); + Copy(s, &aquad, 1, Quad_t); + s += sizeof(Quad_t); } sv = NEWSV(42, 0); if (aquad >= IV_MIN && aquad <= IV_MAX) - sv_setiv(sv, (IV)aquad); + sv_setiv(sv, (IV)aquad); else sv_setnv(sv, (NV)aquad); PUSHs(sv_2mortal(sv)); @@ -994,7 +1446,7 @@ PP(pp_unpack) } break; case 'Q': - along = (strend - s) / sizeof(Quad_t); + along = (strend - s) / sizeof(Uquad_t); if (len > along) len = along; if (checksum) { @@ -1004,10 +1456,12 @@ PP(pp_unpack) if (checksum > bits_in_uv) cdouble += (NV)auquad; else - culong += auquad; + cuv += auquad; } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -1029,7 +1483,6 @@ PP(pp_unpack) #endif /* float and double added gnb@melba.bby.oz.au 22/11/89 */ case 'f': - case 'F': along = (strend - s) / sizeof(float); if (len > along) len = along; @@ -1041,6 +1494,8 @@ PP(pp_unpack) } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -1053,7 +1508,6 @@ PP(pp_unpack) } break; case 'd': - case 'D': along = (strend - s) / sizeof(double); if (len > along) len = along; @@ -1065,6 +1519,8 @@ PP(pp_unpack) } } else { + if (len && unpack_only_one) + len = 1; EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0) { @@ -1076,6 +1532,58 @@ PP(pp_unpack) } } break; + case 'F': + along = (strend - s) / NVSIZE; + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { + Copy(s, &anv, 1, NV); + s += NVSIZE; + cdouble += anv; + } + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + Copy(s, &anv, 1, NV); + s += NVSIZE; + sv = NEWSV(48, 0); + sv_setnv(sv, anv); + PUSHs(sv_2mortal(sv)); + } + } + break; +#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) + case 'D': + along = (strend - s) / LONG_DOUBLESIZE; + if (len > along) + len = along; + if (checksum) { + while (len-- > 0) { + Copy(s, &aldouble, 1, long double); + s += LONG_DOUBLESIZE; + cdouble += aldouble; + } + } + else { + if (len && unpack_only_one) + len = 1; + EXTEND(SP, len); + EXTEND_MORTAL(len); + while (len-- > 0) { + Copy(s, &aldouble, 1, long double); + s += LONG_DOUBLESIZE; + sv = NEWSV(48, 0); + sv_setnv(sv, (NV)aldouble); + PUSHs(sv_2mortal(sv)); + } + } + break; +#endif case 'u': /* MKS: * Initialise the decode mapping. By using a table driven @@ -1121,24 +1629,27 @@ PP(pp_unpack) d = PL_uudmap[*(U8*)s++] & 077; else d = 0; - hunk[0] = (a << 2) | (b >> 4); - hunk[1] = (b << 4) | (c >> 2); - hunk[2] = (c << 6) | d; + hunk[0] = (char)((a << 2) | (b >> 4)); + hunk[1] = (char)((b << 4) | (c >> 2)); + hunk[2] = (char)((c << 6) | d); sv_catpvn(sv, hunk, (len > 3) ? 3 : len); len -= 3; } if (*s == '\n') s++; - else if (s[1] == '\n') /* possible checksum byte */ - s += 2; + else /* possible checksum byte */ + if (s + 1 < strend && s[1] == '\n') + s += 2; } XPUSHs(sv_2mortal(sv)); break; } + if (checksum) { sv = NEWSV(42, 0); if (strchr("fFdD", datumtype) || - (checksum > bits_in_uv && strchr("csSiIlLnNUvVqQ", datumtype)) ) { + (checksum > bits_in_uv && + strchr("csSiIlLnNUvVqQjJ", datumtype&0xFF)) ) { NV trouble; adouble = (NV) (1 << (checksum & 15)); @@ -1154,16 +1665,72 @@ PP(pp_unpack) else { if (checksum < bits_in_uv) { UV mask = ((UV)1 << checksum) - 1; - culong &= mask; + cuv &= mask; } - sv_setuv(sv, (UV)culong); + sv_setuv(sv, cuv); } XPUSHs(sv_2mortal(sv)); checksum = 0; } + + if (symptr->flags & FLAG_SLASH){ + if (SP - PL_stack_base - start_sp_offset <= 0) + Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); + if( next_symbol(symptr) ){ + if( symptr->howlen == e_number ) + Perl_croak(aTHX_ "Count after length/code in unpack" ); + if( beyond ){ + /* ...end of char buffer then no decent length available */ + Perl_croak(aTHX_ "length/code after end of string in unpack" ); + } else { + /* take top of stack (hope it's numeric) */ + len = POPi; + if( len < 0 ) + Perl_croak(aTHX_ "Negative '/' count in unpack" ); + } + } else { + Perl_croak(aTHX_ "Code missing after '/' in unpack" ); + } + datumtype = symptr->code; + goto redo_switch; + } } - if (SP - PL_stack_base == start_sp_offset && gimme == G_SCALAR) - PUSHs(&PL_sv_undef); + + if (new_s) + *new_s = s; + PUTBACK; + return SP - PL_stack_base - start_sp_offset; +} + +PP(pp_unpack) +{ + dSP; + SV *right = (MAXARG > 1) ? POPs : GvSV(PL_defgv); + SV *left = POPs; + I32 gimme = GIMME_V; + STRLEN llen; + STRLEN rlen; + register char *pat = SvPV(left, llen); +#ifdef PACKED_IS_OCTETS + /* Packed side is assumed to be octets - so force downgrade if it + has been UTF-8 encoded by accident + */ + register char *s = SvPVbyte(right, rlen); +#else + register char *s = SvPV(right, rlen); +#endif + char *strend = s + rlen; + register char *patend = pat + llen; + register I32 cnt; + + PUTBACK; + cnt = unpackstring(pat, patend, s, strend, + ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0) + | (DO_UTF8(right) ? FLAG_UNPACK_DO_UTF8 : 0)); + + SPAGAIN; + if ( !cnt && gimme == G_SCALAR ) + PUSHs(&PL_sv_undef); RETURN; } @@ -1272,21 +1839,60 @@ S_div128(pTHX_ SV *pnum, bool *done) } -PP(pp_pack) + +/* +=for apidoc pack_cat + +The engine implementing pack() Perl function. Note: parameters next_in_list and +flags are not used. This call should not be used; use packlist instead. + +=cut */ + + +void +Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags) +{ + tempsym_t sym = { 0 }; + sym.patptr = pat; + sym.patend = patend; + sym.flags = FLAG_PACK; + + (void)pack_rec( cat, &sym, beglist, endlist ); +} + + +/* +=for apidoc packlist + +The engine implementing pack() Perl function. + +=cut */ + + +void +Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist ) +{ + tempsym_t sym = { 0 }; + sym.patptr = pat; + sym.patend = patend; + sym.flags = FLAG_PACK; + + (void)pack_rec( cat, &sym, beglist, endlist ); +} + + +STATIC +SV ** +S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV **endlist ) { - dSP; dMARK; dORIGMARK; dTARGET; - register SV *cat = TARG; register I32 items; STRLEN fromlen; - register char *pat = SvPVx(*++MARK, fromlen); - char *patcopy; - register char *patend = pat + fromlen; - register I32 len; - I32 datumtype; + register I32 len = 0; SV *fromstr; /*SUPPRESS 442*/ static char null10[] = {0,0,0,0,0,0,0,0,0,0}; static char *space10 = " "; + bool found; /* These must not be in registers: */ char achar; @@ -1295,6 +1901,12 @@ PP(pp_pack) unsigned int auint; I32 along; U32 aulong; + IV aiv; + UV auv; + NV anv; +#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) + long double aldouble; +#endif #ifdef HAS_QUAD Quad_t aquad; Uquad_t auquad; @@ -1302,94 +1914,101 @@ PP(pp_pack) char *aptr; float afloat; double adouble; - int commas = 0; -#ifdef PERL_NATINT_PACK - int natint; /* native integer */ + int strrelbeg = SvCUR(cat); + tempsym_t lookahead; + + items = endlist - beglist; + found = next_symbol( symptr ); + +#ifndef PACKED_IS_OCTETS + if (symptr->level == 0 && found && symptr->code == 'U' ){ + SvUTF8_on(cat); + } #endif - items = SP - MARK; - MARK++; - sv_setpvn(cat, "", 0); - patcopy = pat; - while (pat < patend) { + while (found) { SV *lengthcode = Nullsv; -#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no) - datumtype = *pat++ & 0xFF; -#ifdef PERL_NATINT_PACK - natint = 0; -#endif - if (isSPACE(datumtype)) { - patcopy++; - continue; +#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) + + I32 datumtype = symptr->code; + howlen_t howlen; + + switch( howlen = symptr->howlen ){ + case e_no_len: + case e_number: + len = symptr->length; + break; + case e_star: + len = strchr("@Xxu", datumtype) ? 0 : items; + break; } -#ifndef PACKED_IS_OCTETS - if (datumtype == 'U' && pat == patcopy+1) - SvUTF8_on(cat); -#endif - if (datumtype == '#') { - while (pat < patend && *pat != '\n') - pat++; - continue; - } - if (*pat == '!') { - char *natstr = "sSiIlL"; - if (strchr(natstr, datumtype)) { -#ifdef PERL_NATINT_PACK - natint = 1; -#endif - pat++; - } - else - DIE(aTHX_ "'!' allowed only after types %s", natstr); - } - if (*pat == '*') { - len = strchr("@Xxu", datumtype) ? 0 : items; - pat++; - } - else if (isDIGIT(*pat)) { - len = *pat++ - '0'; - while (isDIGIT(*pat)) { - len = (len * 10) + (*pat++ - '0'); - if (len < 0) - DIE(aTHX_ "Repeat count in pack overflows"); - } - } - else - len = 1; - if (*pat == '/') { - ++pat; - if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*') - DIE(aTHX_ "/ must be followed by a*, A* or Z*"); - lengthcode = sv_2mortal(newSViv(sv_len(items > 0 - ? *MARK : &PL_sv_no) - + (*pat == 'Z' ? 1 : 0))); + /* Look ahead for next symbol. Do we have code/code? */ + lookahead = *symptr; + found = next_symbol(&lookahead); + if ( symptr->flags & FLAG_SLASH ) { + if (found){ + if ( 0 == strchr( "aAZ", lookahead.code ) || + e_star != lookahead.howlen ) + Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack"); + lengthcode = sv_2mortal(newSViv(sv_len(items > 0 + ? *beglist : &PL_sv_no) + + (lookahead.code == 'Z' ? 1 : 0))); + } else { + Perl_croak(aTHX_ "Code missing after '/' in pack"); + } } + switch(datumtype) { default: - DIE(aTHX_ "Invalid type in pack: '%c'", (int)datumtype); - case ',': /* grandfather in commas but with a warning */ - if (commas++ == 0 && ckWARN(WARN_PACK)) - Perl_warner(aTHX_ WARN_PACK, - "Invalid type in pack: '%c'", (int)datumtype); - break; + Perl_croak(aTHX_ "Invalid type '%c' in pack", (int)datumtype); case '%': - DIE(aTHX_ "%% may only be used in unpack"); + Perl_croak(aTHX_ "'%%' may not be used in pack"); case '@': - len -= SvCUR(cat); + len += strrelbeg - SvCUR(cat); if (len > 0) goto grow; len = -len; if (len > 0) goto shrink; break; + case '(': + { + tempsym_t savsym = *symptr; + symptr->patend = savsym.grpend; + symptr->level++; + while (len--) { + symptr->patptr = savsym.grpbeg; + beglist = pack_rec(cat, symptr, beglist, endlist ); + if (savsym.howlen == e_star && beglist == endlist) + break; /* No way to continue */ + } + lookahead.flags = symptr->flags; + *symptr = savsym; + break; + } + case 'X' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + len = (SvCUR(cat)) % len; + /* FALL THROUGH */ case 'X': shrink: - if (SvCUR(cat) < len) - DIE(aTHX_ "X outside of string"); + if ((I32)SvCUR(cat) < len) + Perl_croak(aTHX_ "'X' outside of string in pack"); SvCUR(cat) -= len; *SvEND(cat) = '\0'; break; + case 'x' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + aint = (SvCUR(cat)) % len; + if (aint) /* Other portable ways? */ + len = len - aint; + else + len = 0; + /* FALL THROUGH */ + case 'x': grow: while (len >= 10) { @@ -1403,12 +2022,12 @@ PP(pp_pack) case 'a': fromstr = NEXTFROM; aptr = SvPV(fromstr, fromlen); - if (pat[-1] == '*') { + if (howlen == e_star) { len = fromlen; if (datumtype == 'Z') ++len; } - if (fromlen >= len) { + if ((I32)fromlen >= len) { sv_catpvn(cat, aptr, len); if (datumtype == 'Z') *(SvEND(cat)-1) = '\0'; @@ -1441,13 +2060,13 @@ PP(pp_pack) fromstr = NEXTFROM; saveitems = items; str = SvPV(fromstr, fromlen); - if (pat[-1] == '*') + if (howlen == e_star) len = fromlen; aint = SvCUR(cat); SvCUR(cat) += (len+7)/8; SvGROW(cat, SvCUR(cat) + 1); aptr = SvPVX(cat) + aint; - if (len > fromlen) + if (len > (I32)fromlen) len = fromlen; aint = len; items = 0; @@ -1497,13 +2116,13 @@ PP(pp_pack) fromstr = NEXTFROM; saveitems = items; str = SvPV(fromstr, fromlen); - if (pat[-1] == '*') + if (howlen == e_star) len = fromlen; aint = SvCUR(cat); SvCUR(cat) += (len+1)/2; SvGROW(cat, SvCUR(cat) + 1); aptr = SvPVX(cat) + aint; - if (len > fromlen) + if (len > (I32)fromlen) len = fromlen; aint = len; items = 0; @@ -1553,8 +2172,8 @@ PP(pp_pack) aint = SvIV(fromstr); if ((aint < 0 || aint > 255) && ckWARN(WARN_PACK)) - Perl_warner(aTHX_ WARN_PACK, - "Character in \"C\" format wrapped"); + Perl_warner(aTHX_ packWARN(WARN_PACK), + "Character in 'C' format wrapped in pack"); achar = aint & 255; sv_catpvn(cat, &achar, sizeof(char)); break; @@ -1562,8 +2181,8 @@ PP(pp_pack) aint = SvIV(fromstr); if ((aint < -128 || aint > 127) && ckWARN(WARN_PACK)) - Perl_warner(aTHX_ WARN_PACK, - "Character in \"c\" format wrapped"); + Perl_warner(aTHX_ packWARN(WARN_PACK), + "Character in 'c' format wrapped in pack" ); achar = aint & 255; sv_catpvn(cat, &achar, sizeof(char)); break; @@ -1573,30 +2192,98 @@ PP(pp_pack) case 'U': while (len-- > 0) { fromstr = NEXTFROM; - auint = SvUV(fromstr); + auint = UNI_TO_NATIVE(SvUV(fromstr)); SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); - SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint) - - SvPVX(cat)); + SvCUR_set(cat, + (char*)uvchr_to_utf8_flags((U8*)SvEND(cat), + auint, + ckWARN(WARN_UTF8) ? + 0 : UNICODE_ALLOW_ANY) + - SvPVX(cat)); } *SvEND(cat) = '\0'; break; /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ case 'f': - case 'F': while (len-- > 0) { fromstr = NEXTFROM; +#ifdef __VOS__ +/* VOS does not automatically map a floating-point overflow + during conversion from double to float into infinity, so we + do it by hand. This code should either be generalized for + any OS that needs it, or removed if and when VOS implements + posix-976 (suggestion to support mapping to infinity). + Paul.Green@stratus.com 02-04-02. */ + if (SvNV(fromstr) > FLT_MAX) + afloat = _float_constants[0]; /* single prec. inf. */ + else if (SvNV(fromstr) < -FLT_MAX) + afloat = _float_constants[0]; /* single prec. inf. */ + else afloat = (float)SvNV(fromstr); +#else +# if defined(VMS) && !defined(__IEEE_FP) +/* IEEE fp overflow shenanigans are unavailable on VAX and optional + * on Alpha; fake it if we don't have them. + */ + if (SvNV(fromstr) > FLT_MAX) + afloat = FLT_MAX; + else if (SvNV(fromstr) < -FLT_MAX) + afloat = -FLT_MAX; + else afloat = (float)SvNV(fromstr); +# else afloat = (float)SvNV(fromstr); +# endif +#endif sv_catpvn(cat, (char *)&afloat, sizeof (float)); } break; case 'd': - case 'D': while (len-- > 0) { fromstr = NEXTFROM; +#ifdef __VOS__ +/* VOS does not automatically map a floating-point overflow + during conversion from long double to double into infinity, + so we do it by hand. This code should either be generalized + for any OS that needs it, or removed if and when VOS + implements posix-976 (suggestion to support mapping to + infinity). Paul.Green@stratus.com 02-04-02. */ + if (SvNV(fromstr) > DBL_MAX) + adouble = _double_constants[0]; /* double prec. inf. */ + else if (SvNV(fromstr) < -DBL_MAX) + adouble = _double_constants[0]; /* double prec. inf. */ + else adouble = (double)SvNV(fromstr); +#else +# if defined(VMS) && !defined(__IEEE_FP) +/* IEEE fp overflow shenanigans are unavailable on VAX and optional + * on Alpha; fake it if we don't have them. + */ + if (SvNV(fromstr) > DBL_MAX) + adouble = DBL_MAX; + else if (SvNV(fromstr) < -DBL_MAX) + adouble = -DBL_MAX; + else adouble = (double)SvNV(fromstr); +# else adouble = (double)SvNV(fromstr); +# endif +#endif sv_catpvn(cat, (char *)&adouble, sizeof (double)); } break; + case 'F': + while (len-- > 0) { + fromstr = NEXTFROM; + anv = SvNV(fromstr); + sv_catpvn(cat, (char *)&anv, NVSIZE); + } + break; +#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) + case 'D': + while (len-- > 0) { + fromstr = NEXTFROM; + aldouble = (long double)SvNV(fromstr); + sv_catpvn(cat, (char *)&aldouble, LONG_DOUBLESIZE); + } + break; +#endif case 'n': while (len-- > 0) { fromstr = NEXTFROM; @@ -1617,9 +2304,9 @@ PP(pp_pack) CAT16(cat, &ashort); } break; - case 'S': + case 'S' | TYPE_IS_SHRIEKING: #if SHORTSIZE != SIZE16 - if (natint) { + { unsigned short aushort; while (len-- > 0) { @@ -1627,9 +2314,12 @@ PP(pp_pack) aushort = SvUV(fromstr); sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short)); } - } - else + } + break; +#else + /* Fall through! */ #endif + case 'S': { U16 aushort; @@ -1641,9 +2331,9 @@ PP(pp_pack) } break; - case 's': + case 's' | TYPE_IS_SHRIEKING: #if SHORTSIZE != SIZE16 - if (natint) { + { short ashort; while (len-- > 0) { @@ -1652,49 +2342,60 @@ PP(pp_pack) sv_catpvn(cat, (char *)&ashort, sizeof(short)); } } - else + break; +#else + /* Fall through! */ #endif - { - while (len-- > 0) { - fromstr = NEXTFROM; - ashort = (I16)SvIV(fromstr); - CAT16(cat, &ashort); - } + case 's': + while (len-- > 0) { + fromstr = NEXTFROM; + ashort = (I16)SvIV(fromstr); + CAT16(cat, &ashort); } break; case 'I': + case 'I' | TYPE_IS_SHRIEKING: while (len-- > 0) { fromstr = NEXTFROM; auint = SvUV(fromstr); sv_catpvn(cat, (char*)&auint, sizeof(unsigned int)); } break; + case 'j': + while (len-- > 0) { + fromstr = NEXTFROM; + aiv = SvIV(fromstr); + sv_catpvn(cat, (char*)&aiv, IVSIZE); + } + break; + case 'J': + while (len-- > 0) { + fromstr = NEXTFROM; + auv = SvUV(fromstr); + sv_catpvn(cat, (char*)&auv, UVSIZE); + } + break; case 'w': while (len-- > 0) { fromstr = NEXTFROM; - adouble = Perl_floor(SvNV(fromstr)); + anv = SvNV(fromstr); - if (adouble < 0) - DIE(aTHX_ "Cannot compress negative numbers"); + if (anv < 0) + Perl_croak(aTHX_ "Cannot compress negative numbers in pack"); - if ( -#if UVSIZE > 4 && UVSIZE >= NVSIZE - adouble <= 0xffffffff -#else -# ifdef CXUX_BROKEN_CONSTANT_CONVERT - adouble <= UV_MAX_cxux -# else - adouble <= UV_MAX -# endif -#endif - ) + /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0, + which is == UV_MAX_P1. IOK is fine (instead of UV_only), as + any negative IVs will have already been got by the croak() + above. IOK is untrue for fractions, so we test them + against UV_MAX_P1. */ + if (SvIOK(fromstr) || anv < UV_MAX_P1) { - char buf[1 + sizeof(UV)]; + char buf[(sizeof(UV)*8)/7+1]; char *in = buf + sizeof(buf); - UV auv = U_V(adouble); + UV auv = SvUV(fromstr); do { - *--in = (auv & 0x7f) | 0x80; + *--in = (char)((auv & 0x7f) | 0x80); auv >>= 7; } while (auv); buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ @@ -1709,7 +2410,7 @@ PP(pp_pack) /* Copy string and check for compliance */ from = SvPV(fromstr, len); if ((norm = is_an_int(from, len)) == NULL) - DIE(aTHX_ "can compress only unsigned integer"); + Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); New('w', result, len, char); in = result + len; @@ -1722,24 +2423,55 @@ PP(pp_pack) SvREFCNT_dec(norm); /* free norm */ } else if (SvNOKp(fromstr)) { - char buf[sizeof(double) * 2]; /* 8/7 <= 2 */ + /* 10**NV_MAX_10_EXP is the largest power of 10 + so 10**(NV_MAX_10_EXP+1) is definately unrepresentable + given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x: + x = (NV_MAX_10_EXP+1) * log (10) / log (128) + And with that many bytes only Inf can overflow. + */ +#ifdef NV_MAX_10_EXP + char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; +#else + char buf[1 + (int)((308 + 1) * 0.47456)]; +#endif char *in = buf + sizeof(buf); + anv = Perl_floor(anv); do { - double next = floor(adouble / 128); - *--in = (unsigned char)(adouble - (next * 128)) | 0x80; + NV next = Perl_floor(anv / 128); if (in <= buf) /* this cannot happen ;-) */ - DIE(aTHX_ "Cannot compress integer"); - adouble = next; - } while (adouble > 0); + Perl_croak(aTHX_ "Cannot compress integer in pack"); + *--in = (unsigned char)(anv - (next * 128)) | 0x80; + anv = next; + } while (anv > 0); buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ sv_catpvn(cat, in, (buf + sizeof(buf)) - in); } - else - DIE(aTHX_ "Cannot compress non integer"); + else { + char *from, *result, *in; + SV *norm; + STRLEN len; + bool done; + + /* Copy string and check for compliance */ + from = SvPV(fromstr, len); + if ((norm = is_an_int(from, len)) == NULL) + Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); + + New('w', result, len, char); + in = result + len; + done = FALSE; + while (!done) + *--in = div128(norm, &done) | 0x80; + result[len - 1] &= 0x7F; /* clear continue bit */ + sv_catpvn(cat, in, (result + len) - in); + Safefree(result); + SvREFCNT_dec(norm); /* free norm */ + } } break; case 'i': + case 'i' | TYPE_IS_SHRIEKING: while (len-- > 0) { fromstr = NEXTFROM; aint = SvIV(fromstr); @@ -1766,9 +2498,9 @@ PP(pp_pack) CAT32(cat, &aulong); } break; - case 'L': + case 'L' | TYPE_IS_SHRIEKING: #if LONGSIZE != SIZE32 - if (natint) { + { unsigned long aulong; while (len-- > 0) { @@ -1777,8 +2509,11 @@ PP(pp_pack) sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long)); } } - else + break; +#else + /* Fall though! */ #endif + case 'L': { while (len-- > 0) { fromstr = NEXTFROM; @@ -1787,9 +2522,9 @@ PP(pp_pack) } } break; - case 'l': + case 'l' | TYPE_IS_SHRIEKING: #if LONGSIZE != SIZE32 - if (natint) { + { long along; while (len-- > 0) { @@ -1798,14 +2533,15 @@ PP(pp_pack) sv_catpvn(cat, (char *)&along, sizeof(long)); } } - else + break; +#else + /* Fall though! */ #endif - { - while (len-- > 0) { - fromstr = NEXTFROM; - along = SvIV(fromstr); - CAT32(cat, &along); - } + case 'l': + while (len-- > 0) { + fromstr = NEXTFROM; + along = SvIV(fromstr); + CAT32(cat, &along); } break; #ifdef HAS_QUAD @@ -1826,7 +2562,7 @@ PP(pp_pack) #endif case 'P': len = 1; /* assume SV is correct length */ - /* FALL THROUGH */ + /* Fall through! */ case 'p': while (len-- > 0) { fromstr = NEXTFROM; @@ -1843,7 +2579,7 @@ PP(pp_pack) || (SvPADTMP(fromstr) && !SvREADONLY(fromstr)))) { - Perl_warner(aTHX_ WARN_PACK, + Perl_warner(aTHX_ packWARN(WARN_PACK), "Attempt to pack pointer to temporary value"); } if (SvPOK(fromstr) || SvNIOK(fromstr)) @@ -1858,14 +2594,14 @@ PP(pp_pack) fromstr = NEXTFROM; aptr = SvPV(fromstr, fromlen); SvGROW(cat, fromlen * 4 / 3); - if (len <= 1) + if (len <= 2) len = 45; else len = len / 3 * 3; while (fromlen > 0) { I32 todo; - if (fromlen > len) + if ((I32)fromlen > len) todo = len; else todo = fromlen; @@ -1875,11 +2611,29 @@ PP(pp_pack) } break; } + *symptr = lookahead; } + return beglist; +} +#undef NEXTFROM + + +PP(pp_pack) +{ + dSP; dMARK; dORIGMARK; dTARGET; + register SV *cat = TARG; + STRLEN fromlen; + register char *pat = SvPVx(*++MARK, fromlen); + register char *patend = pat + fromlen; + + MARK++; + sv_setpvn(cat, "", 0); + + packlist(cat, pat, patend, MARK, SP + 1); + SvSETMAGIC(cat); SP = ORIGMARK; PUSHs(cat); RETURN; } -#undef NEXTFROM