From: Ilya Zakharevich Date: Thu, 21 Feb 2002 06:06:14 +0000 (-0500) Subject: grouping in pack/unpack X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=18529408d66077360bcde31227a7fb97dfc49222;p=p5sagit%2Fp5-mst-13.2.git grouping in pack/unpack Message-ID: <20020221060614.A29836@math.ohio-state.edu> p4raw-id: //depot/perl@14815 --- diff --git a/embed.fnc b/embed.fnc index 1c6403f..60cd3e0 100644 --- a/embed.fnc +++ b/embed.fnc @@ -572,6 +572,7 @@ Ap |void |set_numeric_local Ap |void |set_numeric_radix Ap |void |set_numeric_standard Apd |void |require_pv |const char* pv +Apd |void |pack_cat |SV *cat|char *pat|char *patend|SV **beglist|SV **endlist|SV ***next_in_list|U32 flags p |void |pidgone |Pid_t pid|int status Ap |void |pmflag |U16* pmfl|int ch p |OP* |pmruntime |OP* pm|OP* expr|OP* repl @@ -792,6 +793,7 @@ Ap |I32 |unlnk |char* f #if defined(USE_5005THREADS) Ap |void |unlock_condpair|void* svv #endif +Apd |I32 |unpack_str |char *pat|char *patend|char *s|char *strbeg|char *strend|char **new_s|I32 ocnt|U32 flags Ap |void |unsharepvn |const char* sv|I32 len|U32 hash p |void |unshare_hek |HEK* hek p |void |utilize |int aver|I32 floor|OP* version|OP* id|OP* arg @@ -1062,6 +1064,9 @@ s |void |doencodes |SV* sv|char* s|I32 len s |SV* |mul128 |SV *sv|U8 m s |SV* |is_an_int |char *s|STRLEN l s |int |div128 |SV *pnum|bool *done +s |char * |next_symbol |char *pat|char *patend +s |I32 |find_count |char **ppat|char *patend|int *star +s |char * |group_end |char *pat|char *patend|char ender #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) diff --git a/embed.h b/embed.h index a84707b..40bfb28 100644 --- a/embed.h +++ b/embed.h @@ -552,6 +552,7 @@ #define set_numeric_radix Perl_set_numeric_radix #define set_numeric_standard Perl_set_numeric_standard #define require_pv Perl_require_pv +#define pack_cat Perl_pack_cat #define pidgone Perl_pidgone #define pmflag Perl_pmflag #define pmruntime Perl_pmruntime @@ -749,6 +750,7 @@ #if defined(USE_5005THREADS) #define unlock_condpair Perl_unlock_condpair #endif +#define unpack_str Perl_unpack_str #define unsharepvn Perl_unsharepvn #define unshare_hek Perl_unshare_hek #define utilize Perl_utilize @@ -995,6 +997,9 @@ #define mul128 S_mul128 #define is_an_int S_is_an_int #define div128 S_div128 +#define next_symbol S_next_symbol +#define find_count S_find_count +#define group_end S_group_end #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) #define docatch S_docatch @@ -2099,6 +2104,7 @@ #define set_numeric_radix() Perl_set_numeric_radix(aTHX) #define set_numeric_standard() Perl_set_numeric_standard(aTHX) #define require_pv(a) Perl_require_pv(aTHX_ a) +#define pack_cat(a,b,c,d,e,f,g) Perl_pack_cat(aTHX_ a,b,c,d,e,f,g) #define pidgone(a,b) Perl_pidgone(aTHX_ a,b) #define pmflag(a,b) Perl_pmflag(aTHX_ a,b) #define pmruntime(a,b,c) Perl_pmruntime(aTHX_ a,b,c) @@ -2294,6 +2300,7 @@ #if defined(USE_5005THREADS) #define unlock_condpair(a) Perl_unlock_condpair(aTHX_ a) #endif +#define unpack_str(a,b,c,d,e,f,g,h) Perl_unpack_str(aTHX_ a,b,c,d,e,f,g,h) #define unsharepvn(a,b,c) Perl_unsharepvn(aTHX_ a,b,c) #define unshare_hek(a) Perl_unshare_hek(aTHX_ a) #define utilize(a,b,c,d,e) Perl_utilize(aTHX_ a,b,c,d,e) @@ -2534,6 +2541,9 @@ #define mul128(a,b) S_mul128(aTHX_ a,b) #define is_an_int(a,b) S_is_an_int(aTHX_ a,b) #define div128(a,b) S_div128(aTHX_ a,b) +#define next_symbol(a,b) S_next_symbol(aTHX_ a,b) +#define find_count(a,b,c) S_find_count(aTHX_ a,b,c) +#define group_end(a,b,c) S_group_end(aTHX_ a,b,c) #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) #define docatch(a) S_docatch(aTHX_ a) diff --git a/global.sym b/global.sym index 757e1bd..624f356 100644 --- a/global.sym +++ b/global.sym @@ -341,6 +341,7 @@ Perl_set_numeric_local Perl_set_numeric_radix Perl_set_numeric_standard Perl_require_pv +Perl_pack_cat Perl_pmflag Perl_pop_scope Perl_push_scope @@ -496,6 +497,7 @@ Perl_to_utf8_title Perl_to_utf8_fold Perl_unlnk Perl_unlock_condpair +Perl_unpack_str Perl_unsharepvn Perl_utf16_to_utf8 Perl_utf16_to_utf8_reversed diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 5be9ced..0c87d94 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1559,6 +1559,11 @@ version of Perl, and this should not happen anyway. (F) Unlike with "next" or "last", you're not allowed to goto an unspecified destination. See L. +=item %s-group starts with a count + +(F) In pack/unpack a ()-group started with a count. A count is +supposed to follow something: a template character or a ()-group. + =item %s had compilation errors (F) The final summary message when a C fails. diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index ea196c2..56ad58f 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3104,9 +3104,8 @@ the converted values. Typically, each converted value looks like its machine-level representation. For example, on 32-bit machines a converted integer may be represented by a sequence of 4 bytes. -The TEMPLATE is a -sequence of characters that give the order and type of values, as -follows: +The TEMPLATE is a sequence of characters that give the order and type +of values, as follows: a A string with arbitrary binary data, will be null padded. A A text (ASCII) string, will be space padded. @@ -3170,6 +3169,7 @@ follows: x A null byte. X Back up a byte. @ Null fill to absolute position. + ( Beginning of a ()-group. The following rules apply: @@ -3183,7 +3183,8 @@ C, and C

the pack function will gobble up that many values from the LIST. A C<*> for the repeat count means to use however many items are left, except for C<@>, C, C, where it is equivalent to C<0>, and C, where it is equivalent to 1 (or 45, what is the -same). +same). A numeric repeat count may optionally be enclosed in brackets, as in +C. When used with C, C<*> results in the addition of a trailing null byte (so the packed result will be one longer than the byte C @@ -3400,6 +3401,12 @@ sequences of bytes. =item * +A ()-group is a sub-TEMPLATE enclosed in parentheses. A group may +take a repeat count, both as postfix, and via the C template +character. + +=item * + A comment in a TEMPLATE starts with C<#> and goes to the end of line. =item * diff --git a/pod/perltodo.pod b/pod/perltodo.pod index 1b64991..8606f07 100644 --- a/pod/perltodo.pod +++ b/pod/perltodo.pod @@ -307,10 +307,6 @@ would rely on even more sed hackery in F. j, J, g, G? -=head2 pack "(stuff)*" - -That's to say, C would be the same as C - =head2 bitfields in pack =head2 Cross compilation diff --git a/pp_pack.c b/pp_pack.c index 52d86d1..6160e64 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -123,32 +123,102 @@ S_mul128(pTHX_ SV *sv, U8 m) #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') #endif +#define UNPACK_ONLY_ONE 0x1 +#define UNPACK_DO_UTF8 0x2 -PP(pp_unpack) +STATIC char * +S_group_end(pTHX_ register char *pat, register char *patend, char ender) +{ + while (pat < patend) { + char c = *pat++; + + if (isSPACE(c)) + continue; + else if (c == ender) + return --pat; + else if (c == '#') { + while (pat < patend && *pat != '\n') + pat++; + continue; + } else if (c == '(') + pat = group_end(pat, patend, ')') + 1; + } + croak("No group ending character `%c' found", ender); +} + +/* Returns -1 on no count or on star */ +STATIC I32 +S_find_count(pTHX_ char **ppat, register char *patend, int *star) +{ + register char *pat = *ppat; + I32 len; + + *star = 0; + if (pat >= patend) + len = 1; + else if (*pat == '*') { + pat++; + *star = 1; + len = -1; + } + else if (isDIGIT(*pat) || *pat == '[') { + bool brackets = *pat == '['; + + if (brackets) + ++pat, len = 0; + else + len = *pat++ - '0'; + while (isDIGIT(*pat)) { + len = (len * 10) + (*pat++ - '0'); + if (len < 0) + croak("Repeat count in unpack overflows"); + } + if (brackets && *pat++ != ']') + croak("No repeat count ender ] found after digits"); + } + else + len = *star = -1; + *ppat = pat; + return len; +} + +STATIC char * +S_next_symbol(pTHX_ register char *pat, register char *patend) +{ + while (pat < patend) { + if (isSPACE(*pat)) + pat++; + else if (*pat == '#') { + pat++; + while (pat < patend && *pat != '\n') + pat++; + if (pat < patend) + pat++; + } + else + return pat; + } + return pat; +} + + +/* +=for apidoc unpack_str + +The engine implementing unpack() Perl function. + +=cut */ + +I32 +Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags) { dSP; - dPOPPOPssrl; - I32 start_sp_offset = SP - PL_stack_base; - I32 gimme = GIMME_V; - SV *sv; - STRLEN llen; - STRLEN rlen; - register char *pat = SvPV(left, llen); -#ifdef PACKED_IS_OCTETS - /* Packed side is assumed to be octets - so force downgrade if it - has been UTF-8 encoded by accident - */ - register char *s = SvPVbyte(right, rlen); -#else - register char *s = SvPV(right, rlen); -#endif - char *strend = s + rlen; - char *strbeg = s; - register char *patend = pat + llen; I32 datumtype; register I32 len; register I32 bits = 0; register char *str; + SV *sv; + I32 start_sp_offset = SP - PL_stack_base; /* These must not be in registers: */ short ashort; @@ -171,26 +241,18 @@ PP(pp_unpack) NV cdouble = 0.0; const int bits_in_uv = 8 * sizeof(culong); int commas = 0; - int star; + int star; /* 1 if count is *, -1 if no count given, -2 for / */ #ifdef PERL_NATINT_PACK int natint; /* native integer */ int unatint; /* unsigned native integer */ #endif - bool do_utf8 = DO_UTF8(right); + bool do_utf8 = flags & UNPACK_DO_UTF8; - while (pat < patend) { - reparse: + while ((pat = next_symbol(pat, patend)) < patend) { datumtype = *pat++ & 0xFF; #ifdef PERL_NATINT_PACK natint = 0; #endif - if (isSPACE(datumtype)) - continue; - if (datumtype == '#') { - while (pat < patend && *pat != '\n') - pat++; - continue; - } if (*pat == '!') { char *natstr = "sSiIlL"; @@ -201,69 +263,83 @@ PP(pp_unpack) pat++; } else - DIE(aTHX_ "'!' allowed only after types %s", natstr); - } - star = 0; - if (pat >= patend) - len = 1; - else if (*pat == '*') { - len = strend - strbeg; /* long enough */ - pat++; - star = 1; - } - else if (isDIGIT(*pat)) { - len = *pat++ - '0'; - while (isDIGIT(*pat)) { - len = (len * 10) + (*pat++ - '0'); - if (len < 0) - DIE(aTHX_ "Repeat count in unpack overflows"); - } + croak("'!' allowed only after types %s", natstr); } - else - len = (datumtype != '@'); + len = find_count(&pat, patend, &star); + if (star > 0) + len = strend - strbeg; /* long enough */ + else if (star < 0) /* No explicit len */ + len = datumtype != '@'; + redo_switch: switch(datumtype) { default: - DIE(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype); + croak("Invalid type in unpack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ if (commas++ == 0 && ckWARN(WARN_UNPACK)) Perl_warner(aTHX_ WARN_UNPACK, "Invalid type in unpack: '%c'", (int)datumtype); break; case '%': - if (len == 1 && pat[-1] != '1') - len = 16; + if (len == 1 && pat[-1] != '1' && pat[-1] != ']') + len = 16; /* len is not specified */ checksum = len; culong = 0; cdouble = 0; - if (pat < patend) - goto reparse; + continue; break; + case '(': + { + char *beg = pat; + char *ss = s; /* Move from register */ + + if (star >= 0) + croak("()-group starts with a count"); + aptr = group_end(beg, patend, ')'); + pat = aptr + 1; + if (star != -2) { + len = find_count(&pat, patend, &star); + if (star < 0) /* No count */ + len = 1; + else if (star > 0) /* Star */ + len = strend - strbeg; /* long enough? */ + } + PUTBACK; + while (len--) { + unpack_str(beg, aptr, ss, strbeg, strend, &ss, + ocnt + SP - PL_stack_base - start_sp_offset, flags); + if (star > 0 && ss == strend) + break; /* No way to continue */ + } + SPAGAIN; + s = ss; + break; + } case '@': if (len > strend - strbeg) - DIE(aTHX_ "@ outside of string"); + croak("@ outside of string"); s = strbeg + len; break; case 'X': if (len > s - strbeg) - DIE(aTHX_ "X outside of string"); + croak("X outside of string"); s -= len; break; case 'x': if (len > strend - s) - DIE(aTHX_ "x outside of string"); + croak("x outside of string"); s += len; break; case '/': - if (start_sp_offset >= SP - PL_stack_base) - DIE(aTHX_ "/ must follow a numeric type"); + if (ocnt + SP - PL_stack_base - start_sp_offset <= 0) + croak("/ must follow a numeric type"); datumtype = *pat++; if (*pat == '*') pat++; /* ignore '*' for compatibility with pack */ if (isDIGIT(*pat)) - DIE(aTHX_ "/ cannot take a count" ); + croak("/ cannot take a count" ); len = POPi; - star = 0; + star = -2; goto redo_switch; case 'A': case 'Z': @@ -280,7 +356,7 @@ PP(pp_unpack) s = SvPVX(sv); while (*s) s++; - if (star) /* exact for 'Z*' */ + if (star > 0) /* exact for 'Z*' */ len = s - SvPVX(sv) + 1; } else { /* 'A' strips both nulls and spaces */ @@ -297,7 +373,7 @@ PP(pp_unpack) break; case 'B': case 'b': - if (star || len > (strend - s) * 8) + if (star > 0 || len > (strend - s) * 8) len = (strend - s) * 8; if (checksum) { if (!PL_bitcount) { @@ -363,7 +439,7 @@ PP(pp_unpack) break; case 'H': case 'h': - if (star || len > (strend - s) * 2) + if (star > 0 || len > (strend - s) * 2) len = (strend - s) * 2; sv = NEWSV(35, len + 1); SvCUR_set(sv, len); @@ -926,12 +1002,12 @@ PP(pp_unpack) } } if ((s >= strend) && bytes) - DIE(aTHX_ "Unterminated compressed integer"); + croak("Unterminated compressed integer"); } break; case 'P': - if (star) - DIE(aTHX_ "P must have an explicit size"); + if (star > 0) + croak("P must have an explicit size"); EXTEND(SP, 1); if (sizeof(char*) > strend - s) break; @@ -1146,17 +1222,48 @@ PP(pp_unpack) XPUSHs(sv_2mortal(sv)); checksum = 0; } - if (gimme != G_ARRAY && - SP - PL_stack_base == start_sp_offset + 1) { - /* do first one only unless in list context + if ((flags & UNPACK_ONLY_ONE) + && SP - PL_stack_base == start_sp_offset + 1) { + /* do first one only unless in list context / is implmented by unpacking the count, then poping it from the stack, so must check that we're not in the middle of a / */ if ((pat >= patend) || *pat != '/') - RETURN; + break; } } - if (SP - PL_stack_base == start_sp_offset && gimme == G_SCALAR) - PUSHs(&PL_sv_undef); + if (new_s) + *new_s = s; + PUTBACK; + return SP - PL_stack_base - start_sp_offset; +} + +PP(pp_unpack) +{ + dSP; + dPOPPOPssrl; + I32 gimme = GIMME_V; + STRLEN llen; + STRLEN rlen; + register char *pat = SvPV(left, llen); +#ifdef PACKED_IS_OCTETS + /* Packed side is assumed to be octets - so force downgrade if it + has been UTF-8 encoded by accident + */ + register char *s = SvPVbyte(right, rlen); +#else + register char *s = SvPV(right, rlen); +#endif + char *strend = s + rlen; + register char *patend = pat + llen; + register I32 cnt; + + PUTBACK; + cnt = unpack_str(pat, patend, s, s, strend, NULL, 0, + ((gimme == G_SCALAR) ? UNPACK_ONLY_ONE : 0) + | (DO_UTF8(right) ? UNPACK_DO_UTF8 : 0)); + SPAGAIN; + if ( !cnt && gimme == G_SCALAR ) + PUSHs(&PL_sv_undef); RETURN; } @@ -1264,22 +1371,27 @@ S_div128(pTHX_ SV *pnum, bool *done) return (m); } +#define PACK_CHILD 0x1 -PP(pp_pack) +/* +=for apidoc pack_cat + +The engine implementing pack() Perl function. + +=cut */ + +void +Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags) { - dSP; dMARK; dORIGMARK; dTARGET; - register SV *cat = TARG; register I32 items; STRLEN fromlen; - register char *pat = SvPVx(*++MARK, fromlen); - char *patcopy; - register char *patend = pat + fromlen; register I32 len; I32 datumtype; SV *fromstr; /*SUPPRESS 442*/ static char null10[] = {0,0,0,0,0,0,0,0,0,0}; static char *space10 = " "; + int star; /* These must not be in registers: */ char achar; @@ -1300,30 +1412,19 @@ PP(pp_pack) int natint; /* native integer */ #endif - items = SP - MARK; - MARK++; - sv_setpvn(cat, "", 0); - patcopy = pat; - while (pat < patend) { + items = endlist - beglist; +#ifndef PACKED_IS_OCTETS + pat = next_symbol(pat, patend); + if (pat < patend && *pat == 'U' && !flags) + SvUTF8_on(cat); +#endif + while ((pat = next_symbol(pat, patend)) < patend) { SV *lengthcode = Nullsv; -#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no) +#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) datumtype = *pat++ & 0xFF; #ifdef PERL_NATINT_PACK natint = 0; #endif - if (isSPACE(datumtype)) { - patcopy++; - continue; - } -#ifndef PACKED_IS_OCTETS - if (datumtype == 'U' && pat == patcopy+1) - SvUTF8_on(cat); -#endif - if (datumtype == '#') { - while (pat < patend && *pat != '\n') - pat++; - continue; - } if (*pat == '!') { char *natstr = "sSiIlL"; @@ -1334,40 +1435,31 @@ PP(pp_pack) pat++; } else - DIE(aTHX_ "'!' allowed only after types %s", natstr); + croak("'!' allowed only after types %s", natstr); } - if (*pat == '*') { + len = find_count(&pat, patend, &star); + if (star > 0) /* Count is '*' */ len = strchr("@Xxu", datumtype) ? 0 : items; - pat++; - } - else if (isDIGIT(*pat)) { - len = *pat++ - '0'; - while (isDIGIT(*pat)) { - len = (len * 10) + (*pat++ - '0'); - if (len < 0) - DIE(aTHX_ "Repeat count in pack overflows"); - } - } - else + else if (star < 0) /* Default len */ len = 1; - if (*pat == '/') { + if (*pat == '/') { /* doing lookahead how... */ ++pat; if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*') - DIE(aTHX_ "/ must be followed by a*, A* or Z*"); + croak("/ must be followed by a*, A* or Z*"); lengthcode = sv_2mortal(newSViv(sv_len(items > 0 - ? *MARK : &PL_sv_no) + ? *beglist : &PL_sv_no) + (*pat == 'Z' ? 1 : 0))); } switch(datumtype) { default: - DIE(aTHX_ "Invalid type in pack: '%c'", (int)datumtype); + croak("Invalid type in pack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ if (commas++ == 0 && ckWARN(WARN_PACK)) Perl_warner(aTHX_ WARN_PACK, "Invalid type in pack: '%c'", (int)datumtype); break; case '%': - DIE(aTHX_ "%% may only be used in unpack"); + croak("%% may only be used in unpack"); case '@': len -= SvCUR(cat); if (len > 0) @@ -1376,10 +1468,35 @@ PP(pp_pack) if (len > 0) goto shrink; break; + case '(': + { + char *beg = pat; + SV **savebeglist = beglist; /* beglist de-register-ed */ + + if (star >= 0) + croak("()-group starts with a count"); + aptr = group_end(beg, patend, ')'); + pat = aptr + 1; + if (star != -2) { + len = find_count(&pat, patend, &star); + if (star < 0) /* No count */ + len = 1; + else if (star > 0) /* Star */ + len = items; /* long enough? */ + } + while (len--) { + pack_cat(cat, beg, aptr, savebeglist, endlist, + &savebeglist, PACK_CHILD); + if (star > 0 && savebeglist == endlist) + break; /* No way to continue */ + } + beglist = savebeglist; + break; + } case 'X': shrink: if (SvCUR(cat) < len) - DIE(aTHX_ "X outside of string"); + croak("X outside of string"); SvCUR(cat) -= len; *SvEND(cat) = '\0'; break; @@ -1396,7 +1513,7 @@ PP(pp_pack) case 'a': fromstr = NEXTFROM; aptr = SvPV(fromstr, fromlen); - if (pat[lengthcode ? -2 : -1] == '*') { /* -2 after '/' */ + if (star > 0) { /* -2 after '/' */ len = fromlen; if (datumtype == 'Z') ++len; @@ -1434,7 +1551,7 @@ PP(pp_pack) fromstr = NEXTFROM; saveitems = items; str = SvPV(fromstr, fromlen); - if (pat[-1] == '*') + if (star > 0) len = fromlen; aint = SvCUR(cat); SvCUR(cat) += (len+7)/8; @@ -1490,7 +1607,7 @@ PP(pp_pack) fromstr = NEXTFROM; saveitems = items; str = SvPV(fromstr, fromlen); - if (pat[-1] == '*') + if (star > 0) len = fromlen; aint = SvCUR(cat); SvCUR(cat) += (len+1)/2; @@ -1668,7 +1785,7 @@ PP(pp_pack) adouble = Perl_floor(SvNV(fromstr)); if (adouble < 0) - DIE(aTHX_ "Cannot compress negative numbers"); + croak("Cannot compress negative numbers"); if ( #if UVSIZE > 4 && UVSIZE >= NVSIZE @@ -1702,7 +1819,7 @@ PP(pp_pack) /* Copy string and check for compliance */ from = SvPV(fromstr, len); if ((norm = is_an_int(from, len)) == NULL) - DIE(aTHX_ "can compress only unsigned integer"); + croak("can compress only unsigned integer"); New('w', result, len, char); in = result + len; @@ -1722,7 +1839,7 @@ PP(pp_pack) double next = floor(adouble / 128); *--in = (unsigned char)(adouble - (next * 128)) | 0x80; if (in <= buf) /* this cannot happen ;-) */ - DIE(aTHX_ "Cannot compress integer"); + croak("Cannot compress integer"); adouble = next; } while (adouble > 0); buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ @@ -1737,7 +1854,7 @@ PP(pp_pack) /* Copy string and check for compliance */ from = SvPV(fromstr, len); if ((norm = is_an_int(from, len)) == NULL) - DIE(aTHX_ "can compress only unsigned integer"); + croak("can compress only unsigned integer"); New('w', result, len, char); in = result + len; @@ -1888,10 +2005,28 @@ PP(pp_pack) break; } } + if (next_in_list) + *next_in_list = beglist; +} +#undef NEXTFROM + + +PP(pp_pack) +{ + dSP; dMARK; dORIGMARK; dTARGET; + register SV *cat = TARG; + STRLEN fromlen; + register char *pat = SvPVx(*++MARK, fromlen); + register char *patend = pat + fromlen; + + MARK++; + sv_setpvn(cat, "", 0); + + pack_cat(cat, pat, patend, MARK, SP + 1, NULL, 0); + SvSETMAGIC(cat); SP = ORIGMARK; PUSHs(cat); RETURN; } -#undef NEXTFROM diff --git a/proto.h b/proto.h index 963c70c..f3e894c 100644 --- a/proto.h +++ b/proto.h @@ -614,6 +614,7 @@ PERL_CALLCONV void Perl_set_numeric_local(pTHX); PERL_CALLCONV void Perl_set_numeric_radix(pTHX); PERL_CALLCONV void Perl_set_numeric_standard(pTHX); PERL_CALLCONV void Perl_require_pv(pTHX_ const char* pv); +PERL_CALLCONV void Perl_pack_cat(pTHX_ SV *cat, char *pat, char *patend, SV **beglist, SV **endlist, SV ***next_in_list, U32 flags); PERL_CALLCONV void Perl_pidgone(pTHX_ Pid_t pid, int status); PERL_CALLCONV void Perl_pmflag(pTHX_ U16* pmfl, int ch); PERL_CALLCONV OP* Perl_pmruntime(pTHX_ OP* pm, OP* expr, OP* repl); @@ -824,6 +825,7 @@ PERL_CALLCONV I32 Perl_unlnk(pTHX_ char* f); #if defined(USE_5005THREADS) PERL_CALLCONV void Perl_unlock_condpair(pTHX_ void* svv); #endif +PERL_CALLCONV I32 Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags); PERL_CALLCONV void Perl_unsharepvn(pTHX_ const char* sv, I32 len, U32 hash); PERL_CALLCONV void Perl_unshare_hek(pTHX_ HEK* hek); PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, OP* arg); @@ -1104,6 +1106,9 @@ STATIC void S_doencodes(pTHX_ SV* sv, char* s, I32 len); STATIC SV* S_mul128(pTHX_ SV *sv, U8 m); STATIC SV* S_is_an_int(pTHX_ char *s, STRLEN l); STATIC int S_div128(pTHX_ SV *pnum, bool *done); +STATIC char * S_next_symbol(pTHX_ char *pat, char *patend); +STATIC I32 S_find_count(pTHX_ char **ppat, char *patend, int *star); +STATIC char * S_group_end(pTHX_ char *pat, char *patend, char ender); #endif #if defined(PERL_IN_PP_CTL_C) || defined(PERL_DECL_PROT) diff --git a/t/op/pack.t b/t/op/pack.t index 6bbd737..c0f379b 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 1477; +plan tests => 1493; use strict; use warnings; @@ -711,3 +711,41 @@ foreach ( eval { my $t=unpack("P*", "abc") }; like($@, qr/P must have an explicit size/); } + +{ # Grouping constructs + my (@a, @b); + @a = unpack '(SL)', pack 'SLSLSL', 67..90; + is("@a", "67 68"); + @a = unpack '(SL)3', pack 'SLSLSL', 67..90; + @b = (67..72); + is("@a", "@b"); + @a = unpack '(SL)3', pack 'SLSLSLSL', 67..90; + is("@a", "@b"); + @a = unpack '(SL)[3]', pack 'SLSLSLSL', 67..90; + is("@a", "@b"); + @a = unpack '(SL)[2] SL', pack 'SLSLSLSL', 67..90; + is("@a", "@b"); + @a = unpack 'A/(SL)', pack 'ASLSLSLSL', 3, 67..90; + is("@a", "@b"); + @a = unpack 'A/(SL)SL', pack 'ASLSLSLSL', 2, 67..90; + is("@a", "@b"); + @a = unpack '(SL)*', pack 'SLSLSLSL', 67..90; + @b = (67..74); + is("@a", "@b"); + @a = unpack '(SL)*SL', pack 'SLSLSLSL', 67..90; + is("@a", "@b"); + eval { @a = unpack '(*SL)', '' }; + like($@, qr/\(\)-group starts with a count/); + eval { @a = unpack '(3SL)', '' }; + like($@, qr/\(\)-group starts with a count/); + eval { @a = unpack '([3]SL)', '' }; + like($@, qr/\(\)-group starts with a count/); + eval { @a = pack '(*SL)' }; + like($@, qr/\(\)-group starts with a count/); + @a = unpack '(SL)3 SL', pack '(SL)4', 67..74; + is("@a", "@b"); + @a = unpack '(SL)3 SL', pack '(SL)[4]', 67..74; + is("@a", "@b"); + @a = unpack '(SL)3 SL', pack '(SL)*', 67..74; + is("@a", "@b"); +}