From: Ton Hospel Date: Sun, 27 Mar 2005 18:32:11 +0000 (+0000) Subject: Re: PATCH: byte count feature request for unpack X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=28be1210e1847088dea44932568ceeb145a4a140;p=p5sagit%2Fp5-mst-13.2.git Re: PATCH: byte count feature request for unpack Message-Id: (rework of a patch from Arne Ahrend ) p4raw-id: //depot/perl@24100 --- diff --git a/pod/perldiag.pod b/pod/perldiag.pod index c6354cc..c3035a1 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2775,6 +2775,11 @@ C<$arr[time]> instead of C<$arr[$time]>. parsing, but realloc() wouldn't give it more memory, virtual or otherwise. +=item '.' outside of string in pack + +(F) The argument to a '.' in your template tried to move the working +position to before the start of the packed string being built. + =item '@' outside of string in unpack (F) You had a template that specified an absolute position outside diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 87d3c9c..97d0b75 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3365,8 +3365,9 @@ of values, as follows: x A null byte. X Back up a byte. - @ Null fill to absolute position, counted from the start of - the innermost ()-group. + @ Null fill or truncate to absolute position, counted from the + start of the innermost ()-group. + . Null fill or truncate to absolute position specified by value. ( Start of a ()-group. Some letters in the TEMPLATE may optionally be followed by one or @@ -3380,6 +3381,10 @@ which the modifier is valid): nNvV Treat integers as signed instead of unsigned. + @. Specify position as byte offset in the internal + representation of the packed string. Efficient but + dangerous. + > sSiIlLqQ Force big-endian byte-order on the type. jJfFdDpP (The "big end" touches the construct.) @@ -3398,12 +3403,13 @@ The following rules apply: Each letter may optionally be followed by a number giving a repeat count. With all types except C, C, C, C, C, C, -C, C<@>, C, C and C

the pack function will gobble up that -many values from the LIST. A C<*> for the repeat count means to use -however many items are left, except for C<@>, C, C, where it is -equivalent to C<0>, and C, where it is equivalent to 1 (or 45, what -is the same). A numeric repeat count may optionally be enclosed in -brackets, as in C. +C, C<@>, C<.>, C, C and C

the pack function will gobble up +that many values from the LIST. A C<*> for the repeat count means to +use however many items are left, except for C<@>, C, C, where it +is equivalent to C<0>, for <.> where it means relative to string start +and C, where it is equivalent to 1 (or 45, which is the same). +A numeric repeat count may optionally be enclosed in brackets, as in +C. One can replace the numeric repeat count by a template enclosed in brackets; then the packed length of this template in bytes is used as a count. @@ -3417,6 +3423,17 @@ When used with C, C<*> results in the addition of a trailing null byte (so the packed result will be one longer than the byte C of the item). +When used with C<@>, the repeat count represents an offset from the start +of the innermost () group. + +When used with C<.>, the repeat count is used to determine the starting +position from where the value offset is calculated. If the repeat count +is 0, it's relative to the current position. If the repeat count is C<*>, +the offset is relative to the start of the packed string. And if its an +integer C the offset is relative to the start of the n-th innermost +() group (or the start of the string if C is bigger then the group +level). + The repeat count for C is interpreted as the maximal number of bytes to encode per line of output, with 0, 1 and 2 replaced by 45. The repeat count should not be more than 65. @@ -3689,7 +3706,6 @@ C<@> starts again at 0. Therefore, the result of is the string "\0a\0\0bc". - =item * C and C accept C modifier. In this case they act as @@ -3780,6 +3796,8 @@ Examples: $bar = pack('s@4l', 12, 34); # short 12, zero fill to position 4, long 34 # $foo eq $bar + $baz = pack('s.l', 12, 4, 34); + # short 12, zero fill to position 4, long 34 $foo = pack('nN', 42, 4711); # pack big-endian 16- and 32-bit unsigned integers diff --git a/pp_pack.c b/pp_pack.c index 98f1bed..dcebd5b 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -183,9 +183,9 @@ S_mul128(pTHX_ SV *sv, U8 m) #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF) #ifdef PERL_PACK_CAN_SHRIEKSIGN -#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV" +# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@." #else -#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX" +# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX" #endif #ifndef PERL_PACK_CAN_BYTEORDER @@ -761,13 +761,18 @@ S_measure_struct(pTHX_ tempsym_t* symptr) Perl_croak(aTHX_ "Invalid type '%c' in %s", (int)TYPE_NO_MODIFIERS(symptr->code), symptr->flags & FLAG_PACK ? "pack" : "unpack" ); +#ifdef PERL_PACK_CAN_SHRIEKSIGN + case '.' | TYPE_IS_SHRIEKING: + case '@' | TYPE_IS_SHRIEKING: +#endif case '@': + case '.': case '/': case 'U': /* XXXX Is it correct? */ case 'w': case 'u': Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", - (int)symptr->code, + (int) TYPE_NO_MODIFIERS(symptr->code), symptr->flags & FLAG_PACK ? "pack" : "unpack" ); case '%': size = 0; @@ -1177,11 +1182,11 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char UV cuv = 0; NV cdouble = 0.0; const int bits_in_uv = CHAR_BIT * sizeof(cuv); - char* strrelbeg = s; bool beyond = FALSE; bool explicit_length; bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; + symptr->strbeg = s - strbeg; while (next_symbol(symptr)) { packprops_t props; @@ -1242,6 +1247,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); symptr->flags |= group_modifiers; symptr->patend = savsym.grpend; + symptr->previous = &savsym; symptr->level++; PUTBACK; while (len--) { @@ -1253,14 +1259,46 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char break; /* No way to continue */ } SPAGAIN; - symptr->flags &= ~group_modifiers; - savsym.flags = symptr->flags; + savsym.flags = symptr->flags & ~group_modifiers; *symptr = savsym; break; } +#ifdef PERL_PACK_CAN_SHRIEKSIGN + case '.' | TYPE_IS_SHRIEKING: +#endif + case '.': { + char *from; + SV *sv; +#ifdef PERL_PACK_CAN_SHRIEKSIGN + bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING); +#else /* PERL_PACK_CAN_SHRIEKSIGN */ + bool u8 = utf8; +#endif + if (howlen == e_star) from = strbeg; + else if (len <= 0) from = s; + else { + tempsym_t *group = symptr; + + while (--len && group) group = group->previous; + from = group ? strbeg + group->strbeg : strbeg; + } + sv = from <= s ? + newSVuv( u8 ? (UV) utf8_length(from, s) : (UV) (s-from)) : + newSViv(-(u8 ? (IV) utf8_length(s, from) : (IV) (from-s))); + XPUSHs(sv_2mortal(sv)); + break; + } +#ifdef PERL_PACK_CAN_SHRIEKSIGN + case '@' | TYPE_IS_SHRIEKING: +#endif case '@': - if (utf8) { - s = strrelbeg; + s = strbeg + symptr->strbeg; +#ifdef PERL_PACK_CAN_SHRIEKSIGN + if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) +#else /* PERL_PACK_CAN_SHRIEKSIGN */ + if (utf8) +#endif + { while (len > 0) { if (s >= strend) Perl_croak(aTHX_ "'@' outside of string in unpack"); @@ -1270,9 +1308,9 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char if (s > strend) Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack"); } else { - if (len > strend - strrelbeg) + if (strend-s < len) Perl_croak(aTHX_ "'@' outside of string in unpack"); - s = strrelbeg + len; + s += len; } break; case 'X' | TYPE_IS_SHRIEKING: @@ -1379,7 +1417,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char !is_utf8_space((U8 *) ptr)) break; if (ptr >= s) ptr += UTF8SKIP(ptr); else ptr++; - if (ptr > s+len) + if (ptr > s+len) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); } else { for (ptr = s+len-1; ptr >= s; ptr--) @@ -2513,30 +2551,65 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) (int) TYPE_NO_MODIFIERS(datumtype)); case '%': Perl_croak(aTHX_ "'%%' may not be used in pack"); + { + char *from; +#ifdef PERL_PACK_CAN_SHRIEKSIGN + case '.' | TYPE_IS_SHRIEKING: +#endif + case '.': + if (howlen == e_star) from = start; + else if (len == 0) from = cur; + else { + tempsym_t *group = symptr; + + while (--len && group) group = group->previous; + from = group ? start + group->strbeg : start; + } + fromstr = NEXTFROM; + len = SvIV(fromstr); + goto resize; +#ifdef PERL_PACK_CAN_SHRIEKSIGN + case '@' | TYPE_IS_SHRIEKING: +#endif case '@': - if (utf8) { - char *s = start + symptr->strbeg; - while (len > 0 && s < cur) { - s += UTF8SKIP(s); - len--; + from = start + symptr->strbeg; + resize: +#ifdef PERL_PACK_CAN_SHRIEKSIGN + if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) +#else /* PERL_PACK_CAN_SHRIEKSIGN */ + if (utf8) +#endif + if (len >= 0) { + while (len && from < cur) { + from += UTF8SKIP(from); + len--; + } + if (from > cur) + Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); + if (len) { + /* Here we know from == cur */ + grow: + GROWING(0, cat, start, cur, len); + Zero(cur, len, char); + cur += len; + } else if (from < cur) { + len = cur - from; + goto shrink; + } else goto no_change; + } else { + cur = from; + len = -len; + goto utf8_shrink; } - if (s > cur) - Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); - if (len > 0) { - grow: - GROWING(0, cat, start, cur, len); - Zero(cur, len, char); - cur += len; - } else if (s < cur) cur = s; - else goto no_change; - } else { - len -= cur - (start+symptr->strbeg); + else { + len -= cur - from; if (len > 0) goto grow; + if (len == 0) goto no_change; len = -len; - if (len > 0) goto shrink; - else goto no_change; + goto shrink; } break; + } case '(': { tempsym_t savsym = *symptr; U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); @@ -2585,19 +2658,23 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) case 'X': if (utf8) { if (len < 1) goto no_change; + utf8_shrink: while (len > 0) { if (cur <= start) - Perl_croak(aTHX_ "'X' outside of string in pack"); + Perl_croak(aTHX_ "'%c' outside of string in pack", + (int) TYPE_NO_MODIFIERS(datumtype)); while (--cur, UTF8_IS_CONTINUATION(*cur)) { if (cur <= start) - Perl_croak(aTHX_ "'X' outside of string in pack"); + Perl_croak(aTHX_ "'%c' outside of string in pack", + (int) TYPE_NO_MODIFIERS(datumtype)); } len--; } } else { shrink: if (cur - start < len) - Perl_croak(aTHX_ "'X' outside of string in pack"); + Perl_croak(aTHX_ "'%c' outside of string in pack", + (int) TYPE_NO_MODIFIERS(datumtype)); cur -= len; } if (cur < start+symptr->strbeg) { diff --git a/t/op/pack.t b/t/op/pack.t index 08cf811..66d2ee6 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -12,7 +12,7 @@ my $no_endianness = $] > 5.009 ? '' : my $no_signedness = $] > 5.009 ? '' : "Signed/unsigned pack modifiers not available on this perl"; -plan tests => 14627; +plan tests => 14697; use strict; use warnings; @@ -507,7 +507,7 @@ foreach ( ['p', 'Z3', "foo", "fo\0"], ['u', 'Z*', "foo\0bar \0", "foo"], ['u', 'Z8', "foo\0bar \0", "foo"], -) +) { my ($what, $template, $in, $out) = @$_; my $got = $what eq 'u' ? (unpack $template, $in) : (pack $template, $in); @@ -612,7 +612,7 @@ sub numbers_with_total { } if ($calc_sum == $calc_sum - 1 && $calc_sum == $max_p1) { # we're into floating point (either by getting out of the range of - # UV arithmetic, or because we're doing a floating point checksum) + # UV arithmetic, or because we're doing a floating point checksum) # and our calculation of the checksum has become rounded up to # max_checksum + 1 $calc_sum = 0; @@ -858,13 +858,13 @@ SKIP: { ['a/a*/a*', '212ab345678901234567','ab3456789012'], ['a/a*/a*', '3012ab345678901234567', 'ab3456789012'], ['a/a*/b*', '212ab', $Is_EBCDIC ? '100000010100' : '100001100100'], - ) + ) { my ($pat, $in, $expect) = @$_; undef $x; eval { ($x) = unpack $pat, $in }; is($@, ''); - is($x, $expect) || + is($x, $expect) || printf "# list unpack ('$pat', '$in') gave %s, expected '$expect'\n", encode_list ($x); @@ -1000,7 +1000,7 @@ foreach ( ['@4', 'N', "\0"x4], ['a*@8a*', 'Camel', 'Dromedary', "Camel\0\0\0Dromedary"], ['a*@4a', 'Perl rules', '!', 'Perl!'], -) +) { my ($template, @in) = @$_; my $out = pop @in; @@ -1020,7 +1020,7 @@ foreach ( ['@3', "ice"], ['@2a2', "water", "te"], ['a*@1a3', "steam", "steam", "tea"], -) +) { my ($template, $in, @out) = @$_; my @got = eval {unpack $template, $in}; @@ -1205,7 +1205,7 @@ SKIP: { my @a = unpack( '(@1c)((@2c)@3c)', $buf ); is( "@a", "@b" ); - # various unpack count/code scenarios + # various unpack count/code scenarios my @Env = ( a => 'AAA', b => 'BBB' ); my $env = pack( 'S(S/A*S/A*)*', @Env/2, @Env ); @@ -1218,7 +1218,7 @@ SKIP: { # 2 4 5 7 10 1213 eval { @pup = unpack( 'S/(S/A* S/A*)', substr( $env, 0, 13 ) ) }; like( $@, qr{length/code after end of string} ); - + # postfix repeat count $env = pack( '(S/A* S/A*)' . @Env/2, @Env ); @@ -1251,7 +1251,7 @@ SKIP: { eval { my @inf = unpack( 'c/*a', "\x03AAA\x02BB" ); }; like( $@, qr{'/' does not take a repeat count} ); - # white space where possible + # white space where possible my @Env = ( a => 'AAA', b => 'BBB' ); my $env = pack( ' S ( S / A* S / A* )* ', @Env/2, @Env ); my @pup = unpack( ' S / ( S / A* S / A* ) ', $env ); @@ -1280,8 +1280,8 @@ SKIP: { # @ repeat default 1 my $s = pack( 'AA@A', 'A', 'B', 'C' ); my @c = unpack( 'AA@A', $s ); - is( $s, 'AC' ); - is( "@c", "A C C" ); + is( $s, 'AC' ); + is( "@c", "A C C" ); # no unpack code after / eval { my @a = unpack( "C/", "\3" ); }; @@ -1701,11 +1701,11 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # defaulting to $_ is(unpack('@5X!8W', $up), 0xf8, "X! moving on upgraded string"); is(pack("W2x", 0xfa, 0xe3), "\xfa\xe3\x00", "x on downgraded string"); - is(pack("W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00", + is(pack("W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00", "x! on downgraded string"); is(pack("W2x!2", 0xfa, 0xe3), "\xfa\xe3", "x! on downgraded string"); is(pack("U0C0W2x", 0xfa, 0xe3), "\xfa\xe3\x00", "x on upgraded string"); - is(pack("U0C0W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00", + is(pack("U0C0W2x!4", 0xfa, 0xe3), "\xfa\xe3\x00\x00", "x! on upgraded string"); is(pack("U0C0W2x!2", 0xfa, 0xe3), "\xfa\xe3", "x! on upgraded string"); is(pack("W2X", 0xfa, 0xe3), "\xfa", "X on downgraded string"); @@ -1713,13 +1713,13 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # defaulting to $_ is(pack("W2X!2", 0xfa, 0xe3), "\xfa\xe3", "X! on downgraded string"); is(pack("U0C0W2X!2", 0xfa, 0xe3), "\xfa\xe3", "X! on upgraded string"); is(pack("W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3", "X! on downgraded string"); - is(pack("U0C0W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3", + is(pack("U0C0W3X!2", 0xfa, 0xe3, 0xa6), "\xfa\xe3", "X! on upgraded string"); # backward eating through a ( moves the group starting point backwards - is(pack("a*(Xa)", "abc", "q"), "abq", + is(pack("a*(Xa)", "abc", "q"), "abq", "eating before strbeg moves it back"); - is(pack("a*(Xa)", "ab" . chr(512), "q"), "abq", + is(pack("a*(Xa)", "ab" . chr(512), "q"), "abq", "eating before strbeg moves it back"); # Check marked_upgrade @@ -1730,7 +1730,7 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # defaulting to $_ is(pack('W(W(Wa@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, $up, 0xa4, 0xa5, 0xa6), "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by a"); is(pack('W(W(WW@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, 256, 0xa4, 0xa5, 0xa6), - "\xa1\xa2\xa3\x{100}\x00\xa4\x00\xa5\x00\xa6", + "\xa1\xa2\xa3\x{100}\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by W"); is(pack('W(W(WU0aC0@3W)@6W)@9W', 0xa1, 0xa2, 0xa3, "a", 0xa4, 0xa5, 0xa6), "\xa1\xa2\xa3a\x00\xa4\x00\xa5\x00\xa6", "marked upgrade caused by U0"); @@ -1742,11 +1742,11 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # defaulting to $_ utf8::upgrade(my $high = "\xfeb"); for my $format ("a0", "A0", "Z0", "U0a0C0", "U0A0C0", "U0Z0C0") { - is(pack("a* $format a*", "ab", $down, "cd"), "abcd", + is(pack("a* $format a*", "ab", $down, "cd"), "abcd", "$format format on plain string"); is(pack("a* $format a*", "ab", $up, "cd"), "abcd", "$format format on upgraded string"); - is(pack("a* $format a*", $high, $down, "cd"), "\xfebcd", + is(pack("a* $format a*", $high, $down, "cd"), "\xfebcd", "$format format on plain string"); is(pack("a* $format a*", $high, $up, "cd"), "\xfebcd", "$format format on upgraded string"); @@ -1809,9 +1809,9 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # defaulting to $_ } { # unpack("A*", $unicode) strips general unicode spaces - is(unpack("A*", "ab \n\xa0 \0"), "ab \n\xa0", + is(unpack("A*", "ab \n\xa0 \0"), "ab \n\xa0", 'normal A* strip leaves \xa0'); - is(unpack("U0C0A*", "ab \n\xa0 \0"), "ab \n\xa0", + is(unpack("U0C0A*", "ab \n\xa0 \0"), "ab \n\xa0", 'normal A* strip leaves \xa0 even if it got upgraded for technical reasons'); is(unpack("A*", pack("a*(U0U)a*", "ab \n", 0xa0, " \0")), "ab", 'upgraded strings A* removes \xa0'); @@ -1822,3 +1822,151 @@ is(unpack('c'), 65, "one-arg unpack (change #18751)"); # defaulting to $_ is(unpack("A*", pack("U", 0x1680)), "", 'upgraded strings A* with nothing left'); } +{ + # Testing unpack . and .! + is(unpack(".", "ABCD"), 0, "offset at start of string is 0"); + is(unpack(".", ""), 0, "offset at start of empty string is 0"); + is(unpack("x3.", "ABCDEF"), 3, "simple offset works"); + is(unpack("x3.", "ABC"), 3, "simple offset at end of string works"); + is(unpack("x3.0", "ABC"), 0, "self offset is 0"); + is(unpack("x3(x2.)", "ABCDEF"), 2, "offset is relative to inner group"); + is(unpack("x3(X2.)", "ABCDEF"), -2, + "negative offset relative to inner group"); + is(unpack("x3(X2.2)", "ABCDEF"), 1, "offset is relative to inner group"); + is(unpack("x3(x2.0)", "ABCDEF"), 0, "self offset in group is still 0"); + is(unpack("x3(x2.2)", "ABCDEF"), 5, "offset counts groups"); + is(unpack("x3(x2.*)", "ABCDEF"), 5, "star offset is relative to start"); + + my $high = chr(8188) x 6; + is(unpack("x3(x2.)", $high), 2, "utf8 offset is relative to inner group"); + is(unpack("x3(X2.)", $high), -2, + "utf8 negative offset relative to inner group"); + is(unpack("x3(X2.2)", $high), 1, "utf8 offset counts groups"); + is(unpack("x3(x2.0)", $high), 0, "utf8 self offset in group is still 0"); + is(unpack("x3(x2.2)", $high), 5, "utf8 offset counts groups"); + is(unpack("x3(x2.*)", $high), 5, "utf8 star offset is relative to start"); + + is(unpack("U0x3(x2.)", $high), 2, + "U0 mode utf8 offset is relative to inner group"); + is(unpack("U0x3(X2.)", $high), -2, + "U0 mode utf8 negative offset relative to inner group"); + is(unpack("U0x3(X2.2)", $high), 1, + "U0 mode utf8 offset counts groups"); + is(unpack("U0x3(x2.0)", $high), 0, + "U0 mode utf8 self offset in group is still 0"); + is(unpack("U0x3(x2.2)", $high), 5, + "U0 mode utf8 offset counts groups"); + is(unpack("U0x3(x2.*)", $high), 5, + "U0 mode utf8 star offset is relative to start"); + + is(unpack("x3(x2.!)", $high), 2*3, + "utf8 offset is relative to inner group"); + is(unpack("x3(X2.!)", $high), -2*3, + "utf8 negative offset relative to inner group"); + is(unpack("x3(X2.!2)", $high), 1*3, + "utf8 offset counts groups"); + is(unpack("x3(x2.!0)", $high), 0, + "utf8 self offset in group is still 0"); + is(unpack("x3(x2.!2)", $high), 5*3, + "utf8 offset counts groups"); + is(unpack("x3(x2.!*)", $high), 5*3, + "utf8 star offset is relative to start"); + + is(unpack("U0x3(x2.!)", $high), 2, + "U0 mode utf8 offset is relative to inner group"); + is(unpack("U0x3(X2.!)", $high), -2, + "U0 mode utf8 negative offset relative to inner group"); + is(unpack("U0x3(X2.!2)", $high), 1, + "U0 mode utf8 offset counts groups"); + is(unpack("U0x3(x2.!0)", $high), 0, + "U0 mode utf8 self offset in group is still 0"); + is(unpack("U0x3(x2.!2)", $high), 5, + "U0 mode utf8 offset counts groups"); + is(unpack("U0x3(x2.!*)", $high), 5, + "U0 mode utf8 star offset is relative to start"); +} +{ + # Testing pack . and .! + is(pack("(a)5 .", 1..5, 3), "123", ". relative to string start, shorten"); + eval { () = pack("(a)5 .", 1..5, -3) }; + like($@, qr{'\.' outside of string in pack}, "Proper error message"); + is(pack("(a)5 .", 1..5, 8), "12345\x00\x00\x00", + ". relative to string start, extend"); + is(pack("(a)5 .", 1..5, 5), "12345", ". relative to string start, keep"); + + is(pack("(a)5 .0", 1..5, -3), "12", + ". relative to string current, shorten"); + is(pack("(a)5 .0", 1..5, 2), "12345\x00\x00", + ". relative to string current, extend"); + is(pack("(a)5 .0", 1..5, 0), "12345", + ". relative to string current, keep"); + + is(pack("(a)5 (.)", 1..5, -3), "12", + ". relative to group, shorten"); + is(pack("(a)5 (.)", 1..5, 2), "12345\x00\x00", + ". relative to group, extend"); + is(pack("(a)5 (.)", 1..5, 0), "12345", + ". relative to group, keep"); + + is(pack("(a)3 ((a)2 .)", 1..5, -2), "1", + ". relative to group, shorten"); + is(pack("(a)3 ((a)2 .)", 1..5, 2), "12345", + ". relative to group, keep"); + is(pack("(a)3 ((a)2 .)", 1..5, 4), "12345\x00\x00", + ". relative to group, extend"); + + is(pack("(a)3 ((a)2 .2)", 1..5, 2), "12", + ". relative to counted group, shorten"); + is(pack("(a)3 ((a)2 .2)", 1..5, 7), "12345\x00\x00", + ". relative to counted group, extend"); + is(pack("(a)3 ((a)2 .2)", 1..5, 5), "12345", + ". relative to counted group, keep"); + + is(pack("(a)3 ((a)2 .*)", 1..5, 2), "12", + ". relative to start, shorten"); + is(pack("(a)3 ((a)2 .*)", 1..5, 7), "12345\x00\x00", + ". relative to start, extend"); + is(pack("(a)3 ((a)2 .*)", 1..5, 5), "12345", + ". relative to start, keep"); + + is(pack('(a)5 (. @2 a)', 1..5, -3, "a"), "12\x00\x00a", + ". based shrink properly updates group starts"); + + is(pack("(W)3 ((W)2 .)", 0x301..0x305, -2), "\x{301}", + "utf8 . relative to group, shorten"); + is(pack("(W)3 ((W)2 .)", 0x301..0x305, 2), + "\x{301}\x{302}\x{303}\x{304}\x{305}", + "utf8 . relative to group, keep"); + is(pack("(W)3 ((W)2 .)", 0x301..0x305, 4), + "\x{301}\x{302}\x{303}\x{304}\x{305}\x00\x00", + "utf8 . relative to group, extend"); + + is(pack("(W)3 ((W)2 .!)", 0x301..0x305, -2), "\x{301}\x{302}", + "utf8 . relative to group, shorten"); + is(pack("(W)3 ((W)2 .!)", 0x301..0x305, 4), + "\x{301}\x{302}\x{303}\x{304}\x{305}", + "utf8 . relative to group, keep"); + is(pack("(W)3 ((W)2 .!)", 0x301..0x305, 6), + "\x{301}\x{302}\x{303}\x{304}\x{305}\x00\x00", + "utf8 . relative to group, extend"); + + is(pack('(W)5 (. @2 a)', 0x301..0x305, -3, "a"), + "\x{301}\x{302}\x00\x00a", + "utf8 . based shrink properly updates group starts"); +} +{ + # Testing @! + is(pack('a* @3', "abcde"), "abc", 'Test basic @'); + is(pack('a* @!3', "abcde"), "abc", 'Test basic @!'); + is(pack('a* @2', "\x{301}\x{302}\x{303}\x{304}\x{305}"), "\x{301}\x{302}", + 'Test basic utf8 @'); + is(pack('a* @!2', "\x{301}\x{302}\x{303}\x{304}\x{305}"), "\x{301}", + 'Test basic utf8 @!'); + + is(unpack('@4 a*', "abcde"), "e", 'Test basic @'); + is(unpack('@!4 a*', "abcde"), "e", 'Test basic @!'); + is(unpack('@4 a*', "\x{301}\x{302}\x{303}\x{304}\x{305}"), "\x{305}", + 'Test basic utf8 @'); + is(unpack('@!4 a*', "\x{301}\x{302}\x{303}\x{304}\x{305}"), + "\x{303}\x{304}\x{305}", 'Test basic utf8 @!'); +}