From: Ilya Zakharevich Date: Fri, 22 Feb 2002 03:55:13 +0000 (-0500) Subject: pack with a human face: part ]|[ X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=62f955573e85dc949b6e396624a9434d6c330a5f;p=p5sagit%2Fp5-mst-13.2.git pack with a human face: part ]|[ Message-ID: <20020222035513.A894@math.ohio-state.edu> p4raw-id: //depot/perl@14829 --- diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index dfacad5..e0ca04f 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3188,7 +3188,11 @@ brackets, as in C. One can replace the numeric repeat count by a template enclosed in brackets; then the packed length of this template in bytes is used as a count. -For example, C skips a long (it skips the number of bytes in a long). +For example, C skips a long (it skips the number of bytes in a long); +the template C<$t X[$t] $t> unpack()s twice what $t unpacks. +If the template in brackets contains alignment commands (such as C), +its packed length is calculated as if the start of the template has the maximal +possible alignment. When used with C, C<*> results in the addition of a trailing null byte (so the packed result will be one longer than the byte C @@ -3411,6 +3415,18 @@ character. =item * +C and C accept C modifier. In this case they act as +alignment commands: they jump forward/back to the closest position +aligned at a multiple of C bytes. For example, to pack() or +unpack() C's C one may need to +use the template C; this assumes that doubles must be +aligned on the double's size. + +For alignment commands C of 0 is equivalent to C of 1; +both result in no-ops. + +=item * + A comment in a TEMPLATE starts with C<#> and goes to the end of line. =item * diff --git a/pp_pack.c b/pp_pack.c index 6d77eab..5d620ee 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -148,6 +148,8 @@ S_group_end(pTHX_ register char *pat, register char *patend, char ender) Perl_croak(aTHX_ "No group ending character `%c' found", ender); } +#define TYPE_IS_SHRIEKING 0x100 + /* Returns the sizeof() struct described by pat */ STATIC I32 S_measure_struct(pTHX_ char *pat, register char *patend) @@ -170,12 +172,16 @@ S_measure_struct(pTHX_ char *pat, register char *patend) natint = 0; #endif if (*pat == '!') { - static const char *natstr = "sSiIlL"; + static const char *natstr = "sSiIlLxX"; if (strchr(natstr, datumtype)) { + if (datumtype == 'x' || datumtype == 'X') { + datumtype |= TYPE_IS_SHRIEKING; + } else { /* XXXX Should be redone similarly! */ #ifdef PERL_NATINT_PACK - natint = 1; + natint = 1; #endif + } pat++; } else @@ -219,14 +225,33 @@ S_measure_struct(pTHX_ char *pat, register char *patend) len = 1; else if (star > 0) /* Star */ Perl_croak(aTHX_ "%s not allowed in length fields", "count *"); + /* XXXX Theoretically, we need to measure many times at different + positions, since the subexpression may contain + alignment commands, but be not of aligned length. + Need to detect this and croak(). */ size = measure_struct(beg, end); break; } + case 'X' | TYPE_IS_SHRIEKING: + /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. */ + if (!len) /* Avoid division by 0 */ + len = 1; + len = total % len; /* Assumed: the start is aligned. */ + /* FALL THROUGH */ case 'X': size = -1; if (total < len) Perl_croak(aTHX_ "X outside of string"); break; + case 'x' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + star = total % len; /* Assumed: the start is aligned. */ + if (star) /* Other portable ways? */ + len = len - star; + else + len = 0; + /* FALL THROUGH */ case 'x': case 'A': case 'Z': @@ -317,7 +342,7 @@ S_measure_struct(pTHX_ char *pat, register char *patend) STATIC I32 S_find_count(pTHX_ char **ppat, register char *patend, int *star) { - register char *pat = *ppat; + char *pat = *ppat; I32 len; *star = 0; @@ -328,27 +353,22 @@ S_find_count(pTHX_ char **ppat, register char *patend, int *star) *star = 1; len = -1; } - else if (isDIGIT(*pat) || *pat == '[') { - bool brackets = *pat == '['; - - if (brackets) { - ++pat, len = 0; - if (!isDIGIT(*pat)) { - char *end = group_end(pat, patend, ']'); - - *ppat = end + 1; - return measure_struct(pat, end); - } - } - else - len = *pat++ - '0'; + else if (isDIGIT(*pat)) { + len = *pat++ - '0'; while (isDIGIT(*pat)) { len = (len * 10) + (*pat++ - '0'); - if (len < 0) - Perl_croak(aTHX_ "Repeat count in unpack overflows"); + if (len < 0) /* 50% chance of catching... */ + Perl_croak(aTHX_ "Repeat count in pack/unpack overflows"); } - if (brackets && *pat++ != ']') - Perl_croak(aTHX_ "No repeat count ender ] found after digits"); + } + else if (*pat == '[') { + char *end = group_end(++pat, patend, ']'); + + len = 0; + *ppat = end + 1; + if (isDIGIT(*pat)) + return find_count(&pat, end, star); + return measure_struct(pat, end); } else len = *star = -1; @@ -434,12 +454,16 @@ Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char * && (datumtype != '/') ) break; if (*pat == '!') { - static const char natstr[] = "sSiIlL"; + static const char natstr[] = "sSiIlLxX"; if (strchr(natstr, datumtype)) { + if (datumtype == 'x' || datumtype == 'X') { + datumtype |= TYPE_IS_SHRIEKING; + } else { /* XXXX Should be redone similarly! */ #ifdef PERL_NATINT_PACK - natint = 1; + natint = 1; #endif + } pat++; } else @@ -500,11 +524,25 @@ Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char * Perl_croak(aTHX_ "@ outside of string"); s = strbeg + len; break; + case 'X' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + len = (s - strbeg) % len; + /* FALL THROUGH */ case 'X': if (len > s - strbeg) Perl_croak(aTHX_ "X outside of string"); s -= len; break; + case 'x' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + aint = (s - strbeg) % len; + if (aint) /* Other portable ways? */ + len = len - aint; + else + len = 0; + /* FALL THROUGH */ case 'x': if (len > strend - s) Perl_croak(aTHX_ "x outside of string"); @@ -1598,12 +1636,16 @@ Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beg natint = 0; #endif if (*pat == '!') { - static const char natstr[] = "sSiIlL"; + static const char natstr[] = "sSiIlLxX"; if (strchr(natstr, datumtype)) { + if (datumtype == 'x' || datumtype == 'X') { + datumtype |= TYPE_IS_SHRIEKING; + } else { /* XXXX Should be redone similarly! */ #ifdef PERL_NATINT_PACK - natint = 1; + natint = 1; #endif + } pat++; } else @@ -1665,6 +1707,11 @@ Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beg beglist = savebeglist; break; } + case 'X' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + len = (SvCUR(cat)) % len; + /* FALL THROUGH */ case 'X': shrink: if (SvCUR(cat) < len) @@ -1672,6 +1719,15 @@ Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beg SvCUR(cat) -= len; *SvEND(cat) = '\0'; break; + case 'x' | TYPE_IS_SHRIEKING: + if (!len) /* Avoid division by 0 */ + len = 1; + aint = (SvCUR(cat)) % len; + if (aint) /* Other portable ways? */ + len = len - aint; + else + len = 0; + /* FALL THROUGH */ case 'x': grow: while (len >= 10) { diff --git a/t/op/pack.t b/t/op/pack.t index 5984be5..f217934 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 3943; +plan tests => 5179; use strict; use warnings; @@ -791,9 +791,9 @@ foreach ( # print "# junk1=$junk1\n"; my $p = pack $junk1, @list2; my $half = int( (length $p)/2 ); - for my $move ('', "X$half", 'x1', "x$half") { + for my $move ('', "X$half", "X!$half", 'x1', 'x!8', "x$half") { my $junk = "$junk1 $move"; - # print "# junk=$junk list=(@list2)\n"; + # print "# junk='$junk', list=(@list2)\n"; $p = pack "$junk $end", @list2, @end; my @l = unpack "x[$junk] $end", $p; is(scalar @l, scalar @end); @@ -808,3 +808,41 @@ foreach ( # XXXX no spaces are allowed in pack... In pack only before the slash... is(scalar unpack('A /A Z20', pack 'A/A* Z20', 'bcde', 'xxxxx'), 'bcde'); is(scalar unpack('A /A /A Z20', '3004bcde'), 'bcde'); + +{ # X! and x! + my $t = 'C[3] x!8 C[2]'; + my @a = (0x73..0x77); + my $p = pack($t, @a); + is($p, "\x73\x74\x75\0\0\0\0\0\x76\x77"); + my @b = unpack $t, $p; + is(scalar @b, scalar @a); + is("@b", "@a", 'x!8'); + $t = 'x[5] C[6] X!8 C[2]'; + @a = (0x73..0x7a); + $p = pack($t, @a); + is($p, "\0\0\0\0\0\x73\x74\x75\x79\x7a"); + @b = unpack $t, $p; + @a = (0x73..0x75, 0x79, 0x7a, 0x79, 0x7a); + is(scalar @b, scalar @a); + is("@b", "@a"); +} + +{ # struct {char c1; double d; char cc[2];} + my $t = 'C x![d] d C[2]'; + my @a = (173, 1.283476517e-45, 42, 215); + my $p = pack $t, @a; + ok( length $p); + my @b = unpack "$t X[$t] $t", $p; # Extract, step back, extract again + is(scalar @b, 2 * scalar @a); + is("@b", "@a @a"); + + my $warning; + local $SIG{__WARN__} = sub { + $warning = $_[0]; + }; + @b = unpack "x[C] x[$t] X[$t] X[C] $t", "$p\0"; + + is($warning, undef); + is(scalar @b, scalar @a); + is("@b", "@a"); +}