From: Marcus Holland-Moritz Date: Mon, 3 May 2004 20:14:41 +0000 (+0200) Subject: Add byte-order group modifiers to (un)pack templates. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=66c611c54494622936416a3e5713bc7d44ef96ba;p=p5sagit%2Fp5-mst-13.2.git Add byte-order group modifiers to (un)pack templates. Follow-up on: #22734, #22745, #22753, #22754. Subject: Group modifiers in (un)pack templates Message-Id: <20040503201441.1b058e0d@r2d2> p4raw-id: //depot/perl@22780 --- diff --git a/perl.h b/perl.h index 1ee4756..ba7e3dc 100644 --- a/perl.h +++ b/perl.h @@ -3719,11 +3719,12 @@ typedef struct { char* patend; /* one after last char */ char* grpbeg; /* 1st char of ()-group */ char* grpend; /* end of ()-group */ - I32 code; /* template code (!) */ + I32 code; /* template code (!<>) */ I32 length; /* length/repeat count */ howlen_t howlen; /* how length is given */ int level; /* () nesting level */ U32 flags; /* /=4, comma=2, pack=1 */ + /* and group modifiers */ } tempsym_t; #include "thread.h" diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 923d0ab..51d260a 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1035,6 +1035,13 @@ indicates that such a conversion was attempted. upgradability. Upgrading to undef indicates an error in the code calling sv_upgrade. +=item Can't use '%c' in a group with different byte-order in %s + +(F) You attempted to force a different byte-order on a type +that is already inside a group with a byte-order modifier. +For example you cannot force little-endianness on a type that +is inside a big-endian group. + =item Can't use anonymous symbol table for method lookup (F) The internal routine that does method lookup was handed a symbol diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index c7fb1f8..a4e71b7 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -3344,6 +3344,10 @@ which the modifier is valid): < sSiIlLqQ Force little-endian byte-order on the type. jJfFdDpP (The "little end" touches the construct.) +The C> and C> modifiers can also be used on C<()>-groups, +in which case they force a certain byte-order on all components of +that group, including subgroups. + The following rules apply: =over 8 @@ -3557,12 +3561,12 @@ See also L. =item * -All integer and floating point formats as well as C

and C

may -be followed by the C> or C> modifiers to force big- or -little- endian byte-order, respectively. This is especially useful, -since C, C, C and C don't cover signed integers, 64-bit -integers and floating point values. However, there are some things -to keep in mind. +All integer and floating point formats as well as C

and C

and +C<()>-groups may be followed by the C> or C> modifiers +to force big- or little- endian byte-order, respectively. +This is especially useful, since C, C, C and C don't cover +signed integers, 64-bit integers and floating point values. However, +there are some things to keep in mind. Exchanging signed integers between different platforms only works if all platforms store them in the same format. Most platforms store @@ -3581,6 +3585,12 @@ but also very dangerous if you don't know exactly what you're doing. It is definetely not a general way to portably store floating point values. +When using C> or C> on an C<()>-group, this will affect +all types inside the group that accept the byte-order modifiers, +including all subgroups. It will silently be ignored for all other +types. You are not allowed to override the byte-order within a group +that already has a byte-order modifier suffix. + =item * Real numbers (floats and doubles) are in the native machine format only; @@ -3719,6 +3729,8 @@ Examples: # exactly the same $foo = pack('sflags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ @@ -503,6 +509,12 @@ S_next_symbol(pTHX_ register tempsym_t* symptr ) symptr->flags & FLAG_PACK ? "pack" : "unpack" ); } + /* look for group modifiers to inherit */ + if (TYPE_ENDIANNESS(symptr->flags)) { + if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code))) + inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags); + } + /* look for modifiers */ while (patptr < patend) { const char *allowed; @@ -514,24 +526,32 @@ S_next_symbol(pTHX_ register tempsym_t* symptr ) break; case '>': modifier = TYPE_IS_BIG_ENDIAN; - allowed = "sSiIlLqQjJfFdDpP"; + allowed = ENDIANNESS_ALLOWED_TYPES; break; case '<': modifier = TYPE_IS_LITTLE_ENDIAN; - allowed = "sSiIlLqQjJfFdDpP"; + allowed = ENDIANNESS_ALLOWED_TYPES; break; default: break; } + if (modifier == 0) break; + if (!strchr(allowed, TYPE_NO_MODIFIERS(code))) Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr, allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" ); - if ((code | modifier) == (code | TYPE_IS_BIG_ENDIAN | TYPE_IS_LITTLE_ENDIAN)) + + if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK) Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s", (int) TYPE_NO_MODIFIERS(code), symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) == + TYPE_ENDIANNESS_MASK) + Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s", + *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" ); + if (ckWARN(WARN_UNPACK)) { if (code & modifier) Perl_warner(aTHX_ packWARN(WARN_UNPACK), @@ -539,10 +559,14 @@ S_next_symbol(pTHX_ register tempsym_t* symptr ) *patptr, (int) TYPE_NO_MODIFIERS(code), symptr->flags & FLAG_PACK ? "pack" : "unpack" ); } + code |= modifier; patptr++; } + /* inherit modifiers */ + code |= inherited_modifiers; + /* look for count and/or / */ if (patptr < patend) { if (isDIGIT(*patptr)) { @@ -586,11 +610,11 @@ S_next_symbol(pTHX_ register tempsym_t* symptr ) if (patptr < patend) patptr++; } else { - if( *patptr == '/' ){ + if (*patptr == '/') { symptr->flags |= FLAG_SLASH; patptr++; - if( patptr < patend && - (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[') ) + if (patptr < patend && + (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[')) Perl_croak(aTHX_ "'/' does not take a repeat count in %s", symptr->flags & FLAG_PACK ? "pack" : "unpack" ); } @@ -739,6 +763,8 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c { char *ss = s; /* Move from register */ tempsym_t savsym = *symptr; + U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); + symptr->flags |= group_modifiers; symptr->patend = savsym.grpend; symptr->level++; PUTBACK; @@ -750,6 +776,7 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c } SPAGAIN; s = ss; + symptr->flags &= ~group_modifiers; savsym.flags = symptr->flags; *symptr = savsym; break; @@ -2252,6 +2279,8 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV case '(': { tempsym_t savsym = *symptr; + U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); + symptr->flags |= group_modifiers; symptr->patend = savsym.grpend; symptr->level++; while (len--) { @@ -2260,6 +2289,7 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV if (savsym.howlen == e_star && beglist == endlist) break; /* No way to continue */ } + symptr->flags &= ~group_modifiers; lookahead.flags = symptr->flags; *symptr = savsym; break; diff --git a/t/op/pack.t b/t/op/pack.t index d7a4137..2d4f6a3 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 13576; +plan tests => 13679; use strict; use warnings; @@ -214,10 +214,10 @@ sub list_eq ($$) { for my $mod (qw( ! < > )) { eval { $x = pack "a$mod", 42 }; - like ($@, qr/^'$mod' allowed only after types \w+ in pack/); + like ($@, qr/^'$mod' allowed only after types \S+ in pack/); eval { $x = unpack "a$mod", 'x'x8 }; - like ($@, qr/^'$mod' allowed only after types \w+ in unpack/); + like ($@, qr/^'$mod' allowed only after types \S+ in unpack/); } for my $mod (qw( <> >< !<> !>< >!< <>! >s)> (s(l(sl) (sl>s)< (s(l(sl) }) { + print "# testing pattern '$t'\n"; + eval { ($_) = unpack($t, 'x'x18); }; + like($@, qr/Can't use '[<>]' in a group with different byte-order in unpack/); + eval { $_ = pack($t, (0)x6); }; + like($@, qr/Can't use '[<>]' in a group with different byte-order in pack/); + } + + sub compress_template { + my $t = shift; + for my $mod (qw( < > )) { + $t =~ s/((?:(?:[SILQJFDP]!?$mod|[^SILQJFDP\W]!?)(?:\d+|\*|\[(?:[^]]+)\])?\/?){2,})/ + my $x = $1; $x =~ s!$mod!!g ? "($x)$mod" : $x /ieg; + } + return $t; + } + + is(pack('L', (0x12345678)x2), + pack('(((L1)1)<)(((L)1)1)>1', (0x12345678)x2)); + + my %templates = ( + 's<' => [-42], + 's [-42, -11, 12, 4711], + '(i [-11, -22, -33, 1000000, 1100, 2201, 3302, + -1000000, 32767, -32768, 1, -123456789 ], + '(I!<4(J<2L<)3)5' => [1 .. 65], + 'q [-50000000005, 60000000006], + 'f [3.14159, 111.11, 2222.22], + 'D [1e42, -128, 255, 1e-42], + 'n/a*' => ['/usr/bin/perl'], + 'C/a*S [qw(Just another Perl hacker)], + ); + + for my $tle (sort keys %templates) { + my @d = @{$templates{$tle}}; + my $tbe = $tle; + $tbe =~ y//; + for my $t ($tbe, $tle) { + my $c = compress_template($t); + print "# '$t' -> '$c'\n"; + SKIP: { + my $p1 = eval { pack $t, @d }; + skip "cannot pack '$t' on this perl", 5 if is_valid_error($@); + my $p2 = eval { pack $c, @d }; + is($@, ''); + is($p1, $p2); + s!(/[aAZ])\*!$1!g for $t, $c; + my @u1 = eval { unpack $t, $p1 }; + is($@, ''); + my @u2 = eval { unpack $c, $p2 }; + is($@, ''); + is(join('!', @u1), join('!', @u2)); + } + } + } +} + +{ # from Wolfgang Laun: fix in change #13163 my $s = 'ABC' x 10;