char* patend; /* one after last char */
char* grpbeg; /* 1st char of ()-group */
char* grpend; /* end of ()-group */
- I32 code; /* template code (!) */
+ I32 code; /* template code (!<>) */
I32 length; /* length/repeat count */
howlen_t howlen; /* how length is given */
int level; /* () nesting level */
U32 flags; /* /=4, comma=2, pack=1 */
+ /* and group modifiers */
} tempsym_t;
#include "thread.h"
upgradability. Upgrading to undef indicates an error in the code
calling sv_upgrade.
+=item Can't use '%c' in a group with different byte-order in %s
+
+(F) You attempted to force a different byte-order on a type
+that is already inside a group with a byte-order modifier.
+For example you cannot force little-endianness on a type that
+is inside a big-endian group.
+
=item Can't use anonymous symbol table for method lookup
(F) The internal routine that does method lookup was handed a symbol
< sSiIlLqQ Force little-endian byte-order on the type.
jJfFdDpP (The "little end" touches the construct.)
+The C<E<gt>> and C<E<lt>> modifiers can also be used on C<()>-groups,
+in which case they force a certain byte-order on all components of
+that group, including subgroups.
+
The following rules apply:
=over 8
=item *
-All integer and floating point formats as well as C<p> and C<P> may
-be followed by the C<E<gt>> or C<E<lt>> modifiers to force big- or
-little- endian byte-order, respectively. This is especially useful,
-since C<n>, C<N>, C<v> and C<V> don't cover signed integers, 64-bit
-integers and floating point values. However, there are some things
-to keep in mind.
+All integer and floating point formats as well as C<p> and C<P> and
+C<()>-groups may be followed by the C<E<gt>> or C<E<lt>> modifiers
+to force big- or little- endian byte-order, respectively.
+This is especially useful, since C<n>, C<N>, C<v> and C<V> don't cover
+signed integers, 64-bit integers and floating point values. However,
+there are some things to keep in mind.
Exchanging signed integers between different platforms only works
if all platforms store them in the same format. Most platforms store
It is definetely not a general way to portably store floating point
values.
+When using C<E<gt>> or C<E<lt>> on an C<()>-group, this will affect
+all types inside the group that accept the byte-order modifiers,
+including all subgroups. It will silently be ignored for all other
+types. You are not allowed to override the byte-order within a group
+that already has a byte-order modifier suffix.
+
=item *
Real numbers (floats and doubles) are in the native machine format only;
# exactly the same
$foo = pack('s<l<', -42, 4711);
# pack little-endian 16- and 32-bit signed integers
+ $foo = pack('(sl)<', -42, 4711);
+ # exactly the same
The same template may generally also be used in unpack().
/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
#define MAX_SUB_TEMPLATE_LEVEL 100
-/* flags */
+/* flags (note that type modifiers can also be used as flags!) */
#define FLAG_UNPACK_ONLY_ONE 0x10
#define FLAG_UNPACK_DO_UTF8 0x08
#define FLAG_SLASH 0x04
#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
#endif
+/* type modifiers */
#define TYPE_IS_SHRIEKING 0x100
#define TYPE_IS_BIG_ENDIAN 0x200
#define TYPE_IS_LITTLE_ENDIAN 0x400
#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
+#define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
#define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
+#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
+#define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
+
#define DO_BO_UNPACK(var, type) \
STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
default: break; \
#define DO_BO_PACK(var, type) \
STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
default: break; \
#define DO_BO_UNPACK_PTR(var, type, pre_cast) \
STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: \
var = (void *) my_betoh ## type ((pre_cast) var); \
break; \
#define DO_BO_PACK_PTR(var, type, pre_cast) \
STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: \
var = (void *) my_htobe ## type ((pre_cast) var); \
break; \
} STMT_END
#define BO_CANT_DOIT(action, type) \
- STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ STMT_START { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: \
Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
"platform", #action, #type); \
defined(my_htoben) && defined(my_betohn)
# define DO_BO_UNPACK_N(var, type) \
STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
default: break; \
# define DO_BO_PACK_N(var, type) \
STMT_START { \
- switch (datumtype & TYPE_ENDIANNESS_MASK) { \
+ switch (TYPE_ENDIANNESS(datumtype)) { \
case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
default: break; \
} else {
/* We should have found a template code */
I32 code = *patptr++ & 0xFF;
+ U32 inherited_modifiers = 0;
if (code == ','){ /* grandfather in commas but with a warning */
if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
}
+ /* look for group modifiers to inherit */
+ if (TYPE_ENDIANNESS(symptr->flags)) {
+ if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
+ inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
+ }
+
/* look for modifiers */
while (patptr < patend) {
const char *allowed;
break;
case '>':
modifier = TYPE_IS_BIG_ENDIAN;
- allowed = "sSiIlLqQjJfFdDpP";
+ allowed = ENDIANNESS_ALLOWED_TYPES;
break;
case '<':
modifier = TYPE_IS_LITTLE_ENDIAN;
- allowed = "sSiIlLqQjJfFdDpP";
+ allowed = ENDIANNESS_ALLOWED_TYPES;
break;
default:
break;
}
+
if (modifier == 0)
break;
+
if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
allowed, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
- if ((code | modifier) == (code | TYPE_IS_BIG_ENDIAN | TYPE_IS_LITTLE_ENDIAN))
+
+ if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
(int) TYPE_NO_MODIFIERS(code),
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+ else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
+ TYPE_ENDIANNESS_MASK)
+ Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
+ *patptr, symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+
if (ckWARN(WARN_UNPACK)) {
if (code & modifier)
Perl_warner(aTHX_ packWARN(WARN_UNPACK),
*patptr, (int) TYPE_NO_MODIFIERS(code),
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
}
+
code |= modifier;
patptr++;
}
+ /* inherit modifiers */
+ code |= inherited_modifiers;
+
/* look for count and/or / */
if (patptr < patend) {
if (isDIGIT(*patptr)) {
if (patptr < patend)
patptr++;
} else {
- if( *patptr == '/' ){
+ if (*patptr == '/') {
symptr->flags |= FLAG_SLASH;
patptr++;
- if( patptr < patend &&
- (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[') )
+ if (patptr < patend &&
+ (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
symptr->flags & FLAG_PACK ? "pack" : "unpack" );
}
{
char *ss = s; /* Move from register */
tempsym_t savsym = *symptr;
+ U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
+ symptr->flags |= group_modifiers;
symptr->patend = savsym.grpend;
symptr->level++;
PUTBACK;
}
SPAGAIN;
s = ss;
+ symptr->flags &= ~group_modifiers;
savsym.flags = symptr->flags;
*symptr = savsym;
break;
case '(':
{
tempsym_t savsym = *symptr;
+ U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
+ symptr->flags |= group_modifiers;
symptr->patend = savsym.grpend;
symptr->level++;
while (len--) {
if (savsym.howlen == e_star && beglist == endlist)
break; /* No way to continue */
}
+ symptr->flags &= ~group_modifiers;
lookahead.flags = symptr->flags;
*symptr = savsym;
break;
require './test.pl';
}
-plan tests => 13576;
+plan tests => 13679;
use strict;
use warnings;
for my $mod (qw( ! < > )) {
eval { $x = pack "a$mod", 42 };
- like ($@, qr/^'$mod' allowed only after types \w+ in pack/);
+ like ($@, qr/^'$mod' allowed only after types \S+ in pack/);
eval { $x = unpack "a$mod", 'x'x8 };
- like ($@, qr/^'$mod' allowed only after types \w+ in unpack/);
+ like ($@, qr/^'$mod' allowed only after types \S+ in unpack/);
}
for my $mod (qw( <> >< !<> !>< <!> >!< <>! ><! )) {
}
{
+ print "# group modifiers\n";
+
+ for my $t (qw{ (s<)< (sl>s)> (s(l(sl)<l)s)< }) {
+ print "# testing pattern '$t'\n";
+ eval { ($_) = unpack($t, 'x'x18); };
+ is($@, '');
+ eval { $_ = pack($t, (0)x6); };
+ is($@, '');
+ }
+
+ for my $t (qw{ (s<)> (sl>s)< (s(l(sl)<l)s)> }) {
+ print "# testing pattern '$t'\n";
+ eval { ($_) = unpack($t, 'x'x18); };
+ like($@, qr/Can't use '[<>]' in a group with different byte-order in unpack/);
+ eval { $_ = pack($t, (0)x6); };
+ like($@, qr/Can't use '[<>]' in a group with different byte-order in pack/);
+ }
+
+ sub compress_template {
+ my $t = shift;
+ for my $mod (qw( < > )) {
+ $t =~ s/((?:(?:[SILQJFDP]!?$mod|[^SILQJFDP\W]!?)(?:\d+|\*|\[(?:[^]]+)\])?\/?){2,})/
+ my $x = $1; $x =~ s!$mod!!g ? "($x)$mod" : $x /ieg;
+ }
+ return $t;
+ }
+
+ is(pack('L<L>', (0x12345678)x2),
+ pack('(((L1)1)<)(((L)1)1)>1', (0x12345678)x2));
+
+ my %templates = (
+ 's<' => [-42],
+ 's<c2x![S]S<' => [-42, -11, 12, 4711],
+ '(i<j<[s]l<)3' => [-11, -22, -33, 1000000, 1100, 2201, 3302,
+ -1000000, 32767, -32768, 1, -123456789 ],
+ '(I!<4(J<2L<)3)5' => [1 .. 65],
+ 'q<Q<' => [-50000000005, 60000000006],
+ 'f<F<d<' => [3.14159, 111.11, 2222.22],
+ 'D<cCD<' => [1e42, -128, 255, 1e-42],
+ 'n/a*' => ['/usr/bin/perl'],
+ 'C/a*S</A*L</Z*I</a*' => [qw(Just another Perl hacker)],
+ );
+
+ for my $tle (sort keys %templates) {
+ my @d = @{$templates{$tle}};
+ my $tbe = $tle;
+ $tbe =~ y/</>/;
+ for my $t ($tbe, $tle) {
+ my $c = compress_template($t);
+ print "# '$t' -> '$c'\n";
+ SKIP: {
+ my $p1 = eval { pack $t, @d };
+ skip "cannot pack '$t' on this perl", 5 if is_valid_error($@);
+ my $p2 = eval { pack $c, @d };
+ is($@, '');
+ is($p1, $p2);
+ s!(/[aAZ])\*!$1!g for $t, $c;
+ my @u1 = eval { unpack $t, $p1 };
+ is($@, '');
+ my @u2 = eval { unpack $c, $p2 };
+ is($@, '');
+ is(join('!', @u1), join('!', @u2));
+ }
+ }
+ }
+}
+
+{
# from Wolfgang Laun: fix in change #13163
my $s = 'ABC' x 10;