From: Valeriy E. Ushakov Date: Mon, 16 Jun 1997 03:00:31 +0000 (+0400) Subject: a modified version of suggested patch for pack template 'Z'; added docs X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5a929a98cca1fca196d5fd6d9350568e529e8825;p=p5sagit%2Fp5-mst-13.2.git a modified version of suggested patch for pack template 'Z'; added docs Message-ID: <%lOHpzIuGV@snark.ptc.spbu.ru> Subject: lack of pack/unpack letter with useful symmetry for C null delimited strings p4raw-id: //depot/perl@2846 --- diff --git a/pod/perldelta.pod b/pod/perldelta.pod index da8a24e..efa52de 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -107,7 +107,7 @@ behavior of: remains unchanged. See L. -=item Improved C operator +=head2 Improved C operator The C operator is now evaluated at compile time into a true list instead of being replaced with a run time call to C. This @@ -120,6 +120,11 @@ Thus: now correctly prints "3|a", instead of "2|a". +=head2 pack() format 'Z' supported + +The new format type 'Z' is useful for packing and unpacking null-terminated +strings. See L. + =head1 Supported Platforms =over 4 diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 3b5c5dd..435db65 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2485,8 +2485,10 @@ returning the string containing the structure. The TEMPLATE is a sequence of characters that give the order and type of values, as follows: + a A string with arbitrary binary data, will be null padded. A An ascii string, will be space padded. - a An ascii string, will be null padded. + Z A null terminated (asciz) string, will be null padded. + b A bit string (ascending bit order, like vec()). B A bit string (descending bit order). h A hex string (low nybble first). @@ -2544,30 +2546,58 @@ follows: X Back up a byte. @ Null fill to absolute position. +The following rules apply: + +=over 8 + +=item * + Each letter may optionally be followed by a number giving a repeat -count. With all types except C<"a">, C<"A">, C<"b">, C<"B">, C<"h">, C<"H">, and C<"P"> the -pack function will gobble up that many values from the LIST. A C<*> for the -repeat count means to use however many items are left. The C<"a"> and C<"A"> -types gobble just one value, but pack it as a string of length count, -padding with nulls or spaces as necessary. (When unpacking, C<"A"> strips -trailing spaces and nulls, but C<"a"> does not.) Likewise, the C<"b"> and C<"B"> -fields pack a string that many bits long. The C<"h"> and C<"H"> fields pack a -string that many nybbles long. The C<"p"> type packs a pointer to a null- -terminated string. You are responsible for ensuring the string is not a -temporary value (which can potentially get deallocated before you get -around to using the packed result). The C<"P"> packs a pointer to a structure -of the size indicated by the length. A NULL pointer is created if the -corresponding value for C<"p"> or C<"P"> is C. -Real numbers (floats and doubles) are -in the native machine format only; due to the multiplicity of floating -formats around, and the lack of a standard "network" representation, no -facility for interchange has been made. This means that packed floating -point data written on one machine may not be readable on another - even if -both use IEEE floating point arithmetic (as the endian-ness of the memory -representation is not part of the IEEE spec). Note that Perl uses doubles -internally for all numeric calculation, and converting from double into -float and thence back to double again will lose precision (i.e., -C) will not in general equal C<$foo>). +count. With all types except C<"a">, C<"A">, C<"Z">, C<"b">, C<"B">, C<"h">, +C<"H">, and C<"P"> the pack function will gobble up that many values from +the LIST. A C<*> for the repeat count means to use however many items are +left. + +=item * + +The C<"a">, C<"A"> and C<"Z"> types gobble just one value, but pack it as a +string of length count, padding with nulls or spaces as necessary. When +unpacking, C<"A"> strips trailing spaces and nulls, C<"Z"> strips everything +after the first null, and C<"a"> returns data verbatim. + +=item * + +Likewise, the C<"b"> and C<"B"> fields pack a string that many bits long. + +=item * + +The C<"h"> and C<"H"> fields pack a string that many nybbles long. + +=item * + +The C<"p"> type packs a pointer to a null-terminated string. You are +responsible for ensuring the string is not a temporary value (which can +potentially get deallocated before you get around to using the packed result). +The C<"P"> type packs a pointer to a structure of the size indicated by the +length. A NULL pointer is created if the corresponding value for C<"p"> or +C<"P"> is C. + +=item * + +Real numbers (floats and doubles) are in the native machine format only; +due to the multiplicity of floating formats around, and the lack of a +standard "network" representation, no facility for interchange has been +made. This means that packed floating point data written on one machine +may not be readable on another - even if both use IEEE floating point +arithmetic (as the endian-ness of the memory representation is not part +of the IEEE spec). + +Note that Perl uses doubles internally for all numeric calculation, and +converting from double into float and thence back to double again will +lose precision (i.e., C) will not in general +equal C<$foo>). + +=back Examples: @@ -2597,11 +2627,18 @@ Examples: $foo = pack("i9pl", gmtime); # a real struct tm (on my system anyway) + $utmp_template = "Z8 Z8 Z16 L"; + $utmp = pack($utmp_template, @utmp1); + # a struct utmp (BSDish) + + @utmp2 = unpack($utmp_template, $utmp); + # "@utmp1" eq "@utmp2" + sub bintodec { unpack("N", pack("B32", substr("0" x 32 . shift, -32))); } -The same template may generally also be used in the unpack function. +The same template may generally also be used in unpack(). =item package @@ -4347,6 +4384,8 @@ The following efficiently counts the number of set bits in a bit vector: $setbits = unpack("%32b*", $selectmask); +See L for more examples. + =item untie VARIABLE Breaks the binding between a variable and a package. (See C.) diff --git a/pp.c b/pp.c index cd8c078..729d1e7 100644 --- a/pp.c +++ b/pp.c @@ -3222,7 +3222,7 @@ PP(pp_unpack) if (gimme != G_ARRAY) { /* arrange to do first one only */ /*SUPPRESS 530*/ for (patend = pat; !isALPHA(*patend) || *patend == 'x'; patend++) ; - if (strchr("aAbBhHP", *patend) || *pat == '%') { + if (strchr("aAZbBhHP", *patend) || *pat == '%') { patend++; while (isDIGIT(*patend) || *patend == '*') patend++; @@ -3280,6 +3280,7 @@ PP(pp_unpack) s += len; break; case 'A': + case 'Z': case 'a': if (len > strend - s) len = strend - s; @@ -3288,12 +3289,19 @@ PP(pp_unpack) sv = NEWSV(35, len); sv_setpvn(sv, s, len); s += len; - if (datumtype == 'A') { + if (datumtype == 'A' || datumtype == 'Z') { aptr = s; /* borrow register */ - s = SvPVX(sv) + len - 1; - while (s >= SvPVX(sv) && (!*s || isSPACE(*s))) - s--; - *++s = '\0'; + if (datumtype == 'Z') { /* 'Z' strips stuff after first null */ + s = SvPVX(sv); + while (*s) + s++; + } + else { /* 'A' strips both nulls and spaces */ + s = SvPVX(sv) + len - 1; + while (s >= SvPVX(sv) && (!*s || isSPACE(*s))) + s--; + *++s = '\0'; + } SvCUR_set(sv, s - SvPVX(sv)); s = aptr; /* unborrow register */ } @@ -4128,6 +4136,7 @@ PP(pp_pack) sv_catpvn(cat, null10, len); break; case 'A': + case 'Z': case 'a': fromstr = NEXTFROM; aptr = SvPV(fromstr, fromlen); diff --git a/t/op/pack.t b/t/op/pack.t index 6b4e634..f2f8582 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -8,7 +8,7 @@ BEGIN { require Config; import Config; } -print "1..60\n"; +print "1..72\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -209,3 +209,42 @@ print "ok ", $test++, "\n"; # binary values of the uuencoded version would not be portable between # character sets. Uuencoding is meant for encoding binary data, not # text data. + +# test the ascii template types (A, a, Z) + +print "not " unless pack('A*', "foo\0bar\0 ") eq "foo\0bar\0 "; +print "ok ", $test++, "\n"; + +print "not " unless pack('A11', "foo\0bar\0 ") eq "foo\0bar\0 "; +print "ok ", $test++, "\n"; + +print "not " unless unpack('A*', "foo\0bar \0") eq "foo\0bar"; +print "ok ", $test++, "\n"; + +print "not " unless unpack('A8', "foo\0bar \0") eq "foo\0bar"; +print "ok ", $test++, "\n"; + +print "not " unless pack('a*', "foo\0bar\0 ") eq "foo\0bar\0 "; +print "ok ", $test++, "\n"; + +print "not " unless pack('a11', "foo\0bar\0 ") eq "foo\0bar\0 \0\0"; +print "ok ", $test++, "\n"; + +print "not " unless unpack('a*', "foo\0bar \0") eq "foo\0bar \0"; +print "ok ", $test++, "\n"; + +print "not " unless unpack('a8', "foo\0bar \0") eq "foo\0bar "; +print "ok ", $test++, "\n"; + +print "not " unless pack('Z*', "foo\0bar\0 ") eq "foo\0bar\0 "; +print "ok ", $test++, "\n"; + +print "not " unless pack('Z11', "foo\0bar\0 ") eq "foo\0bar\0 \0\0"; +print "ok ", $test++, "\n"; + +print "not " unless unpack('Z*', "foo\0bar \0") eq "foo"; +print "ok ", $test++, "\n"; + +print "not " unless unpack('Z8', "foo\0bar \0") eq "foo"; +print "ok ", $test++, "\n"; +