From: Ian Phillipps Date: Fri, 23 Jul 1999 23:35:56 +0000 (+0100) Subject: (Version 2) Extending unpack to deal with counted strings X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=43192e07925c626b8d615aff545160df8bd7c3f8;p=p5sagit%2Fp5-mst-13.2.git (Version 2) Extending unpack to deal with counted strings Message-ID: <19990723233556.B2435@homer.diplex.co.uk> p4raw-id: //depot/perl@3765 --- diff --git a/pod/perldiag.pod b/pod/perldiag.pod index b3265ff..e86bfbf 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -56,6 +56,31 @@ no useful value. See L. (F) The '!' is allowed in pack() and unpack() only after certain types. See L. +=item # cannot take a count + +(F) You had an unpack template indicating a counted-length string, +but you have also specified an explicit size for the string. +See L. + +=item # must be followed by a, A or Z + +(F) You had an unpack template indicating a counted-length string, +which must be followed by one of the letters a, A or Z +to indicate what sort of string is to be unpacked. +See L. + +=item # must be followed by a*, A* or Z* + +(F) You had an pack template indicating a counted-length string, +Currently the only things that can have their length counted are a*, A* or Z*. +See L. + +=item # must follow a numeric type + +(F) You had an unpack template that contained a '#', +but this did not follow some numeric unpack specification. +See L. + =item % may only be used in unpack (F) You can't pack a string by supplying a checksum, because the diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 3e79181..efa7b58 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2754,6 +2754,35 @@ C<"P"> is C. =item * +The C<"#"> character allows packing and unpacking of strings where the +packed structure contains a byte count followed by the string itself. +You write IC<#>I. + +The I can be any C template letter, +and describes how the length value is packed. +The ones likely to be of most use are integer-packing ones like +C<"n"> (for Java strings), C<"w"> (for ASN.1 or SNMP) +and C<"N"> (for Sun XDR). + +The I must, at present, be C<"A*">, C<"a*"> or C<"Z*">. +For C the length of the string is obtained from the I, +but if you put in the '*' it will be ignored. + + unpack 'C#a', "\04Gurusamy"; gives 'Guru' + unpack 'a3#A* A*', '007 Bond J '; gives (' Bond','J') + pack 'n#a* w#a*','hello,','world'; gives "\000\006hello,\005world" + +The I is not returned explicitly from C. + +Adding a count to the I letter +is unlikely to do anything useful, +unless that letter is C<"A">, C<"a"> or C<"Z">. +Packing with a I of C<"a"> or C<"Z"> +may introduce C<"\000"> characters, +which Perl does not regard as legal in numeric strings. + +=item * + The integer types C<"s">, C<"S">, C<"l">, and C<"L"> may be immediately followed by a C<"!"> to signify native shorts or longs--as you can see from above for example a bare C<"l"> does mean exactly 32 diff --git a/pp.c b/pp.c index c7fd585..69d3795 100644 --- a/pp.c +++ b/pp.c @@ -900,7 +900,7 @@ PP(pp_postinc) PP(pp_postdec) { djSP; dTARGET; - if(SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) + if (SvREADONLY(TOPs) || SvTYPE(TOPs) > SVt_PVLV) Perl_croak(aTHX_ PL_no_modify); sv_setsv(TARG, TOPs); if (SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) && @@ -3386,6 +3386,18 @@ PP(pp_unpack) DIE(aTHX_ "x outside of string"); s += len; break; + case '#': + if (oldsp >= SP) + DIE(aTHX_ "# must follow a numeric type"); + if (*pat != 'a' && *pat != 'A' && *pat != 'Z') + DIE(aTHX_ "# must be followed by a, A or Z"); + datumtype = *pat++; + if (*pat == '*') + pat++; /* ignore '*' for compatibility with pack */ + if (isDIGIT(*pat)) + DIE(aTHX_ "# cannot take a count" ); + len = POPi; + /* drop through */ case 'A': case 'Z': case 'a': @@ -4356,7 +4368,8 @@ PP(pp_pack) MARK++; sv_setpvn(cat, "", 0); while (pat < patend) { -#define NEXTFROM (items-- > 0 ? *MARK++ : &PL_sv_no) + SV *lengthcode = Nullsv; +#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no) datumtype = *pat++ & 0xFF; #ifdef PERL_NATINT_PACK natint = 0; @@ -4386,12 +4399,20 @@ PP(pp_pack) } else len = 1; + if (*pat == '#') { + ++pat; + if (*pat != 'a' && *pat != 'A' && *pat != 'Z' || pat[1] != '*') + DIE(aTHX_ "# must be followed by a*, A* or Z*"); + lengthcode = sv_2mortal(newSViv(sv_len(items > 0 + ? *MARK : &PL_sv_no))); + } switch(datumtype) { default: Perl_croak(aTHX_ "Invalid type in pack: '%c'", (int)datumtype); case ',': /* grandfather in commas but with a warning */ if (commas++ == 0 && ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, "Invalid type in pack: '%c'", (int)datumtype); + Perl_warner(aTHX_ WARN_UNSAFE, + "Invalid type in pack: '%c'", (int)datumtype); break; case '%': DIE(aTHX_ "%% may only be used in unpack"); diff --git a/t/op/pack.t b/t/op/pack.t index 5b72797..082b954 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..142\n"; +print "1..148\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -353,3 +353,19 @@ print "ok ", $test++, "\n"; print "not " unless pack("V", 0xdeadbeef) eq "\xef\xbe\xad\xde"; print "ok ", $test++, "\n"; + +# 143..148: # + +my $z; +eval { ($x) = unpack '#a*','hello' }; +print 'not ' unless $@; print "ok $test\n"; $test++; +eval { ($z,$x,$y) = unpack 'a3#A C#a* C#Z', "003ok \003yes\004z\000abc" }; +print $@ eq '' && $z eq 'ok' ? "ok $test\n" : "not ok $test\n"; $test++; +print $@ eq '' && $x eq 'yes' ? "ok $test\n" : "not ok $test\n"; $test++; +print $@ eq '' && $y eq 'z' ? "ok $test\n" : "not ok $test\n"; $test++; + +eval { ($x) = pack '#a*','hello' }; +print 'not ' unless $@; print "ok $test\n"; $test++; +$z = pack 'n#a* w#A*','string','etc'; +print 'not ' unless $z eq "\000\006string\003etc"; print "ok $test\n"; $test++; +