From: Jarkko Hietaniemi Date: Wed, 10 Feb 1999 00:04:52 +0000 (+0200) Subject: support native integers, pack("L_",...) etc. (via private mail) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ef54e1a45e68bbd668c909c97e266f20578d5516;p=p5sagit%2Fp5-mst-13.2.git support native integers, pack("L_",...) etc. (via private mail) Message-Id: <199902092204.AAA29065@alpha.hut.fi> Subject: the "packnative" patch p4raw-id: //depot/perl@2936 --- diff --git a/pod/perldelta.pod b/pod/perldelta.pod index f64a1da..7f944c2 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -144,6 +144,11 @@ Note that the behavior of: is unchanged (it continues to leave the file empty). +=head2 pack() format modifier '_' supported + +The new format type modifer '_' is useful for packing and unpacking +native shorts, ints, and longs. See L. + =head1 Supported Platforms =over 4 diff --git a/pod/perldiag.pod b/pod/perldiag.pod index eb84876..6594e0c 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -51,6 +51,11 @@ no useful value. See L. (F) The "use" keyword is recognized and executed at compile time, and returns no useful value. See L. +=item '_' allowed only after types %s + +(F) The '_' is allowed in pack() and unpack() only after certain types. +See L. + =item % may only be used in unpack (F) You can't pack a string by supplying a checksum, because the diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index d5b631b..07e2361 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2591,6 +2591,58 @@ C<"P"> is C. =item * +The integer types C<"s">, C<"S">, C<"l">, and C<"L"> may be +immediately followed by a C<"_"> to signify a native short or long--as +you can see from above for example a bare C<"l"> does mean exactly 32 +bits, the native C (as seen by the local C compiler) may be +larger. This is an issue mainly in 64-bit platforms. + +C<"i_"> and C<"I_"> also work but only because of completeness; +they are identical to C<"i"> and C<"I">. + +The actual size (in bytes) of native shorts, ints, and longs on +the platform where Perl was built are available from L: + + use Config; + print $Config{shortsize}, "\n"; + +=item * + +The integer formats C<"s">, C<"S">, C<"i">, C<"I">, C<"l">, and C<"L"> +are inherently non-portable between processors and operating systems +because they obey the native byteorder and endianness. For example a +4-byte integer 0x12345678 (305419896 decimal) be ordered natively +(arranged in and handled by the CPU registers) into bytes as + + 0x12 0x34 0x56 0x78 # big-endian + 0x78 0x56 0x34 0x12 # little-endian + +The names `big-endian' and `little-endian' are joking references to +the classic "Gulliver's Travels" (via the paper "On Holy Wars and a +Plea for Peace" by Danny Cohen, USC/ISI IEN 137, April 1, 1980) and +the egg-eating habits of the lilliputs. + +Some systems may even have weird byte orders such as + + 0x56 0x78 0x12 0x34 + 0x34 0x12 0x78 0x56 + +You can see your system's preference with + + print join(" ", map { sprintf "%#02x", $_ } + unpack("C*",pack("L",0x12345678))), "\n"; + +The actual byteorder on the platform where Perl was built are available +from L: + + use Config; + print $Config{byteorder}, "\n"; + +If you want portable integers use the formats C<"n">, C<"N">, C<"v">, and +"V", their byte endianness and size is known. + +=item * + Real numbers (floats and doubles) are in the native machine format only; due to the multiplicity of floating formats around, and the lack of a standard "network" representation, no facility for interchange has been diff --git a/pp.c b/pp.c index 348cff9..985a3ed 100644 --- a/pp.c +++ b/pp.c @@ -92,11 +92,13 @@ typedef unsigned UBW; # endif # define COPY16(s,p) (*(p) = 0, Copy(s, OFF16(p), SIZE16, char)) # define COPY32(s,p) (*(p) = 0, Copy(s, OFF32(p), SIZE32, char)) +# define COPYNN(s,p,n) (*(p) = 0, Copy(s, (char *)(p), n, char)) # define CAT16(sv,p) sv_catpvn(sv, OFF16(p), SIZE16) # define CAT32(sv,p) sv_catpvn(sv, OFF32(p), SIZE32) #else # define COPY16(s,p) Copy(s, p, SIZE16, char) # define COPY32(s,p) Copy(s, p, SIZE32, char) +# define COPYNN(s,p,n) Copy(s, (char *)(p), n, char) # define CAT16(sv,p) sv_catpvn(sv, (char*)(p), SIZE16) # define CAT32(sv,p) sv_catpvn(sv, (char*)(p), SIZE32) #endif @@ -218,7 +220,8 @@ PP(pp_rv2gv) GvIOp(gv) = (IO *)sv; (void)SvREFCNT_inc(sv); sv = (SV*) gv; - } else if (SvTYPE(sv) != SVt_PVGV) + } + else if (SvTYPE(sv) != SVt_PVGV) DIE("Not a GLOB reference"); } else { @@ -426,7 +429,8 @@ PP(pp_prototype) if (oa & OA_OPTIONAL) { seen_question = 1; str[n++] = ';'; - } else if (seen_question) + } + else if (seen_question) goto set; /* XXXX system, exec */ if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF && (oa & (OA_OPTIONAL - 1)) <= OA_HVREF) { @@ -438,7 +442,8 @@ PP(pp_prototype) } str[n++] = '\0'; ret = sv_2mortal(newSVpv(str, n - 1)); - } else if (code) /* Non-Overridable */ + } + else if (code) /* Non-Overridable */ goto set; else { /* None such */ nonesuch: @@ -932,7 +937,8 @@ PP(pp_divide) (double)I_V(right) == right && (k = I_V(left)/I_V(right))*I_V(right) == I_V(left)) { value = k; - } else { + } + else { value = left / right; } } @@ -2652,10 +2658,12 @@ PP(pp_exists) if (SvTYPE(hv) == SVt_PVHV) { if (hv_exists_ent(hv, tmpsv, 0)) RETPUSHYES; - } else if (SvTYPE(hv) == SVt_PVAV) { + } + else if (SvTYPE(hv) == SVt_PVAV) { if (avhv_exists_ent((AV*)hv, tmpsv, 0)) RETPUSHYES; - } else { + } + else { DIE("Not a HASH reference"); } RETPUSHNO; @@ -2678,7 +2686,8 @@ PP(pp_hslice) if (realhv) { HE *he = hv_fetch_ent(hv, keysv, lval, 0); svp = he ? &HeVAL(he) : 0; - } else { + } + else { svp = avhv_fetch_ent((AV*)hv, keysv, lval, 0); } if (lval) { @@ -3234,6 +3243,8 @@ PP(pp_unpack) register U32 culong; double cdouble; int commas = 0; + int natint; /* native integer */ + int unatint; /* unsigned native integer */ if (gimme != G_ARRAY) { /* arrange to do first one only */ /*SUPPRESS 530*/ @@ -3249,8 +3260,19 @@ PP(pp_unpack) while (pat < patend) { reparse: datumtype = *pat++ & 0xFF; + natint = 0; if (isSPACE(datumtype)) continue; + if (*pat == '_') { + char *natstr = "sSiIlL"; + + if (strchr(natstr, datumtype)) { + natint = 1; + pat++; + } + else + croak("'_' allowed only after types %s", natstr); + } if (pat >= patend) len = 1; else if (*pat == '*') { @@ -3495,66 +3517,108 @@ PP(pp_unpack) } break; case 's': - along = (strend - s) / SIZE16; + along = (strend - s) / (natint ? sizeof(short) : SIZE16); if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY16(s, &ashort); - s += SIZE16; - culong += ashort; + if (natint) { + while (len-- > 0) { + COPYNN(s, &ashort, sizeof(short)); + s += sizeof(short); + culong += ashort; + + } + } + else { + while (len-- > 0) { + COPY16(s, &ashort); + s += SIZE16; + culong += ashort; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY16(s, &ashort); - s += SIZE16; - sv = NEWSV(38, 0); - sv_setiv(sv, (IV)ashort); - PUSHs(sv_2mortal(sv)); + if (natint) { + while (len-- > 0) { + COPYNN(s, &ashort, sizeof(short)); + s += sizeof(short); + sv = NEWSV(38, 0); + sv_setiv(sv, (IV)ashort); + PUSHs(sv_2mortal(sv)); + } + } + else { + while (len-- > 0) { + COPY16(s, &ashort); + s += SIZE16; + sv = NEWSV(38, 0); + sv_setiv(sv, (IV)ashort); + PUSHs(sv_2mortal(sv)); + } } } break; case 'v': case 'n': case 'S': - along = (strend - s) / SIZE16; + unatint = natint && datumtype == 'S'; + along = (strend - s) / (unatint ? sizeof(unsigned short) : SIZE16); if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY16(s, &aushort); - s += SIZE16; + if (unatint) { + while (len-- > 0) { + COPYNN(s, &aushort, sizeof(unsigned short)); + s += sizeof(unsigned short); + culong += aushort; + } + } + else { + while (len-- > 0) { + COPY16(s, &aushort); + s += SIZE16; #ifdef HAS_NTOHS - if (datumtype == 'n') - aushort = PerlSock_ntohs(aushort); + if (datumtype == 'n') + aushort = PerlSock_ntohs(aushort); #endif #ifdef HAS_VTOHS - if (datumtype == 'v') - aushort = vtohs(aushort); + if (datumtype == 'v') + aushort = vtohs(aushort); #endif - culong += aushort; + culong += aushort; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY16(s, &aushort); - s += SIZE16; - sv = NEWSV(39, 0); + if (unatint) { + while (len-- > 0) { + COPYNN(s, &aushort, sizeof(unsigned short)); + s += sizeof(unsigned short); + sv = NEWSV(39, 0); + sv_setiv(sv, (IV)aushort); + PUSHs(sv_2mortal(sv)); + } + } + else { + while (len-- > 0) { + COPY16(s, &aushort); + s += SIZE16; + sv = NEWSV(39, 0); #ifdef HAS_NTOHS - if (datumtype == 'n') - aushort = PerlSock_ntohs(aushort); + if (datumtype == 'n') + aushort = PerlSock_ntohs(aushort); #endif #ifdef HAS_VTOHS - if (datumtype == 'v') - aushort = vtohs(aushort); + if (datumtype == 'v') + aushort = vtohs(aushort); #endif - sv_setiv(sv, (IV)aushort); - PUSHs(sv_2mortal(sv)); + sv_setiv(sv, (IV)aushort); + PUSHs(sv_2mortal(sv)); + } } } break; @@ -3629,72 +3693,119 @@ PP(pp_unpack) } break; case 'l': - along = (strend - s) / SIZE32; + along = (strend - s) / (natint ? sizeof(long) : SIZE32); if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY32(s, &along); - s += SIZE32; - if (checksum > 32) - cdouble += (double)along; - else - culong += along; + if (natint) { + while (len-- > 0) { + COPYNN(s, &along, sizeof(long)); + s += sizeof(long); + if (checksum > 32) + cdouble += (double)along; + else + culong += along; + } + } + else { + while (len-- > 0) { + COPY32(s, &along); + s += SIZE32; + if (checksum > 32) + cdouble += (double)along; + else + culong += along; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY32(s, &along); - s += SIZE32; - sv = NEWSV(42, 0); - sv_setiv(sv, (IV)along); - PUSHs(sv_2mortal(sv)); + if (natint) { + while (len-- > 0) { + COPYNN(s, &along, sizeof(long)); + s += sizeof(long); + sv = NEWSV(42, 0); + sv_setiv(sv, (IV)along); + PUSHs(sv_2mortal(sv)); + } + } + else { + while (len-- > 0) { + COPY32(s, &along); + s += SIZE32; + sv = NEWSV(42, 0); + sv_setiv(sv, (IV)along); + PUSHs(sv_2mortal(sv)); + } } } break; case 'V': case 'N': case 'L': - along = (strend - s) / SIZE32; + unatint = natint && datumtype; + along = (strend - s) / (unatint ? sizeof(unsigned long) : SIZE32); if (len > along) len = along; if (checksum) { - while (len-- > 0) { - COPY32(s, &aulong); - s += SIZE32; + if (unatint) { + while (len-- > 0) { + COPYNN(s, &aulong, sizeof(unsigned long)); + s += sizeof(unsigned long); + if (checksum > 32) + cdouble += (double)aulong; + else + culong += aulong; + } + } + else { + while (len-- > 0) { + COPY32(s, &aulong); + s += SIZE32; #ifdef HAS_NTOHL - if (datumtype == 'N') - aulong = PerlSock_ntohl(aulong); + if (datumtype == 'N') + aulong = PerlSock_ntohl(aulong); #endif #ifdef HAS_VTOHL - if (datumtype == 'V') - aulong = vtohl(aulong); + if (datumtype == 'V') + aulong = vtohl(aulong); #endif - if (checksum > 32) - cdouble += (double)aulong; - else - culong += aulong; + if (checksum > 32) + cdouble += (double)aulong; + else + culong += aulong; + } } } else { EXTEND(SP, len); EXTEND_MORTAL(len); - while (len-- > 0) { - COPY32(s, &aulong); - s += SIZE32; + if (unatint) { + while (len-- > 0) { + COPYNN(s, &aulong, sizeof(unsigned long)); + s += sizeof(unsigned long); + sv = NEWSV(43, 0); + sv_setuv(sv, (UV)aulong); + PUSHs(sv_2mortal(sv)); + } + } + else { + while (len-- > 0) { + COPY32(s, &aulong); + s += SIZE32; #ifdef HAS_NTOHL - if (datumtype == 'N') - aulong = PerlSock_ntohl(aulong); + if (datumtype == 'N') + aulong = PerlSock_ntohl(aulong); #endif #ifdef HAS_VTOHL - if (datumtype == 'V') - aulong = vtohl(aulong); + if (datumtype == 'V') + aulong = vtohl(aulong); #endif - sv = NEWSV(43, 0); - sv_setuv(sv, (UV)aulong); - PUSHs(sv_2mortal(sv)); + sv = NEWSV(43, 0); + sv_setuv(sv, (UV)aulong); + PUSHs(sv_2mortal(sv)); + } } } break; @@ -4099,6 +4210,7 @@ PP(pp_pack) float afloat; double adouble; int commas = 0; + int natint; /* native integer */ items = SP - MARK; MARK++; @@ -4106,8 +4218,19 @@ PP(pp_pack) while (pat < patend) { #define NEXTFROM (items-- > 0 ? *MARK++ : &PL_sv_no) datumtype = *pat++ & 0xFF; + natint = 0; if (isSPACE(datumtype)) continue; + if (*pat == '_') { + char *natstr = "sSiIlL"; + + if (strchr(natstr, datumtype)) { + natint = 1; + pat++; + } + else + croak("'_' allowed only after types %s", natstr); + } if (*pat == '*') { len = strchr("@Xxu", datumtype) ? 0 : items; pat++; @@ -4352,11 +4475,39 @@ PP(pp_pack) } break; case 'S': + if (natint) { + unsigned short aushort; + + while (len-- > 0) { + fromstr = NEXTFROM; + aushort = SvUV(fromstr); + sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short)); + } + } + else { + U16 aushort; + + while (len-- > 0) { + fromstr = NEXTFROM; + aushort = (U16)SvIV(fromstr); + CAT16(cat, &aushort); + } + } + break; case 's': - while (len-- > 0) { - fromstr = NEXTFROM; - ashort = (I16)SvIV(fromstr); - CAT16(cat, &ashort); + if (natint) { + while (len-- > 0) { + fromstr = NEXTFROM; + ashort = SvIV(fromstr); + sv_catpvn(cat, (char *)&ashort, sizeof(short)); + } + } + else { + while (len-- > 0) { + fromstr = NEXTFROM; + ashort = (I16)SvIV(fromstr); + CAT16(cat, &ashort); + } } break; case 'I': @@ -4464,17 +4615,35 @@ PP(pp_pack) } break; case 'L': - while (len-- > 0) { - fromstr = NEXTFROM; - aulong = SvUV(fromstr); - CAT32(cat, &aulong); + if (natint) { + while (len-- > 0) { + fromstr = NEXTFROM; + aulong = SvUV(fromstr); + sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long)); + } + } + else { + while (len-- > 0) { + fromstr = NEXTFROM; + aulong = SvUV(fromstr); + CAT32(cat, &aulong); + } } break; case 'l': - while (len-- > 0) { - fromstr = NEXTFROM; - along = SvIV(fromstr); - CAT32(cat, &along); + if (natint) { + while (len-- > 0) { + fromstr = NEXTFROM; + along = SvIV(fromstr); + sv_catpvn(cat, (char *)&along, sizeof(long)); + } + } + else { + while (len-- > 0) { + fromstr = NEXTFROM; + along = SvIV(fromstr); + CAT32(cat, &along); + } } break; #ifdef HAS_QUAD diff --git a/t/lib/ipc_sysv.t b/t/lib/ipc_sysv.t index 1cbde96..23476e1 100755 --- a/t/lib/ipc_sysv.t +++ b/t/lib/ipc_sysv.t @@ -113,43 +113,19 @@ if($Config{'d_semget'} eq 'define' && print "not " unless length($data); print "ok 9\n"; - my $template; - - # Find the pack/unpack template capable of handling native C shorts. - - if ($Config{shortsize} == 2) { - $template = "s"; - } elsif ($Config{shortsize} == 4) { - $template = "l"; - } elsif ($Config{shortsize} == 8) { - # Try quad last because not supported everywhere. - foreach my $t (qw(i q)) { - # We could trap the unsupported quad template with eval - # but if we get this far we should have quad support anyway. - if (length(pack($t, 0)) == 8) { - $template = $t; - last; - } - } - } - - die "$0: cannot pack native shorts\n" unless defined $template; - - $template .= "*"; - my $nsem = 10; - semctl($sem,0,SETALL,pack($template,(0) x $nsem)) or print "not "; + semctl($sem,0,SETALL,pack("s_*",(0) x $nsem)) or print "not "; print "ok 10\n"; $data = ""; semctl($sem,0,GETALL,$data) or print "not "; print "ok 11\n"; - print "not " unless length($data) == length(pack($template,(0) x $nsem)); + print "not " unless length($data) == length(pack("s_*",(0) x $nsem)); print "ok 12\n"; - my @data = unpack($template,$data); + my @data = unpack("s_*",$data); my $adata = "0" x $nsem; @@ -159,14 +135,14 @@ if($Config{'d_semget'} eq 'define' && my $poke = 2; $data[$poke] = 1; - semctl($sem,0,SETALL,pack($template,@data)) or print "not "; + semctl($sem,0,SETALL,pack("s_*",@data)) or print "not "; print "ok 14\n"; $data = ""; semctl($sem,0,GETALL,$data) or print "not "; print "ok 15\n"; - @data = unpack($template,$data); + @data = unpack("s_*",$data); my $bdata = "0" x $poke . "1" . "0" x ($nsem-$poke-1); diff --git a/t/op/pack.t b/t/op/pack.t index 53b001d..82f2b1c 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -1,14 +1,12 @@ #!./perl -# $RCSfile: pack.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:11 $ - BEGIN { chdir 't' if -d 't'; unshift @INC, '../lib' if -d '../lib'; require Config; import Config; } -print "1..72\n"; +print "1..78\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -166,6 +164,11 @@ foreach my $t (@templates) { # 57..60: uuencode/decode +# Note that first uuencoding known 'text' data and then checking the +# binary values of the uuencoded version would not be portable between +# character sets. Uuencoding is meant for encoding binary data, not +# text data. + $in = pack 'C*', 0 .. 255; # just to be anal, we do some random tr/`/ / @@ -205,12 +208,7 @@ EOUU print "not " unless unpack('u', $uu) eq $in; print "ok ", $test++, "\n"; -# Note that first uuencoding known 'text' data and then checking the -# binary values of the uuencoded version would not be portable between -# character sets. Uuencoding is meant for encoding binary data, not -# text data. - -# test the ascii template types (A, a, Z) +# 61..72: test the ascii template types (A, a, Z) print "not " unless pack('A*', "foo\0bar\0 ") eq "foo\0bar\0 "; print "ok ", $test++, "\n"; @@ -248,3 +246,23 @@ print "ok ", $test++, "\n"; print "not " unless unpack('Z8', "foo\0bar \0") eq "foo"; print "ok ", $test++, "\n"; +# 73..77: packing native shorts/ints/longs + +print "not " unless length(pack("s_", 0)) == $Config{shortsize}; +print "ok ", $test++, "\n"; + +print "not " unless length(pack("i_", 0)) == $Config{intsize}; +print "ok ", $test++, "\n"; + +print "not " unless length(pack("l_", 0)) == $Config{longsize}; +print "ok ", $test++, "\n"; + +print "not " unless length(pack("s_", 0)) <= length(pack("i_", 0)); +print "ok ", $test++, "\n"; + +print "not " unless length(pack("i_", 0)) <= length(pack("l_", 0)); +print "ok ", $test++, "\n"; + +print "not " unless length(pack("i_", 0)) == length(pack("i", 0)); +print "ok ", $test++, "\n"; +