From: Gisle Aas Date: Sat, 12 Nov 2005 13:13:23 +0000 (-0800) Subject: Re: [PATCH] Enhanced Storable::read_magic() X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d4b9b6e4cc25d0e932fd120c48e967f642ccbc07;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH] Enhanced Storable::read_magic() Message-ID: p4raw-id: //depot/perl@26107 --- diff --git a/MANIFEST b/MANIFEST index 6d6231a..24f3d55 100644 --- a/MANIFEST +++ b/MANIFEST @@ -897,6 +897,7 @@ ext/Storable/t/compat06.t See if Storable works ext/Storable/t/croak.t See if Storable works ext/Storable/t/dclone.t See if Storable works ext/Storable/t/downgrade.t See if Storable works +ext/Storable/t/file_magic.t See if file_magic function works ext/Storable/t/forgive.t See if Storable works ext/Storable/t/freeze.t See if Storable works ext/Storable/t/HAS_ATTACH.pm For auto-requiring of modules for STORABLE_attach diff --git a/ext/Storable/MANIFEST b/ext/Storable/MANIFEST index 8fc574e..64a5764 100644 --- a/ext/Storable/MANIFEST +++ b/ext/Storable/MANIFEST @@ -20,6 +20,7 @@ t/compat06.t See if Storable works t/croak.t See if Storable works t/dclone.t See if Storable works t/downgrade.t See if Storable works +t/file_magic.t See if file_magic function works t/forgive.t See if Storable works t/freeze.t See if Storable works t/integer.t For "use integer" testing diff --git a/ext/Storable/Storable.pm b/ext/Storable/Storable.pm index 1e0f590..dd02fe6 100644 --- a/ext/Storable/Storable.pm +++ b/ext/Storable/Storable.pm @@ -16,6 +16,7 @@ package Storable; @ISA = qw(Exporter DynaLoader); dclone retrieve_fd lock_store lock_nstore lock_retrieve + file_magic read_magic ); use AutoLoader; @@ -113,39 +114,85 @@ sub show_file_magic { EOM } +sub file_magic { + my $file = shift; + open(my $fh, "<", $file) || die "Can't open '$file': $!"; + binmode($fh); + defined(sysread($fh, my $buf, 32)) || die "Can't read from '$file': $!"; + close($fh); + + $file = "./$file" unless $file; # ensure TRUE value + + return read_magic($buf, $file); +} + sub read_magic { - my $header = shift; - return unless defined $header and length $header > 11; - my $result; - if ($header =~ s/^perl-store//) { - die "Can't deal with version 0 headers"; - } elsif ($header =~ s/^pst0//) { - $result->{file} = 1; - } - # Assume it's a string. - my ($major, $minor, $bytelen) = unpack "C3", $header; - - my $net_order = $major & 1; - $major >>= 1; - @$result{qw(major minor netorder)} = ($major, $minor, $net_order); - - return $result if $net_order; - - # I assume that it is rare to find v1 files, so this is an intentionally - # inefficient way of doing it, to make the rest of the code constant. - if ($major < 2) { - delete $result->{minor}; - $header = '.' . $header; - $bytelen = $minor; - } - - @$result{qw(byteorder intsize longsize ptrsize)} = - unpack "x3 A$bytelen C3", $header; - - if ($major >= 2 and $minor >= 2) { - $result->{nvsize} = unpack "x6 x$bytelen C", $header; - } - $result; + my($buf, $file) = @_; + my %info; + + my $buflen = length($buf); + my $magic; + if ($buf =~ s/^(pst0|perl-store)//) { + $magic = $1; + $info{file} = $file || 1; + } + else { + return undef if $file; + $magic = ""; + } + + return undef unless length($buf); + + my $net_order; + if ($magic eq "perl-store" && ord(substr($buf, 0, 1)) > 1) { + $info{version} = -1; + $net_order = 0; + } + else { + $net_order = ord(substr($buf, 0, 1, "")); + my $major = $net_order >> 1; + return undef if $major > 4; # sanity (assuming we never go that high) + $info{major} = $major; + $net_order &= 0x01; + if ($major > 1) { + return undef unless length($buf); + my $minor = ord(substr($buf, 0, 1, "")); + $info{minor} = $minor; + $info{version} = "$major.$minor"; + $info{version_nv} = sprintf "%d.%03d", $major, $minor; + } + else { + $info{version} = $major; + } + } + $info{version_nv} ||= $info{version}; + $info{netorder} = $net_order; + + unless ($net_order) { + return undef unless length($buf); + my $len = ord(substr($buf, 0, 1, "")); + return undef unless length($buf) >= $len; + return undef unless $len == 4 || $len == 8; # sanity + $info{byteorder} = substr($buf, 0, $len, ""); + $info{intsize} = ord(substr($buf, 0, 1, "")); + $info{longsize} = ord(substr($buf, 0, 1, "")); + $info{ptrsize} = ord(substr($buf, 0, 1, "")); + if ($info{version_nv} >= 2.002) { + return undef unless length($buf); + $info{nvsize} = ord(substr($buf, 0, 1, "")); + } + } + $info{hdrsize} = $buflen - length($buf); + + return \%info; +} + +sub BIN_VERSION_NV { + sprintf "%d.%03d", BIN_MAJOR(), BIN_MINOR(); +} + +sub BIN_WRITE_VERSION_NV { + sprintf "%d.%03d", BIN_MAJOR(), BIN_WRITE_MINOR(); } # @@ -820,6 +867,107 @@ implementation of the C utility, version 3.38 or later, is expected to contain support for recognising Storable files out-of-the-box, in addition to other kinds of Perl files. +You can also use the following functions to extract the file header +information from Storable images: + +=over + +=item $info = Storable::file_magic( $filename ) + +If the given file is a Storable image return a hash describing it. If +the file is readable, but not a Storable image return C. If +the file does not exist or is unreadable then croak. + +The hash returned has the following elements: + +=over + +=item C + +This returns the file format version. It is a string like "2.7". + +Note that this version number is not the same as the version number of +the Storable module itself. For instance Storable v0.7 create files +in format v2.0 and Storable v2.15 create files in format v2.7. The +file format version number only increment when additional features +that would confuse older versions of the module are added. + +Files older than v2.0 will have the one of the version numbers "-1", +"0" or "1". No minor number was used at that time. + +=item C + +This returns the file format version as number. It is a string like +"2.007". This value is suitable for numeric comparisons. + +The constant function C returns a comparable +number that represent the highest file version number that this +version of Storable fully support (but see discussion of +C<$Storable::accept_future_minor> above). The constant +C function returns what file version +is written and might be less than C in some +configuations. + +=item C, C + +This also returns the file format version. If the version is "2.7" +then major would be 2 and minor would be 7. The minor element is +missing for when major is less than 2. + +=item C + +The is the number of bytes that the Storable header occupies. + +=item C + +This is TRUE if the image store data in network order. This means +that it was created with nstore() or similar. + +=item C + +This is only present when C is FALSE. It is the +$Config{byteorder} string of the perl that created this image. It is +a string like "1234" (32 bit little endian) or "87654321" (64 bit big +endian). This must match the current perl for the image to be +readable by Storable. + +=item C, C, C, C + +These are only present when C is FALSE. These are the sizes of +various C datatypes of the perl that created this image. These must +match the current perl for the image to be readable by Storable. + +The C element is only present for file format v2.2 and +higher. + +=item C + +The name of the file. + +=back + +=item $info = Storable::read_magic( $buffer ) + +=item $info = Storable::read_magic( $buffer, $must_be_file ) + +The $buffer should be a Storable image or the first few bytes of it. +If $buffer starts with a Storable header, then a hash describing the +image is returned, otherwise C is returned. + +The hash has the same structure as the one returned by +Storable::file_magic(). The C element is true if the image is a +file image. + +If the $must_be_file argument is provided and is TRUE, then return +C unless the image looks like it belongs to a file dump. + +The maximum size of a Storable header is currently 21 bytes. If the +provided $buffer is only the first part of a Storable image it should +at least be this long to ensure that read_magic() will recognize it as +such. + +=back + =head1 EXAMPLES Here are some code samples showing a possible usage of Storable: diff --git a/ext/Storable/Storable.xs b/ext/Storable/Storable.xs index a2e2d5a..cd3a41a 100644 --- a/ext/Storable/Storable.xs +++ b/ext/Storable/Storable.xs @@ -6342,6 +6342,11 @@ MODULE = Storable PACKAGE = Storable PROTOTYPES: ENABLE BOOT: + HV *stash = gv_stashpvn("Storable", 8, TRUE); + newCONSTSUB(stash, "BIN_MAJOR", newSViv(STORABLE_BIN_MAJOR)); + newCONSTSUB(stash, "BIN_MINOR", newSViv(STORABLE_BIN_MINOR)); + newCONSTSUB(stash, "BIN_WRITE_MINOR", newSViv(STORABLE_BIN_WRITE_MINOR)); + init_perinterp(aTHX); gv_fetchpv("Storable::drop_utf8", GV_ADDMULTI, SVt_PV); #ifdef DEBUGME diff --git a/ext/Storable/t/file_magic.t b/ext/Storable/t/file_magic.t new file mode 100644 index 0000000..5b4be82 --- /dev/null +++ b/ext/Storable/t/file_magic.t @@ -0,0 +1,446 @@ +#!perl -w + +use strict; +use Test::More; +use Storable qw(store nstore); +use Config qw(%Config); + +# The @tests array below was create by the following program +my $dummy = <<'EOT'; +use Storable; +use Data::Dump qw(dump); + +print "my \@tests = (\n"; +for my $f () { + print " [\n"; + print " " . dump(substr(`cat $f`, 0, 32) . "...") , ",\n"; + + my $x = dump(Storable::file_magic($f)); + $x =~ s/^/ /gm; + print "$x,\n"; + + print " ],\n"; +} +print ");\n"; +EOT + +my @tests = ( + [ + "perl-store\x041234\4\4\4\xD4\xC2\32\b\3\13\0\0\0v\b\xC5\32\b...", + { + byteorder => 1234, + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.1.le32", + hdrsize => 18, + intsize => 4, + longsize => 4, + netorder => 0, + ptrsize => 4, + version => -1, + version_nv => -1, + }, + ], + [ + "perl-store\0\x041234\4\4\4\x8Co\34\b\3\13\0\0\0v\x94v\34...", + { + byteorder => 1234, + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.4_07.le32", + hdrsize => 19, + intsize => 4, + longsize => 4, + major => 0, + netorder => 0, + ptrsize => 4, + version => 0, + version_nv => 0, + }, + ], + [ + "perl-store\1\x8Co\34\b\3\0\0\0\13v\x94v\34\b\1\0\0\4\0\0\0...", + { + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.4_07.neutral", + hdrsize => 11, + major => 0, + netorder => 1, + version => 0, + version_nv => 0, + }, + ], + [ + "pst0\2\x041234\4\4\4\3\13\0\0\0\1\0\4\0\0\0\0\0\0\0\0\0\0\0...", + { + byteorder => 1234, + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.604.le32", + hdrsize => 13, + intsize => 4, + longsize => 4, + major => 1, + netorder => 0, + ptrsize => 4, + version => 1, + version_nv => 1, + }, + ], + [ + "pst0\3\3\0\0\0\13\1\0\0\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0...", + { + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.604.neutral", + hdrsize => 5, + major => 1, + netorder => 1, + version => 1, + version_nv => 1, + }, + ], + [ + "pst0\4\0\x041234\4\4\4\3\13\0\0\0\1\0\4\0\0\0\0\0\0\0\0\0\0...", + { + byteorder => 1234, + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.700.le32", + hdrsize => 14, + intsize => 4, + longsize => 4, + major => 2, + minor => 0, + netorder => 0, + ptrsize => 4, + version => "2.0", + version_nv => "2.000", + }, + ], + [ + "pst0\5\0\3\0\0\0\13\1\0\0\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0...", + { + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-0.700.neutral", + hdrsize => 6, + major => 2, + minor => 0, + netorder => 1, + version => "2.0", + version_nv => "2.000", + }, + ], + [ + "pst0\4\4\x041234\4\4\4\b\3\13\0\0\0\1\0\4\0\0\0\0\0\0\0\0\0...", + { + byteorder => 1234, + file => "data_perl-5.006001_i686-linux-thread-multi_Storable-1.012.le32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 4, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.4", + version_nv => "2.004", + }, + ], + [ + "pst0\4\3\x044321\4\4\4\b\3\0\0\0\13\1\0\0\4\0\0\0\0\0\0\0\0...", + { + byteorder => 4321, + file => "data_perl-5.006001_IA64.ARCHREV_0-thread-multi_Storable-1.006.be32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 3, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.3", + version_nv => "2.003", + }, + ], + [ + "pst0\5\3\3\0\0\0\13\1\0\0\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0...", + { + file => "data_perl-5.006001_IA64.ARCHREV_0-thread-multi_Storable-1.006.neutral", + hdrsize => 6, + major => 2, + minor => 3, + netorder => 1, + version => "2.3", + version_nv => "2.003", + }, + ], + [ + "pst0\4\4\x044321\4\4\4\b\3\0\0\0\13\1\0\0\4\0\0\0\0\0\0\0\0...", + { + byteorder => 4321, + file => "data_perl-5.006001_IA64.ARCHREV_0-thread-multi_Storable-1.012.be32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 4, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.4", + version_nv => "2.004", + }, + ], + [ + "pst0\5\4\3\0\0\0\13\1\0\0\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0...", + { + file => "data_perl-5.006001_IA64.ARCHREV_0-thread-multi_Storable-1.012.neutral", + hdrsize => 6, + major => 2, + minor => 4, + netorder => 1, + version => "2.4", + version_nv => "2.004", + }, + ], + [ + "pst0\4\6\x044321\4\4\4\b\3\0\0\0\13\n\n4294967296...", + { + byteorder => 4321, + file => "data_perl-5.008001_darwin-thread-multi-2level_Storable-2.08.be32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 6, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\5\6\3\0\0\0\13\n\n4294967296\0\0\0\bfour_...", + { + file => "data_perl-5.008001_darwin-thread-multi-2level_Storable-2.08.neutral", + hdrsize => 6, + major => 2, + minor => 6, + netorder => 1, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\4\6\x044321\4\4\4\b\3\0\0\0\13\4\3\0\0\0\0\0\0\0\nem...", + { + byteorder => 4321, + file => "data_perl-5.008003_PA-RISC1.1-thread-multi_Storable-2.09.be32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 6, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\5\6\3\0\0\0\13\4\3\0\0\0\0\0\0\0\nempty_hash\n...", + { + file => "data_perl-5.008003_PA-RISC1.1-thread-multi_Storable-2.09.neutral", + hdrsize => 6, + major => 2, + minor => 6, + netorder => 1, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\4\6\b12345678\4\4\4\b\3\13\0\0\0\4\3\0\0\0\0\n\0...", + { + byteorder => 12_345_678, + file => "data_perl-5.008004_i86pc-solaris-64int_Storable-2.12.le64", + hdrsize => 19, + intsize => 4, + longsize => 4, + major => 2, + minor => 6, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\4\6\x041234\4\4\4\b\3\13\0\0\0\4\3\0\0\0\0\n\0\0\0em...", + { + byteorder => 1234, + file => "data_perl-5.008006_i686-linux-thread-multi_Storable-2.13.le32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 6, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\4\6\b87654321\4\b\b\b\3\0\0\0\13\4\3\0\0\0\0\0\0...", + { + byteorder => 87_654_321, + file => "data_perl-5.008007_IA64.ARCHREV_0-thread-multi-LP64_Storable-2.13.be64", + hdrsize => 19, + intsize => 4, + longsize => 8, + major => 2, + minor => 6, + netorder => 0, + nvsize => 8, + ptrsize => 8, + version => "2.6", + version_nv => "2.006", + }, + ], + [ + "pst0\4\a\b12345678\4\b\b\b\3\13\0\0\0\4\3\0\0\0\0\n\0...", + { + byteorder => 12_345_678, + file => "data_perl-5.008007_x86-solaris-thread-multi-64_Storable-2.15.le64", + hdrsize => 19, + intsize => 4, + longsize => 8, + major => 2, + minor => 7, + netorder => 0, + nvsize => 8, + ptrsize => 8, + version => "2.7", + version_nv => "2.007", + }, + ], + [ + "pst0\5\a\3\0\0\0\13\4\3\0\0\0\0\0\0\0\nempty_hash\n...", + { + file => "data_perl-5.008007_x86-solaris-thread-multi-64_Storable-2.15.neutral", + hdrsize => 6, + major => 2, + minor => 7, + netorder => 1, + version => "2.7", + version_nv => "2.007", + }, + ], + [ + "pst0\4\5\x041234\4\4\4\b\3\13\0\0\0\4\3\0\0\0\0\n\0\0\0em...", + { + byteorder => 1234, + file => "data_perl-5.008_i686-linux-thread-multi_Storable-2.04.le32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 5, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.5", + version_nv => "2.005", + }, + ], + [ + "pst0\5\5\3\0\0\0\13\4\3\0\0\0\0\0\0\0\nempty_hash\n...", + { + file => "data_perl-5.008_i686-linux-thread-multi_Storable-2.04.neutral", + hdrsize => 6, + major => 2, + minor => 5, + netorder => 1, + version => "2.5", + version_nv => "2.005", + }, + ], + [ + "pst0\4\a\x041234\4\4\4\b\3\13\0\0\0\4\3\0\0\0\0\n\0\0\0em...", + { + byteorder => 1234, + file => "data_perl-5.009003_i686-linux_Storable-2.15.le32", + hdrsize => 15, + intsize => 4, + longsize => 4, + major => 2, + minor => 7, + netorder => 0, + nvsize => 8, + ptrsize => 4, + version => "2.7", + version_nv => "2.007", + }, + ], +); + +plan tests => 31 + 2 * @tests; + +my $file = "xx-$$.pst"; + +is(eval { Storable::file_magic($file) }, undef, "empty file give undef"); +like($@, qr/^Can't open '\Q$file\E':/, "...and croaks"); +is(Storable::file_magic(__FILE__), undef, "not an image"); + +store({}, $file); +{ + my $info = Storable::file_magic($file); + unlink($file); + ok($info, "got info"); + is($info->{file}, $file, "file set"); + is($info->{hdrsize}, 11 + length($Config{byteorder}), "hdrsize"); + like($info->{version}, qr/^2\.\d+$/, "sane version"); + is($info->{version_nv}, Storable::BIN_WRITE_VERSION_NV, "version_nv match"); + is($info->{major}, 2, "sane major"); + ok($info->{minor}, "have minor"); + ok($info->{minor} >= Storable::BIN_WRITE_MINOR, "large enough minor"); + + ok(!$info->{netorder}, "no netorder"); + for (qw(byteorder intsize longsize ptrsize nvsize)) { + is($info->{$_}, $Config{$_}, "$_ match Config"); + } +} + +nstore({}, $file); +{ + my $info = Storable::file_magic($file); + unlink($file); + ok($info, "got info"); + is($info->{file}, $file, "file set"); + is($info->{hdrsize}, 6, "hdrsize"); + like($info->{version}, qr/^2\.\d+$/, "sane version"); + is($info->{version_nv}, Storable::BIN_WRITE_VERSION_NV, "version_nv match"); + is($info->{major}, 2, "sane major"); + ok($info->{minor}, "have minor"); + ok($info->{minor} >= Storable::BIN_WRITE_MINOR, "large enough minor"); + + ok($info->{netorder}, "no netorder"); + for (qw(byteorder intsize longsize ptrsize nvsize)) { + ok(!exists $info->{$_}, "no $_"); + } +} + +for my $test (@tests) { + my($data, $expected) = @$test; + open(FH, ">$file") || die "Can't create $file: $!"; + binmode(FH); + print FH $data; + close(FH) || die "Can't write $file: $!"; + + my $name = $expected->{file}; + $expected->{file} = $file; + + my $info = Storable::file_magic($file); + unlink($file); + + is_deeply($info, $expected, "file_magic $name"); + + $expected->{file} = 1; + is_deeply(Storable::read_magic($data), $expected, "read magic $name"); +}