1 ;# $Id: Storable.pm,v 1.0.1.5 2000/10/26 17:10:18 ram Exp $
3 ;# Copyright (c) 1995-2000, Raphael Manfredi
5 ;# You may redistribute only under the same terms as Perl 5, as specified
6 ;# in the README file that comes with the distribution.
8 ;# $Log: Storable.pm,v $
9 ;# Revision 1.0.1.6 2000/11/05 17:20:25 ram
10 ;# patch6: increased version number
12 ;# Revision 1.0.1.5 2000/10/26 17:10:18 ram
13 ;# patch5: documented that store() and retrieve() can return undef
14 ;# patch5: added paragraph explaining the auto require for thaw hooks
16 ;# Revision 1.0.1.4 2000/10/23 18:02:57 ram
17 ;# patch4: protected calls to flock() for dos platform
18 ;# patch4: added logcarp emulation if they don't have Log::Agent
20 ;# $Log: Storable.pm,v $
21 ;# Revision 1.0 2000/09/01 19:40:41 ram
22 ;# Baseline for first official release.
27 package Storable; @ISA = qw(Exporter DynaLoader);
29 @EXPORT = qw(store retrieve);
31 nstore store_fd nstore_fd fd_retrieve
35 lock_store lock_nstore lock_retrieve
39 use vars qw($forgive_me $VERSION);
42 *AUTOLOAD = \&AutoLoader::AUTOLOAD; # Grrr...
45 # Use of Log::Agent is optional
48 eval "use Log::Agent";
50 unless (defined @Log::Agent::EXPORT) {
64 # They might miss :flock in Fcntl
69 if (exists $Fcntl::EXPORT_TAGS{'flock'}) {
70 Fcntl->import(':flock');
82 sub retrieve_fd { &fd_retrieve } # Backward compatibility: old name for fd_retrieve; the paren-less &call hands the caller's @_ through unchanged
91 # Store target object hierarchy, identified by a reference to its root.
92 # The stored object tree may later be retrieved to memory via retrieve.
93 # Returns undef if an I/O error occurred, in which case the file is
97 return _store(\&pstore, @_, 0);
103 # Same as store, but in network order.
106 return _store(\&net_pstore, @_, 0);
112 # Same as store, but flock the file first (advisory locking).
115 return _store(\&pstore, @_, 1);
121 # Same as nstore, but flock the file first (advisory locking).
124 return _store(\&net_pstore, @_, 1);
127 # Internal store to file routine
131 my ($file, $use_locking) = @_;
132 logcroak "not a reference" unless ref($self);
133 logcroak "too many arguments" unless @_ == 2; # No @foo in arglist
135 open(FILE, ">$file") || logcroak "can't create $file: $!";
136 binmode FILE; # Archaic systems...
139 logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O";
142 flock(FILE, LOCK_EX) ||
143 logcroak "can't get exclusive lock on $file: $!";
145 # Unlocking will happen when FILE is closed
147 my $da = $@; # Don't mess if called from exception handler
149 # Call C routine nstore or pstore, depending on network order
150 eval { $ret = &$xsptr(*FILE, $self) };
151 close(FILE) or $ret = undef;
152 unlink($file) or warn "Can't unlink $file: $!\n" if $@ || !defined $ret;
153 logcroak $@ if $@ =~ s/\.?\n$/,/;
155 return $ret ? $ret : undef;
161 # Same as store, but perform on an already opened file descriptor instead.
162 # Returns undef if an I/O error occurred.
165 return _store_fd(\&pstore, @_);
171 # Same as store_fd, but in network order.
174 my ($self, $file) = @_;
175 return _store_fd(\&net_pstore, @_);
178 # Internal store routine on opened file descriptor
183 logcroak "not a reference" unless ref($self);
184 logcroak "too many arguments" unless @_ == 1; # No @foo in arglist
185 my $fd = fileno($file);
186 logcroak "not a valid file descriptor" unless defined $fd;
187 my $da = $@; # Don't mess if called from exception handler
189 # Call C routine nstore or pstore, depending on network order
190 eval { $ret = &$xsptr($file, $self) };
191 logcroak $@ if $@ =~ s/\.?\n$/,/;
193 return $ret ? $ret : undef;
199 # Store object and its hierarchy in memory and return a scalar
200 # containing the result.
203 _freeze(\&mstore, @_);
209 # Same as freeze but in network order.
212 _freeze(\&net_mstore, @_);
215 # Internal freeze routine
219 logcroak "not a reference" unless ref($self);
220 logcroak "too many arguments" unless @_ == 0; # No @foo in arglist
221 my $da = $@; # Don't mess if called from exception handler
223 # Call C routine mstore or net_mstore, depending on network order
224 eval { $ret = &$xsptr($self) };
225 logcroak $@ if $@ =~ s/\.?\n$/,/;
227 return $ret ? $ret : undef;
233 # Retrieve object hierarchy from disk, returning a reference to the root
234 # object of that tree.
243 # Same as retrieve, but with advisory locking.
249 # Internal retrieve routine
251 my ($file, $use_locking) = @_;
253 open(FILE, $file) || logcroak "can't open $file: $!";
254 binmode FILE; # Archaic systems...
256 my $da = $@; # Could be from exception handler
259 logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O";
262 flock(FILE, LOCK_SH) || logcroak "can't get shared lock on $file: $!";
263 # Unlocking will happen when FILE is closed
265 eval { $self = pretrieve(*FILE) }; # Call C routine
267 logcroak $@ if $@ =~ s/\.?\n$/,/;
275 # Same as retrieve, but perform from an already opened file descriptor instead.
279 my $fd = fileno($file);
280 logcroak "not a valid file descriptor" unless defined $fd;
282 my $da = $@; # Could be from exception handler
283 eval { $self = pretrieve($file) }; # Call C routine
284 logcroak $@ if $@ =~ s/\.?\n$/,/;
292 # Recreate objects in memory from an existing frozen image created
293 # by freeze. If the frozen image passed is undef, return undef.
297 return undef unless defined $frozen;
299 my $da = $@; # Could be from exception handler
300 eval { $self = mretrieve($frozen) }; # Call C routine
301 logcroak $@ if $@ =~ s/\.?\n$/,/;
308 Storable - persistency for perl data structures
313 store \%table, 'file';
314 $hashref = retrieve('file');
316 use Storable qw(nstore store_fd nstore_fd freeze thaw dclone);
319 nstore \%table, 'file';
320 $hashref = retrieve('file'); # There is NO nretrieve()
322 # Storing to and retrieving from an already opened file
323 store_fd \@array, \*STDOUT;
324 nstore_fd \%table, \*STDOUT;
325 $aryref = fd_retrieve(\*SOCKET);
326 $hashref = fd_retrieve(\*SOCKET);
328 # Serializing to memory
329 $serialized = freeze \%table;
330 %table_clone = %{ thaw($serialized) };
332 # Deep (recursive) cloning
333 $cloneref = dclone($ref);
336 use Storable qw(lock_store lock_nstore lock_retrieve);
337 lock_store \%table, 'file';
338 lock_nstore \%table, 'file';
339 $hashref = lock_retrieve('file');
343 The Storable package brings persistency to your perl data structures
344 containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be
345 conveniently stored to disk and retrieved at a later time.
347 It can be used in the regular procedural way by calling C<store> with
348 a reference to the object to be stored, along with the file name where
349 the image should be written.
350 The routine returns C<undef> for I/O problems or other internal error,
351 a true value otherwise. Serious errors are propagated as a C<die> exception.
353 To retrieve data stored to disk, use C<retrieve> with a file name,
354 and the objects stored into that file are recreated into memory for you,
355 a I<reference> to the root object being returned. In case an I/O error
356 occurs while reading, C<undef> is returned instead. Other serious
357 errors are propagated via C<die>.
359 Since storage is performed recursively, you might want to stuff references
360 to objects that share a lot of common data into a single array or hash
361 table, and then store that object. That way, when you retrieve back the
362 whole thing, the objects will continue to share what they originally shared.
364 At the cost of a slight header overhead, you may store to an already
365 opened file descriptor using the C<store_fd> routine, and retrieve
366 from a file via C<fd_retrieve>. Those names aren't imported by default,
367 so you will have to do that explicitly if you need those routines.
368 The file descriptor you supply must be already opened, for read
369 if you're going to retrieve and for write if you wish to store.
371 store_fd(\%table, *STDOUT) || die "can't store to stdout\n";
372 $hashref = fd_retrieve(*STDIN);
374 You can also store data in network order to allow easy sharing across
375 multiple platforms, or when storing on a socket known to be remotely
376 connected. The routines to call have an initial C<n> prefix for I<network>,
377 as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be
378 correctly restored so you don't have to know whether you're restoring
379 from native or network ordered data. Double values are stored stringified
380 to ensure portability as well, at the slight risk of losing some precision
381 in the last decimals.
383 When using C<fd_retrieve>, objects are retrieved in sequence, one
384 object (i.e. one recursive tree) per associated C<store_fd>.
386 If you're more from the object-oriented camp, you can inherit from
387 Storable and directly store your objects by invoking C<store> as
388 a method. The fact that the root of the to-be-stored tree is a
389 blessed reference (i.e. an object) is special-cased so that the
390 retrieve does not provide a reference to that object but rather the
391 blessed object reference itself. (Otherwise, you'd get a reference
392 to that blessed object).
396 The Storable engine can also store data into a Perl scalar instead, to
397 later retrieve them. This is mainly used to freeze a complex structure in
398 some safe compact memory place (where it can possibly be sent to another
399 process via some IPC, since freezing the structure also serializes it in
400 effect). Later on, and maybe somewhere else, you can thaw the Perl scalar
401 out and recreate the original complex structure in memory.
403 Surprisingly, the routines to be called are named C<freeze> and C<thaw>.
404 If you wish to send out the frozen scalar to another machine, use
405 C<nfreeze> instead to get a portable image.
407 Note that freezing an object structure and immediately thawing it
408 actually achieves a deep cloning of that structure:
410 dclone(.) = thaw(freeze(.))
412 Storable provides you with a C<dclone> interface which does not create
413 that intermediary scalar but instead freezes the structure in some
414 internal memory space and then immediately thaws it out.
416 =head1 ADVISORY LOCKING
418 The C<lock_store> and C<lock_nstore> routines are equivalent to C<store>
419 and C<nstore>, only they get an exclusive lock on the file before
420 writing. Likewise, C<lock_retrieve> performs as C<retrieve>, but also
421 gets a shared lock on the file before reading.
423 Like with any advisory locking scheme, the protection only works if
424 you systematically use C<lock_store> and C<lock_retrieve>. If one
425 side of your application uses C<store> whilst the other uses C<lock_retrieve>,
426 you will get no protection at all.
428 The internal advisory locking is implemented using Perl's flock() routine.
429 If your system does not support any form of flock(), or if you share
430 your files across NFS, you might wish to use other forms of locking by
431 using modules like LockFile::Simple which lock a file using a filesystem
432 entry, instead of locking the file descriptor.
436 The heart of Storable is written in C for decent speed. Extra low-level
437 optimizations have been made when manipulating perl internals, to
438 sacrifice encapsulation for the benefit of a greater speed.
440 =head1 CANONICAL REPRESENTATION
442 Normally Storable stores elements of hashes in the order they are
443 stored internally by Perl, i.e. pseudo-randomly. If you set
444 C<$Storable::canonical> to some C<TRUE> value, Storable will store
445 hashes with the elements sorted by their key. This allows you to
446 compare data structures by comparing their frozen representations (or
447 even the compressed frozen representations), which can be useful for
448 creating lookup tables for complicated queries.
450 Canonical order does not imply network order, those are two orthogonal
453 =head1 ERROR REPORTING
455 Storable uses the "exception" paradigm, in that it does not try to work around
456 failures: if something bad happens, an exception is generated from the
457 caller's perspective (see L<Carp> and C<croak()>). Use eval {} to trap
460 When Storable croaks, it tries to report the error via the C<logcroak()>
461 routine from the C<Log::Agent> package, if it is available.
463 Normal errors are reported by having store() or retrieve() return C<undef>.
464 Such errors are usually I/O errors (or truncated stream errors at retrieval).
470 Any class may define hooks that will be called during the serialization
471 and deserialization process on objects that are instances of that class.
472 Those hooks can redefine the way serialization is performed (and therefore,
473 how the symmetrical deserialization should be conducted).
475 Since we said earlier:
477 dclone(.) = thaw(freeze(.))
479 everything we say about hooks should also hold for deep cloning. However,
480 hooks get to know whether the operation is a mere serialization, or a cloning.
482 Therefore, when serializing hooks are involved,
484 dclone(.) <> thaw(freeze(.))
486 Well, you could keep them in sync, but there's no guarantee it will always
487 hold on classes somebody else wrote. Besides, there is little to gain in
488 doing so: a serializing hook could only keep one attribute of an object,
489 which is probably not what should happen during a deep cloning of that
492 Here is the hooking interface:
496 =item C<STORABLE_freeze> I<obj>, I<cloning>
498 The serializing hook, called on the object during serialization. It can be
499 inherited, or defined in the class itself, like any other method.
501 Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating
502 whether we're in a dclone() or a regular serialization via store() or freeze().
504 Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized
505 is the serialized form to be used, and the optional $ref1, $ref2, etc... are
506 extra references that you wish to let the Storable engine serialize.
508 At deserialization time, you will be given back the same LIST, but all the
509 extra references will be pointing into the deserialized structure.
511 The B<first time> the hook is hit in a serialization flow, you may have it
512 return an empty list. That will signal the Storable engine to further
513 discard that hook for this class and to therefore revert to the default
514 serialization of the underlying Perl data. The hook will again be normally
515 processed in the next serialization.
517 Unless you know better, serializing hook should always say:
519 sub STORABLE_freeze {
520 my ($self, $cloning) = @_;
521 return if $cloning; # Regular default serialization
525 in order to keep reasonable dclone() semantics.
527 =item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ...
529 The deserializing hook called on the object during deserialization.
530 But wait. If we're deserializing, there's no object yet... right?
532 Wrong: the Storable engine creates an empty one for you. If you know Eiffel,
533 you can view C<STORABLE_thaw> as an alternate creation routine.
535 This means the hook can be inherited like any other method, and that
536 I<obj> is your blessed reference for this particular instance.
538 The other arguments should look familiar if you know C<STORABLE_freeze>:
539 I<cloning> is true when we're part of a deep clone operation, I<serialized>
540 is the serialized string you returned to the engine in C<STORABLE_freeze>,
541 and there may be an optional list of references, in the same order you gave
542 them at serialization time, pointing to the deserialized objects (which
543 have been processed courtesy of the Storable engine).
545 When the Storable engine does not find any C<STORABLE_thaw> hook routine,
546 it tries to load the class by requiring the package dynamically (using
547 the blessed package name), and then re-attempts the lookup. If at that
548 time the hook cannot be located, the engine croaks. Note that this mechanism
549 will fail if you define several classes in the same file, but perlmod(1)
552 It is up to you to use this information to populate I<obj> the way you want.
554 Returned value: none.
560 Predicates are not exportable. They must be called by explicitly prefixing
561 them with the Storable package name.
565 =item C<Storable::last_op_in_netorder>
567 The C<Storable::last_op_in_netorder()> predicate will tell you whether
568 network order was used in the last store or retrieve operation. If you
569 don't know how to use this, just forget about it.
571 =item C<Storable::is_storing>
573 Returns true if within a store operation (via STORABLE_freeze hook).
575 =item C<Storable::is_retrieving>
577 Returns true if within a retrieve operation (via STORABLE_thaw hook).
583 With hooks comes the ability to recurse back to the Storable engine. Indeed,
584 hooks are regular Perl code, and Storable is convenient when it comes to
585 serialize and deserialize things, so why not use it to handle the
586 serialization string?
588 There are a few things you need to know however:
594 You can create endless loops if the things you serialize via freeze()
595 (for instance) point back to the object we're trying to serialize in the hook.
599 Shared references among objects will not stay shared: if we're serializing
600 the list of objects [A, C] where both objects A and C refer to the SAME object
601 B, and if there is a serializing hook in A that says freeze(B), then when
602 deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D,
603 a deep clone of B'. The topology was not preserved.
607 That's why C<STORABLE_freeze> lets you provide a list of references
608 to serialize. The engine guarantees that those will be serialized in the
609 same context as the other objects, and therefore that shared objects will
612 In the above [A, C] example, the C<STORABLE_freeze> hook could return:
614 ("something", $self->{B})
616 and the B part would be serialized by the engine. In C<STORABLE_thaw>, you
617 would get back the reference to the B' object, deserialized for you.
619 Therefore, recursion should normally be avoided, but is nonetheless supported.
623 There is a new Clone module available on CPAN which implements deep cloning
624 natively, i.e. without freezing to memory and thawing the result. It is
625 aimed to replace Storable's dclone() some day. However, it does not currently
626 support Storable hooks to redefine the way deep cloning is performed.
630 Here are some code samples showing a possible usage of Storable:
632 use Storable qw(store retrieve freeze thaw dclone);
634 %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1);
636 store(\%color, '/tmp/colors') or die "Can't store %color in /tmp/colors!\n";
638 $colref = retrieve('/tmp/colors');
639 die "Unable to retrieve from /tmp/colors!\n" unless defined $colref;
640 printf "Blue is still %lf\n", $colref->{'Blue'};
642 $colref2 = dclone(\%color);
644 $str = freeze(\%color);
645 printf "Serialization of %%color is %d bytes long.\n", length($str);
646 $colref3 = thaw($str);
648 which prints (on my machine):
650 Blue is still 0.100000
651 Serialization of %color is 102 bytes long.
655 If you're using references as keys within your hash tables, you're bound
656 to disappointment when retrieving your data. Indeed, Perl stringifies
657 references used as hash table keys. If you later wish to access the
658 items via another reference stringification (i.e. using the same
659 reference that was used for the key originally to record the value into
660 the hash table), it will work because both references stringify to the
663 It won't work across C<store> and C<retrieve> operations however, because
664 the addresses in the retrieved objects, which are part of the stringified
665 references, will probably differ from the original addresses. The
666 topology of your structure is preserved, but not hidden semantics
669 On platforms where it matters, be sure to call C<binmode()> on the
670 descriptors that you pass to Storable functions.
672 Storing data canonically that contains large hashes can be
673 significantly slower than storing the same data normally, as
674 temporary arrays to hold the keys for each hash have to be allocated,
675 populated, sorted and freed. Some tests have shown a halving of the
676 speed of storing -- the exact penalty will depend on the complexity of
677 your data. There is no slowdown on retrieval.
681 You can't store GLOB, CODE, FORMLINE, etc... If you can define
682 semantics for those operations, feel free to enhance Storable so that
683 it can deal with them.
685 The store functions will C<croak> if they run into such references
686 unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that
687 case, the fatal message is turned into a warning and some
688 meaningless string is stored instead.
690 Setting C<$Storable::canonical> may not yield frozen strings that
691 compare equal due to possible stringification of numbers. When the
692 string version of a scalar exists, it is the form stored, therefore
693 if you happen to use your numbers as strings between two freezing
694 operations on the same data structures, you will get different
697 When storing doubles in network order, their value is stored as text.
698 However, you should also not expect non-numeric floating-point values
699 such as infinity and "not a number" to pass successfully through a
700 nstore()/retrieve() pair.
702 As Storable neither knows nor cares about character sets (although it
703 does know that characters may be more than eight bits wide), any difference
704 in the interpretation of character codes between a host and a target
705 system is your problem. In particular, if host and target use different
706 code points to represent the characters used in the text representation
707 of floating-point numbers, you will not be able to exchange
708 floating-point data, even with nstore().
712 Thank you to (in chronological order):
714 Jarkko Hietaniemi <jhi@iki.fi>
715 Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de>
716 Benjamin A. Holzman <bah@ecnvantage.com>
717 Andrew Ford <A.Ford@ford-mason.co.uk>
718 Gisle Aas <gisle@aas.no>
719 Jeff Gresham <gresham_jeffrey@jpmorgan.com>
720 Murray Nesbitt <murray@activestate.com>
721 Marc Lehmann <pcg@opengroup.org>
722 Justin Banks <justinb@wamnet.com>
723 Jarkko Hietaniemi <jhi@iki.fi> (AGAIN, as perl 5.7.0 Pumpkin!)
724 Salvador Ortiz Garcia <sog@msg.com.mx>
725 Dominic Dunlop <domo@computer.org>
726 Erik Haugan <erik@solbors.no>
728 for their bug reports, suggestions and contributions.
730 Benjamin Holzman contributed the tied variable support, Andrew Ford
731 contributed the canonical order for hashes, and Gisle Aas fixed
732 a few misunderstandings of mine regarding the Perl internals,
733 and optimized the emission of "tags" in the output streams by
734 simply counting the objects instead of tagging them (leading to
735 a binary incompatibility for the Storable image starting at version
736 0.6--older images are of course still properly understood).
737 Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading
738 and reference to tied items support.
742 There is a Japanese translation of this man page available at
743 http://member.nifty.ne.jp/hippo2000/perltips/storable.htm ,
744 courtesy of Kawai, Takanori <kawai@nippon-rad.co.jp>.
748 Raphael Manfredi F<E<lt>Raphael_Manfredi@pobox.comE<gt>>