1 ;# $Id: Storable.pm,v 1.0.1.11 2001/07/01 11:22:14 ram Exp $
3 ;# Copyright (c) 1995-2000, Raphael Manfredi
5 ;# You may redistribute only under the same terms as Perl 5, as specified
6 ;# in the README file that comes with the distribution.
8 ;# $Log: Storable.pm,v $
9 ;# Revision 1.0.1.11 2001/07/01 11:22:14 ram
10 ;# patch12: systematically use "=over 4" for POD linters
11 ;# patch12: updated version number
13 ;# Revision 1.0.1.10 2001/03/15 00:20:25 ram
14 ;# patch11: updated version number
16 ;# Revision 1.0.1.9 2001/02/17 12:37:32 ram
17 ;# patch10: forgot to increase version number at previous patch
19 ;# Revision 1.0.1.8 2001/02/17 12:24:37 ram
20 ;# patch8: fixed incorrect error message
22 ;# Revision 1.0.1.7 2001/01/03 09:39:02 ram
23 ;# patch7: added CAN_FLOCK to determine whether we can flock() or not
25 ;# Revision 1.0.1.6 2000/11/05 17:20:25 ram
26 ;# patch6: increased version number
28 ;# Revision 1.0.1.5 2000/10/26 17:10:18 ram
29 ;# patch5: documented that store() and retrieve() can return undef
30 ;# patch5: added paragraph explaining the auto require for thaw hooks
32 ;# Revision 1.0.1.4 2000/10/23 18:02:57 ram
33 ;# patch4: protected calls to flock() for dos platform
34 ;# patch4: added logcarp emulation if they don't have Log::Agent
36 ;# Revision 1.0.1.3 2000/09/29 19:49:01 ram
37 ;# patch3: updated version number
39 ;# Revision 1.0.1.2 2000/09/28 21:42:51 ram
40 ;# patch2: added lock_store lock_nstore lock_retrieve
42 ;# Revision 1.0.1.1 2000/09/17 16:46:21 ram
43 ;# patch1: documented that doubles are stringified by nstore()
44 ;# patch1: added Salvador Ortiz Garcia in CREDITS section
46 ;# Revision 1.0 2000/09/01 19:40:41 ram
47 ;# Baseline for first official release.
52 package Storable; @ISA = qw(Exporter DynaLoader);
54 @EXPORT = qw(store retrieve);
56 nstore store_fd nstore_fd fd_retrieve
60 lock_store lock_nstore lock_retrieve
64 use vars qw($forgive_me $VERSION);
67 *AUTOLOAD = \&AutoLoader::AUTOLOAD; # Grrr...
70 # Use of Log::Agent is optional
73 eval "use Log::Agent";
75 unless (defined @Log::Agent::EXPORT) {
89 # They might miss :flock in Fcntl
94 if (exists $Fcntl::EXPORT_TAGS{'flock'}) {
95 Fcntl->import(':flock');
107 sub retrieve_fd { &fd_retrieve } # Backward compatibility
110 # Determine whether locking is possible, but only when needed.
116 return $CAN_FLOCK if defined $CAN_FLOCK;
117 require Config; import Config;
119 $Config{'d_flock'} ||
120 $Config{'d_fcntl_can_lock'} ||
131 # Store target object hierarchy, identified by a reference to its root.
132 # The stored object tree may later be retrieved to memory via retrieve.
133 # Returns undef if an I/O error occurred, in which case the file is
137 return _store(\&pstore, @_, 0);
143 # Same as store, but in network order.
146 return _store(\&net_pstore, @_, 0);
152 # Same as store, but flock the file first (advisory locking).
155 return _store(\&pstore, @_, 1);
161 # Same as nstore, but flock the file first (advisory locking).
164 return _store(\&net_pstore, @_, 1);
167 # Internal store to file routine
171 my ($file, $use_locking) = @_;
172 logcroak "not a reference" unless ref($self);
173 logcroak "wrong argument number" unless @_ == 2; # No @foo in arglist
175 open(FILE, ">$file") || logcroak "can't create $file: $!";
176 binmode FILE; # Archaic systems...
178 unless (&CAN_FLOCK) {
179 logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O";
182 flock(FILE, LOCK_EX) ||
183 logcroak "can't get exclusive lock on $file: $!";
185 # Unlocking will happen when FILE is closed
187 my $da = $@; # Don't mess if called from exception handler
189 # Call C routine nstore or pstore, depending on network order
190 eval { $ret = &$xsptr(*FILE, $self) };
191 close(FILE) or $ret = undef;
192 unlink($file) or warn "Can't unlink $file: $!\n" if $@ || !defined $ret;
193 logcroak $@ if $@ =~ s/\.?\n$/,/;
195 return $ret ? $ret : undef;
201 # Same as store, but perform on an already opened file descriptor instead.
202 # Returns undef if an I/O error occurred.
205 return _store_fd(\&pstore, @_);
211 # Same as store_fd, but in network order.
214 my ($self, $file) = @_;
215 return _store_fd(\&net_pstore, @_);
218 # Internal store routine on opened file descriptor
223 logcroak "not a reference" unless ref($self);
224 logcroak "too many arguments" unless @_ == 1; # No @foo in arglist
225 my $fd = fileno($file);
226 logcroak "not a valid file descriptor" unless defined $fd;
227 my $da = $@; # Don't mess if called from exception handler
229 # Call C routine nstore or pstore, depending on network order
230 eval { $ret = &$xsptr($file, $self) };
231 logcroak $@ if $@ =~ s/\.?\n$/,/;
233 return $ret ? $ret : undef;
239 # Store oject and its hierarchy in memory and return a scalar
240 # containing the result.
243 _freeze(\&mstore, @_);
249 # Same as freeze but in network order.
252 _freeze(\&net_mstore, @_);
255 # Internal freeze routine
259 logcroak "not a reference" unless ref($self);
260 logcroak "too many arguments" unless @_ == 0; # No @foo in arglist
261 my $da = $@; # Don't mess if called from exception handler
263 # Call C routine mstore or net_mstore, depending on network order
264 eval { $ret = &$xsptr($self) };
265 logcroak $@ if $@ =~ s/\.?\n$/,/;
267 return $ret ? $ret : undef;
273 # Retrieve object hierarchy from disk, returning a reference to the root
274 # object of that tree.
283 # Same as retrieve, but with advisory locking.
289 # Internal retrieve routine
291 my ($file, $use_locking) = @_;
293 open(FILE, $file) || logcroak "can't open $file: $!";
294 binmode FILE; # Archaic systems...
296 my $da = $@; # Could be from exception handler
298 unless (&CAN_FLOCK) {
299 logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O";
302 flock(FILE, LOCK_SH) || logcroak "can't get shared lock on $file: $!";
303 # Unlocking will happen when FILE is closed
305 eval { $self = pretrieve(*FILE) }; # Call C routine
307 logcroak $@ if $@ =~ s/\.?\n$/,/;
315 # Same as retrieve, but perform from an already opened file descriptor instead.
319 my $fd = fileno($file);
320 logcroak "not a valid file descriptor" unless defined $fd;
322 my $da = $@; # Could be from exception handler
323 eval { $self = pretrieve($file) }; # Call C routine
324 logcroak $@ if $@ =~ s/\.?\n$/,/;
332 # Recreate objects in memory from an existing frozen image created
333 # by freeze. If the frozen image passed is undef, return undef.
337 return undef unless defined $frozen;
339 my $da = $@; # Could be from exception handler
340 eval { $self = mretrieve($frozen) }; # Call C routine
341 logcroak $@ if $@ =~ s/\.?\n$/,/;
348 Storable - persistency for perl data structures
353 store \%table, 'file';
354 $hashref = retrieve('file');
356 use Storable qw(nstore store_fd nstore_fd freeze thaw dclone);
359 nstore \%table, 'file';
360 $hashref = retrieve('file'); # There is NO nretrieve()
362 # Storing to and retrieving from an already opened file
363 store_fd \@array, \*STDOUT;
364 nstore_fd \%table, \*STDOUT;
365 $aryref = fd_retrieve(\*SOCKET);
366 $hashref = fd_retrieve(\*SOCKET);
368 # Serializing to memory
369 $serialized = freeze \%table;
370 %table_clone = %{ thaw($serialized) };
372 # Deep (recursive) cloning
373 $cloneref = dclone($ref);
376 use Storable qw(lock_store lock_nstore lock_retrieve)
377 lock_store \%table, 'file';
378 lock_nstore \%table, 'file';
379 $hashref = lock_retrieve('file');
383 The Storable package brings persistency to your perl data structures
384 containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be
385 convenientely stored to disk and retrieved at a later time.
387 It can be used in the regular procedural way by calling C<store> with
388 a reference to the object to be stored, along with the file name where
389 the image should be written.
390 The routine returns C<undef> for I/O problems or other internal error,
391 a true value otherwise. Serious errors are propagated as a C<die> exception.
393 To retrieve data stored to disk, use C<retrieve> with a file name,
394 and the objects stored into that file are recreated into memory for you,
395 a I<reference> to the root object being returned. In case an I/O error
396 occurs while reading, C<undef> is returned instead. Other serious
397 errors are propagated via C<die>.
399 Since storage is performed recursively, you might want to stuff references
400 to objects that share a lot of common data into a single array or hash
401 table, and then store that object. That way, when you retrieve back the
402 whole thing, the objects will continue to share what they originally shared.
404 At the cost of a slight header overhead, you may store to an already
405 opened file descriptor using the C<store_fd> routine, and retrieve
406 from a file via C<fd_retrieve>. Those names aren't imported by default,
407 so you will have to do that explicitely if you need those routines.
408 The file descriptor you supply must be already opened, for read
409 if you're going to retrieve and for write if you wish to store.
411 store_fd(\%table, *STDOUT) || die "can't store to stdout\n";
412 $hashref = fd_retrieve(*STDIN);
414 You can also store data in network order to allow easy sharing across
415 multiple platforms, or when storing on a socket known to be remotely
416 connected. The routines to call have an initial C<n> prefix for I<network>,
417 as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be
418 correctly restored so you don't have to know whether you're restoring
419 from native or network ordered data. Double values are stored stringified
420 to ensure portability as well, at the slight risk of loosing some precision
421 in the last decimals.
423 When using C<fd_retrieve>, objects are retrieved in sequence, one
424 object (i.e. one recursive tree) per associated C<store_fd>.
426 If you're more from the object-oriented camp, you can inherit from
427 Storable and directly store your objects by invoking C<store> as
428 a method. The fact that the root of the to-be-stored tree is a
429 blessed reference (i.e. an object) is special-cased so that the
430 retrieve does not provide a reference to that object but rather the
431 blessed object reference itself. (Otherwise, you'd get a reference
432 to that blessed object).
436 The Storable engine can also store data into a Perl scalar instead, to
437 later retrieve them. This is mainly used to freeze a complex structure in
438 some safe compact memory place (where it can possibly be sent to another
439 process via some IPC, since freezing the structure also serializes it in
440 effect). Later on, and maybe somewhere else, you can thaw the Perl scalar
441 out and recreate the original complex structure in memory.
443 Surprisingly, the routines to be called are named C<freeze> and C<thaw>.
444 If you wish to send out the frozen scalar to another machine, use
445 C<nfreeze> instead to get a portable image.
447 Note that freezing an object structure and immediately thawing it
448 actually achieves a deep cloning of that structure:
450 dclone(.) = thaw(freeze(.))
452 Storable provides you with a C<dclone> interface which does not create
453 that intermediary scalar but instead freezes the structure in some
454 internal memory space and then immediatly thaws it out.
456 =head1 ADVISORY LOCKING
458 The C<lock_store> and C<lock_nstore> routine are equivalent to C<store>
459 and C<nstore>, only they get an exclusive lock on the file before
460 writing. Likewise, C<lock_retrieve> performs as C<retrieve>, but also
461 gets a shared lock on the file before reading.
463 Like with any advisory locking scheme, the protection only works if
464 you systematically use C<lock_store> and C<lock_retrieve>. If one
465 side of your application uses C<store> whilst the other uses C<lock_retrieve>,
466 you will get no protection at all.
468 The internal advisory locking is implemented using Perl's flock() routine.
469 If your system does not support any form of flock(), or if you share
470 your files across NFS, you might wish to use other forms of locking by
471 using modules like LockFile::Simple which lock a file using a filesystem
472 entry, instead of locking the file descriptor.
476 The heart of Storable is written in C for decent speed. Extra low-level
477 optimization have been made when manipulating perl internals, to
478 sacrifice encapsulation for the benefit of a greater speed.
480 =head1 CANONICAL REPRESENTATION
482 Normally Storable stores elements of hashes in the order they are
483 stored internally by Perl, i.e. pseudo-randomly. If you set
484 C<$Storable::canonical> to some C<TRUE> value, Storable will store
485 hashes with the elements sorted by their key. This allows you to
486 compare data structures by comparing their frozen representations (or
487 even the compressed frozen representations), which can be useful for
488 creating lookup tables for complicated queries.
490 Canonical order does not imply network order, those are two orthogonal
493 =head1 ERROR REPORTING
495 Storable uses the "exception" paradigm, in that it does not try to workaround
496 failures: if something bad happens, an exception is generated from the
497 caller's perspective (see L<Carp> and C<croak()>). Use eval {} to trap
500 When Storable croaks, it tries to report the error via the C<logcroak()>
501 routine from the C<Log::Agent> package, if it is available.
503 Normal errors are reported by having store() or retrieve() return C<undef>.
504 Such errors are usually I/O errors (or truncated stream errors at retrieval).
510 Any class may define hooks that will be called during the serialization
511 and deserialization process on objects that are instances of that class.
512 Those hooks can redefine the way serialization is performed (and therefore,
513 how the symetrical deserialization should be conducted).
515 Since we said earlier:
517 dclone(.) = thaw(freeze(.))
519 everything we say about hooks should also hold for deep cloning. However,
520 hooks get to know whether the operation is a mere serialization, or a cloning.
522 Therefore, when serializing hooks are involved,
524 dclone(.) <> thaw(freeze(.))
526 Well, you could keep them in sync, but there's no guarantee it will always
527 hold on classes somebody else wrote. Besides, there is little to gain in
528 doing so: a serializing hook could only keep one attribute of an object,
529 which is probably not what should happen during a deep cloning of that
532 Here is the hooking interface:
536 =item C<STORABLE_freeze> I<obj>, I<cloning>
538 The serializing hook, called on the object during serialization. It can be
539 inherited, or defined in the class itself, like any other method.
541 Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating
542 whether we're in a dclone() or a regular serialization via store() or freeze().
544 Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized
545 is the serialized form to be used, and the optional $ref1, $ref2, etc... are
546 extra references that you wish to let the Storable engine serialize.
548 At deserialization time, you will be given back the same LIST, but all the
549 extra references will be pointing into the deserialized structure.
551 The B<first time> the hook is hit in a serialization flow, you may have it
552 return an empty list. That will signal the Storable engine to further
553 discard that hook for this class and to therefore revert to the default
554 serialization of the underlying Perl data. The hook will again be normally
555 processed in the next serialization.
557 Unless you know better, serializing hook should always say:
559 sub STORABLE_freeze {
560 my ($self, $cloning) = @_;
561 return if $cloning; # Regular default serialization
565 in order to keep reasonable dclone() semantics.
567 =item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ...
569 The deserializing hook called on the object during deserialization.
570 But wait. If we're deserializing, there's no object yet... right?
572 Wrong: the Storable engine creates an empty one for you. If you know Eiffel,
573 you can view C<STORABLE_thaw> as an alternate creation routine.
575 This means the hook can be inherited like any other method, and that
576 I<obj> is your blessed reference for this particular instance.
578 The other arguments should look familiar if you know C<STORABLE_freeze>:
579 I<cloning> is true when we're part of a deep clone operation, I<serialized>
580 is the serialized string you returned to the engine in C<STORABLE_freeze>,
581 and there may be an optional list of references, in the same order you gave
582 them at serialization time, pointing to the deserialized objects (which
583 have been processed courtesy of the Storable engine).
585 When the Storable engine does not find any C<STORABLE_thaw> hook routine,
586 it tries to load the class by requiring the package dynamically (using
587 the blessed package name), and then re-attempts the lookup. If at that
588 time the hook cannot be located, the engine croaks. Note that this mechanism
589 will fail if you define several classes in the same file, but perlmod(1)
592 It is up to you to use these information to populate I<obj> the way you want.
594 Returned value: none.
600 Predicates are not exportable. They must be called by explicitely prefixing
601 them with the Storable package name.
605 =item C<Storable::last_op_in_netorder>
607 The C<Storable::last_op_in_netorder()> predicate will tell you whether
608 network order was used in the last store or retrieve operation. If you
609 don't know how to use this, just forget about it.
611 =item C<Storable::is_storing>
613 Returns true if within a store operation (via STORABLE_freeze hook).
615 =item C<Storable::is_retrieving>
617 Returns true if within a retrieve operation, (via STORABLE_thaw hook).
623 With hooks comes the ability to recurse back to the Storable engine. Indeed,
624 hooks are regular Perl code, and Storable is convenient when it comes to
625 serialize and deserialize things, so why not use it to handle the
626 serialization string?
628 There are a few things you need to know however:
634 You can create endless loops if the things you serialize via freeze()
635 (for instance) point back to the object we're trying to serialize in the hook.
639 Shared references among objects will not stay shared: if we're serializing
640 the list of object [A, C] where both object A and C refer to the SAME object
641 B, and if there is a serializing hook in A that says freeze(B), then when
642 deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D,
643 a deep clone of B'. The topology was not preserved.
647 That's why C<STORABLE_freeze> lets you provide a list of references
648 to serialize. The engine guarantees that those will be serialized in the
649 same context as the other objects, and therefore that shared objects will
652 In the above [A, C] example, the C<STORABLE_freeze> hook could return:
654 ("something", $self->{B})
656 and the B part would be serialized by the engine. In C<STORABLE_thaw>, you
657 would get back the reference to the B' object, deserialized for you.
659 Therefore, recursion should normally be avoided, but is nonetheless supported.
663 There is a new Clone module available on CPAN which implements deep cloning
664 natively, i.e. without freezing to memory and thawing the result. It is
665 aimed to replace Storable's dclone() some day. However, it does not currently
666 support Storable hooks to redefine the way deep cloning is performed.
670 Here are some code samples showing a possible usage of Storable:
672 use Storable qw(store retrieve freeze thaw dclone);
674 %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1);
676 store(\%color, '/tmp/colors') or die "Can't store %a in /tmp/colors!\n";
678 $colref = retrieve('/tmp/colors');
679 die "Unable to retrieve from /tmp/colors!\n" unless defined $colref;
680 printf "Blue is still %lf\n", $colref->{'Blue'};
682 $colref2 = dclone(\%color);
684 $str = freeze(\%color);
685 printf "Serialization of %%color is %d bytes long.\n", length($str);
686 $colref3 = thaw($str);
688 which prints (on my machine):
690 Blue is still 0.100000
691 Serialization of %color is 102 bytes long.
695 If you're using references as keys within your hash tables, you're bound
696 to disapointment when retrieving your data. Indeed, Perl stringifies
697 references used as hash table keys. If you later wish to access the
698 items via another reference stringification (i.e. using the same
699 reference that was used for the key originally to record the value into
700 the hash table), it will work because both references stringify to the
703 It won't work across a C<store> and C<retrieve> operations however, because
704 the addresses in the retrieved objects, which are part of the stringified
705 references, will probably differ from the original addresses. The
706 topology of your structure is preserved, but not hidden semantics
709 On platforms where it matters, be sure to call C<binmode()> on the
710 descriptors that you pass to Storable functions.
712 Storing data canonically that contains large hashes can be
713 significantly slower than storing the same data normally, as
714 temprorary arrays to hold the keys for each hash have to be allocated,
715 populated, sorted and freed. Some tests have shown a halving of the
716 speed of storing -- the exact penalty will depend on the complexity of
717 your data. There is no slowdown on retrieval.
721 You can't store GLOB, CODE, FORMLINE, etc... If you can define
722 semantics for those operations, feel free to enhance Storable so that
723 it can deal with them.
725 The store functions will C<croak> if they run into such references
726 unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that
727 case, the fatal message is turned in a warning and some
728 meaningless string is stored instead.
730 Setting C<$Storable::canonical> may not yield frozen strings that
731 compare equal due to possible stringification of numbers. When the
732 string version of a scalar exists, it is the form stored, therefore
733 if you happen to use your numbers as strings between two freezing
734 operations on the same data structures, you will get different
737 When storing doubles in network order, their value is stored as text.
738 However, you should also not expect non-numeric floating-point values
739 such as infinity and "not a number" to pass successfully through a
740 nstore()/retrieve() pair.
742 As Storable neither knows nor cares about character sets (although it
743 does know that characters may be more than eight bits wide), any difference
744 in the interpretation of character codes between a host and a target
745 system is your problem. In particular, if host and target use different
746 code points to represent the characters used in the text representation
747 of floating-point numbers, you will not be able be able to exchange
748 floating-point data, even with nstore().
752 Thank you to (in chronological order):
754 Jarkko Hietaniemi <jhi@iki.fi>
755 Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de>
756 Benjamin A. Holzman <bah@ecnvantage.com>
757 Andrew Ford <A.Ford@ford-mason.co.uk>
758 Gisle Aas <gisle@aas.no>
759 Jeff Gresham <gresham_jeffrey@jpmorgan.com>
760 Murray Nesbitt <murray@activestate.com>
761 Marc Lehmann <pcg@opengroup.org>
762 Justin Banks <justinb@wamnet.com>
763 Jarkko Hietaniemi <jhi@iki.fi> (AGAIN, as perl 5.7.0 Pumpkin!)
764 Salvador Ortiz Garcia <sog@msg.com.mx>
765 Dominic Dunlop <domo@computer.org>
766 Erik Haugan <erik@solbors.no>
768 for their bug reports, suggestions and contributions.
770 Benjamin Holzman contributed the tied variable support, Andrew Ford
771 contributed the canonical order for hashes, and Gisle Aas fixed
772 a few misunderstandings of mine regarding the Perl internals,
773 and optimized the emission of "tags" in the output streams by
774 simply counting the objects instead of tagging them (leading to
775 a binary incompatibility for the Storable image starting at version
776 0.6--older images are of course still properly understood).
777 Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading
778 and reference to tied items support.
782 There is a Japanese translation of this man page available at
783 http://member.nifty.ne.jp/hippo2000/perltips/storable.htm ,
784 courtesy of Kawai, Takanori <kawai@nippon-rad.co.jp>.
788 Raphael Manfredi F<E<lt>Raphael_Manfredi@pobox.comE<gt>>