1 ;# $Id: Storable.pm,v 1.0.1.10 2001/03/15 00:20:25 ram Exp $
3 ;# Copyright (c) 1995-2000, Raphael Manfredi
5 ;# You may redistribute only under the same terms as Perl 5, as specified
6 ;# in the README file that comes with the distribution.
8 ;# $Log: Storable.pm,v $
9 ;# Revision 1.0.1.10 2001/03/15 00:20:25 ram
10 ;# patch11: updated version number
12 ;# Revision 1.0.1.9 2001/02/17 12:37:32 ram
13 ;# patch10: forgot to increase version number at previous patch
15 ;# Revision 1.0.1.8 2001/02/17 12:24:37 ram
16 ;# patch8: fixed incorrect error message
18 ;# Revision 1.0.1.7 2001/01/03 09:39:02 ram
19 ;# patch7: added CAN_FLOCK to determine whether we can flock() or not
21 ;# Revision 1.0.1.6 2000/11/05 17:20:25 ram
22 ;# patch6: increased version number
24 ;# Revision 1.0.1.5 2000/10/26 17:10:18 ram
25 ;# patch5: documented that store() and retrieve() can return undef
26 ;# patch5: added paragraph explaining the auto require for thaw hooks
28 ;# Revision 1.0.1.4 2000/10/23 18:02:57 ram
29 ;# patch4: protected calls to flock() for dos platform
30 ;# patch4: added logcarp emulation if they don't have Log::Agent
32 ;# Revision 1.0.1.3 2000/09/29 19:49:01 ram
33 ;# patch3: updated version number
35 ;# Revision 1.0.1.2 2000/09/28 21:42:51 ram
36 ;# patch2: added lock_store lock_nstore lock_retrieve
38 ;# Revision 1.0.1.1 2000/09/17 16:46:21 ram
39 ;# patch1: documented that doubles are stringified by nstore()
40 ;# patch1: added Salvador Ortiz Garcia in CREDITS section
42 ;# Revision 1.0 2000/09/01 19:40:41 ram
43 ;# Baseline for first official release.
48 package Storable; @ISA = qw(Exporter DynaLoader);
50 @EXPORT = qw(store retrieve);
52 nstore store_fd nstore_fd fd_retrieve
56 lock_store lock_nstore lock_retrieve
60 use vars qw($forgive_me $VERSION);
63 *AUTOLOAD = \&AutoLoader::AUTOLOAD; # Grrr...
66 # Use of Log::Agent is optional
69 eval "use Log::Agent";
71 unless (defined @Log::Agent::EXPORT) {
85 # They might miss :flock in Fcntl
90 if (exists $Fcntl::EXPORT_TAGS{'flock'}) {
91 Fcntl->import(':flock');
103 sub retrieve_fd { &fd_retrieve } # Backward compatibility
106 # Determine whether locking is possible, but only when needed.
112 return $CAN_FLOCK if defined $CAN_FLOCK;
113 require Config; import Config;
115 $Config{'d_flock'} ||
116 $Config{'d_fcntl_can_lock'} ||
127 # Store target object hierarchy, identified by a reference to its root.
128 # The stored object tree may later be retrieved to memory via retrieve.
129 # Returns undef if an I/O error occurred, in which case the file is
133 return _store(\&pstore, @_, 0);
139 # Same as store, but in network order.
142 return _store(\&net_pstore, @_, 0);
148 # Same as store, but flock the file first (advisory locking).
151 return _store(\&pstore, @_, 1);
157 # Same as nstore, but flock the file first (advisory locking).
160 return _store(\&net_pstore, @_, 1);
163 # Internal store to file routine
167 my ($file, $use_locking) = @_;
168 logcroak "not a reference" unless ref($self);
169 logcroak "wrong argument number" unless @_ == 2; # No @foo in arglist
171 open(FILE, ">$file") || logcroak "can't create $file: $!";
172 binmode FILE; # Archaic systems...
174 unless (&CAN_FLOCK) {
175 logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O";
178 flock(FILE, LOCK_EX) ||
179 logcroak "can't get exclusive lock on $file: $!";
181 # Unlocking will happen when FILE is closed
183 my $da = $@; # Don't mess if called from exception handler
185 # Call C routine nstore or pstore, depending on network order
186 eval { $ret = &$xsptr(*FILE, $self) };
187 close(FILE) or $ret = undef;
188 unlink($file) or warn "Can't unlink $file: $!\n" if $@ || !defined $ret;
189 logcroak $@ if $@ =~ s/\.?\n$/,/;
191 return $ret ? $ret : undef;
197 # Same as store, but perform on an already opened file descriptor instead.
198 # Returns undef if an I/O error occurred.
201 return _store_fd(\&pstore, @_);
207 # Same as store_fd, but in network order.
210 my ($self, $file) = @_;
211 return _store_fd(\&net_pstore, @_);
214 # Internal store routine on opened file descriptor
219 logcroak "not a reference" unless ref($self);
220 logcroak "too many arguments" unless @_ == 1; # No @foo in arglist
221 my $fd = fileno($file);
222 logcroak "not a valid file descriptor" unless defined $fd;
223 my $da = $@; # Don't mess if called from exception handler
225 # Call C routine nstore or pstore, depending on network order
226 eval { $ret = &$xsptr($file, $self) };
227 logcroak $@ if $@ =~ s/\.?\n$/,/;
229 return $ret ? $ret : undef;
235 # Store object and its hierarchy in memory and return a scalar
236 # containing the result.
239 _freeze(\&mstore, @_);
245 # Same as freeze but in network order.
248 _freeze(\&net_mstore, @_);
251 # Internal freeze routine
255 logcroak "not a reference" unless ref($self);
256 logcroak "too many arguments" unless @_ == 0; # No @foo in arglist
257 my $da = $@; # Don't mess if called from exception handler
259 # Call C routine mstore or net_mstore, depending on network order
260 eval { $ret = &$xsptr($self) };
261 logcroak $@ if $@ =~ s/\.?\n$/,/;
263 return $ret ? $ret : undef;
269 # Retrieve object hierarchy from disk, returning a reference to the root
270 # object of that tree.
279 # Same as retrieve, but with advisory locking.
285 # Internal retrieve routine
287 my ($file, $use_locking) = @_;
289 open(FILE, $file) || logcroak "can't open $file: $!";
290 binmode FILE; # Archaic systems...
292 my $da = $@; # Could be from exception handler
294 unless (&CAN_FLOCK) {
295 logcarp "Storable::lock_store: fcntl/flock emulation broken on $^O";
298 flock(FILE, LOCK_SH) || logcroak "can't get shared lock on $file: $!";
299 # Unlocking will happen when FILE is closed
301 eval { $self = pretrieve(*FILE) }; # Call C routine
303 logcroak $@ if $@ =~ s/\.?\n$/,/;
311 # Same as retrieve, but perform from an already opened file descriptor instead.
315 my $fd = fileno($file);
316 logcroak "not a valid file descriptor" unless defined $fd;
318 my $da = $@; # Could be from exception handler
319 eval { $self = pretrieve($file) }; # Call C routine
320 logcroak $@ if $@ =~ s/\.?\n$/,/;
328 # Recreate objects in memory from an existing frozen image created
329 # by freeze. If the frozen image passed is undef, return undef.
333 return undef unless defined $frozen;
335 my $da = $@; # Could be from exception handler
336 eval { $self = mretrieve($frozen) }; # Call C routine
337 logcroak $@ if $@ =~ s/\.?\n$/,/;
344 Storable - persistency for perl data structures
349 store \%table, 'file';
350 $hashref = retrieve('file');
352 use Storable qw(nstore store_fd nstore_fd freeze thaw dclone);
355 nstore \%table, 'file';
356 $hashref = retrieve('file'); # There is NO nretrieve()
358 # Storing to and retrieving from an already opened file
359 store_fd \@array, \*STDOUT;
360 nstore_fd \%table, \*STDOUT;
361 $aryref = fd_retrieve(\*SOCKET);
362 $hashref = fd_retrieve(\*SOCKET);
364 # Serializing to memory
365 $serialized = freeze \%table;
366 %table_clone = %{ thaw($serialized) };
368 # Deep (recursive) cloning
369 $cloneref = dclone($ref);
372 use Storable qw(lock_store lock_nstore lock_retrieve);
373 lock_store \%table, 'file';
374 lock_nstore \%table, 'file';
375 $hashref = lock_retrieve('file');
379 The Storable package brings persistency to your perl data structures
380 containing SCALAR, ARRAY, HASH or REF objects, i.e. anything that can be
381 conveniently stored to disk and retrieved at a later time.
383 It can be used in the regular procedural way by calling C<store> with
384 a reference to the object to be stored, along with the file name where
385 the image should be written.
386 The routine returns C<undef> for I/O problems or other internal error,
387 a true value otherwise. Serious errors are propagated as a C<die> exception.
389 To retrieve data stored to disk, use C<retrieve> with a file name,
390 and the objects stored into that file are recreated into memory for you,
391 a I<reference> to the root object being returned. In case an I/O error
392 occurs while reading, C<undef> is returned instead. Other serious
393 errors are propagated via C<die>.
395 Since storage is performed recursively, you might want to stuff references
396 to objects that share a lot of common data into a single array or hash
397 table, and then store that object. That way, when you retrieve back the
398 whole thing, the objects will continue to share what they originally shared.
400 At the cost of a slight header overhead, you may store to an already
401 opened file descriptor using the C<store_fd> routine, and retrieve
402 from a file via C<fd_retrieve>. Those names aren't imported by default,
403 so you will have to do that explicitly if you need those routines.
404 The file descriptor you supply must be already opened, for read
405 if you're going to retrieve and for write if you wish to store.
407 store_fd(\%table, *STDOUT) || die "can't store to stdout\n";
408 $hashref = fd_retrieve(*STDIN);
410 You can also store data in network order to allow easy sharing across
411 multiple platforms, or when storing on a socket known to be remotely
412 connected. The routines to call have an initial C<n> prefix for I<network>,
413 as in C<nstore> and C<nstore_fd>. At retrieval time, your data will be
414 correctly restored so you don't have to know whether you're restoring
415 from native or network ordered data. Double values are stored stringified
416 to ensure portability as well, at the slight risk of losing some precision
417 in the last decimals.
419 When using C<fd_retrieve>, objects are retrieved in sequence, one
420 object (i.e. one recursive tree) per associated C<store_fd>.
422 If you're more from the object-oriented camp, you can inherit from
423 Storable and directly store your objects by invoking C<store> as
424 a method. The fact that the root of the to-be-stored tree is a
425 blessed reference (i.e. an object) is special-cased so that the
426 retrieve does not provide a reference to that object but rather the
427 blessed object reference itself. (Otherwise, you'd get a reference
428 to that blessed object).
432 The Storable engine can also store data into a Perl scalar instead, to
433 later retrieve them. This is mainly used to freeze a complex structure in
434 some safe compact memory place (where it can possibly be sent to another
435 process via some IPC, since freezing the structure also serializes it in
436 effect). Later on, and maybe somewhere else, you can thaw the Perl scalar
437 out and recreate the original complex structure in memory.
439 Surprisingly, the routines to be called are named C<freeze> and C<thaw>.
440 If you wish to send out the frozen scalar to another machine, use
441 C<nfreeze> instead to get a portable image.
443 Note that freezing an object structure and immediately thawing it
444 actually achieves a deep cloning of that structure:
446 dclone(.) = thaw(freeze(.))
448 Storable provides you with a C<dclone> interface which does not create
449 that intermediary scalar but instead freezes the structure in some
450 internal memory space and then immediately thaws it out.
452 =head1 ADVISORY LOCKING
454 The C<lock_store> and C<lock_nstore> routines are equivalent to C<store>
455 and C<nstore>, only they get an exclusive lock on the file before
456 writing. Likewise, C<lock_retrieve> performs as C<retrieve>, but also
457 gets a shared lock on the file before reading.
459 Like with any advisory locking scheme, the protection only works if
460 you systematically use C<lock_store> and C<lock_retrieve>. If one
461 side of your application uses C<store> whilst the other uses C<lock_retrieve>,
462 you will get no protection at all.
464 The internal advisory locking is implemented using Perl's flock() routine.
465 If your system does not support any form of flock(), or if you share
466 your files across NFS, you might wish to use other forms of locking by
467 using modules like LockFile::Simple which lock a file using a filesystem
468 entry, instead of locking the file descriptor.
472 The heart of Storable is written in C for decent speed. Extra low-level
473 optimizations have been made when manipulating perl internals, to
474 sacrifice encapsulation for the benefit of a greater speed.
476 =head1 CANONICAL REPRESENTATION
478 Normally Storable stores elements of hashes in the order they are
479 stored internally by Perl, i.e. pseudo-randomly. If you set
480 C<$Storable::canonical> to some C<TRUE> value, Storable will store
481 hashes with the elements sorted by their key. This allows you to
482 compare data structures by comparing their frozen representations (or
483 even the compressed frozen representations), which can be useful for
484 creating lookup tables for complicated queries.
486 Canonical order does not imply network order, those are two orthogonal
489 =head1 ERROR REPORTING
491 Storable uses the "exception" paradigm, in that it does not try to work around
492 failures: if something bad happens, an exception is generated from the
493 caller's perspective (see L<Carp> and C<croak()>). Use eval {} to trap
496 When Storable croaks, it tries to report the error via the C<logcroak()>
497 routine from the C<Log::Agent> package, if it is available.
499 Normal errors are reported by having store() or retrieve() return C<undef>.
500 Such errors are usually I/O errors (or truncated stream errors at retrieval).
506 Any class may define hooks that will be called during the serialization
507 and deserialization process on objects that are instances of that class.
508 Those hooks can redefine the way serialization is performed (and therefore,
509 how the symmetrical deserialization should be conducted).
511 Since we said earlier:
513 dclone(.) = thaw(freeze(.))
515 everything we say about hooks should also hold for deep cloning. However,
516 hooks get to know whether the operation is a mere serialization, or a cloning.
518 Therefore, when serializing hooks are involved,
520 dclone(.) <> thaw(freeze(.))
522 Well, you could keep them in sync, but there's no guarantee it will always
523 hold on classes somebody else wrote. Besides, there is little to gain in
524 doing so: a serializing hook could only keep one attribute of an object,
525 which is probably not what should happen during a deep cloning of that
528 Here is the hooking interface:
532 =item C<STORABLE_freeze> I<obj>, I<cloning>
534 The serializing hook, called on the object during serialization. It can be
535 inherited, or defined in the class itself, like any other method.
537 Arguments: I<obj> is the object to serialize, I<cloning> is a flag indicating
538 whether we're in a dclone() or a regular serialization via store() or freeze().
540 Returned value: A LIST C<($serialized, $ref1, $ref2, ...)> where $serialized
541 is the serialized form to be used, and the optional $ref1, $ref2, etc... are
542 extra references that you wish to let the Storable engine serialize.
544 At deserialization time, you will be given back the same LIST, but all the
545 extra references will be pointing into the deserialized structure.
547 The B<first time> the hook is hit in a serialization flow, you may have it
548 return an empty list. That will signal the Storable engine to further
549 discard that hook for this class and to therefore revert to the default
550 serialization of the underlying Perl data. The hook will again be normally
551 processed in the next serialization.
553 Unless you know better, serializing hook should always say:
555 sub STORABLE_freeze {
556 my ($self, $cloning) = @_;
557 return if $cloning; # Regular default serialization
561 in order to keep reasonable dclone() semantics.
563 =item C<STORABLE_thaw> I<obj>, I<cloning>, I<serialized>, ...
565 The deserializing hook called on the object during deserialization.
566 But wait. If we're deserializing, there's no object yet... right?
568 Wrong: the Storable engine creates an empty one for you. If you know Eiffel,
569 you can view C<STORABLE_thaw> as an alternate creation routine.
571 This means the hook can be inherited like any other method, and that
572 I<obj> is your blessed reference for this particular instance.
574 The other arguments should look familiar if you know C<STORABLE_freeze>:
575 I<cloning> is true when we're part of a deep clone operation, I<serialized>
576 is the serialized string you returned to the engine in C<STORABLE_freeze>,
577 and there may be an optional list of references, in the same order you gave
578 them at serialization time, pointing to the deserialized objects (which
579 have been processed courtesy of the Storable engine).
581 When the Storable engine does not find any C<STORABLE_thaw> hook routine,
582 it tries to load the class by requiring the package dynamically (using
583 the blessed package name), and then re-attempts the lookup. If at that
584 time the hook cannot be located, the engine croaks. Note that this mechanism
585 will fail if you define several classes in the same file, but perlmod(1)
588 It is up to you to use this information to populate I<obj> the way you want.
590 Returned value: none.
596 Predicates are not exportable. They must be called by explicitly prefixing
597 them with the Storable package name.
601 =item C<Storable::last_op_in_netorder>
603 The C<Storable::last_op_in_netorder()> predicate will tell you whether
604 network order was used in the last store or retrieve operation. If you
605 don't know how to use this, just forget about it.
607 =item C<Storable::is_storing>
609 Returns true if within a store operation (via STORABLE_freeze hook).
611 =item C<Storable::is_retrieving>
613 Returns true if within a retrieve operation (via STORABLE_thaw hook).
619 With hooks comes the ability to recurse back to the Storable engine. Indeed,
620 hooks are regular Perl code, and Storable is convenient when it comes to
621 serializing and deserializing things, so why not use it to handle the
622 serialization string?
624 There are a few things you need to know however:
630 You can create endless loops if the things you serialize via freeze()
631 (for instance) point back to the object we're trying to serialize in the hook.
635 Shared references among objects will not stay shared: if we're serializing
636 the list of objects [A, C] where both objects A and C refer to the SAME object
637 B, and if there is a serializing hook in A that says freeze(B), then when
638 deserializing, we'll get [A', C'] where A' refers to B', but C' refers to D,
639 a deep clone of B'. The topology was not preserved.
643 That's why C<STORABLE_freeze> lets you provide a list of references
644 to serialize. The engine guarantees that those will be serialized in the
645 same context as the other objects, and therefore that shared objects will
648 In the above [A, C] example, the C<STORABLE_freeze> hook could return:
650 ("something", $self->{B})
652 and the B part would be serialized by the engine. In C<STORABLE_thaw>, you
653 would get back the reference to the B' object, deserialized for you.
655 Therefore, recursion should normally be avoided, but is nonetheless supported.
659 There is a new Clone module available on CPAN which implements deep cloning
660 natively, i.e. without freezing to memory and thawing the result. It is
661 aimed to replace Storable's dclone() some day. However, it does not currently
662 support Storable hooks to redefine the way deep cloning is performed.
666 Here are some code samples showing a possible usage of Storable:
668 use Storable qw(store retrieve freeze thaw dclone);
670 %color = ('Blue' => 0.1, 'Red' => 0.8, 'Black' => 0, 'White' => 1);
672 store(\%color, '/tmp/colors') or die "Can't store %color in /tmp/colors!\n";
674 $colref = retrieve('/tmp/colors');
675 die "Unable to retrieve from /tmp/colors!\n" unless defined $colref;
676 printf "Blue is still %lf\n", $colref->{'Blue'};
678 $colref2 = dclone(\%color);
680 $str = freeze(\%color);
681 printf "Serialization of %%color is %d bytes long.\n", length($str);
682 $colref3 = thaw($str);
684 which prints (on my machine):
686 Blue is still 0.100000
687 Serialization of %color is 102 bytes long.
691 If you're using references as keys within your hash tables, you're bound
692 to disappointment when retrieving your data. Indeed, Perl stringifies
693 references used as hash table keys. If you later wish to access the
694 items via another reference stringification (i.e. using the same
695 reference that was used for the key originally to record the value into
696 the hash table), it will work because both references stringify to the
699 It won't work across C<store> and C<retrieve> operations, however, because
700 the addresses in the retrieved objects, which are part of the stringified
701 references, will probably differ from the original addresses. The
702 topology of your structure is preserved, but not hidden semantics
705 On platforms where it matters, be sure to call C<binmode()> on the
706 descriptors that you pass to Storable functions.
708 Storing data canonically that contains large hashes can be
709 significantly slower than storing the same data normally, as
710 temporary arrays to hold the keys for each hash have to be allocated,
711 populated, sorted and freed. Some tests have shown a halving of the
712 speed of storing -- the exact penalty will depend on the complexity of
713 your data. There is no slowdown on retrieval.
717 You can't store GLOB, CODE, FORMLINE, etc... If you can define
718 semantics for those operations, feel free to enhance Storable so that
719 it can deal with them.
721 The store functions will C<croak> if they run into such references
722 unless you set C<$Storable::forgive_me> to some C<TRUE> value. In that
723 case, the fatal message is turned into a warning and some
724 meaningless string is stored instead.
726 Setting C<$Storable::canonical> may not yield frozen strings that
727 compare equal due to possible stringification of numbers. When the
728 string version of a scalar exists, it is the form stored, therefore
729 if you happen to use your numbers as strings between two freezing
730 operations on the same data structures, you will get different
733 When storing doubles in network order, their value is stored as text.
734 However, you should also not expect non-numeric floating-point values
735 such as infinity and "not a number" to pass successfully through a
736 nstore()/retrieve() pair.
738 As Storable neither knows nor cares about character sets (although it
739 does know that characters may be more than eight bits wide), any difference
740 in the interpretation of character codes between a host and a target
741 system is your problem. In particular, if host and target use different
742 code points to represent the characters used in the text representation
743 of floating-point numbers, you will not be able to exchange
744 floating-point data, even with nstore().
748 Thank you to (in chronological order):
750 Jarkko Hietaniemi <jhi@iki.fi>
751 Ulrich Pfeifer <pfeifer@charly.informatik.uni-dortmund.de>
752 Benjamin A. Holzman <bah@ecnvantage.com>
753 Andrew Ford <A.Ford@ford-mason.co.uk>
754 Gisle Aas <gisle@aas.no>
755 Jeff Gresham <gresham_jeffrey@jpmorgan.com>
756 Murray Nesbitt <murray@activestate.com>
757 Marc Lehmann <pcg@opengroup.org>
758 Justin Banks <justinb@wamnet.com>
759 Jarkko Hietaniemi <jhi@iki.fi> (AGAIN, as perl 5.7.0 Pumpkin!)
760 Salvador Ortiz Garcia <sog@msg.com.mx>
761 Dominic Dunlop <domo@computer.org>
762 Erik Haugan <erik@solbors.no>
764 for their bug reports, suggestions and contributions.
766 Benjamin Holzman contributed the tied variable support, Andrew Ford
767 contributed the canonical order for hashes, and Gisle Aas fixed
768 a few misunderstandings of mine regarding the Perl internals,
769 and optimized the emission of "tags" in the output streams by
770 simply counting the objects instead of tagging them (leading to
771 a binary incompatibility for the Storable image starting at version
772 0.6--older images are of course still properly understood).
773 Murray Nesbitt made Storable thread-safe. Marc Lehmann added overloading
774 and reference to tied items support.
778 There is a Japanese translation of this man page available at
779 http://member.nifty.ne.jp/hippo2000/perltips/storable.htm ,
780 courtesy of Kawai, Takanori <kawai@nippon-rad.co.jp>.
784 Raphael Manfredi F<E<lt>Raphael_Manfredi@pobox.comE<gt>>