ext/Storable/Storable.xs

   1 /*
   2  * Store and retrieve mechanism.
   3  */
   4
   5 /*
   6  * $Id: Storable.xs,v 0.7.1.3 2000/08/23 23:00:41 ram Exp $
   7  *
   8  *  Copyright (c) 1995-2000, Raphael Manfredi
   9  *
  10  *  You may redistribute only under the terms of the Artistic License,
  11  *  as specified in the README file that comes with the distribution.
  12  *
  13  * $Log: Storable.xs,v $
  14  * Revision 0.7.1.3  2000/08/23 23:00:41  ram
  15  * patch3: ANSI-fied most of the code, preparing for Perl core integration
  16  * patch3: dispatch tables moved upfront to relieve some compilers
  17  * patch3: merged 64-bit fixes from perl5-porters
  18  *
  19  * Revision 0.7.1.2  2000/08/14 07:19:27  ram
  20  * patch2: added a refcnt dec in retrieve_tied_key()
  21  *
  22  * Revision 0.7.1.1  2000/08/13 20:10:06  ram
  23  * patch1: was wrongly optimizing for "undef" values in hashes
  24  * patch1: added support for ref to tied items in hash/array
  25  * patch1: added overloading support
  26  *
  27  * Revision 0.7  2000/08/03 22:04:44  ram
  28  * Baseline for second beta release.
  29  *
  30  */
  31
  32 #include <EXTERN.h>
  33 #include <perl.h>
  34 #include <patchlevel.h>         /* Perl's one, needed since 5.6 */
  35 #include <XSUB.h>
  36
  37 /* #define DEBUGME */ /* Debug mode, turns assertions on as well */
  38 /* #define DASSERT */ /* Assertion mode */
  39
  40 /*
  41  * Pre PerlIO time when none of USE_PERLIO and PERLIO_IS_STDIO is defined
  42  * Provide them with the necessary defines so they can build with pre-5.004.
  43  */
  44 #ifndef USE_PERLIO
  45 #ifndef PERLIO_IS_STDIO
  46 #define PerlIO FILE
  47 #define PerlIO_getc(x) getc(x)
  48 #define PerlIO_putc(f,x) putc(x,f)
  49 #define PerlIO_read(x,y,z) fread(y,1,z,x)
  50 #define PerlIO_write(x,y,z) fwrite(y,1,z,x)
  51 #define PerlIO_stdoutf printf
  52 #endif  /* PERLIO_IS_STDIO */
  53 #endif  /* USE_PERLIO */
  54
  55 /*
  56  * Earlier versions of perl might be used, we can't assume they have the latest!
  57  */
  58
  59 #ifndef PERL_VERSION            /* For perls < 5.6 */
  60 #define PERL_VERSION PATCHLEVEL
  61 #ifndef newRV_noinc
  62 #define newRV_noinc(sv)         ((Sv = newRV(sv)), --SvREFCNT(SvRV(Sv)), Sv)
  63 #endif
  64 #if (PATCHLEVEL <= 4)           /* Older perls (<= 5.004) lack PL_ namespace */
  65 #define PL_sv_yes       sv_yes
  66 #define PL_sv_no        sv_no
  67 #define PL_sv_undef     sv_undef
  68 #endif
  69 #ifndef HvSHAREKEYS_off
  70 #define HvSHAREKEYS_off(hv)     /* Ignore */
  71 #endif
  72 #ifndef INT2PTR
  73 #define INT2PTR(t,v)    (t)(IV)(v)
  74 #endif
  75 #ifndef AvFILLp                         /* Older perls (<=5.003) lack AvFILLp */
  76 #define AvFILLp AvFILL
  77 #endif
  78 typedef double NV;                      /* Older perls lack the NV type */
  79 #endif                                          /* PERL_VERSION -- perls < 5.6 */
  80
  81 #ifdef DEBUGME
  82 #ifndef DASSERT
  83 #define DASSERT
  84 #endif
  85 #define TRACEME(x)      do { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); } while (0)
  86 #else
  87 #define TRACEME(x)
  88 #endif
  89
  90 #ifdef DASSERT
  91 #define ASSERT(x,y)     do {                                                                    \
  92         if (!(x)) {                                                                                             \
  93                 PerlIO_stdoutf("ASSERT FAILED (\"%s\", line %d): ",     \
  94                         __FILE__, __LINE__);                                                    \
  95                 PerlIO_stdoutf y; PerlIO_stdoutf("\n");                         \
  96         }                                                                                                               \
  97 } while (0)
  98 #else
  99 #define ASSERT(x,y)
 100 #endif
 101
 102 /*
 103  * Type markers.
 104  */
 105
 106 #define C(x) ((char) (x))       /* For markers with dynamic retrieval handling */
 107
 108 #define SX_OBJECT       C(0)    /* Already stored object */
 109 #define SX_LSCALAR      C(1)    /* Scalar (string) forthcoming (length, data) */
 110 #define SX_ARRAY        C(2)    /* Array forthcominng (size, item list) */
 111 #define SX_HASH         C(3)    /* Hash forthcoming (size, key/value pair list) */
 112 #define SX_REF          C(4)    /* Reference to object forthcoming */
 113 #define SX_UNDEF        C(5)    /* Undefined scalar */
 114 #define SX_INTEGER      C(6)    /* Integer forthcoming */
 115 #define SX_DOUBLE       C(7)    /* Double forthcoming */
 116 #define SX_BYTE         C(8)    /* (signed) byte forthcoming */
 117 #define SX_NETINT       C(9)    /* Integer in network order forthcoming */
 118 #define SX_SCALAR       C(10)   /* Scalar (small) forthcoming (length, data) */
 119 #define SX_TIED_ARRAY  C(11)  /* Tied array forthcoming */
 120 #define SX_TIED_HASH   C(12)  /* Tied hash forthcoming */
 121 #define SX_TIED_SCALAR C(13)  /* Tied scalar forthcoming */
 122 #define SX_SV_UNDEF     C(14)   /* Perl's immortal PL_sv_undef */
 123 #define SX_SV_YES       C(15)   /* Perl's immortal PL_sv_yes */
 124 #define SX_SV_NO        C(16)   /* Perl's immortal PL_sv_no */
 125 #define SX_BLESS        C(17)   /* Object is blessed */
 126 #define SX_IX_BLESS     C(18)   /* Object is blessed, classname given by index */
 127 #define SX_HOOK         C(19)   /* Stored via hook, user-defined */
 128 #define SX_OVERLOAD     C(20)   /* Overloaded reference */
 129 #define SX_TIED_KEY C(21)   /* Tied magic key forthcoming */
 130 #define SX_TIED_IDX C(22)   /* Tied magic index forthcoming */
 131 #define SX_ERROR        C(23)   /* Error */
 132
 133 /*
 134  * Those are only used to retrieve "old" pre-0.6 binary images.
 135  */
 136 #define SX_ITEM         'i'             /* An array item introducer */
 137 #define SX_IT_UNDEF     'I'             /* Undefined array item */
 138 #define SX_KEY          'k'             /* An hash key introducer */
 139 #define SX_VALUE        'v'             /* An hash value introducer */
 140 #define SX_VL_UNDEF     'V'             /* Undefined hash value */
 141
 142 /*
 143  * Those are only used to retrieve "old" pre-0.7 binary images
 144  */
 145
 146 #define SX_CLASS        'b'             /* Object is blessed, class name length <255 */
 147 #define SX_LG_CLASS 'B'         /* Object is blessed, class name length >255 */
 148 #define SX_STORED       'X'             /* End of object */
 149
 150 /*
 151  * Limits between short/long length representation.
 152  */
 153
 154 #define LG_SCALAR       255             /* Large scalar length limit */
 155 #define LG_BLESS        127             /* Large classname bless limit */
 156
 157 /*
 158  * Operation types
 159  */
 160
 161 #define ST_STORE        0x1             /* Store operation */
 162 #define ST_RETRIEVE     0x2             /* Retrieval operation */
 163 #define ST_CLONE        0x4             /* Deep cloning operation */
 164
 165 /*
 166  * The following structure is used for hash table key retrieval. Since, when
 167  * retrieving objects, we'll be facing blessed hash references, it's best
 168  * to pre-allocate that buffer once and resize it as the need arises, never
 169  * freeing it (keys will be saved away someplace else anyway, so even large
 170  * keys are not enough a motivation to reclaim that space).
 171  *
 172  * This structure is also used for memory store/retrieve operations which
 173  * happen in a fixed place before being malloc'ed elsewhere if persistency
 174  * is required. Hence the aptr pointer.
 175  */
 176 struct extendable {
 177         char *arena;            /* Will hold hash key strings, resized as needed */
 178         STRLEN asiz;            /* Size of aforementionned buffer */
 179         char *aptr;                     /* Arena pointer, for in-place read/write ops */
 180         char *aend;                     /* First invalid address */
 181 };
 182
 183 /*
 184  * At store time:
 185  * A hash table records the objects which have already been stored.
 186  * Those are referred to as SX_OBJECT in the file, and their "tag" (i.e.
 187  * an arbitrary sequence number) is used to identify them.
 188  *
 189  * At retrieve time:
 190  * An array table records the objects which have already been retrieved,
 191  * as seen by the tag determined by counting the objects themselves. The
 192  * reference to that retrieved object is kept in the table, and is returned
 193  * when an SX_OBJECT is found bearing that same tag.
 194  *
 195  * The same processing is used to record "classname" for blessed objects:
 196  * indexing by a hash at store time, and via an array at retrieve time.
 197  */
 198
 199 typedef unsigned long stag_t;   /* Used by pre-0.6 binary format */
 200
 201 /*
 202  * The following "thread-safe" related defines were contributed by
 203  * Murray Nesbitt <murray@activestate.com> and integrated by RAM, who
 204  * only renamed things a little bit to ensure consistency with surrounding
 205  * code.        -- RAM, 14/09/1999
 206  *
 207  * The original patch suffered from the fact that the stcxt_t structure
 208  * was global.  Murray tried to minimize the impact on the code as much as
 209  * possible.
 210  *
 211  * Starting with 0.7, Storable can be re-entrant, via the STORABLE_xxx hooks
 212  * on objects.  Therefore, the notion of context needs to be generalized,
 213  * threading or not.
 214  */
 215
 216 #define MY_VERSION "Storable(" XS_VERSION ")"
 217
 218 typedef struct stcxt {
 219         int entry;                      /* flags recursion */
 220         int optype;                     /* type of traversal operation */
 221     HV *hseen;                  /* which objects have been seen, store time */
 222     AV *aseen;                  /* which objects have been seen, retrieve time */
 223     HV *hclass;                 /* which classnames have been seen, store time */
 224     AV *aclass;                 /* which classnames have been seen, retrieve time */
 225     HV *hook;                   /* cache for hook methods per class name */
 226     I32 tagnum;                 /* incremented at store time for each seen object */
 227     I32 classnum;               /* incremented at store time for each seen classname */
 228     int netorder;               /* true if network order used */
 229     int forgive_me;             /* whether to be forgiving... */
 230     int canonical;              /* whether to store hashes sorted by key */
 231         int dirty;                      /* context is dirty due to CROAK() -- can be cleaned */
 232     struct extendable keybuf;   /* for hash key retrieval */
 233     struct extendable membuf;   /* for memory store/retrieve operations */
 234         PerlIO *fio;            /* where I/O are performed, NULL for memory */
 235         int ver_major;          /* major of version for retrieved object */
 236         int ver_minor;          /* minor of version for retrieved object */
 237         SV *(**retrieve_vtbl)();        /* retrieve dispatch table */
 238         struct stcxt *prev;     /* contexts chained backwards in real recursion */
 239 } stcxt_t;
 240
 241 #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || defined(PERL_CAPI)
 242
 243 #if (PATCHLEVEL <= 4) && (SUBVERSION < 68)
 244 #define dSTCXT_SV                                                                       \
 245         SV *perinterp_sv = perl_get_sv(MY_VERSION, FALSE)
 246 #else   /* >= perl5.004_68 */
 247 #define dSTCXT_SV                                                                       \
 248         SV *perinterp_sv = *hv_fetch(PL_modglobal,              \
 249                 MY_VERSION, sizeof(MY_VERSION)-1, TRUE)
 250 #endif  /* < perl5.004_68 */
 251
 252 #define dSTCXT_PTR(T,name)                                                      \
 253         T name = (T)(perinterp_sv && SvIOK(perinterp_sv)\
 254                                 ? SvIVX(perinterp_sv) : NULL)
 255 #define dSTCXT                                                                          \
 256         dSTCXT_SV;                                                                              \
 257         dSTCXT_PTR(stcxt_t *, cxt)
 258
 259 #define INIT_STCXT                                                                      \
 260       dSTCXT;                                                                           \
 261       Newz(0, cxt, 1, stcxt_t);                                         \
 262       sv_setiv(perinterp_sv, PTR2IV(cxt))
 263
 264 #define SET_STCXT(x) do {                                                       \
 265         dSTCXT_SV;                                                                              \
 266         sv_setiv(perinterp_sv, PTR2IV(x));                              \
 267 } while (0)
 268
 269 #else /* !MULTIPLICITY && !PERL_OBJECT && !PERL_CAPI */
 270
 271 static stcxt_t Context;
 272 static stcxt_t *Context_ptr = &Context;
 273 #define dSTCXT                  stcxt_t *cxt = Context_ptr
 274 #define INIT_STCXT              dSTCXT
 275 #define SET_STCXT(x)    Context_ptr = x
 276
 277 #endif /* MULTIPLICITY || PERL_OBJECT || PERL_CAPI */
 278
 279 /*
 280  * KNOWN BUG:
 281  *   Croaking implies a memory leak, since we don't use setjmp/longjmp
 282  *   to catch the exit and free memory used during store or retrieve
 283  *   operations.  This is not too difficult to fix, but I need to understand
 284  *   how Perl does it, and croaking is exceptional anyway, so I lack the
 285  *   motivation to do it.
 286  *
 287  * The current workaround is to mark the context as dirty when croaking,
 288  * so that data structures can be freed whenever we re-enter Storable code
 289  * (but only *then*: it's a workaround, not a fix).
 290  *
 291  * This is also imperfect, because we don't really know how far they trapped
 292  * the croak(), and when we were recursing, we won't be able to clean anything
 293  * but the topmost context stacked.
 294  */
 295
 296 #define CROAK(x)        do { cxt->dirty = 1; croak x; } while (0)
 297
 298 /*
 299  * End of "thread-safe" related definitions.
 300  */
 301
 302 /*
 303  * key buffer handling
 304  */
 305 #define kbuf    (cxt->keybuf).arena
 306 #define ksiz    (cxt->keybuf).asiz
 307 #define KBUFINIT() do {                                 \
 308         if (!kbuf) {                                            \
 309                 TRACEME(("** allocating kbuf of 128 bytes")); \
 310                 New(10003, kbuf, 128, char);    \
 311                 ksiz = 128;                                             \
 312         }                                                                       \
 313 } while (0)
 314 #define KBUFCHK(x) do {                 \
 315         if (x >= ksiz) {                        \
 316                 TRACEME(("** extending kbuf to %d bytes", x+1)); \
 317                 Renew(kbuf, x+1, char); \
 318                 ksiz = x+1;                             \
 319         }                                                       \
 320 } while (0)
 321
 322 /*
 323  * memory buffer handling
 324  */
 325 #define mbase   (cxt->membuf).arena
 326 #define msiz    (cxt->membuf).asiz
 327 #define mptr    (cxt->membuf).aptr
 328 #define mend    (cxt->membuf).aend
 329
 330 #define MGROW   (1 << 13)
 331 #define MMASK   (MGROW - 1)
 332
 333 #define round_mgrow(x)  \
 334         ((unsigned long) (((unsigned long) (x) + MMASK) & ~MMASK))
 335 #define trunc_int(x)    \
 336         ((unsigned long) ((unsigned long) (x) & ~(sizeof(int)-1)))
 337 #define int_aligned(x)  \
 338         ((unsigned long) (x) == trunc_int(x))
 339
 340 #define MBUF_INIT(x) do {                               \
 341         if (!mbase) {                                           \
 342                 TRACEME(("** allocating mbase of %d bytes", MGROW)); \
 343                 New(10003, mbase, MGROW, char); \
 344                 msiz = MGROW;                                   \
 345         }                                                                       \
 346         mptr = mbase;                                           \
 347         if (x)                                                          \
 348                 mend = mbase + x;                               \
 349         else                                                            \
 350                 mend = mbase + msiz;                    \
 351 } while (0)
 352
 353 #define MBUF_TRUNC(x)   mptr = mbase + x
 354 #define MBUF_SIZE()             (mptr - mbase)
 355
 356 /*
 357  * Use SvPOKp(), because SvPOK() fails on tainted scalars.
 358  * See store_scalar() for other usage of this workaround.
 359  */
 360 #define MBUF_LOAD(v) do {                               \
 361         if (!SvPOKp(v))                                         \
 362                 CROAK(("Not a scalar string")); \
 363         mptr = mbase = SvPV(v, msiz);           \
 364         mend = mbase + msiz;                            \
 365 } while (0)
 366
 367 #define MBUF_XTEND(x) do {                      \
 368         int nsz = (int) round_mgrow((x)+msiz);  \
 369         int offset = mptr - mbase;              \
 370         TRACEME(("** extending mbase to %d bytes", nsz));       \
 371         Renew(mbase, nsz, char);                \
 372         msiz = nsz;                                             \
 373         mptr = mbase + offset;                  \
 374         mend = mbase + nsz;                             \
 375 } while (0)
 376
 377 #define MBUF_CHK(x) do {                        \
 378         if ((mptr + (x)) > mend)                \
 379                 MBUF_XTEND(x);                          \
 380 } while (0)
 381
 382 #define MBUF_GETC(x) do {                       \
 383         if (mptr < mend)                                \
 384                 x = (int) (unsigned char) *mptr++;      \
 385         else                                                    \
 386                 return (SV *) 0;                        \
 387 } while (0)
 388
 389 #define MBUF_GETINT(x) do {                             \
 390         if ((mptr + sizeof(int)) <= mend) {     \
 391                 if (int_aligned(mptr))                  \
 392                         x = *(int *) mptr;                      \
 393                 else                                                    \
 394                         memcpy(&x, mptr, sizeof(int));  \
 395                 mptr += sizeof(int);                    \
 396         } else                                                          \
 397                 return (SV *) 0;                                \
 398 } while (0)
 399
 400 #define MBUF_READ(x,s) do {                     \
 401         if ((mptr + (s)) <= mend) {             \
 402                 memcpy(x, mptr, s);                     \
 403                 mptr += s;                                      \
 404         } else                                                  \
 405                 return (SV *) 0;                        \
 406 } while (0)
 407
 408 #define MBUF_SAFEREAD(x,s,z) do {       \
 409         if ((mptr + (s)) <= mend) {             \
 410                 memcpy(x, mptr, s);                     \
 411                 mptr += s;                                      \
 412         } else {                                                \
 413                 sv_free(z);                                     \
 414                 return (SV *) 0;                        \
 415         }                                                               \
 416 } while (0)
 417
 418 #define MBUF_PUTC(c) do {                       \
 419         if (mptr < mend)                                \
 420                 *mptr++ = (char) c;                     \
 421         else {                                                  \
 422                 MBUF_XTEND(1);                          \
 423                 *mptr++ = (char) c;                     \
 424         }                                                               \
 425 } while (0)
 426
 427 #define MBUF_PUTINT(i) do {                     \
 428         MBUF_CHK(sizeof(int));                  \
 429         if (int_aligned(mptr))                  \
 430                 *(int *) mptr = i;                      \
 431         else                                                    \
 432                 memcpy(mptr, &i, sizeof(int));  \
 433         mptr += sizeof(int);                    \
 434 } while (0)
 435
 436 #define MBUF_WRITE(x,s) do {            \
 437         MBUF_CHK(s);                                    \
 438         memcpy(mptr, x, s);                             \
 439         mptr += s;                                              \
 440 } while (0)
 441
 442 /*
 443  * LOW_32BITS
 444  *
 445  * Keep only the low 32 bits of a pointer (used for tags, which are not
 446  * really pointers).
 447  */
 448
 449 #if PTRSIZE <= 4
 450 #define LOW_32BITS(x)   ((I32) (x))
 451 #else
 452 #define LOW_32BITS(x)   ((I32) ((unsigned long) (x) & 0xffffffffUL))
 453 #endif
 454
 455 /*
 456  * Possible return values for sv_type().
 457  */
 458
 459 #define svis_REF                0
 460 #define svis_SCALAR             1
 461 #define svis_ARRAY              2
 462 #define svis_HASH               3
 463 #define svis_TIED               4
 464 #define svis_TIED_ITEM  5
 465 #define svis_OTHER              6
 466
 467 /*
 468  * Flags for SX_HOOK.
 469  */
 470
 471 #define SHF_TYPE_MASK           0x03
 472 #define SHF_LARGE_CLASSLEN      0x04
 473 #define SHF_LARGE_STRLEN        0x08
 474 #define SHF_LARGE_LISTLEN       0x10
 475 #define SHF_IDX_CLASSNAME       0x20
 476 #define SHF_NEED_RECURSE        0x40
 477 #define SHF_HAS_LIST            0x80
 478
 479 /*
 480  * Types for SX_HOOK (2 bits).
 481  */
 482
 483 #define SHT_SCALAR                      0
 484 #define SHT_ARRAY                       1
 485 #define SHT_HASH                        2
 486
 487 /*
 488  * Before 0.6, the magic string was "perl-store" (binary version number 0).
 489  *
 490  * Since 0.6 introduced many binary incompatibilities, the magic string has
 491  * been changed to "pst0" to allow an old image to be properly retrieved by
 492  * a newer Storable, but ensure a newer image cannot be retrieved with an
 493  * older version.
 494  *
 495  * At 0.7, objects are given the ability to serialize themselves, and the
 496  * set of markers is extended, backward compatibility is not jeopardized,
 497  * so the binary version number could have remained unchanged.  To correctly
 498  * spot errors if a file making use of 0.7-specific extensions is given to
 499  * 0.6 for retrieval, the binary version was moved to "2".  And I'm introducing
 500  * a "minor" version, to better track this kind of evolution from now on.
 501  *
 502  */
 503 static char old_magicstr[] = "perl-store";      /* Magic number before 0.6 */
 504 static char magicstr[] = "pst0";                        /* Used as a magic number */
 505
 506 #define STORABLE_BIN_MAJOR      2                               /* Binary major "version" */
 507 #define STORABLE_BIN_MINOR      1                               /* Binary minor "version" */
 508
 509 /*
 510  * Useful store shortcuts...
 511  */
 512
 513 #define PUTMARK(x) do {                                         \
 514         if (!cxt->fio)                                                  \
 515                 MBUF_PUTC(x);                                           \
 516         else if (PerlIO_putc(cxt->fio, x) == EOF)       \
 517                 return -1;                                                      \
 518 } while (0)
 519
 520 #ifdef HAS_HTONL
 521 #define WLEN(x) do {                            \
 522         if (cxt->netorder) {                    \
 523                 int y = (int) htonl(x);         \
 524                 if (!cxt->fio)                          \
 525                         MBUF_PUTINT(y);                 \
 526                 else if (PerlIO_write(cxt->fio, &y, sizeof(y)) != sizeof(y))    \
 527                         return -1;                              \
 528         } else {                                                \
 529                 if (!cxt->fio)                          \
 530                         MBUF_PUTINT(x);                 \
 531                 else if (PerlIO_write(cxt->fio, &x, sizeof(x)) != sizeof(x))    \
 532                         return -1;                              \
 533         }                                                               \
 534 } while (0)
 535 #else
 536 #define WLEN(x) do {                            \
 537         if (!cxt->fio)                                  \
 538                 MBUF_PUTINT(x);                         \
 539         else if (PerlIO_write(cxt->fio, &x, sizeof(x)) != sizeof(x))    \
 540                 return -1;                                      \
 541         } while (0)
 542 #endif
 543
 544 #define WRITE(x,y) do {                                         \
 545         if (!cxt->fio)                                                  \
 546                 MBUF_WRITE(x,y);                                        \
 547         else if (PerlIO_write(cxt->fio, x, y) != y)     \
 548                 return -1;                                                      \
 549         } while (0)
 550
 551 #define STORE_SCALAR(pv, len) do {              \
 552         if (len <= LG_SCALAR) {                         \
 553                 unsigned char clen = (unsigned char) len;       \
 554                 PUTMARK(SX_SCALAR);                             \
 555                 PUTMARK(clen);                                  \
 556                 if (len)                                                \
 557                         WRITE(pv, len);                         \
 558         } else {                                                        \
 559                 PUTMARK(SX_LSCALAR);                    \
 560                 WLEN(len);                                              \
 561                 WRITE(pv, len);                                 \
 562         }                                                                       \
 563 } while (0)
 564
 565 /*
 566  * Store undef in arrays and hashes without recursing through store().
 567  */
 568 #define STORE_UNDEF() do {                              \
 569         cxt->tagnum++;                                          \
 570         PUTMARK(SX_UNDEF);                                      \
 571 } while (0)
 572
 573 /*
 574  * Useful retrieve shortcuts...
 575  */
 576
 577 #define GETCHAR() \
 578         (cxt->fio ? PerlIO_getc(cxt->fio) : (mptr >= mend ? EOF : (int) *mptr++))
 579
 580 #define GETMARK(x) do {                                                 \
 581         if (!cxt->fio)                                                          \
 582                 MBUF_GETC(x);                                                   \
 583         else if ((x = PerlIO_getc(cxt->fio)) == EOF)    \
 584                 return (SV *) 0;                                                \
 585 } while (0)
 586
 587 #ifdef HAS_NTOHL
 588 #define RLEN(x) do {                                    \
 589         if (!cxt->fio)                                          \
 590                 MBUF_GETINT(x);                                 \
 591         else if (PerlIO_read(cxt->fio, &x, sizeof(x)) != sizeof(x))     \
 592                 return (SV *) 0;                                \
 593         if (cxt->netorder)                                      \
 594                 x = (int) ntohl(x);                             \
 595 } while (0)
 596 #else
 597 #define RLEN(x) do {                                    \
 598         if (!cxt->fio)                                          \
 599                 MBUF_GETINT(x);                                 \
 600         else if (PerlIO_read(cxt->fio, &x, sizeof(x)) != sizeof(x))     \
 601                 return (SV *) 0;                                \
 602 } while (0)
 603 #endif
 604
 605 #define READ(x,y) do {                                          \
 606         if (!cxt->fio)                                                  \
 607                 MBUF_READ(x, y);                                        \
 608         else if (PerlIO_read(cxt->fio, x, y) != y)      \
 609                 return (SV *) 0;                                        \
 610 } while (0)
 611
 612 #define SAFEREAD(x,y,z) do {                                    \
 613         if (!cxt->fio)                                                          \
 614                 MBUF_SAFEREAD(x,y,z);                                   \
 615         else if (PerlIO_read(cxt->fio, x, y) != y)       {      \
 616                 sv_free(z);                                                             \
 617                 return (SV *) 0;                                                \
 618         }                                                                                       \
 619 } while (0)
 620
 621 /*
 622  * This macro is used at retrieve time, to remember where object 'y', bearing a
 623  * given tag 'tagnum', has been retrieved. Next time we see an SX_OBJECT marker,
 624  * we'll therefore know where it has been retrieved and will be able to
 625  * share the same reference, as in the original stored memory image.
 626  */
 627 #define SEEN(y) do {                                            \
 628         if (!y)                                                                 \
 629                 return (SV *) 0;                                        \
 630         if (av_store(cxt->aseen, cxt->tagnum++, SvREFCNT_inc(y)) == 0) \
 631                 return (SV *) 0;                                        \
 632         TRACEME(("aseen(#%d) = 0x%"UVxf" (refcnt=%d)", cxt->tagnum-1, \
 633                  PTR2UV(y), SvREFCNT(y)-1)); \
 634 } while (0)
 635
 636 /*
 637  * Bless `s' in `p', via a temporary reference, required by sv_bless().
 638  */
 639 #define BLESS(s,p) do {                                 \
 640         SV *ref;                                                                \
 641         HV *stash;                                                              \
 642         TRACEME(("blessing 0x%"UVxf" in %s", PTR2UV(s), (p))); \
 643         stash = gv_stashpv((p), TRUE);                  \
 644         ref = newRV_noinc(s);                                   \
 645         (void) sv_bless(ref, stash);                    \
 646         SvRV(ref) = 0;                                                  \
 647         SvREFCNT_dec(ref);                                              \
 648 } while (0)
 649
 650 static int store();
 651 static SV *retrieve();
 652
 653 /*
 654  * Dynamic dispatching table for SV store.
 655  */
 656
 657 static int store_ref(stcxt_t *cxt, SV *sv);
 658 static int store_scalar(stcxt_t *cxt, SV *sv);
 659 static int store_array(stcxt_t *cxt, AV *av);
 660 static int store_hash(stcxt_t *cxt, HV *hv);
 661 static int store_tied(stcxt_t *cxt, SV *sv);
 662 static int store_tied_item(stcxt_t *cxt, SV *sv);
 663 static int store_other(stcxt_t *cxt, SV *sv);
 664 static int store_blessed(stcxt_t *cxt, SV *sv, int type, HV *pkg);
 665
 666 static int (*sv_store[])() = {
 667         store_ref,                      /* svis_REF */
 668         store_scalar,           /* svis_SCALAR */
 669         store_array,            /* svis_ARRAY */
 670         store_hash,                     /* svis_HASH */
 671         store_tied,                     /* svis_TIED */
 672         store_tied_item,        /* svis_TIED_ITEM */
 673         store_other,            /* svis_OTHER */
 674 };
 675
 676 #define SV_STORE(x)     (*sv_store[x])
 677
/*
 * Dynamic dispatching tables for SV retrieval.
 *
 * Two tables exist: sv_old_retrieve[] below, for images written by pre-0.6
 * binaries, and sv_retrieve[] for the current format. Both are indexed by
 * the SX_* marker read from the stream, so the order of the entries must
 * follow the numeric values of the SX_* codes.
 */

static SV *retrieve_lscalar(stcxt_t *cxt);
static SV *old_retrieve_array(stcxt_t *cxt);
static SV *old_retrieve_hash(stcxt_t *cxt);
static SV *retrieve_ref(stcxt_t *cxt);
static SV *retrieve_undef(stcxt_t *cxt);
static SV *retrieve_integer(stcxt_t *cxt);
static SV *retrieve_double(stcxt_t *cxt);
static SV *retrieve_byte(stcxt_t *cxt);
static SV *retrieve_netint(stcxt_t *cxt);
static SV *retrieve_scalar(stcxt_t *cxt);
static SV *retrieve_tied_array(stcxt_t *cxt);
static SV *retrieve_tied_hash(stcxt_t *cxt);
static SV *retrieve_tied_scalar(stcxt_t *cxt);
static SV *retrieve_other(stcxt_t *cxt);

/*
 * Table used for old binary images: every marker introduced after that
 * format is routed to retrieve_other().
 */
static SV *(*sv_old_retrieve[])() = {
	0,			/* SX_OBJECT -- entry unused dynamically */
	retrieve_lscalar,		/* SX_LSCALAR */
	old_retrieve_array,		/* SX_ARRAY -- for pre-0.6 binaries */
	old_retrieve_hash,		/* SX_HASH -- for pre-0.6 binaries */
	retrieve_ref,			/* SX_REF */
	retrieve_undef,			/* SX_UNDEF */
	retrieve_integer,		/* SX_INTEGER */
	retrieve_double,		/* SX_DOUBLE */
	retrieve_byte,			/* SX_BYTE */
	retrieve_netint,		/* SX_NETINT */
	retrieve_scalar,		/* SX_SCALAR */
	retrieve_tied_array,	/* SX_TIED_ARRAY */
	retrieve_tied_hash,		/* SX_TIED_HASH */
	retrieve_tied_scalar,	/* SX_TIED_SCALAR */
	retrieve_other,			/* SX_SV_UNDEF not supported */
	retrieve_other,			/* SX_SV_YES not supported */
	retrieve_other,			/* SX_SV_NO not supported */
	retrieve_other,			/* SX_BLESS not supported */
	retrieve_other,			/* SX_IX_BLESS not supported */
	retrieve_other,			/* SX_HOOK not supported */
	retrieve_other,			/* SX_OVERLOAD not supported */
	retrieve_other,			/* SX_TIED_KEY not supported */
	retrieve_other,			/* SX_TIED_IDX not supported */
	retrieve_other,			/* SX_ERROR */
};
 723
static SV *retrieve_array(stcxt_t *cxt);
static SV *retrieve_hash(stcxt_t *cxt);
static SV *retrieve_sv_undef(stcxt_t *cxt);
static SV *retrieve_sv_yes(stcxt_t *cxt);
static SV *retrieve_sv_no(stcxt_t *cxt);
static SV *retrieve_blessed(stcxt_t *cxt);
static SV *retrieve_idx_blessed(stcxt_t *cxt);
static SV *retrieve_hook(stcxt_t *cxt);
static SV *retrieve_overloaded(stcxt_t *cxt);
static SV *retrieve_tied_key(stcxt_t *cxt);
static SV *retrieve_tied_idx(stcxt_t *cxt);

/*
 * Table used for the current binary format. Entries are indexed by SX_*
 * marker, in the same order as sv_old_retrieve[].
 */
static SV *(*sv_retrieve[])() = {
	0,			/* SX_OBJECT -- entry unused dynamically */
	retrieve_lscalar,		/* SX_LSCALAR */
	retrieve_array,			/* SX_ARRAY */
	retrieve_hash,			/* SX_HASH */
	retrieve_ref,			/* SX_REF */
	retrieve_undef,			/* SX_UNDEF */
	retrieve_integer,		/* SX_INTEGER */
	retrieve_double,		/* SX_DOUBLE */
	retrieve_byte,			/* SX_BYTE */
	retrieve_netint,		/* SX_NETINT */
	retrieve_scalar,		/* SX_SCALAR */
	retrieve_tied_array,	/* SX_TIED_ARRAY */
	retrieve_tied_hash,		/* SX_TIED_HASH */
	retrieve_tied_scalar,	/* SX_TIED_SCALAR */
	retrieve_sv_undef,		/* SX_SV_UNDEF */
	retrieve_sv_yes,		/* SX_SV_YES */
	retrieve_sv_no,			/* SX_SV_NO */
	retrieve_blessed,		/* SX_BLESS */
	retrieve_idx_blessed,	/* SX_IX_BLESS */
	retrieve_hook,			/* SX_HOOK */
	retrieve_overloaded,	/* SX_OVERLOAD */
	retrieve_tied_key,		/* SX_TIED_KEY */
	retrieve_tied_idx,		/* SX_TIED_IDX */
	retrieve_other,			/* SX_ERROR */
};

/*
 * Select the retrieve handler for marker `x' out of the table currently
 * installed in context `c'. Any marker at or above SX_ERROR is clamped to
 * the SX_ERROR slot; there is no check against values below zero, and `x'
 * is expanded twice.
 */
#define RETRIEVE(c,x) (*(c)->retrieve_vtbl[(x) >= SX_ERROR ? SX_ERROR : (x)])

static SV *mbuf2sv(void);
 766
 767 /***
 768  *** Context management.
 769  ***/
 770
 771 /*
 772  * init_perinterp
 773  *
 774  * Called once per "thread" (interpreter) to initialize some global context.
 775  */
 776 static void init_perinterp(void)
 777 {
 778     INIT_STCXT;
 779
 780     cxt->netorder = 0;          /* true if network order used */
 781     cxt->forgive_me = -1;       /* whether to be forgiving... */
 782 }
 783
 784 /*
 785  * init_store_context
 786  *
 787  * Initialize a new store context for real recursion.
 788  */
 789 static void init_store_context(
 790         stcxt_t *cxt,
 791         PerlIO *f,
 792         int optype,
 793         int network_order)
 794 {
 795         TRACEME(("init_store_context"));
 796
 797         cxt->netorder = network_order;
 798         cxt->forgive_me = -1;                   /* Fetched from perl if needed */
 799         cxt->canonical = -1;                    /* Idem */
 800         cxt->tagnum = -1;                               /* Reset tag numbers */
 801         cxt->classnum = -1;                             /* Reset class numbers */
 802         cxt->fio = f;                                   /* Where I/O are performed */
 803         cxt->optype = optype;                   /* A store, or a deep clone */
 804         cxt->entry = 1;                                 /* No recursion yet */
 805
 806         /*
 807          * The `hseen' table is used to keep track of each SV stored and their
 808          * associated tag numbers is special. It is "abused" because the
 809          * values stored are not real SV, just integers cast to (SV *),
 810          * which explains the freeing below.
 811          *
 812          * It is also one possible bottlneck to achieve good storing speed,
 813          * so the "shared keys" optimization is turned off (unlikely to be
 814          * of any use here), and the hash table is "pre-extended". Together,
 815          * those optimizations increase the throughput by 12%.
 816          */
 817
 818         cxt->hseen = newHV();                   /* Table where seen objects are stored */
 819         HvSHAREKEYS_off(cxt->hseen);
 820
 821         /*
 822          * The following does not work well with perl5.004_04, and causes
 823          * a core dump later on, in a completely unrelated spot, which
 824          * makes me think there is a memory corruption going on.
 825          *
 826          * Calling hv_ksplit(hseen, HBUCKETS) instead of manually hacking
 827          * it below does not make any difference. It seems to work fine
 828          * with perl5.004_68 but given the probable nature of the bug,
 829          * that does not prove anything.
 830          *
 831          * It's a shame because increasing the amount of buckets raises
 832          * store() throughput by 5%, but until I figure this out, I can't
 833          * allow for this to go into production.
 834          *
 835          * It is reported fixed in 5.005, hence the #if.
 836          */
 837 #if PERL_VERSION >= 5
 838 #define HBUCKETS        4096                            /* Buckets for %hseen */
 839         HvMAX(cxt->hseen) = HBUCKETS - 1;       /* keys %hseen = $HBUCKETS; */
 840 #endif
 841
 842         /*
 843          * The `hclass' hash uses the same settings as `hseen' above, but it is
 844          * used to assign sequential tags (numbers) to class names for blessed
 845          * objects.
 846          *
 847          * We turn the shared key optimization on.
 848          */
 849
 850         cxt->hclass = newHV();                  /* Where seen classnames are stored */
 851
 852 #if PERL_VERSION >= 5
 853         HvMAX(cxt->hclass) = HBUCKETS - 1;      /* keys %hclass = $HBUCKETS; */
 854 #endif
 855
 856         /*
 857          * The `hook' hash table is used to keep track of the references on
 858          * the STORABLE_freeze hook routines, when found in some class name.
 859          *
 860          * It is assumed that the inheritance tree will not be changed during
 861          * storing, and that no new method will be dynamically created by the
 862          * hooks.
 863          */
 864
 865         cxt->hook = newHV();                    /* Table where hooks are cached */
 866 }
 867
 868 /*
 869  * clean_store_context
 870  *
 871  * Clean store context by
 872  */
 873 static void clean_store_context(stcxt_t *cxt)
 874 {
 875         HE *he;
 876
 877         TRACEME(("clean_store_context"));
 878
 879         ASSERT(cxt->optype & ST_STORE, ("was performing a store()"));
 880
 881         /*
 882          * Insert real values into hashes where we stored faked pointers.
 883          */
 884
 885         hv_iterinit(cxt->hseen);
 886         while (he = hv_iternext(cxt->hseen))
 887                 HeVAL(he) = &PL_sv_undef;
 888
 889         hv_iterinit(cxt->hclass);
 890         while (he = hv_iternext(cxt->hclass))
 891                 HeVAL(he) = &PL_sv_undef;
 892
 893         /*
 894          * And now dispose of them...
 895          */
 896
 897         hv_undef(cxt->hseen);
 898         sv_free((SV *) cxt->hseen);
 899
 900         hv_undef(cxt->hclass);
 901         sv_free((SV *) cxt->hclass);
 902
 903         hv_undef(cxt->hook);
 904         sv_free((SV *) cxt->hook);
 905
 906         cxt->entry = 0;
 907         cxt->dirty = 0;
 908 }
 909
 910 /*
 911  * init_retrieve_context
 912  *
 913  * Initialize a new retrieve context for real recursion.
 914  */
 915 static void init_retrieve_context(cxt, optype)
 916 stcxt_t *cxt;
 917 int optype;
 918 {
 919         TRACEME(("init_retrieve_context"));
 920
 921         /*
 922          * The hook hash table is used to keep track of the references on
 923          * the STORABLE_thaw hook routines, when found in some class name.
 924          *
 925          * It is assumed that the inheritance tree will not be changed during
 926          * storing, and that no new method will be dynamically created by the
 927          * hooks.
 928          */
 929
 930         cxt->hook  = newHV();                   /* Caches STORABLE_thaw */
 931
 932         /*
 933          * If retrieving an old binary version, the cxt->retrieve_vtbl variable
 934          * was set to sv_old_retrieve. We'll need a hash table to keep track of
 935          * the correspondance between the tags and the tag number used by the
 936          * new retrieve routines.
 937          */
 938
 939         cxt->hseen = (cxt->retrieve_vtbl == sv_old_retrieve) ? newHV() : 0;
 940
 941         cxt->aseen = newAV();                   /* Where retrieved objects are kept */
 942         cxt->aclass = newAV();                  /* Where seen classnames are kept */
 943         cxt->tagnum = 0;                                /* Have to count objects... */
 944         cxt->classnum = 0;                              /* ...and class names as well */
 945         cxt->optype = optype;
 946         cxt->entry = 1;                                 /* No recursion yet */
 947 }
 948
 949 /*
 950  * clean_retrieve_context
 951  *
 952  * Clean retrieve context by
 953  */
 954 static void clean_retrieve_context(cxt)
 955 stcxt_t *cxt;
 956 {
 957         TRACEME(("clean_retrieve_context"));
 958
 959         ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()"));
 960
 961         av_undef(cxt->aseen);
 962         sv_free((SV *) cxt->aseen);
 963
 964         av_undef(cxt->aclass);
 965         sv_free((SV *) cxt->aclass);
 966
 967         hv_undef(cxt->hook);
 968         sv_free((SV *) cxt->hook);
 969
 970         if (cxt->hseen)
 971                 sv_free((SV *) cxt->hseen);             /* optional HV, for backward compat. */
 972
 973         cxt->entry = 0;
 974         cxt->dirty = 0;
 975 }
 976
 977 /*
 978  * clean_context
 979  *
 980  * A workaround for the CROAK bug: cleanup the last context.
 981  */
 982 static void clean_context(cxt)
 983 stcxt_t *cxt;
 984 {
 985         TRACEME(("clean_context"));
 986
 987         ASSERT(cxt->dirty, ("dirty context"));
 988
 989         if (cxt->optype & ST_RETRIEVE)
 990                 clean_retrieve_context(cxt);
 991         else
 992                 clean_store_context(cxt);
 993 }
 994
 995 /*
 996  * allocate_context
 997  *
 998  * Allocate a new context and push it on top of the parent one.
 999  * This new context is made globally visible via SET_STCXT().
1000  */
1001 static stcxt_t *allocate_context(parent_cxt)
1002 stcxt_t *parent_cxt;
1003 {
1004         stcxt_t *cxt;
1005
1006         TRACEME(("allocate_context"));
1007
1008         ASSERT(!parent_cxt->dirty, ("parent context clean"));
1009
1010         Newz(0, cxt, 1, stcxt_t);
1011         cxt->prev = parent_cxt;
1012         SET_STCXT(cxt);
1013
1014         return cxt;
1015 }
1016
1017 /*
1018  * free_context
1019  *
1020  * Free current context, which cannot be the "root" one.
1021  * Make the context underneath globally visible via SET_STCXT().
1022  */
1023 static void free_context(cxt)
1024 stcxt_t *cxt;
1025 {
1026         stcxt_t *prev = cxt->prev;
1027
1028         TRACEME(("free_context"));
1029
1030         ASSERT(!cxt->dirty, ("clean context"));
1031         ASSERT(prev, ("not freeing root context"));
1032
1033         if (kbuf)
1034                 Safefree(kbuf);
1035         if (mbase)
1036                 Safefree(mbase);
1037
1038         Safefree(cxt);
1039         SET_STCXT(prev);
1040 }
1041
1042 /***
1043  *** Predicates.
1044  ***/
1045
1046 /*
1047  * is_storing
1048  *
1049  * Tells whether we're in the middle of a store operation.
1050  */
1051 int is_storing(void)
1052 {
1053         dSTCXT;
1054
1055         return cxt->entry && (cxt->optype & ST_STORE);
1056 }
1057
1058 /*
1059  * is_retrieving
1060  *
1061  * Tells whether we're in the middle of a retrieve operation.
1062  */
1063 int is_retrieving(void)
1064 {
1065         dSTCXT;
1066
1067         return cxt->entry && (cxt->optype & ST_RETRIEVE);
1068 }
1069
1070 /*
1071  * last_op_in_netorder
1072  *
1073  * Returns whether last operation was made using network order.
1074  *
1075  * This is typically out-of-band information that might prove useful
1076  * to people wishing to convert native to network order data when used.
1077  */
1078 int last_op_in_netorder(void)
1079 {
1080         dSTCXT;
1081
1082         return cxt->netorder;
1083 }
1084
1085 /***
1086  *** Hook lookup and calling routines.
1087  ***/
1088
1089 /*
1090  * pkg_fetchmeth
1091  *
1092  * A wrapper on gv_fetchmethod_autoload() which caches results.
1093  *
1094  * Returns the routine reference as an SV*, or null if neither the package
1095  * nor its ancestors know about the method.
1096  */
1097 static SV *pkg_fetchmeth(
1098         HV *cache,
1099         HV *pkg,
1100         char *method)
1101 {
1102         GV *gv;
1103         SV *sv;
1104         SV **svh;
1105
1106         /*
1107          * The following code is the same as the one performed by UNIVERSAL::can
1108          * in the Perl core.
1109          */
1110
1111         gv = gv_fetchmethod_autoload(pkg, method, FALSE);
1112         if (gv && isGV(gv)) {
1113                 sv = newRV((SV*) GvCV(gv));
1114                 TRACEME(("%s->%s: 0x%"UVxf,
1115                          HvNAME(pkg), method,
1116                          PTR2UV(sv)));
1117         } else {
1118                 sv = newSVsv(&PL_sv_undef);
1119                 TRACEME(("%s->%s: not found", HvNAME(pkg), method));
1120         }
1121
1122         /*
1123          * Cache the result, ignoring failure: if we can't store the value,
1124          * it just won't be cached.
1125          */
1126
1127         (void) hv_store(cache, HvNAME(pkg), strlen(HvNAME(pkg)), sv, 0);
1128
1129         return SvOK(sv) ? sv : (SV *) 0;
1130 }
1131
1132 /*
1133  * pkg_hide
1134  *
1135  * Force cached value to be undef: hook ignored even if present.
1136  */
1137 static void pkg_hide(
1138         HV *cache,
1139         HV *pkg,
1140         char *method)
1141 {
1142         (void) hv_store(cache,
1143                 HvNAME(pkg), strlen(HvNAME(pkg)), newSVsv(&PL_sv_undef), 0);
1144 }
1145
1146 /*
1147  * pkg_can
1148  *
1149  * Our own "UNIVERSAL::can", which caches results.
1150  *
1151  * Returns the routine reference as an SV*, or null if the object does not
1152  * know about the method.
1153  */
1154 static SV *pkg_can(
1155         HV *cache,
1156         HV *pkg,
1157         char *method)
1158 {
1159         SV **svh;
1160         SV *sv;
1161
1162         TRACEME(("pkg_can for %s->%s", HvNAME(pkg), method));
1163
1164         /*
1165          * Look into the cache to see whether we already have determined
1166          * where the routine was, if any.
1167          *
1168          * NOTA BENE: we don't use `method' at all in our lookup, since we know
1169          * that only one hook (i.e. always the same) is cached in a given cache.
1170          */
1171
1172         svh = hv_fetch(cache, HvNAME(pkg), strlen(HvNAME(pkg)), FALSE);
1173         if (svh) {
1174                 sv = *svh;
1175                 if (!SvOK(sv)) {
1176                         TRACEME(("cached %s->%s: not found", HvNAME(pkg), method));
1177                         return (SV *) 0;
1178                 } else {
1179                         TRACEME(("cached %s->%s: 0x%"UVxf,
1180                                  HvNAME(pkg), method,
1181                                  PTR2UV(sv)));
1182                         return sv;
1183                 }
1184         }
1185
1186         TRACEME(("not cached yet"));
1187         return pkg_fetchmeth(cache, pkg, method);               /* Fetch and cache */
1188 }
1189
1190 /*
1191  * scalar_call
1192  *
1193  * Call routine as obj->hook(av) in scalar context.
1194  * Propagates the single returned value if not called in void context.
1195  */
1196 static SV *scalar_call(
1197         SV *obj,
1198         SV *hook,
1199         int cloning,
1200         AV *av,
1201         I32 flags)
1202 {
1203         dSP;
1204         int count;
1205         SV *sv = 0;
1206
1207         TRACEME(("scalar_call (cloning=%d)", cloning));
1208
1209         ENTER;
1210         SAVETMPS;
1211
1212         PUSHMARK(sp);
1213         XPUSHs(obj);
1214         XPUSHs(sv_2mortal(newSViv(cloning)));           /* Cloning flag */
1215         if (av) {
1216                 SV **ary = AvARRAY(av);
1217                 int cnt = AvFILLp(av) + 1;
1218                 int i;
1219                 XPUSHs(ary[0]);                                                 /* Frozen string */
1220                 for (i = 1; i < cnt; i++) {
1221                         TRACEME(("pushing arg #%d (0x%"UVxf")...",
1222                                  i, PTR2UV(ary[i])));
1223                         XPUSHs(sv_2mortal(newRV(ary[i])));
1224                 }
1225         }
1226         PUTBACK;
1227
1228         TRACEME(("calling..."));
1229         count = perl_call_sv(hook, flags);              /* Go back to Perl code */
1230         TRACEME(("count = %d", count));
1231
1232         SPAGAIN;
1233
1234         if (count) {
1235                 sv = POPs;
1236                 SvREFCNT_inc(sv);               /* We're returning it, must stay alive! */
1237         }
1238
1239         PUTBACK;
1240         FREETMPS;
1241         LEAVE;
1242
1243         return sv;
1244 }
1245
1246 /*
1247  * array_call
1248  *
1249  * Call routine obj->hook(cloning) in list context.
1250  * Returns the list of returned values in an array.
1251  */
1252 static AV *array_call(
1253         SV *obj,
1254         SV *hook,
1255         int cloning)
1256 {
1257         dSP;
1258         int count;
1259         AV *av;
1260         int i;
1261
1262         TRACEME(("array_call (cloning=%d)", cloning));
1263
1264         ENTER;
1265         SAVETMPS;
1266
1267         PUSHMARK(sp);
1268         XPUSHs(obj);                                                            /* Target object */
1269         XPUSHs(sv_2mortal(newSViv(cloning)));           /* Cloning flag */
1270         PUTBACK;
1271
1272         count = perl_call_sv(hook, G_ARRAY);            /* Go back to Perl code */
1273
1274         SPAGAIN;
1275
1276         av = newAV();
1277         for (i = count - 1; i >= 0; i--) {
1278                 SV *sv = POPs;
1279                 av_store(av, i, SvREFCNT_inc(sv));
1280         }
1281
1282         PUTBACK;
1283         FREETMPS;
1284         LEAVE;
1285
1286         return av;
1287 }
1288
1289 /*
1290  * known_class
1291  *
1292  * Lookup the class name in the `hclass' table and either assign it a new ID
1293  * or return the existing one, by filling in `classnum'.
1294  *
1295  * Return true if the class was known, false if the ID was just generated.
1296  */
1297 static int known_class(
1298         stcxt_t *cxt,
1299         char *name,             /* Class name */
1300         int len,                /* Name length */
1301         I32 *classnum)
1302 {
1303         SV **svh;
1304         HV *hclass = cxt->hclass;
1305
1306         TRACEME(("known_class (%s)", name));
1307
1308         /*
1309          * Recall that we don't store pointers in this hash table, but tags.
1310          * Therefore, we need LOW_32BITS() to extract the relevant parts.
1311          */
1312
1313         svh = hv_fetch(hclass, name, len, FALSE);
1314         if (svh) {
1315                 *classnum = LOW_32BITS(*svh);
1316                 return TRUE;
1317         }
1318
1319         /*
1320          * Unknown classname, we need to record it.
1321          */
1322
1323         cxt->classnum++;
1324         if (!hv_store(hclass, name, len, INT2PTR(SV*, cxt->classnum), 0))
1325                 CROAK(("Unable to record new classname"));
1326
1327         *classnum = cxt->classnum;
1328         return FALSE;
1329 }
1330
1331 /***
 1332  *** Specific store routines.
1333  ***/
1334
1335 /*
1336  * store_ref
1337  *
1338  * Store a reference.
1339  * Layout is SX_REF <object> or SX_OVERLOAD <object>.
1340  */
1341 static int store_ref(stcxt_t *cxt, SV *sv)
1342 {
1343         TRACEME(("store_ref (0x%"UVxf")", PTR2UV(sv)));
1344
1345         /*
1346          * Follow reference, and check if target is overloaded.
1347          */
1348
1349         sv = SvRV(sv);
1350
1351         if (SvOBJECT(sv)) {
1352                 HV *stash = (HV *) SvSTASH(sv);
1353                 if (stash && Gv_AMG(stash)) {
1354                         TRACEME(("ref (0x%"UVxf") is overloaded",
1355                                  PTR2UV(sv)));
1356                         PUTMARK(SX_OVERLOAD);
1357                 } else
1358                         PUTMARK(SX_REF);
1359         } else
1360                 PUTMARK(SX_REF);
1361
1362         return store(cxt, sv);
1363 }
1364
1365 /*
1366  * store_scalar
1367  *
1368  * Store a scalar.
1369  *
1370  * Layout is SX_LSCALAR <length> <data>, SX_SCALAR <lenght> <data> or SX_UNDEF.
1371  * The <data> section is omitted if <length> is 0.
1372  *
1373  * If integer or double, the layout is SX_INTEGER <data> or SX_DOUBLE <data>.
1374  * Small integers (within [-127, +127]) are stored as SX_BYTE <byte>.
1375  */
1376 static int store_scalar(stcxt_t *cxt, SV *sv)
1377 {
1378         IV iv;
1379         char *pv;
1380         STRLEN len;
1381         U32 flags = SvFLAGS(sv);                        /* "cc -O" may put it in register */
1382
1383         TRACEME(("store_scalar (0x%"UVxf")", PTR2UV(sv)));
1384
1385         /*
1386          * For efficiency, break the SV encapsulation by peaking at the flags
1387          * directly without using the Perl macros to avoid dereferencing
1388          * sv->sv_flags each time we wish to check the flags.
1389          */
1390
1391         if (!(flags & SVf_OK)) {                        /* !SvOK(sv) */
1392                 if (sv == &PL_sv_undef) {
1393                         TRACEME(("immortal undef"));
1394                         PUTMARK(SX_SV_UNDEF);
1395                 } else {
1396                         TRACEME(("undef at 0x%x", sv));
1397                         PUTMARK(SX_UNDEF);
1398                 }
1399                 return 0;
1400         }
1401
1402         /*
1403          * Always store the string representation of a scalar if it exists.
1404          * Gisle Aas provided me with this test case, better than a long speach:
1405          *
1406          *  perl -MDevel::Peek -le '$a="abc"; $a+0; Dump($a)'
1407          *  SV = PVNV(0x80c8520)
1408          *       REFCNT = 1
1409          *       FLAGS = (NOK,POK,pNOK,pPOK)
1410          *       IV = 0
1411          *       NV = 0
1412          *       PV = 0x80c83d0 "abc"\0
1413          *       CUR = 3
1414          *       LEN = 4
1415          *
1416          * Write SX_SCALAR, length, followed by the actual data.
1417          *
1418          * Otherwise, write an SX_BYTE, SX_INTEGER or an SX_DOUBLE as
1419          * appropriate, followed by the actual (binary) data. A double
1420          * is written as a string if network order, for portability.
1421          *
1422          * NOTE: instead of using SvNOK(sv), we test for SvNOKp(sv).
1423          * The reason is that when the scalar value is tainted, the SvNOK(sv)
1424          * value is false.
1425          *
1426          * The test for a read-only scalar with both POK and NOK set is meant
1427          * to quickly detect &PL_sv_yes and &PL_sv_no without having to pay the
1428          * address comparison for each scalar we store.
1429          */
1430
1431 #define SV_MAYBE_IMMORTAL (SVf_READONLY|SVf_POK|SVf_NOK)
1432
1433         if ((flags & SV_MAYBE_IMMORTAL) == SV_MAYBE_IMMORTAL) {
1434                 if (sv == &PL_sv_yes) {
1435                         TRACEME(("immortal yes"));
1436                         PUTMARK(SX_SV_YES);
1437                 } else if (sv == &PL_sv_no) {
1438                         TRACEME(("immortal no"));
1439                         PUTMARK(SX_SV_NO);
1440                 } else {
1441                         pv = SvPV(sv, len);                     /* We know it's SvPOK */
1442                         goto string;                            /* Share code below */
1443                 }
1444         } else if (flags & SVp_POK) {           /* SvPOKp(sv) => string */
1445                 pv = SvPV(sv, len);
1446
1447                 /*
1448                  * Will come here from below with pv and len set if double & netorder,
1449                  * or from above if it was readonly, POK and NOK but neither &PL_sv_yes
1450                  * nor &PL_sv_no.
1451                  */
1452         string:
1453
1454                 STORE_SCALAR(pv, len);
1455                 TRACEME(("ok (scalar 0x%"UVxf" '%s', length = %d)",
1456                          PTR2UV(sv), SvPVX(sv), len));
1457
1458         } else if (flags & SVp_NOK) {           /* SvNOKp(sv) => double */
1459                 NV nv = SvNV(sv);
1460
1461                 /*
1462                  * Watch for number being an integer in disguise.
1463                  */
1464                 if (nv == (NV) (iv = I_V(nv))) {
1465                         TRACEME(("double %"NVff" is actually integer %ld", nv, iv));
1466                         goto integer;           /* Share code below */
1467                 }
1468
1469                 if (cxt->netorder) {
1470                         TRACEME(("double %"NVff" stored as string", nv));
1471                         pv = SvPV(sv, len);
1472                         goto string;            /* Share code above */
1473                 }
1474
1475                 PUTMARK(SX_DOUBLE);
1476                 WRITE(&nv, sizeof(nv));
1477
1478                 TRACEME(("ok (double 0x%"UVxf", value = %"NVff")",
1479                          PTR2UV(sv), nv));
1480
1481         } else if (flags & SVp_IOK) {           /* SvIOKp(sv) => integer */
1482                 iv = SvIV(sv);
1483
1484                 /*
1485                  * Will come here from above with iv set if double is an integer.
1486                  */
1487         integer:
1488
1489                 /*
1490                  * Optimize small integers into a single byte, otherwise store as
1491                  * a real integer (converted into network order if they asked).
1492                  */
1493
1494                 if (iv >= -128 && iv <= 127) {
1495                         unsigned char siv = (unsigned char) (iv + 128); /* [0,255] */
1496                         PUTMARK(SX_BYTE);
1497                         PUTMARK(siv);
1498                         TRACEME(("small integer stored as %d", siv));
1499                 } else if (cxt->netorder) {
1500                         int niv;
1501 #ifdef HAS_HTONL
1502                         niv = (int) htonl(iv);
1503                         TRACEME(("using network order"));
1504 #else
1505                         niv = (int) iv;
1506                         TRACEME(("as-is for network order"));
1507 #endif
1508                         PUTMARK(SX_NETINT);
1509                         WRITE(&niv, sizeof(niv));
1510                 } else {
1511                         PUTMARK(SX_INTEGER);
1512                         WRITE(&iv, sizeof(iv));
1513                 }
1514
1515                 TRACEME(("ok (integer 0x%"UVxf", value = %d)",
1516                          PTR2UV(sv), iv));
1517
1518         } else
1519                 CROAK(("Can't determine type of %s(0x%"UVxf")",
1520                        sv_reftype(sv, FALSE),
1521                        PTR2UV(sv)));
1522
1523         return 0;               /* Ok, no recursion on scalars */
1524 }
1525
1526 /*
1527  * store_array
1528  *
1529  * Store an array.
1530  *
1531  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
1532  * Each item is stored as <object>.
1533  */
1534 static int store_array(stcxt_t *cxt, AV *av)
1535 {
1536         SV **sav;
1537         I32 len = av_len(av) + 1;
1538         I32 i;
1539         int ret;
1540
1541         TRACEME(("store_array (0x%"UVxf")", PTR2UV(av)));
1542
1543         /*
1544          * Signal array by emitting SX_ARRAY, followed by the array length.
1545          */
1546
1547         PUTMARK(SX_ARRAY);
1548         WLEN(len);
1549         TRACEME(("size = %d", len));
1550
1551         /*
1552          * Now store each item recursively.
1553          */
1554
1555         for (i = 0; i < len; i++) {
1556                 sav = av_fetch(av, i, 0);
1557                 if (!sav) {
1558                         TRACEME(("(#%d) undef item", i));
1559                         STORE_UNDEF();
1560                         continue;
1561                 }
1562                 TRACEME(("(#%d) item", i));
1563                 if (ret = store(cxt, *sav))
1564                         return ret;
1565         }
1566
1567         TRACEME(("ok (array)"));
1568
1569         return 0;
1570 }
1571
1572 /*
1573  * sortcmp
1574  *
1575  * Sort two SVs
1576  * Borrowed from perl source file pp_ctl.c, where it is used by pp_sort.
1577  */
1578 static int
1579 sortcmp(const void *a, const void *b)
1580 {
1581         return sv_cmp(*(SV * const *) a, *(SV * const *) b);
1582 }
1583
1584
1585 /*
1586  * store_hash
1587  *
 1588  * Store a hash table.
1589  *
1590  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
1591  * Values are stored as <object>.
1592  * Keys are stored as <length> <data>, the <data> section being omitted
1593  * if length is 0.
1594  */
1595 static int store_hash(stcxt_t *cxt, HV *hv)
1596 {
1597         I32 len = HvKEYS(hv);
1598         I32 i;
1599         int ret = 0;
1600         I32 riter;
1601         HE *eiter;
1602
1603         TRACEME(("store_hash (0x%"UVxf")", PTR2UV(hv)));
1604
1605         /*
1606          * Signal hash by emitting SX_HASH, followed by the table length.
1607          */
1608
1609         PUTMARK(SX_HASH);
1610         WLEN(len);
1611         TRACEME(("size = %d", len));
1612
1613         /*
1614          * Save possible iteration state via each() on that table.
1615          */
1616
1617         riter = HvRITER(hv);
1618         eiter = HvEITER(hv);
1619         hv_iterinit(hv);
1620
1621         /*
1622          * Now store each item recursively.
1623          *
1624      * If canonical is defined to some true value then store each
1625      * key/value pair in sorted order otherwise the order is random.
1626          * Canonical order is irrelevant when a deep clone operation is performed.
1627          *
1628          * Fetch the value from perl only once per store() operation, and only
1629          * when needed.
1630          */
1631
1632         if (
1633                 !(cxt->optype & ST_CLONE) && (cxt->canonical == 1 ||
1634                 (cxt->canonical < 0 && (cxt->canonical =
1635                         SvTRUE(perl_get_sv("Storable::canonical", TRUE)) ? 1 : 0)))
1636         ) {
1637                 /*
1638                  * Storing in order, sorted by key.
1639                  * Run through the hash, building up an array of keys in a
1640                  * mortal array, sort the array and then run through the
1641                  * array.
1642                  */
1643
1644                 AV *av = newAV();
1645
1646                 TRACEME(("using canonical order"));
1647
1648                 for (i = 0; i < len; i++) {
1649                         HE *he = hv_iternext(hv);
1650                         SV *key = hv_iterkeysv(he);
1651                         av_store(av, AvFILLp(av)+1, key);       /* av_push(), really */
1652                 }
1653
1654                 qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);
1655
1656                 for (i = 0; i < len; i++) {
1657                         char *keyval;
1658                         I32 keylen;
1659                         SV *key = av_shift(av);
1660                         HE *he  = hv_fetch_ent(hv, key, 0, 0);
1661                         SV *val = HeVAL(he);
1662                         if (val == 0)
1663                                 return 1;               /* Internal error, not I/O error */
1664
1665                         /*
1666                          * Store value first.
1667                          */
1668
1669                         TRACEME(("(#%d) value 0x%"UVxf,
1670                                  PTR2UV(val)));
1671
1672                         if (ret = store(cxt, val))
1673                                 goto out;
1674
1675                         /*
1676                          * Write key string.
1677                          * Keys are written after values to make sure retrieval
1678                          * can be optimal in terms of memory usage, where keys are
1679                          * read into a fixed unique buffer called kbuf.
1680                          * See retrieve_hash() for details.
1681                          */
1682
1683                         keyval = hv_iterkey(he, &keylen);
1684                         TRACEME(("(#%d) key '%s'", i, keyval));
1685                         WLEN(keylen);
1686                         if (keylen)
1687                                 WRITE(keyval, keylen);
1688                 }
1689
1690                 /*
1691                  * Free up the temporary array
1692                  */
1693
1694                 av_undef(av);
1695                 sv_free((SV *) av);
1696
1697         } else {
1698
1699                 /*
1700                  * Storing in "random" order (in the order the keys are stored
1701                  * within the the hash).  This is the default and will be faster!
1702                  */
1703
1704                 for (i = 0; i < len; i++) {
1705                         char *key;
1706                         I32 len;
1707                         SV *val = hv_iternextsv(hv, &key, &len);
1708
1709                         if (val == 0)
1710                                 return 1;               /* Internal error, not I/O error */
1711
1712                         /*
1713                          * Store value first.
1714                          */
1715
1716                         TRACEME(("(#%d) value 0x%"UVxf,
1717                                  i, PTR2UV(val)));
1718
1719                         if (ret = store(cxt, val))
1720                                 goto out;
1721
1722                         /*
1723                          * Write key string.
1724                          * Keys are written after values to make sure retrieval
1725                          * can be optimal in terms of memory usage, where keys are
1726                          * read into a fixed unique buffer called kbuf.
1727                          * See retrieve_hash() for details.
1728                          */
1729
1730                         TRACEME(("(#%d) key '%s'", i, key));
1731                         WLEN(len);
1732                         if (len)
1733                                 WRITE(key, len);
1734                 }
1735     }
1736
1737         TRACEME(("ok (hash 0x%"UVxf")", PTR2UV(hv)));
1738
1739 out:
1740         HvRITER(hv) = riter;            /* Restore hash iterator state */
1741         HvEITER(hv) = eiter;
1742
1743         return ret;
1744 }
1745
1746 /*
1747  * store_tied
1748  *
1749  * When storing a tied object (be it a tied scalar, array or hash), we lay out
1750  * a special mark, followed by the underlying tied object. For instance, when
1751  * dealing with a tied hash, we store SX_TIED_HASH <hash object>, where
1752  * <hash object> stands for the serialization of the tied hash.
1753  */
1754 static int store_tied(stcxt_t *cxt, SV *sv)
1755 {
1756         MAGIC *mg;
1757         int ret = 0;
1758         int svt = SvTYPE(sv);
1759         char mtype = 'P';
1760
1761         TRACEME(("store_tied (0x%"UVxf")", PTR2UV(sv)));
1762
1763         /*
1764          * We have a small run-time penalty here because we chose to factorise
1765          * all tieds objects into the same routine, and not have a store_tied_hash,
1766          * a store_tied_array, etc...
1767          *
1768          * Don't use a switch() statement, as most compilers don't optimize that
1769          * well for 2/3 values. An if() else if() cascade is just fine. We put
1770          * tied hashes first, as they are the most likely beasts.
1771          */
1772
1773         if (svt == SVt_PVHV) {
1774                 TRACEME(("tied hash"));
1775                 PUTMARK(SX_TIED_HASH);                  /* Introduces tied hash */
1776         } else if (svt == SVt_PVAV) {
1777                 TRACEME(("tied array"));
1778                 PUTMARK(SX_TIED_ARRAY);                 /* Introduces tied array */
1779         } else {
1780                 TRACEME(("tied scalar"));
1781                 PUTMARK(SX_TIED_SCALAR);                /* Introduces tied scalar */
1782                 mtype = 'q';
1783         }
1784
1785         if (!(mg = mg_find(sv, mtype)))
1786                 CROAK(("No magic '%c' found while storing tied %s", mtype,
1787                         (svt == SVt_PVHV) ? "hash" :
1788                                 (svt == SVt_PVAV) ? "array" : "scalar"));
1789
1790         /*
1791          * The mg->mg_obj found by mg_find() above actually points to the
1792          * underlying tied Perl object implementation. For instance, if the
1793          * original SV was that of a tied array, then mg->mg_obj is an AV.
1794          *
1795          * Note that we store the Perl object as-is. We don't call its FETCH
1796          * method along the way. At retrieval time, we won't call its STORE
1797          * method either, but the tieing magic will be re-installed. In itself,
1798          * that ensures that the tieing semantics are preserved since futher
1799          * accesses on the retrieved object will indeed call the magic methods...
1800          */
1801
1802         if (ret = store(cxt, mg->mg_obj))
1803                 return ret;
1804
1805         TRACEME(("ok (tied)"));
1806
1807         return 0;
1808 }
1809
1810 /*
1811  * store_tied_item
1812  *
1813  * Stores a reference to an item within a tied structure:
1814  *
1815  *  . \$h{key}, stores both the (tied %h) object and 'key'.
1816  *  . \$a[idx], stores both the (tied @a) object and 'idx'.
1817  *
1818  * Layout is therefore either:
1819  *     SX_TIED_KEY <object> <key>
1820  *     SX_TIED_IDX <object> <index>
1821  */
1822 static int store_tied_item(stcxt_t *cxt, SV *sv)
1823 {
1824         MAGIC *mg;
1825         int ret;
1826
1827         TRACEME(("store_tied_item (0x%"UVxf")", PTR2UV(sv)));
1828
1829         if (!(mg = mg_find(sv, 'p')))
1830                 CROAK(("No magic 'p' found while storing reference to tied item"));
1831
1832         /*
1833          * We discriminate between \$h{key} and \$a[idx] via mg_ptr.
1834          */
1835
1836         if (mg->mg_ptr) {
1837                 TRACEME(("store_tied_item: storing a ref to a tied hash item"));
1838                 PUTMARK(SX_TIED_KEY);
1839                 TRACEME(("store_tied_item: storing OBJ 0x%"UVxf,
1840                          PTR2UV(mg->mg_obj)));
1841
1842                 if (ret = store(cxt, mg->mg_obj))
1843                         return ret;
1844
1845                 TRACEME(("store_tied_item: storing PTR 0x%"UVxf,
1846                          PTR2UV(mg->mg_ptr)));
1847
1848                 if (ret = store(cxt, (SV *) mg->mg_ptr))
1849                         return ret;
1850         } else {
1851                 I32 idx = mg->mg_len;
1852
1853                 TRACEME(("store_tied_item: storing a ref to a tied array item "));
1854                 PUTMARK(SX_TIED_IDX);
1855                 TRACEME(("store_tied_item: storing OBJ 0x%"UVxf,
1856                          PTR2UV(mg->mg_obj)));
1857
1858                 if (ret = store(cxt, mg->mg_obj))
1859                         return ret;
1860
1861                 TRACEME(("store_tied_item: storing IDX %d", idx));
1862
1863                 WLEN(idx);
1864         }
1865
1866         TRACEME(("ok (tied item)"));
1867
1868         return 0;
1869 }
1870
1871 /*
1872  * store_hook           -- dispatched manually, not via sv_store[]
1873  *
1874  * The blessed SV is serialized by a hook.
1875  *
1876  * Simple Layout is:
1877  *
1878  *     SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
1879  *
1880  * where <flags> indicates how long <len>, <len2> and <len3> are, whether
1881  * the trailing part [] is present, the type of object (scalar, array or hash).
1882  * There is also a bit which says how the classname is stored between:
1883  *
1884  *     <len> <classname>
1885  *     <index>
1886  *
1887  * and when the <index> form is used (classname already seen), the "large
1888  * classname" bit in <flags> indicates how large the <index> is.
1889  *
1890  * The serialized string returned by the hook is of length <len2> and comes
1891  * next.  It is an opaque string for us.
1892  *
1893  * Those <len3> object IDs which are listed last represent the extra references
1894  * not directly serialized by the hook, but which are linked to the object.
1895  *
1896  * When recursion is mandated to resolve object-IDs not yet seen, we have
1897  * instead, with <header> being flags with bits set to indicate the object type
1898  * and that recursion was indeed needed:
1899  *
1900  *     SX_HOOK <header> <object> <header> <object> <flags>
1901  *
1902  * that same header being repeated between serialized objects obtained through
1903  * recursion, until we reach flags indicating no recursion, at which point
1904  * we know we've resynchronized with a single layout, after <flags>.
1905  */
1906 static int store_hook(
1907         stcxt_t *cxt,
1908         SV *sv,
1909         int type,
1910         HV *pkg,
1911         SV *hook)
1912 {
1913         I32 len;
1914         char *class;
1915         STRLEN len2;
1916         SV *ref;
1917         AV *av;
1918         SV **ary;
1919         int count;                              /* really len3 + 1 */
1920         unsigned char flags;
1921         char *pv;
1922         int i;
1923         int recursed = 0;               /* counts recursion */
1924         int obj_type;                   /* object type, on 2 bits */
1925         I32 classnum;
1926         int ret;
1927         int clone = cxt->optype & ST_CLONE;
1928
1929         TRACEME(("store_hook, class \"%s\", tagged #%d", HvNAME(pkg), cxt->tagnum));
1930
1931         /*
1932          * Determine object type on 2 bits.
1933          */
1934
1935         switch (type) {
1936         case svis_SCALAR:
1937                 obj_type = SHT_SCALAR;
1938                 break;
1939         case svis_ARRAY:
1940                 obj_type = SHT_ARRAY;
1941                 break;
1942         case svis_HASH:
1943                 obj_type = SHT_HASH;
1944                 break;
1945         default:
1946                 CROAK(("Unexpected object type (%d) in store_hook()", type));
1947         }
1948         flags = SHF_NEED_RECURSE | obj_type;
1949
1950         class = HvNAME(pkg);
1951         len = strlen(class);
1952
1953         /*
1954          * To call the hook, we need to fake a call like:
1955          *
1956          *    $object->STORABLE_freeze($cloning);
1957          *
1958          * but we don't have the $object here.  For instance, if $object is
1959          * a blessed array, what we have in `sv' is the array, and we can't
1960          * call a method on those.
1961          *
1962          * Therefore, we need to create a temporary reference to the object and
1963          * make the call on that reference.
1964          */
1965
1966         TRACEME(("about to call STORABLE_freeze on class %s", class));
1967
1968         ref = newRV_noinc(sv);                          /* Temporary reference */
1969         av = array_call(ref, hook, clone);      /* @a = $object->STORABLE_freeze($c) */
1970         SvRV(ref) = 0;
1971         SvREFCNT_dec(ref);                                      /* Reclaim temporary reference */
1972
1973         count = AvFILLp(av) + 1;
1974         TRACEME(("store_hook, array holds %d items", count));
1975
1976         /*
1977          * If they return an empty list, it means they wish to ignore the
1978          * hook for this class (and not just this instance -- that's for them
1979          * to handle if they so wish).
1980          *
1981          * Simply disable the cached entry for the hook (it won't be recomputed
1982          * since it's present in the cache) and recurse to store_blessed().
1983          */
1984
1985         if (!count) {
1986                 /*
1987                  * They must not change their mind in the middle of a serialization.
1988                  */
1989
1990                 if (hv_fetch(cxt->hclass, class, len, FALSE))
1991                         CROAK(("Too late to ignore hooks for %s class \"%s\"",
1992                                 (cxt->optype & ST_CLONE) ? "cloning" : "storing", class));
1993
1994                 pkg_hide(cxt->hook, pkg, "STORABLE_freeze");
1995
1996                 ASSERT(!pkg_can(cxt->hook, pkg, "STORABLE_freeze"), ("hook invisible"));
1997                 TRACEME(("Ignoring STORABLE_freeze in class \"%s\"", class));
1998
1999                 return store_blessed(cxt, sv, type, pkg);
2000         }
2001
2002         /*
2003          * Get frozen string.
2004          */
2005
2006         ary = AvARRAY(av);
2007         pv = SvPV(ary[0], len2);
2008
2009         /*
2010          * Allocate a class ID if not already done.
2011          */
2012
2013         if (!known_class(cxt, class, len, &classnum)) {
2014                 TRACEME(("first time we see class %s, ID = %d", class, classnum));
2015                 classnum = -1;                          /* Mark: we must store classname */
2016         } else {
2017                 TRACEME(("already seen class %s, ID = %d", class, classnum));
2018         }
2019
2020         /*
2021          * If they returned more than one item, we need to serialize some
2022          * extra references if not already done.
2023          *
2024          * Loop over the array, starting at postion #1, and for each item,
2025          * ensure it is a reference, serialize it if not already done, and
2026          * replace the entry with the tag ID of the corresponding serialized
2027          * object.
2028          *
2029          * We CHEAT by not calling av_fetch() and read directly within the
2030          * array, for speed.
2031          */
2032
2033         for (i = 1; i < count; i++) {
2034                 SV **svh;
2035                 SV *xsv = ary[i];
2036
2037                 if (!SvROK(xsv))
2038                         CROAK(("Item #%d from hook in %s is not a reference", i, class));
2039                 xsv = SvRV(xsv);                /* Follow ref to know what to look for */
2040
2041                 /*
2042                  * Look in hseen and see if we have a tag already.
2043                  * Serialize entry if not done already, and get its tag.
2044                  */
2045
2046                 if (svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE))
2047                         goto sv_seen;           /* Avoid moving code too far to the right */
2048
2049                 TRACEME(("listed object %d at 0x%"UVxf" is unknown",
2050                         i-1, PTR2UV(xsv)));
2051
2052                 /*
2053                  * We need to recurse to store that object and get it to be known
2054                  * so that we can resolve the list of object-IDs at retrieve time.
2055                  *
2056                  * The first time we do this, we need to emit the proper header
2057                  * indicating that we recursed, and what the type of object is (the
2058                  * object we're storing via a user-hook).  Indeed, during retrieval,
2059                  * we'll have to create the object before recursing to retrieve the
2060                  * others, in case those would point back at that object.
2061                  */
2062
2063                 /* [SX_HOOK] <flags> <object>*/
2064                 if (!recursed++)
2065                         PUTMARK(SX_HOOK);
2066                 PUTMARK(flags);
2067
2068                 if (ret = store(cxt, xsv))              /* Given by hook for us to store */
2069                         return ret;
2070
2071                 svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE);
2072                 if (!svh)
2073                         CROAK(("Could not serialize item #%d from hook in %s", i, class));
2074
2075                 /*
2076                  * Replace entry with its tag (not a real SV, so no refcnt increment)
2077                  */
2078
2079         sv_seen:
2080                 SvREFCNT_dec(xsv);
2081                 ary[i] = *svh;
2082                 TRACEME(("listed object %d at 0x%"UVxf" is tag #%d",
2083                          i-1, PTR2UV(xsv), (I32) *svh));
2084         }
2085
2086         /*
2087          * Compute leading flags.
2088          */
2089
2090         flags = obj_type;
2091         if (((classnum == -1) ? len : classnum) > LG_SCALAR)
2092                 flags |= SHF_LARGE_CLASSLEN;
2093         if (classnum != -1)
2094                 flags |= SHF_IDX_CLASSNAME;
2095         if (len2 > LG_SCALAR)
2096                 flags |= SHF_LARGE_STRLEN;
2097         if (count > 1)
2098                 flags |= SHF_HAS_LIST;
2099         if (count > (LG_SCALAR + 1))
2100                 flags |= SHF_LARGE_LISTLEN;
2101
2102         /*
2103          * We're ready to emit either serialized form:
2104          *
2105          *   SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
2106          *   SX_HOOK <flags> <index>           <len2> <str> [<len3> <object-IDs>]
2107          *
2108          * If we recursed, the SX_HOOK has already been emitted.
2109          */
2110
2111         TRACEME(("SX_HOOK (recursed=%d) flags=0x%x class=%d len=%d len2=%d len3=%d",
2112                 recursed, flags, classnum, len, len2, count-1));
2113
2114         /* SX_HOOK <flags> */
2115         if (!recursed)
2116                 PUTMARK(SX_HOOK);
2117         PUTMARK(flags);
2118
2119         /* <len> <classname> or <index> */
2120         if (flags & SHF_IDX_CLASSNAME) {
2121                 if (flags & SHF_LARGE_CLASSLEN)
2122                         WLEN(classnum);
2123                 else {
2124                         unsigned char cnum = (unsigned char) classnum;
2125                         PUTMARK(cnum);
2126                 }
2127         } else {
2128                 if (flags & SHF_LARGE_CLASSLEN)
2129                         WLEN(len);
2130                 else {
2131                         unsigned char clen = (unsigned char) len;
2132                         PUTMARK(clen);
2133                 }
2134                 WRITE(class, len);              /* Final \0 is omitted */
2135         }
2136
2137         /* <len2> <frozen-str> */
2138         if (flags & SHF_LARGE_STRLEN)
2139                 WLEN(len2);
2140         else {
2141                 unsigned char clen = (unsigned char) len2;
2142                 PUTMARK(clen);
2143         }
2144         if (len2)
2145                 WRITE(pv, len2);        /* Final \0 is omitted */
2146
2147         /* [<len3> <object-IDs>] */
2148         if (flags & SHF_HAS_LIST) {
2149                 int len3 = count - 1;
2150                 if (flags & SHF_LARGE_LISTLEN)
2151                         WLEN(len3);
2152                 else {
2153                         unsigned char clen = (unsigned char) len3;
2154                         PUTMARK(clen);
2155                 }
2156
2157                 /*
2158                  * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a
2159                  * real pointer, rather a tag number, well under the 32-bit limit.
2160                  */
2161
2162                 for (i = 1; i < count; i++) {
2163                         I32 tagval = htonl(LOW_32BITS(ary[i]));
2164                         WRITE(&tagval, sizeof(I32));
2165                         TRACEME(("object %d, tag #%d", i-1, ntohl(tagval)));
2166                 }
2167         }
2168
2169         /*
2170          * Free the array.  We need extra care for indices after 0, since they
2171          * don't hold real SVs but integers cast.
2172          */
2173
2174         if (count > 1)
2175                 AvFILLp(av) = 0;        /* Cheat, nothing after 0 interests us */
2176         av_undef(av);
2177         sv_free((SV *) av);
2178
2179         return 0;
2180 }
2181
2182 /*
2183  * store_blessed        -- dispatched manually, not via sv_store[]
2184  *
2185  * Check whether there is a STORABLE_xxx hook defined in the class or in one
2186  * of its ancestors.  If there is, then redispatch to store_hook();
2187  *
2188  * Otherwise, the blessed SV is stored using the following layout:
2189  *
2190  *    SX_BLESS <flag> <len> <classname> <object>
2191  *
2192  * where <flag> indicates whether <len> is stored on 0 or 4 bytes, depending
2193  * on the high-order bit in flag: if 1, then length follows on 4 bytes.
2194  * Otherwise, the low order bits give the length, thereby giving a compact
2195  * representation for class names less than 127 chars long.
2196  *
2197  * Each <classname> seen is remembered and indexed, so that the next time
2198  * an object in the blessed in the same <classname> is stored, the following
2199  * will be emitted:
2200  *
2201  *    SX_IX_BLESS <flag> <index> <object>
2202  *
2203  * where <index> is the classname index, stored on 0 or 4 bytes depending
2204  * on the high-order bit in flag (same encoding as above for <len>).
2205  */
2206 static int store_blessed(
2207         stcxt_t *cxt,
2208         SV *sv,
2209         int type,
2210         HV *pkg)
2211 {
2212         SV *hook;
2213         I32 len;
2214         char *class;
2215         I32 classnum;
2216
2217         TRACEME(("store_blessed, type %d, class \"%s\"", type, HvNAME(pkg)));
2218
2219         /*
2220          * Look for a hook for this blessed SV and redirect to store_hook()
2221          * if needed.
2222          */
2223
2224         hook = pkg_can(cxt->hook, pkg, "STORABLE_freeze");
2225         if (hook)
2226                 return store_hook(cxt, sv, type, pkg, hook);
2227
2228         /*
2229          * This is a blessed SV without any serialization hook.
2230          */
2231
2232         class = HvNAME(pkg);
2233         len = strlen(class);
2234
2235         TRACEME(("blessed 0x%"UVxf" in %s, no hook: tagged #%d",
2236                  PTR2UV(sv), class, cxt->tagnum));
2237
2238         /*
2239          * Determine whether it is the first time we see that class name (in which
2240          * case it will be stored in the SX_BLESS form), or whether we already
2241          * saw that class name before (in which case the SX_IX_BLESS form will be
2242          * used).
2243          */
2244
2245         if (known_class(cxt, class, len, &classnum)) {
2246                 TRACEME(("already seen class %s, ID = %d", class, classnum));
2247                 PUTMARK(SX_IX_BLESS);
2248                 if (classnum <= LG_BLESS) {
2249                         unsigned char cnum = (unsigned char) classnum;
2250                         PUTMARK(cnum);
2251                 } else {
2252                         unsigned char flag = (unsigned char) 0x80;
2253                         PUTMARK(flag);
2254                         WLEN(classnum);
2255                 }
2256         } else {
2257                 TRACEME(("first time we see class %s, ID = %d", class, classnum));
2258                 PUTMARK(SX_BLESS);
2259                 if (len <= LG_BLESS) {
2260                         unsigned char clen = (unsigned char) len;
2261                         PUTMARK(clen);
2262                 } else {
2263                         unsigned char flag = (unsigned char) 0x80;
2264                         PUTMARK(flag);
2265                         WLEN(len);                                      /* Don't BER-encode, this should be rare */
2266                 }
2267                 WRITE(class, len);                              /* Final \0 is omitted */
2268         }
2269
2270         /*
2271          * Now emit the <object> part.
2272          */
2273
2274         return SV_STORE(type)(cxt, sv);
2275 }
2276
2277 /*
2278  * store_other
2279  *
2280  * We don't know how to store the item we reached, so return an error condition.
2281  * (it's probably a GLOB, some CODE reference, etc...)
2282  *
2283  * If they defined the `forgive_me' variable at the Perl level to some
2284  * true value, then don't croak, just warn, and store a placeholder string
2285  * instead.
2286  */
2287 static int store_other(stcxt_t *cxt, SV *sv)
2288 {
2289         STRLEN len;
2290         static char buf[80];
2291
2292         TRACEME(("store_other"));
2293
2294         /*
2295          * Fetch the value from perl only once per store() operation.
2296          */
2297
2298         if (
2299                 cxt->forgive_me == 0 ||
2300                 (cxt->forgive_me < 0 && !(cxt->forgive_me =
2301                         SvTRUE(perl_get_sv("Storable::forgive_me", TRUE)) ? 1 : 0))
2302         )
2303                 CROAK(("Can't store %s items", sv_reftype(sv, FALSE)));
2304
2305         warn("Can't store item %s(0x%"UVxf")",
2306                 sv_reftype(sv, FALSE), PTR2UV(sv));
2307
2308         /*
2309          * Store placeholder string as a scalar instead...
2310          */
2311
2312         (void) sprintf(buf, "You lost %s(0x%"UVxf")\0", sv_reftype(sv, FALSE),
2313                        PTR2UV(sv));
2314
2315         len = strlen(buf);
2316         STORE_SCALAR(buf, len);
2317         TRACEME(("ok (dummy \"%s\", length = %d)", buf, len));
2318
2319         return 0;
2320 }
2321
2322 /***
2323  *** Store driving routines
2324  ***/
2325
2326 /*
2327  * sv_type
2328  *
2329  * WARNING: partially duplicates Perl's sv_reftype for speed.
2330  *
2331  * Returns the type of the SV, identified by an integer. That integer
2332  * may then be used to index the dynamic routine dispatch table.
2333  */
2334 static int sv_type(SV *sv)
2335 {
2336         switch (SvTYPE(sv)) {
2337         case SVt_NULL:
2338         case SVt_IV:
2339         case SVt_NV:
2340                 /*
2341                  * No need to check for ROK, that can't be set here since there
2342                  * is no field capable of hodling the xrv_rv reference.
2343                  */
2344                 return svis_SCALAR;
2345         case SVt_PV:
2346         case SVt_RV:
2347         case SVt_PVIV:
2348         case SVt_PVNV:
2349                 /*
2350                  * Starting from SVt_PV, it is possible to have the ROK flag
2351                  * set, the pointer to the other SV being either stored in
2352                  * the xrv_rv (in the case of a pure SVt_RV), or as the
2353                  * xpv_pv field of an SVt_PV and its heirs.
2354                  *
2355                  * However, those SV cannot be magical or they would be an
2356                  * SVt_PVMG at least.
2357                  */
2358                 return SvROK(sv) ? svis_REF : svis_SCALAR;
2359         case SVt_PVMG:
2360         case SVt_PVLV:          /* Workaround for perl5.004_04 "LVALUE" bug */
2361                 if (SvRMAGICAL(sv) && (mg_find(sv, 'p')))
2362                         return svis_TIED_ITEM;
2363                 /* FALL THROUGH */
2364         case SVt_PVBM:
2365                 if (SvRMAGICAL(sv) && (mg_find(sv, 'q')))
2366                         return svis_TIED;
2367                 return SvROK(sv) ? svis_REF : svis_SCALAR;
2368         case SVt_PVAV:
2369                 if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
2370                         return svis_TIED;
2371                 return svis_ARRAY;
2372         case SVt_PVHV:
2373                 if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
2374                         return svis_TIED;
2375                 return svis_HASH;
2376         default:
2377                 break;
2378         }
2379
2380         return svis_OTHER;
2381 }
2382
2383 /*
2384  * store
2385  *
2386  * Recursively store objects pointed to by the sv to the specified file.
2387  *
2388  * Layout is <content> or SX_OBJECT <tagnum> if we reach an already stored
2389  * object (one for which storage has started -- it may not be over if we have
2390  * a self-referenced structure). This data set forms a stored <object>.
2391  */
2392 static int store(stcxt_t *cxt, SV *sv)
2393 {
2394         SV **svh;
2395         int ret;
2396         SV *tag;
2397         int type;
2398         HV *hseen = cxt->hseen;
2399
2400         TRACEME(("store (0x%"UVxf")", PTR2UV(sv)));
2401
2402         /*
2403          * If object has already been stored, do not duplicate data.
2404          * Simply emit the SX_OBJECT marker followed by its tag data.
2405          * The tag is always written in network order.
2406          *
2407          * NOTA BENE, for 64-bit machines: the "*svh" below does not yield a
2408          * real pointer, rather a tag number (watch the insertion code below).
2409          * That means it pobably safe to assume it is well under the 32-bit limit,
2410          * and makes the truncation safe.
2411          *              -- RAM, 14/09/1999
2412          */
2413
2414         svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE);
2415         if (svh) {
2416                 I32 tagval = htonl(LOW_32BITS(*svh));
2417
2418                 TRACEME(("object 0x%"UVxf" seen as #%d",
2419                          PTR2UV(sv), ntohl(tagval)));
2420
2421                 PUTMARK(SX_OBJECT);
2422                 WRITE(&tagval, sizeof(I32));
2423                 return 0;
2424         }
2425
2426         /*
2427          * Allocate a new tag and associate it with the address of the sv being
2428          * stored, before recursing...
2429          *
2430          * In order to avoid creating new SvIVs to hold the tagnum we just
2431          * cast the tagnum to a SV pointer and store that in the hash.  This
2432          * means that we must clean up the hash manually afterwards, but gives
2433          * us a 15% throughput increase.
2434          *
2435          */
2436
2437         cxt->tagnum++;
2438         if (!hv_store(hseen,
2439                         (char *) &sv, sizeof(sv), INT2PTR(SV*, cxt->tagnum), 0))
2440                 return -1;
2441
2442         /*
2443          * Store `sv' and everything beneath it, using appropriate routine.
2444          * Abort immediately if we get a non-zero status back.
2445          */
2446
2447         type = sv_type(sv);
2448
2449         TRACEME(("storing 0x%"UVxf" tag #%d, type %d...",
2450                  PTR2UV(sv), cxt->tagnum, type));
2451
2452         if (SvOBJECT(sv)) {
2453                 HV *pkg = SvSTASH(sv);
2454                 ret = store_blessed(cxt, sv, type, pkg);
2455         } else
2456                 ret = SV_STORE(type)(cxt, sv);
2457
2458         TRACEME(("%s (stored 0x%"UVxf", refcnt=%d, %s)",
2459                 ret ? "FAILED" : "ok", PTR2UV(sv),
2460                 SvREFCNT(sv), sv_reftype(sv, FALSE)));
2461
2462         return ret;
2463 }
2464
2465 /*
2466  * magic_write
2467  *
2468  * Write magic number and system information into the file.
2469  * Layout is <magic> <network> [<len> <byteorder> <sizeof int> <sizeof long>
2470  * <sizeof ptr>] where <len> is the length of the byteorder hexa string.
2471  * All size and lenghts are written as single characters here.
2472  *
2473  * Note that no byte ordering info is emitted when <network> is true, since
2474  * integers will be emitted in network order in that case.
2475  */
2476 static int magic_write(stcxt_t *cxt)
2477 {
2478         char buf[256];  /* Enough room for 256 hexa digits */
2479         unsigned char c;
2480         int use_network_order = cxt->netorder;
2481
2482         TRACEME(("magic_write on fd=%d", cxt->fio ? fileno(cxt->fio) : -1));
2483
2484         if (cxt->fio)
2485                 WRITE(magicstr, strlen(magicstr));      /* Don't write final \0 */
2486
2487         /*
2488          * Starting with 0.6, the "use_network_order" byte flag is also used to
2489          * indicate the version number of the binary image, encoded in the upper
2490          * bits. The bit 0 is always used to indicate network order.
2491          */
2492
2493         c = (unsigned char)
2494                 ((use_network_order ? 0x1 : 0x0) | (STORABLE_BIN_MAJOR << 1));
2495         PUTMARK(c);
2496
2497         /*
2498          * Starting with 0.7, a full byte is dedicated to the minor version of
2499          * the binary format, which is incremented only when new markers are
2500          * introduced, for instance, but when backward compatibility is preserved.
2501          */
2502
2503         PUTMARK((unsigned char) STORABLE_BIN_MINOR);
2504
2505         if (use_network_order)
2506                 return 0;                                               /* Don't bother with byte ordering */
2507
2508         sprintf(buf, "%lx", (unsigned long) BYTEORDER);
2509         c = (unsigned char) strlen(buf);
2510         PUTMARK(c);
2511         WRITE(buf, (unsigned int) c);           /* Don't write final \0 */
2512         PUTMARK((unsigned char) sizeof(int));
2513         PUTMARK((unsigned char) sizeof(long));
2514         PUTMARK((unsigned char) sizeof(char *));
2515
2516         TRACEME(("ok (magic_write byteorder = 0x%lx [%d], I%d L%d P%d)",
2517                  (unsigned long) BYTEORDER, (int) c,
2518                  sizeof(int), sizeof(long), sizeof(char *)));
2519
2520         return 0;
2521 }
2522
2523 /*
2524  * do_store
2525  *
2526  * Common code for store operations.
2527  *
2528  * When memory store is requested (f = NULL) and a non null SV* is given in
2529  * `res', it is filled with a new SV created out of the memory buffer.
2530  *
2531  * It is required to provide a non-null `res' when the operation type is not
2532  * dclone() and store() is performed to memory.
2533  */
2534 static int do_store(
2535         PerlIO *f,
2536         SV *sv,
2537         int optype,
2538         int network_order,
2539         SV **res)
2540 {
2541         dSTCXT;
2542         int status;
2543
2544         ASSERT(!(f == 0 && !(optype & ST_CLONE)) || res,
2545                 ("must supply result SV pointer for real recursion to memory"));
2546
2547         TRACEME(("do_store (optype=%d, netorder=%d)",
2548                 optype, network_order));
2549
2550         optype |= ST_STORE;
2551
2552         /*
2553          * Workaround for CROAK leak: if they enter with a "dirty" context,
2554          * free up memory for them now.
2555          */
2556
2557         if (cxt->dirty)
2558                 clean_context(cxt);
2559
2560         /*
2561          * Now that STORABLE_xxx hooks exist, it is possible that they try to
2562          * re-enter store() via the hooks.  We need to stack contexts.
2563          */
2564
2565         if (cxt->entry)
2566                 cxt = allocate_context(cxt);
2567
2568         cxt->entry++;
2569
2570         ASSERT(cxt->entry == 1, ("starting new recursion"));
2571         ASSERT(!cxt->dirty, ("clean context"));
2572
2573         /*
2574          * Ensure sv is actually a reference. From perl, we called something
2575          * like:
2576          *       pstore(FILE, \@array);
2577          * so we must get the scalar value behing that reference.
2578          */
2579
2580         if (!SvROK(sv))
2581                 CROAK(("Not a reference"));
2582         sv = SvRV(sv);                  /* So follow it to know what to store */
2583
2584         /*
2585          * If we're going to store to memory, reset the buffer.
2586          */
2587
2588         if (!f)
2589                 MBUF_INIT(0);
2590
2591         /*
2592          * Prepare context and emit headers.
2593          */
2594
2595         init_store_context(cxt, f, optype, network_order);
2596
2597         if (-1 == magic_write(cxt))             /* Emit magic and ILP info */
2598                 return 0;                                       /* Error */
2599
2600         /*
2601          * Recursively store object...
2602          */
2603
2604         ASSERT(is_storing(), ("within store operation"));
2605
2606         status = store(cxt, sv);                /* Just do it! */
2607
2608         /*
2609          * If they asked for a memory store and they provided an SV pointer,
2610          * make an SV string out of the buffer and fill their pointer.
2611          *
2612          * When asking for ST_REAL, it's MANDATORY for the caller to provide
2613          * an SV, since context cleanup might free the buffer if we did recurse.
2614          * (unless caller is dclone(), which is aware of that).
2615          */
2616
2617         if (!cxt->fio && res)
2618                 *res = mbuf2sv();
2619
2620         /*
2621          * Final cleanup.
2622          *
2623          * The "root" context is never freed, since it is meant to be always
2624          * handy for the common case where no recursion occurs at all (i.e.
2625          * we enter store() outside of any Storable code and leave it, period).
2626          * We know it's the "root" context because there's nothing stacked
2627          * underneath it.
2628          *
2629          * OPTIMIZATION:
2630          *
2631          * When deep cloning, we don't free the context: doing so would force
2632          * us to copy the data in the memory buffer.  Sicne we know we're
2633          * about to enter do_retrieve...
2634          */
2635
2636         clean_store_context(cxt);
2637         if (cxt->prev && !(cxt->optype & ST_CLONE))
2638                 free_context(cxt);
2639
2640         TRACEME(("do_store returns %d", status));
2641
2642         return status == 0;
2643 }
2644
2645 /*
2646  * pstore
2647  *
2648  * Store the transitive data closure of given object to disk.
2649  * Returns 0 on error, a true value otherwise.
2650  */
2651 int pstore(PerlIO *f, SV *sv)
2652 {
2653         TRACEME(("pstore"));
2654         return do_store(f, sv, 0, FALSE, (SV**) 0);
2655
2656 }
2657
2658 /*
2659  * net_pstore
2660  *
2661  * Same as pstore(), but network order is used for integers and doubles are
2662  * emitted as strings.
2663  */
2664 int net_pstore(PerlIO *f, SV *sv)
2665 {
2666         TRACEME(("net_pstore"));
2667         return do_store(f, sv, 0, TRUE, (SV**) 0);
2668 }
2669
2670 /***
2671  *** Memory stores.
2672  ***/
2673
2674 /*
2675  * mbuf2sv
2676  *
2677  * Build a new SV out of the content of the internal memory buffer.
2678  */
2679 static SV *mbuf2sv(void)
2680 {
2681         dSTCXT;
2682
2683         return newSVpv(mbase, MBUF_SIZE());
2684 }
2685
2686 /*
2687  * mstore
2688  *
2689  * Store the transitive data closure of given object to memory.
2690  * Returns undef on error, a scalar value containing the data otherwise.
2691  */
2692 SV *mstore(SV *sv)
2693 {
2694         dSTCXT;
2695         SV *out;
2696
2697         TRACEME(("mstore"));
2698
2699         if (!do_store((PerlIO*) 0, sv, 0, FALSE, &out))
2700                 return &PL_sv_undef;
2701
2702         return out;
2703 }
2704
2705 /*
2706  * net_mstore
2707  *
2708  * Same as mstore(), but network order is used for integers and doubles are
2709  * emitted as strings.
2710  */
2711 SV *net_mstore(SV *sv)
2712 {
2713         dSTCXT;
2714         SV *out;
2715
2716         TRACEME(("net_mstore"));
2717
2718         if (!do_store((PerlIO*) 0, sv, 0, TRUE, &out))
2719                 return &PL_sv_undef;
2720
2721         return out;
2722 }
2723
2724 /***
2725  *** Specific retrieve callbacks.
2726  ***/
2727
2728 /*
2729  * retrieve_other
2730  *
2731  * Return an error via croak, since it is not possible that we get here
2732  * under normal conditions, when facing a file produced via pstore().
2733  */
2734 static SV *retrieve_other(stcxt_t *cxt)
2735 {
2736         if (
2737                 cxt->ver_major != STORABLE_BIN_MAJOR &&
2738                 cxt->ver_minor != STORABLE_BIN_MINOR
2739         ) {
2740                 CROAK(("Corrupted storable %s (binary v%d.%d), current is v%d.%d",
2741                         cxt->fio ? "file" : "string",
2742                         cxt->ver_major, cxt->ver_minor,
2743                         STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
2744         } else {
2745                 CROAK(("Corrupted storable %s (binary v%d.%d)",
2746                         cxt->fio ? "file" : "string",
2747                         cxt->ver_major, cxt->ver_minor));
2748         }
2749
2750         return (SV *) 0;                /* Just in case */
2751 }
2752
2753 /*
2754  * retrieve_idx_blessed
2755  *
2756  * Layout is SX_IX_BLESS <index> <object> with SX_IX_BLESS already read.
2757  * <index> can be coded on either 1 or 5 bytes.
2758  */
2759 static SV *retrieve_idx_blessed(stcxt_t *cxt)
2760 {
2761         I32 idx;
2762         char *class;
2763         SV **sva;
2764         SV *sv;
2765
2766         TRACEME(("retrieve_idx_blessed (#%d)", cxt->tagnum));
2767
2768         GETMARK(idx);                   /* Index coded on a single char? */
2769         if (idx & 0x80)
2770                 RLEN(idx);
2771
2772         /*
2773          * Fetch classname in `aclass'
2774          */
2775
2776         sva = av_fetch(cxt->aclass, idx, FALSE);
2777         if (!sva)
2778                 CROAK(("Class name #%d should have been seen already", idx));
2779
2780         class = SvPVX(*sva);    /* We know it's a PV, by construction */
2781
2782         TRACEME(("class ID %d => %s", idx, class));
2783
2784         /*
2785          * Retrieve object and bless it.
2786          */
2787
2788         sv = retrieve(cxt);
2789         if (sv)
2790                 BLESS(sv, class);
2791
2792         return sv;
2793 }
2794
2795 /*
2796  * retrieve_blessed
2797  *
2798  * Layout is SX_BLESS <len> <classname> <object> with SX_BLESS already read.
2799  * <len> can be coded on either 1 or 5 bytes.
2800  */
2801 static SV *retrieve_blessed(stcxt_t *cxt)
2802 {
2803         I32 len;
2804         SV *sv;
2805         char buf[LG_BLESS + 1];         /* Avoid malloc() if possible */
2806         char *class = buf;
2807
2808         TRACEME(("retrieve_blessed (#%d)", cxt->tagnum));
2809
2810         /*
2811          * Decode class name length and read that name.
2812          *
2813          * Short classnames have two advantages: their length is stored on one
2814          * single byte, and the string can be read on the stack.
2815          */
2816
2817         GETMARK(len);                   /* Length coded on a single char? */
2818         if (len & 0x80) {
2819                 RLEN(len);
2820                 TRACEME(("** allocating %d bytes for class name", len+1));
2821                 New(10003, class, len+1, char);
2822         }
2823         READ(class, len);
2824         class[len] = '\0';              /* Mark string end */
2825
2826         /*
2827          * It's a new classname, otherwise it would have been an SX_IX_BLESS.
2828          */
2829
2830         if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len)))
2831                 return (SV *) 0;
2832
2833         /*
2834          * Retrieve object and bless it.
2835          */
2836
2837         sv = retrieve(cxt);
2838         if (sv) {
2839                 BLESS(sv, class);
2840                 if (class != buf)
2841                         Safefree(class);
2842         }
2843
2844         return sv;
2845 }
2846
2847 /*
2848  * retrieve_hook
2849  *
2850  * Layout: SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
2851  * with leading mark already read, as usual.
2852  *
2853  * When recursion was involved during serialization of the object, there
2854  * is an unknown amount of serialized objects after the SX_HOOK mark.  Until
2855  * we reach a <flags> marker with the recursion bit cleared.
2856  */
2857 static SV *retrieve_hook(stcxt_t *cxt)
2858 {
2859         I32 len;
2860         char buf[LG_BLESS + 1];         /* Avoid malloc() if possible */
2861         char *class = buf;
2862         unsigned int flags;
2863         I32 len2;
2864         SV *frozen;
2865         I32 len3 = 0;
2866         AV *av = 0;
2867         SV *hook;
2868         SV *sv;
2869         SV *rv;
2870         int obj_type;
2871         I32 classname;
2872         int clone = cxt->optype & ST_CLONE;
2873
2874         TRACEME(("retrieve_hook (#%d)", cxt->tagnum));
2875
2876         /*
2877          * Read flags, which tell us about the type, and whether we need to recurse.
2878          */
2879
2880         GETMARK(flags);
2881
2882         /*
2883          * Create the (empty) object, and mark it as seen.
2884          *
2885          * This must be done now, because tags are incremented, and during
2886          * serialization, the object tag was affected before recursion could
2887          * take place.
2888          */
2889
2890         obj_type = flags & SHF_TYPE_MASK;
2891         switch (obj_type) {
2892         case SHT_SCALAR:
2893                 sv = newSV(0);
2894                 break;
2895         case SHT_ARRAY:
2896                 sv = (SV *) newAV();
2897                 break;
2898         case SHT_HASH:
2899                 sv = (SV *) newHV();
2900                 break;
2901         default:
2902                 return retrieve_other(cxt);             /* Let it croak */
2903         }
2904         SEEN(sv);
2905
2906         /*
2907          * Whilst flags tell us to recurse, do so.
2908          *
2909          * We don't need to remember the addresses returned by retrieval, because
2910          * all the references will be obtained through indirection via the object
2911          * tags in the object-ID list.
2912          */
2913
2914         while (flags & SHF_NEED_RECURSE) {
2915                 TRACEME(("retrieve_hook recursing..."));
2916                 rv = retrieve(cxt);
2917                 if (!rv)
2918                         return (SV *) 0;
2919                 TRACEME(("retrieve_hook back with rv=0x%"UVxf,
2920                          PTR2UV(rv)));
2921                 GETMARK(flags);
2922         }
2923
2924         if (flags & SHF_IDX_CLASSNAME) {
2925                 SV **sva;
2926                 I32 idx;
2927
2928                 /*
2929                  * Fetch index from `aclass'
2930                  */
2931
2932                 if (flags & SHF_LARGE_CLASSLEN)
2933                         RLEN(idx);
2934                 else
2935                         GETMARK(idx);
2936
2937                 sva = av_fetch(cxt->aclass, idx, FALSE);
2938                 if (!sva)
2939                         CROAK(("Class name #%d should have been seen already", idx));
2940
2941                 class = SvPVX(*sva);    /* We know it's a PV, by construction */
2942                 TRACEME(("class ID %d => %s", idx, class));
2943
2944         } else {
2945                 /*
2946                  * Decode class name length and read that name.
2947                  *
2948                  * NOTA BENE: even if the length is stored on one byte, we don't read
2949                  * on the stack.  Just like retrieve_blessed(), we limit the name to
2950                  * LG_BLESS bytes.  This is an arbitrary decision.
2951                  */
2952
2953                 if (flags & SHF_LARGE_CLASSLEN)
2954                         RLEN(len);
2955                 else
2956                         GETMARK(len);
2957
2958                 if (len > LG_BLESS) {
2959                         TRACEME(("** allocating %d bytes for class name", len+1));
2960                         New(10003, class, len+1, char);
2961                 }
2962
2963                 READ(class, len);
2964                 class[len] = '\0';              /* Mark string end */
2965
2966                 /*
2967                  * Record new classname.
2968                  */
2969
2970                 if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len)))
2971                         return (SV *) 0;
2972         }
2973
2974         TRACEME(("class name: %s", class));
2975
2976         /*
2977          * Decode user-frozen string length and read it in a SV.
2978          *
2979          * For efficiency reasons, we read data directly into the SV buffer.
2980          * To understand that code, read retrieve_scalar()
2981          */
2982
2983         if (flags & SHF_LARGE_STRLEN)
2984                 RLEN(len2);
2985         else
2986                 GETMARK(len2);
2987
2988         frozen = NEWSV(10002, len2);
2989         if (len2) {
2990                 SAFEREAD(SvPVX(frozen), len2, frozen);
2991                 SvCUR_set(frozen, len2);
2992                 *SvEND(frozen) = '\0';
2993         }
2994         (void) SvPOK_only(frozen);              /* Validates string pointer */
2995         SvTAINT(frozen);
2996
2997         TRACEME(("frozen string: %d bytes", len2));
2998
2999         /*
3000          * Decode object-ID list length, if present.
3001          */
3002
3003         if (flags & SHF_HAS_LIST) {
3004                 if (flags & SHF_LARGE_LISTLEN)
3005                         RLEN(len3);
3006                 else
3007                         GETMARK(len3);
3008                 if (len3) {
3009                         av = newAV();
3010                         av_extend(av, len3 + 1);        /* Leave room for [0] */
3011                         AvFILLp(av) = len3;                     /* About to be filled anyway */
3012                 }
3013         }
3014
3015         TRACEME(("has %d object IDs to link", len3));
3016
3017         /*
3018          * Read object-ID list into array.
3019          * Because we pre-extended it, we can cheat and fill it manually.
3020          *
3021          * We read object tags and we can convert them into SV* on the fly
3022          * because we know all the references listed in there (as tags)
3023          * have been already serialized, hence we have a valid correspondance
3024          * between each of those tags and the recreated SV.
3025          */
3026
3027         if (av) {
3028                 SV **ary = AvARRAY(av);
3029                 int i;
3030                 for (i = 1; i <= len3; i++) {   /* We leave [0] alone */
3031                         I32 tag;
3032                         SV **svh;
3033                         SV *xsv;
3034
3035                         READ(&tag, sizeof(I32));
3036                         tag = ntohl(tag);
3037                         svh = av_fetch(cxt->aseen, tag, FALSE);
3038                         if (!svh)
3039                                 CROAK(("Object #%d should have been retrieved already", tag));
3040                         xsv = *svh;
3041                         ary[i] = SvREFCNT_inc(xsv);
3042                 }
3043         }
3044
3045         /*
3046          * Bless the object and look up the STORABLE_thaw hook.
3047          */
3048
3049         BLESS(sv, class);
3050         hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3051         if (!hook)
3052                 CROAK(("No STORABLE_thaw defined for objects of class %s", class));
3053
3054         /*
3055          * If we don't have an `av' yet, prepare one.
3056          * Then insert the frozen string as item [0].
3057          */
3058
3059         if (!av) {
3060                 av = newAV();
3061                 av_extend(av, 1);
3062                 AvFILLp(av) = 0;
3063         }
3064         AvARRAY(av)[0] = SvREFCNT_inc(frozen);
3065
3066         /*
3067          * Call the hook as:
3068          *
3069          *   $object->STORABLE_thaw($cloning, $frozen, @refs);
3070          *
3071          * where $object is our blessed (empty) object, $cloning is a boolean
3072          * telling whether we're running a deep clone, $frozen is the frozen
3073          * string the user gave us in his serializing hook, and @refs, which may
3074          * be empty, is the list of extra references he returned along for us
3075          * to serialize.
3076          *
3077          * In effect, the hook is an alternate creation routine for the class,
3078          * the object itself being already created by the runtime.
3079          */
3080
3081         TRACEME(("calling STORABLE_thaw on %s at 0x%"UVxf" (%d args)",
3082                  class, PTR2UV(sv), AvFILLp(av) + 1));
3083
3084         rv = newRV(sv);
3085         (void) scalar_call(rv, hook, clone, av, G_SCALAR|G_DISCARD);
3086         SvREFCNT_dec(rv);
3087
3088         /*
3089          * Final cleanup.
3090          */
3091
3092         SvREFCNT_dec(frozen);
3093         av_undef(av);
3094         sv_free((SV *) av);
3095         if (!(flags & SHF_IDX_CLASSNAME) && class != buf)
3096                 Safefree(class);
3097
3098         return sv;
3099 }
3100
3101 /*
3102  * retrieve_ref
3103  *
3104  * Retrieve reference to some other scalar.
3105  * Layout is SX_REF <object>, with SX_REF already read.
3106  */
3107 static SV *retrieve_ref(stcxt_t *cxt)
3108 {
3109         SV *rv;
3110         SV *sv;
3111
3112         TRACEME(("retrieve_ref (#%d)", cxt->tagnum));
3113
3114         /*
3115          * We need to create the SV that holds the reference to the yet-to-retrieve
3116          * object now, so that we may record the address in the seen table.
3117          * Otherwise, if the object to retrieve references us, we won't be able
3118          * to resolve the SX_OBJECT we'll see at that point! Hence we cannot
3119          * do the retrieve first and use rv = newRV(sv) since it will be too late
3120          * for SEEN() recording.
3121          */
3122
3123         rv = NEWSV(10002, 0);
3124         SEEN(rv);                               /* Will return if rv is null */
3125         sv = retrieve(cxt);             /* Retrieve <object> */
3126         if (!sv)
3127                 return (SV *) 0;        /* Failed */
3128
3129         /*
3130          * WARNING: breaks RV encapsulation.
3131          *
3132          * Now for the tricky part. We have to upgrade our existing SV, so that
3133          * it is now an RV on sv... Again, we cheat by duplicating the code
3134          * held in newSVrv(), since we already got our SV from retrieve().
3135          *
3136          * We don't say:
3137          *
3138          *              SvRV(rv) = SvREFCNT_inc(sv);
3139          *
3140          * here because the reference count we got from retrieve() above is
3141          * already correct: if the object was retrieved from the file, then
3142          * its reference count is one. Otherwise, if it was retrieved via
3143          * an SX_OBJECT indication, a ref count increment was done.
3144          */
3145
3146         sv_upgrade(rv, SVt_RV);
3147         SvRV(rv) = sv;                          /* $rv = \$sv */
3148         SvROK_on(rv);
3149
3150         TRACEME(("ok (retrieve_ref at 0x%"UVxf")", PTR2UV(rv)));
3151
3152         return rv;
3153 }
3154
3155 /*
3156  * retrieve_overloaded
3157  *
3158  * Retrieve reference to some other scalar with overloading.
3159  * Layout is SX_OVERLOAD <object>, with SX_OVERLOAD already read.
3160  */
3161 static SV *retrieve_overloaded(stcxt_t *cxt)
3162 {
3163         SV *rv;
3164         SV *sv;
3165         HV *stash;
3166
3167         TRACEME(("retrieve_overloaded (#%d)", cxt->tagnum));
3168
3169         /*
3170          * Same code as retrieve_ref(), duplicated to avoid extra call.
3171          */
3172
3173         rv = NEWSV(10002, 0);
3174         SEEN(rv);                               /* Will return if rv is null */
3175         sv = retrieve(cxt);             /* Retrieve <object> */
3176         if (!sv)
3177                 return (SV *) 0;        /* Failed */
3178
3179         /*
3180          * WARNING: breaks RV encapsulation.
3181          */
3182
3183         sv_upgrade(rv, SVt_RV);
3184         SvRV(rv) = sv;                          /* $rv = \$sv */
3185         SvROK_on(rv);
3186
3187         /*
3188          * Restore overloading magic.
3189          */
3190
3191         stash = (HV *) SvSTASH (sv);
3192         if (!stash || !Gv_AMG(stash))
3193                 CROAK(("Cannot restore overloading on %s(0x%"UVxf")",
3194                        sv_reftype(sv, FALSE),
3195                        PTR2UV(sv)));
3196
3197         SvAMAGIC_on(rv);
3198
3199         TRACEME(("ok (retrieve_overloaded at 0x%"UVxf")", PTR2UV(rv)));
3200
3201         return rv;
3202 }
3203
3204 /*
3205  * retrieve_tied_array
3206  *
3207  * Retrieve tied array
3208  * Layout is SX_TIED_ARRAY <object>, with SX_TIED_ARRAY already read.
3209  */
3210 static SV *retrieve_tied_array(stcxt_t *cxt)
3211 {
3212         SV *tv;
3213         SV *sv;
3214
3215         TRACEME(("retrieve_tied_array (#%d)", cxt->tagnum));
3216
3217         tv = NEWSV(10002, 0);
3218         SEEN(tv);                                       /* Will return if tv is null */
3219         sv = retrieve(cxt);                     /* Retrieve <object> */
3220         if (!sv)
3221                 return (SV *) 0;                /* Failed */
3222
3223         sv_upgrade(tv, SVt_PVAV);
3224         AvREAL_off((AV *)tv);
3225         sv_magic(tv, sv, 'P', Nullch, 0);
3226         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3227
3228         TRACEME(("ok (retrieve_tied_array at 0x%"UVxf")", PTR2UV(tv)));
3229
3230         return tv;
3231 }
3232
3233 /*
3234  * retrieve_tied_hash
3235  *
3236  * Retrieve tied hash
3237  * Layout is SX_TIED_HASH <object>, with SX_TIED_HASH already read.
3238  */
3239 static SV *retrieve_tied_hash(stcxt_t *cxt)
3240 {
3241         SV *tv;
3242         SV *sv;
3243
3244         TRACEME(("retrieve_tied_hash (#%d)", cxt->tagnum));
3245
3246         tv = NEWSV(10002, 0);
3247         SEEN(tv);                                       /* Will return if tv is null */
3248         sv = retrieve(cxt);                     /* Retrieve <object> */
3249         if (!sv)
3250                 return (SV *) 0;                /* Failed */
3251
3252         sv_upgrade(tv, SVt_PVHV);
3253         sv_magic(tv, sv, 'P', Nullch, 0);
3254         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3255
3256         TRACEME(("ok (retrieve_tied_hash at 0x%"UVxf")", PTR2UV(tv)));
3257
3258         return tv;
3259 }
3260
3261 /*
3262  * retrieve_tied_scalar
3263  *
3264  * Retrieve tied scalar
3265  * Layout is SX_TIED_SCALAR <object>, with SX_TIED_SCALAR already read.
3266  */
3267 static SV *retrieve_tied_scalar(cxt)
3268 stcxt_t *cxt;
3269 {
3270         SV *tv;
3271         SV *sv;
3272
3273         TRACEME(("retrieve_tied_scalar (#%d)", cxt->tagnum));
3274
3275         tv = NEWSV(10002, 0);
3276         SEEN(tv);                                       /* Will return if rv is null */
3277         sv = retrieve(cxt);                     /* Retrieve <object> */
3278         if (!sv)
3279                 return (SV *) 0;                /* Failed */
3280
3281         sv_upgrade(tv, SVt_PVMG);
3282         sv_magic(tv, sv, 'q', Nullch, 0);
3283         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3284
3285         TRACEME(("ok (retrieve_tied_scalar at 0x%"UVxf")", PTR2UV(tv)));
3286
3287         return tv;
3288 }
3289
3290 /*
3291  * retrieve_tied_key
3292  *
3293  * Retrieve reference to value in a tied hash.
3294  * Layout is SX_TIED_KEY <object> <key>, with SX_TIED_KEY already read.
3295  */
3296 static SV *retrieve_tied_key(stcxt_t *cxt)
3297 {
3298         SV *tv;
3299         SV *sv;
3300         SV *key;
3301
3302         TRACEME(("retrieve_tied_key (#%d)", cxt->tagnum));
3303
3304         tv = NEWSV(10002, 0);
3305         SEEN(tv);                                       /* Will return if tv is null */
3306         sv = retrieve(cxt);                     /* Retrieve <object> */
3307         if (!sv)
3308                 return (SV *) 0;                /* Failed */
3309
3310         key = retrieve(cxt);            /* Retrieve <key> */
3311         if (!key)
3312                 return (SV *) 0;                /* Failed */
3313
3314         sv_upgrade(tv, SVt_PVMG);
3315         sv_magic(tv, sv, 'p', (char *)key, HEf_SVKEY);
3316         SvREFCNT_dec(key);                      /* Undo refcnt inc from sv_magic() */
3317         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3318
3319         return tv;
3320 }
3321
3322 /*
3323  * retrieve_tied_idx
3324  *
3325  * Retrieve reference to value in a tied array.
3326  * Layout is SX_TIED_IDX <object> <idx>, with SX_TIED_IDX already read.
3327  */
3328 static SV *retrieve_tied_idx(stcxt_t *cxt)
3329 {
3330         SV *tv;
3331         SV *sv;
3332         I32 idx;
3333
3334         TRACEME(("retrieve_tied_idx (#%d)", cxt->tagnum));
3335
3336         tv = NEWSV(10002, 0);
3337         SEEN(tv);                                       /* Will return if tv is null */
3338         sv = retrieve(cxt);                     /* Retrieve <object> */
3339         if (!sv)
3340                 return (SV *) 0;                /* Failed */
3341
3342         RLEN(idx);                                      /* Retrieve <idx> */
3343
3344         sv_upgrade(tv, SVt_PVMG);
3345         sv_magic(tv, sv, 'p', Nullch, idx);
3346         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3347
3348         return tv;
3349 }
3350
3351
3352 /*
3353  * retrieve_lscalar
3354  *
3355  * Retrieve defined long (string) scalar.
3356  *
3357  * Layout is SX_LSCALAR <length> <data>, with SX_LSCALAR already read.
3358  * The scalar is "long" in that <length> is larger than LG_SCALAR so it
3359  * was not stored on a single byte.
3360  */
3361 static SV *retrieve_lscalar(stcxt_t *cxt)
3362 {
3363         STRLEN len;
3364         SV *sv;
3365
3366         RLEN(len);
3367         TRACEME(("retrieve_lscalar (#%d), len = %d", cxt->tagnum, len));
3368
3369         /*
3370          * Allocate an empty scalar of the suitable length.
3371          */
3372
3373         sv = NEWSV(10002, len);
3374         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3375
3376         /*
3377          * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
3378          *
3379          * Now, for efficiency reasons, read data directly inside the SV buffer,
3380          * and perform the SV final settings directly by duplicating the final
3381          * work done by sv_setpv. Since we're going to allocate lots of scalars
3382          * this way, it's worth the hassle and risk.
3383          */
3384
3385         SAFEREAD(SvPVX(sv), len, sv);
3386         SvCUR_set(sv, len);                             /* Record C string length */
3387         *SvEND(sv) = '\0';                              /* Ensure it's null terminated anyway */
3388         (void) SvPOK_only(sv);                  /* Validate string pointer */
3389         SvTAINT(sv);                                    /* External data cannot be trusted */
3390
3391         TRACEME(("large scalar len %d '%s'", len, SvPVX(sv)));
3392         TRACEME(("ok (retrieve_lscalar at 0x%"UVxf")", PTR2UV(sv)));
3393
3394         return sv;
3395 }
3396
3397 /*
3398  * retrieve_scalar
3399  *
3400  * Retrieve defined short (string) scalar.
3401  *
3402  * Layout is SX_SCALAR <length> <data>, with SX_SCALAR already read.
3403  * The scalar is "short" so <length> is single byte. If it is 0, there
3404  * is no <data> section.
3405  */
3406 static SV *retrieve_scalar(stcxt_t *cxt)
3407 {
3408         int len;
3409         SV *sv;
3410
3411         GETMARK(len);
3412         TRACEME(("retrieve_scalar (#%d), len = %d", cxt->tagnum, len));
3413
3414         /*
3415          * Allocate an empty scalar of the suitable length.
3416          */
3417
3418         sv = NEWSV(10002, len);
3419         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3420
3421         /*
3422          * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
3423          */
3424
3425         if (len == 0) {
3426                 /*
3427                  * newSV did not upgrade to SVt_PV so the scalar is undefined.
3428                  * To make it defined with an empty length, upgrade it now...
3429                  */
3430                 sv_upgrade(sv, SVt_PV);
3431                 SvGROW(sv, 1);
3432                 *SvEND(sv) = '\0';                      /* Ensure it's null terminated anyway */
3433                 TRACEME(("ok (retrieve_scalar empty at 0x%"UVxf")", PTR2UV(sv)));
3434         } else {
3435                 /*
3436                  * Now, for efficiency reasons, read data directly inside the SV buffer,
3437                  * and perform the SV final settings directly by duplicating the final
3438                  * work done by sv_setpv. Since we're going to allocate lots of scalars
3439                  * this way, it's worth the hassle and risk.
3440                  */
3441                 SAFEREAD(SvPVX(sv), len, sv);
3442                 SvCUR_set(sv, len);                     /* Record C string length */
3443                 *SvEND(sv) = '\0';                      /* Ensure it's null terminated anyway */
3444                 TRACEME(("small scalar len %d '%s'", len, SvPVX(sv)));
3445         }
3446
3447         (void) SvPOK_only(sv);                  /* Validate string pointer */
3448         SvTAINT(sv);                                    /* External data cannot be trusted */
3449
3450         TRACEME(("ok (retrieve_scalar at 0x%"UVxf")", PTR2UV(sv)));
3451         return sv;
3452 }
3453
3454 /*
3455  * retrieve_integer
3456  *
3457  * Retrieve defined integer.
3458  * Layout is SX_INTEGER <data>, whith SX_INTEGER already read.
3459  */
3460 static SV *retrieve_integer(stcxt_t *cxt)
3461 {
3462         SV *sv;
3463         IV iv;
3464
3465         TRACEME(("retrieve_integer (#%d)", cxt->tagnum));
3466
3467         READ(&iv, sizeof(iv));
3468         sv = newSViv(iv);
3469         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3470
3471         TRACEME(("integer %d", iv));
3472         TRACEME(("ok (retrieve_integer at 0x%"UVxf")", PTR2UV(sv)));
3473
3474         return sv;
3475 }
3476
3477 /*
3478  * retrieve_netint
3479  *
3480  * Retrieve defined integer in network order.
3481  * Layout is SX_NETINT <data>, whith SX_NETINT already read.
3482  */
3483 static SV *retrieve_netint(stcxt_t *cxt)
3484 {
3485         SV *sv;
3486         int iv;
3487
3488         TRACEME(("retrieve_netint (#%d)", cxt->tagnum));
3489
3490         READ(&iv, sizeof(iv));
3491 #ifdef HAS_NTOHL
3492         sv = newSViv((int) ntohl(iv));
3493         TRACEME(("network integer %d", (int) ntohl(iv)));
3494 #else
3495         sv = newSViv(iv);
3496         TRACEME(("network integer (as-is) %d", iv));
3497 #endif
3498         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3499
3500         TRACEME(("ok (retrieve_netint at 0x%"UVxf")", PTR2UV(sv)));
3501
3502         return sv;
3503 }
3504
3505 /*
3506  * retrieve_double
3507  *
3508  * Retrieve defined double.
3509  * Layout is SX_DOUBLE <data>, whith SX_DOUBLE already read.
3510  */
3511 static SV *retrieve_double(stcxt_t *cxt)
3512 {
3513         SV *sv;
3514         NV nv;
3515
3516         TRACEME(("retrieve_double (#%d)", cxt->tagnum));
3517
3518         READ(&nv, sizeof(nv));
3519         sv = newSVnv(nv);
3520         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3521
3522         TRACEME(("double %"NVff, nv));
3523         TRACEME(("ok (retrieve_double at 0x%"UVxf")", PTR2UV(sv)));
3524
3525         return sv;
3526 }
3527
3528 /*
3529  * retrieve_byte
3530  *
3531  * Retrieve defined byte (small integer within the [-128, +127] range).
3532  * Layout is SX_BYTE <data>, whith SX_BYTE already read.
3533  */
3534 static SV *retrieve_byte(stcxt_t *cxt)
3535 {
3536         SV *sv;
3537         int siv;
3538
3539         TRACEME(("retrieve_byte (#%d)", cxt->tagnum));
3540
3541         GETMARK(siv);
3542         TRACEME(("small integer read as %d", (unsigned char) siv));
3543         sv = newSViv((unsigned char) siv - 128);
3544         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3545
3546         TRACEME(("byte %d", (unsigned char) siv - 128));
3547         TRACEME(("ok (retrieve_byte at 0x%"UVxf")", PTR2UV(sv)));
3548
3549         return sv;
3550 }
3551
3552 /*
3553  * retrieve_undef
3554  *
3555  * Return the undefined value.
3556  */
3557 static SV *retrieve_undef(stcxt_t *cxt)
3558 {
3559         SV* sv;
3560
3561         TRACEME(("retrieve_undef"));
3562
3563         sv = newSV(0);
3564         SEEN(sv);
3565
3566         return sv;
3567 }
3568
3569 /*
3570  * retrieve_sv_undef
3571  *
3572  * Return the immortal undefined value.
3573  */
3574 static SV *retrieve_sv_undef(stcxt_t *cxt)
3575 {
3576         SV *sv = &PL_sv_undef;
3577
3578         TRACEME(("retrieve_sv_undef"));
3579
3580         SEEN(sv);
3581         return sv;
3582 }
3583
3584 /*
3585  * retrieve_sv_yes
3586  *
3587  * Return the immortal yes value.
3588  */
3589 static SV *retrieve_sv_yes(stcxt_t *cxt)
3590 {
3591         SV *sv = &PL_sv_yes;
3592
3593         TRACEME(("retrieve_sv_yes"));
3594
3595         SEEN(sv);
3596         return sv;
3597 }
3598
3599 /*
3600  * retrieve_sv_no
3601  *
3602  * Return the immortal no value.
3603  */
3604 static SV *retrieve_sv_no(stcxt_t *cxt)
3605 {
3606         SV *sv = &PL_sv_no;
3607
3608         TRACEME(("retrieve_sv_no"));
3609
3610         SEEN(sv);
3611         return sv;
3612 }
3613
3614 /*
3615  * retrieve_array
3616  *
3617  * Retrieve a whole array.
3618  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
3619  * Each item is stored as <object>.
3620  *
3621  * When we come here, SX_ARRAY has been read already.
3622  */
3623 static SV *retrieve_array(stcxt_t *cxt)
3624 {
3625         I32 len;
3626         I32 i;
3627         AV *av;
3628         SV *sv;
3629
3630         TRACEME(("retrieve_array (#%d)", cxt->tagnum));
3631
3632         /*
3633          * Read length, and allocate array, then pre-extend it.
3634          */
3635
3636         RLEN(len);
3637         TRACEME(("size = %d", len));
3638         av = newAV();
3639         SEEN(av);                                       /* Will return if array not allocated nicely */
3640         if (len)
3641                 av_extend(av, len);
3642         else
3643                 return (SV *) av;               /* No data follow if array is empty */
3644
3645         /*
3646          * Now get each item in turn...
3647          */
3648
3649         for (i = 0; i < len; i++) {
3650                 TRACEME(("(#%d) item", i));
3651                 sv = retrieve(cxt);                             /* Retrieve item */
3652                 if (!sv)
3653                         return (SV *) 0;
3654                 if (av_store(av, i, sv) == 0)
3655                         return (SV *) 0;
3656         }
3657
3658         TRACEME(("ok (retrieve_array at 0x%"UVxf")", PTR2UV(av)));
3659
3660         return (SV *) av;
3661 }
3662
3663 /*
3664  * retrieve_hash
3665  *
3666  * Retrieve a whole hash table.
3667  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
3668  * Keys are stored as <length> <data>, the <data> section being omitted
3669  * if length is 0.
3670  * Values are stored as <object>.
3671  *
3672  * When we come here, SX_HASH has been read already.
3673  */
3674 static SV *retrieve_hash(stcxt_t *cxt)
3675 {
3676         I32 len;
3677         I32 size;
3678         I32 i;
3679         HV *hv;
3680         SV *sv;
3681         static SV *sv_h_undef = (SV *) 0;               /* hv_store() bug */
3682
3683         TRACEME(("retrieve_hash (#%d)", cxt->tagnum));
3684
3685         /*
3686          * Read length, allocate table.
3687          */
3688
3689         RLEN(len);
3690         TRACEME(("size = %d", len));
3691         hv = newHV();
3692         SEEN(hv);                       /* Will return if table not allocated properly */
3693         if (len == 0)
3694                 return (SV *) hv;       /* No data follow if table empty */
3695
3696         /*
3697          * Now get each key/value pair in turn...
3698          */
3699
3700         for (i = 0; i < len; i++) {
3701                 /*
3702                  * Get value first.
3703                  */
3704
3705                 TRACEME(("(#%d) value", i));
3706                 sv = retrieve(cxt);
3707                 if (!sv)
3708                         return (SV *) 0;
3709
3710                 /*
3711                  * Get key.
3712                  * Since we're reading into kbuf, we must ensure we're not
3713                  * recursing between the read and the hv_store() where it's used.
3714                  * Hence the key comes after the value.
3715                  */
3716
3717                 RLEN(size);                                             /* Get key size */
3718                 KBUFCHK(size);                                  /* Grow hash key read pool if needed */
3719                 if (size)
3720                         READ(kbuf, size);
3721                 kbuf[size] = '\0';                              /* Mark string end, just in case */
3722                 TRACEME(("(#%d) key '%s'", i, kbuf));
3723
3724                 /*
3725                  * Enter key/value pair into hash table.
3726                  */
3727
3728                 if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
3729                         return (SV *) 0;
3730         }
3731
3732         TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv)));
3733
3734         return (SV *) hv;
3735 }
3736
3737 /*
3738  * old_retrieve_array
3739  *
3740  * Retrieve a whole array in pre-0.6 binary format.
3741  *
3742  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
3743  * Each item is stored as SX_ITEM <object> or SX_IT_UNDEF for "holes".
3744  *
3745  * When we come here, SX_ARRAY has been read already.
3746  */
3747 static SV *old_retrieve_array(stcxt_t *cxt)
3748 {
3749         I32 len;
3750         I32 i;
3751         AV *av;
3752         SV *sv;
3753         int c;
3754
3755         TRACEME(("old_retrieve_array (#%d)", cxt->tagnum));
3756
3757         /*
3758          * Read length, and allocate array, then pre-extend it.
3759          */
3760
3761         RLEN(len);
3762         TRACEME(("size = %d", len));
3763         av = newAV();
3764         SEEN(av);                                       /* Will return if array not allocated nicely */
3765         if (len)
3766                 av_extend(av, len);
3767         else
3768                 return (SV *) av;               /* No data follow if array is empty */
3769
3770         /*
3771          * Now get each item in turn...
3772          */
3773
3774         for (i = 0; i < len; i++) {
3775                 GETMARK(c);
3776                 if (c == SX_IT_UNDEF) {
3777                         TRACEME(("(#%d) undef item", i));
3778                         continue;                       /* av_extend() already filled us with undef */
3779                 }
3780                 if (c != SX_ITEM)
3781                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
3782                 TRACEME(("(#%d) item", i));
3783                 sv = retrieve(cxt);                                                     /* Retrieve item */
3784                 if (!sv)
3785                         return (SV *) 0;
3786                 if (av_store(av, i, sv) == 0)
3787                         return (SV *) 0;
3788         }
3789
3790         TRACEME(("ok (old_retrieve_array at 0x%"UVxf")", PTR2UV(av)));
3791
3792         return (SV *) av;
3793 }
3794
3795 /*
3796  * old_retrieve_hash
3797  *
3798  * Retrieve a whole hash table in pre-0.6 binary format.
3799  *
3800  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
3801  * Keys are stored as SX_KEY <length> <data>, the <data> section being omitted
3802  * if length is 0.
3803  * Values are stored as SX_VALUE <object> or SX_VL_UNDEF for "holes".
3804  *
3805  * When we come here, SX_HASH has been read already.
3806  */
3807 static SV *old_retrieve_hash(stcxt_t *cxt)
3808 {
3809         I32 len;
3810         I32 size;
3811         I32 i;
3812         HV *hv;
3813         SV *sv;
3814         int c;
3815         static SV *sv_h_undef = (SV *) 0;               /* hv_store() bug */
3816
3817         TRACEME(("old_retrieve_hash (#%d)", cxt->tagnum));
3818
3819         /*
3820          * Read length, allocate table.
3821          */
3822
3823         RLEN(len);
3824         TRACEME(("size = %d", len));
3825         hv = newHV();
3826         SEEN(hv);                               /* Will return if table not allocated properly */
3827         if (len == 0)
3828                 return (SV *) hv;       /* No data follow if table empty */
3829
3830         /*
3831          * Now get each key/value pair in turn...
3832          */
3833
3834         for (i = 0; i < len; i++) {
3835                 /*
3836                  * Get value first.
3837                  */
3838
3839                 GETMARK(c);
3840                 if (c == SX_VL_UNDEF) {
3841                         TRACEME(("(#%d) undef value", i));
3842                         /*
3843                          * Due to a bug in hv_store(), it's not possible to pass
3844                          * &PL_sv_undef to hv_store() as a value, otherwise the
3845                          * associated key will not be creatable any more. -- RAM, 14/01/97
3846                          */
3847                         if (!sv_h_undef)
3848                                 sv_h_undef = newSVsv(&PL_sv_undef);
3849                         sv = SvREFCNT_inc(sv_h_undef);
3850                 } else if (c == SX_VALUE) {
3851                         TRACEME(("(#%d) value", i));
3852                         sv = retrieve(cxt);
3853                         if (!sv)
3854                                 return (SV *) 0;
3855                 } else
3856                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
3857
3858                 /*
3859                  * Get key.
3860                  * Since we're reading into kbuf, we must ensure we're not
3861                  * recursing between the read and the hv_store() where it's used.
3862                  * Hence the key comes after the value.
3863                  */
3864
3865                 GETMARK(c);
3866                 if (c != SX_KEY)
3867                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
3868                 RLEN(size);                                             /* Get key size */
3869                 KBUFCHK(size);                                  /* Grow hash key read pool if needed */
3870                 if (size)
3871                         READ(kbuf, size);
3872                 kbuf[size] = '\0';                              /* Mark string end, just in case */
3873                 TRACEME(("(#%d) key '%s'", i, kbuf));
3874
3875                 /*
3876                  * Enter key/value pair into hash table.
3877                  */
3878
3879                 if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
3880                         return (SV *) 0;
3881         }
3882
3883         TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv)));
3884
3885         return (SV *) hv;
3886 }
3887
3888 /***
3889  *** Retrieval engine.
3890  ***/
3891
3892 /*
3893  * magic_check
3894  *
3895  * Make sure the stored data we're trying to retrieve has been produced
3896  * on an ILP compatible system with the same byteorder. It croaks out in
3897  * case an error is detected. [ILP = integer-long-pointer sizes]
3898  * Returns null if error is detected, &PL_sv_undef otherwise.
3899  *
3900  * Note that there's no byte ordering info emitted when network order was
3901  * used at store time.
3902  */
3903 static SV *magic_check(stcxt_t *cxt)
3904 {
3905         char buf[256];
3906         char byteorder[256];
3907         int c;
3908         int use_network_order;
3909         int version_major;
3910         int version_minor = 0;
3911
3912         TRACEME(("magic_check"));
3913
3914         /*
3915          * The "magic number" is only for files, not when freezing in memory.
3916          */
3917
3918         if (cxt->fio) {
3919                 STRLEN len = sizeof(magicstr) - 1;
3920                 STRLEN old_len;
3921
3922                 READ(buf, len);                                 /* Not null-terminated */
3923                 buf[len] = '\0';                                /* Is now */
3924
3925                 if (0 == strcmp(buf, magicstr))
3926                         goto magic_ok;
3927
3928                 /*
3929                  * Try to read more bytes to check for the old magic number, which
3930                  * was longer.
3931                  */
3932
3933                 old_len = sizeof(old_magicstr) - 1;
3934                 READ(&buf[len], old_len - len);
3935                 buf[old_len] = '\0';                    /* Is now null-terminated */
3936
3937                 if (strcmp(buf, old_magicstr))
3938                         CROAK(("File is not a perl storable"));
3939         }
3940
3941 magic_ok:
3942         /*
3943          * Starting with 0.6, the "use_network_order" byte flag is also used to
3944          * indicate the version number of the binary, and therefore governs the
3945          * setting of sv_retrieve_vtbl. See magic_write().
3946          */
3947
3948         GETMARK(use_network_order);
3949         version_major = use_network_order >> 1;
3950         cxt->retrieve_vtbl = version_major ? sv_retrieve : sv_old_retrieve;
3951
3952         TRACEME(("magic_check: netorder = 0x%x", use_network_order));
3953
3954
3955         /*
3956          * Starting with 0.7 (binary major 2), a full byte is dedicated to the
3957          * minor version of the protocol.  See magic_write().
3958          */
3959
3960         if (version_major > 1)
3961                 GETMARK(version_minor);
3962
3963         cxt->ver_major = version_major;
3964         cxt->ver_minor = version_minor;
3965
3966         TRACEME(("binary image version is %d.%d", version_major, version_minor));
3967
3968         /*
3969          * Inter-operability sanity check: we can't retrieve something stored
3970          * using a format more recent than ours, because we have no way to
3971          * know what has changed, and letting retrieval go would mean a probable
3972          * failure reporting a "corrupted" storable file.
3973          */
3974
3975         if (
3976                 version_major > STORABLE_BIN_MAJOR ||
3977                         (version_major == STORABLE_BIN_MAJOR &&
3978                         version_minor > STORABLE_BIN_MINOR)
3979         )
3980                 CROAK(("Storable binary image v%d.%d more recent than I am (v%d.%d)",
3981                         version_major, version_minor,
3982                         STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
3983
3984         /*
3985          * If they stored using network order, there's no byte ordering
3986          * information to check.
3987          */
3988
3989         if (cxt->netorder = (use_network_order & 0x1))
3990                 return &PL_sv_undef;                    /* No byte ordering info */
3991
3992         sprintf(byteorder, "%lx", (unsigned long) BYTEORDER);
3993         GETMARK(c);
3994         READ(buf, c);                                           /* Not null-terminated */
3995         buf[c] = '\0';                                          /* Is now */
3996
3997         if (strcmp(buf, byteorder))
3998                 CROAK(("Byte order is not compatible"));
3999
4000         GETMARK(c);             /* sizeof(int) */
4001         if ((int) c != sizeof(int))
4002                 CROAK(("Integer size is not compatible"));
4003
4004         GETMARK(c);             /* sizeof(long) */
4005         if ((int) c != sizeof(long))
4006                 CROAK(("Long integer size is not compatible"));
4007
4008         GETMARK(c);             /* sizeof(char *) */
4009         if ((int) c != sizeof(char *))
4010                 CROAK(("Pointer integer size is not compatible"));
4011
4012         return &PL_sv_undef;    /* OK */
4013 }
4014
4015 /*
4016  * retrieve
4017  *
4018  * Recursively retrieve objects from the specified file and return their
4019  * root SV (which may be an AV or an HV for what we care).
4020  * Returns null if there is a problem.
4021  */
4022 static SV *retrieve(stcxt_t *cxt)
4023 {
4024         int type;
4025         SV **svh;
4026         SV *sv;
4027
4028         TRACEME(("retrieve"));
4029
4030         /*
4031          * Grab address tag which identifies the object if we are retrieving
4032          * an older format. Since the new binary format counts objects and no
4033          * longer explicitely tags them, we must keep track of the correspondance
4034          * ourselves.
4035          *
4036          * The following section will disappear one day when the old format is
4037          * no longer supported, hence the final "goto" in the "if" block.
4038          */
4039
4040         if (cxt->hseen) {                                               /* Retrieving old binary */
4041                 stag_t tag;
4042                 if (cxt->netorder) {
4043                         I32 nettag;
4044                         READ(&nettag, sizeof(I32));             /* Ordered sequence of I32 */
4045                         tag = (stag_t) nettag;
4046                 } else
4047                         READ(&tag, sizeof(stag_t));             /* Original address of the SV */
4048
4049                 GETMARK(type);
4050                 if (type == SX_OBJECT) {
4051                         I32 tagn;
4052                         svh = hv_fetch(cxt->hseen, (char *) &tag, sizeof(tag), FALSE);
4053                         if (!svh)
4054                                 CROAK(("Old tag 0x%x should have been mapped already", tag));
4055                         tagn = SvIV(*svh);      /* Mapped tag number computed earlier below */
4056
4057                         /*
4058                          * The following code is common with the SX_OBJECT case below.
4059                          */
4060
4061                         svh = av_fetch(cxt->aseen, tagn, FALSE);
4062                         if (!svh)
4063                                 CROAK(("Object #%d should have been retrieved already", tagn));
4064                         sv = *svh;
4065                         TRACEME(("has retrieved #%d at 0x%"UVxf, tagn, PTR2UV(sv)));
4066                         SvREFCNT_inc(sv);       /* One more reference to this same sv */
4067                         return sv;                      /* The SV pointer where object was retrieved */
4068                 }
4069
4070                 /*
4071                  * Map new object, but don't increase tagnum. This will be done
4072                  * by each of the retrieve_* functions when they call SEEN().
4073                  *
4074                  * The mapping associates the "tag" initially present with a unique
4075                  * tag number. See test for SX_OBJECT above to see how this is perused.
4076                  */
4077
4078                 if (!hv_store(cxt->hseen, (char *) &tag, sizeof(tag),
4079                                 newSViv(cxt->tagnum), 0))
4080                         return (SV *) 0;
4081
4082                 goto first_time;
4083         }
4084
4085         /*
4086          * Regular post-0.6 binary format.
4087          */
4088
4089 again:
4090         GETMARK(type);
4091
4092         TRACEME(("retrieve type = %d", type));
4093
4094         /*
4095          * Are we dealing with an object we should have already retrieved?
4096          */
4097
4098         if (type == SX_OBJECT) {
4099                 I32 tag;
4100                 READ(&tag, sizeof(I32));
4101                 tag = ntohl(tag);
4102                 svh = av_fetch(cxt->aseen, tag, FALSE);
4103                 if (!svh)
4104                         CROAK(("Object #%d should have been retrieved already", tag));
4105                 sv = *svh;
4106                 TRACEME(("had retrieved #%d at 0x%"UVxf, tag, PTR2UV(sv)));
4107                 SvREFCNT_inc(sv);       /* One more reference to this same sv */
4108                 return sv;                      /* The SV pointer where object was retrieved */
4109         }
4110
4111 first_time:             /* Will disappear when support for old format is dropped */
4112
4113         /*
4114          * Okay, first time through for this one.
4115          */
4116
4117         sv = RETRIEVE(cxt, type)(cxt);
4118         if (!sv)
4119                 return (SV *) 0;                        /* Failed */
4120
4121         /*
4122          * Old binary formats (pre-0.7).
4123          *
4124          * Final notifications, ended by SX_STORED may now follow.
4125          * Currently, the only pertinent notification to apply on the
4126          * freshly retrieved object is either:
4127          *    SX_CLASS <char-len> <classname> for short classnames.
4128          *    SX_LG_CLASS <int-len> <classname> for larger one (rare!).
4129          * Class name is then read into the key buffer pool used by
4130          * hash table key retrieval.
4131          */
4132
4133         if (cxt->ver_major < 2) {
4134                 while ((type = GETCHAR()) != SX_STORED) {
4135                         I32 len;
4136                         switch (type) {
4137                         case SX_CLASS:
4138                                 GETMARK(len);                   /* Length coded on a single char */
4139                                 break;
4140                         case SX_LG_CLASS:                       /* Length coded on a regular integer */
4141                                 RLEN(len);
4142                                 break;
4143                         case EOF:
4144                         default:
4145                                 return (SV *) 0;                /* Failed */
4146                         }
4147                         KBUFCHK(len);                           /* Grow buffer as necessary */
4148                         if (len)
4149                                 READ(kbuf, len);
4150                         kbuf[len] = '\0';                       /* Mark string end */
4151                         BLESS(sv, kbuf);
4152                 }
4153         }
4154
4155         TRACEME(("ok (retrieved 0x%"UVxf", refcnt=%d, %s)", PTR2UV(sv),
4156                 SvREFCNT(sv) - 1, sv_reftype(sv, FALSE)));
4157
4158         return sv;      /* Ok */
4159 }
4160
4161 /*
4162  * do_retrieve
4163  *
4164  * Retrieve data held in file and return the root object.
4165  * Common routine for pretrieve and mretrieve.
4166  */
4167 static SV *do_retrieve(
4168         PerlIO *f,
4169         SV *in,
4170         int optype)
4171 {
4172         dSTCXT;
4173         SV *sv;
4174         struct extendable msave;        /* Where potentially valid mbuf is saved */
4175
4176         TRACEME(("do_retrieve (optype = 0x%x)", optype));
4177
4178         optype |= ST_RETRIEVE;
4179
4180         /*
4181          * Sanity assertions for retrieve dispatch tables.
4182          */
4183
4184         ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve),
4185                 ("old and new retrieve dispatch table have same size"));
4186         ASSERT(sv_old_retrieve[SX_ERROR] == retrieve_other,
4187                 ("SX_ERROR entry correctly initialized in old dispatch table"));
4188         ASSERT(sv_retrieve[SX_ERROR] == retrieve_other,
4189                 ("SX_ERROR entry correctly initialized in new dispatch table"));
4190
4191         /*
4192          * Workaround for CROAK leak: if they enter with a "dirty" context,
4193          * free up memory for them now.
4194          */
4195
4196         if (cxt->dirty)
4197                 clean_context(cxt);
4198
4199         /*
4200          * Now that STORABLE_xxx hooks exist, it is possible that they try to
4201          * re-enter retrieve() via the hooks.
4202          */
4203
4204         if (cxt->entry)
4205                 cxt = allocate_context(cxt);
4206
4207         cxt->entry++;
4208
4209         ASSERT(cxt->entry == 1, ("starting new recursion"));
4210         ASSERT(!cxt->dirty, ("clean context"));
4211
4212         /*
4213          * Prepare context.
4214          *
4215          * Data is loaded into the memory buffer when f is NULL, unless `in' is
4216          * also NULL, in which case we're expecting the data to already lie
4217          * in the buffer (dclone case).
4218          */
4219
4220         KBUFINIT();                                     /* Allocate hash key reading pool once */
4221
4222         if (!f && in) {
4223                 StructCopy(&cxt->membuf, &msave, struct extendable);
4224                 MBUF_LOAD(in);
4225         }
4226
4227
4228         /*
4229          * Magic number verifications.
4230          *
4231          * This needs to be done before calling init_retrieve_context()
4232          * since the format indication in the file are necessary to conduct
4233          * some of the initializations.
4234          */
4235
4236         cxt->fio = f;                           /* Where I/O are performed */
4237
4238         if (!magic_check(cxt))
4239                 CROAK(("Magic number checking on storable %s failed",
4240                         cxt->fio ? "file" : "string"));
4241
4242         TRACEME(("data stored in %s format",
4243                 cxt->netorder ? "net order" : "native"));
4244
4245         init_retrieve_context(cxt, optype);
4246
4247         ASSERT(is_retrieving(), ("within retrieve operation"));
4248
4249         sv = retrieve(cxt);             /* Recursively retrieve object, get root SV */
4250
4251         /*
4252          * Final cleanup.
4253          */
4254
4255         if (!f && in)
4256                 StructCopy(&msave, &cxt->membuf, struct extendable);
4257
4258         /*
4259          * The "root" context is never freed.
4260          */
4261
4262         clean_retrieve_context(cxt);
4263         if (cxt->prev)                          /* This context was stacked */
4264                 free_context(cxt);              /* It was not the "root" context */
4265
4266         /*
4267          * Prepare returned value.
4268          */
4269
4270         if (!sv) {
4271                 TRACEME(("retrieve ERROR"));
4272                 return &PL_sv_undef;            /* Something went wrong, return undef */
4273         }
4274
4275         TRACEME(("retrieve got %s(0x%"UVxf")",
4276                 sv_reftype(sv, FALSE), PTR2UV(sv)));
4277
4278         /*
4279          * Backward compatibility with Storable-0.5@9 (which we know we
4280          * are retrieving if hseen is non-null): don't create an extra RV
4281          * for objects since we special-cased it at store time.
4282          *
4283          * Build a reference to the SV returned by pretrieve even if it is
4284          * already one and not a scalar, for consistency reasons.
4285          *
4286          * NB: although context might have been cleaned, the value of `cxt->hseen'
4287          * remains intact, and can be used as a flag.
4288          */
4289
4290         if (cxt->hseen) {                       /* Was not handling overloading by then */
4291                 SV *rv;
4292                 if (sv_type(sv) == svis_REF && (rv = SvRV(sv)) && SvOBJECT(rv))
4293                         return sv;
4294         }
4295
4296         /*
4297          * If reference is overloaded, restore behaviour.
4298          *
4299          * NB: minor glitch here: normally, overloaded refs are stored specially
4300          * so that we can croak when behaviour cannot be re-installed, and also
4301          * avoid testing for overloading magic at each reference retrieval.
4302          *
4303          * Unfortunately, the root reference is implicitly stored, so we must
4304          * check for possible overloading now.  Furthermore, if we don't restore
4305          * overloading, we cannot croak as if the original ref was, because we
4306          * have no way to determine whether it was an overloaded ref or not in
4307          * the first place.
4308          *
4309          * It's a pity that overloading magic is attached to the rv, and not to
4310          * the underlying sv as blessing is.
4311          */
4312
4313         if (SvOBJECT(sv)) {
4314                 HV *stash = (HV *) SvSTASH (sv);
4315                 SV *rv = newRV_noinc(sv);
4316                 if (stash && Gv_AMG(stash)) {
4317                         SvAMAGIC_on(rv);
4318                         TRACEME(("restored overloading on root reference"));
4319                 }
4320                 return rv;
4321         }
4322
4323         return newRV_noinc(sv);
4324 }
4325
4326 /*
4327  * pretrieve
4328  *
4329  * Retrieve data held in file and return the root object, undef on error.
4330  */
4331 SV *pretrieve(PerlIO *f)
4332 {
4333         TRACEME(("pretrieve"));
4334         return do_retrieve(f, Nullsv, 0);
4335 }
4336
4337 /*
4338  * mretrieve
4339  *
4340  * Retrieve data held in scalar and return the root object, undef on error.
4341  */
4342 SV *mretrieve(SV *sv)
4343 {
4344         TRACEME(("mretrieve"));
4345         return do_retrieve((PerlIO*) 0, sv, 0);
4346 }
4347
4348 /***
4349  *** Deep cloning
4350  ***/
4351
4352 /*
4353  * dclone
4354  *
4355  * Deep clone: returns a fresh copy of the original referenced SV tree.
4356  *
4357  * This is achieved by storing the object in memory and restoring from
4358  * there. Not that efficient, but it should be faster than doing it from
4359  * pure perl anyway.
4360  */
4361 SV *dclone(SV *sv)
4362 {
4363         dSTCXT;
4364         int size;
4365         stcxt_t *real_context;
4366         SV *out;
4367
4368         TRACEME(("dclone"));
4369
4370         /*
4371          * Workaround for CROAK leak: if they enter with a "dirty" context,
4372          * free up memory for them now.
4373          */
4374
4375         if (cxt->dirty)
4376                 clean_context(cxt);
4377
4378         /*
4379          * do_store() optimizes for dclone by not freeing its context, should
4380          * we need to allocate one because we're deep cloning from a hook.
4381          */
4382
4383         if (!do_store((PerlIO*) 0, sv, ST_CLONE, FALSE, (SV**) 0))
4384                 return &PL_sv_undef;                            /* Error during store */
4385
4386         /*
4387          * Because of the above optimization, we have to refresh the context,
4388          * since a new one could have been allocated and stacked by do_store().
4389          */
4390
4391         { dSTCXT; real_context = cxt; }         /* Sub-block needed for macro */
4392         cxt = real_context;                                     /* And we need this temporary... */
4393
4394         /*
4395          * Now, `cxt' may refer to a new context.
4396          */
4397
4398         ASSERT(!cxt->dirty, ("clean context"));
4399         ASSERT(!cxt->entry, ("entry will not cause new context allocation"));
4400
4401         size = MBUF_SIZE();
4402         TRACEME(("dclone stored %d bytes", size));
4403
4404         MBUF_INIT(size);
4405         out = do_retrieve((PerlIO*) 0, Nullsv, ST_CLONE);       /* Will free non-root context */
4406
4407         TRACEME(("dclone returns 0x%"UVxf, PTR2UV(out)));
4408
4409         return out;
4410 }
4411
4412 /***
4413  *** Glue with perl.
4414  ***/
4415
/*
 * The Perl IO GV object distinguishes between input and output for
 * sockets, but not for plain files.  So that Storable can work
 * transparently on both plain files and sockets, we ask xsubpp to fetch
 * the right object for us: hence the OutputStream and InputStream
 * declarations used in the XS section.
 *
 * Perl include files older than 5.004_05 do not define those standard
 * typemap entries, so supply them here when they are missing.
 */

#if !defined(OutputStream)
#define OutputStream	PerlIO *
#define InputStream	PerlIO *
#endif	/* !defined(OutputStream) */
4430
MODULE = Storable       PACKAGE = Storable

# Turn on Perl prototypes for the XSUBs declared below.
PROTOTYPES: ENABLE

# Code added to the extension's bootstrap function: calls init_perinterp().
BOOT:
    init_perinterp();

# pstore(f, obj): takes an output stream and an SV, returns an int.
# NOTE(review): presumably stores obj to f in native byte order; the C
# implementation is outside this section -- confirm there.
int
pstore(f,obj)
OutputStream    f
SV *    obj

# net_pstore(f, obj): same signature as pstore.
# NOTE(review): presumably the network byte order variant -- confirm
# against the C implementation.
int
net_pstore(f,obj)
OutputStream    f
SV *    obj

# mstore(obj): takes an SV, returns an SV.
# NOTE(review): presumably the in-memory counterpart of pstore -- confirm
# against the C implementation.
SV *
mstore(obj)
SV *    obj

# net_mstore(obj): same signature as mstore.
# NOTE(review): presumably the network byte order variant -- confirm
# against the C implementation.
SV *
net_mstore(obj)
SV *    obj

# pretrieve(f): retrieves the data held in the input stream f and returns
# the root object, undef on error.
SV *
pretrieve(f)
InputStream     f

# mretrieve(sv): retrieves the data held in the scalar sv and returns the
# root object, undef on error.
SV *
mretrieve(sv)
SV *    sv

# dclone(sv): deep clone, returns a fresh copy of the SV tree referenced
# by sv (stored in memory, then retrieved from there).
SV *
dclone(sv)
SV *    sv

# last_op_in_netorder(): no argument, returns an int.
# NOTE(review): presumably tells whether the last operation used network
# byte order -- confirm against the C implementation.
int
last_op_in_netorder()

# is_storing(): no argument, returns an int.
# NOTE(review): presumably true while a store operation is in progress --
# confirm against the C implementation.
int
is_storing()

# is_retrieving(): no argument, returns an int.  do_retrieve() asserts that
# it holds within a retrieve operation.
int
is_retrieving()
4476