ext/Storable/Storable.xs

   1 /*
   2  * Store and retrieve mechanism.
   3  */
   4
   5 /*
   6  * $Id: Storable.xs,v 1.0.1.4 2000/10/26 17:11:04 ram Exp ram $
   7  *
   8  *  Copyright (c) 1995-2000, Raphael Manfredi
   9  *
  10  *  You may redistribute only under the same terms as Perl 5, as specified
  11  *  in the README file that comes with the distribution.
  12  *
  13  * $Log: Storable.xs,v $
  14  * Revision 1.0.1.4  2000/10/26 17:11:04  ram
  15  * patch5: auto requires module of blessed ref when STORABLE_thaw misses
  16  *
  17  * Revision 1.0.1.3  2000/09/29 19:49:57  ram
  18  * patch3: avoid using "tainted" and "dirty" since Perl remaps them via cpp
  19  *
  20  * $Log: Storable.xs,v $
  21  * Revision 1.0  2000/09/01 19:40:41  ram
  22  * Baseline for first official release.
  23  *
  24  */
  25
  26 #include <EXTERN.h>
  27 #include <perl.h>
  28 #include <patchlevel.h>         /* Perl's one, needed since 5.6 */
  29 #include <XSUB.h>
  30
  31 #if 0
  32 #define DEBUGME /* Debug mode, turns assertions on as well */
  33 #define DASSERT /* Assertion mode */
  34 #endif
  35
  36 /*
  37  * Pre PerlIO time when none of USE_PERLIO and PERLIO_IS_STDIO is defined
  38  * Provide them with the necessary defines so they can build with pre-5.004.
  39  */
  40 #ifndef USE_PERLIO
  41 #ifndef PERLIO_IS_STDIO
  42 #define PerlIO FILE
  43 #define PerlIO_getc(x) getc(x)
  44 #define PerlIO_putc(f,x) putc(x,f)
  45 #define PerlIO_read(x,y,z) fread(y,1,z,x)
  46 #define PerlIO_write(x,y,z) fwrite(y,1,z,x)
  47 #define PerlIO_stdoutf printf
  48 #endif  /* PERLIO_IS_STDIO */
  49 #endif  /* USE_PERLIO */
  50
  51 /*
  52  * Earlier versions of perl might be used, we can't assume they have the latest!
  53  */
  54
  55 #ifndef PERL_VERSION            /* For perls < 5.6 */
  56 #define PERL_VERSION PATCHLEVEL
  57 #ifndef newRV_noinc
  58 #define newRV_noinc(sv)         ((Sv = newRV(sv)), --SvREFCNT(SvRV(Sv)), Sv)
  59 #endif
  60 #if (PATCHLEVEL <= 4)           /* Older perls (<= 5.004) lack PL_ namespace */
  61 #define PL_sv_yes       sv_yes
  62 #define PL_sv_no        sv_no
  63 #define PL_sv_undef     sv_undef
  64 #if (SUBVERSION <= 4)           /* 5.004_04 has been reported to lack newSVpvn */
  65 #define newSVpvn newSVpv
  66 #endif
  67 #endif                                          /* PATCHLEVEL <= 4 */
  68 #ifndef HvSHAREKEYS_off
  69 #define HvSHAREKEYS_off(hv)     /* Ignore */
  70 #endif
  71 #ifndef AvFILLp                         /* Older perls (<=5.003) lack AvFILLp */
  72 #define AvFILLp AvFILL
  73 #endif
  74 typedef double NV;                      /* Older perls lack the NV type */
  75 #define IVdf            "ld"    /* Various printf formats for Perl types */
  76 #define UVuf            "lu"
  77 #define UVof            "lo"
  78 #define UVxf            "lx"
  79 #define INT2PTR(t,v) (t)(IV)(v)
  80 #define PTR2UV(v)    (unsigned long)(v)
  81 #endif                                          /* PERL_VERSION -- perls < 5.6 */
  82
  83 #ifndef NVef                            /* The following were not part of perl 5.6 */
  84 #if defined(USE_LONG_DOUBLE) && \
  85         defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl)
  86 #define NVef            PERL_PRIeldbl
  87 #define NVff            PERL_PRIfldbl
  88 #define NVgf            PERL_PRIgldbl
  89 #else
  90 #define NVef            "e"
  91 #define NVff            "f"
  92 #define NVgf            "g"
  93 #endif
  94 #endif
  95
  96 #ifdef DEBUGME
  97 #ifndef DASSERT
  98 #define DASSERT
  99 #endif
 100 #define TRACEME(x)      do { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); } while (0)
 101 #else
 102 #define TRACEME(x)
 103 #endif
 104
 105 #ifdef DASSERT
 106 #define ASSERT(x,y)     do {                                                                    \
 107         if (!(x)) {                                                                                             \
 108                 PerlIO_stdoutf("ASSERT FAILED (\"%s\", line %d): ",     \
 109                         __FILE__, __LINE__);                                                    \
 110                 PerlIO_stdoutf y; PerlIO_stdoutf("\n");                         \
 111         }                                                                                                               \
 112 } while (0)
 113 #else
 114 #define ASSERT(x,y)
 115 #endif
 116
 117 /*
 118  * Type markers.
 119  */
 120
 121 #define C(x) ((char) (x))       /* For markers with dynamic retrieval handling */
 122
 123 #define SX_OBJECT       C(0)    /* Already stored object */
 124 #define SX_LSCALAR      C(1)    /* Scalar (large binary) follows (length, data) */
 125 #define SX_ARRAY        C(2)    /* Array forthcominng (size, item list) */
 126 #define SX_HASH         C(3)    /* Hash forthcoming (size, key/value pair list) */
 127 #define SX_REF          C(4)    /* Reference to object forthcoming */
 128 #define SX_UNDEF        C(5)    /* Undefined scalar */
 129 #define SX_INTEGER      C(6)    /* Integer forthcoming */
 130 #define SX_DOUBLE       C(7)    /* Double forthcoming */
 131 #define SX_BYTE         C(8)    /* (signed) byte forthcoming */
 132 #define SX_NETINT       C(9)    /* Integer in network order forthcoming */
 133 #define SX_SCALAR       C(10)   /* Scalar (binary, small) follows (length, data) */
 134 #define SX_TIED_ARRAY  C(11)  /* Tied array forthcoming */
 135 #define SX_TIED_HASH   C(12)  /* Tied hash forthcoming */
 136 #define SX_TIED_SCALAR C(13)  /* Tied scalar forthcoming */
 137 #define SX_SV_UNDEF     C(14)   /* Perl's immortal PL_sv_undef */
 138 #define SX_SV_YES       C(15)   /* Perl's immortal PL_sv_yes */
 139 #define SX_SV_NO        C(16)   /* Perl's immortal PL_sv_no */
 140 #define SX_BLESS        C(17)   /* Object is blessed */
 141 #define SX_IX_BLESS     C(18)   /* Object is blessed, classname given by index */
 142 #define SX_HOOK         C(19)   /* Stored via hook, user-defined */
 143 #define SX_OVERLOAD     C(20)   /* Overloaded reference */
 144 #define SX_TIED_KEY C(21)   /* Tied magic key forthcoming */
 145 #define SX_TIED_IDX C(22)   /* Tied magic index forthcoming */
 146 #define SX_UTF8STR      C(23)   /* UTF-8 string forthcoming (small) */
 147 #define SX_LUTF8STR     C(24)   /* UTF-8 string forthcoming (large) */
 148 #define SX_ERROR        C(25)   /* Error */
 149
 150 /*
 151  * Those are only used to retrieve "old" pre-0.6 binary images.
 152  */
 153 #define SX_ITEM         'i'             /* An array item introducer */
 154 #define SX_IT_UNDEF     'I'             /* Undefined array item */
 155 #define SX_KEY          'k'             /* An hash key introducer */
 156 #define SX_VALUE        'v'             /* An hash value introducer */
 157 #define SX_VL_UNDEF     'V'             /* Undefined hash value */
 158
 159 /*
 160  * Those are only used to retrieve "old" pre-0.7 binary images
 161  */
 162
 163 #define SX_CLASS        'b'             /* Object is blessed, class name length <255 */
 164 #define SX_LG_CLASS 'B'         /* Object is blessed, class name length >255 */
 165 #define SX_STORED       'X'             /* End of object */
 166
 167 /*
 168  * Limits between short/long length representation.
 169  */
 170
 171 #define LG_SCALAR       255             /* Large scalar length limit */
 172 #define LG_BLESS        127             /* Large classname bless limit */
 173
 174 /*
 175  * Operation types
 176  */
 177
 178 #define ST_STORE        0x1             /* Store operation */
 179 #define ST_RETRIEVE     0x2             /* Retrieval operation */
 180 #define ST_CLONE        0x4             /* Deep cloning operation */
 181
 182 /*
 183  * The following structure is used for hash table key retrieval. Since, when
 184  * retrieving objects, we'll be facing blessed hash references, it's best
 185  * to pre-allocate that buffer once and resize it as the need arises, never
 186  * freeing it (keys will be saved away someplace else anyway, so even large
 187  * keys are not enough a motivation to reclaim that space).
 188  *
 189  * This structure is also used for memory store/retrieve operations which
 190  * happen in a fixed place before being malloc'ed elsewhere if persistency
 191  * is required. Hence the aptr pointer.
 192  */
 193 struct extendable {
 194         char *arena;            /* Will hold hash key strings, resized as needed */
 195         STRLEN asiz;            /* Size of aforementionned buffer */
 196         char *aptr;                     /* Arena pointer, for in-place read/write ops */
 197         char *aend;                     /* First invalid address */
 198 };
 199
 200 /*
 201  * At store time:
 202  * An hash table records the objects which have already been stored.
 203  * Those are referred to as SX_OBJECT in the file, and their "tag" (i.e.
 204  * an arbitrary sequence number) is used to identify them.
 205  *
 206  * At retrieve time:
 207  * An array table records the objects which have already been retrieved,
 208  * as seen by the tag determind by counting the objects themselves. The
 209  * reference to that retrieved object is kept in the table, and is returned
 210  * when an SX_OBJECT is found bearing that same tag.
 211  *
 212  * The same processing is used to record "classname" for blessed objects:
 213  * indexing by a hash at store time, and via an array at retrieve time.
 214  */
 215
 216 typedef unsigned long stag_t;   /* Used by pre-0.6 binary format */
 217
 218 /*
 219  * The following "thread-safe" related defines were contributed by
 220  * Murray Nesbitt <murray@activestate.com> and integrated by RAM, who
 221  * only renamed things a little bit to ensure consistency with surrounding
 222  * code.        -- RAM, 14/09/1999
 223  *
 224  * The original patch suffered from the fact that the stcxt_t structure
 225  * was global.  Murray tried to minimize the impact on the code as much as
 226  * possible.
 227  *
 228  * Starting with 0.7, Storable can be re-entrant, via the STORABLE_xxx hooks
 229  * on objects.  Therefore, the notion of context needs to be generalized,
 230  * threading or not.
 231  */
 232
 233 #define MY_VERSION "Storable(" XS_VERSION ")"
 234
 235 /*
 236  * Fields s_tainted and s_dirty are prefixed with s_ because Perl's include
 237  * files remap tainted and dirty when threading is enabled.  That's bad for
 238  * perl to remap such common words.     -- RAM, 29/09/00
 239  */
 240
 241 typedef struct stcxt {
 242         int entry;                      /* flags recursion */
 243         int optype;                     /* type of traversal operation */
 244     HV *hseen;                  /* which objects have been seen, store time */
 245     AV *aseen;                  /* which objects have been seen, retrieve time */
 246     HV *hclass;                 /* which classnames have been seen, store time */
 247     AV *aclass;                 /* which classnames have been seen, retrieve time */
 248     HV *hook;                   /* cache for hook methods per class name */
 249     I32 tagnum;                 /* incremented at store time for each seen object */
 250     I32 classnum;               /* incremented at store time for each seen classname */
 251     int netorder;               /* true if network order used */
 252     int s_tainted;              /* true if input source is tainted, at retrieve time */
 253     int forgive_me;             /* whether to be forgiving... */
 254     int canonical;              /* whether to store hashes sorted by key */
 255         int s_dirty;            /* context is dirty due to CROAK() -- can be cleaned */
 256     struct extendable keybuf;   /* for hash key retrieval */
 257     struct extendable membuf;   /* for memory store/retrieve operations */
 258         PerlIO *fio;            /* where I/O are performed, NULL for memory */
 259         int ver_major;          /* major of version for retrieved object */
 260         int ver_minor;          /* minor of version for retrieved object */
 261         SV *(**retrieve_vtbl)();        /* retrieve dispatch table */
 262         struct stcxt *prev;     /* contexts chained backwards in real recursion */
 263 } stcxt_t;
 264
 265 #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || defined(PERL_CAPI)
 266
 267 #if (PATCHLEVEL <= 4) && (SUBVERSION < 68)
 268 #define dSTCXT_SV                                                                       \
 269         SV *perinterp_sv = perl_get_sv(MY_VERSION, FALSE)
 270 #else   /* >= perl5.004_68 */
 271 #define dSTCXT_SV                                                                       \
 272         SV *perinterp_sv = *hv_fetch(PL_modglobal,              \
 273                 MY_VERSION, sizeof(MY_VERSION)-1, TRUE)
 274 #endif  /* < perl5.004_68 */
 275
 276 #define dSTCXT_PTR(T,name)                                                      \
 277         T name = (perinterp_sv && SvIOK(perinterp_sv)   \
 278                                 ? INT2PTR(T, SvIVX(perinterp_sv)) : (T) 0)
 279 #define dSTCXT                                                                          \
 280         dSTCXT_SV;                                                                              \
 281         dSTCXT_PTR(stcxt_t *, cxt)
 282
 283 #define INIT_STCXT                                                                      \
 284       dSTCXT;                                                                           \
 285       Newz(0, cxt, 1, stcxt_t);                                         \
 286       sv_setiv(perinterp_sv, PTR2IV(cxt))
 287
 288 #define SET_STCXT(x) do {                                                       \
 289         dSTCXT_SV;                                                                              \
 290         sv_setiv(perinterp_sv, PTR2IV(x));                              \
 291 } while (0)
 292
 293 #else /* !MULTIPLICITY && !PERL_OBJECT && !PERL_CAPI */
 294
 295 static stcxt_t Context;
 296 static stcxt_t *Context_ptr = &Context;
 297 #define dSTCXT                  stcxt_t *cxt = Context_ptr
 298 #define INIT_STCXT              dSTCXT
 299 #define SET_STCXT(x)    Context_ptr = x
 300
 301 #endif /* MULTIPLICITY || PERL_OBJECT || PERL_CAPI */
 302
 303 /*
 304  * KNOWN BUG:
 305  *   Croaking implies a memory leak, since we don't use setjmp/longjmp
 306  *   to catch the exit and free memory used during store or retrieve
 307  *   operations.  This is not too difficult to fix, but I need to understand
 308  *   how Perl does it, and croaking is exceptional anyway, so I lack the
 309  *   motivation to do it.
 310  *
 311  * The current workaround is to mark the context as dirty when croaking,
 312  * so that data structures can be freed whenever we renter Storable code
 313  * (but only *then*: it's a workaround, not a fix).
 314  *
 315  * This is also imperfect, because we don't really know how far they trapped
 316  * the croak(), and when we were recursing, we won't be able to clean anything
 317  * but the topmost context stacked.
 318  */
 319
 320 #define CROAK(x)        do { cxt->s_dirty = 1; croak x; } while (0)
 321
 322 /*
 323  * End of "thread-safe" related definitions.
 324  */
 325
 326 /*
 327  * LOW_32BITS
 328  *
 329  * Keep only the low 32 bits of a pointer (used for tags, which are not
 330  * really pointers).
 331  */
 332
 333 #if PTRSIZE <= 4
 334 #define LOW_32BITS(x)   ((I32) (x))
 335 #else
 336 #define LOW_32BITS(x)   ((I32) ((unsigned long) (x) & 0xffffffffUL))
 337 #endif
 338
 339 /*
 340  * oI, oS, oC
 341  *
 342  * Hack for Crays, where sizeof(I32) == 8, and which are big-endians.
 343  * Used in the WLEN and RLEN macros.
 344  */
 345
 346 #if INTSIZE > 4
 347 #define oI(x)   ((I32 *) ((char *) (x) + 4))
 348 #define oS(x)   ((x) - 4)
 349 #define oC(x)   (x = 0)
 350 #define CRAY_HACK
 351 #else
 352 #define oI(x)   (x)
 353 #define oS(x)   (x)
 354 #define oC(x)
 355 #endif
 356
 357 /*
 358  * key buffer handling
 359  */
 360 #define kbuf    (cxt->keybuf).arena
 361 #define ksiz    (cxt->keybuf).asiz
 362 #define KBUFINIT() do {                                 \
 363         if (!kbuf) {                                            \
 364                 TRACEME(("** allocating kbuf of 128 bytes")); \
 365                 New(10003, kbuf, 128, char);    \
 366                 ksiz = 128;                                             \
 367         }                                                                       \
 368 } while (0)
 369 #define KBUFCHK(x) do {                 \
 370         if (x >= ksiz) {                        \
 371                 TRACEME(("** extending kbuf to %d bytes", x+1)); \
 372                 Renew(kbuf, x+1, char); \
 373                 ksiz = x+1;                             \
 374         }                                                       \
 375 } while (0)
 376
 377 /*
 378  * memory buffer handling
 379  */
 380 #define mbase   (cxt->membuf).arena
 381 #define msiz    (cxt->membuf).asiz
 382 #define mptr    (cxt->membuf).aptr
 383 #define mend    (cxt->membuf).aend
 384
 385 #define MGROW   (1 << 13)
 386 #define MMASK   (MGROW - 1)
 387
 388 #define round_mgrow(x)  \
 389         ((unsigned long) (((unsigned long) (x) + MMASK) & ~MMASK))
 390 #define trunc_int(x)    \
 391         ((unsigned long) ((unsigned long) (x) & ~(sizeof(int)-1)))
 392 #define int_aligned(x)  \
 393         ((unsigned long) (x) == trunc_int(x))
 394
 395 #define MBUF_INIT(x) do {                               \
 396         if (!mbase) {                                           \
 397                 TRACEME(("** allocating mbase of %d bytes", MGROW)); \
 398                 New(10003, mbase, MGROW, char); \
 399                 msiz = MGROW;                                   \
 400         }                                                                       \
 401         mptr = mbase;                                           \
 402         if (x)                                                          \
 403                 mend = mbase + x;                               \
 404         else                                                            \
 405                 mend = mbase + msiz;                    \
 406 } while (0)
 407
 408 #define MBUF_TRUNC(x)   mptr = mbase + x
 409 #define MBUF_SIZE()             (mptr - mbase)
 410
 411 /*
 412  * Use SvPOKp(), because SvPOK() fails on tainted scalars.
 413  * See store_scalar() for other usage of this workaround.
 414  */
 415 #define MBUF_LOAD(v) do {                               \
 416         if (!SvPOKp(v))                                         \
 417                 CROAK(("Not a scalar string")); \
 418         mptr = mbase = SvPV(v, msiz);           \
 419         mend = mbase + msiz;                            \
 420 } while (0)
 421
 422 #define MBUF_XTEND(x) do {                      \
 423         int nsz = (int) round_mgrow((x)+msiz);  \
 424         int offset = mptr - mbase;              \
 425         TRACEME(("** extending mbase to %d bytes", nsz));       \
 426         Renew(mbase, nsz, char);                \
 427         msiz = nsz;                                             \
 428         mptr = mbase + offset;                  \
 429         mend = mbase + nsz;                             \
 430 } while (0)
 431
 432 #define MBUF_CHK(x) do {                        \
 433         if ((mptr + (x)) > mend)                \
 434                 MBUF_XTEND(x);                          \
 435 } while (0)
 436
 437 #define MBUF_GETC(x) do {                       \
 438         if (mptr < mend)                                \
 439                 x = (int) (unsigned char) *mptr++;      \
 440         else                                                    \
 441                 return (SV *) 0;                        \
 442 } while (0)
 443
 444 #ifdef CRAY_HACK
 445 #define MBUF_GETINT(x) do {                             \
 446         oC(x);                                                          \
 447         if ((mptr + 4) <= mend) {                       \
 448                 memcpy(oI(&x), mptr, 4);                \
 449                 mptr += 4;                                              \
 450         } else                                                          \
 451                 return (SV *) 0;                                \
 452 } while (0)
 453 #else
 454 #define MBUF_GETINT(x) do {                             \
 455         if ((mptr + sizeof(int)) <= mend) {     \
 456                 if (int_aligned(mptr))                  \
 457                         x = *(int *) mptr;                      \
 458                 else                                                    \
 459                         memcpy(&x, mptr, sizeof(int));  \
 460                 mptr += sizeof(int);                    \
 461         } else                                                          \
 462                 return (SV *) 0;                                \
 463 } while (0)
 464 #endif
 465
 466 #define MBUF_READ(x,s) do {                     \
 467         if ((mptr + (s)) <= mend) {             \
 468                 memcpy(x, mptr, s);                     \
 469                 mptr += s;                                      \
 470         } else                                                  \
 471                 return (SV *) 0;                        \
 472 } while (0)
 473
 474 #define MBUF_SAFEREAD(x,s,z) do {       \
 475         if ((mptr + (s)) <= mend) {             \
 476                 memcpy(x, mptr, s);                     \
 477                 mptr += s;                                      \
 478         } else {                                                \
 479                 sv_free(z);                                     \
 480                 return (SV *) 0;                        \
 481         }                                                               \
 482 } while (0)
 483
 484 #define MBUF_PUTC(c) do {                       \
 485         if (mptr < mend)                                \
 486                 *mptr++ = (char) c;                     \
 487         else {                                                  \
 488                 MBUF_XTEND(1);                          \
 489                 *mptr++ = (char) c;                     \
 490         }                                                               \
 491 } while (0)
 492
 493 #ifdef CRAY_HACK
 494 #define MBUF_PUTINT(i) do {                     \
 495         MBUF_CHK(4);                                    \
 496         memcpy(mptr, oI(&i), 4);                \
 497         mptr += 4;                                              \
 498 } while (0)
 499 #else
 500 #define MBUF_PUTINT(i) do {                     \
 501         MBUF_CHK(sizeof(int));                  \
 502         if (int_aligned(mptr))                  \
 503                 *(int *) mptr = i;                      \
 504         else                                                    \
 505                 memcpy(mptr, &i, sizeof(int));  \
 506         mptr += sizeof(int);                    \
 507 } while (0)
 508 #endif
 509
 510 #define MBUF_WRITE(x,s) do {            \
 511         MBUF_CHK(s);                                    \
 512         memcpy(mptr, x, s);                             \
 513         mptr += s;                                              \
 514 } while (0)
 515
 516 /*
 517  * Possible return values for sv_type().
 518  */
 519
 520 #define svis_REF                0
 521 #define svis_SCALAR             1
 522 #define svis_ARRAY              2
 523 #define svis_HASH               3
 524 #define svis_TIED               4
 525 #define svis_TIED_ITEM  5
 526 #define svis_OTHER              6
 527
 528 /*
 529  * Flags for SX_HOOK.
 530  */
 531
 532 #define SHF_TYPE_MASK           0x03
 533 #define SHF_LARGE_CLASSLEN      0x04
 534 #define SHF_LARGE_STRLEN        0x08
 535 #define SHF_LARGE_LISTLEN       0x10
 536 #define SHF_IDX_CLASSNAME       0x20
 537 #define SHF_NEED_RECURSE        0x40
 538 #define SHF_HAS_LIST            0x80
 539
 540 /*
 541  * Types for SX_HOOK (2 bits).
 542  */
 543
 544 #define SHT_SCALAR                      0
 545 #define SHT_ARRAY                       1
 546 #define SHT_HASH                        2
 547
 548 /*
 549  * Before 0.6, the magic string was "perl-store" (binary version number 0).
 550  *
 551  * Since 0.6 introduced many binary incompatibilities, the magic string has
 552  * been changed to "pst0" to allow an old image to be properly retrieved by
 553  * a newer Storable, but ensure a newer image cannot be retrieved with an
 554  * older version.
 555  *
 556  * At 0.7, objects are given the ability to serialize themselves, and the
 557  * set of markers is extended, backward compatibility is not jeopardized,
 558  * so the binary version number could have remained unchanged.  To correctly
 559  * spot errors if a file making use of 0.7-specific extensions is given to
 560  * 0.6 for retrieval, the binary version was moved to "2".  And I'm introducing
 561  * a "minor" version, to better track this kind of evolution from now on.
 562  *
 563  */
 564 static char old_magicstr[] = "perl-store";      /* Magic number before 0.6 */
 565 static char magicstr[] = "pst0";                        /* Used as a magic number */
 566
 567 #define STORABLE_BIN_MAJOR      2                               /* Binary major "version" */
 568 #define STORABLE_BIN_MINOR      3                               /* Binary minor "version" */
 569
 570 /*
 571  * Useful store shortcuts...
 572  */
 573
 574 #define PUTMARK(x) do {                                         \
 575         if (!cxt->fio)                                                  \
 576                 MBUF_PUTC(x);                                           \
 577         else if (PerlIO_putc(cxt->fio, x) == EOF)       \
 578                 return -1;                                                      \
 579 } while (0)
 580
 581 #define WRITE_I32(x)    do {                    \
 582         ASSERT(sizeof(x) == sizeof(I32), ("writing an I32"));   \
 583         if (!cxt->fio)                                          \
 584                 MBUF_PUTINT(x);                                 \
 585         else if (PerlIO_write(cxt->fio, oI(&x), oS(sizeof(x))) != oS(sizeof(x))) \
 586                 return -1;                                      \
 587         } while (0)
 588
 589 #ifdef HAS_HTONL
 590 #define WLEN(x) do {                            \
 591         if (cxt->netorder) {                    \
 592                 int y = (int) htonl(x);         \
 593                 if (!cxt->fio)                          \
 594                         MBUF_PUTINT(y);                 \
 595                 else if (PerlIO_write(cxt->fio,oI(&y),oS(sizeof(y))) != oS(sizeof(y))) \
 596                         return -1;                              \
 597         } else {                                                \
 598                 if (!cxt->fio)                          \
 599                         MBUF_PUTINT(x);                 \
 600                 else if (PerlIO_write(cxt->fio,oI(&x),oS(sizeof(x))) != oS(sizeof(x))) \
 601                         return -1;                              \
 602         }                                                               \
 603 } while (0)
 604 #else
 605 #define WLEN(x) WRITE_I32(x)
 606 #endif
 607
 608 #define WRITE(x,y) do {                                         \
 609         if (!cxt->fio)                                                  \
 610                 MBUF_WRITE(x,y);                                        \
 611         else if (PerlIO_write(cxt->fio, x, y) != y)     \
 612                 return -1;                                                      \
 613         } while (0)
 614
 615 #define STORE_PV_LEN(pv, len, small, large) do {        \
 616         if (len <= LG_SCALAR) {                         \
 617                 unsigned char clen = (unsigned char) len;       \
 618                 PUTMARK(small);                                 \
 619                 PUTMARK(clen);                                  \
 620                 if (len)                                                \
 621                         WRITE(pv, len);                         \
 622         } else {                                                        \
 623                 PUTMARK(large);                                 \
 624                 WLEN(len);                                              \
 625                 WRITE(pv, len);                                 \
 626         }                                                                       \
 627 } while (0)
 628
 629 #define STORE_SCALAR(pv, len)   STORE_PV_LEN(pv, len, SX_SCALAR, SX_LSCALAR)
 630
 631 /*
 632  * Conditional UTF8 support.
 633  * On non-UTF8 perls, UTF8 strings are returned as normal strings.
 634  *
 635  */
 636 #ifdef SvUTF8_on
 637 #define STORE_UTF8STR(pv, len)  STORE_PV_LEN(pv, len, SX_UTF8STR, SX_LUTF8STR)
 638 #else
 639 #define SvUTF8(sv) 0
 640 #define STORE_UTF8STR(pv, len) CROAK(("panic: storing UTF8 in non-UTF8 perl"))
 641 #define SvUTF8_on(sv) CROAK(("Cannot retrieve UTF8 data in non-UTF8 perl"))
 642 #endif
 643
 644 /*
 645  * Store undef in arrays and hashes without recursing through store().
 646  */
 647 #define STORE_UNDEF() do {                              \
 648         cxt->tagnum++;                                          \
 649         PUTMARK(SX_UNDEF);                                      \
 650 } while (0)
 651
 652 /*
 653  * Useful retrieve shortcuts...
 654  */
 655
 656 #define GETCHAR() \
 657         (cxt->fio ? PerlIO_getc(cxt->fio) : (mptr >= mend ? EOF : (int) *mptr++))
 658
 659 #define GETMARK(x) do {                                                 \
 660         if (!cxt->fio)                                                          \
 661                 MBUF_GETC(x);                                                   \
 662         else if ((x = PerlIO_getc(cxt->fio)) == EOF)    \
 663                 return (SV *) 0;                                                \
 664 } while (0)
 665
 666 #define READ_I32(x)     do {                            \
 667         ASSERT(sizeof(x) == sizeof(I32), ("reading an I32"));   \
 668         oC(x);                                                          \
 669         if (!cxt->fio)                                          \
 670                 MBUF_GETINT(x);                                 \
 671         else if (PerlIO_read(cxt->fio, oI(&x), oS(sizeof(x))) != oS(sizeof(x))) \
 672                 return (SV *) 0;                                \
 673 } while (0)
 674
 675 #ifdef HAS_NTOHL
 676 #define RLEN(x) do {                                    \
 677         oC(x);                                                          \
 678         if (!cxt->fio)                                          \
 679                 MBUF_GETINT(x);                                 \
 680         else if (PerlIO_read(cxt->fio, oI(&x), oS(sizeof(x))) != oS(sizeof(x))) \
 681                 return (SV *) 0;                                \
 682         if (cxt->netorder)                                      \
 683                 x = (int) ntohl(x);                             \
 684 } while (0)
 685 #else
 686 #define RLEN(x) READ_I32(x)
 687 #endif
 688
 689 #define READ(x,y) do {                                          \
 690         if (!cxt->fio)                                                  \
 691                 MBUF_READ(x, y);                                        \
 692         else if (PerlIO_read(cxt->fio, x, y) != y)      \
 693                 return (SV *) 0;                                        \
 694 } while (0)
 695
 696 #define SAFEREAD(x,y,z) do {                                    \
 697         if (!cxt->fio)                                                          \
 698                 MBUF_SAFEREAD(x,y,z);                                   \
 699         else if (PerlIO_read(cxt->fio, x, y) != y)       {      \
 700                 sv_free(z);                                                             \
 701                 return (SV *) 0;                                                \
 702         }                                                                                       \
 703 } while (0)
 704
 705 /*
 706  * This macro is used at retrieve time, to remember where object 'y', bearing a
 707  * given tag 'tagnum', has been retrieved. Next time we see an SX_OBJECT marker,
 708  * we'll therefore know where it has been retrieved and will be able to
 709  * share the same reference, as in the original stored memory image.
 710  */
 711 #define SEEN(y) do {                                            \
 712         if (!y)                                                                 \
 713                 return (SV *) 0;                                        \
 714         if (av_store(cxt->aseen, cxt->tagnum++, SvREFCNT_inc(y)) == 0) \
 715                 return (SV *) 0;                                        \
 716         TRACEME(("aseen(#%d) = 0x%"UVxf" (refcnt=%d)", cxt->tagnum-1, \
 717                  PTR2UV(y), SvREFCNT(y)-1)); \
 718 } while (0)
 719
 720 /*
 721  * Bless `s' in `p', via a temporary reference, required by sv_bless().
 722  */
 723 #define BLESS(s,p) do {                                 \
 724         SV *ref;                                                                \
 725         HV *stash;                                                              \
 726         TRACEME(("blessing 0x%"UVxf" in %s", PTR2UV(s), (p))); \
 727         stash = gv_stashpv((p), TRUE);                  \
 728         ref = newRV_noinc(s);                                   \
 729         (void) sv_bless(ref, stash);                    \
 730         SvRV(ref) = 0;                                                  \
 731         SvREFCNT_dec(ref);                                              \
 732 } while (0)
 733
 734 static int store();
 735 static SV *retrieve();
 736
 737 /*
 738  * Dynamic dispatching table for SV store.
 739  */
 740
 741 static int store_ref(stcxt_t *cxt, SV *sv);
 742 static int store_scalar(stcxt_t *cxt, SV *sv);
 743 static int store_array(stcxt_t *cxt, AV *av);
 744 static int store_hash(stcxt_t *cxt, HV *hv);
 745 static int store_tied(stcxt_t *cxt, SV *sv);
 746 static int store_tied_item(stcxt_t *cxt, SV *sv);
 747 static int store_other(stcxt_t *cxt, SV *sv);
 748 static int store_blessed(stcxt_t *cxt, SV *sv, int type, HV *pkg);
 749
 750 static int (*sv_store[])() = {
 751         store_ref,                      /* svis_REF */
 752         store_scalar,           /* svis_SCALAR */
 753         store_array,            /* svis_ARRAY */
 754         store_hash,                     /* svis_HASH */
 755         store_tied,                     /* svis_TIED */
 756         store_tied_item,        /* svis_TIED_ITEM */
 757         store_other,            /* svis_OTHER */
 758 };
 759
 760 #define SV_STORE(x)     (*sv_store[x])
 761
 762 /*
 763  * Dynamic dispatching tables for SV retrieval.
 764  */
 765
 766 static SV *retrieve_lscalar(stcxt_t *cxt);
 767 static SV *retrieve_lutf8str(stcxt_t *cxt);
 768 static SV *old_retrieve_array(stcxt_t *cxt);
 769 static SV *old_retrieve_hash(stcxt_t *cxt);
 770 static SV *retrieve_ref(stcxt_t *cxt);
 771 static SV *retrieve_undef(stcxt_t *cxt);
 772 static SV *retrieve_integer(stcxt_t *cxt);
 773 static SV *retrieve_double(stcxt_t *cxt);
 774 static SV *retrieve_byte(stcxt_t *cxt);
 775 static SV *retrieve_netint(stcxt_t *cxt);
 776 static SV *retrieve_scalar(stcxt_t *cxt);
 777 static SV *retrieve_utf8str(stcxt_t *cxt);
 778 static SV *retrieve_tied_array(stcxt_t *cxt);
 779 static SV *retrieve_tied_hash(stcxt_t *cxt);
 780 static SV *retrieve_tied_scalar(stcxt_t *cxt);
 781 static SV *retrieve_other(stcxt_t *cxt);
 782
 783 static SV *(*sv_old_retrieve[])() = {
 784         0,                      /* SX_OBJECT -- entry unused dynamically */
 785         retrieve_lscalar,               /* SX_LSCALAR */
 786         old_retrieve_array,             /* SX_ARRAY -- for pre-0.6 binaries */
 787         old_retrieve_hash,              /* SX_HASH -- for pre-0.6 binaries */
 788         retrieve_ref,                   /* SX_REF */
 789         retrieve_undef,                 /* SX_UNDEF */
 790         retrieve_integer,               /* SX_INTEGER */
 791         retrieve_double,                /* SX_DOUBLE */
 792         retrieve_byte,                  /* SX_BYTE */
 793         retrieve_netint,                /* SX_NETINT */
 794         retrieve_scalar,                /* SX_SCALAR */
 795         retrieve_tied_array,    /* SX_ARRAY */
 796         retrieve_tied_hash,             /* SX_HASH */
 797         retrieve_tied_scalar,   /* SX_SCALAR */
 798         retrieve_other,                 /* SX_SV_UNDEF not supported */
 799         retrieve_other,                 /* SX_SV_YES not supported */
 800         retrieve_other,                 /* SX_SV_NO not supported */
 801         retrieve_other,                 /* SX_BLESS not supported */
 802         retrieve_other,                 /* SX_IX_BLESS not supported */
 803         retrieve_other,                 /* SX_HOOK not supported */
 804         retrieve_other,                 /* SX_OVERLOADED not supported */
 805         retrieve_other,                 /* SX_TIED_KEY not supported */
 806         retrieve_other,                 /* SX_TIED_IDX not supported */
 807         retrieve_other,                 /* SX_UTF8STR not supported */
 808         retrieve_other,                 /* SX_LUTF8STR not supported */
 809         retrieve_other,                 /* SX_ERROR */
 810 };
 811
 812 static SV *retrieve_array(stcxt_t *cxt);
 813 static SV *retrieve_hash(stcxt_t *cxt);
 814 static SV *retrieve_sv_undef(stcxt_t *cxt);
 815 static SV *retrieve_sv_yes(stcxt_t *cxt);
 816 static SV *retrieve_sv_no(stcxt_t *cxt);
 817 static SV *retrieve_blessed(stcxt_t *cxt);
 818 static SV *retrieve_idx_blessed(stcxt_t *cxt);
 819 static SV *retrieve_hook(stcxt_t *cxt);
 820 static SV *retrieve_overloaded(stcxt_t *cxt);
 821 static SV *retrieve_tied_key(stcxt_t *cxt);
 822 static SV *retrieve_tied_idx(stcxt_t *cxt);
 823
 824 static SV *(*sv_retrieve[])() = {
 825         0,                      /* SX_OBJECT -- entry unused dynamically */
 826         retrieve_lscalar,               /* SX_LSCALAR */
 827         retrieve_array,                 /* SX_ARRAY */
 828         retrieve_hash,                  /* SX_HASH */
 829         retrieve_ref,                   /* SX_REF */
 830         retrieve_undef,                 /* SX_UNDEF */
 831         retrieve_integer,               /* SX_INTEGER */
 832         retrieve_double,                /* SX_DOUBLE */
 833         retrieve_byte,                  /* SX_BYTE */
 834         retrieve_netint,                /* SX_NETINT */
 835         retrieve_scalar,                /* SX_SCALAR */
 836         retrieve_tied_array,    /* SX_ARRAY */
 837         retrieve_tied_hash,             /* SX_HASH */
 838         retrieve_tied_scalar,   /* SX_SCALAR */
 839         retrieve_sv_undef,              /* SX_SV_UNDEF */
 840         retrieve_sv_yes,                /* SX_SV_YES */
 841         retrieve_sv_no,                 /* SX_SV_NO */
 842         retrieve_blessed,               /* SX_BLESS */
 843         retrieve_idx_blessed,   /* SX_IX_BLESS */
 844         retrieve_hook,                  /* SX_HOOK */
 845         retrieve_overloaded,    /* SX_OVERLOAD */
 846         retrieve_tied_key,              /* SX_TIED_KEY */
 847         retrieve_tied_idx,              /* SX_TIED_IDX */
 848         retrieve_utf8str,               /* SX_UTF8STR  */
 849         retrieve_lutf8str,              /* SX_LUTF8STR */
 850         retrieve_other,                 /* SX_ERROR */
 851 };
 852
 853 #define RETRIEVE(c,x) (*(c)->retrieve_vtbl[(x) >= SX_ERROR ? SX_ERROR : (x)])
 854
 855 static SV *mbuf2sv(void);
 856
 857 /***
 858  *** Context management.
 859  ***/
 860
 861 /*
 862  * init_perinterp
 863  *
 864  * Called once per "thread" (interpreter) to initialize some global context.
 865  */
 866 static void init_perinterp(void)
 867 {
 868     INIT_STCXT;
 869
 870     cxt->netorder = 0;          /* true if network order used */
 871     cxt->forgive_me = -1;       /* whether to be forgiving... */
 872 }
 873
 874 /*
 875  * init_store_context
 876  *
 877  * Initialize a new store context for real recursion.
 878  */
 879 static void init_store_context(
 880         stcxt_t *cxt,
 881         PerlIO *f,
 882         int optype,
 883         int network_order)
 884 {
 885         TRACEME(("init_store_context"));
 886
 887         cxt->netorder = network_order;
 888         cxt->forgive_me = -1;                   /* Fetched from perl if needed */
 889         cxt->canonical = -1;                    /* Idem */
 890         cxt->tagnum = -1;                               /* Reset tag numbers */
 891         cxt->classnum = -1;                             /* Reset class numbers */
 892         cxt->fio = f;                                   /* Where I/O are performed */
 893         cxt->optype = optype;                   /* A store, or a deep clone */
 894         cxt->entry = 1;                                 /* No recursion yet */
 895
 896         /*
 897          * The `hseen' table is used to keep track of each SV stored and their
 898          * associated tag numbers is special. It is "abused" because the
 899          * values stored are not real SV, just integers cast to (SV *),
 900          * which explains the freeing below.
 901          *
 902          * It is also one possible bottlneck to achieve good storing speed,
 903          * so the "shared keys" optimization is turned off (unlikely to be
 904          * of any use here), and the hash table is "pre-extended". Together,
 905          * those optimizations increase the throughput by 12%.
 906          */
 907
 908         cxt->hseen = newHV();                   /* Table where seen objects are stored */
 909         HvSHAREKEYS_off(cxt->hseen);
 910
 911         /*
 912          * The following does not work well with perl5.004_04, and causes
 913          * a core dump later on, in a completely unrelated spot, which
 914          * makes me think there is a memory corruption going on.
 915          *
 916          * Calling hv_ksplit(hseen, HBUCKETS) instead of manually hacking
 917          * it below does not make any difference. It seems to work fine
 918          * with perl5.004_68 but given the probable nature of the bug,
 919          * that does not prove anything.
 920          *
 921          * It's a shame because increasing the amount of buckets raises
 922          * store() throughput by 5%, but until I figure this out, I can't
 923          * allow for this to go into production.
 924          *
 925          * It is reported fixed in 5.005, hence the #if.
 926          */
 927 #if PERL_VERSION >= 5
 928 #define HBUCKETS        4096                            /* Buckets for %hseen */
 929         HvMAX(cxt->hseen) = HBUCKETS - 1;       /* keys %hseen = $HBUCKETS; */
 930 #endif
 931
 932         /*
 933          * The `hclass' hash uses the same settings as `hseen' above, but it is
 934          * used to assign sequential tags (numbers) to class names for blessed
 935          * objects.
 936          *
 937          * We turn the shared key optimization on.
 938          */
 939
 940         cxt->hclass = newHV();                  /* Where seen classnames are stored */
 941
 942 #if PERL_VERSION >= 5
 943         HvMAX(cxt->hclass) = HBUCKETS - 1;      /* keys %hclass = $HBUCKETS; */
 944 #endif
 945
 946         /*
 947          * The `hook' hash table is used to keep track of the references on
 948          * the STORABLE_freeze hook routines, when found in some class name.
 949          *
 950          * It is assumed that the inheritance tree will not be changed during
 951          * storing, and that no new method will be dynamically created by the
 952          * hooks.
 953          */
 954
 955         cxt->hook = newHV();                    /* Table where hooks are cached */
 956 }
 957
 958 /*
 959  * clean_store_context
 960  *
 961  * Clean store context by
 962  */
 963 static void clean_store_context(stcxt_t *cxt)
 964 {
 965         HE *he;
 966
 967         TRACEME(("clean_store_context"));
 968
 969         ASSERT(cxt->optype & ST_STORE, ("was performing a store()"));
 970
 971         /*
 972          * Insert real values into hashes where we stored faked pointers.
 973          */
 974
 975         hv_iterinit(cxt->hseen);
 976         while (he = hv_iternext(cxt->hseen))
 977                 HeVAL(he) = &PL_sv_undef;
 978
 979         hv_iterinit(cxt->hclass);
 980         while (he = hv_iternext(cxt->hclass))
 981                 HeVAL(he) = &PL_sv_undef;
 982
 983         /*
 984          * And now dispose of them...
 985          */
 986
 987         hv_undef(cxt->hseen);
 988         sv_free((SV *) cxt->hseen);
 989
 990         hv_undef(cxt->hclass);
 991         sv_free((SV *) cxt->hclass);
 992
 993         hv_undef(cxt->hook);
 994         sv_free((SV *) cxt->hook);
 995
 996         cxt->entry = 0;
 997         cxt->s_dirty = 0;
 998 }
 999
1000 /*
1001  * init_retrieve_context
1002  *
1003  * Initialize a new retrieve context for real recursion.
1004  */
1005 static void init_retrieve_context(stcxt_t *cxt, int optype, int is_tainted)
1006 {
1007         TRACEME(("init_retrieve_context"));
1008
1009         /*
1010          * The hook hash table is used to keep track of the references on
1011          * the STORABLE_thaw hook routines, when found in some class name.
1012          *
1013          * It is assumed that the inheritance tree will not be changed during
1014          * storing, and that no new method will be dynamically created by the
1015          * hooks.
1016          */
1017
1018         cxt->hook  = newHV();                   /* Caches STORABLE_thaw */
1019
1020         /*
1021          * If retrieving an old binary version, the cxt->retrieve_vtbl variable
1022          * was set to sv_old_retrieve. We'll need a hash table to keep track of
1023          * the correspondance between the tags and the tag number used by the
1024          * new retrieve routines.
1025          */
1026
1027         cxt->hseen = (cxt->retrieve_vtbl == sv_old_retrieve) ? newHV() : 0;
1028
1029         cxt->aseen = newAV();                   /* Where retrieved objects are kept */
1030         cxt->aclass = newAV();                  /* Where seen classnames are kept */
1031         cxt->tagnum = 0;                                /* Have to count objects... */
1032         cxt->classnum = 0;                              /* ...and class names as well */
1033         cxt->optype = optype;
1034         cxt->s_tainted = is_tainted;
1035         cxt->entry = 1;                                 /* No recursion yet */
1036 }
1037
1038 /*
1039  * clean_retrieve_context
1040  *
1041  * Clean retrieve context by
1042  */
1043 static void clean_retrieve_context(stcxt_t *cxt)
1044 {
1045         TRACEME(("clean_retrieve_context"));
1046
1047         ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()"));
1048
1049         av_undef(cxt->aseen);
1050         sv_free((SV *) cxt->aseen);
1051
1052         av_undef(cxt->aclass);
1053         sv_free((SV *) cxt->aclass);
1054
1055         hv_undef(cxt->hook);
1056         sv_free((SV *) cxt->hook);
1057
1058         if (cxt->hseen)
1059                 sv_free((SV *) cxt->hseen);             /* optional HV, for backward compat. */
1060
1061         cxt->entry = 0;
1062         cxt->s_dirty = 0;
1063 }
1064
1065 /*
1066  * clean_context
1067  *
1068  * A workaround for the CROAK bug: cleanup the last context.
1069  */
1070 static void clean_context(cxt)
1071 stcxt_t *cxt;
1072 {
1073         TRACEME(("clean_context"));
1074
1075         ASSERT(cxt->s_dirty, ("dirty context"));
1076
1077         if (cxt->optype & ST_RETRIEVE)
1078                 clean_retrieve_context(cxt);
1079         else
1080                 clean_store_context(cxt);
1081 }
1082
1083 /*
1084  * allocate_context
1085  *
1086  * Allocate a new context and push it on top of the parent one.
1087  * This new context is made globally visible via SET_STCXT().
1088  */
1089 static stcxt_t *allocate_context(parent_cxt)
1090 stcxt_t *parent_cxt;
1091 {
1092         stcxt_t *cxt;
1093
1094         TRACEME(("allocate_context"));
1095
1096         ASSERT(!parent_cxt->s_dirty, ("parent context clean"));
1097
1098         Newz(0, cxt, 1, stcxt_t);
1099         cxt->prev = parent_cxt;
1100         SET_STCXT(cxt);
1101
1102         return cxt;
1103 }
1104
1105 /*
1106  * free_context
1107  *
1108  * Free current context, which cannot be the "root" one.
1109  * Make the context underneath globally visible via SET_STCXT().
1110  */
1111 static void free_context(cxt)
1112 stcxt_t *cxt;
1113 {
1114         stcxt_t *prev = cxt->prev;
1115
1116         TRACEME(("free_context"));
1117
1118         ASSERT(!cxt->s_dirty, ("clean context"));
1119         ASSERT(prev, ("not freeing root context"));
1120
1121         if (kbuf)
1122                 Safefree(kbuf);
1123         if (mbase)
1124                 Safefree(mbase);
1125
1126         Safefree(cxt);
1127         SET_STCXT(prev);
1128 }
1129
1130 /***
1131  *** Predicates.
1132  ***/
1133
1134 /*
1135  * is_storing
1136  *
1137  * Tells whether we're in the middle of a store operation.
1138  */
1139 int is_storing(void)
1140 {
1141         dSTCXT;
1142
1143         return cxt->entry && (cxt->optype & ST_STORE);
1144 }
1145
1146 /*
1147  * is_retrieving
1148  *
1149  * Tells whether we're in the middle of a retrieve operation.
1150  */
1151 int is_retrieving(void)
1152 {
1153         dSTCXT;
1154
1155         return cxt->entry && (cxt->optype & ST_RETRIEVE);
1156 }
1157
1158 /*
1159  * last_op_in_netorder
1160  *
1161  * Returns whether last operation was made using network order.
1162  *
1163  * This is typically out-of-band information that might prove useful
1164  * to people wishing to convert native to network order data when used.
1165  */
1166 int last_op_in_netorder(void)
1167 {
1168         dSTCXT;
1169
1170         return cxt->netorder;
1171 }
1172
1173 /***
1174  *** Hook lookup and calling routines.
1175  ***/
1176
1177 /*
1178  * pkg_fetchmeth
1179  *
1180  * A wrapper on gv_fetchmethod_autoload() which caches results.
1181  *
1182  * Returns the routine reference as an SV*, or null if neither the package
1183  * nor its ancestors know about the method.
1184  */
1185 static SV *pkg_fetchmeth(
1186         HV *cache,
1187         HV *pkg,
1188         char *method)
1189 {
1190         GV *gv;
1191         SV *sv;
1192         SV **svh;
1193
1194         /*
1195          * The following code is the same as the one performed by UNIVERSAL::can
1196          * in the Perl core.
1197          */
1198
1199         gv = gv_fetchmethod_autoload(pkg, method, FALSE);
1200         if (gv && isGV(gv)) {
1201                 sv = newRV((SV*) GvCV(gv));
1202                 TRACEME(("%s->%s: 0x%"UVxf, HvNAME(pkg), method, PTR2UV(sv)));
1203         } else {
1204                 sv = newSVsv(&PL_sv_undef);
1205                 TRACEME(("%s->%s: not found", HvNAME(pkg), method));
1206         }
1207
1208         /*
1209          * Cache the result, ignoring failure: if we can't store the value,
1210          * it just won't be cached.
1211          */
1212
1213         (void) hv_store(cache, HvNAME(pkg), strlen(HvNAME(pkg)), sv, 0);
1214
1215         return SvOK(sv) ? sv : (SV *) 0;
1216 }
1217
1218 /*
1219  * pkg_hide
1220  *
1221  * Force cached value to be undef: hook ignored even if present.
1222  */
1223 static void pkg_hide(
1224         HV *cache,
1225         HV *pkg,
1226         char *method)
1227 {
1228         (void) hv_store(cache,
1229                 HvNAME(pkg), strlen(HvNAME(pkg)), newSVsv(&PL_sv_undef), 0);
1230 }
1231
1232 /*
1233  * pkg_uncache
1234  *
1235  * Discard cached value: a whole fetch loop will be retried at next lookup.
1236  */
1237 static void pkg_uncache(
1238         HV *cache,
1239         HV *pkg,
1240         char *method)
1241 {
1242         (void) hv_delete(cache, HvNAME(pkg), strlen(HvNAME(pkg)), G_DISCARD);
1243 }
1244
1245 /*
1246  * pkg_can
1247  *
1248  * Our own "UNIVERSAL::can", which caches results.
1249  *
1250  * Returns the routine reference as an SV*, or null if the object does not
1251  * know about the method.
1252  */
1253 static SV *pkg_can(
1254         HV *cache,
1255         HV *pkg,
1256         char *method)
1257 {
1258         SV **svh;
1259         SV *sv;
1260
1261         TRACEME(("pkg_can for %s->%s", HvNAME(pkg), method));
1262
1263         /*
1264          * Look into the cache to see whether we already have determined
1265          * where the routine was, if any.
1266          *
1267          * NOTA BENE: we don't use `method' at all in our lookup, since we know
1268          * that only one hook (i.e. always the same) is cached in a given cache.
1269          */
1270
1271         svh = hv_fetch(cache, HvNAME(pkg), strlen(HvNAME(pkg)), FALSE);
1272         if (svh) {
1273                 sv = *svh;
1274                 if (!SvOK(sv)) {
1275                         TRACEME(("cached %s->%s: not found", HvNAME(pkg), method));
1276                         return (SV *) 0;
1277                 } else {
1278                         TRACEME(("cached %s->%s: 0x%"UVxf,
1279                                 HvNAME(pkg), method, PTR2UV(sv)));
1280                         return sv;
1281                 }
1282         }
1283
1284         TRACEME(("not cached yet"));
1285         return pkg_fetchmeth(cache, pkg, method);               /* Fetch and cache */
1286 }
1287
1288 /*
1289  * scalar_call
1290  *
1291  * Call routine as obj->hook(av) in scalar context.
1292  * Propagates the single returned value if not called in void context.
1293  */
1294 static SV *scalar_call(
1295         SV *obj,
1296         SV *hook,
1297         int cloning,
1298         AV *av,
1299         I32 flags)
1300 {
1301         dSP;
1302         int count;
1303         SV *sv = 0;
1304
1305         TRACEME(("scalar_call (cloning=%d)", cloning));
1306
1307         ENTER;
1308         SAVETMPS;
1309
1310         PUSHMARK(sp);
1311         XPUSHs(obj);
1312         XPUSHs(sv_2mortal(newSViv(cloning)));           /* Cloning flag */
1313         if (av) {
1314                 SV **ary = AvARRAY(av);
1315                 int cnt = AvFILLp(av) + 1;
1316                 int i;
1317                 XPUSHs(ary[0]);                                                 /* Frozen string */
1318                 for (i = 1; i < cnt; i++) {
1319                         TRACEME(("pushing arg #%d (0x%"UVxf")...",
1320                                  i, PTR2UV(ary[i])));
1321                         XPUSHs(sv_2mortal(newRV(ary[i])));
1322                 }
1323         }
1324         PUTBACK;
1325
1326         TRACEME(("calling..."));
1327         count = perl_call_sv(hook, flags);              /* Go back to Perl code */
1328         TRACEME(("count = %d", count));
1329
1330         SPAGAIN;
1331
1332         if (count) {
1333                 sv = POPs;
1334                 SvREFCNT_inc(sv);               /* We're returning it, must stay alive! */
1335         }
1336
1337         PUTBACK;
1338         FREETMPS;
1339         LEAVE;
1340
1341         return sv;
1342 }
1343
1344 /*
1345  * array_call
1346  *
1347  * Call routine obj->hook(cloning) in list context.
1348  * Returns the list of returned values in an array.
1349  */
1350 static AV *array_call(
1351         SV *obj,
1352         SV *hook,
1353         int cloning)
1354 {
1355         dSP;
1356         int count;
1357         AV *av;
1358         int i;
1359
1360         TRACEME(("array_call (cloning=%d)", cloning));
1361
1362         ENTER;
1363         SAVETMPS;
1364
1365         PUSHMARK(sp);
1366         XPUSHs(obj);                                                            /* Target object */
1367         XPUSHs(sv_2mortal(newSViv(cloning)));           /* Cloning flag */
1368         PUTBACK;
1369
1370         count = perl_call_sv(hook, G_ARRAY);            /* Go back to Perl code */
1371
1372         SPAGAIN;
1373
1374         av = newAV();
1375         for (i = count - 1; i >= 0; i--) {
1376                 SV *sv = POPs;
1377                 av_store(av, i, SvREFCNT_inc(sv));
1378         }
1379
1380         PUTBACK;
1381         FREETMPS;
1382         LEAVE;
1383
1384         return av;
1385 }
1386
1387 /*
1388  * known_class
1389  *
1390  * Lookup the class name in the `hclass' table and either assign it a new ID
1391  * or return the existing one, by filling in `classnum'.
1392  *
1393  * Return true if the class was known, false if the ID was just generated.
1394  */
1395 static int known_class(
1396         stcxt_t *cxt,
1397         char *name,             /* Class name */
1398         int len,                /* Name length */
1399         I32 *classnum)
1400 {
1401         SV **svh;
1402         HV *hclass = cxt->hclass;
1403
1404         TRACEME(("known_class (%s)", name));
1405
1406         /*
1407          * Recall that we don't store pointers in this hash table, but tags.
1408          * Therefore, we need LOW_32BITS() to extract the relevant parts.
1409          */
1410
1411         svh = hv_fetch(hclass, name, len, FALSE);
1412         if (svh) {
1413                 *classnum = LOW_32BITS(*svh);
1414                 return TRUE;
1415         }
1416
1417         /*
1418          * Unknown classname, we need to record it.
1419          */
1420
1421         cxt->classnum++;
1422         if (!hv_store(hclass, name, len, INT2PTR(SV*, cxt->classnum), 0))
1423                 CROAK(("Unable to record new classname"));
1424
1425         *classnum = cxt->classnum;
1426         return FALSE;
1427 }
1428
1429 /***
1430  *** Sepcific store routines.
1431  ***/
1432
1433 /*
1434  * store_ref
1435  *
1436  * Store a reference.
1437  * Layout is SX_REF <object> or SX_OVERLOAD <object>.
1438  */
1439 static int store_ref(stcxt_t *cxt, SV *sv)
1440 {
1441         TRACEME(("store_ref (0x%"UVxf")", PTR2UV(sv)));
1442
1443         /*
1444          * Follow reference, and check if target is overloaded.
1445          */
1446
1447         sv = SvRV(sv);
1448
1449         if (SvOBJECT(sv)) {
1450                 HV *stash = (HV *) SvSTASH(sv);
1451                 if (stash && Gv_AMG(stash)) {
1452                         TRACEME(("ref (0x%"UVxf") is overloaded", PTR2UV(sv)));
1453                         PUTMARK(SX_OVERLOAD);
1454                 } else
1455                         PUTMARK(SX_REF);
1456         } else
1457                 PUTMARK(SX_REF);
1458
1459         return store(cxt, sv);
1460 }
1461
1462 /*
1463  * store_scalar
1464  *
1465  * Store a scalar.
1466  *
1467  * Layout is SX_LSCALAR <length> <data>, SX_SCALAR <lenght> <data> or SX_UNDEF.
1468  * The <data> section is omitted if <length> is 0.
1469  *
1470  * If integer or double, the layout is SX_INTEGER <data> or SX_DOUBLE <data>.
1471  * Small integers (within [-127, +127]) are stored as SX_BYTE <byte>.
1472  */
1473 static int store_scalar(stcxt_t *cxt, SV *sv)
1474 {
1475         IV iv;
1476         char *pv;
1477         STRLEN len;
1478         U32 flags = SvFLAGS(sv);                        /* "cc -O" may put it in register */
1479
1480         TRACEME(("store_scalar (0x%"UVxf")", PTR2UV(sv)));
1481
1482         /*
1483          * For efficiency, break the SV encapsulation by peaking at the flags
1484          * directly without using the Perl macros to avoid dereferencing
1485          * sv->sv_flags each time we wish to check the flags.
1486          */
1487
1488         if (!(flags & SVf_OK)) {                        /* !SvOK(sv) */
1489                 if (sv == &PL_sv_undef) {
1490                         TRACEME(("immortal undef"));
1491                         PUTMARK(SX_SV_UNDEF);
1492                 } else {
1493                         TRACEME(("undef at 0x%"UVxf, PTR2UV(sv)));
1494                         PUTMARK(SX_UNDEF);
1495                 }
1496                 return 0;
1497         }
1498
1499         /*
1500          * Always store the string representation of a scalar if it exists.
1501          * Gisle Aas provided me with this test case, better than a long speach:
1502          *
1503          *  perl -MDevel::Peek -le '$a="abc"; $a+0; Dump($a)'
1504          *  SV = PVNV(0x80c8520)
1505          *       REFCNT = 1
1506          *       FLAGS = (NOK,POK,pNOK,pPOK)
1507          *       IV = 0
1508          *       NV = 0
1509          *       PV = 0x80c83d0 "abc"\0
1510          *       CUR = 3
1511          *       LEN = 4
1512          *
1513          * Write SX_SCALAR, length, followed by the actual data.
1514          *
1515          * Otherwise, write an SX_BYTE, SX_INTEGER or an SX_DOUBLE as
1516          * appropriate, followed by the actual (binary) data. A double
1517          * is written as a string if network order, for portability.
1518          *
1519          * NOTE: instead of using SvNOK(sv), we test for SvNOKp(sv).
1520          * The reason is that when the scalar value is tainted, the SvNOK(sv)
1521          * value is false.
1522          *
1523          * The test for a read-only scalar with both POK and NOK set is meant
1524          * to quickly detect &PL_sv_yes and &PL_sv_no without having to pay the
1525          * address comparison for each scalar we store.
1526          */
1527
1528 #define SV_MAYBE_IMMORTAL (SVf_READONLY|SVf_POK|SVf_NOK)
1529
1530         if ((flags & SV_MAYBE_IMMORTAL) == SV_MAYBE_IMMORTAL) {
1531                 if (sv == &PL_sv_yes) {
1532                         TRACEME(("immortal yes"));
1533                         PUTMARK(SX_SV_YES);
1534                 } else if (sv == &PL_sv_no) {
1535                         TRACEME(("immortal no"));
1536                         PUTMARK(SX_SV_NO);
1537                 } else {
1538                         pv = SvPV(sv, len);                     /* We know it's SvPOK */
1539                         goto string;                            /* Share code below */
1540                 }
1541         } else if (flags & SVp_POK) {           /* SvPOKp(sv) => string */
1542                 I32 wlen;                                               /* For 64-bit machines */
1543                 pv = SvPV(sv, len);
1544
1545                 /*
1546                  * Will come here from below with pv and len set if double & netorder,
1547                  * or from above if it was readonly, POK and NOK but neither &PL_sv_yes
1548                  * nor &PL_sv_no.
1549                  */
1550         string:
1551
1552                 wlen = (I32) len;                               /* WLEN via STORE_SCALAR expects I32 */
1553                 if (SvUTF8 (sv))
1554                         STORE_UTF8STR(pv, wlen);
1555                 else
1556                         STORE_SCALAR(pv, wlen);
1557                 TRACEME(("ok (scalar 0x%"UVxf" '%s', length = %"IVdf")",
1558                          PTR2UV(sv), SvPVX(sv), (IV)len));
1559
1560         } else if (flags & SVp_NOK) {           /* SvNOKp(sv) => double */
1561                 NV nv = SvNV(sv);
1562
1563                 /*
1564                  * Watch for number being an integer in disguise.
1565                  */
1566                 if (nv == (NV) (iv = I_V(nv))) {
1567                         TRACEME(("double %"NVff" is actually integer %"IVdf, nv, iv));
1568                         goto integer;           /* Share code below */
1569                 }
1570
1571                 if (cxt->netorder) {
1572                         TRACEME(("double %"NVff" stored as string", nv));
1573                         pv = SvPV(sv, len);
1574                         goto string;            /* Share code above */
1575                 }
1576
1577                 PUTMARK(SX_DOUBLE);
1578                 WRITE(&nv, sizeof(nv));
1579
1580                 TRACEME(("ok (double 0x%"UVxf", value = %"NVff")", PTR2UV(sv), nv));
1581
1582         } else if (flags & SVp_IOK) {           /* SvIOKp(sv) => integer */
1583                 iv = SvIV(sv);
1584
1585                 /*
1586                  * Will come here from above with iv set if double is an integer.
1587                  */
1588         integer:
1589
1590                 /*
1591                  * Optimize small integers into a single byte, otherwise store as
1592                  * a real integer (converted into network order if they asked).
1593                  */
1594
1595                 if (iv >= -128 && iv <= 127) {
1596                         unsigned char siv = (unsigned char) (iv + 128); /* [0,255] */
1597                         PUTMARK(SX_BYTE);
1598                         PUTMARK(siv);
1599                         TRACEME(("small integer stored as %d", siv));
1600                 } else if (cxt->netorder) {
1601                         I32 niv;
1602 #ifdef HAS_HTONL
1603                         niv = (I32) htonl(iv);
1604                         TRACEME(("using network order"));
1605 #else
1606                         niv = (I32) iv;
1607                         TRACEME(("as-is for network order"));
1608 #endif
1609                         PUTMARK(SX_NETINT);
1610                         WRITE_I32(niv);
1611                 } else {
1612                         PUTMARK(SX_INTEGER);
1613                         WRITE(&iv, sizeof(iv));
1614                 }
1615
1616                 TRACEME(("ok (integer 0x%"UVxf", value = %"IVdf")", PTR2UV(sv), iv));
1617
1618         } else
1619                 CROAK(("Can't determine type of %s(0x%"UVxf")",
1620                        sv_reftype(sv, FALSE),
1621                        PTR2UV(sv)));
1622
1623         return 0;               /* Ok, no recursion on scalars */
1624 }
1625
1626 /*
1627  * store_array
1628  *
1629  * Store an array.
1630  *
1631  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
1632  * Each item is stored as <object>.
1633  */
1634 static int store_array(stcxt_t *cxt, AV *av)
1635 {
1636         SV **sav;
1637         I32 len = av_len(av) + 1;
1638         I32 i;
1639         int ret;
1640
1641         TRACEME(("store_array (0x%"UVxf")", PTR2UV(av)));
1642
1643         /*
1644          * Signal array by emitting SX_ARRAY, followed by the array length.
1645          */
1646
1647         PUTMARK(SX_ARRAY);
1648         WLEN(len);
1649         TRACEME(("size = %d", len));
1650
1651         /*
1652          * Now store each item recursively.
1653          */
1654
1655         for (i = 0; i < len; i++) {
1656                 sav = av_fetch(av, i, 0);
1657                 if (!sav) {
1658                         TRACEME(("(#%d) undef item", i));
1659                         STORE_UNDEF();
1660                         continue;
1661                 }
1662                 TRACEME(("(#%d) item", i));
1663                 if (ret = store(cxt, *sav))
1664                         return ret;
1665         }
1666
1667         TRACEME(("ok (array)"));
1668
1669         return 0;
1670 }
1671
1672 /*
1673  * sortcmp
1674  *
1675  * Sort two SVs
1676  * Borrowed from perl source file pp_ctl.c, where it is used by pp_sort.
1677  */
1678 static int
1679 sortcmp(const void *a, const void *b)
1680 {
1681         return sv_cmp(*(SV * const *) a, *(SV * const *) b);
1682 }
1683
1684
1685 /*
1686  * store_hash
1687  *
1688  * Store an hash table.
1689  *
1690  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
1691  * Values are stored as <object>.
1692  * Keys are stored as <length> <data>, the <data> section being omitted
1693  * if length is 0.
1694  */
1695 static int store_hash(stcxt_t *cxt, HV *hv)
1696 {
1697         I32 len = HvKEYS(hv);
1698         I32 i;
1699         int ret = 0;
1700         I32 riter;
1701         HE *eiter;
1702
1703         TRACEME(("store_hash (0x%"UVxf")", PTR2UV(hv)));
1704
1705         /*
1706          * Signal hash by emitting SX_HASH, followed by the table length.
1707          */
1708
1709         PUTMARK(SX_HASH);
1710         WLEN(len);
1711         TRACEME(("size = %d", len));
1712
1713         /*
1714          * Save possible iteration state via each() on that table.
1715          */
1716
1717         riter = HvRITER(hv);
1718         eiter = HvEITER(hv);
1719         hv_iterinit(hv);
1720
1721         /*
1722          * Now store each item recursively.
1723          *
1724      * If canonical is defined to some true value then store each
1725      * key/value pair in sorted order otherwise the order is random.
1726          * Canonical order is irrelevant when a deep clone operation is performed.
1727          *
1728          * Fetch the value from perl only once per store() operation, and only
1729          * when needed.
1730          */
1731
1732         if (
1733                 !(cxt->optype & ST_CLONE) && (cxt->canonical == 1 ||
1734                 (cxt->canonical < 0 && (cxt->canonical =
1735                         SvTRUE(perl_get_sv("Storable::canonical", TRUE)) ? 1 : 0)))
1736         ) {
1737                 /*
1738                  * Storing in order, sorted by key.
1739                  * Run through the hash, building up an array of keys in a
1740                  * mortal array, sort the array and then run through the
1741                  * array.
1742                  */
1743
1744                 AV *av = newAV();
1745
1746                 TRACEME(("using canonical order"));
1747
1748                 for (i = 0; i < len; i++) {
1749                         HE *he = hv_iternext(hv);
1750                         SV *key = hv_iterkeysv(he);
1751                         av_store(av, AvFILLp(av)+1, key);       /* av_push(), really */
1752                 }
1753
1754                 qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);
1755
1756                 for (i = 0; i < len; i++) {
1757                         char *keyval;
1758                         I32 keylen;
1759                         SV *key = av_shift(av);
1760                         HE *he  = hv_fetch_ent(hv, key, 0, 0);
1761                         SV *val = HeVAL(he);
1762                         if (val == 0)
1763                                 return 1;               /* Internal error, not I/O error */
1764
1765                         /*
1766                          * Store value first.
1767                          */
1768
1769                         TRACEME(("(#%d) value 0x%"UVxf, i, PTR2UV(val)));
1770
1771                         if (ret = store(cxt, val))
1772                                 goto out;
1773
1774                         /*
1775                          * Write key string.
1776                          * Keys are written after values to make sure retrieval
1777                          * can be optimal in terms of memory usage, where keys are
1778                          * read into a fixed unique buffer called kbuf.
1779                          * See retrieve_hash() for details.
1780                          */
1781
1782                         keyval = hv_iterkey(he, &keylen);
1783                         TRACEME(("(#%d) key '%s'", i, keyval));
1784                         WLEN(keylen);
1785                         if (keylen)
1786                                 WRITE(keyval, keylen);
1787                 }
1788
1789                 /*
1790                  * Free up the temporary array
1791                  */
1792
1793                 av_undef(av);
1794                 sv_free((SV *) av);
1795
1796         } else {
1797
1798                 /*
1799                  * Storing in "random" order (in the order the keys are stored
1800                  * within the the hash).  This is the default and will be faster!
1801                  */
1802
1803                 for (i = 0; i < len; i++) {
1804                         char *key;
1805                         I32 len;
1806                         SV *val = hv_iternextsv(hv, &key, &len);
1807
1808                         if (val == 0)
1809                                 return 1;               /* Internal error, not I/O error */
1810
1811                         /*
1812                          * Store value first.
1813                          */
1814
1815                         TRACEME(("(#%d) value 0x%"UVxf, i, PTR2UV(val)));
1816
1817                         if (ret = store(cxt, val))
1818                                 goto out;
1819
1820                         /*
1821                          * Write key string.
1822                          * Keys are written after values to make sure retrieval
1823                          * can be optimal in terms of memory usage, where keys are
1824                          * read into a fixed unique buffer called kbuf.
1825                          * See retrieve_hash() for details.
1826                          */
1827
1828                         TRACEME(("(#%d) key '%s'", i, key));
1829                         WLEN(len);
1830                         if (len)
1831                                 WRITE(key, len);
1832                 }
1833     }
1834
1835         TRACEME(("ok (hash 0x%"UVxf")", PTR2UV(hv)));
1836
1837 out:
1838         HvRITER(hv) = riter;            /* Restore hash iterator state */
1839         HvEITER(hv) = eiter;
1840
1841         return ret;
1842 }
1843
1844 /*
1845  * store_tied
1846  *
1847  * When storing a tied object (be it a tied scalar, array or hash), we lay out
1848  * a special mark, followed by the underlying tied object. For instance, when
1849  * dealing with a tied hash, we store SX_TIED_HASH <hash object>, where
1850  * <hash object> stands for the serialization of the tied hash.
1851  */
1852 static int store_tied(stcxt_t *cxt, SV *sv)
1853 {
1854         MAGIC *mg;
1855         int ret = 0;
1856         int svt = SvTYPE(sv);
1857         char mtype = 'P';
1858
1859         TRACEME(("store_tied (0x%"UVxf")", PTR2UV(sv)));
1860
1861         /*
1862          * We have a small run-time penalty here because we chose to factorise
1863          * all tieds objects into the same routine, and not have a store_tied_hash,
1864          * a store_tied_array, etc...
1865          *
1866          * Don't use a switch() statement, as most compilers don't optimize that
1867          * well for 2/3 values. An if() else if() cascade is just fine. We put
1868          * tied hashes first, as they are the most likely beasts.
1869          */
1870
1871         if (svt == SVt_PVHV) {
1872                 TRACEME(("tied hash"));
1873                 PUTMARK(SX_TIED_HASH);                  /* Introduces tied hash */
1874         } else if (svt == SVt_PVAV) {
1875                 TRACEME(("tied array"));
1876                 PUTMARK(SX_TIED_ARRAY);                 /* Introduces tied array */
1877         } else {
1878                 TRACEME(("tied scalar"));
1879                 PUTMARK(SX_TIED_SCALAR);                /* Introduces tied scalar */
1880                 mtype = 'q';
1881         }
1882
1883         if (!(mg = mg_find(sv, mtype)))
1884                 CROAK(("No magic '%c' found while storing tied %s", mtype,
1885                         (svt == SVt_PVHV) ? "hash" :
1886                                 (svt == SVt_PVAV) ? "array" : "scalar"));
1887
1888         /*
1889          * The mg->mg_obj found by mg_find() above actually points to the
1890          * underlying tied Perl object implementation. For instance, if the
1891          * original SV was that of a tied array, then mg->mg_obj is an AV.
1892          *
1893          * Note that we store the Perl object as-is. We don't call its FETCH
1894          * method along the way. At retrieval time, we won't call its STORE
1895          * method either, but the tieing magic will be re-installed. In itself,
1896          * that ensures that the tieing semantics are preserved since futher
1897          * accesses on the retrieved object will indeed call the magic methods...
1898          */
1899
1900         if (ret = store(cxt, mg->mg_obj))
1901                 return ret;
1902
1903         TRACEME(("ok (tied)"));
1904
1905         return 0;
1906 }
1907
1908 /*
1909  * store_tied_item
1910  *
1911  * Stores a reference to an item within a tied structure:
1912  *
1913  *  . \$h{key}, stores both the (tied %h) object and 'key'.
1914  *  . \$a[idx], stores both the (tied @a) object and 'idx'.
1915  *
1916  * Layout is therefore either:
1917  *     SX_TIED_KEY <object> <key>
1918  *     SX_TIED_IDX <object> <index>
1919  */
1920 static int store_tied_item(stcxt_t *cxt, SV *sv)
1921 {
1922         MAGIC *mg;
1923         int ret;
1924
1925         TRACEME(("store_tied_item (0x%"UVxf")", PTR2UV(sv)));
1926
1927         if (!(mg = mg_find(sv, 'p')))
1928                 CROAK(("No magic 'p' found while storing reference to tied item"));
1929
1930         /*
1931          * We discriminate between \$h{key} and \$a[idx] via mg_ptr.
1932          */
1933
1934         if (mg->mg_ptr) {
1935                 TRACEME(("store_tied_item: storing a ref to a tied hash item"));
1936                 PUTMARK(SX_TIED_KEY);
1937                 TRACEME(("store_tied_item: storing OBJ 0x%"UVxf, PTR2UV(mg->mg_obj)));
1938
1939                 if (ret = store(cxt, mg->mg_obj))
1940                         return ret;
1941
1942                 TRACEME(("store_tied_item: storing PTR 0x%"UVxf, PTR2UV(mg->mg_ptr)));
1943
1944                 if (ret = store(cxt, (SV *) mg->mg_ptr))
1945                         return ret;
1946         } else {
1947                 I32 idx = mg->mg_len;
1948
1949                 TRACEME(("store_tied_item: storing a ref to a tied array item "));
1950                 PUTMARK(SX_TIED_IDX);
1951                 TRACEME(("store_tied_item: storing OBJ 0x%"UVxf, PTR2UV(mg->mg_obj)));
1952
1953                 if (ret = store(cxt, mg->mg_obj))
1954                         return ret;
1955
1956                 TRACEME(("store_tied_item: storing IDX %d", idx));
1957
1958                 WLEN(idx);
1959         }
1960
1961         TRACEME(("ok (tied item)"));
1962
1963         return 0;
1964 }
1965
1966 /*
1967  * store_hook           -- dispatched manually, not via sv_store[]
1968  *
1969  * The blessed SV is serialized by a hook.
1970  *
1971  * Simple Layout is:
1972  *
1973  *     SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
1974  *
1975  * where <flags> indicates how long <len>, <len2> and <len3> are, whether
1976  * the trailing part [] is present, the type of object (scalar, array or hash).
1977  * There is also a bit which says how the classname is stored between:
1978  *
1979  *     <len> <classname>
1980  *     <index>
1981  *
1982  * and when the <index> form is used (classname already seen), the "large
1983  * classname" bit in <flags> indicates how large the <index> is.
1984  *
1985  * The serialized string returned by the hook is of length <len2> and comes
1986  * next.  It is an opaque string for us.
1987  *
1988  * Those <len3> object IDs which are listed last represent the extra references
1989  * not directly serialized by the hook, but which are linked to the object.
1990  *
1991  * When recursion is mandated to resolve object-IDs not yet seen, we have
1992  * instead, with <header> being flags with bits set to indicate the object type
1993  * and that recursion was indeed needed:
1994  *
1995  *     SX_HOOK <header> <object> <header> <object> <flags>
1996  *
1997  * that same header being repeated between serialized objects obtained through
1998  * recursion, until we reach flags indicating no recursion, at which point
1999  * we know we've resynchronized with a single layout, after <flags>.
2000  */
2001 static int store_hook(
2002         stcxt_t *cxt,
2003         SV *sv,
2004         int type,
2005         HV *pkg,
2006         SV *hook)
2007 {
2008         I32 len;
2009         char *class;
2010         STRLEN len2;
2011         SV *ref;
2012         AV *av;
2013         SV **ary;
2014         int count;                              /* really len3 + 1 */
2015         unsigned char flags;
2016         char *pv;
2017         int i;
2018         int recursed = 0;               /* counts recursion */
2019         int obj_type;                   /* object type, on 2 bits */
2020         I32 classnum;
2021         int ret;
2022         int clone = cxt->optype & ST_CLONE;
2023
2024         TRACEME(("store_hook, class \"%s\", tagged #%d", HvNAME(pkg), cxt->tagnum));
2025
2026         /*
2027          * Determine object type on 2 bits.
2028          */
2029
2030         switch (type) {
2031         case svis_SCALAR:
2032                 obj_type = SHT_SCALAR;
2033                 break;
2034         case svis_ARRAY:
2035                 obj_type = SHT_ARRAY;
2036                 break;
2037         case svis_HASH:
2038                 obj_type = SHT_HASH;
2039                 break;
2040         default:
2041                 CROAK(("Unexpected object type (%d) in store_hook()", type));
2042         }
2043         flags = SHF_NEED_RECURSE | obj_type;
2044
2045         class = HvNAME(pkg);
2046         len = strlen(class);
2047
2048         /*
2049          * To call the hook, we need to fake a call like:
2050          *
2051          *    $object->STORABLE_freeze($cloning);
2052          *
2053          * but we don't have the $object here.  For instance, if $object is
2054          * a blessed array, what we have in `sv' is the array, and we can't
2055          * call a method on those.
2056          *
2057          * Therefore, we need to create a temporary reference to the object and
2058          * make the call on that reference.
2059          */
2060
2061         TRACEME(("about to call STORABLE_freeze on class %s", class));
2062
2063         ref = newRV_noinc(sv);                          /* Temporary reference */
2064         av = array_call(ref, hook, clone);      /* @a = $object->STORABLE_freeze($c) */
2065         SvRV(ref) = 0;
2066         SvREFCNT_dec(ref);                                      /* Reclaim temporary reference */
2067
2068         count = AvFILLp(av) + 1;
2069         TRACEME(("store_hook, array holds %d items", count));
2070
2071         /*
2072          * If they return an empty list, it means they wish to ignore the
2073          * hook for this class (and not just this instance -- that's for them
2074          * to handle if they so wish).
2075          *
2076          * Simply disable the cached entry for the hook (it won't be recomputed
2077          * since it's present in the cache) and recurse to store_blessed().
2078          */
2079
2080         if (!count) {
2081                 /*
2082                  * They must not change their mind in the middle of a serialization.
2083                  */
2084
2085                 if (hv_fetch(cxt->hclass, class, len, FALSE))
2086                         CROAK(("Too late to ignore hooks for %s class \"%s\"",
2087                                 (cxt->optype & ST_CLONE) ? "cloning" : "storing", class));
2088
2089                 pkg_hide(cxt->hook, pkg, "STORABLE_freeze");
2090
2091                 ASSERT(!pkg_can(cxt->hook, pkg, "STORABLE_freeze"), ("hook invisible"));
2092                 TRACEME(("ignoring STORABLE_freeze in class \"%s\"", class));
2093
2094                 return store_blessed(cxt, sv, type, pkg);
2095         }
2096
2097         /*
2098          * Get frozen string.
2099          */
2100
2101         ary = AvARRAY(av);
2102         pv = SvPV(ary[0], len2);
2103
2104         /*
2105          * If they returned more than one item, we need to serialize some
2106          * extra references if not already done.
2107          *
2108          * Loop over the array, starting at postion #1, and for each item,
2109          * ensure it is a reference, serialize it if not already done, and
2110          * replace the entry with the tag ID of the corresponding serialized
2111          * object.
2112          *
2113          * We CHEAT by not calling av_fetch() and read directly within the
2114          * array, for speed.
2115          */
2116
2117         for (i = 1; i < count; i++) {
2118                 SV **svh;
2119                 SV *xsv = ary[i];
2120
2121                 if (!SvROK(xsv))
2122                         CROAK(("Item #%d from hook in %s is not a reference", i, class));
2123                 xsv = SvRV(xsv);                /* Follow ref to know what to look for */
2124
2125                 /*
2126                  * Look in hseen and see if we have a tag already.
2127                  * Serialize entry if not done already, and get its tag.
2128                  */
2129
2130                 if (svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE))
2131                         goto sv_seen;           /* Avoid moving code too far to the right */
2132
2133                 TRACEME(("listed object %d at 0x%"UVxf" is unknown", i-1, PTR2UV(xsv)));
2134
2135                 /*
2136                  * We need to recurse to store that object and get it to be known
2137                  * so that we can resolve the list of object-IDs at retrieve time.
2138                  *
2139                  * The first time we do this, we need to emit the proper header
2140                  * indicating that we recursed, and what the type of object is (the
2141                  * object we're storing via a user-hook).  Indeed, during retrieval,
2142                  * we'll have to create the object before recursing to retrieve the
2143                  * others, in case those would point back at that object.
2144                  */
2145
2146                 /* [SX_HOOK] <flags> <object>*/
2147                 if (!recursed++)
2148                         PUTMARK(SX_HOOK);
2149                 PUTMARK(flags);
2150
2151                 if (ret = store(cxt, xsv))              /* Given by hook for us to store */
2152                         return ret;
2153
2154                 svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE);
2155                 if (!svh)
2156                         CROAK(("Could not serialize item #%d from hook in %s", i, class));
2157
2158                 /*
2159                  * Replace entry with its tag (not a real SV, so no refcnt increment)
2160                  */
2161
2162         sv_seen:
2163                 SvREFCNT_dec(xsv);
2164                 ary[i] = *svh;
2165                 TRACEME(("listed object %d at 0x%"UVxf" is tag #%"UVuf,
2166                          i-1, PTR2UV(xsv), PTR2UV(*svh)));
2167         }
2168
2169         /*
2170          * Allocate a class ID if not already done.
2171          *
2172          * This needs to be done after the recursion above, since at retrieval
2173          * time, we'll see the inner objects first.  Many thanks to
2174          * Salvador Ortiz Garcia <sog@msg.com.mx> who spot that bug and
2175          * proposed the right fix.  -- RAM, 15/09/2000
2176          */
2177
2178         if (!known_class(cxt, class, len, &classnum)) {
2179                 TRACEME(("first time we see class %s, ID = %d", class, classnum));
2180                 classnum = -1;                          /* Mark: we must store classname */
2181         } else {
2182                 TRACEME(("already seen class %s, ID = %d", class, classnum));
2183         }
2184
2185         /*
2186          * Compute leading flags.
2187          */
2188
2189         flags = obj_type;
2190         if (((classnum == -1) ? len : classnum) > LG_SCALAR)
2191                 flags |= SHF_LARGE_CLASSLEN;
2192         if (classnum != -1)
2193                 flags |= SHF_IDX_CLASSNAME;
2194         if (len2 > LG_SCALAR)
2195                 flags |= SHF_LARGE_STRLEN;
2196         if (count > 1)
2197                 flags |= SHF_HAS_LIST;
2198         if (count > (LG_SCALAR + 1))
2199                 flags |= SHF_LARGE_LISTLEN;
2200
2201         /*
2202          * We're ready to emit either serialized form:
2203          *
2204          *   SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
2205          *   SX_HOOK <flags> <index>           <len2> <str> [<len3> <object-IDs>]
2206          *
2207          * If we recursed, the SX_HOOK has already been emitted.
2208          */
2209
2210         TRACEME(("SX_HOOK (recursed=%d) flags=0x%x "
2211                         "class=%"IVdf" len=%"IVdf" len2=%"IVdf" len3=%d",
2212                  recursed, flags, (IV)classnum, (IV)len, (IV)len2, count-1));
2213
2214         /* SX_HOOK <flags> */
2215         if (!recursed)
2216                 PUTMARK(SX_HOOK);
2217         PUTMARK(flags);
2218
2219         /* <len> <classname> or <index> */
2220         if (flags & SHF_IDX_CLASSNAME) {
2221                 if (flags & SHF_LARGE_CLASSLEN)
2222                         WLEN(classnum);
2223                 else {
2224                         unsigned char cnum = (unsigned char) classnum;
2225                         PUTMARK(cnum);
2226                 }
2227         } else {
2228                 if (flags & SHF_LARGE_CLASSLEN)
2229                         WLEN(len);
2230                 else {
2231                         unsigned char clen = (unsigned char) len;
2232                         PUTMARK(clen);
2233                 }
2234                 WRITE(class, len);              /* Final \0 is omitted */
2235         }
2236
2237         /* <len2> <frozen-str> */
2238         if (flags & SHF_LARGE_STRLEN) {
2239                 I32 wlen2 = len2;               /* STRLEN might be 8 bytes */
2240                 WLEN(wlen2);                    /* Must write an I32 for 64-bit machines */
2241         } else {
2242                 unsigned char clen = (unsigned char) len2;
2243                 PUTMARK(clen);
2244         }
2245         if (len2)
2246                 WRITE(pv, len2);        /* Final \0 is omitted */
2247
2248         /* [<len3> <object-IDs>] */
2249         if (flags & SHF_HAS_LIST) {
2250                 int len3 = count - 1;
2251                 if (flags & SHF_LARGE_LISTLEN)
2252                         WLEN(len3);
2253                 else {
2254                         unsigned char clen = (unsigned char) len3;
2255                         PUTMARK(clen);
2256                 }
2257
2258                 /*
2259                  * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a
2260                  * real pointer, rather a tag number, well under the 32-bit limit.
2261                  */
2262
2263                 for (i = 1; i < count; i++) {
2264                         I32 tagval = htonl(LOW_32BITS(ary[i]));
2265                         WRITE_I32(tagval);
2266                         TRACEME(("object %d, tag #%d", i-1, ntohl(tagval)));
2267                 }
2268         }
2269
2270         /*
2271          * Free the array.  We need extra care for indices after 0, since they
2272          * don't hold real SVs but integers cast.
2273          */
2274
2275         if (count > 1)
2276                 AvFILLp(av) = 0;        /* Cheat, nothing after 0 interests us */
2277         av_undef(av);
2278         sv_free((SV *) av);
2279
2280         return 0;
2281 }
2282
2283 /*
2284  * store_blessed        -- dispatched manually, not via sv_store[]
2285  *
2286  * Check whether there is a STORABLE_xxx hook defined in the class or in one
2287  * of its ancestors.  If there is, then redispatch to store_hook();
2288  *
2289  * Otherwise, the blessed SV is stored using the following layout:
2290  *
2291  *    SX_BLESS <flag> <len> <classname> <object>
2292  *
2293  * where <flag> indicates whether <len> is stored on 0 or 4 bytes, depending
2294  * on the high-order bit in flag: if 1, then length follows on 4 bytes.
2295  * Otherwise, the low order bits give the length, thereby giving a compact
2296  * representation for class names less than 127 chars long.
2297  *
2298  * Each <classname> seen is remembered and indexed, so that the next time
2299  * an object in the blessed in the same <classname> is stored, the following
2300  * will be emitted:
2301  *
2302  *    SX_IX_BLESS <flag> <index> <object>
2303  *
2304  * where <index> is the classname index, stored on 0 or 4 bytes depending
2305  * on the high-order bit in flag (same encoding as above for <len>).
2306  */
2307 static int store_blessed(
2308         stcxt_t *cxt,
2309         SV *sv,
2310         int type,
2311         HV *pkg)
2312 {
2313         SV *hook;
2314         I32 len;
2315         char *class;
2316         I32 classnum;
2317
2318         TRACEME(("store_blessed, type %d, class \"%s\"", type, HvNAME(pkg)));
2319
2320         /*
2321          * Look for a hook for this blessed SV and redirect to store_hook()
2322          * if needed.
2323          */
2324
2325         hook = pkg_can(cxt->hook, pkg, "STORABLE_freeze");
2326         if (hook)
2327                 return store_hook(cxt, sv, type, pkg, hook);
2328
2329         /*
2330          * This is a blessed SV without any serialization hook.
2331          */
2332
2333         class = HvNAME(pkg);
2334         len = strlen(class);
2335
2336         TRACEME(("blessed 0x%"UVxf" in %s, no hook: tagged #%d",
2337                  PTR2UV(sv), class, cxt->tagnum));
2338
2339         /*
2340          * Determine whether it is the first time we see that class name (in which
2341          * case it will be stored in the SX_BLESS form), or whether we already
2342          * saw that class name before (in which case the SX_IX_BLESS form will be
2343          * used).
2344          */
2345
2346         if (known_class(cxt, class, len, &classnum)) {
2347                 TRACEME(("already seen class %s, ID = %d", class, classnum));
2348                 PUTMARK(SX_IX_BLESS);
2349                 if (classnum <= LG_BLESS) {
2350                         unsigned char cnum = (unsigned char) classnum;
2351                         PUTMARK(cnum);
2352                 } else {
2353                         unsigned char flag = (unsigned char) 0x80;
2354                         PUTMARK(flag);
2355                         WLEN(classnum);
2356                 }
2357         } else {
2358                 TRACEME(("first time we see class %s, ID = %d", class, classnum));
2359                 PUTMARK(SX_BLESS);
2360                 if (len <= LG_BLESS) {
2361                         unsigned char clen = (unsigned char) len;
2362                         PUTMARK(clen);
2363                 } else {
2364                         unsigned char flag = (unsigned char) 0x80;
2365                         PUTMARK(flag);
2366                         WLEN(len);                                      /* Don't BER-encode, this should be rare */
2367                 }
2368                 WRITE(class, len);                              /* Final \0 is omitted */
2369         }
2370
2371         /*
2372          * Now emit the <object> part.
2373          */
2374
2375         return SV_STORE(type)(cxt, sv);
2376 }
2377
2378 /*
2379  * store_other
2380  *
2381  * We don't know how to store the item we reached, so return an error condition.
2382  * (it's probably a GLOB, some CODE reference, etc...)
2383  *
2384  * If they defined the `forgive_me' variable at the Perl level to some
2385  * true value, then don't croak, just warn, and store a placeholder string
2386  * instead.
2387  */
2388 static int store_other(stcxt_t *cxt, SV *sv)
2389 {
2390         I32 len;
2391         static char buf[80];
2392
2393         TRACEME(("store_other"));
2394
2395         /*
2396          * Fetch the value from perl only once per store() operation.
2397          */
2398
2399         if (
2400                 cxt->forgive_me == 0 ||
2401                 (cxt->forgive_me < 0 && !(cxt->forgive_me =
2402                         SvTRUE(perl_get_sv("Storable::forgive_me", TRUE)) ? 1 : 0))
2403         )
2404                 CROAK(("Can't store %s items", sv_reftype(sv, FALSE)));
2405
2406         warn("Can't store item %s(0x%"UVxf")",
2407                 sv_reftype(sv, FALSE), PTR2UV(sv));
2408
2409         /*
2410          * Store placeholder string as a scalar instead...
2411          */
2412
2413         (void) sprintf(buf, "You lost %s(0x%"UVxf")\0", sv_reftype(sv, FALSE),
2414                        PTR2UV(sv));
2415
2416         len = strlen(buf);
2417         STORE_SCALAR(buf, len);
2418         TRACEME(("ok (dummy \"%s\", length = %"IVdf")", buf, len));
2419
2420         return 0;
2421 }
2422
2423 /***
2424  *** Store driving routines
2425  ***/
2426
2427 /*
2428  * sv_type
2429  *
2430  * WARNING: partially duplicates Perl's sv_reftype for speed.
2431  *
2432  * Returns the type of the SV, identified by an integer. That integer
2433  * may then be used to index the dynamic routine dispatch table.
2434  */
2435 static int sv_type(SV *sv)
2436 {
2437         switch (SvTYPE(sv)) {
2438         case SVt_NULL:
2439         case SVt_IV:
2440         case SVt_NV:
2441                 /*
2442                  * No need to check for ROK, that can't be set here since there
2443                  * is no field capable of hodling the xrv_rv reference.
2444                  */
2445                 return svis_SCALAR;
2446         case SVt_PV:
2447         case SVt_RV:
2448         case SVt_PVIV:
2449         case SVt_PVNV:
2450                 /*
2451                  * Starting from SVt_PV, it is possible to have the ROK flag
2452                  * set, the pointer to the other SV being either stored in
2453                  * the xrv_rv (in the case of a pure SVt_RV), or as the
2454                  * xpv_pv field of an SVt_PV and its heirs.
2455                  *
2456                  * However, those SV cannot be magical or they would be an
2457                  * SVt_PVMG at least.
2458                  */
2459                 return SvROK(sv) ? svis_REF : svis_SCALAR;
2460         case SVt_PVMG:
2461         case SVt_PVLV:          /* Workaround for perl5.004_04 "LVALUE" bug */
2462                 if (SvRMAGICAL(sv) && (mg_find(sv, 'p')))
2463                         return svis_TIED_ITEM;
2464                 /* FALL THROUGH */
2465         case SVt_PVBM:
2466                 if (SvRMAGICAL(sv) && (mg_find(sv, 'q')))
2467                         return svis_TIED;
2468                 return SvROK(sv) ? svis_REF : svis_SCALAR;
2469         case SVt_PVAV:
2470                 if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
2471                         return svis_TIED;
2472                 return svis_ARRAY;
2473         case SVt_PVHV:
2474                 if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
2475                         return svis_TIED;
2476                 return svis_HASH;
2477         default:
2478                 break;
2479         }
2480
2481         return svis_OTHER;
2482 }
2483
2484 /*
2485  * store
2486  *
2487  * Recursively store objects pointed to by the sv to the specified file.
2488  *
2489  * Layout is <content> or SX_OBJECT <tagnum> if we reach an already stored
2490  * object (one for which storage has started -- it may not be over if we have
2491  * a self-referenced structure). This data set forms a stored <object>.
2492  */
2493 static int store(stcxt_t *cxt, SV *sv)
2494 {
2495         SV **svh;
2496         int ret;
2497         SV *tag;
2498         int type;
2499         HV *hseen = cxt->hseen;
2500
2501         TRACEME(("store (0x%"UVxf")", PTR2UV(sv)));
2502
2503         /*
2504          * If object has already been stored, do not duplicate data.
2505          * Simply emit the SX_OBJECT marker followed by its tag data.
2506          * The tag is always written in network order.
2507          *
2508          * NOTA BENE, for 64-bit machines: the "*svh" below does not yield a
2509          * real pointer, rather a tag number (watch the insertion code below).
2510          * That means it pobably safe to assume it is well under the 32-bit limit,
2511          * and makes the truncation safe.
2512          *              -- RAM, 14/09/1999
2513          */
2514
2515         svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE);
2516         if (svh) {
2517                 I32 tagval = htonl(LOW_32BITS(*svh));
2518
2519                 TRACEME(("object 0x%"UVxf" seen as #%d", PTR2UV(sv), ntohl(tagval)));
2520
2521                 PUTMARK(SX_OBJECT);
2522                 WRITE_I32(tagval);
2523                 return 0;
2524         }
2525
2526         /*
2527          * Allocate a new tag and associate it with the address of the sv being
2528          * stored, before recursing...
2529          *
2530          * In order to avoid creating new SvIVs to hold the tagnum we just
2531          * cast the tagnum to a SV pointer and store that in the hash.  This
2532          * means that we must clean up the hash manually afterwards, but gives
2533          * us a 15% throughput increase.
2534          *
2535          */
2536
2537         cxt->tagnum++;
2538         if (!hv_store(hseen,
2539                         (char *) &sv, sizeof(sv), INT2PTR(SV*, cxt->tagnum), 0))
2540                 return -1;
2541
2542         /*
2543          * Store `sv' and everything beneath it, using appropriate routine.
2544          * Abort immediately if we get a non-zero status back.
2545          */
2546
2547         type = sv_type(sv);
2548
2549         TRACEME(("storing 0x%"UVxf" tag #%d, type %d...",
2550                  PTR2UV(sv), cxt->tagnum, type));
2551
2552         if (SvOBJECT(sv)) {
2553                 HV *pkg = SvSTASH(sv);
2554                 ret = store_blessed(cxt, sv, type, pkg);
2555         } else
2556                 ret = SV_STORE(type)(cxt, sv);
2557
2558         TRACEME(("%s (stored 0x%"UVxf", refcnt=%d, %s)",
2559                 ret ? "FAILED" : "ok", PTR2UV(sv),
2560                 SvREFCNT(sv), sv_reftype(sv, FALSE)));
2561
2562         return ret;
2563 }
2564
2565 /*
2566  * magic_write
2567  *
2568  * Write magic number and system information into the file.
2569  * Layout is <magic> <network> [<len> <byteorder> <sizeof int> <sizeof long>
2570  * <sizeof ptr>] where <len> is the length of the byteorder hexa string.
2571  * All size and lenghts are written as single characters here.
2572  *
2573  * Note that no byte ordering info is emitted when <network> is true, since
2574  * integers will be emitted in network order in that case.
2575  */
2576 static int magic_write(stcxt_t *cxt)
2577 {
2578         char buf[256];  /* Enough room for 256 hexa digits */
2579         unsigned char c;
2580         int use_network_order = cxt->netorder;
2581
2582         TRACEME(("magic_write on fd=%d", cxt->fio ? fileno(cxt->fio) : -1));
2583
2584         if (cxt->fio)
2585                 WRITE(magicstr, strlen(magicstr));      /* Don't write final \0 */
2586
2587         /*
2588          * Starting with 0.6, the "use_network_order" byte flag is also used to
2589          * indicate the version number of the binary image, encoded in the upper
2590          * bits. The bit 0 is always used to indicate network order.
2591          */
2592
2593         c = (unsigned char)
2594                 ((use_network_order ? 0x1 : 0x0) | (STORABLE_BIN_MAJOR << 1));
2595         PUTMARK(c);
2596
2597         /*
2598          * Starting with 0.7, a full byte is dedicated to the minor version of
2599          * the binary format, which is incremented only when new markers are
2600          * introduced, for instance, but when backward compatibility is preserved.
2601          */
2602
2603         PUTMARK((unsigned char) STORABLE_BIN_MINOR);
2604
2605         if (use_network_order)
2606                 return 0;                                               /* Don't bother with byte ordering */
2607
2608         sprintf(buf, "%lx", (unsigned long) BYTEORDER);
2609         c = (unsigned char) strlen(buf);
2610         PUTMARK(c);
2611         WRITE(buf, (unsigned int) c);           /* Don't write final \0 */
2612         PUTMARK((unsigned char) sizeof(int));
2613         PUTMARK((unsigned char) sizeof(long));
2614         PUTMARK((unsigned char) sizeof(char *));
2615         PUTMARK((unsigned char) sizeof(NV));
2616
2617         TRACEME(("ok (magic_write byteorder = 0x%lx [%d], I%d L%d P%d D%d)",
2618                  (unsigned long) BYTEORDER, (int) c,
2619                  (int) sizeof(int), (int) sizeof(long),
2620                  (int) sizeof(char *), (int) sizeof(NV)));
2621
2622         return 0;
2623 }
2624
2625 /*
2626  * do_store
2627  *
2628  * Common code for store operations.
2629  *
2630  * When memory store is requested (f = NULL) and a non null SV* is given in
2631  * `res', it is filled with a new SV created out of the memory buffer.
2632  *
2633  * It is required to provide a non-null `res' when the operation type is not
2634  * dclone() and store() is performed to memory.
2635  */
2636 static int do_store(
2637         PerlIO *f,
2638         SV *sv,
2639         int optype,
2640         int network_order,
2641         SV **res)
2642 {
2643         dSTCXT;
2644         int status;
2645
2646         ASSERT(!(f == 0 && !(optype & ST_CLONE)) || res,
2647                 ("must supply result SV pointer for real recursion to memory"));
2648
2649         TRACEME(("do_store (optype=%d, netorder=%d)",
2650                 optype, network_order));
2651
2652         optype |= ST_STORE;
2653
2654         /*
2655          * Workaround for CROAK leak: if they enter with a "dirty" context,
2656          * free up memory for them now.
2657          */
2658
2659         if (cxt->s_dirty)
2660                 clean_context(cxt);
2661
2662         /*
2663          * Now that STORABLE_xxx hooks exist, it is possible that they try to
2664          * re-enter store() via the hooks.  We need to stack contexts.
2665          */
2666
2667         if (cxt->entry)
2668                 cxt = allocate_context(cxt);
2669
2670         cxt->entry++;
2671
2672         ASSERT(cxt->entry == 1, ("starting new recursion"));
2673         ASSERT(!cxt->s_dirty, ("clean context"));
2674
2675         /*
2676          * Ensure sv is actually a reference. From perl, we called something
2677          * like:
2678          *       pstore(FILE, \@array);
2679          * so we must get the scalar value behing that reference.
2680          */
2681
2682         if (!SvROK(sv))
2683                 CROAK(("Not a reference"));
2684         sv = SvRV(sv);                  /* So follow it to know what to store */
2685
2686         /*
2687          * If we're going to store to memory, reset the buffer.
2688          */
2689
2690         if (!f)
2691                 MBUF_INIT(0);
2692
2693         /*
2694          * Prepare context and emit headers.
2695          */
2696
2697         init_store_context(cxt, f, optype, network_order);
2698
2699         if (-1 == magic_write(cxt))             /* Emit magic and ILP info */
2700                 return 0;                                       /* Error */
2701
2702         /*
2703          * Recursively store object...
2704          */
2705
2706         ASSERT(is_storing(), ("within store operation"));
2707
2708         status = store(cxt, sv);                /* Just do it! */
2709
2710         /*
2711          * If they asked for a memory store and they provided an SV pointer,
2712          * make an SV string out of the buffer and fill their pointer.
2713          *
2714          * When asking for ST_REAL, it's MANDATORY for the caller to provide
2715          * an SV, since context cleanup might free the buffer if we did recurse.
2716          * (unless caller is dclone(), which is aware of that).
2717          */
2718
2719         if (!cxt->fio && res)
2720                 *res = mbuf2sv();
2721
2722         /*
2723          * Final cleanup.
2724          *
2725          * The "root" context is never freed, since it is meant to be always
2726          * handy for the common case where no recursion occurs at all (i.e.
2727          * we enter store() outside of any Storable code and leave it, period).
2728          * We know it's the "root" context because there's nothing stacked
2729          * underneath it.
2730          *
2731          * OPTIMIZATION:
2732          *
2733          * When deep cloning, we don't free the context: doing so would force
2734          * us to copy the data in the memory buffer.  Sicne we know we're
2735          * about to enter do_retrieve...
2736          */
2737
2738         clean_store_context(cxt);
2739         if (cxt->prev && !(cxt->optype & ST_CLONE))
2740                 free_context(cxt);
2741
2742         TRACEME(("do_store returns %d", status));
2743
2744         return status == 0;
2745 }
2746
2747 /*
2748  * pstore
2749  *
2750  * Store the transitive data closure of given object to disk.
2751  * Returns 0 on error, a true value otherwise.
2752  */
2753 int pstore(PerlIO *f, SV *sv)
2754 {
2755         TRACEME(("pstore"));
2756         return do_store(f, sv, 0, FALSE, (SV**) 0);
2757
2758 }
2759
2760 /*
2761  * net_pstore
2762  *
2763  * Same as pstore(), but network order is used for integers and doubles are
2764  * emitted as strings.
2765  */
2766 int net_pstore(PerlIO *f, SV *sv)
2767 {
2768         TRACEME(("net_pstore"));
2769         return do_store(f, sv, 0, TRUE, (SV**) 0);
2770 }
2771
2772 /***
2773  *** Memory stores.
2774  ***/
2775
2776 /*
2777  * mbuf2sv
2778  *
2779  * Build a new SV out of the content of the internal memory buffer.
2780  */
2781 static SV *mbuf2sv(void)
2782 {
2783         dSTCXT;
2784
2785         return newSVpv(mbase, MBUF_SIZE());
2786 }
2787
2788 /*
2789  * mstore
2790  *
2791  * Store the transitive data closure of given object to memory.
2792  * Returns undef on error, a scalar value containing the data otherwise.
2793  */
2794 SV *mstore(SV *sv)
2795 {
2796         dSTCXT;
2797         SV *out;
2798
2799         TRACEME(("mstore"));
2800
2801         if (!do_store((PerlIO*) 0, sv, 0, FALSE, &out))
2802                 return &PL_sv_undef;
2803
2804         return out;
2805 }
2806
2807 /*
2808  * net_mstore
2809  *
2810  * Same as mstore(), but network order is used for integers and doubles are
2811  * emitted as strings.
2812  */
2813 SV *net_mstore(SV *sv)
2814 {
2815         dSTCXT;
2816         SV *out;
2817
2818         TRACEME(("net_mstore"));
2819
2820         if (!do_store((PerlIO*) 0, sv, 0, TRUE, &out))
2821                 return &PL_sv_undef;
2822
2823         return out;
2824 }
2825
2826 /***
2827  *** Specific retrieve callbacks.
2828  ***/
2829
2830 /*
2831  * retrieve_other
2832  *
2833  * Return an error via croak, since it is not possible that we get here
2834  * under normal conditions, when facing a file produced via pstore().
2835  */
2836 static SV *retrieve_other(stcxt_t *cxt)
2837 {
2838         if (
2839                 cxt->ver_major != STORABLE_BIN_MAJOR &&
2840                 cxt->ver_minor != STORABLE_BIN_MINOR
2841         ) {
2842                 CROAK(("Corrupted storable %s (binary v%d.%d), current is v%d.%d",
2843                         cxt->fio ? "file" : "string",
2844                         cxt->ver_major, cxt->ver_minor,
2845                         STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
2846         } else {
2847                 CROAK(("Corrupted storable %s (binary v%d.%d)",
2848                         cxt->fio ? "file" : "string",
2849                         cxt->ver_major, cxt->ver_minor));
2850         }
2851
2852         return (SV *) 0;                /* Just in case */
2853 }
2854
2855 /*
2856  * retrieve_idx_blessed
2857  *
2858  * Layout is SX_IX_BLESS <index> <object> with SX_IX_BLESS already read.
2859  * <index> can be coded on either 1 or 5 bytes.
2860  */
2861 static SV *retrieve_idx_blessed(stcxt_t *cxt)
2862 {
2863         I32 idx;
2864         char *class;
2865         SV **sva;
2866         SV *sv;
2867
2868         TRACEME(("retrieve_idx_blessed (#%d)", cxt->tagnum));
2869
2870         GETMARK(idx);                   /* Index coded on a single char? */
2871         if (idx & 0x80)
2872                 RLEN(idx);
2873
2874         /*
2875          * Fetch classname in `aclass'
2876          */
2877
2878         sva = av_fetch(cxt->aclass, idx, FALSE);
2879         if (!sva)
2880                 CROAK(("Class name #%d should have been seen already", (int)idx));
2881
2882         class = SvPVX(*sva);    /* We know it's a PV, by construction */
2883
2884         TRACEME(("class ID %d => %s", idx, class));
2885
2886         /*
2887          * Retrieve object and bless it.
2888          */
2889
2890         sv = retrieve(cxt);
2891         if (sv)
2892                 BLESS(sv, class);
2893
2894         return sv;
2895 }
2896
2897 /*
2898  * retrieve_blessed
2899  *
2900  * Layout is SX_BLESS <len> <classname> <object> with SX_BLESS already read.
2901  * <len> can be coded on either 1 or 5 bytes.
2902  */
2903 static SV *retrieve_blessed(stcxt_t *cxt)
2904 {
2905         I32 len;
2906         SV *sv;
2907         char buf[LG_BLESS + 1];         /* Avoid malloc() if possible */
2908         char *class = buf;
2909
2910         TRACEME(("retrieve_blessed (#%d)", cxt->tagnum));
2911
2912         /*
2913          * Decode class name length and read that name.
2914          *
2915          * Short classnames have two advantages: their length is stored on one
2916          * single byte, and the string can be read on the stack.
2917          */
2918
2919         GETMARK(len);                   /* Length coded on a single char? */
2920         if (len & 0x80) {
2921                 RLEN(len);
2922                 TRACEME(("** allocating %d bytes for class name", len+1));
2923                 New(10003, class, len+1, char);
2924         }
2925         READ(class, len);
2926         class[len] = '\0';              /* Mark string end */
2927
2928         /*
2929          * It's a new classname, otherwise it would have been an SX_IX_BLESS.
2930          */
2931
2932         if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len)))
2933                 return (SV *) 0;
2934
2935         /*
2936          * Retrieve object and bless it.
2937          */
2938
2939         sv = retrieve(cxt);
2940         if (sv) {
2941                 BLESS(sv, class);
2942                 if (class != buf)
2943                         Safefree(class);
2944         }
2945
2946         return sv;
2947 }
2948
2949 /*
2950  * retrieve_hook
2951  *
2952  * Layout: SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
2953  * with leading mark already read, as usual.
2954  *
2955  * When recursion was involved during serialization of the object, there
2956  * is an unknown amount of serialized objects after the SX_HOOK mark.  Until
2957  * we reach a <flags> marker with the recursion bit cleared.
2958  */
2959 static SV *retrieve_hook(stcxt_t *cxt)
2960 {
2961         I32 len;
2962         char buf[LG_BLESS + 1];         /* Avoid malloc() if possible */
2963         char *class = buf;
2964         unsigned int flags;
2965         I32 len2;
2966         SV *frozen;
2967         I32 len3 = 0;
2968         AV *av = 0;
2969         SV *hook;
2970         SV *sv;
2971         SV *rv;
2972         int obj_type;
2973         I32 classname;
2974         int clone = cxt->optype & ST_CLONE;
2975
2976         TRACEME(("retrieve_hook (#%d)", cxt->tagnum));
2977
2978         /*
2979          * Read flags, which tell us about the type, and whether we need to recurse.
2980          */
2981
2982         GETMARK(flags);
2983
2984         /*
2985          * Create the (empty) object, and mark it as seen.
2986          *
2987          * This must be done now, because tags are incremented, and during
2988          * serialization, the object tag was affected before recursion could
2989          * take place.
2990          */
2991
2992         obj_type = flags & SHF_TYPE_MASK;
2993         switch (obj_type) {
2994         case SHT_SCALAR:
2995                 sv = newSV(0);
2996                 break;
2997         case SHT_ARRAY:
2998                 sv = (SV *) newAV();
2999                 break;
3000         case SHT_HASH:
3001                 sv = (SV *) newHV();
3002                 break;
3003         default:
3004                 return retrieve_other(cxt);             /* Let it croak */
3005         }
3006         SEEN(sv);
3007
3008         /*
3009          * Whilst flags tell us to recurse, do so.
3010          *
3011          * We don't need to remember the addresses returned by retrieval, because
3012          * all the references will be obtained through indirection via the object
3013          * tags in the object-ID list.
3014          */
3015
3016         while (flags & SHF_NEED_RECURSE) {
3017                 TRACEME(("retrieve_hook recursing..."));
3018                 rv = retrieve(cxt);
3019                 if (!rv)
3020                         return (SV *) 0;
3021                 TRACEME(("retrieve_hook back with rv=0x%"UVxf,
3022                          PTR2UV(rv)));
3023                 GETMARK(flags);
3024         }
3025
3026         if (flags & SHF_IDX_CLASSNAME) {
3027                 SV **sva;
3028                 I32 idx;
3029
3030                 /*
3031                  * Fetch index from `aclass'
3032                  */
3033
3034                 if (flags & SHF_LARGE_CLASSLEN)
3035                         RLEN(idx);
3036                 else
3037                         GETMARK(idx);
3038
3039                 sva = av_fetch(cxt->aclass, idx, FALSE);
3040                 if (!sva)
3041                         CROAK(("Class name #%d should have been seen already", (int)idx));
3042
3043                 class = SvPVX(*sva);    /* We know it's a PV, by construction */
3044                 TRACEME(("class ID %d => %s", idx, class));
3045
3046         } else {
3047                 /*
3048                  * Decode class name length and read that name.
3049                  *
3050                  * NOTA BENE: even if the length is stored on one byte, we don't read
3051                  * on the stack.  Just like retrieve_blessed(), we limit the name to
3052                  * LG_BLESS bytes.  This is an arbitrary decision.
3053                  */
3054
3055                 if (flags & SHF_LARGE_CLASSLEN)
3056                         RLEN(len);
3057                 else
3058                         GETMARK(len);
3059
3060                 if (len > LG_BLESS) {
3061                         TRACEME(("** allocating %d bytes for class name", len+1));
3062                         New(10003, class, len+1, char);
3063                 }
3064
3065                 READ(class, len);
3066                 class[len] = '\0';              /* Mark string end */
3067
3068                 /*
3069                  * Record new classname.
3070                  */
3071
3072                 if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len)))
3073                         return (SV *) 0;
3074         }
3075
3076         TRACEME(("class name: %s", class));
3077
3078         /*
3079          * Decode user-frozen string length and read it in a SV.
3080          *
3081          * For efficiency reasons, we read data directly into the SV buffer.
3082          * To understand that code, read retrieve_scalar()
3083          */
3084
3085         if (flags & SHF_LARGE_STRLEN)
3086                 RLEN(len2);
3087         else
3088                 GETMARK(len2);
3089
3090         frozen = NEWSV(10002, len2);
3091         if (len2) {
3092                 SAFEREAD(SvPVX(frozen), len2, frozen);
3093                 SvCUR_set(frozen, len2);
3094                 *SvEND(frozen) = '\0';
3095         }
3096         (void) SvPOK_only(frozen);              /* Validates string pointer */
3097         if (cxt->s_tainted)                             /* Is input source tainted? */
3098                 SvTAINT(frozen);
3099
3100         TRACEME(("frozen string: %d bytes", len2));
3101
3102         /*
3103          * Decode object-ID list length, if present.
3104          */
3105
3106         if (flags & SHF_HAS_LIST) {
3107                 if (flags & SHF_LARGE_LISTLEN)
3108                         RLEN(len3);
3109                 else
3110                         GETMARK(len3);
3111                 if (len3) {
3112                         av = newAV();
3113                         av_extend(av, len3 + 1);        /* Leave room for [0] */
3114                         AvFILLp(av) = len3;                     /* About to be filled anyway */
3115                 }
3116         }
3117
3118         TRACEME(("has %d object IDs to link", len3));
3119
3120         /*
3121          * Read object-ID list into array.
3122          * Because we pre-extended it, we can cheat and fill it manually.
3123          *
3124          * We read object tags and we can convert them into SV* on the fly
3125          * because we know all the references listed in there (as tags)
3126          * have been already serialized, hence we have a valid correspondance
3127          * between each of those tags and the recreated SV.
3128          */
3129
3130         if (av) {
3131                 SV **ary = AvARRAY(av);
3132                 int i;
3133                 for (i = 1; i <= len3; i++) {   /* We leave [0] alone */
3134                         I32 tag;
3135                         SV **svh;
3136                         SV *xsv;
3137
3138                         READ_I32(tag);
3139                         tag = ntohl(tag);
3140                         svh = av_fetch(cxt->aseen, tag, FALSE);
3141                         if (!svh)
3142                                 CROAK(("Object #%d should have been retrieved already", (int)tag));
3143                         xsv = *svh;
3144                         ary[i] = SvREFCNT_inc(xsv);
3145                 }
3146         }
3147
3148         /*
3149          * Bless the object and look up the STORABLE_thaw hook.
3150          */
3151
3152         BLESS(sv, class);
3153         hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3154         if (!hook) {
3155                 /*
3156                  * Hook not found.  Maybe they did not require the module where this
3157                  * hook is defined yet?
3158                  *
3159                  * If the require below succeeds, we'll be able to find the hook.
3160                  * Still, it only works reliably when each class is defined in a
3161                  * file of its own.
3162                  */
3163
3164                 SV *psv = newSVpvn("require ", 8);
3165                 sv_catpv(psv, class);
3166
3167                 TRACEME(("No STORABLE_thaw defined for objects of class %s", class));
3168                 TRACEME(("Going to require module '%s' with '%s'", class, SvPVX(psv)));
3169
3170                 perl_eval_sv(psv, G_DISCARD);
3171                 sv_free(psv);
3172
3173                 /*
3174                  * We cache results of pkg_can, so we need to uncache before attempting
3175                  * the lookup again.
3176                  */
3177
3178                 pkg_uncache(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3179                 hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3180
3181                 if (!hook)
3182                         CROAK(("No STORABLE_thaw defined for objects of class %s "
3183                                         "(even after a \"require %s;\")", class, class));
3184         }
3185
3186         /*
3187          * If we don't have an `av' yet, prepare one.
3188          * Then insert the frozen string as item [0].
3189          */
3190
3191         if (!av) {
3192                 av = newAV();
3193                 av_extend(av, 1);
3194                 AvFILLp(av) = 0;
3195         }
3196         AvARRAY(av)[0] = SvREFCNT_inc(frozen);
3197
3198         /*
3199          * Call the hook as:
3200          *
3201          *   $object->STORABLE_thaw($cloning, $frozen, @refs);
3202          *
3203          * where $object is our blessed (empty) object, $cloning is a boolean
3204          * telling whether we're running a deep clone, $frozen is the frozen
3205          * string the user gave us in his serializing hook, and @refs, which may
3206          * be empty, is the list of extra references he returned along for us
3207          * to serialize.
3208          *
3209          * In effect, the hook is an alternate creation routine for the class,
3210          * the object itself being already created by the runtime.
3211          */
3212
3213         TRACEME(("calling STORABLE_thaw on %s at 0x%"UVxf" (%"IVdf" args)",
3214                  class, PTR2UV(sv), AvFILLp(av) + 1));
3215
3216         rv = newRV(sv);
3217         (void) scalar_call(rv, hook, clone, av, G_SCALAR|G_DISCARD);
3218         SvREFCNT_dec(rv);
3219
3220         /*
3221          * Final cleanup.
3222          */
3223
3224         SvREFCNT_dec(frozen);
3225         av_undef(av);
3226         sv_free((SV *) av);
3227         if (!(flags & SHF_IDX_CLASSNAME) && class != buf)
3228                 Safefree(class);
3229
3230         return sv;
3231 }
3232
3233 /*
3234  * retrieve_ref
3235  *
3236  * Retrieve reference to some other scalar.
3237  * Layout is SX_REF <object>, with SX_REF already read.
3238  */
3239 static SV *retrieve_ref(stcxt_t *cxt)
3240 {
3241         SV *rv;
3242         SV *sv;
3243
3244         TRACEME(("retrieve_ref (#%d)", cxt->tagnum));
3245
3246         /*
3247          * We need to create the SV that holds the reference to the yet-to-retrieve
3248          * object now, so that we may record the address in the seen table.
3249          * Otherwise, if the object to retrieve references us, we won't be able
3250          * to resolve the SX_OBJECT we'll see at that point! Hence we cannot
3251          * do the retrieve first and use rv = newRV(sv) since it will be too late
3252          * for SEEN() recording.
3253          */
3254
3255         rv = NEWSV(10002, 0);
3256         SEEN(rv);                               /* Will return if rv is null */
3257         sv = retrieve(cxt);             /* Retrieve <object> */
3258         if (!sv)
3259                 return (SV *) 0;        /* Failed */
3260
3261         /*
3262          * WARNING: breaks RV encapsulation.
3263          *
3264          * Now for the tricky part. We have to upgrade our existing SV, so that
3265          * it is now an RV on sv... Again, we cheat by duplicating the code
3266          * held in newSVrv(), since we already got our SV from retrieve().
3267          *
3268          * We don't say:
3269          *
3270          *              SvRV(rv) = SvREFCNT_inc(sv);
3271          *
3272          * here because the reference count we got from retrieve() above is
3273          * already correct: if the object was retrieved from the file, then
3274          * its reference count is one. Otherwise, if it was retrieved via
3275          * an SX_OBJECT indication, a ref count increment was done.
3276          */
3277
3278         sv_upgrade(rv, SVt_RV);
3279         SvRV(rv) = sv;                          /* $rv = \$sv */
3280         SvROK_on(rv);
3281
3282         TRACEME(("ok (retrieve_ref at 0x%"UVxf")", PTR2UV(rv)));
3283
3284         return rv;
3285 }
3286
3287 /*
3288  * retrieve_overloaded
3289  *
3290  * Retrieve reference to some other scalar with overloading.
3291  * Layout is SX_OVERLOAD <object>, with SX_OVERLOAD already read.
3292  */
3293 static SV *retrieve_overloaded(stcxt_t *cxt)
3294 {
3295         SV *rv;
3296         SV *sv;
3297         HV *stash;
3298
3299         TRACEME(("retrieve_overloaded (#%d)", cxt->tagnum));
3300
3301         /*
3302          * Same code as retrieve_ref(), duplicated to avoid extra call.
3303          */
3304
3305         rv = NEWSV(10002, 0);
3306         SEEN(rv);                               /* Will return if rv is null */
3307         sv = retrieve(cxt);             /* Retrieve <object> */
3308         if (!sv)
3309                 return (SV *) 0;        /* Failed */
3310
3311         /*
3312          * WARNING: breaks RV encapsulation.
3313          */
3314
3315         sv_upgrade(rv, SVt_RV);
3316         SvRV(rv) = sv;                          /* $rv = \$sv */
3317         SvROK_on(rv);
3318
3319         /*
3320          * Restore overloading magic.
3321          */
3322
3323         stash = (HV *) SvSTASH (sv);
3324         if (!stash || !Gv_AMG(stash))
3325                 CROAK(("Cannot restore overloading on %s(0x%"UVxf")",
3326                        sv_reftype(sv, FALSE),
3327                        PTR2UV(sv)));
3328
3329         SvAMAGIC_on(rv);
3330
3331         TRACEME(("ok (retrieve_overloaded at 0x%"UVxf")", PTR2UV(rv)));
3332
3333         return rv;
3334 }
3335
3336 /*
3337  * retrieve_tied_array
3338  *
3339  * Retrieve tied array
3340  * Layout is SX_TIED_ARRAY <object>, with SX_TIED_ARRAY already read.
3341  */
3342 static SV *retrieve_tied_array(stcxt_t *cxt)
3343 {
3344         SV *tv;
3345         SV *sv;
3346
3347         TRACEME(("retrieve_tied_array (#%d)", cxt->tagnum));
3348
3349         tv = NEWSV(10002, 0);
3350         SEEN(tv);                                       /* Will return if tv is null */
3351         sv = retrieve(cxt);                     /* Retrieve <object> */
3352         if (!sv)
3353                 return (SV *) 0;                /* Failed */
3354
3355         sv_upgrade(tv, SVt_PVAV);
3356         AvREAL_off((AV *)tv);
3357         sv_magic(tv, sv, 'P', Nullch, 0);
3358         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3359
3360         TRACEME(("ok (retrieve_tied_array at 0x%"UVxf")", PTR2UV(tv)));
3361
3362         return tv;
3363 }
3364
3365 /*
3366  * retrieve_tied_hash
3367  *
3368  * Retrieve tied hash
3369  * Layout is SX_TIED_HASH <object>, with SX_TIED_HASH already read.
3370  */
3371 static SV *retrieve_tied_hash(stcxt_t *cxt)
3372 {
3373         SV *tv;
3374         SV *sv;
3375
3376         TRACEME(("retrieve_tied_hash (#%d)", cxt->tagnum));
3377
3378         tv = NEWSV(10002, 0);
3379         SEEN(tv);                                       /* Will return if tv is null */
3380         sv = retrieve(cxt);                     /* Retrieve <object> */
3381         if (!sv)
3382                 return (SV *) 0;                /* Failed */
3383
3384         sv_upgrade(tv, SVt_PVHV);
3385         sv_magic(tv, sv, 'P', Nullch, 0);
3386         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3387
3388         TRACEME(("ok (retrieve_tied_hash at 0x%"UVxf")", PTR2UV(tv)));
3389
3390         return tv;
3391 }
3392
3393 /*
3394  * retrieve_tied_scalar
3395  *
3396  * Retrieve tied scalar
3397  * Layout is SX_TIED_SCALAR <object>, with SX_TIED_SCALAR already read.
3398  */
3399 static SV *retrieve_tied_scalar(cxt)
3400 stcxt_t *cxt;
3401 {
3402         SV *tv;
3403         SV *sv;
3404
3405         TRACEME(("retrieve_tied_scalar (#%d)", cxt->tagnum));
3406
3407         tv = NEWSV(10002, 0);
3408         SEEN(tv);                                       /* Will return if rv is null */
3409         sv = retrieve(cxt);                     /* Retrieve <object> */
3410         if (!sv)
3411                 return (SV *) 0;                /* Failed */
3412
3413         sv_upgrade(tv, SVt_PVMG);
3414         sv_magic(tv, sv, 'q', Nullch, 0);
3415         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3416
3417         TRACEME(("ok (retrieve_tied_scalar at 0x%"UVxf")", PTR2UV(tv)));
3418
3419         return tv;
3420 }
3421
3422 /*
3423  * retrieve_tied_key
3424  *
3425  * Retrieve reference to value in a tied hash.
3426  * Layout is SX_TIED_KEY <object> <key>, with SX_TIED_KEY already read.
3427  */
3428 static SV *retrieve_tied_key(stcxt_t *cxt)
3429 {
3430         SV *tv;
3431         SV *sv;
3432         SV *key;
3433
3434         TRACEME(("retrieve_tied_key (#%d)", cxt->tagnum));
3435
3436         tv = NEWSV(10002, 0);
3437         SEEN(tv);                                       /* Will return if tv is null */
3438         sv = retrieve(cxt);                     /* Retrieve <object> */
3439         if (!sv)
3440                 return (SV *) 0;                /* Failed */
3441
3442         key = retrieve(cxt);            /* Retrieve <key> */
3443         if (!key)
3444                 return (SV *) 0;                /* Failed */
3445
3446         sv_upgrade(tv, SVt_PVMG);
3447         sv_magic(tv, sv, 'p', (char *)key, HEf_SVKEY);
3448         SvREFCNT_dec(key);                      /* Undo refcnt inc from sv_magic() */
3449         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3450
3451         return tv;
3452 }
3453
3454 /*
3455  * retrieve_tied_idx
3456  *
3457  * Retrieve reference to value in a tied array.
3458  * Layout is SX_TIED_IDX <object> <idx>, with SX_TIED_IDX already read.
3459  */
3460 static SV *retrieve_tied_idx(stcxt_t *cxt)
3461 {
3462         SV *tv;
3463         SV *sv;
3464         I32 idx;
3465
3466         TRACEME(("retrieve_tied_idx (#%d)", cxt->tagnum));
3467
3468         tv = NEWSV(10002, 0);
3469         SEEN(tv);                                       /* Will return if tv is null */
3470         sv = retrieve(cxt);                     /* Retrieve <object> */
3471         if (!sv)
3472                 return (SV *) 0;                /* Failed */
3473
3474         RLEN(idx);                                      /* Retrieve <idx> */
3475
3476         sv_upgrade(tv, SVt_PVMG);
3477         sv_magic(tv, sv, 'p', Nullch, idx);
3478         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3479
3480         return tv;
3481 }
3482
3483
3484 /*
3485  * retrieve_lscalar
3486  *
3487  * Retrieve defined long (string) scalar.
3488  *
3489  * Layout is SX_LSCALAR <length> <data>, with SX_LSCALAR already read.
3490  * The scalar is "long" in that <length> is larger than LG_SCALAR so it
3491  * was not stored on a single byte.
3492  */
3493 static SV *retrieve_lscalar(stcxt_t *cxt)
3494 {
3495         I32 len;
3496         SV *sv;
3497
3498         RLEN(len);
3499         TRACEME(("retrieve_lscalar (#%d), len = %"IVdf, cxt->tagnum, len));
3500
3501         /*
3502          * Allocate an empty scalar of the suitable length.
3503          */
3504
3505         sv = NEWSV(10002, len);
3506         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3507
3508         /*
3509          * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
3510          *
3511          * Now, for efficiency reasons, read data directly inside the SV buffer,
3512          * and perform the SV final settings directly by duplicating the final
3513          * work done by sv_setpv. Since we're going to allocate lots of scalars
3514          * this way, it's worth the hassle and risk.
3515          */
3516
3517         SAFEREAD(SvPVX(sv), len, sv);
3518         SvCUR_set(sv, len);                             /* Record C string length */
3519         *SvEND(sv) = '\0';                              /* Ensure it's null terminated anyway */
3520         (void) SvPOK_only(sv);                  /* Validate string pointer */
3521         if (cxt->s_tainted)                             /* Is input source tainted? */
3522                 SvTAINT(sv);                            /* External data cannot be trusted */
3523
3524         TRACEME(("large scalar len %"IVdf" '%s'", len, SvPVX(sv)));
3525         TRACEME(("ok (retrieve_lscalar at 0x%"UVxf")", PTR2UV(sv)));
3526
3527         return sv;
3528 }
3529
3530 /*
3531  * retrieve_scalar
3532  *
3533  * Retrieve defined short (string) scalar.
3534  *
3535  * Layout is SX_SCALAR <length> <data>, with SX_SCALAR already read.
3536  * The scalar is "short" so <length> is single byte. If it is 0, there
3537  * is no <data> section.
3538  */
3539 static SV *retrieve_scalar(stcxt_t *cxt)
3540 {
3541         int len;
3542         SV *sv;
3543
3544         GETMARK(len);
3545         TRACEME(("retrieve_scalar (#%d), len = %d", cxt->tagnum, len));
3546
3547         /*
3548          * Allocate an empty scalar of the suitable length.
3549          */
3550
3551         sv = NEWSV(10002, len);
3552         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3553
3554         /*
3555          * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
3556          */
3557
3558         if (len == 0) {
3559                 /*
3560                  * newSV did not upgrade to SVt_PV so the scalar is undefined.
3561                  * To make it defined with an empty length, upgrade it now...
3562                  */
3563                 sv_upgrade(sv, SVt_PV);
3564                 SvGROW(sv, 1);
3565                 *SvEND(sv) = '\0';                      /* Ensure it's null terminated anyway */
3566                 TRACEME(("ok (retrieve_scalar empty at 0x%"UVxf")", PTR2UV(sv)));
3567         } else {
3568                 /*
3569                  * Now, for efficiency reasons, read data directly inside the SV buffer,
3570                  * and perform the SV final settings directly by duplicating the final
3571                  * work done by sv_setpv. Since we're going to allocate lots of scalars
3572                  * this way, it's worth the hassle and risk.
3573                  */
3574                 SAFEREAD(SvPVX(sv), len, sv);
3575                 SvCUR_set(sv, len);                     /* Record C string length */
3576                 *SvEND(sv) = '\0';                      /* Ensure it's null terminated anyway */
3577                 TRACEME(("small scalar len %d '%s'", len, SvPVX(sv)));
3578         }
3579
3580         (void) SvPOK_only(sv);                  /* Validate string pointer */
3581         if (cxt->s_tainted)                             /* Is input source tainted? */
3582                 SvTAINT(sv);                            /* External data cannot be trusted */
3583
3584         TRACEME(("ok (retrieve_scalar at 0x%"UVxf")", PTR2UV(sv)));
3585         return sv;
3586 }
3587
3588 /*
3589  * retrieve_utf8str
3590  *
3591  * Like retrieve_scalar(), but tag result as utf8.
3592  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
3593  */
3594 static SV *retrieve_utf8str(stcxt_t *cxt)
3595 {
3596         SV *sv;
3597
3598         TRACEME(("retrieve_utf8str"));
3599
3600         sv = retrieve_scalar(cxt);
3601         if (sv)
3602                 SvUTF8_on(sv);
3603
3604         return sv;
3605 }
3606
3607 /*
3608  * retrieve_lutf8str
3609  *
3610  * Like retrieve_lscalar(), but tag result as utf8.
3611  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
3612  */
3613 static SV *retrieve_lutf8str(stcxt_t *cxt)
3614 {
3615         SV *sv;
3616
3617         TRACEME(("retrieve_lutf8str"));
3618
3619         sv = retrieve_lscalar(cxt);
3620         if (sv)
3621                 SvUTF8_on(sv);
3622
3623         return sv;
3624 }
3625
3626 /*
3627  * retrieve_integer
3628  *
3629  * Retrieve defined integer.
3630  * Layout is SX_INTEGER <data>, whith SX_INTEGER already read.
3631  */
3632 static SV *retrieve_integer(stcxt_t *cxt)
3633 {
3634         SV *sv;
3635         IV iv;
3636
3637         TRACEME(("retrieve_integer (#%d)", cxt->tagnum));
3638
3639         READ(&iv, sizeof(iv));
3640         sv = newSViv(iv);
3641         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3642
3643         TRACEME(("integer %"IVdf, iv));
3644         TRACEME(("ok (retrieve_integer at 0x%"UVxf")", PTR2UV(sv)));
3645
3646         return sv;
3647 }
3648
3649 /*
3650  * retrieve_netint
3651  *
3652  * Retrieve defined integer in network order.
3653  * Layout is SX_NETINT <data>, whith SX_NETINT already read.
3654  */
3655 static SV *retrieve_netint(stcxt_t *cxt)
3656 {
3657         SV *sv;
3658         I32 iv;
3659
3660         TRACEME(("retrieve_netint (#%d)", cxt->tagnum));
3661
3662         READ_I32(iv);
3663 #ifdef HAS_NTOHL
3664         sv = newSViv((int) ntohl(iv));
3665         TRACEME(("network integer %d", (int) ntohl(iv)));
3666 #else
3667         sv = newSViv(iv);
3668         TRACEME(("network integer (as-is) %d", iv));
3669 #endif
3670         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3671
3672         TRACEME(("ok (retrieve_netint at 0x%"UVxf")", PTR2UV(sv)));
3673
3674         return sv;
3675 }
3676
3677 /*
3678  * retrieve_double
3679  *
3680  * Retrieve defined double.
3681  * Layout is SX_DOUBLE <data>, whith SX_DOUBLE already read.
3682  */
3683 static SV *retrieve_double(stcxt_t *cxt)
3684 {
3685         SV *sv;
3686         NV nv;
3687
3688         TRACEME(("retrieve_double (#%d)", cxt->tagnum));
3689
3690         READ(&nv, sizeof(nv));
3691         sv = newSVnv(nv);
3692         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3693
3694         TRACEME(("double %"NVff, nv));
3695         TRACEME(("ok (retrieve_double at 0x%"UVxf")", PTR2UV(sv)));
3696
3697         return sv;
3698 }
3699
3700 /*
3701  * retrieve_byte
3702  *
3703  * Retrieve defined byte (small integer within the [-128, +127] range).
3704  * Layout is SX_BYTE <data>, whith SX_BYTE already read.
3705  */
3706 static SV *retrieve_byte(stcxt_t *cxt)
3707 {
3708         SV *sv;
3709         int siv;
3710
3711         TRACEME(("retrieve_byte (#%d)", cxt->tagnum));
3712
3713         GETMARK(siv);
3714         TRACEME(("small integer read as %d", (unsigned char) siv));
3715         sv = newSViv((unsigned char) siv - 128);
3716         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3717
3718         TRACEME(("byte %d", (unsigned char) siv - 128));
3719         TRACEME(("ok (retrieve_byte at 0x%"UVxf")", PTR2UV(sv)));
3720
3721         return sv;
3722 }
3723
3724 /*
3725  * retrieve_undef
3726  *
3727  * Return the undefined value.
3728  */
3729 static SV *retrieve_undef(stcxt_t *cxt)
3730 {
3731         SV* sv;
3732
3733         TRACEME(("retrieve_undef"));
3734
3735         sv = newSV(0);
3736         SEEN(sv);
3737
3738         return sv;
3739 }
3740
3741 /*
3742  * retrieve_sv_undef
3743  *
3744  * Return the immortal undefined value.
3745  */
3746 static SV *retrieve_sv_undef(stcxt_t *cxt)
3747 {
3748         SV *sv = &PL_sv_undef;
3749
3750         TRACEME(("retrieve_sv_undef"));
3751
3752         SEEN(sv);
3753         return sv;
3754 }
3755
3756 /*
3757  * retrieve_sv_yes
3758  *
3759  * Return the immortal yes value.
3760  */
3761 static SV *retrieve_sv_yes(stcxt_t *cxt)
3762 {
3763         SV *sv = &PL_sv_yes;
3764
3765         TRACEME(("retrieve_sv_yes"));
3766
3767         SEEN(sv);
3768         return sv;
3769 }
3770
3771 /*
3772  * retrieve_sv_no
3773  *
3774  * Return the immortal no value.
3775  */
3776 static SV *retrieve_sv_no(stcxt_t *cxt)
3777 {
3778         SV *sv = &PL_sv_no;
3779
3780         TRACEME(("retrieve_sv_no"));
3781
3782         SEEN(sv);
3783         return sv;
3784 }
3785
3786 /*
3787  * retrieve_array
3788  *
3789  * Retrieve a whole array.
3790  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
3791  * Each item is stored as <object>.
3792  *
3793  * When we come here, SX_ARRAY has been read already.
3794  */
3795 static SV *retrieve_array(stcxt_t *cxt)
3796 {
3797         I32 len;
3798         I32 i;
3799         AV *av;
3800         SV *sv;
3801
3802         TRACEME(("retrieve_array (#%d)", cxt->tagnum));
3803
3804         /*
3805          * Read length, and allocate array, then pre-extend it.
3806          */
3807
3808         RLEN(len);
3809         TRACEME(("size = %d", len));
3810         av = newAV();
3811         SEEN(av);                                       /* Will return if array not allocated nicely */
3812         if (len)
3813                 av_extend(av, len);
3814         else
3815                 return (SV *) av;               /* No data follow if array is empty */
3816
3817         /*
3818          * Now get each item in turn...
3819          */
3820
3821         for (i = 0; i < len; i++) {
3822                 TRACEME(("(#%d) item", i));
3823                 sv = retrieve(cxt);                             /* Retrieve item */
3824                 if (!sv)
3825                         return (SV *) 0;
3826                 if (av_store(av, i, sv) == 0)
3827                         return (SV *) 0;
3828         }
3829
3830         TRACEME(("ok (retrieve_array at 0x%"UVxf")", PTR2UV(av)));
3831
3832         return (SV *) av;
3833 }
3834
3835 /*
3836  * retrieve_hash
3837  *
3838  * Retrieve a whole hash table.
3839  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
3840  * Keys are stored as <length> <data>, the <data> section being omitted
3841  * if length is 0.
3842  * Values are stored as <object>.
3843  *
3844  * When we come here, SX_HASH has been read already.
3845  */
3846 static SV *retrieve_hash(stcxt_t *cxt)
3847 {
3848         I32 len;
3849         I32 size;
3850         I32 i;
3851         HV *hv;
3852         SV *sv;
3853         static SV *sv_h_undef = (SV *) 0;               /* hv_store() bug */
3854
3855         TRACEME(("retrieve_hash (#%d)", cxt->tagnum));
3856
3857         /*
3858          * Read length, allocate table.
3859          */
3860
3861         RLEN(len);
3862         TRACEME(("size = %d", len));
3863         hv = newHV();
3864         SEEN(hv);                       /* Will return if table not allocated properly */
3865         if (len == 0)
3866                 return (SV *) hv;       /* No data follow if table empty */
3867
3868         /*
3869          * Now get each key/value pair in turn...
3870          */
3871
3872         for (i = 0; i < len; i++) {
3873                 /*
3874                  * Get value first.
3875                  */
3876
3877                 TRACEME(("(#%d) value", i));
3878                 sv = retrieve(cxt);
3879                 if (!sv)
3880                         return (SV *) 0;
3881
3882                 /*
3883                  * Get key.
3884                  * Since we're reading into kbuf, we must ensure we're not
3885                  * recursing between the read and the hv_store() where it's used.
3886                  * Hence the key comes after the value.
3887                  */
3888
3889                 RLEN(size);                                             /* Get key size */
3890                 KBUFCHK(size);                                  /* Grow hash key read pool if needed */
3891                 if (size)
3892                         READ(kbuf, size);
3893                 kbuf[size] = '\0';                              /* Mark string end, just in case */
3894                 TRACEME(("(#%d) key '%s'", i, kbuf));
3895
3896                 /*
3897                  * Enter key/value pair into hash table.
3898                  */
3899
3900                 if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
3901                         return (SV *) 0;
3902         }
3903
3904         TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv)));
3905
3906         return (SV *) hv;
3907 }
3908
3909 /*
3910  * old_retrieve_array
3911  *
3912  * Retrieve a whole array in pre-0.6 binary format.
3913  *
3914  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
3915  * Each item is stored as SX_ITEM <object> or SX_IT_UNDEF for "holes".
3916  *
3917  * When we come here, SX_ARRAY has been read already.
3918  */
3919 static SV *old_retrieve_array(stcxt_t *cxt)
3920 {
3921         I32 len;
3922         I32 i;
3923         AV *av;
3924         SV *sv;
3925         int c;
3926
3927         TRACEME(("old_retrieve_array (#%d)", cxt->tagnum));
3928
3929         /*
3930          * Read length, and allocate array, then pre-extend it.
3931          */
3932
3933         RLEN(len);
3934         TRACEME(("size = %d", len));
3935         av = newAV();
3936         SEEN(av);                                       /* Will return if array not allocated nicely */
3937         if (len)
3938                 av_extend(av, len);
3939         else
3940                 return (SV *) av;               /* No data follow if array is empty */
3941
3942         /*
3943          * Now get each item in turn...
3944          */
3945
3946         for (i = 0; i < len; i++) {
3947                 GETMARK(c);
3948                 if (c == SX_IT_UNDEF) {
3949                         TRACEME(("(#%d) undef item", i));
3950                         continue;                       /* av_extend() already filled us with undef */
3951                 }
3952                 if (c != SX_ITEM)
3953                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
3954                 TRACEME(("(#%d) item", i));
3955                 sv = retrieve(cxt);                                                     /* Retrieve item */
3956                 if (!sv)
3957                         return (SV *) 0;
3958                 if (av_store(av, i, sv) == 0)
3959                         return (SV *) 0;
3960         }
3961
3962         TRACEME(("ok (old_retrieve_array at 0x%"UVxf")", PTR2UV(av)));
3963
3964         return (SV *) av;
3965 }
3966
3967 /*
3968  * old_retrieve_hash
3969  *
3970  * Retrieve a whole hash table in pre-0.6 binary format.
3971  *
3972  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
3973  * Keys are stored as SX_KEY <length> <data>, the <data> section being omitted
3974  * if length is 0.
3975  * Values are stored as SX_VALUE <object> or SX_VL_UNDEF for "holes".
3976  *
3977  * When we come here, SX_HASH has been read already.
3978  */
3979 static SV *old_retrieve_hash(stcxt_t *cxt)
3980 {
3981         I32 len;
3982         I32 size;
3983         I32 i;
3984         HV *hv;
3985         SV *sv;
3986         int c;
3987         static SV *sv_h_undef = (SV *) 0;               /* hv_store() bug */
3988
3989         TRACEME(("old_retrieve_hash (#%d)", cxt->tagnum));
3990
3991         /*
3992          * Read length, allocate table.
3993          */
3994
3995         RLEN(len);
3996         TRACEME(("size = %d", len));
3997         hv = newHV();
3998         SEEN(hv);                               /* Will return if table not allocated properly */
3999         if (len == 0)
4000                 return (SV *) hv;       /* No data follow if table empty */
4001
4002         /*
4003          * Now get each key/value pair in turn...
4004          */
4005
4006         for (i = 0; i < len; i++) {
4007                 /*
4008                  * Get value first.
4009                  */
4010
4011                 GETMARK(c);
4012                 if (c == SX_VL_UNDEF) {
4013                         TRACEME(("(#%d) undef value", i));
4014                         /*
4015                          * Due to a bug in hv_store(), it's not possible to pass
4016                          * &PL_sv_undef to hv_store() as a value, otherwise the
4017                          * associated key will not be creatable any more. -- RAM, 14/01/97
4018                          */
4019                         if (!sv_h_undef)
4020                                 sv_h_undef = newSVsv(&PL_sv_undef);
4021                         sv = SvREFCNT_inc(sv_h_undef);
4022                 } else if (c == SX_VALUE) {
4023                         TRACEME(("(#%d) value", i));
4024                         sv = retrieve(cxt);
4025                         if (!sv)
4026                                 return (SV *) 0;
4027                 } else
4028                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
4029
4030                 /*
4031                  * Get key.
4032                  * Since we're reading into kbuf, we must ensure we're not
4033                  * recursing between the read and the hv_store() where it's used.
4034                  * Hence the key comes after the value.
4035                  */
4036
4037                 GETMARK(c);
4038                 if (c != SX_KEY)
4039                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
4040                 RLEN(size);                                             /* Get key size */
4041                 KBUFCHK(size);                                  /* Grow hash key read pool if needed */
4042                 if (size)
4043                         READ(kbuf, size);
4044                 kbuf[size] = '\0';                              /* Mark string end, just in case */
4045                 TRACEME(("(#%d) key '%s'", i, kbuf));
4046
4047                 /*
4048                  * Enter key/value pair into hash table.
4049                  */
4050
4051                 if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
4052                         return (SV *) 0;
4053         }
4054
4055         TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv)));
4056
4057         return (SV *) hv;
4058 }
4059
4060 /***
4061  *** Retrieval engine.
4062  ***/
4063
4064 /*
4065  * magic_check
4066  *
4067  * Make sure the stored data we're trying to retrieve has been produced
4068  * on an ILP compatible system with the same byteorder. It croaks out in
4069  * case an error is detected. [ILP = integer-long-pointer sizes]
4070  * Returns null if error is detected, &PL_sv_undef otherwise.
4071  *
4072  * Note that there's no byte ordering info emitted when network order was
4073  * used at store time.
4074  */
4075 static SV *magic_check(stcxt_t *cxt)
4076 {
4077         char buf[256];
4078         char byteorder[256];
4079         int c;
4080         int use_network_order;
4081         int version_major;
4082         int version_minor = 0;
4083
4084         TRACEME(("magic_check"));
4085
4086         /*
4087          * The "magic number" is only for files, not when freezing in memory.
4088          */
4089
4090         if (cxt->fio) {
4091                 STRLEN len = sizeof(magicstr) - 1;
4092                 STRLEN old_len;
4093
4094                 READ(buf, len);                                 /* Not null-terminated */
4095                 buf[len] = '\0';                                /* Is now */
4096
4097                 if (0 == strcmp(buf, magicstr))
4098                         goto magic_ok;
4099
4100                 /*
4101                  * Try to read more bytes to check for the old magic number, which
4102                  * was longer.
4103                  */
4104
4105                 old_len = sizeof(old_magicstr) - 1;
4106                 READ(&buf[len], old_len - len);
4107                 buf[old_len] = '\0';                    /* Is now null-terminated */
4108
4109                 if (strcmp(buf, old_magicstr))
4110                         CROAK(("File is not a perl storable"));
4111         }
4112
4113 magic_ok:
4114         /*
4115          * Starting with 0.6, the "use_network_order" byte flag is also used to
4116          * indicate the version number of the binary, and therefore governs the
4117          * setting of sv_retrieve_vtbl. See magic_write().
4118          */
4119
4120         GETMARK(use_network_order);
4121         version_major = use_network_order >> 1;
4122         cxt->retrieve_vtbl = version_major ? sv_retrieve : sv_old_retrieve;
4123
4124         TRACEME(("magic_check: netorder = 0x%x", use_network_order));
4125
4126
4127         /*
4128          * Starting with 0.7 (binary major 2), a full byte is dedicated to the
4129          * minor version of the protocol.  See magic_write().
4130          */
4131
4132         if (version_major > 1)
4133                 GETMARK(version_minor);
4134
4135         cxt->ver_major = version_major;
4136         cxt->ver_minor = version_minor;
4137
4138         TRACEME(("binary image version is %d.%d", version_major, version_minor));
4139
4140         /*
4141          * Inter-operability sanity check: we can't retrieve something stored
4142          * using a format more recent than ours, because we have no way to
4143          * know what has changed, and letting retrieval go would mean a probable
4144          * failure reporting a "corrupted" storable file.
4145          */
4146
4147         if (
4148                 version_major > STORABLE_BIN_MAJOR ||
4149                         (version_major == STORABLE_BIN_MAJOR &&
4150                         version_minor > STORABLE_BIN_MINOR)
4151         )
4152                 CROAK(("Storable binary image v%d.%d more recent than I am (v%d.%d)",
4153                         version_major, version_minor,
4154                         STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
4155
4156         /*
4157          * If they stored using network order, there's no byte ordering
4158          * information to check.
4159          */
4160
4161         if (cxt->netorder = (use_network_order & 0x1))
4162                 return &PL_sv_undef;                    /* No byte ordering info */
4163
4164         sprintf(byteorder, "%lx", (unsigned long) BYTEORDER);
4165         GETMARK(c);
4166         READ(buf, c);                                           /* Not null-terminated */
4167         buf[c] = '\0';                                          /* Is now */
4168
4169         if (strcmp(buf, byteorder))
4170                 CROAK(("Byte order is not compatible"));
4171
4172         GETMARK(c);             /* sizeof(int) */
4173         if ((int) c != sizeof(int))
4174                 CROAK(("Integer size is not compatible"));
4175
4176         GETMARK(c);             /* sizeof(long) */
4177         if ((int) c != sizeof(long))
4178                 CROAK(("Long integer size is not compatible"));
4179
4180         GETMARK(c);             /* sizeof(char *) */
4181         if ((int) c != sizeof(char *))
4182                 CROAK(("Pointer integer size is not compatible"));
4183
4184         if (version_major >= 2 && version_minor >= 2) {
4185                 GETMARK(c);             /* sizeof(NV) */
4186                 if ((int) c != sizeof(NV))
4187                         CROAK(("Double size is not compatible"));
4188         }
4189
4190         return &PL_sv_undef;    /* OK */
4191 }
4192
4193 /*
4194  * retrieve
4195  *
4196  * Recursively retrieve objects from the specified file and return their
4197  * root SV (which may be an AV or an HV for what we care).
4198  * Returns null if there is a problem.
4199  */
4200 static SV *retrieve(stcxt_t *cxt)
4201 {
4202         int type;
4203         SV **svh;
4204         SV *sv;
4205
4206         TRACEME(("retrieve"));
4207
4208         /*
4209          * Grab address tag which identifies the object if we are retrieving
4210          * an older format. Since the new binary format counts objects and no
4211          * longer explicitely tags them, we must keep track of the correspondance
4212          * ourselves.
4213          *
4214          * The following section will disappear one day when the old format is
4215          * no longer supported, hence the final "goto" in the "if" block.
4216          */
4217
4218         if (cxt->hseen) {                                               /* Retrieving old binary */
4219                 stag_t tag;
4220                 if (cxt->netorder) {
4221                         I32 nettag;
4222                         READ(&nettag, sizeof(I32));             /* Ordered sequence of I32 */
4223                         tag = (stag_t) nettag;
4224                 } else
4225                         READ(&tag, sizeof(stag_t));             /* Original address of the SV */
4226
4227                 GETMARK(type);
4228                 if (type == SX_OBJECT) {
4229                         I32 tagn;
4230                         svh = hv_fetch(cxt->hseen, (char *) &tag, sizeof(tag), FALSE);
4231                         if (!svh)
4232                                 CROAK(("Old tag 0x%x should have been mapped already", (unsigned)tag));
4233                         tagn = SvIV(*svh);      /* Mapped tag number computed earlier below */
4234
4235                         /*
4236                          * The following code is common with the SX_OBJECT case below.
4237                          */
4238
4239                         svh = av_fetch(cxt->aseen, tagn, FALSE);
4240                         if (!svh)
4241                                 CROAK(("Object #%d should have been retrieved already", (int)tagn));
4242                         sv = *svh;
4243                         TRACEME(("has retrieved #%d at 0x%"UVxf, tagn, PTR2UV(sv)));
4244                         SvREFCNT_inc(sv);       /* One more reference to this same sv */
4245                         return sv;                      /* The SV pointer where object was retrieved */
4246                 }
4247
4248                 /*
4249                  * Map new object, but don't increase tagnum. This will be done
4250                  * by each of the retrieve_* functions when they call SEEN().
4251                  *
4252                  * The mapping associates the "tag" initially present with a unique
4253                  * tag number. See test for SX_OBJECT above to see how this is perused.
4254                  */
4255
4256                 if (!hv_store(cxt->hseen, (char *) &tag, sizeof(tag),
4257                                 newSViv(cxt->tagnum), 0))
4258                         return (SV *) 0;
4259
4260                 goto first_time;
4261         }
4262
4263         /*
4264          * Regular post-0.6 binary format.
4265          */
4266
4267 again:
4268         GETMARK(type);
4269
4270         TRACEME(("retrieve type = %d", type));
4271
4272         /*
4273          * Are we dealing with an object we should have already retrieved?
4274          */
4275
4276         if (type == SX_OBJECT) {
4277                 I32 tag;
4278                 READ_I32(tag);
4279                 tag = ntohl(tag);
4280                 svh = av_fetch(cxt->aseen, tag, FALSE);
4281                 if (!svh)
4282                         CROAK(("Object #%d should have been retrieved already", (int)tag));
4283                 sv = *svh;
4284                 TRACEME(("had retrieved #%d at 0x%"UVxf, tag, PTR2UV(sv)));
4285                 SvREFCNT_inc(sv);       /* One more reference to this same sv */
4286                 return sv;                      /* The SV pointer where object was retrieved */
4287         }
4288
4289 first_time:             /* Will disappear when support for old format is dropped */
4290
4291         /*
4292          * Okay, first time through for this one.
4293          */
4294
4295         sv = RETRIEVE(cxt, type)(cxt);
4296         if (!sv)
4297                 return (SV *) 0;                        /* Failed */
4298
4299         /*
4300          * Old binary formats (pre-0.7).
4301          *
4302          * Final notifications, ended by SX_STORED may now follow.
4303          * Currently, the only pertinent notification to apply on the
4304          * freshly retrieved object is either:
4305          *    SX_CLASS <char-len> <classname> for short classnames.
4306          *    SX_LG_CLASS <int-len> <classname> for larger one (rare!).
4307          * Class name is then read into the key buffer pool used by
4308          * hash table key retrieval.
4309          */
4310
4311         if (cxt->ver_major < 2) {
4312                 while ((type = GETCHAR()) != SX_STORED) {
4313                         I32 len;
4314                         switch (type) {
4315                         case SX_CLASS:
4316                                 GETMARK(len);                   /* Length coded on a single char */
4317                                 break;
4318                         case SX_LG_CLASS:                       /* Length coded on a regular integer */
4319                                 RLEN(len);
4320                                 break;
4321                         case EOF:
4322                         default:
4323                                 return (SV *) 0;                /* Failed */
4324                         }
4325                         KBUFCHK(len);                           /* Grow buffer as necessary */
4326                         if (len)
4327                                 READ(kbuf, len);
4328                         kbuf[len] = '\0';                       /* Mark string end */
4329                         BLESS(sv, kbuf);
4330                 }
4331         }
4332
4333         TRACEME(("ok (retrieved 0x%"UVxf", refcnt=%d, %s)", PTR2UV(sv),
4334                 SvREFCNT(sv) - 1, sv_reftype(sv, FALSE)));
4335
4336         return sv;      /* Ok */
4337 }
4338
4339 /*
4340  * do_retrieve
4341  *
4342  * Retrieve data held in file and return the root object.
4343  * Common routine for pretrieve and mretrieve.
4344  */
4345 static SV *do_retrieve(
4346         PerlIO *f,
4347         SV *in,
4348         int optype)
4349 {
4350         dSTCXT;
4351         SV *sv;
4352         int is_tainted;                         /* Is input source tainted? */
4353         struct extendable msave;        /* Where potentially valid mbuf is saved */
4354
4355         TRACEME(("do_retrieve (optype = 0x%x)", optype));
4356
4357         optype |= ST_RETRIEVE;
4358
4359         /*
4360          * Sanity assertions for retrieve dispatch tables.
4361          */
4362
4363         ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve),
4364                 ("old and new retrieve dispatch table have same size"));
4365         ASSERT(sv_old_retrieve[SX_ERROR] == retrieve_other,
4366                 ("SX_ERROR entry correctly initialized in old dispatch table"));
4367         ASSERT(sv_retrieve[SX_ERROR] == retrieve_other,
4368                 ("SX_ERROR entry correctly initialized in new dispatch table"));
4369
4370         /*
4371          * Workaround for CROAK leak: if they enter with a "dirty" context,
4372          * free up memory for them now.
4373          */
4374
4375         if (cxt->s_dirty)
4376                 clean_context(cxt);
4377
4378         /*
4379          * Now that STORABLE_xxx hooks exist, it is possible that they try to
4380          * re-enter retrieve() via the hooks.
4381          */
4382
4383         if (cxt->entry)
4384                 cxt = allocate_context(cxt);
4385
4386         cxt->entry++;
4387
4388         ASSERT(cxt->entry == 1, ("starting new recursion"));
4389         ASSERT(!cxt->s_dirty, ("clean context"));
4390
4391         /*
4392          * Prepare context.
4393          *
4394          * Data is loaded into the memory buffer when f is NULL, unless `in' is
4395          * also NULL, in which case we're expecting the data to already lie
4396          * in the buffer (dclone case).
4397          */
4398
4399         KBUFINIT();                                     /* Allocate hash key reading pool once */
4400
4401         if (!f && in) {
4402                 StructCopy(&cxt->membuf, &msave, struct extendable);
4403                 MBUF_LOAD(in);
4404         }
4405
4406
4407         /*
4408          * Magic number verifications.
4409          *
4410          * This needs to be done before calling init_retrieve_context()
4411          * since the format indication in the file are necessary to conduct
4412          * some of the initializations.
4413          */
4414
4415         cxt->fio = f;                           /* Where I/O are performed */
4416
4417         if (!magic_check(cxt))
4418                 CROAK(("Magic number checking on storable %s failed",
4419                         cxt->fio ? "file" : "string"));
4420
4421         TRACEME(("data stored in %s format",
4422                 cxt->netorder ? "net order" : "native"));
4423
4424         /*
4425          * Check whether input source is tainted, so that we don't wrongly
4426          * taint perfectly good values...
4427          *
4428          * We assume file input is always tainted.  If both `f' and `in' are
4429          * NULL, then we come from dclone, and tainted is already filled in
4430          * the context.  That's a kludge, but the whole dclone() thing is
4431          * already quite a kludge anyway! -- RAM, 15/09/2000.
4432          */
4433
4434         is_tainted = f ? 1 : (in ? SvTAINTED(in) : cxt->s_tainted);
4435         TRACEME(("input source is %s", is_tainted ? "tainted" : "trusted"));
4436         init_retrieve_context(cxt, optype, is_tainted);
4437
4438         ASSERT(is_retrieving(), ("within retrieve operation"));
4439
4440         sv = retrieve(cxt);             /* Recursively retrieve object, get root SV */
4441
4442         /*
4443          * Final cleanup.
4444          */
4445
4446         if (!f && in)
4447                 StructCopy(&msave, &cxt->membuf, struct extendable);
4448
4449         /*
4450          * The "root" context is never freed.
4451          */
4452
4453         clean_retrieve_context(cxt);
4454         if (cxt->prev)                          /* This context was stacked */
4455                 free_context(cxt);              /* It was not the "root" context */
4456
4457         /*
4458          * Prepare returned value.
4459          */
4460
4461         if (!sv) {
4462                 TRACEME(("retrieve ERROR"));
4463                 return &PL_sv_undef;            /* Something went wrong, return undef */
4464         }
4465
4466         TRACEME(("retrieve got %s(0x%"UVxf")",
4467                 sv_reftype(sv, FALSE), PTR2UV(sv)));
4468
4469         /*
4470          * Backward compatibility with Storable-0.5@9 (which we know we
4471          * are retrieving if hseen is non-null): don't create an extra RV
4472          * for objects since we special-cased it at store time.
4473          *
4474          * Build a reference to the SV returned by pretrieve even if it is
4475          * already one and not a scalar, for consistency reasons.
4476          *
4477          * NB: although context might have been cleaned, the value of `cxt->hseen'
4478          * remains intact, and can be used as a flag.
4479          */
4480
4481         if (cxt->hseen) {                       /* Was not handling overloading by then */
4482                 SV *rv;
4483                 if (sv_type(sv) == svis_REF && (rv = SvRV(sv)) && SvOBJECT(rv))
4484                         return sv;
4485         }
4486
4487         /*
4488          * If reference is overloaded, restore behaviour.
4489          *
4490          * NB: minor glitch here: normally, overloaded refs are stored specially
4491          * so that we can croak when behaviour cannot be re-installed, and also
4492          * avoid testing for overloading magic at each reference retrieval.
4493          *
4494          * Unfortunately, the root reference is implicitely stored, so we must
4495          * check for possible overloading now.  Furthermore, if we don't restore
4496          * overloading, we cannot croak as if the original ref was, because we
4497          * have no way to determine whether it was an overloaded ref or not in
4498          * the first place.
4499          *
4500          * It's a pity that overloading magic is attached to the rv, and not to
4501          * the underlying sv as blessing is.
4502          */
4503
4504         if (SvOBJECT(sv)) {
4505                 HV *stash = (HV *) SvSTASH (sv);
4506                 SV *rv = newRV_noinc(sv);
4507                 if (stash && Gv_AMG(stash)) {
4508                         SvAMAGIC_on(rv);
4509                         TRACEME(("restored overloading on root reference"));
4510                 }
4511                 return rv;
4512         }
4513
4514         return newRV_noinc(sv);
4515 }
4516
4517 /*
4518  * pretrieve
4519  *
4520  * Retrieve data held in file and return the root object, undef on error.
4521  */
4522 SV *pretrieve(PerlIO *f)
4523 {
4524         TRACEME(("pretrieve"));
4525         return do_retrieve(f, Nullsv, 0);
4526 }
4527
4528 /*
4529  * mretrieve
4530  *
4531  * Retrieve data held in scalar and return the root object, undef on error.
4532  */
4533 SV *mretrieve(SV *sv)
4534 {
4535         TRACEME(("mretrieve"));
4536         return do_retrieve((PerlIO*) 0, sv, 0);
4537 }
4538
4539 /***
4540  *** Deep cloning
4541  ***/
4542
4543 /*
4544  * dclone
4545  *
4546  * Deep clone: returns a fresh copy of the original referenced SV tree.
4547  *
4548  * This is achieved by storing the object in memory and restoring from
4549  * there. Not that efficient, but it should be faster than doing it from
4550  * pure perl anyway.
4551  */
4552 SV *dclone(SV *sv)
4553 {
4554         dSTCXT;
4555         int size;
4556         stcxt_t *real_context;
4557         SV *out;
4558
4559         TRACEME(("dclone"));
4560
4561         /*
4562          * Workaround for CROAK leak: if they enter with a "dirty" context,
4563          * free up memory for them now.
4564          */
4565
4566         if (cxt->s_dirty)
4567                 clean_context(cxt);
4568
4569         /*
4570          * do_store() optimizes for dclone by not freeing its context, should
4571          * we need to allocate one because we're deep cloning from a hook.
4572          */
4573
4574         if (!do_store((PerlIO*) 0, sv, ST_CLONE, FALSE, (SV**) 0))
4575                 return &PL_sv_undef;                            /* Error during store */
4576
4577         /*
4578          * Because of the above optimization, we have to refresh the context,
4579          * since a new one could have been allocated and stacked by do_store().
4580          */
4581
4582         { dSTCXT; real_context = cxt; }         /* Sub-block needed for macro */
4583         cxt = real_context;                                     /* And we need this temporary... */
4584
4585         /*
4586          * Now, `cxt' may refer to a new context.
4587          */
4588
4589         ASSERT(!cxt->s_dirty, ("clean context"));
4590         ASSERT(!cxt->entry, ("entry will not cause new context allocation"));
4591
4592         size = MBUF_SIZE();
4593         TRACEME(("dclone stored %d bytes", size));
4594         MBUF_INIT(size);
4595
4596         /*
4597          * Since we're passing do_retrieve() both a NULL file and sv, we need
4598          * to pre-compute the taintedness of the input by setting cxt->tainted
4599          * to whatever state our own input string was.  -- RAM, 15/09/2000
4600          *
4601          * do_retrieve() will free non-root context.
4602          */
4603
4604         cxt->s_tainted = SvTAINTED(sv);
4605         out = do_retrieve((PerlIO*) 0, Nullsv, ST_CLONE);
4606
4607         TRACEME(("dclone returns 0x%"UVxf, PTR2UV(out)));
4608
4609         return out;
4610 }
4611
4612 /***
4613  *** Glue with perl.
4614  ***/
4615
4616 /*
4617  * The Perl IO GV object distinguishes between input and output for sockets
4618  * but not for plain files. To allow Storable to transparently work on
4619  * plain files and sockets transparently, we have to ask xsubpp to fetch the
4620  * right object for us. Hence the OutputStream and InputStream declarations.
4621  *
4622  * Before perl 5.004_05, those entries in the standard typemap are not
4623  * defined in perl include files, so we do that here.
4624  */
4625
4626 #ifndef OutputStream
4627 #define OutputStream    PerlIO *
4628 #define InputStream             PerlIO *
4629 #endif  /* !OutputStream */
4630
4631 MODULE = Storable       PACKAGE = Storable
4632
4633 PROTOTYPES: ENABLE
4634
4635 BOOT:
4636     init_perinterp();
4637
4638 int
4639 pstore(f,obj)
4640 OutputStream    f
4641 SV *    obj
4642
4643 int
4644 net_pstore(f,obj)
4645 OutputStream    f
4646 SV *    obj
4647
4648 SV *
4649 mstore(obj)
4650 SV *    obj
4651
4652 SV *
4653 net_mstore(obj)
4654 SV *    obj
4655
4656 SV *
4657 pretrieve(f)
4658 InputStream     f
4659
4660 SV *
4661 mretrieve(sv)
4662 SV *    sv
4663
4664 SV *
4665 dclone(sv)
4666 SV *    sv
4667
4668 int
4669 last_op_in_netorder()
4670
4671 int
4672 is_storing()
4673
4674 int
4675 is_retrieving()
4676