ext/Storable/Storable.xs

   1 /*
   2  * Store and retrieve mechanism.
   3  */
   4
   5 /*
   6  * $Id: Storable.xs,v 1.0.1.4 2000/10/26 17:11:04 ram Exp $
   7  *
   8  *  Copyright (c) 1995-2000, Raphael Manfredi
   9  *
  10  *  You may redistribute only under the same terms as Perl 5, as specified
  11  *  in the README file that comes with the distribution.
  12  *
  13  * $Log: Storable.xs,v $
  14  * Revision 1.0.1.5  2000/11/05 17:21:24  ram
  15  * patch6: fixed severe "object lost" bug for STORABLE_freeze returns
  16  *
  17  * Revision 1.0.1.4  2000/10/26 17:11:04  ram
  18  * patch5: auto requires module of blessed ref when STORABLE_thaw misses
  19  *
  20  * Revision 1.0.1.3  2000/09/29 19:49:57  ram
  21  * patch3: avoid using "tainted" and "dirty" since Perl remaps them via cpp
  22  *
  23  * $Log: Storable.xs,v $
  24  * Revision 1.0  2000/09/01 19:40:41  ram
  25  * Baseline for first official release.
  26  *
  27  */
  28
  29 #include <EXTERN.h>
  30 #include <perl.h>
  31 #include <patchlevel.h>         /* Perl's one, needed since 5.6 */
  32 #include <XSUB.h>
  33
  34 #if 0   /* Off by default: change to 1 for a debugging build */
  35 #define DEBUGME /* Debug mode, turns assertions on as well */
  36 #define DASSERT /* Assertion mode */
  37 #endif
  38
  39 /*
  40  * When neither USE_PERLIO nor PERLIO_IS_STDIO is defined, we are on a perl
  41  * predating PerlIO (pre-5.004): map the PerlIO calls used here onto stdio.
  42  */
  43 #ifndef USE_PERLIO
  44 #ifndef PERLIO_IS_STDIO
  45 #define PerlIO FILE
  46 #define PerlIO_getc(x) getc(x)
  47 #define PerlIO_putc(f,x) putc(x,f)      /* stdio wants (char, stream) */
  48 #define PerlIO_read(x,y,z) fread(y,1,z,x)       /* item size 1, so the result counts bytes */
  49 #define PerlIO_write(x,y,z) fwrite(y,1,z,x)     /* item size 1, so the result counts bytes */
  50 #define PerlIO_stdoutf printf
  51 #endif  /* PERLIO_IS_STDIO */
  52 #endif  /* USE_PERLIO */
  53
  54 /*
  55  * Earlier versions of perl might be used, we can't assume they have the latest!
  56  */
  57
  58 #ifndef PERL_VERSION            /* For perls < 5.6 */
  59 #define PERL_VERSION PATCHLEVEL /* Same number, under the name used from 5.6 on */
  60 #ifndef newRV_noinc
  61 #define newRV_noinc(sv)         ((Sv = newRV(sv)), --SvREFCNT(SvRV(Sv)), Sv)    /* newRV(), then take one off the referent's refcount */
  62 #endif
  63 #if (PATCHLEVEL <= 4)           /* Older perls (<= 5.004) lack PL_ namespace */
  64 #define PL_sv_yes       sv_yes
  65 #define PL_sv_no        sv_no
  66 #define PL_sv_undef     sv_undef
  67 #if (SUBVERSION <= 4)           /* 5.004_04 has been reported to lack newSVpvn */
  68 #define newSVpvn newSVpv
  69 #endif
  70 #endif                                          /* PATCHLEVEL <= 4 */
  71 #ifndef HvSHAREKEYS_off
  72 #define HvSHAREKEYS_off(hv)     /* Ignore */
  73 #endif
  74 #ifndef AvFILLp                         /* Older perls (<=5.003) lack AvFILLp */
  75 #define AvFILLp AvFILL
  76 #endif
  77 typedef double NV;                      /* Older perls lack the NV type */
  78 #define IVdf            "ld"    /* Various printf formats for Perl types */
  79 #define UVuf            "lu"    /* unsigned, decimal */
  80 #define UVof            "lo"    /* unsigned, octal */
  81 #define UVxf            "lx"    /* unsigned, hexadecimal */
  82 #define INT2PTR(t,v) (t)(IV)(v)         /* integer to pointer type t, going through IV */
  83 #define PTR2UV(v)    (unsigned long)(v) /* pointer to unsigned long, e.g. for printing with UVxf */
  84 #endif                                          /* PERL_VERSION -- perls < 5.6 */
  85
  86 #ifndef NVef                            /* The following were not part of perl 5.6 */
  87 #if defined(USE_LONG_DOUBLE) && \
  88         defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl)
  89 #define NVef            PERL_PRIeldbl   /* Long double build: use perl's long double formats */
  90 #define NVff            PERL_PRIfldbl
  91 #define NVgf            PERL_PRIgldbl
  92 #else                                   /* Otherwise the plain printf conversions */
  93 #define NVef            "e"
  94 #define NVff            "f"
  95 #define NVgf            "g"
  96 #endif                                  /* USE_LONG_DOUBLE && HAS_LONG_DOUBLE && PERL_PRIfldbl */
  97 #endif                                  /* NVef */
  98
  99 #ifdef DEBUGME
 100 /*
 101  * TRACEME((fmt, ...)) prints one trace line on stdout, but only while the
 102  * Perl variable $Storable::DEBUGME is true.  `x' is a whole parenthesized
 103  * printf-style argument list, hence the doubled parentheses at call sites. */
 104 #define TRACEME(x)      do {                                                                    \
 105         if (SvTRUE(perl_get_sv("Storable::DEBUGME", TRUE)))     \
 106                 { PerlIO_stdoutf x; PerlIO_stdoutf("\n"); }                     \
 107 } while (0)
 108 #else   /* !DEBUGME: traces expand to nothing */
 109 #define TRACEME(x)
 110 #endif  /* DEBUGME */
 111
 112 #ifndef DASSERT
 113 #define DASSERT
 114 #endif
 115 #ifdef DASSERT
 116 #define ASSERT(x,y)     do {                                                                    \
 117         if (!(x)) {                                                                                             \
 118                 PerlIO_stdoutf("ASSERT FAILED (\"%s\", line %d): ",     \
 119                         __FILE__, __LINE__);                                                    \
 120                 PerlIO_stdoutf y; PerlIO_stdoutf("\n");                         \
 121         }                                                                                                               \
 122 } while (0)
 123 #else
 124 #define ASSERT(x,y)
 125 #endif
 126
 127 /*
 128  * Type markers: one byte each, written ahead of the data they describe.
 129  */
 130
 131 #define C(x) ((char) (x))       /* For markers with dynamic retrieval handling */
 132
 133 #define SX_OBJECT       C(0)    /* Already stored object */
 134 #define SX_LSCALAR      C(1)    /* Scalar (large binary) follows (length, data) */
 135 #define SX_ARRAY        C(2)    /* Array forthcoming (size, item list) */
 136 #define SX_HASH         C(3)    /* Hash forthcoming (size, key/value pair list) */
 137 #define SX_REF          C(4)    /* Reference to object forthcoming */
 138 #define SX_UNDEF        C(5)    /* Undefined scalar */
 139 #define SX_INTEGER      C(6)    /* Integer forthcoming */
 140 #define SX_DOUBLE       C(7)    /* Double forthcoming */
 141 #define SX_BYTE         C(8)    /* (signed) byte forthcoming */
 142 #define SX_NETINT       C(9)    /* Integer in network order forthcoming */
 143 #define SX_SCALAR       C(10)   /* Scalar (binary, small) follows (length, data) */
 144 #define SX_TIED_ARRAY  C(11)  /* Tied array forthcoming */
 145 #define SX_TIED_HASH   C(12)  /* Tied hash forthcoming */
 146 #define SX_TIED_SCALAR C(13)  /* Tied scalar forthcoming */
 147 #define SX_SV_UNDEF     C(14)   /* Perl's immortal PL_sv_undef */
 148 #define SX_SV_YES       C(15)   /* Perl's immortal PL_sv_yes */
 149 #define SX_SV_NO        C(16)   /* Perl's immortal PL_sv_no */
 150 #define SX_BLESS        C(17)   /* Object is blessed */
 151 #define SX_IX_BLESS     C(18)   /* Object is blessed, classname given by index */
 152 #define SX_HOOK         C(19)   /* Stored via hook, user-defined */
 153 #define SX_OVERLOAD     C(20)   /* Overloaded reference */
 154 #define SX_TIED_KEY C(21)   /* Tied magic key forthcoming */
 155 #define SX_TIED_IDX C(22)   /* Tied magic index forthcoming */
 156 #define SX_UTF8STR      C(23)   /* UTF-8 string forthcoming (small) */
 157 #define SX_LUTF8STR     C(24)   /* UTF-8 string forthcoming (large) */
 158 #define SX_ERROR        C(25)   /* Error */
 159
 160 /*
 161  * Those are only used to retrieve "old" pre-0.6 binary images.
 162  */
 163 #define SX_ITEM         'i'             /* An array item introducer */
 164 #define SX_IT_UNDEF     'I'             /* Undefined array item */
 165 #define SX_KEY          'k'             /* A hash key introducer */
 166 #define SX_VALUE        'v'             /* A hash value introducer */
 167 #define SX_VL_UNDEF     'V'             /* Undefined hash value */
 168
 169 /*
 170  * Those are only used to retrieve "old" pre-0.7 binary images.
 171  */
 172
 173 #define SX_CLASS        'b'             /* Object is blessed, class name length <255 */
 174 #define SX_LG_CLASS 'B'         /* Object is blessed, class name length >255 */
 175 #define SX_STORED       'X'             /* End of object */
 176
 177 /*
 178  * Limits between short/long length representation.
 179  */
 180
 181 #define LG_SCALAR       255             /* Scalars longer than this take the large form (see STORE_PV_LEN) */
 182 #define LG_BLESS        127             /* Large classname bless limit */
 183
 184 /*
 185  * Operation types (each one a distinct bit)
 186  */
 187
 188 #define ST_STORE        0x1             /* Store operation */
 189 #define ST_RETRIEVE     0x2             /* Retrieval operation */
 190 #define ST_CLONE        0x4             /* Deep cloning operation */
 191
 192 /*
 193  * The following structure is used for hash table key retrieval. Since, when
 194  * retrieving objects, we'll be facing blessed hash references, it's best
 195  * to pre-allocate that buffer once and resize it as the need arises, never
 196  * freeing it (keys will be saved away someplace else anyway, so even large
 197  * keys are not enough of a motivation to reclaim that space).
 198  *
 199  * This structure is also used for memory store/retrieve operations which
 200  * happen in a fixed place before being malloc'ed elsewhere if persistency
 201  * is required. Hence the aptr pointer.
 202  */
 203 struct extendable {
 204         char *arena;            /* Will hold hash key strings, resized as needed */
 205         STRLEN asiz;            /* Size of aforementioned buffer */
 206         char *aptr;                     /* Arena pointer, for in-place read/write ops */
 207         char *aend;                     /* First invalid address */
 208 };
 209
 210 /*
 211  * At store time:
 212  * A hash table records the objects which have already been stored.
 213  * Those are referred to as SX_OBJECT in the file, and their "tag" (i.e.
 214  * an arbitrary sequence number) is used to identify them.
 215  *
 216  * At retrieve time:
 217  * An array table records the objects which have already been retrieved,
 218  * as seen by the tag determined by counting the objects themselves. The
 219  * reference to that retrieved object is kept in the table, and is returned
 220  * when an SX_OBJECT is found bearing that same tag.
 221  *
 222  * The same processing is used to record "classname" for blessed objects:
 223  * indexing by a hash at store time, and via an array at retrieve time.
 224  */
 225
 226 typedef unsigned long stag_t;   /* Used by pre-0.6 binary format */
 227
 228 /*
 229  * The following "thread-safe" related defines were contributed by
 230  * Murray Nesbitt <murray@activestate.com> and integrated by RAM, who
 231  * only renamed things a little bit to ensure consistency with surrounding
 232  * code.        -- RAM, 14/09/1999
 233  *
 234  * The original patch suffered from the fact that the stcxt_t structure
 235  * was global.  Murray tried to minimize the impact on the code as much as
 236  * possible.
 237  *
 238  * Starting with 0.7, Storable can be re-entrant, via the STORABLE_xxx hooks
 239  * on objects.  Therefore, the notion of context needs to be generalized,
 240  * threading or not.
 241  */
 242
 243 #define MY_VERSION "Storable(" XS_VERSION ")"   /* Name/key under which dSTCXT_SV looks up the per-interpreter context SV */
 244
 245 /*
 246  * Per-operation context: all the state of one store/retrieve/clone traversal.
 247  * Fields s_tainted and s_dirty are prefixed with s_ because Perl's include
 248  * files remap tainted and dirty when threading is enabled.  That's bad for
 249  * perl to remap such common words.     -- RAM, 29/09/00 */
 250
 251 typedef struct stcxt {
 252         int entry;                      /* flags recursion */
 253         int optype;                     /* type of traversal operation (ST_* flags) */
 254     HV *hseen;                  /* which objects have been seen, store time */
 255     AV *hook_seen;              /* which SVs were returned by STORABLE_freeze() */
 256     AV *aseen;                  /* which objects have been seen, retrieve time */
 257     HV *hclass;                 /* which classnames have been seen, store time */
 258     AV *aclass;                 /* which classnames have been seen, retrieve time */
 259     HV *hook;                   /* cache for hook methods per class name */
 260     I32 tagnum;                 /* incremented at store time for each seen object (SEEN() also bumps it at retrieve time) */
 261     I32 classnum;               /* incremented at store time for each seen classname */
 262     int netorder;               /* true if network order used */
 263     int s_tainted;              /* true if input source is tainted, at retrieve time */
 264     int forgive_me;             /* whether to be forgiving... */
 265     int canonical;              /* whether to store hashes sorted by key */
 266         int s_dirty;            /* context is dirty due to CROAK() -- can be cleaned */
 267     struct extendable keybuf;   /* for hash key retrieval */
 268     struct extendable membuf;   /* for memory store/retrieve operations */
 269         PerlIO *fio;            /* where I/O are performed, NULL for memory */
 270         int ver_major;          /* major of version for retrieved object */
 271         int ver_minor;          /* minor of version for retrieved object */
 272         SV *(**retrieve_vtbl)();        /* retrieve dispatch table */
 273         struct stcxt *prev;     /* contexts chained backwards in real recursion */
 274 } stcxt_t;
 275
 276 #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || defined(PERL_CAPI)
 277 /* The context address is kept as an IV in an SV looked up through MY_VERSION; dSTCXT_SV declares that SV as perinterp_sv */
 278 #if (PATCHLEVEL <= 4) && (SUBVERSION < 68)
 279 #define dSTCXT_SV                                                                       \
 280         SV *perinterp_sv = perl_get_sv(MY_VERSION, FALSE)
 281 #else   /* >= perl5.004_68 */
 282 #define dSTCXT_SV                                                                       \
 283         SV *perinterp_sv = *hv_fetch(PL_modglobal,              \
 284                 MY_VERSION, sizeof(MY_VERSION)-1, TRUE)
 285 #endif  /* < perl5.004_68 */
 286 /* dSTCXT_PTR declares `name' of type T: the pointer held in perinterp_sv, or null when it holds no integer.  dSTCXT declares cxt that way. */
 287 #define dSTCXT_PTR(T,name)                                                      \
 288         T name = (perinterp_sv && SvIOK(perinterp_sv)   \
 289                                 ? INT2PTR(T, SvIVX(perinterp_sv)) : (T) 0)
 290 #define dSTCXT                                                                          \
 291         dSTCXT_SV;                                                                              \
 292         dSTCXT_PTR(stcxt_t *, cxt)
 293 /* INIT_STCXT declares cxt, points it at a freshly zero-allocated context and records that address in perinterp_sv */
 294 #define INIT_STCXT                                                                      \
 295       dSTCXT;                                                                           \
 296       Newz(0, cxt, 1, stcxt_t);                                         \
 297       sv_setiv(perinterp_sv, PTR2IV(cxt))
 298 /* SET_STCXT(x) records x as the current context address */
 299 #define SET_STCXT(x) do {                                                       \
 300         dSTCXT_SV;                                                                              \
 301         sv_setiv(perinterp_sv, PTR2IV(x));                              \
 302 } while (0)
 303
 304 #else /* !MULTIPLICITY && !PERL_OBJECT && !PERL_CAPI */
 305 /* Otherwise: one static context, always reached through Context_ptr */
 306 static stcxt_t Context;
 307 static stcxt_t *Context_ptr = &Context;
 308 #define dSTCXT                  stcxt_t *cxt = Context_ptr
 309 #define INIT_STCXT              dSTCXT
 310 #define SET_STCXT(x)    Context_ptr = x
 311
 312 #endif /* MULTIPLICITY || PERL_OBJECT || PERL_CAPI */
 313
 314 /*
 315  * KNOWN BUG:
 316  *   Croaking implies a memory leak, since we don't use setjmp/longjmp
 317  *   to catch the exit and free memory used during store or retrieve
 318  *   operations.  This is not too difficult to fix, but I need to understand
 319  *   how Perl does it, and croaking is exceptional anyway, so I lack the
 320  *   motivation to do it.
 321  *
 322  * The current workaround is to mark the context as dirty when croaking,
 323  * so that data structures can be freed whenever we re-enter Storable code
 324  * (but only *then*: it's a workaround, not a fix).
 325  *
 326  * This is also imperfect, because we don't really know how far they trapped
 327  * the croak(), and when we were recursing, we won't be able to clean anything
 328  * but the topmost context stacked.
 329  */
 330 /* CROAK((fmt, ...)): flag the context dirty, then croak with the given argument list; needs `cxt' in scope */
 331 #define CROAK(x)        do { cxt->s_dirty = 1; croak x; } while (0)
 332
 333 /*
 334  * End of "thread-safe" related definitions.
 335  */
 336
 337 /*
 338  * LOW_32BITS
 339  *
 340  * Keep only the low 32 bits of a pointer (used for tags, which are not
 341  * really pointers).
 342  */
 343
 344 #if PTRSIZE <= 4        /* Nothing above bit 31 to drop: a plain cast will do */
 345 #define LOW_32BITS(x)   ((I32) (x))
 346 #else                   /* Wider pointers: mask down to 32 bits before converting to I32 */
 347 #define LOW_32BITS(x)   ((I32) ((unsigned long) (x) & 0xffffffffUL))
 348 #endif
 349
 350 /*
 351  * oI, oS, oC
 352  *
 353  * Hack for Crays, where sizeof(I32) == 8, and which are big-endians.
 354  * Used by the 4-byte integer read/write macros (WRITE_I32, WLEN, READ_I32, RLEN, MBUF_GETINT, MBUF_PUTINT).
 355  */
 356
 357 #if INTSIZE > 4
 358 #define oI(x)   ((I32 *) ((char *) (x) + 4))    /* address 4 bytes into the object at x */
 359 #define oS(x)   ((x) - 4)                       /* size x, less those 4 skipped bytes */
 360 #define oC(x)   (x = 0)                         /* clear x before only part of it gets filled */
 361 #define CRAY_HACK
 362 #else           /* Everywhere else the three are no-ops */
 363 #define oI(x)   (x)
 364 #define oS(x)   (x)
 365 #define oC(x)
 366 #endif
 367
 368 /*
 369  * key buffer handling: kbuf/ksiz name the arena and size of cxt->keybuf.
 370  */
 371 #define kbuf    (cxt->keybuf).arena
 372 #define ksiz    (cxt->keybuf).asiz
 373 #define KBUFINIT() do {         /* Allocate the initial 128-byte buffer on first use */ \
 374         if (!kbuf) {                                            \
 375                 TRACEME(("** allocating kbuf of 128 bytes")); \
 376                 New(10003, kbuf, 128, char);    \
 377                 ksiz = 128;                                             \
 378         }                                                                       \
 379 } while (0)
 380 #define KBUFCHK(x) do {                 \
 381         if (x >= ksiz) {                        \
 382                 TRACEME(("** extending kbuf to %d bytes", x+1)); \
 383                 Renew(kbuf, x+1, char); \
 384                 ksiz = x+1;                             \
 385         }                                                       \
 386 } while (0)
 387
 388 /*
 389  * memory buffer handling: shorthands for the fields of cxt->membuf.
 390  */
 391 #define mbase   (cxt->membuf).arena     /* start of the buffer */
 392 #define msiz    (cxt->membuf).asiz      /* its size */
 393 #define mptr    (cxt->membuf).aptr      /* current read/write position */
 394 #define mend    (cxt->membuf).aend      /* first address past the usable data/space */
 395
 396 #define MGROW   (1 << 13)               /* 8192: initial size and growth granularity */
 397 #define MMASK   (MGROW - 1)
 398
 399 #define round_mgrow(x)  /* x rounded up to a multiple of MGROW */ \
 400         ((unsigned long) (((unsigned long) (x) + MMASK) & ~MMASK))
 401 #define trunc_int(x)    /* x rounded down to a multiple of sizeof(int) */ \
 402         ((unsigned long) ((unsigned long) (x) & ~(sizeof(int)-1)))
 403 #define int_aligned(x)  /* true when x is a multiple of sizeof(int) */ \
 404         ((unsigned long) (x) == trunc_int(x))
 405
 406 #define MBUF_INIT(x) do {       /* Rewind the buffer, allocating MGROW bytes on first use */ \
 407         if (!mbase) {                                           \
 408                 TRACEME(("** allocating mbase of %d bytes", MGROW)); \
 409                 New(10003, mbase, MGROW, char); \
 410                 msiz = MGROW;                                   \
 411         }                                                                       \
 412         mptr = mbase;                                           \
 413         if (x)                          /* nonzero x: only the first x bytes are valid */ \
 414                 mend = mbase + x;                               \
 415         else                            /* zero: the whole allocation is usable */ \
 416                 mend = mbase + msiz;                    \
 417 } while (0)
 418
 419 #define MBUF_TRUNC(x)   mptr = mbase + x        /* move the position to offset x */
 420 #define MBUF_SIZE()             (mptr - mbase)  /* bytes between the start and the position */
 421
 422 /* MBUF_LOAD(v): read directly from v's string buffer; croaks if v has none.
 423  * Use SvPOKp(), because SvPOK() fails on tainted scalars.
 424  * See store_scalar() for other usage of this workaround.
 425  */
 426 #define MBUF_LOAD(v) do {                               \
 427         if (!SvPOKp(v))                                         \
 428                 CROAK(("Not a scalar string")); \
 429         mptr = mbase = SvPV(v, msiz);           /* mbase now points at what SvPV() returned for v */ \
 430         mend = mbase + msiz;                            \
 431 } while (0)
 432
 433 #define MBUF_XTEND(x) do {      /* Grow to msiz + x rounded up to MGROW; the position is kept */ \
 434         int nsz = (int) round_mgrow((x)+msiz);  \
 435         int offset = mptr - mbase;      /* saved as an offset: Renew() may move the buffer */ \
 436         TRACEME(("** extending mbase to %d bytes", nsz));       \
 437         Renew(mbase, nsz, char);                \
 438         msiz = nsz;                                             \
 439         mptr = mbase + offset;                  \
 440         mend = mbase + nsz;                             \
 441 } while (0)
 442
 443 #define MBUF_CHK(x) do {        /* Make room for x more bytes at the position */ \
 444         if ((mptr + (x)) > mend)                \
 445                 MBUF_XTEND(x);                          \
 446 } while (0)
 447
 448 #define MBUF_GETC(x) do {       /* x = next byte (0..255); at end of buffer the ENCLOSING function returns null */ \
 449         if (mptr < mend)                                \
 450                 x = (int) (unsigned char) *mptr++;      \
 451         else                                                    \
 452                 return (SV *) 0;                        \
 453 } while (0)
 454
 455 #ifdef CRAY_HACK
 456 #define MBUF_GETINT(x) do {     /* Cray: zero x, then copy 4 buffer bytes to oI(&x); enclosing function returns null if short */ \
 457         oC(x);                                                          \
 458         if ((mptr + 4) <= mend) {                       \
 459                 memcpy(oI(&x), mptr, 4);                \
 460                 mptr += 4;                                              \
 461         } else                                                          \
 462                 return (SV *) 0;                                \
 463 } while (0)
 464 #else
 465 #define MBUF_GETINT(x) do {     /* x = next sizeof(int) bytes; enclosing function returns null if short */ \
 466         if ((mptr + sizeof(int)) <= mend) {     \
 467                 if (int_aligned(mptr))          /* aligned: load directly */ \
 468                         x = *(int *) mptr;                      \
 469                 else                            /* unaligned: copy byte-wise */ \
 470                         memcpy(&x, mptr, sizeof(int));  \
 471                 mptr += sizeof(int);                    \
 472         } else                                                          \
 473                 return (SV *) 0;                                \
 474 } while (0)
 475 #endif
 476
 477 #define MBUF_READ(x,s) do {     /* Copy s bytes to x; enclosing function returns null if fewer remain */ \
 478         if ((mptr + (s)) <= mend) {     /* NOTE(review): no guard against a negative s -- confirm callers validate lengths */ \
 479                 memcpy(x, mptr, s);                     \
 480                 mptr += s;                                      \
 481         } else                                                  \
 482                 return (SV *) 0;                        \
 483 } while (0)
 484
 485 #define MBUF_SAFEREAD(x,s,z) do {       /* As MBUF_READ, but sv_free(z) before bailing out */ \
 486         if ((mptr + (s)) <= mend) {             \
 487                 memcpy(x, mptr, s);                     \
 488                 mptr += s;                                      \
 489         } else {                                                \
 490                 sv_free(z);                                     \
 491                 return (SV *) 0;                        \
 492         }                                                               \
 493 } while (0)
 494
 495 #define MBUF_PUTC(c) do {       /* Append one byte, growing the buffer when full */ \
 496         if (mptr < mend)                                \
 497                 *mptr++ = (char) c;                     \
 498         else {                                                  \
 499                 MBUF_XTEND(1);                          \
 500                 *mptr++ = (char) c;                     \
 501         }                                                               \
 502 } while (0)
 503
 504 #ifdef CRAY_HACK
 505 #define MBUF_PUTINT(i) do {     /* Cray: append the 4 bytes found at oI(&i) */ \
 506         MBUF_CHK(4);                                    \
 507         memcpy(mptr, oI(&i), 4);                \
 508         mptr += 4;                                              \
 509 } while (0)
 510 #else
 511 #define MBUF_PUTINT(i) do {     /* Append the sizeof(int) bytes of i (i must be an lvalue) */ \
 512         MBUF_CHK(sizeof(int));                  \
 513         if (int_aligned(mptr))          /* aligned: store directly */ \
 514                 *(int *) mptr = i;                      \
 515         else                            /* unaligned: copy byte-wise */ \
 516                 memcpy(mptr, &i, sizeof(int));  \
 517         mptr += sizeof(int);                    \
 518 } while (0)
 519 #endif
 520
 521 #define MBUF_WRITE(x,s) do {    /* Append s bytes from x, growing the buffer as needed */ \
 522         MBUF_CHK(s);                                    \
 523         memcpy(mptr, x, s);                             \
 524         mptr += s;                                              \
 525 } while (0)
 526
 527 /* Possible return values for sv_type(): consecutive codes 0..6, one per
 528  * kind of SV told apart.  NOTE(review): sv_type() itself is not visible in
 529  * this part of the file -- confirm the exact meaning of each code there. */
 530
 531 #define svis_REF                0
 532 #define svis_SCALAR             1
 533 #define svis_ARRAY              2
 534 #define svis_HASH               3
 535 #define svis_TIED               4
 536 #define svis_TIED_ITEM  5
 537 #define svis_OTHER              6
 538
 539 /*
 540  * Flags for SX_HOOK: one byte, low 2 bits for the type, one bit per flag above them.
 541  */
 542
 543 #define SHF_TYPE_MASK           0x03    /* selects the SHT_ type (values listed below) */
 544 #define SHF_LARGE_CLASSLEN      0x04
 545 #define SHF_LARGE_STRLEN        0x08
 546 #define SHF_LARGE_LISTLEN       0x10
 547 #define SHF_IDX_CLASSNAME       0x20
 548 #define SHF_NEED_RECURSE        0x40
 549 #define SHF_HAS_LIST            0x80
 550
 551 /*
 552  * Types for SX_HOOK (2 bits).
 553  */
 554
 555 #define SHT_SCALAR                      0
 556 #define SHT_ARRAY                       1
 557 #define SHT_HASH                        2
 558
 559 /*
 560  * Before 0.6, the magic string was "perl-store" (binary version number 0).
 561  *
 562  * Since 0.6 introduced many binary incompatibilities, the magic string has
 563  * been changed to "pst0" to allow an old image to be properly retrieved by
 564  * a newer Storable, but ensure a newer image cannot be retrieved with an
 565  * older version.
 566  *
 567  * At 0.7, objects are given the ability to serialize themselves, and the
 568  * set of markers is extended, backward compatibility is not jeopardized,
 569  * so the binary version number could have remained unchanged.  To correctly
 570  * spot errors if a file making use of 0.7-specific extensions is given to
 571  * 0.6 for retrieval, the binary version was moved to "2".  And I'm introducing
 572  * a "minor" version, to better track this kind of evolution from now on.
 573  * The version this file writes is STORABLE_BIN_MAJOR.STORABLE_BIN_MINOR, below.
 574  */
 575 static char old_magicstr[] = "perl-store";      /* Magic number before 0.6 */
 576 static char magicstr[] = "pst0";                        /* Used as a magic number */
 577
 578 #define STORABLE_BIN_MAJOR      2                               /* Binary major "version" */
 579 #define STORABLE_BIN_MINOR      3                               /* Binary minor "version" */
 580
 581 /*
 582  * Useful store shortcuts...  Each one writes to the memory buffer when
 583  * cxt->fio is null and to that file otherwise; on a failed file write it
 584  * makes the ENCLOSING function return -1.  They need `cxt' in scope. */
 585 #define PUTMARK(x) do {         /* Write the single byte x */ \
 586         if (!cxt->fio)                                                  \
 587                 MBUF_PUTC(x);                                           \
 588         else if (PerlIO_putc(cxt->fio, x) == EOF)       \
 589                 return -1;                                                      \
 590 } while (0)
 591
 592 #define WRITE_I32(x)    do {    /* Write the I32 lvalue x as is (no byte-order conversion) */ \
 593         ASSERT(sizeof(x) == sizeof(I32), ("writing an I32"));   \
 594         if (!cxt->fio)                                          \
 595                 MBUF_PUTINT(x);                                 \
 596         else if (PerlIO_write(cxt->fio, oI(&x), oS(sizeof(x))) != oS(sizeof(x))) \
 597                 return -1;                                      \
 598         } while (0)
 599
 600 #ifdef HAS_HTONL
 601 #define WLEN(x) do {            /* Write length x, passed through htonl() first when cxt->netorder is set */ \
 602         if (cxt->netorder) {                    \
 603                 int y = (int) htonl(x);         \
 604                 if (!cxt->fio)                          \
 605                         MBUF_PUTINT(y);                 \
 606                 else if (PerlIO_write(cxt->fio,oI(&y),oS(sizeof(y))) != oS(sizeof(y))) \
 607                         return -1;                              \
 608         } else {                                                \
 609                 if (!cxt->fio)                          \
 610                         MBUF_PUTINT(x);                 \
 611                 else if (PerlIO_write(cxt->fio,oI(&x),oS(sizeof(x))) != oS(sizeof(x))) \
 612                         return -1;                              \
 613         }                                                               \
 614 } while (0)
 615 #else   /* No htonl(): lengths always go out as is */
 616 #define WLEN(x) WRITE_I32(x)
 617 #endif
 618
 619 #define WRITE(x,y) do {         /* Write y bytes starting at x */ \
 620         if (!cxt->fio)                                                  \
 621                 MBUF_WRITE(x,y);                                        \
 622         else if (PerlIO_write(cxt->fio, x, y) != y)     \
 623                 return -1;                                                      \
 624         } while (0)
 625
 626 #define STORE_PV_LEN(pv, len, small, large) do {        /* Emit the len bytes at pv, in short or long form */ \
 627         if (len <= LG_SCALAR) {         /* short: `small' marker, one length byte, data */ \
 628                 unsigned char clen = (unsigned char) len;       \
 629                 PUTMARK(small);                                 \
 630                 PUTMARK(clen);                                  \
 631                 if (len)                        /* nothing to write for an empty string */ \
 632                         WRITE(pv, len);                         \
 633         } else {                        /* long: `large' marker, WLEN() length, data */ \
 634                 PUTMARK(large);                                 \
 635                 WLEN(len);                                              \
 636                 WRITE(pv, len);                                 \
 637         }                                                                       \
 638 } while (0)
 639
 640 #define STORE_SCALAR(pv, len)   STORE_PV_LEN(pv, len, SX_SCALAR, SX_LSCALAR)
 641
 642 /*
 643  * Conditional UTF8 support, keyed on whether perl provides SvUTF8_on.
 644  * On non-UTF8 perls, SvUTF8() is always false, and the store and flagging
 645  * macros below croak should they ever be reached. */
 646 #ifdef SvUTF8_on
 647 #define STORE_UTF8STR(pv, len)  STORE_PV_LEN(pv, len, SX_UTF8STR, SX_LUTF8STR)
 648 #else
 649 #define SvUTF8(sv) 0
 650 #define STORE_UTF8STR(pv, len) CROAK(("panic: storing UTF8 in non-UTF8 perl"))
 651 #define SvUTF8_on(sv) CROAK(("Cannot retrieve UTF8 data in non-UTF8 perl"))
 652 #endif
 653
 654
 655 /*
 656  * Store undef in arrays and hashes without recursing through store().
 657  */
 658 #define STORE_UNDEF() do {                              \
 659         cxt->tagnum++;                  /* an undef still uses up one tag */ \
 660         PUTMARK(SX_UNDEF);                                      \
 661 } while (0)
 662
 663 /*
 664  * Useful retrieve shortcuts...  GETCHAR() yields the next input byte as a
 665  * non-negative int, or EOF at end of input (memory bytes go via unsigned char).
 666  */
 667 #define GETCHAR() \
 668         (cxt->fio ? PerlIO_getc(cxt->fio) : (mptr >= mend ? EOF : (int) (unsigned char) *mptr++))
 669
 670 #define GETMARK(x) do {                                                 \
 671         if (!cxt->fio)                                                          \
 672                 MBUF_GETC(x);                                                   \
 673         else if ((x = PerlIO_getc(cxt->fio)) == EOF)    \
 674                 return (SV *) 0;                                                \
 675 } while (0)
 676
 677 #define READ_I32(x)     do {                            \
 678         ASSERT(sizeof(x) == sizeof(I32), ("reading an I32"));   \
 679         oC(x);                                                          \
 680         if (!cxt->fio)                                          \
 681                 MBUF_GETINT(x);                                 \
 682         else if (PerlIO_read(cxt->fio, oI(&x), oS(sizeof(x))) != oS(sizeof(x))) \
 683                 return (SV *) 0;                                \
 684 } while (0)
 685
/*
 * RLEN(x) reads a length into the lvalue `x'.
 *
 * With HAS_NTOHL it performs the same read as READ_I32() (minus the size
 * assertion) and then, if cxt->netorder is set, converts the value with
 * ntohl().  Without HAS_NTOHL it is simply READ_I32(x), so no byte-order
 * conversion takes place.  As with the other read macros, a short file
 * read makes the enclosing function return (SV *) 0.
 */
#ifdef HAS_NTOHL
#define RLEN(x) do {                                    \
        oC(x);                                                          \
        if (!cxt->fio)                                          \
                MBUF_GETINT(x);                                 \
        else if (PerlIO_read(cxt->fio, oI(&x), oS(sizeof(x))) != oS(sizeof(x))) \
                return (SV *) 0;                                \
        if (cxt->netorder)                                      \
                x = (int) ntohl(x);                             \
} while (0)
#else
#define RLEN(x) READ_I32(x)
#endif
 699
/*
 * READ(x,y) reads `y' bytes into the buffer `x'.
 *
 * Memory-buffer input goes through MBUF_READ() (defined elsewhere); file
 * input uses PerlIO_read(), and a read that does not deliver exactly `y'
 * bytes makes the enclosing function return (SV *) 0.
 *
 * `y' is expanded twice and is not parenthesized, so pass a simple
 * expression without side effects.
 */
#define READ(x,y) do {                                          \
        if (!cxt->fio)                                                  \
                MBUF_READ(x, y);                                        \
        else if (PerlIO_read(cxt->fio, x, y) != y)      \
                return (SV *) 0;                                        \
} while (0)
 706
/*
 * SAFEREAD(x,y,z) is READ(x,y) for callers that already own an SV `z':
 * on a short file read, `z' is released with sv_free() before the
 * enclosing function returns (SV *) 0.  The memory-buffer case is
 * delegated to MBUF_SAFEREAD() (defined elsewhere), which is handed `z'
 * as well.
 *
 * `y' is expanded twice and is not parenthesized.
 */
#define SAFEREAD(x,y,z) do {                                    \
        if (!cxt->fio)                                                          \
                MBUF_SAFEREAD(x,y,z);                                   \
        else if (PerlIO_read(cxt->fio, x, y) != y)       {      \
                sv_free(z);                                                             \
                return (SV *) 0;                                                \
        }                                                                                       \
} while (0)
 715
/*
 * This macro is used at retrieve time, to remember where object 'y', bearing
 * the current tag number (cxt->tagnum), has been retrieved. Next time we see
 * an SX_OBJECT marker, we'll therefore know where it has been retrieved and
 * will be able to share the same reference, as in the original stored memory
 * image.
 *
 * Details visible in the expansion:
 *  - a null 'y' makes the enclosing function return (SV *) 0;
 *  - 'y' is stored in cxt->aseen at index cxt->tagnum with its reference
 *    count incremented, and cxt->tagnum is then advanced;
 *  - if av_store() fails, the enclosing function returns (SV *) 0 (the
 *    reference count taken for the store is not given back on that path);
 *  - 'y' is expanded several times and is not parenthesized, so pass a
 *    plain variable.
 */
#define SEEN(y) do {                                            \
        if (!y)                                                                 \
                return (SV *) 0;                                        \
        if (av_store(cxt->aseen, cxt->tagnum++, SvREFCNT_inc(y)) == 0) \
                return (SV *) 0;                                        \
        TRACEME(("aseen(#%d) = 0x%"UVxf" (refcnt=%d)", cxt->tagnum-1, \
                 PTR2UV(y), SvREFCNT(y)-1)); \
} while (0)
 730
/*
 * Bless `s' in `p', via a temporary reference, required by sv_bless().
 *
 * `p' is a package name: its stash is looked up with gv_stashpv(), which
 * is asked to create it (TRUE) when missing.  The temporary reference is
 * made with newRV_noinc(), so it does not add to the reference count of
 * `s'.  Before the temporary is released its SvRV slot is cleared --
 * presumably so that dropping the reference leaves `s' untouched; confirm
 * against the perl version in use.
 *
 * The macro declares its own `ref' and `stash' locals inside the
 * do { } while (0) scope, so arguments that use those names would be
 * shadowed.
 */
#define BLESS(s,p) do {                                 \
        SV *ref;                                                                \
        HV *stash;                                                              \
        TRACEME(("blessing 0x%"UVxf" in %s", PTR2UV(s), (p))); \
        stash = gv_stashpv((p), TRUE);                  \
        ref = newRV_noinc(s);                                   \
        (void) sv_bless(ref, stash);                    \
        SvRV(ref) = 0;                                                  \
        SvREFCNT_dec(ref);                                              \
} while (0)
 744
/*
 * Forward declarations of the two recursive entry points, which are
 * defined further down in the file.  Both are declared without a
 * parameter list, so the compiler does not check call arguments;
 * store() is invoked as store(cxt, sv) by the store routines below.
 */
static int store();
static SV *retrieve();
 747
/*
 * Dynamic dispatching table for SV store.
 *
 * sv_store[] is indexed by the classification of the SV being stored (the
 * svis_* name in each entry's comment) and yields the routine that
 * serializes that kind of value.  The table is positional: the order of
 * the entries presumably has to match the numeric values of the svis_*
 * constants, which are defined elsewhere.
 *
 * The element type is a pointer to a function with an unspecified
 * parameter list, because the routines do not all share one signature
 * (store_array() takes an AV *, store_hash() an HV *).
 *
 * store_blessed() is declared here but is not a table entry.
 */

static int store_ref(stcxt_t *cxt, SV *sv);
static int store_scalar(stcxt_t *cxt, SV *sv);
static int store_array(stcxt_t *cxt, AV *av);
static int store_hash(stcxt_t *cxt, HV *hv);
static int store_tied(stcxt_t *cxt, SV *sv);
static int store_tied_item(stcxt_t *cxt, SV *sv);
static int store_other(stcxt_t *cxt, SV *sv);
static int store_blessed(stcxt_t *cxt, SV *sv, int type, HV *pkg);

static int (*sv_store[])() = {
        store_ref,                      /* svis_REF */
        store_scalar,           /* svis_SCALAR */
        store_array,            /* svis_ARRAY */
        store_hash,                     /* svis_HASH */
        store_tied,                     /* svis_TIED */
        store_tied_item,        /* svis_TIED_ITEM */
        store_other,            /* svis_OTHER */
};

/* Select the store routine for classification `x' (no range check on `x') */
#define SV_STORE(x)     (*sv_store[x])
 772
/*
 * Dynamic dispatching tables for SV retrieval.
 *
 * sv_old_retrieve[] is the table for old binary images: the array and hash
 * slots point at the old_retrieve_*() routines ("for pre-0.6 binaries"),
 * and every marker commented "not supported" is routed to retrieve_other().
 *
 * The table is positional: slot N serves the marker named in its comment,
 * so the order presumably has to match the numeric values of the SX_*
 * constants (defined elsewhere).  The last slot (SX_ERROR) is the one the
 * RETRIEVE() macro falls back to for any marker >= SX_ERROR.
 */

static SV *retrieve_lscalar(stcxt_t *cxt);
static SV *retrieve_lutf8str(stcxt_t *cxt);
static SV *old_retrieve_array(stcxt_t *cxt);
static SV *old_retrieve_hash(stcxt_t *cxt);
static SV *retrieve_ref(stcxt_t *cxt);
static SV *retrieve_undef(stcxt_t *cxt);
static SV *retrieve_integer(stcxt_t *cxt);
static SV *retrieve_double(stcxt_t *cxt);
static SV *retrieve_byte(stcxt_t *cxt);
static SV *retrieve_netint(stcxt_t *cxt);
static SV *retrieve_scalar(stcxt_t *cxt);
static SV *retrieve_utf8str(stcxt_t *cxt);
static SV *retrieve_tied_array(stcxt_t *cxt);
static SV *retrieve_tied_hash(stcxt_t *cxt);
static SV *retrieve_tied_scalar(stcxt_t *cxt);
static SV *retrieve_other(stcxt_t *cxt);

static SV *(*sv_old_retrieve[])() = {
        0,                      /* SX_OBJECT -- entry unused dynamically */
        retrieve_lscalar,               /* SX_LSCALAR */
        old_retrieve_array,             /* SX_ARRAY -- for pre-0.6 binaries */
        old_retrieve_hash,              /* SX_HASH -- for pre-0.6 binaries */
        retrieve_ref,                   /* SX_REF */
        retrieve_undef,                 /* SX_UNDEF */
        retrieve_integer,               /* SX_INTEGER */
        retrieve_double,                /* SX_DOUBLE */
        retrieve_byte,                  /* SX_BYTE */
        retrieve_netint,                /* SX_NETINT */
        retrieve_scalar,                /* SX_SCALAR */
        retrieve_tied_array,    /* tied array (presumably SX_TIED_ARRAY; was labelled SX_ARRAY) */
        retrieve_tied_hash,             /* tied hash (presumably SX_TIED_HASH; was labelled SX_HASH) */
        retrieve_tied_scalar,   /* tied scalar (presumably SX_TIED_SCALAR; was labelled SX_SCALAR) */
        retrieve_other,                 /* SX_SV_UNDEF not supported */
        retrieve_other,                 /* SX_SV_YES not supported */
        retrieve_other,                 /* SX_SV_NO not supported */
        retrieve_other,                 /* SX_BLESS not supported */
        retrieve_other,                 /* SX_IX_BLESS not supported */
        retrieve_other,                 /* SX_HOOK not supported */
        retrieve_other,                 /* SX_OVERLOAD not supported */
        retrieve_other,                 /* SX_TIED_KEY not supported */
        retrieve_other,                 /* SX_TIED_IDX not supported */
        retrieve_other,                 /* SX_UTF8STR not supported */
        retrieve_other,                 /* SX_LUTF8STR not supported */
        retrieve_other,                 /* SX_ERROR */
};
 822
/*
 * sv_retrieve[] is the retrieval table for current binary images; it has
 * the same slot layout as sv_old_retrieve[] but a real routine for every
 * marker.  The table is positional, so the entry order presumably has to
 * match the numeric values of the SX_* constants (defined elsewhere).
 */
static SV *retrieve_array(stcxt_t *cxt);
static SV *retrieve_hash(stcxt_t *cxt);
static SV *retrieve_sv_undef(stcxt_t *cxt);
static SV *retrieve_sv_yes(stcxt_t *cxt);
static SV *retrieve_sv_no(stcxt_t *cxt);
static SV *retrieve_blessed(stcxt_t *cxt);
static SV *retrieve_idx_blessed(stcxt_t *cxt);
static SV *retrieve_hook(stcxt_t *cxt);
static SV *retrieve_overloaded(stcxt_t *cxt);
static SV *retrieve_tied_key(stcxt_t *cxt);
static SV *retrieve_tied_idx(stcxt_t *cxt);

static SV *(*sv_retrieve[])() = {
        0,                      /* SX_OBJECT -- entry unused dynamically */
        retrieve_lscalar,               /* SX_LSCALAR */
        retrieve_array,                 /* SX_ARRAY */
        retrieve_hash,                  /* SX_HASH */
        retrieve_ref,                   /* SX_REF */
        retrieve_undef,                 /* SX_UNDEF */
        retrieve_integer,               /* SX_INTEGER */
        retrieve_double,                /* SX_DOUBLE */
        retrieve_byte,                  /* SX_BYTE */
        retrieve_netint,                /* SX_NETINT */
        retrieve_scalar,                /* SX_SCALAR */
        retrieve_tied_array,    /* tied array (presumably SX_TIED_ARRAY; was labelled SX_ARRAY) */
        retrieve_tied_hash,             /* tied hash (presumably SX_TIED_HASH; was labelled SX_HASH) */
        retrieve_tied_scalar,   /* tied scalar (presumably SX_TIED_SCALAR; was labelled SX_SCALAR) */
        retrieve_sv_undef,              /* SX_SV_UNDEF */
        retrieve_sv_yes,                /* SX_SV_YES */
        retrieve_sv_no,                 /* SX_SV_NO */
        retrieve_blessed,               /* SX_BLESS */
        retrieve_idx_blessed,   /* SX_IX_BLESS */
        retrieve_hook,                  /* SX_HOOK */
        retrieve_overloaded,    /* SX_OVERLOAD */
        retrieve_tied_key,              /* SX_TIED_KEY */
        retrieve_tied_idx,              /* SX_TIED_IDX */
        retrieve_utf8str,               /* SX_UTF8STR  */
        retrieve_lutf8str,              /* SX_LUTF8STR */
        retrieve_other,                 /* SX_ERROR */
};
 863
/*
 * RETRIEVE(c,x) picks, from the retrieve table attached to context `c'
 * (c->retrieve_vtbl), the routine that handles marker `x'.  Any marker
 * >= SX_ERROR is clamped to the SX_ERROR slot.  There is no lower-bound
 * check, and `x' is evaluated twice, so it must be a non-negative
 * expression without side effects.
 */
#define RETRIEVE(c,x) (*(c)->retrieve_vtbl[(x) >= SX_ERROR ? SX_ERROR : (x)])
 865
/* Forward declaration; mbuf2sv() is defined later in the file. */
static SV *mbuf2sv(void);
 867
 868 /***
 869  *** Context management.
 870  ***/
 871
 872 /*
 873  * init_perinterp
 874  *
 875  * Called once per "thread" (interpreter) to initialize some global context.
 876  */
 877 static void init_perinterp(void)
 878 {
 879     INIT_STCXT;
 880
 881     cxt->netorder = 0;          /* true if network order used */
 882     cxt->forgive_me = -1;       /* whether to be forgiving... */
 883 }
 884
 885 /*
 886  * init_store_context
 887  *
 888  * Initialize a new store context for real recursion.
 889  */
 890 static void init_store_context(
 891         stcxt_t *cxt,
 892         PerlIO *f,
 893         int optype,
 894         int network_order)
 895 {
 896         TRACEME(("init_store_context"));
 897
 898         cxt->netorder = network_order;
 899         cxt->forgive_me = -1;                   /* Fetched from perl if needed */
 900         cxt->canonical = -1;                    /* Idem */
 901         cxt->tagnum = -1;                               /* Reset tag numbers */
 902         cxt->classnum = -1;                             /* Reset class numbers */
 903         cxt->fio = f;                                   /* Where I/O are performed */
 904         cxt->optype = optype;                   /* A store, or a deep clone */
 905         cxt->entry = 1;                                 /* No recursion yet */
 906
 907         /*
 908          * The `hseen' table is used to keep track of each SV stored and their
 909          * associated tag numbers is special. It is "abused" because the
 910          * values stored are not real SV, just integers cast to (SV *),
 911          * which explains the freeing below.
 912          *
 913          * It is also one possible bottlneck to achieve good storing speed,
 914          * so the "shared keys" optimization is turned off (unlikely to be
 915          * of any use here), and the hash table is "pre-extended". Together,
 916          * those optimizations increase the throughput by 12%.
 917          */
 918
 919         cxt->hseen = newHV();                   /* Table where seen objects are stored */
 920         HvSHAREKEYS_off(cxt->hseen);
 921
 922         /*
 923          * The following does not work well with perl5.004_04, and causes
 924          * a core dump later on, in a completely unrelated spot, which
 925          * makes me think there is a memory corruption going on.
 926          *
 927          * Calling hv_ksplit(hseen, HBUCKETS) instead of manually hacking
 928          * it below does not make any difference. It seems to work fine
 929          * with perl5.004_68 but given the probable nature of the bug,
 930          * that does not prove anything.
 931          *
 932          * It's a shame because increasing the amount of buckets raises
 933          * store() throughput by 5%, but until I figure this out, I can't
 934          * allow for this to go into production.
 935          *
 936          * It is reported fixed in 5.005, hence the #if.
 937          */
 938 #if PERL_VERSION >= 5
 939 #define HBUCKETS        4096                            /* Buckets for %hseen */
 940         HvMAX(cxt->hseen) = HBUCKETS - 1;       /* keys %hseen = $HBUCKETS; */
 941 #endif
 942
 943         /*
 944          * The `hclass' hash uses the same settings as `hseen' above, but it is
 945          * used to assign sequential tags (numbers) to class names for blessed
 946          * objects.
 947          *
 948          * We turn the shared key optimization on.
 949          */
 950
 951         cxt->hclass = newHV();                  /* Where seen classnames are stored */
 952
 953 #if PERL_VERSION >= 5
 954         HvMAX(cxt->hclass) = HBUCKETS - 1;      /* keys %hclass = $HBUCKETS; */
 955 #endif
 956
 957         /*
 958          * The `hook' hash table is used to keep track of the references on
 959          * the STORABLE_freeze hook routines, when found in some class name.
 960          *
 961          * It is assumed that the inheritance tree will not be changed during
 962          * storing, and that no new method will be dynamically created by the
 963          * hooks.
 964          */
 965
 966         cxt->hook = newHV();                    /* Table where hooks are cached */
 967
 968         /*
 969          * The `hook_seen' array keeps track of all the SVs returned by
 970          * STORABLE_freeze hooks for us to serialize, so that they are not
 971          * reclaimed until the end of the serialization process.  Each SV is
 972          * only stored once, the first time it is seen.
 973          */
 974
 975         cxt->hook_seen = newAV();               /* Lists SVs returned by STORABLE_freeze */
 976 }
 977
 978 /*
 979  * clean_store_context
 980  *
 981  * Clean store context by
 982  */
 983 static void clean_store_context(stcxt_t *cxt)
 984 {
 985         HE *he;
 986
 987         TRACEME(("clean_store_context"));
 988
 989         ASSERT(cxt->optype & ST_STORE, ("was performing a store()"));
 990
 991         /*
 992          * Insert real values into hashes where we stored faked pointers.
 993          */
 994
 995         hv_iterinit(cxt->hseen);
 996         while (he = hv_iternext(cxt->hseen))
 997                 HeVAL(he) = &PL_sv_undef;
 998
 999         hv_iterinit(cxt->hclass);
1000         while (he = hv_iternext(cxt->hclass))
1001                 HeVAL(he) = &PL_sv_undef;
1002
1003         /*
1004          * And now dispose of them...
1005          */
1006
1007         hv_undef(cxt->hseen);
1008         sv_free((SV *) cxt->hseen);
1009
1010         hv_undef(cxt->hclass);
1011         sv_free((SV *) cxt->hclass);
1012
1013         hv_undef(cxt->hook);
1014         sv_free((SV *) cxt->hook);
1015
1016         av_undef(cxt->hook_seen);
1017         sv_free((SV *) cxt->hook_seen);
1018
1019         cxt->entry = 0;
1020         cxt->s_dirty = 0;
1021 }
1022
1023 /*
1024  * init_retrieve_context
1025  *
1026  * Initialize a new retrieve context for real recursion.
1027  */
1028 static void init_retrieve_context(stcxt_t *cxt, int optype, int is_tainted)
1029 {
1030         TRACEME(("init_retrieve_context"));
1031
1032         /*
1033          * The hook hash table is used to keep track of the references on
1034          * the STORABLE_thaw hook routines, when found in some class name.
1035          *
1036          * It is assumed that the inheritance tree will not be changed during
1037          * storing, and that no new method will be dynamically created by the
1038          * hooks.
1039          */
1040
1041         cxt->hook  = newHV();                   /* Caches STORABLE_thaw */
1042
1043         /*
1044          * If retrieving an old binary version, the cxt->retrieve_vtbl variable
1045          * was set to sv_old_retrieve. We'll need a hash table to keep track of
1046          * the correspondance between the tags and the tag number used by the
1047          * new retrieve routines.
1048          */
1049
1050         cxt->hseen = (cxt->retrieve_vtbl == sv_old_retrieve) ? newHV() : 0;
1051
1052         cxt->aseen = newAV();                   /* Where retrieved objects are kept */
1053         cxt->aclass = newAV();                  /* Where seen classnames are kept */
1054         cxt->tagnum = 0;                                /* Have to count objects... */
1055         cxt->classnum = 0;                              /* ...and class names as well */
1056         cxt->optype = optype;
1057         cxt->s_tainted = is_tainted;
1058         cxt->entry = 1;                                 /* No recursion yet */
1059 }
1060
1061 /*
1062  * clean_retrieve_context
1063  *
1064  * Clean retrieve context by
1065  */
1066 static void clean_retrieve_context(stcxt_t *cxt)
1067 {
1068         TRACEME(("clean_retrieve_context"));
1069
1070         ASSERT(cxt->optype & ST_RETRIEVE, ("was performing a retrieve()"));
1071
1072         av_undef(cxt->aseen);
1073         sv_free((SV *) cxt->aseen);
1074
1075         av_undef(cxt->aclass);
1076         sv_free((SV *) cxt->aclass);
1077
1078         hv_undef(cxt->hook);
1079         sv_free((SV *) cxt->hook);
1080
1081         if (cxt->hseen)
1082                 sv_free((SV *) cxt->hseen);             /* optional HV, for backward compat. */
1083
1084         cxt->entry = 0;
1085         cxt->s_dirty = 0;
1086 }
1087
1088 /*
1089  * clean_context
1090  *
1091  * A workaround for the CROAK bug: cleanup the last context.
1092  */
1093 static void clean_context(cxt)
1094 stcxt_t *cxt;
1095 {
1096         TRACEME(("clean_context"));
1097
1098         ASSERT(cxt->s_dirty, ("dirty context"));
1099
1100         if (cxt->optype & ST_RETRIEVE)
1101                 clean_retrieve_context(cxt);
1102         else
1103                 clean_store_context(cxt);
1104 }
1105
1106 /*
1107  * allocate_context
1108  *
1109  * Allocate a new context and push it on top of the parent one.
1110  * This new context is made globally visible via SET_STCXT().
1111  */
1112 static stcxt_t *allocate_context(parent_cxt)
1113 stcxt_t *parent_cxt;
1114 {
1115         stcxt_t *cxt;
1116
1117         TRACEME(("allocate_context"));
1118
1119         ASSERT(!parent_cxt->s_dirty, ("parent context clean"));
1120
1121         Newz(0, cxt, 1, stcxt_t);
1122         cxt->prev = parent_cxt;
1123         SET_STCXT(cxt);
1124
1125         return cxt;
1126 }
1127
1128 /*
1129  * free_context
1130  *
1131  * Free current context, which cannot be the "root" one.
1132  * Make the context underneath globally visible via SET_STCXT().
1133  */
1134 static void free_context(cxt)
1135 stcxt_t *cxt;
1136 {
1137         stcxt_t *prev = cxt->prev;
1138
1139         TRACEME(("free_context"));
1140
1141         ASSERT(!cxt->s_dirty, ("clean context"));
1142         ASSERT(prev, ("not freeing root context"));
1143
1144         if (kbuf)
1145                 Safefree(kbuf);
1146         if (mbase)
1147                 Safefree(mbase);
1148
1149         Safefree(cxt);
1150         SET_STCXT(prev);
1151 }
1152
1153 /***
1154  *** Predicates.
1155  ***/
1156
1157 /*
1158  * is_storing
1159  *
1160  * Tells whether we're in the middle of a store operation.
1161  */
1162 int is_storing(void)
1163 {
1164         dSTCXT;
1165
1166         return cxt->entry && (cxt->optype & ST_STORE);
1167 }
1168
1169 /*
1170  * is_retrieving
1171  *
1172  * Tells whether we're in the middle of a retrieve operation.
1173  */
1174 int is_retrieving(void)
1175 {
1176         dSTCXT;
1177
1178         return cxt->entry && (cxt->optype & ST_RETRIEVE);
1179 }
1180
1181 /*
1182  * last_op_in_netorder
1183  *
1184  * Returns whether last operation was made using network order.
1185  *
1186  * This is typically out-of-band information that might prove useful
1187  * to people wishing to convert native to network order data when used.
1188  */
1189 int last_op_in_netorder(void)
1190 {
1191         dSTCXT;
1192
1193         return cxt->netorder;
1194 }
1195
1196 /***
1197  *** Hook lookup and calling routines.
1198  ***/
1199
1200 /*
1201  * pkg_fetchmeth
1202  *
1203  * A wrapper on gv_fetchmethod_autoload() which caches results.
1204  *
1205  * Returns the routine reference as an SV*, or null if neither the package
1206  * nor its ancestors know about the method.
1207  */
1208 static SV *pkg_fetchmeth(
1209         HV *cache,
1210         HV *pkg,
1211         char *method)
1212 {
1213         GV *gv;
1214         SV *sv;
1215         SV **svh;
1216
1217         /*
1218          * The following code is the same as the one performed by UNIVERSAL::can
1219          * in the Perl core.
1220          */
1221
1222         gv = gv_fetchmethod_autoload(pkg, method, FALSE);
1223         if (gv && isGV(gv)) {
1224                 sv = newRV((SV*) GvCV(gv));
1225                 TRACEME(("%s->%s: 0x%"UVxf, HvNAME(pkg), method, PTR2UV(sv)));
1226         } else {
1227                 sv = newSVsv(&PL_sv_undef);
1228                 TRACEME(("%s->%s: not found", HvNAME(pkg), method));
1229         }
1230
1231         /*
1232          * Cache the result, ignoring failure: if we can't store the value,
1233          * it just won't be cached.
1234          */
1235
1236         (void) hv_store(cache, HvNAME(pkg), strlen(HvNAME(pkg)), sv, 0);
1237
1238         return SvOK(sv) ? sv : (SV *) 0;
1239 }
1240
1241 /*
1242  * pkg_hide
1243  *
1244  * Force cached value to be undef: hook ignored even if present.
1245  */
1246 static void pkg_hide(
1247         HV *cache,
1248         HV *pkg,
1249         char *method)
1250 {
1251         (void) hv_store(cache,
1252                 HvNAME(pkg), strlen(HvNAME(pkg)), newSVsv(&PL_sv_undef), 0);
1253 }
1254
1255 /*
1256  * pkg_uncache
1257  *
1258  * Discard cached value: a whole fetch loop will be retried at next lookup.
1259  */
1260 static void pkg_uncache(
1261         HV *cache,
1262         HV *pkg,
1263         char *method)
1264 {
1265         (void) hv_delete(cache, HvNAME(pkg), strlen(HvNAME(pkg)), G_DISCARD);
1266 }
1267
1268 /*
1269  * pkg_can
1270  *
1271  * Our own "UNIVERSAL::can", which caches results.
1272  *
1273  * Returns the routine reference as an SV*, or null if the object does not
1274  * know about the method.
1275  */
1276 static SV *pkg_can(
1277         HV *cache,
1278         HV *pkg,
1279         char *method)
1280 {
1281         SV **svh;
1282         SV *sv;
1283
1284         TRACEME(("pkg_can for %s->%s", HvNAME(pkg), method));
1285
1286         /*
1287          * Look into the cache to see whether we already have determined
1288          * where the routine was, if any.
1289          *
1290          * NOTA BENE: we don't use `method' at all in our lookup, since we know
1291          * that only one hook (i.e. always the same) is cached in a given cache.
1292          */
1293
1294         svh = hv_fetch(cache, HvNAME(pkg), strlen(HvNAME(pkg)), FALSE);
1295         if (svh) {
1296                 sv = *svh;
1297                 if (!SvOK(sv)) {
1298                         TRACEME(("cached %s->%s: not found", HvNAME(pkg), method));
1299                         return (SV *) 0;
1300                 } else {
1301                         TRACEME(("cached %s->%s: 0x%"UVxf,
1302                                 HvNAME(pkg), method, PTR2UV(sv)));
1303                         return sv;
1304                 }
1305         }
1306
1307         TRACEME(("not cached yet"));
1308         return pkg_fetchmeth(cache, pkg, method);               /* Fetch and cache */
1309 }
1310
1311 /*
1312  * scalar_call
1313  *
1314  * Call routine as obj->hook(av) in scalar context.
1315  * Propagates the single returned value if not called in void context.
1316  */
1317 static SV *scalar_call(
1318         SV *obj,
1319         SV *hook,
1320         int cloning,
1321         AV *av,
1322         I32 flags)
1323 {
1324         dSP;
1325         int count;
1326         SV *sv = 0;
1327
1328         TRACEME(("scalar_call (cloning=%d)", cloning));
1329
1330         ENTER;
1331         SAVETMPS;
1332
1333         PUSHMARK(sp);
1334         XPUSHs(obj);
1335         XPUSHs(sv_2mortal(newSViv(cloning)));           /* Cloning flag */
1336         if (av) {
1337                 SV **ary = AvARRAY(av);
1338                 int cnt = AvFILLp(av) + 1;
1339                 int i;
1340                 XPUSHs(ary[0]);                                                 /* Frozen string */
1341                 for (i = 1; i < cnt; i++) {
1342                         TRACEME(("pushing arg #%d (0x%"UVxf")...",
1343                                  i, PTR2UV(ary[i])));
1344                         XPUSHs(sv_2mortal(newRV(ary[i])));
1345                 }
1346         }
1347         PUTBACK;
1348
1349         TRACEME(("calling..."));
1350         count = perl_call_sv(hook, flags);              /* Go back to Perl code */
1351         TRACEME(("count = %d", count));
1352
1353         SPAGAIN;
1354
1355         if (count) {
1356                 sv = POPs;
1357                 SvREFCNT_inc(sv);               /* We're returning it, must stay alive! */
1358         }
1359
1360         PUTBACK;
1361         FREETMPS;
1362         LEAVE;
1363
1364         return sv;
1365 }
1366
1367 /*
1368  * array_call
1369  *
1370  * Call routine obj->hook(cloning) in list context.
1371  * Returns the list of returned values in an array.
1372  */
1373 static AV *array_call(
1374         SV *obj,
1375         SV *hook,
1376         int cloning)
1377 {
1378         dSP;
1379         int count;
1380         AV *av;
1381         int i;
1382
1383         TRACEME(("array_call (cloning=%d)", cloning));
1384
1385         ENTER;
1386         SAVETMPS;
1387
1388         PUSHMARK(sp);
1389         XPUSHs(obj);                                                            /* Target object */
1390         XPUSHs(sv_2mortal(newSViv(cloning)));           /* Cloning flag */
1391         PUTBACK;
1392
1393         count = perl_call_sv(hook, G_ARRAY);            /* Go back to Perl code */
1394
1395         SPAGAIN;
1396
1397         av = newAV();
1398         for (i = count - 1; i >= 0; i--) {
1399                 SV *sv = POPs;
1400                 av_store(av, i, SvREFCNT_inc(sv));
1401         }
1402
1403         PUTBACK;
1404         FREETMPS;
1405         LEAVE;
1406
1407         return av;
1408 }
1409
1410 /*
1411  * known_class
1412  *
1413  * Lookup the class name in the `hclass' table and either assign it a new ID
1414  * or return the existing one, by filling in `classnum'.
1415  *
1416  * Return true if the class was known, false if the ID was just generated.
1417  */
1418 static int known_class(
1419         stcxt_t *cxt,
1420         char *name,             /* Class name */
1421         int len,                /* Name length */
1422         I32 *classnum)
1423 {
1424         SV **svh;
1425         HV *hclass = cxt->hclass;
1426
1427         TRACEME(("known_class (%s)", name));
1428
1429         /*
1430          * Recall that we don't store pointers in this hash table, but tags.
1431          * Therefore, we need LOW_32BITS() to extract the relevant parts.
1432          */
1433
1434         svh = hv_fetch(hclass, name, len, FALSE);
1435         if (svh) {
1436                 *classnum = LOW_32BITS(*svh);
1437                 return TRUE;
1438         }
1439
1440         /*
1441          * Unknown classname, we need to record it.
1442          */
1443
1444         cxt->classnum++;
1445         if (!hv_store(hclass, name, len, INT2PTR(SV*, cxt->classnum), 0))
1446                 CROAK(("Unable to record new classname"));
1447
1448         *classnum = cxt->classnum;
1449         return FALSE;
1450 }
1451
1452 /***
 *** Specific store routines.
1454  ***/
1455
1456 /*
1457  * store_ref
1458  *
1459  * Store a reference.
1460  * Layout is SX_REF <object> or SX_OVERLOAD <object>.
1461  */
1462 static int store_ref(stcxt_t *cxt, SV *sv)
1463 {
1464         TRACEME(("store_ref (0x%"UVxf")", PTR2UV(sv)));
1465
1466         /*
1467          * Follow reference, and check if target is overloaded.
1468          */
1469
1470         sv = SvRV(sv);
1471
1472         if (SvOBJECT(sv)) {
1473                 HV *stash = (HV *) SvSTASH(sv);
1474                 if (stash && Gv_AMG(stash)) {
1475                         TRACEME(("ref (0x%"UVxf") is overloaded", PTR2UV(sv)));
1476                         PUTMARK(SX_OVERLOAD);
1477                 } else
1478                         PUTMARK(SX_REF);
1479         } else
1480                 PUTMARK(SX_REF);
1481
1482         return store(cxt, sv);
1483 }
1484
1485 /*
1486  * store_scalar
1487  *
1488  * Store a scalar.
1489  *
 * Layout is SX_LSCALAR <length> <data>, SX_SCALAR <length> <data> or SX_UNDEF.
1491  * The <data> section is omitted if <length> is 0.
1492  *
1493  * If integer or double, the layout is SX_INTEGER <data> or SX_DOUBLE <data>.
1494  * Small integers (within [-127, +127]) are stored as SX_BYTE <byte>.
1495  */
1496 static int store_scalar(stcxt_t *cxt, SV *sv)
1497 {
1498         IV iv;
1499         char *pv;
1500         STRLEN len;
1501         U32 flags = SvFLAGS(sv);                        /* "cc -O" may put it in register */
1502
1503         TRACEME(("store_scalar (0x%"UVxf")", PTR2UV(sv)));
1504
1505         /*
1506          * For efficiency, break the SV encapsulation by peaking at the flags
1507          * directly without using the Perl macros to avoid dereferencing
1508          * sv->sv_flags each time we wish to check the flags.
1509          */
1510
1511         if (!(flags & SVf_OK)) {                        /* !SvOK(sv) */
1512                 if (sv == &PL_sv_undef) {
1513                         TRACEME(("immortal undef"));
1514                         PUTMARK(SX_SV_UNDEF);
1515                 } else {
1516                         TRACEME(("undef at 0x%"UVxf, PTR2UV(sv)));
1517                         PUTMARK(SX_UNDEF);
1518                 }
1519                 return 0;
1520         }
1521
1522         /*
1523          * Always store the string representation of a scalar if it exists.
1524          * Gisle Aas provided me with this test case, better than a long speach:
1525          *
1526          *  perl -MDevel::Peek -le '$a="abc"; $a+0; Dump($a)'
1527          *  SV = PVNV(0x80c8520)
1528          *       REFCNT = 1
1529          *       FLAGS = (NOK,POK,pNOK,pPOK)
1530          *       IV = 0
1531          *       NV = 0
1532          *       PV = 0x80c83d0 "abc"\0
1533          *       CUR = 3
1534          *       LEN = 4
1535          *
1536          * Write SX_SCALAR, length, followed by the actual data.
1537          *
1538          * Otherwise, write an SX_BYTE, SX_INTEGER or an SX_DOUBLE as
1539          * appropriate, followed by the actual (binary) data. A double
1540          * is written as a string if network order, for portability.
1541          *
1542          * NOTE: instead of using SvNOK(sv), we test for SvNOKp(sv).
1543          * The reason is that when the scalar value is tainted, the SvNOK(sv)
1544          * value is false.
1545          *
1546          * The test for a read-only scalar with both POK and NOK set is meant
1547          * to quickly detect &PL_sv_yes and &PL_sv_no without having to pay the
1548          * address comparison for each scalar we store.
1549          */
1550
1551 #define SV_MAYBE_IMMORTAL (SVf_READONLY|SVf_POK|SVf_NOK)
1552
1553         if ((flags & SV_MAYBE_IMMORTAL) == SV_MAYBE_IMMORTAL) {
1554                 if (sv == &PL_sv_yes) {
1555                         TRACEME(("immortal yes"));
1556                         PUTMARK(SX_SV_YES);
1557                 } else if (sv == &PL_sv_no) {
1558                         TRACEME(("immortal no"));
1559                         PUTMARK(SX_SV_NO);
1560                 } else {
1561                         pv = SvPV(sv, len);                     /* We know it's SvPOK */
1562                         goto string;                            /* Share code below */
1563                 }
1564         } else if (flags & SVp_POK) {           /* SvPOKp(sv) => string */
1565                 I32 wlen;                                               /* For 64-bit machines */
1566                 pv = SvPV(sv, len);
1567
1568                 /*
1569                  * Will come here from below with pv and len set if double & netorder,
1570                  * or from above if it was readonly, POK and NOK but neither &PL_sv_yes
1571                  * nor &PL_sv_no.
1572                  */
1573         string:
1574
1575                 wlen = (I32) len;                               /* WLEN via STORE_SCALAR expects I32 */
1576                 if (SvUTF8 (sv))
1577                         STORE_UTF8STR(pv, wlen);
1578                 else
1579                         STORE_SCALAR(pv, wlen);
1580                 TRACEME(("ok (scalar 0x%"UVxf" '%s', length = %"IVdf")",
1581                          PTR2UV(sv), SvPVX(sv), (IV)len));
1582
1583         } else if (flags & SVp_NOK) {           /* SvNOKp(sv) => double */
1584                 NV nv = SvNV(sv);
1585
1586                 /*
1587                  * Watch for number being an integer in disguise.
1588                  */
1589                 if (nv == (NV) (iv = I_V(nv))) {
1590                         TRACEME(("double %"NVff" is actually integer %"IVdf, nv, iv));
1591                         goto integer;           /* Share code below */
1592                 }
1593
1594                 if (cxt->netorder) {
1595                         TRACEME(("double %"NVff" stored as string", nv));
1596                         pv = SvPV(sv, len);
1597                         goto string;            /* Share code above */
1598                 }
1599
1600                 PUTMARK(SX_DOUBLE);
1601                 WRITE(&nv, sizeof(nv));
1602
1603                 TRACEME(("ok (double 0x%"UVxf", value = %"NVff")", PTR2UV(sv), nv));
1604
1605         } else if (flags & SVp_IOK) {           /* SvIOKp(sv) => integer */
1606                 iv = SvIV(sv);
1607
1608                 /*
1609                  * Will come here from above with iv set if double is an integer.
1610                  */
1611         integer:
1612
1613                 /*
1614                  * Optimize small integers into a single byte, otherwise store as
1615                  * a real integer (converted into network order if they asked).
1616                  */
1617
1618                 if (iv >= -128 && iv <= 127) {
1619                         unsigned char siv = (unsigned char) (iv + 128); /* [0,255] */
1620                         PUTMARK(SX_BYTE);
1621                         PUTMARK(siv);
1622                         TRACEME(("small integer stored as %d", siv));
1623                 } else if (cxt->netorder) {
1624                         I32 niv;
1625 #ifdef HAS_HTONL
1626                         niv = (I32) htonl(iv);
1627                         TRACEME(("using network order"));
1628 #else
1629                         niv = (I32) iv;
1630                         TRACEME(("as-is for network order"));
1631 #endif
1632                         PUTMARK(SX_NETINT);
1633                         WRITE_I32(niv);
1634                 } else {
1635                         PUTMARK(SX_INTEGER);
1636                         WRITE(&iv, sizeof(iv));
1637                 }
1638
1639                 TRACEME(("ok (integer 0x%"UVxf", value = %"IVdf")", PTR2UV(sv), iv));
1640
1641         } else
1642                 CROAK(("Can't determine type of %s(0x%"UVxf")",
1643                        sv_reftype(sv, FALSE),
1644                        PTR2UV(sv)));
1645
1646         return 0;               /* Ok, no recursion on scalars */
1647 }
1648
1649 /*
1650  * store_array
1651  *
1652  * Store an array.
1653  *
1654  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
1655  * Each item is stored as <object>.
1656  */
1657 static int store_array(stcxt_t *cxt, AV *av)
1658 {
1659         SV **sav;
1660         I32 len = av_len(av) + 1;
1661         I32 i;
1662         int ret;
1663
1664         TRACEME(("store_array (0x%"UVxf")", PTR2UV(av)));
1665
1666         /*
1667          * Signal array by emitting SX_ARRAY, followed by the array length.
1668          */
1669
1670         PUTMARK(SX_ARRAY);
1671         WLEN(len);
1672         TRACEME(("size = %d", len));
1673
1674         /*
1675          * Now store each item recursively.
1676          */
1677
1678         for (i = 0; i < len; i++) {
1679                 sav = av_fetch(av, i, 0);
1680                 if (!sav) {
1681                         TRACEME(("(#%d) undef item", i));
1682                         STORE_UNDEF();
1683                         continue;
1684                 }
1685                 TRACEME(("(#%d) item", i));
1686                 if (ret = store(cxt, *sav))
1687                         return ret;
1688         }
1689
1690         TRACEME(("ok (array)"));
1691
1692         return 0;
1693 }
1694
1695 /*
1696  * sortcmp
1697  *
1698  * Sort two SVs
1699  * Borrowed from perl source file pp_ctl.c, where it is used by pp_sort.
1700  */
1701 static int
1702 sortcmp(const void *a, const void *b)
1703 {
1704         return sv_cmp(*(SV * const *) a, *(SV * const *) b);
1705 }
1706
1707
1708 /*
1709  * store_hash
1710  *
1711  * Store an hash table.
1712  *
1713  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
1714  * Values are stored as <object>.
1715  * Keys are stored as <length> <data>, the <data> section being omitted
1716  * if length is 0.
1717  */
1718 static int store_hash(stcxt_t *cxt, HV *hv)
1719 {
1720         I32 len = HvKEYS(hv);
1721         I32 i;
1722         int ret = 0;
1723         I32 riter;
1724         HE *eiter;
1725
1726         TRACEME(("store_hash (0x%"UVxf")", PTR2UV(hv)));
1727
1728         /*
1729          * Signal hash by emitting SX_HASH, followed by the table length.
1730          */
1731
1732         PUTMARK(SX_HASH);
1733         WLEN(len);
1734         TRACEME(("size = %d", len));
1735
1736         /*
1737          * Save possible iteration state via each() on that table.
1738          */
1739
1740         riter = HvRITER(hv);
1741         eiter = HvEITER(hv);
1742         hv_iterinit(hv);
1743
1744         /*
1745          * Now store each item recursively.
1746          *
1747      * If canonical is defined to some true value then store each
1748      * key/value pair in sorted order otherwise the order is random.
1749          * Canonical order is irrelevant when a deep clone operation is performed.
1750          *
1751          * Fetch the value from perl only once per store() operation, and only
1752          * when needed.
1753          */
1754
1755         if (
1756                 !(cxt->optype & ST_CLONE) && (cxt->canonical == 1 ||
1757                 (cxt->canonical < 0 && (cxt->canonical =
1758                         SvTRUE(perl_get_sv("Storable::canonical", TRUE)) ? 1 : 0)))
1759         ) {
1760                 /*
1761                  * Storing in order, sorted by key.
1762                  * Run through the hash, building up an array of keys in a
1763                  * mortal array, sort the array and then run through the
1764                  * array.
1765                  */
1766
1767                 AV *av = newAV();
1768
1769                 TRACEME(("using canonical order"));
1770
1771                 for (i = 0; i < len; i++) {
1772                         HE *he = hv_iternext(hv);
1773                         SV *key = hv_iterkeysv(he);
1774                         av_store(av, AvFILLp(av)+1, key);       /* av_push(), really */
1775                 }
1776
1777                 qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp);
1778
1779                 for (i = 0; i < len; i++) {
1780                         char *keyval;
1781                         I32 keylen;
1782                         SV *key = av_shift(av);
1783                         HE *he  = hv_fetch_ent(hv, key, 0, 0);
1784                         SV *val = HeVAL(he);
1785                         if (val == 0)
1786                                 return 1;               /* Internal error, not I/O error */
1787
1788                         /*
1789                          * Store value first.
1790                          */
1791
1792                         TRACEME(("(#%d) value 0x%"UVxf, i, PTR2UV(val)));
1793
1794                         if (ret = store(cxt, val))
1795                                 goto out;
1796
1797                         /*
1798                          * Write key string.
1799                          * Keys are written after values to make sure retrieval
1800                          * can be optimal in terms of memory usage, where keys are
1801                          * read into a fixed unique buffer called kbuf.
1802                          * See retrieve_hash() for details.
1803                          */
1804
1805                         keyval = hv_iterkey(he, &keylen);
1806                         TRACEME(("(#%d) key '%s'", i, keyval));
1807                         WLEN(keylen);
1808                         if (keylen)
1809                                 WRITE(keyval, keylen);
1810                 }
1811
1812                 /*
1813                  * Free up the temporary array
1814                  */
1815
1816                 av_undef(av);
1817                 sv_free((SV *) av);
1818
1819         } else {
1820
1821                 /*
1822                  * Storing in "random" order (in the order the keys are stored
1823                  * within the the hash).  This is the default and will be faster!
1824                  */
1825
1826                 for (i = 0; i < len; i++) {
1827                         char *key;
1828                         I32 len;
1829                         SV *val = hv_iternextsv(hv, &key, &len);
1830
1831                         if (val == 0)
1832                                 return 1;               /* Internal error, not I/O error */
1833
1834                         /*
1835                          * Store value first.
1836                          */
1837
1838                         TRACEME(("(#%d) value 0x%"UVxf, i, PTR2UV(val)));
1839
1840                         if (ret = store(cxt, val))
1841                                 goto out;
1842
1843                         /*
1844                          * Write key string.
1845                          * Keys are written after values to make sure retrieval
1846                          * can be optimal in terms of memory usage, where keys are
1847                          * read into a fixed unique buffer called kbuf.
1848                          * See retrieve_hash() for details.
1849                          */
1850
1851                         TRACEME(("(#%d) key '%s'", i, key));
1852                         WLEN(len);
1853                         if (len)
1854                                 WRITE(key, len);
1855                 }
1856     }
1857
1858         TRACEME(("ok (hash 0x%"UVxf")", PTR2UV(hv)));
1859
1860 out:
1861         HvRITER(hv) = riter;            /* Restore hash iterator state */
1862         HvEITER(hv) = eiter;
1863
1864         return ret;
1865 }
1866
1867 /*
1868  * store_tied
1869  *
1870  * When storing a tied object (be it a tied scalar, array or hash), we lay out
1871  * a special mark, followed by the underlying tied object. For instance, when
1872  * dealing with a tied hash, we store SX_TIED_HASH <hash object>, where
1873  * <hash object> stands for the serialization of the tied hash.
1874  */
1875 static int store_tied(stcxt_t *cxt, SV *sv)
1876 {
1877         MAGIC *mg;
1878         int ret = 0;
1879         int svt = SvTYPE(sv);
1880         char mtype = 'P';
1881
1882         TRACEME(("store_tied (0x%"UVxf")", PTR2UV(sv)));
1883
1884         /*
1885          * We have a small run-time penalty here because we chose to factorise
1886          * all tieds objects into the same routine, and not have a store_tied_hash,
1887          * a store_tied_array, etc...
1888          *
1889          * Don't use a switch() statement, as most compilers don't optimize that
1890          * well for 2/3 values. An if() else if() cascade is just fine. We put
1891          * tied hashes first, as they are the most likely beasts.
1892          */
1893
1894         if (svt == SVt_PVHV) {
1895                 TRACEME(("tied hash"));
1896                 PUTMARK(SX_TIED_HASH);                  /* Introduces tied hash */
1897         } else if (svt == SVt_PVAV) {
1898                 TRACEME(("tied array"));
1899                 PUTMARK(SX_TIED_ARRAY);                 /* Introduces tied array */
1900         } else {
1901                 TRACEME(("tied scalar"));
1902                 PUTMARK(SX_TIED_SCALAR);                /* Introduces tied scalar */
1903                 mtype = 'q';
1904         }
1905
1906         if (!(mg = mg_find(sv, mtype)))
1907                 CROAK(("No magic '%c' found while storing tied %s", mtype,
1908                         (svt == SVt_PVHV) ? "hash" :
1909                                 (svt == SVt_PVAV) ? "array" : "scalar"));
1910
1911         /*
1912          * The mg->mg_obj found by mg_find() above actually points to the
1913          * underlying tied Perl object implementation. For instance, if the
1914          * original SV was that of a tied array, then mg->mg_obj is an AV.
1915          *
1916          * Note that we store the Perl object as-is. We don't call its FETCH
1917          * method along the way. At retrieval time, we won't call its STORE
1918          * method either, but the tieing magic will be re-installed. In itself,
1919          * that ensures that the tieing semantics are preserved since futher
1920          * accesses on the retrieved object will indeed call the magic methods...
1921          */
1922
1923         if (ret = store(cxt, mg->mg_obj))
1924                 return ret;
1925
1926         TRACEME(("ok (tied)"));
1927
1928         return 0;
1929 }
1930
1931 /*
1932  * store_tied_item
1933  *
1934  * Stores a reference to an item within a tied structure:
1935  *
1936  *  . \$h{key}, stores both the (tied %h) object and 'key'.
1937  *  . \$a[idx], stores both the (tied @a) object and 'idx'.
1938  *
1939  * Layout is therefore either:
1940  *     SX_TIED_KEY <object> <key>
1941  *     SX_TIED_IDX <object> <index>
1942  */
1943 static int store_tied_item(stcxt_t *cxt, SV *sv)
1944 {
1945         MAGIC *mg;
1946         int ret;
1947
1948         TRACEME(("store_tied_item (0x%"UVxf")", PTR2UV(sv)));
1949
1950         if (!(mg = mg_find(sv, 'p')))
1951                 CROAK(("No magic 'p' found while storing reference to tied item"));
1952
1953         /*
1954          * We discriminate between \$h{key} and \$a[idx] via mg_ptr.
1955          */
1956
1957         if (mg->mg_ptr) {
1958                 TRACEME(("store_tied_item: storing a ref to a tied hash item"));
1959                 PUTMARK(SX_TIED_KEY);
1960                 TRACEME(("store_tied_item: storing OBJ 0x%"UVxf, PTR2UV(mg->mg_obj)));
1961
1962                 if (ret = store(cxt, mg->mg_obj))
1963                         return ret;
1964
1965                 TRACEME(("store_tied_item: storing PTR 0x%"UVxf, PTR2UV(mg->mg_ptr)));
1966
1967                 if (ret = store(cxt, (SV *) mg->mg_ptr))
1968                         return ret;
1969         } else {
1970                 I32 idx = mg->mg_len;
1971
1972                 TRACEME(("store_tied_item: storing a ref to a tied array item "));
1973                 PUTMARK(SX_TIED_IDX);
1974                 TRACEME(("store_tied_item: storing OBJ 0x%"UVxf, PTR2UV(mg->mg_obj)));
1975
1976                 if (ret = store(cxt, mg->mg_obj))
1977                         return ret;
1978
1979                 TRACEME(("store_tied_item: storing IDX %d", idx));
1980
1981                 WLEN(idx);
1982         }
1983
1984         TRACEME(("ok (tied item)"));
1985
1986         return 0;
1987 }
1988
1989 /*
1990  * store_hook           -- dispatched manually, not via sv_store[]
1991  *
1992  * The blessed SV is serialized by a hook.
1993  *
1994  * Simple Layout is:
1995  *
1996  *     SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
1997  *
1998  * where <flags> indicates how long <len>, <len2> and <len3> are, whether
1999  * the trailing part [] is present, the type of object (scalar, array or hash).
2000  * There is also a bit which says how the classname is stored between:
2001  *
2002  *     <len> <classname>
2003  *     <index>
2004  *
2005  * and when the <index> form is used (classname already seen), the "large
2006  * classname" bit in <flags> indicates how large the <index> is.
2007  *
2008  * The serialized string returned by the hook is of length <len2> and comes
2009  * next.  It is an opaque string for us.
2010  *
2011  * Those <len3> object IDs which are listed last represent the extra references
2012  * not directly serialized by the hook, but which are linked to the object.
2013  *
2014  * When recursion is mandated to resolve object-IDs not yet seen, we have
2015  * instead, with <header> being flags with bits set to indicate the object type
2016  * and that recursion was indeed needed:
2017  *
2018  *     SX_HOOK <header> <object> <header> <object> <flags>
2019  *
2020  * that same header being repeated between serialized objects obtained through
2021  * recursion, until we reach flags indicating no recursion, at which point
2022  * we know we've resynchronized with a single layout, after <flags>.
2023  */
2024 static int store_hook(
2025         stcxt_t *cxt,
2026         SV *sv,
2027         int type,
2028         HV *pkg,
2029         SV *hook)
2030 {
2031         I32 len;
2032         char *class;
2033         STRLEN len2;
2034         SV *ref;
2035         AV *av;
2036         SV **ary;
2037         int count;                              /* really len3 + 1 */
2038         unsigned char flags;
2039         char *pv;
2040         int i;
2041         int recursed = 0;               /* counts recursion */
2042         int obj_type;                   /* object type, on 2 bits */
2043         I32 classnum;
2044         int ret;
2045         int clone = cxt->optype & ST_CLONE;
2046
2047         TRACEME(("store_hook, class \"%s\", tagged #%d", HvNAME(pkg), cxt->tagnum));
2048
2049         /*
2050          * Determine object type on 2 bits.
2051          */
2052
2053         switch (type) {
2054         case svis_SCALAR:
2055                 obj_type = SHT_SCALAR;
2056                 break;
2057         case svis_ARRAY:
2058                 obj_type = SHT_ARRAY;
2059                 break;
2060         case svis_HASH:
2061                 obj_type = SHT_HASH;
2062                 break;
2063         default:
2064                 CROAK(("Unexpected object type (%d) in store_hook()", type));
2065         }
2066         flags = SHF_NEED_RECURSE | obj_type;
2067
2068         class = HvNAME(pkg);
2069         len = strlen(class);
2070
2071         /*
2072          * To call the hook, we need to fake a call like:
2073          *
2074          *    $object->STORABLE_freeze($cloning);
2075          *
2076          * but we don't have the $object here.  For instance, if $object is
2077          * a blessed array, what we have in `sv' is the array, and we can't
2078          * call a method on those.
2079          *
2080          * Therefore, we need to create a temporary reference to the object and
2081          * make the call on that reference.
2082          */
2083
2084         TRACEME(("about to call STORABLE_freeze on class %s", class));
2085
2086         ref = newRV_noinc(sv);                          /* Temporary reference */
2087         av = array_call(ref, hook, clone);      /* @a = $object->STORABLE_freeze($c) */
2088         SvRV(ref) = 0;
2089         SvREFCNT_dec(ref);                                      /* Reclaim temporary reference */
2090
2091         count = AvFILLp(av) + 1;
2092         TRACEME(("store_hook, array holds %d items", count));
2093
2094         /*
2095          * If they return an empty list, it means they wish to ignore the
2096          * hook for this class (and not just this instance -- that's for them
2097          * to handle if they so wish).
2098          *
2099          * Simply disable the cached entry for the hook (it won't be recomputed
2100          * since it's present in the cache) and recurse to store_blessed().
2101          */
2102
2103         if (!count) {
2104                 /*
2105                  * They must not change their mind in the middle of a serialization.
2106                  */
2107
2108                 if (hv_fetch(cxt->hclass, class, len, FALSE))
2109                         CROAK(("Too late to ignore hooks for %s class \"%s\"",
2110                                 (cxt->optype & ST_CLONE) ? "cloning" : "storing", class));
2111
2112                 pkg_hide(cxt->hook, pkg, "STORABLE_freeze");
2113
2114                 ASSERT(!pkg_can(cxt->hook, pkg, "STORABLE_freeze"), ("hook invisible"));
2115                 TRACEME(("ignoring STORABLE_freeze in class \"%s\"", class));
2116
2117                 return store_blessed(cxt, sv, type, pkg);
2118         }
2119
2120         /*
2121          * Get frozen string.
2122          */
2123
2124         ary = AvARRAY(av);
2125         pv = SvPV(ary[0], len2);
2126
2127         /*
2128          * If they returned more than one item, we need to serialize some
2129          * extra references if not already done.
2130          *
2131          * Loop over the array, starting at postion #1, and for each item,
2132          * ensure it is a reference, serialize it if not already done, and
2133          * replace the entry with the tag ID of the corresponding serialized
2134          * object.
2135          *
2136          * We CHEAT by not calling av_fetch() and read directly within the
2137          * array, for speed.
2138          */
2139
2140         for (i = 1; i < count; i++) {
2141                 SV **svh;
2142                 SV *rsv = ary[i];
2143                 SV *xsv;
2144                 AV *av_hook = cxt->hook_seen;
2145
2146                 if (!SvROK(rsv))
2147                         CROAK(("Item #%d returned by STORABLE_freeze "
2148                                 "for %s is not a reference", i, class));
2149                 xsv = SvRV(rsv);                /* Follow ref to know what to look for */
2150
2151                 /*
2152                  * Look in hseen and see if we have a tag already.
2153                  * Serialize entry if not done already, and get its tag.
2154                  */
2155
2156                 if (svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE))
2157                         goto sv_seen;           /* Avoid moving code too far to the right */
2158
2159                 TRACEME(("listed object %d at 0x%"UVxf" is unknown", i-1, PTR2UV(xsv)));
2160
2161                 /*
2162                  * We need to recurse to store that object and get it to be known
2163                  * so that we can resolve the list of object-IDs at retrieve time.
2164                  *
2165                  * The first time we do this, we need to emit the proper header
2166                  * indicating that we recursed, and what the type of object is (the
2167                  * object we're storing via a user-hook).  Indeed, during retrieval,
2168                  * we'll have to create the object before recursing to retrieve the
2169                  * others, in case those would point back at that object.
2170                  */
2171
2172                 /* [SX_HOOK] <flags> <object>*/
2173                 if (!recursed++)
2174                         PUTMARK(SX_HOOK);
2175                 PUTMARK(flags);
2176
2177                 if (ret = store(cxt, xsv))              /* Given by hook for us to store */
2178                         return ret;
2179
2180                 svh = hv_fetch(cxt->hseen, (char *) &xsv, sizeof(xsv), FALSE);
2181                 if (!svh)
2182                         CROAK(("Could not serialize item #%d from hook in %s", i, class));
2183
2184                 /*
2185                  * It was the first time we serialized `xsv'.
2186                  *
2187                  * Keep this SV alive until the end of the serialization: if we
2188                  * disposed of it right now by decrementing its refcount, and it was
2189                  * a temporary value, some next temporary value allocated during
2190                  * another STORABLE_freeze might take its place, and we'd wrongly
2191                  * assume that new SV was already serialized, based on its presence
2192                  * in cxt->hseen.
2193                  *
2194                  * Therefore, push it away in cxt->hook_seen.
2195                  */
2196
2197                 av_store(av_hook, AvFILLp(av_hook)+1, SvREFCNT_inc(xsv));
2198
2199         sv_seen:
2200                 /*
2201                  * Dispose of the REF they returned.  If we saved the `xsv' away
2202                  * in the array of returned SVs, that will not cause the underlying
2203                  * referenced SV to be reclaimed.
2204                  */
2205
2206                 ASSERT(SvREFCNT(xsv) > 1, ("SV will survive disposal of its REF"));
2207                 SvREFCNT_dec(rsv);                      /* Dispose of reference */
2208
2209                 /*
2210                  * Replace entry with its tag (not a real SV, so no refcnt increment)
2211                  */
2212
2213                 ary[i] = *svh;
2214                 TRACEME(("listed object %d at 0x%"UVxf" is tag #%"UVuf,
2215                          i-1, PTR2UV(xsv), PTR2UV(*svh)));
2216         }
2217
2218         /*
2219          * Allocate a class ID if not already done.
2220          *
2221          * This needs to be done after the recursion above, since at retrieval
2222          * time, we'll see the inner objects first.  Many thanks to
2223          * Salvador Ortiz Garcia <sog@msg.com.mx> who spot that bug and
2224          * proposed the right fix.  -- RAM, 15/09/2000
2225          */
2226
2227         if (!known_class(cxt, class, len, &classnum)) {
2228                 TRACEME(("first time we see class %s, ID = %d", class, classnum));
2229                 classnum = -1;                          /* Mark: we must store classname */
2230         } else {
2231                 TRACEME(("already seen class %s, ID = %d", class, classnum));
2232         }
2233
2234         /*
2235          * Compute leading flags.
2236          */
2237
2238         flags = obj_type;
2239         if (((classnum == -1) ? len : classnum) > LG_SCALAR)
2240                 flags |= SHF_LARGE_CLASSLEN;
2241         if (classnum != -1)
2242                 flags |= SHF_IDX_CLASSNAME;
2243         if (len2 > LG_SCALAR)
2244                 flags |= SHF_LARGE_STRLEN;
2245         if (count > 1)
2246                 flags |= SHF_HAS_LIST;
2247         if (count > (LG_SCALAR + 1))
2248                 flags |= SHF_LARGE_LISTLEN;
2249
2250         /*
2251          * We're ready to emit either serialized form:
2252          *
2253          *   SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
2254          *   SX_HOOK <flags> <index>           <len2> <str> [<len3> <object-IDs>]
2255          *
2256          * If we recursed, the SX_HOOK has already been emitted.
2257          */
2258
2259         TRACEME(("SX_HOOK (recursed=%d) flags=0x%x "
2260                         "class=%"IVdf" len=%"IVdf" len2=%"IVdf" len3=%d",
2261                  recursed, flags, (IV)classnum, (IV)len, (IV)len2, count-1));
2262
2263         /* SX_HOOK <flags> */
2264         if (!recursed)
2265                 PUTMARK(SX_HOOK);
2266         PUTMARK(flags);
2267
2268         /* <len> <classname> or <index> */
2269         if (flags & SHF_IDX_CLASSNAME) {
2270                 if (flags & SHF_LARGE_CLASSLEN)
2271                         WLEN(classnum);
2272                 else {
2273                         unsigned char cnum = (unsigned char) classnum;
2274                         PUTMARK(cnum);
2275                 }
2276         } else {
2277                 if (flags & SHF_LARGE_CLASSLEN)
2278                         WLEN(len);
2279                 else {
2280                         unsigned char clen = (unsigned char) len;
2281                         PUTMARK(clen);
2282                 }
2283                 WRITE(class, len);              /* Final \0 is omitted */
2284         }
2285
2286         /* <len2> <frozen-str> */
2287         if (flags & SHF_LARGE_STRLEN) {
2288                 I32 wlen2 = len2;               /* STRLEN might be 8 bytes */
2289                 WLEN(wlen2);                    /* Must write an I32 for 64-bit machines */
2290         } else {
2291                 unsigned char clen = (unsigned char) len2;
2292                 PUTMARK(clen);
2293         }
2294         if (len2)
2295                 WRITE(pv, len2);        /* Final \0 is omitted */
2296
2297         /* [<len3> <object-IDs>] */
2298         if (flags & SHF_HAS_LIST) {
2299                 int len3 = count - 1;
2300                 if (flags & SHF_LARGE_LISTLEN)
2301                         WLEN(len3);
2302                 else {
2303                         unsigned char clen = (unsigned char) len3;
2304                         PUTMARK(clen);
2305                 }
2306
2307                 /*
2308                  * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a
2309                  * real pointer, rather a tag number, well under the 32-bit limit.
2310                  */
2311
2312                 for (i = 1; i < count; i++) {
2313                         I32 tagval = htonl(LOW_32BITS(ary[i]));
2314                         WRITE_I32(tagval);
2315                         TRACEME(("object %d, tag #%d", i-1, ntohl(tagval)));
2316                 }
2317         }
2318
2319         /*
2320          * Free the array.  We need extra care for indices after 0, since they
2321          * don't hold real SVs but integers cast.
2322          */
2323
2324         if (count > 1)
2325                 AvFILLp(av) = 0;        /* Cheat, nothing after 0 interests us */
2326         av_undef(av);
2327         sv_free((SV *) av);
2328
2329         return 0;
2330 }
2331
2332 /*
2333  * store_blessed        -- dispatched manually, not via sv_store[]
2334  *
2335  * Check whether there is a STORABLE_xxx hook defined in the class or in one
2336  * of its ancestors.  If there is, then redispatch to store_hook();
2337  *
2338  * Otherwise, the blessed SV is stored using the following layout:
2339  *
2340  *    SX_BLESS <flag> <len> <classname> <object>
2341  *
2342  * where <flag> indicates whether <len> is stored on 0 or 4 bytes, depending
2343  * on the high-order bit in flag: if 1, then length follows on 4 bytes.
2344  * Otherwise, the low order bits give the length, thereby giving a compact
2345  * representation for class names less than 127 chars long.
2346  *
2347  * Each <classname> seen is remembered and indexed, so that the next time
2348  * an object in the blessed in the same <classname> is stored, the following
2349  * will be emitted:
2350  *
2351  *    SX_IX_BLESS <flag> <index> <object>
2352  *
2353  * where <index> is the classname index, stored on 0 or 4 bytes depending
2354  * on the high-order bit in flag (same encoding as above for <len>).
2355  */
2356 static int store_blessed(
2357         stcxt_t *cxt,
2358         SV *sv,
2359         int type,
2360         HV *pkg)
2361 {
2362         SV *hook;
2363         I32 len;
2364         char *class;
2365         I32 classnum;
2366
2367         TRACEME(("store_blessed, type %d, class \"%s\"", type, HvNAME(pkg)));
2368
2369         /*
2370          * Look for a hook for this blessed SV and redirect to store_hook()
2371          * if needed.
2372          */
2373
2374         hook = pkg_can(cxt->hook, pkg, "STORABLE_freeze");
2375         if (hook)
2376                 return store_hook(cxt, sv, type, pkg, hook);
2377
2378         /*
2379          * This is a blessed SV without any serialization hook.
2380          */
2381
2382         class = HvNAME(pkg);
2383         len = strlen(class);
2384
2385         TRACEME(("blessed 0x%"UVxf" in %s, no hook: tagged #%d",
2386                  PTR2UV(sv), class, cxt->tagnum));
2387
2388         /*
2389          * Determine whether it is the first time we see that class name (in which
2390          * case it will be stored in the SX_BLESS form), or whether we already
2391          * saw that class name before (in which case the SX_IX_BLESS form will be
2392          * used).
2393          */
2394
2395         if (known_class(cxt, class, len, &classnum)) {
2396                 TRACEME(("already seen class %s, ID = %d", class, classnum));
2397                 PUTMARK(SX_IX_BLESS);
2398                 if (classnum <= LG_BLESS) {
2399                         unsigned char cnum = (unsigned char) classnum;
2400                         PUTMARK(cnum);
2401                 } else {
2402                         unsigned char flag = (unsigned char) 0x80;
2403                         PUTMARK(flag);
2404                         WLEN(classnum);
2405                 }
2406         } else {
2407                 TRACEME(("first time we see class %s, ID = %d", class, classnum));
2408                 PUTMARK(SX_BLESS);
2409                 if (len <= LG_BLESS) {
2410                         unsigned char clen = (unsigned char) len;
2411                         PUTMARK(clen);
2412                 } else {
2413                         unsigned char flag = (unsigned char) 0x80;
2414                         PUTMARK(flag);
2415                         WLEN(len);                                      /* Don't BER-encode, this should be rare */
2416                 }
2417                 WRITE(class, len);                              /* Final \0 is omitted */
2418         }
2419
2420         /*
2421          * Now emit the <object> part.
2422          */
2423
2424         return SV_STORE(type)(cxt, sv);
2425 }
2426
2427 /*
2428  * store_other
2429  *
2430  * We don't know how to store the item we reached, so return an error condition.
2431  * (it's probably a GLOB, some CODE reference, etc...)
2432  *
2433  * If they defined the `forgive_me' variable at the Perl level to some
2434  * true value, then don't croak, just warn, and store a placeholder string
2435  * instead.
2436  */
2437 static int store_other(stcxt_t *cxt, SV *sv)
2438 {
2439         I32 len;
2440         static char buf[80];
2441
2442         TRACEME(("store_other"));
2443
2444         /*
2445          * Fetch the value from perl only once per store() operation.
2446          */
2447
2448         if (
2449                 cxt->forgive_me == 0 ||
2450                 (cxt->forgive_me < 0 && !(cxt->forgive_me =
2451                         SvTRUE(perl_get_sv("Storable::forgive_me", TRUE)) ? 1 : 0))
2452         )
2453                 CROAK(("Can't store %s items", sv_reftype(sv, FALSE)));
2454
2455         warn("Can't store item %s(0x%"UVxf")",
2456                 sv_reftype(sv, FALSE), PTR2UV(sv));
2457
2458         /*
2459          * Store placeholder string as a scalar instead...
2460          */
2461
2462         (void) sprintf(buf, "You lost %s(0x%"UVxf")\0", sv_reftype(sv, FALSE),
2463                        PTR2UV(sv));
2464
2465         len = strlen(buf);
2466         STORE_SCALAR(buf, len);
2467         TRACEME(("ok (dummy \"%s\", length = %"IVdf")", buf, len));
2468
2469         return 0;
2470 }
2471
2472 /***
2473  *** Store driving routines
2474  ***/
2475
2476 /*
2477  * sv_type
2478  *
2479  * WARNING: partially duplicates Perl's sv_reftype for speed.
2480  *
2481  * Returns the type of the SV, identified by an integer. That integer
2482  * may then be used to index the dynamic routine dispatch table.
2483  */
2484 static int sv_type(SV *sv)
2485 {
2486         switch (SvTYPE(sv)) {
2487         case SVt_NULL:
2488         case SVt_IV:
2489         case SVt_NV:
2490                 /*
2491                  * No need to check for ROK, that can't be set here since there
2492                  * is no field capable of hodling the xrv_rv reference.
2493                  */
2494                 return svis_SCALAR;
2495         case SVt_PV:
2496         case SVt_RV:
2497         case SVt_PVIV:
2498         case SVt_PVNV:
2499                 /*
2500                  * Starting from SVt_PV, it is possible to have the ROK flag
2501                  * set, the pointer to the other SV being either stored in
2502                  * the xrv_rv (in the case of a pure SVt_RV), or as the
2503                  * xpv_pv field of an SVt_PV and its heirs.
2504                  *
2505                  * However, those SV cannot be magical or they would be an
2506                  * SVt_PVMG at least.
2507                  */
2508                 return SvROK(sv) ? svis_REF : svis_SCALAR;
2509         case SVt_PVMG:
2510         case SVt_PVLV:          /* Workaround for perl5.004_04 "LVALUE" bug */
2511                 if (SvRMAGICAL(sv) && (mg_find(sv, 'p')))
2512                         return svis_TIED_ITEM;
2513                 /* FALL THROUGH */
2514         case SVt_PVBM:
2515                 if (SvRMAGICAL(sv) && (mg_find(sv, 'q')))
2516                         return svis_TIED;
2517                 return SvROK(sv) ? svis_REF : svis_SCALAR;
2518         case SVt_PVAV:
2519                 if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
2520                         return svis_TIED;
2521                 return svis_ARRAY;
2522         case SVt_PVHV:
2523                 if (SvRMAGICAL(sv) && (mg_find(sv, 'P')))
2524                         return svis_TIED;
2525                 return svis_HASH;
2526         default:
2527                 break;
2528         }
2529
2530         return svis_OTHER;
2531 }
2532
2533 /*
2534  * store
2535  *
2536  * Recursively store objects pointed to by the sv to the specified file.
2537  *
2538  * Layout is <content> or SX_OBJECT <tagnum> if we reach an already stored
2539  * object (one for which storage has started -- it may not be over if we have
2540  * a self-referenced structure). This data set forms a stored <object>.
2541  */
2542 static int store(stcxt_t *cxt, SV *sv)
2543 {
2544         SV **svh;
2545         int ret;
2546         SV *tag;
2547         int type;
2548         HV *hseen = cxt->hseen;
2549
2550         TRACEME(("store (0x%"UVxf")", PTR2UV(sv)));
2551
2552         /*
2553          * If object has already been stored, do not duplicate data.
2554          * Simply emit the SX_OBJECT marker followed by its tag data.
2555          * The tag is always written in network order.
2556          *
2557          * NOTA BENE, for 64-bit machines: the "*svh" below does not yield a
2558          * real pointer, rather a tag number (watch the insertion code below).
2559          * That means it pobably safe to assume it is well under the 32-bit limit,
2560          * and makes the truncation safe.
2561          *              -- RAM, 14/09/1999
2562          */
2563
2564         svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE);
2565         if (svh) {
2566                 I32 tagval = htonl(LOW_32BITS(*svh));
2567
2568                 TRACEME(("object 0x%"UVxf" seen as #%d", PTR2UV(sv), ntohl(tagval)));
2569
2570                 PUTMARK(SX_OBJECT);
2571                 WRITE_I32(tagval);
2572                 return 0;
2573         }
2574
2575         /*
2576          * Allocate a new tag and associate it with the address of the sv being
2577          * stored, before recursing...
2578          *
2579          * In order to avoid creating new SvIVs to hold the tagnum we just
2580          * cast the tagnum to a SV pointer and store that in the hash.  This
2581          * means that we must clean up the hash manually afterwards, but gives
2582          * us a 15% throughput increase.
2583          *
2584          */
2585
2586         cxt->tagnum++;
2587         if (!hv_store(hseen,
2588                         (char *) &sv, sizeof(sv), INT2PTR(SV*, cxt->tagnum), 0))
2589                 return -1;
2590
2591         /*
2592          * Store `sv' and everything beneath it, using appropriate routine.
2593          * Abort immediately if we get a non-zero status back.
2594          */
2595
2596         type = sv_type(sv);
2597
2598         TRACEME(("storing 0x%"UVxf" tag #%d, type %d...",
2599                  PTR2UV(sv), cxt->tagnum, type));
2600
2601         if (SvOBJECT(sv)) {
2602                 HV *pkg = SvSTASH(sv);
2603                 ret = store_blessed(cxt, sv, type, pkg);
2604         } else
2605                 ret = SV_STORE(type)(cxt, sv);
2606
2607         TRACEME(("%s (stored 0x%"UVxf", refcnt=%d, %s)",
2608                 ret ? "FAILED" : "ok", PTR2UV(sv),
2609                 SvREFCNT(sv), sv_reftype(sv, FALSE)));
2610
2611         return ret;
2612 }
2613
2614 /*
2615  * magic_write
2616  *
2617  * Write magic number and system information into the file.
2618  * Layout is <magic> <network> [<len> <byteorder> <sizeof int> <sizeof long>
2619  * <sizeof ptr>] where <len> is the length of the byteorder hexa string.
2620  * All size and lenghts are written as single characters here.
2621  *
2622  * Note that no byte ordering info is emitted when <network> is true, since
2623  * integers will be emitted in network order in that case.
2624  */
2625 static int magic_write(stcxt_t *cxt)
2626 {
2627         char buf[256];  /* Enough room for 256 hexa digits */
2628         unsigned char c;
2629         int use_network_order = cxt->netorder;
2630
2631         TRACEME(("magic_write on fd=%d", cxt->fio ? fileno(cxt->fio) : -1));
2632
2633         if (cxt->fio)
2634                 WRITE(magicstr, strlen(magicstr));      /* Don't write final \0 */
2635
2636         /*
2637          * Starting with 0.6, the "use_network_order" byte flag is also used to
2638          * indicate the version number of the binary image, encoded in the upper
2639          * bits. The bit 0 is always used to indicate network order.
2640          */
2641
2642         c = (unsigned char)
2643                 ((use_network_order ? 0x1 : 0x0) | (STORABLE_BIN_MAJOR << 1));
2644         PUTMARK(c);
2645
2646         /*
2647          * Starting with 0.7, a full byte is dedicated to the minor version of
2648          * the binary format, which is incremented only when new markers are
2649          * introduced, for instance, but when backward compatibility is preserved.
2650          */
2651
2652         PUTMARK((unsigned char) STORABLE_BIN_MINOR);
2653
2654         if (use_network_order)
2655                 return 0;                                               /* Don't bother with byte ordering */
2656
2657         sprintf(buf, "%lx", (unsigned long) BYTEORDER);
2658         c = (unsigned char) strlen(buf);
2659         PUTMARK(c);
2660         WRITE(buf, (unsigned int) c);           /* Don't write final \0 */
2661         PUTMARK((unsigned char) sizeof(int));
2662         PUTMARK((unsigned char) sizeof(long));
2663         PUTMARK((unsigned char) sizeof(char *));
2664         PUTMARK((unsigned char) sizeof(NV));
2665
2666         TRACEME(("ok (magic_write byteorder = 0x%lx [%d], I%d L%d P%d D%d)",
2667                  (unsigned long) BYTEORDER, (int) c,
2668                  (int) sizeof(int), (int) sizeof(long),
2669                  (int) sizeof(char *), (int) sizeof(NV)));
2670
2671         return 0;
2672 }
2673
2674 /*
2675  * do_store
2676  *
2677  * Common code for store operations.
2678  *
2679  * When memory store is requested (f = NULL) and a non null SV* is given in
2680  * `res', it is filled with a new SV created out of the memory buffer.
2681  *
2682  * It is required to provide a non-null `res' when the operation type is not
2683  * dclone() and store() is performed to memory.
2684  */
2685 static int do_store(
2686         PerlIO *f,
2687         SV *sv,
2688         int optype,
2689         int network_order,
2690         SV **res)
2691 {
2692         dSTCXT;
2693         int status;
2694
2695         ASSERT(!(f == 0 && !(optype & ST_CLONE)) || res,
2696                 ("must supply result SV pointer for real recursion to memory"));
2697
2698         TRACEME(("do_store (optype=%d, netorder=%d)",
2699                 optype, network_order));
2700
2701         optype |= ST_STORE;
2702
2703         /*
2704          * Workaround for CROAK leak: if they enter with a "dirty" context,
2705          * free up memory for them now.
2706          */
2707
2708         if (cxt->s_dirty)
2709                 clean_context(cxt);
2710
2711         /*
2712          * Now that STORABLE_xxx hooks exist, it is possible that they try to
2713          * re-enter store() via the hooks.  We need to stack contexts.
2714          */
2715
2716         if (cxt->entry)
2717                 cxt = allocate_context(cxt);
2718
2719         cxt->entry++;
2720
2721         ASSERT(cxt->entry == 1, ("starting new recursion"));
2722         ASSERT(!cxt->s_dirty, ("clean context"));
2723
2724         /*
2725          * Ensure sv is actually a reference. From perl, we called something
2726          * like:
2727          *       pstore(FILE, \@array);
2728          * so we must get the scalar value behing that reference.
2729          */
2730
2731         if (!SvROK(sv))
2732                 CROAK(("Not a reference"));
2733         sv = SvRV(sv);                  /* So follow it to know what to store */
2734
2735         /*
2736          * If we're going to store to memory, reset the buffer.
2737          */
2738
2739         if (!f)
2740                 MBUF_INIT(0);
2741
2742         /*
2743          * Prepare context and emit headers.
2744          */
2745
2746         init_store_context(cxt, f, optype, network_order);
2747
2748         if (-1 == magic_write(cxt))             /* Emit magic and ILP info */
2749                 return 0;                                       /* Error */
2750
2751         /*
2752          * Recursively store object...
2753          */
2754
2755         ASSERT(is_storing(), ("within store operation"));
2756
2757         status = store(cxt, sv);                /* Just do it! */
2758
2759         /*
2760          * If they asked for a memory store and they provided an SV pointer,
2761          * make an SV string out of the buffer and fill their pointer.
2762          *
2763          * When asking for ST_REAL, it's MANDATORY for the caller to provide
2764          * an SV, since context cleanup might free the buffer if we did recurse.
2765          * (unless caller is dclone(), which is aware of that).
2766          */
2767
2768         if (!cxt->fio && res)
2769                 *res = mbuf2sv();
2770
2771         /*
2772          * Final cleanup.
2773          *
2774          * The "root" context is never freed, since it is meant to be always
2775          * handy for the common case where no recursion occurs at all (i.e.
2776          * we enter store() outside of any Storable code and leave it, period).
2777          * We know it's the "root" context because there's nothing stacked
2778          * underneath it.
2779          *
2780          * OPTIMIZATION:
2781          *
2782          * When deep cloning, we don't free the context: doing so would force
2783          * us to copy the data in the memory buffer.  Sicne we know we're
2784          * about to enter do_retrieve...
2785          */
2786
2787         clean_store_context(cxt);
2788         if (cxt->prev && !(cxt->optype & ST_CLONE))
2789                 free_context(cxt);
2790
2791         TRACEME(("do_store returns %d", status));
2792
2793         return status == 0;
2794 }
2795
2796 /*
2797  * pstore
2798  *
2799  * Store the transitive data closure of given object to disk.
2800  * Returns 0 on error, a true value otherwise.
2801  */
2802 int pstore(PerlIO *f, SV *sv)
2803 {
2804         TRACEME(("pstore"));
2805         return do_store(f, sv, 0, FALSE, (SV**) 0);
2806
2807 }
2808
2809 /*
2810  * net_pstore
2811  *
2812  * Same as pstore(), but network order is used for integers and doubles are
2813  * emitted as strings.
2814  */
2815 int net_pstore(PerlIO *f, SV *sv)
2816 {
2817         TRACEME(("net_pstore"));
2818         return do_store(f, sv, 0, TRUE, (SV**) 0);
2819 }
2820
2821 /***
2822  *** Memory stores.
2823  ***/
2824
2825 /*
2826  * mbuf2sv
2827  *
2828  * Build a new SV out of the content of the internal memory buffer.
2829  */
2830 static SV *mbuf2sv(void)
2831 {
2832         dSTCXT;
2833
2834         return newSVpv(mbase, MBUF_SIZE());
2835 }
2836
2837 /*
2838  * mstore
2839  *
2840  * Store the transitive data closure of given object to memory.
2841  * Returns undef on error, a scalar value containing the data otherwise.
2842  */
2843 SV *mstore(SV *sv)
2844 {
2845         dSTCXT;
2846         SV *out;
2847
2848         TRACEME(("mstore"));
2849
2850         if (!do_store((PerlIO*) 0, sv, 0, FALSE, &out))
2851                 return &PL_sv_undef;
2852
2853         return out;
2854 }
2855
2856 /*
2857  * net_mstore
2858  *
2859  * Same as mstore(), but network order is used for integers and doubles are
2860  * emitted as strings.
2861  */
2862 SV *net_mstore(SV *sv)
2863 {
2864         dSTCXT;
2865         SV *out;
2866
2867         TRACEME(("net_mstore"));
2868
2869         if (!do_store((PerlIO*) 0, sv, 0, TRUE, &out))
2870                 return &PL_sv_undef;
2871
2872         return out;
2873 }
2874
2875 /***
2876  *** Specific retrieve callbacks.
2877  ***/
2878
2879 /*
2880  * retrieve_other
2881  *
2882  * Return an error via croak, since it is not possible that we get here
2883  * under normal conditions, when facing a file produced via pstore().
2884  */
2885 static SV *retrieve_other(stcxt_t *cxt)
2886 {
2887         if (
2888                 cxt->ver_major != STORABLE_BIN_MAJOR &&
2889                 cxt->ver_minor != STORABLE_BIN_MINOR
2890         ) {
2891                 CROAK(("Corrupted storable %s (binary v%d.%d), current is v%d.%d",
2892                         cxt->fio ? "file" : "string",
2893                         cxt->ver_major, cxt->ver_minor,
2894                         STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
2895         } else {
2896                 CROAK(("Corrupted storable %s (binary v%d.%d)",
2897                         cxt->fio ? "file" : "string",
2898                         cxt->ver_major, cxt->ver_minor));
2899         }
2900
2901         return (SV *) 0;                /* Just in case */
2902 }
2903
2904 /*
2905  * retrieve_idx_blessed
2906  *
2907  * Layout is SX_IX_BLESS <index> <object> with SX_IX_BLESS already read.
2908  * <index> can be coded on either 1 or 5 bytes.
2909  */
2910 static SV *retrieve_idx_blessed(stcxt_t *cxt)
2911 {
2912         I32 idx;
2913         char *class;
2914         SV **sva;
2915         SV *sv;
2916
2917         TRACEME(("retrieve_idx_blessed (#%d)", cxt->tagnum));
2918
2919         GETMARK(idx);                   /* Index coded on a single char? */
2920         if (idx & 0x80)
2921                 RLEN(idx);
2922
2923         /*
2924          * Fetch classname in `aclass'
2925          */
2926
2927         sva = av_fetch(cxt->aclass, idx, FALSE);
2928         if (!sva)
2929                 CROAK(("Class name #%d should have been seen already", (int)idx));
2930
2931         class = SvPVX(*sva);    /* We know it's a PV, by construction */
2932
2933         TRACEME(("class ID %d => %s", idx, class));
2934
2935         /*
2936          * Retrieve object and bless it.
2937          */
2938
2939         sv = retrieve(cxt);
2940         if (sv)
2941                 BLESS(sv, class);
2942
2943         return sv;
2944 }
2945
2946 /*
2947  * retrieve_blessed
2948  *
2949  * Layout is SX_BLESS <len> <classname> <object> with SX_BLESS already read.
2950  * <len> can be coded on either 1 or 5 bytes.
2951  */
2952 static SV *retrieve_blessed(stcxt_t *cxt)
2953 {
2954         I32 len;
2955         SV *sv;
2956         char buf[LG_BLESS + 1];         /* Avoid malloc() if possible */
2957         char *class = buf;
2958
2959         TRACEME(("retrieve_blessed (#%d)", cxt->tagnum));
2960
2961         /*
2962          * Decode class name length and read that name.
2963          *
2964          * Short classnames have two advantages: their length is stored on one
2965          * single byte, and the string can be read on the stack.
2966          */
2967
2968         GETMARK(len);                   /* Length coded on a single char? */
2969         if (len & 0x80) {
2970                 RLEN(len);
2971                 TRACEME(("** allocating %d bytes for class name", len+1));
2972                 New(10003, class, len+1, char);
2973         }
2974         READ(class, len);
2975         class[len] = '\0';              /* Mark string end */
2976
2977         /*
2978          * It's a new classname, otherwise it would have been an SX_IX_BLESS.
2979          */
2980
2981         if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len)))
2982                 return (SV *) 0;
2983
2984         /*
2985          * Retrieve object and bless it.
2986          */
2987
2988         sv = retrieve(cxt);
2989         if (sv) {
2990                 BLESS(sv, class);
2991                 if (class != buf)
2992                         Safefree(class);
2993         }
2994
2995         return sv;
2996 }
2997
2998 /*
2999  * retrieve_hook
3000  *
3001  * Layout: SX_HOOK <flags> <len> <classname> <len2> <str> [<len3> <object-IDs>]
3002  * with leading mark already read, as usual.
3003  *
3004  * When recursion was involved during serialization of the object, there
3005  * is an unknown amount of serialized objects after the SX_HOOK mark.  Until
3006  * we reach a <flags> marker with the recursion bit cleared.
3007  */
3008 static SV *retrieve_hook(stcxt_t *cxt)
3009 {
3010         I32 len;
3011         char buf[LG_BLESS + 1];         /* Avoid malloc() if possible */
3012         char *class = buf;
3013         unsigned int flags;
3014         I32 len2;
3015         SV *frozen;
3016         I32 len3 = 0;
3017         AV *av = 0;
3018         SV *hook;
3019         SV *sv;
3020         SV *rv;
3021         int obj_type;
3022         I32 classname;
3023         int clone = cxt->optype & ST_CLONE;
3024
3025         TRACEME(("retrieve_hook (#%d)", cxt->tagnum));
3026
3027         /*
3028          * Read flags, which tell us about the type, and whether we need to recurse.
3029          */
3030
3031         GETMARK(flags);
3032
3033         /*
3034          * Create the (empty) object, and mark it as seen.
3035          *
3036          * This must be done now, because tags are incremented, and during
3037          * serialization, the object tag was affected before recursion could
3038          * take place.
3039          */
3040
3041         obj_type = flags & SHF_TYPE_MASK;
3042         switch (obj_type) {
3043         case SHT_SCALAR:
3044                 sv = newSV(0);
3045                 break;
3046         case SHT_ARRAY:
3047                 sv = (SV *) newAV();
3048                 break;
3049         case SHT_HASH:
3050                 sv = (SV *) newHV();
3051                 break;
3052         default:
3053                 return retrieve_other(cxt);             /* Let it croak */
3054         }
3055         SEEN(sv);
3056
3057         /*
3058          * Whilst flags tell us to recurse, do so.
3059          *
3060          * We don't need to remember the addresses returned by retrieval, because
3061          * all the references will be obtained through indirection via the object
3062          * tags in the object-ID list.
3063          */
3064
3065         while (flags & SHF_NEED_RECURSE) {
3066                 TRACEME(("retrieve_hook recursing..."));
3067                 rv = retrieve(cxt);
3068                 if (!rv)
3069                         return (SV *) 0;
3070                 TRACEME(("retrieve_hook back with rv=0x%"UVxf,
3071                          PTR2UV(rv)));
3072                 GETMARK(flags);
3073         }
3074
3075         if (flags & SHF_IDX_CLASSNAME) {
3076                 SV **sva;
3077                 I32 idx;
3078
3079                 /*
3080                  * Fetch index from `aclass'
3081                  */
3082
3083                 if (flags & SHF_LARGE_CLASSLEN)
3084                         RLEN(idx);
3085                 else
3086                         GETMARK(idx);
3087
3088                 sva = av_fetch(cxt->aclass, idx, FALSE);
3089                 if (!sva)
3090                         CROAK(("Class name #%d should have been seen already", (int)idx));
3091
3092                 class = SvPVX(*sva);    /* We know it's a PV, by construction */
3093                 TRACEME(("class ID %d => %s", idx, class));
3094
3095         } else {
3096                 /*
3097                  * Decode class name length and read that name.
3098                  *
3099                  * NOTA BENE: even if the length is stored on one byte, we don't read
3100                  * on the stack.  Just like retrieve_blessed(), we limit the name to
3101                  * LG_BLESS bytes.  This is an arbitrary decision.
3102                  */
3103
3104                 if (flags & SHF_LARGE_CLASSLEN)
3105                         RLEN(len);
3106                 else
3107                         GETMARK(len);
3108
3109                 if (len > LG_BLESS) {
3110                         TRACEME(("** allocating %d bytes for class name", len+1));
3111                         New(10003, class, len+1, char);
3112                 }
3113
3114                 READ(class, len);
3115                 class[len] = '\0';              /* Mark string end */
3116
3117                 /*
3118                  * Record new classname.
3119                  */
3120
3121                 if (!av_store(cxt->aclass, cxt->classnum++, newSVpvn(class, len)))
3122                         return (SV *) 0;
3123         }
3124
3125         TRACEME(("class name: %s", class));
3126
3127         /*
3128          * Decode user-frozen string length and read it in a SV.
3129          *
3130          * For efficiency reasons, we read data directly into the SV buffer.
3131          * To understand that code, read retrieve_scalar()
3132          */
3133
3134         if (flags & SHF_LARGE_STRLEN)
3135                 RLEN(len2);
3136         else
3137                 GETMARK(len2);
3138
3139         frozen = NEWSV(10002, len2);
3140         if (len2) {
3141                 SAFEREAD(SvPVX(frozen), len2, frozen);
3142                 SvCUR_set(frozen, len2);
3143                 *SvEND(frozen) = '\0';
3144         }
3145         (void) SvPOK_only(frozen);              /* Validates string pointer */
3146         if (cxt->s_tainted)                             /* Is input source tainted? */
3147                 SvTAINT(frozen);
3148
3149         TRACEME(("frozen string: %d bytes", len2));
3150
3151         /*
3152          * Decode object-ID list length, if present.
3153          */
3154
3155         if (flags & SHF_HAS_LIST) {
3156                 if (flags & SHF_LARGE_LISTLEN)
3157                         RLEN(len3);
3158                 else
3159                         GETMARK(len3);
3160                 if (len3) {
3161                         av = newAV();
3162                         av_extend(av, len3 + 1);        /* Leave room for [0] */
3163                         AvFILLp(av) = len3;                     /* About to be filled anyway */
3164                 }
3165         }
3166
3167         TRACEME(("has %d object IDs to link", len3));
3168
3169         /*
3170          * Read object-ID list into array.
3171          * Because we pre-extended it, we can cheat and fill it manually.
3172          *
3173          * We read object tags and we can convert them into SV* on the fly
3174          * because we know all the references listed in there (as tags)
3175          * have been already serialized, hence we have a valid correspondance
3176          * between each of those tags and the recreated SV.
3177          */
3178
3179         if (av) {
3180                 SV **ary = AvARRAY(av);
3181                 int i;
3182                 for (i = 1; i <= len3; i++) {   /* We leave [0] alone */
3183                         I32 tag;
3184                         SV **svh;
3185                         SV *xsv;
3186
3187                         READ_I32(tag);
3188                         tag = ntohl(tag);
3189                         svh = av_fetch(cxt->aseen, tag, FALSE);
3190                         if (!svh)
3191                                 CROAK(("Object #%d should have been retrieved already", (int)tag));
3192                         xsv = *svh;
3193                         ary[i] = SvREFCNT_inc(xsv);
3194                 }
3195         }
3196
3197         /*
3198          * Bless the object and look up the STORABLE_thaw hook.
3199          */
3200
3201         BLESS(sv, class);
3202         hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3203         if (!hook) {
3204                 /*
3205                  * Hook not found.  Maybe they did not require the module where this
3206                  * hook is defined yet?
3207                  *
3208                  * If the require below succeeds, we'll be able to find the hook.
3209                  * Still, it only works reliably when each class is defined in a
3210                  * file of its own.
3211                  */
3212
3213                 SV *psv = newSVpvn("require ", 8);
3214                 sv_catpv(psv, class);
3215
3216                 TRACEME(("No STORABLE_thaw defined for objects of class %s", class));
3217                 TRACEME(("Going to require module '%s' with '%s'", class, SvPVX(psv)));
3218
3219                 perl_eval_sv(psv, G_DISCARD);
3220                 sv_free(psv);
3221
3222                 /*
3223                  * We cache results of pkg_can, so we need to uncache before attempting
3224                  * the lookup again.
3225                  */
3226
3227                 pkg_uncache(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3228                 hook = pkg_can(cxt->hook, SvSTASH(sv), "STORABLE_thaw");
3229
3230                 if (!hook)
3231                         CROAK(("No STORABLE_thaw defined for objects of class %s "
3232                                         "(even after a \"require %s;\")", class, class));
3233         }
3234
3235         /*
3236          * If we don't have an `av' yet, prepare one.
3237          * Then insert the frozen string as item [0].
3238          */
3239
3240         if (!av) {
3241                 av = newAV();
3242                 av_extend(av, 1);
3243                 AvFILLp(av) = 0;
3244         }
3245         AvARRAY(av)[0] = SvREFCNT_inc(frozen);
3246
3247         /*
3248          * Call the hook as:
3249          *
3250          *   $object->STORABLE_thaw($cloning, $frozen, @refs);
3251          *
3252          * where $object is our blessed (empty) object, $cloning is a boolean
3253          * telling whether we're running a deep clone, $frozen is the frozen
3254          * string the user gave us in his serializing hook, and @refs, which may
3255          * be empty, is the list of extra references he returned along for us
3256          * to serialize.
3257          *
3258          * In effect, the hook is an alternate creation routine for the class,
3259          * the object itself being already created by the runtime.
3260          */
3261
3262         TRACEME(("calling STORABLE_thaw on %s at 0x%"UVxf" (%"IVdf" args)",
3263                  class, PTR2UV(sv), AvFILLp(av) + 1));
3264
3265         rv = newRV(sv);
3266         (void) scalar_call(rv, hook, clone, av, G_SCALAR|G_DISCARD);
3267         SvREFCNT_dec(rv);
3268
3269         /*
3270          * Final cleanup.
3271          */
3272
3273         SvREFCNT_dec(frozen);
3274         av_undef(av);
3275         sv_free((SV *) av);
3276         if (!(flags & SHF_IDX_CLASSNAME) && class != buf)
3277                 Safefree(class);
3278
3279         return sv;
3280 }
3281
3282 /*
3283  * retrieve_ref
3284  *
3285  * Retrieve reference to some other scalar.
3286  * Layout is SX_REF <object>, with SX_REF already read.
3287  */
3288 static SV *retrieve_ref(stcxt_t *cxt)
3289 {
3290         SV *rv;
3291         SV *sv;
3292
3293         TRACEME(("retrieve_ref (#%d)", cxt->tagnum));
3294
3295         /*
3296          * We need to create the SV that holds the reference to the yet-to-retrieve
3297          * object now, so that we may record the address in the seen table.
3298          * Otherwise, if the object to retrieve references us, we won't be able
3299          * to resolve the SX_OBJECT we'll see at that point! Hence we cannot
3300          * do the retrieve first and use rv = newRV(sv) since it will be too late
3301          * for SEEN() recording.
3302          */
3303
3304         rv = NEWSV(10002, 0);
3305         SEEN(rv);                               /* Will return if rv is null */
3306         sv = retrieve(cxt);             /* Retrieve <object> */
3307         if (!sv)
3308                 return (SV *) 0;        /* Failed */
3309
3310         /*
3311          * WARNING: breaks RV encapsulation.
3312          *
3313          * Now for the tricky part. We have to upgrade our existing SV, so that
3314          * it is now an RV on sv... Again, we cheat by duplicating the code
3315          * held in newSVrv(), since we already got our SV from retrieve().
3316          *
3317          * We don't say:
3318          *
3319          *              SvRV(rv) = SvREFCNT_inc(sv);
3320          *
3321          * here because the reference count we got from retrieve() above is
3322          * already correct: if the object was retrieved from the file, then
3323          * its reference count is one. Otherwise, if it was retrieved via
3324          * an SX_OBJECT indication, a ref count increment was done.
3325          */
3326
3327         sv_upgrade(rv, SVt_RV);
3328         SvRV(rv) = sv;                          /* $rv = \$sv */
3329         SvROK_on(rv);
3330
3331         TRACEME(("ok (retrieve_ref at 0x%"UVxf")", PTR2UV(rv)));
3332
3333         return rv;
3334 }
3335
3336 /*
3337  * retrieve_overloaded
3338  *
3339  * Retrieve reference to some other scalar with overloading.
3340  * Layout is SX_OVERLOAD <object>, with SX_OVERLOAD already read.
3341  */
3342 static SV *retrieve_overloaded(stcxt_t *cxt)
3343 {
3344         SV *rv;
3345         SV *sv;
3346         HV *stash;
3347
3348         TRACEME(("retrieve_overloaded (#%d)", cxt->tagnum));
3349
3350         /*
3351          * Same code as retrieve_ref(), duplicated to avoid extra call.
3352          */
3353
3354         rv = NEWSV(10002, 0);
3355         SEEN(rv);                               /* Will return if rv is null */
3356         sv = retrieve(cxt);             /* Retrieve <object> */
3357         if (!sv)
3358                 return (SV *) 0;        /* Failed */
3359
3360         /*
3361          * WARNING: breaks RV encapsulation.
3362          */
3363
3364         sv_upgrade(rv, SVt_RV);
3365         SvRV(rv) = sv;                          /* $rv = \$sv */
3366         SvROK_on(rv);
3367
3368         /*
3369          * Restore overloading magic.
3370          */
3371
3372         stash = (HV *) SvSTASH (sv);
3373         if (!stash || !Gv_AMG(stash))
3374                 CROAK(("Cannot restore overloading on %s(0x%"UVxf")",
3375                        sv_reftype(sv, FALSE),
3376                        PTR2UV(sv)));
3377
3378         SvAMAGIC_on(rv);
3379
3380         TRACEME(("ok (retrieve_overloaded at 0x%"UVxf")", PTR2UV(rv)));
3381
3382         return rv;
3383 }
3384
3385 /*
3386  * retrieve_tied_array
3387  *
3388  * Retrieve tied array
3389  * Layout is SX_TIED_ARRAY <object>, with SX_TIED_ARRAY already read.
3390  */
3391 static SV *retrieve_tied_array(stcxt_t *cxt)
3392 {
3393         SV *tv;
3394         SV *sv;
3395
3396         TRACEME(("retrieve_tied_array (#%d)", cxt->tagnum));
3397
3398         tv = NEWSV(10002, 0);
3399         SEEN(tv);                                       /* Will return if tv is null */
3400         sv = retrieve(cxt);                     /* Retrieve <object> */
3401         if (!sv)
3402                 return (SV *) 0;                /* Failed */
3403
3404         sv_upgrade(tv, SVt_PVAV);
3405         AvREAL_off((AV *)tv);
3406         sv_magic(tv, sv, 'P', Nullch, 0);
3407         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3408
3409         TRACEME(("ok (retrieve_tied_array at 0x%"UVxf")", PTR2UV(tv)));
3410
3411         return tv;
3412 }
3413
3414 /*
3415  * retrieve_tied_hash
3416  *
3417  * Retrieve tied hash
3418  * Layout is SX_TIED_HASH <object>, with SX_TIED_HASH already read.
3419  */
3420 static SV *retrieve_tied_hash(stcxt_t *cxt)
3421 {
3422         SV *tv;
3423         SV *sv;
3424
3425         TRACEME(("retrieve_tied_hash (#%d)", cxt->tagnum));
3426
3427         tv = NEWSV(10002, 0);
3428         SEEN(tv);                                       /* Will return if tv is null */
3429         sv = retrieve(cxt);                     /* Retrieve <object> */
3430         if (!sv)
3431                 return (SV *) 0;                /* Failed */
3432
3433         sv_upgrade(tv, SVt_PVHV);
3434         sv_magic(tv, sv, 'P', Nullch, 0);
3435         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3436
3437         TRACEME(("ok (retrieve_tied_hash at 0x%"UVxf")", PTR2UV(tv)));
3438
3439         return tv;
3440 }
3441
3442 /*
3443  * retrieve_tied_scalar
3444  *
3445  * Retrieve tied scalar
3446  * Layout is SX_TIED_SCALAR <object>, with SX_TIED_SCALAR already read.
3447  */
3448 static SV *retrieve_tied_scalar(cxt)
3449 stcxt_t *cxt;
3450 {
3451         SV *tv;
3452         SV *sv;
3453
3454         TRACEME(("retrieve_tied_scalar (#%d)", cxt->tagnum));
3455
3456         tv = NEWSV(10002, 0);
3457         SEEN(tv);                                       /* Will return if rv is null */
3458         sv = retrieve(cxt);                     /* Retrieve <object> */
3459         if (!sv)
3460                 return (SV *) 0;                /* Failed */
3461
3462         sv_upgrade(tv, SVt_PVMG);
3463         sv_magic(tv, sv, 'q', Nullch, 0);
3464         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3465
3466         TRACEME(("ok (retrieve_tied_scalar at 0x%"UVxf")", PTR2UV(tv)));
3467
3468         return tv;
3469 }
3470
3471 /*
3472  * retrieve_tied_key
3473  *
3474  * Retrieve reference to value in a tied hash.
3475  * Layout is SX_TIED_KEY <object> <key>, with SX_TIED_KEY already read.
3476  */
3477 static SV *retrieve_tied_key(stcxt_t *cxt)
3478 {
3479         SV *tv;
3480         SV *sv;
3481         SV *key;
3482
3483         TRACEME(("retrieve_tied_key (#%d)", cxt->tagnum));
3484
3485         tv = NEWSV(10002, 0);
3486         SEEN(tv);                                       /* Will return if tv is null */
3487         sv = retrieve(cxt);                     /* Retrieve <object> */
3488         if (!sv)
3489                 return (SV *) 0;                /* Failed */
3490
3491         key = retrieve(cxt);            /* Retrieve <key> */
3492         if (!key)
3493                 return (SV *) 0;                /* Failed */
3494
3495         sv_upgrade(tv, SVt_PVMG);
3496         sv_magic(tv, sv, 'p', (char *)key, HEf_SVKEY);
3497         SvREFCNT_dec(key);                      /* Undo refcnt inc from sv_magic() */
3498         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3499
3500         return tv;
3501 }
3502
3503 /*
3504  * retrieve_tied_idx
3505  *
3506  * Retrieve reference to value in a tied array.
3507  * Layout is SX_TIED_IDX <object> <idx>, with SX_TIED_IDX already read.
3508  */
3509 static SV *retrieve_tied_idx(stcxt_t *cxt)
3510 {
3511         SV *tv;
3512         SV *sv;
3513         I32 idx;
3514
3515         TRACEME(("retrieve_tied_idx (#%d)", cxt->tagnum));
3516
3517         tv = NEWSV(10002, 0);
3518         SEEN(tv);                                       /* Will return if tv is null */
3519         sv = retrieve(cxt);                     /* Retrieve <object> */
3520         if (!sv)
3521                 return (SV *) 0;                /* Failed */
3522
3523         RLEN(idx);                                      /* Retrieve <idx> */
3524
3525         sv_upgrade(tv, SVt_PVMG);
3526         sv_magic(tv, sv, 'p', Nullch, idx);
3527         SvREFCNT_dec(sv);                       /* Undo refcnt inc from sv_magic() */
3528
3529         return tv;
3530 }
3531
3532
3533 /*
3534  * retrieve_lscalar
3535  *
3536  * Retrieve defined long (string) scalar.
3537  *
3538  * Layout is SX_LSCALAR <length> <data>, with SX_LSCALAR already read.
3539  * The scalar is "long" in that <length> is larger than LG_SCALAR so it
3540  * was not stored on a single byte.
3541  */
3542 static SV *retrieve_lscalar(stcxt_t *cxt)
3543 {
3544         I32 len;
3545         SV *sv;
3546
3547         RLEN(len);
3548         TRACEME(("retrieve_lscalar (#%d), len = %"IVdf, cxt->tagnum, len));
3549
3550         /*
3551          * Allocate an empty scalar of the suitable length.
3552          */
3553
3554         sv = NEWSV(10002, len);
3555         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3556
3557         /*
3558          * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
3559          *
3560          * Now, for efficiency reasons, read data directly inside the SV buffer,
3561          * and perform the SV final settings directly by duplicating the final
3562          * work done by sv_setpv. Since we're going to allocate lots of scalars
3563          * this way, it's worth the hassle and risk.
3564          */
3565
3566         SAFEREAD(SvPVX(sv), len, sv);
3567         SvCUR_set(sv, len);                             /* Record C string length */
3568         *SvEND(sv) = '\0';                              /* Ensure it's null terminated anyway */
3569         (void) SvPOK_only(sv);                  /* Validate string pointer */
3570         if (cxt->s_tainted)                             /* Is input source tainted? */
3571                 SvTAINT(sv);                            /* External data cannot be trusted */
3572
3573         TRACEME(("large scalar len %"IVdf" '%s'", len, SvPVX(sv)));
3574         TRACEME(("ok (retrieve_lscalar at 0x%"UVxf")", PTR2UV(sv)));
3575
3576         return sv;
3577 }
3578
3579 /*
3580  * retrieve_scalar
3581  *
3582  * Retrieve defined short (string) scalar.
3583  *
3584  * Layout is SX_SCALAR <length> <data>, with SX_SCALAR already read.
3585  * The scalar is "short" so <length> is single byte. If it is 0, there
3586  * is no <data> section.
3587  */
3588 static SV *retrieve_scalar(stcxt_t *cxt)
3589 {
3590         int len;
3591         SV *sv;
3592
3593         GETMARK(len);
3594         TRACEME(("retrieve_scalar (#%d), len = %d", cxt->tagnum, len));
3595
3596         /*
3597          * Allocate an empty scalar of the suitable length.
3598          */
3599
3600         sv = NEWSV(10002, len);
3601         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3602
3603         /*
3604          * WARNING: duplicates parts of sv_setpv and breaks SV data encapsulation.
3605          */
3606
3607         if (len == 0) {
3608                 /*
3609                  * newSV did not upgrade to SVt_PV so the scalar is undefined.
3610                  * To make it defined with an empty length, upgrade it now...
3611                  */
3612                 sv_upgrade(sv, SVt_PV);
3613                 SvGROW(sv, 1);
3614                 *SvEND(sv) = '\0';                      /* Ensure it's null terminated anyway */
3615                 TRACEME(("ok (retrieve_scalar empty at 0x%"UVxf")", PTR2UV(sv)));
3616         } else {
3617                 /*
3618                  * Now, for efficiency reasons, read data directly inside the SV buffer,
3619                  * and perform the SV final settings directly by duplicating the final
3620                  * work done by sv_setpv. Since we're going to allocate lots of scalars
3621                  * this way, it's worth the hassle and risk.
3622                  */
3623                 SAFEREAD(SvPVX(sv), len, sv);
3624                 SvCUR_set(sv, len);                     /* Record C string length */
3625                 *SvEND(sv) = '\0';                      /* Ensure it's null terminated anyway */
3626                 TRACEME(("small scalar len %d '%s'", len, SvPVX(sv)));
3627         }
3628
3629         (void) SvPOK_only(sv);                  /* Validate string pointer */
3630         if (cxt->s_tainted)                             /* Is input source tainted? */
3631                 SvTAINT(sv);                            /* External data cannot be trusted */
3632
3633         TRACEME(("ok (retrieve_scalar at 0x%"UVxf")", PTR2UV(sv)));
3634         return sv;
3635 }
3636
3637 /*
3638  * retrieve_utf8str
3639  *
3640  * Like retrieve_scalar(), but tag result as utf8.
3641  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
3642  */
3643 static SV *retrieve_utf8str(stcxt_t *cxt)
3644 {
3645         SV *sv;
3646
3647         TRACEME(("retrieve_utf8str"));
3648
3649         sv = retrieve_scalar(cxt);
3650         if (sv)
3651                 SvUTF8_on(sv);
3652
3653         return sv;
3654 }
3655
3656 /*
3657  * retrieve_lutf8str
3658  *
3659  * Like retrieve_lscalar(), but tag result as utf8.
3660  * If we're retrieving UTF8 data in a non-UTF8 perl, croaks.
3661  */
3662 static SV *retrieve_lutf8str(stcxt_t *cxt)
3663 {
3664         SV *sv;
3665
3666         TRACEME(("retrieve_lutf8str"));
3667
3668         sv = retrieve_lscalar(cxt);
3669         if (sv)
3670                 SvUTF8_on(sv);
3671
3672         return sv;
3673 }
3674
3675 /*
3676  * retrieve_integer
3677  *
3678  * Retrieve defined integer.
3679  * Layout is SX_INTEGER <data>, whith SX_INTEGER already read.
3680  */
3681 static SV *retrieve_integer(stcxt_t *cxt)
3682 {
3683         SV *sv;
3684         IV iv;
3685
3686         TRACEME(("retrieve_integer (#%d)", cxt->tagnum));
3687
3688         READ(&iv, sizeof(iv));
3689         sv = newSViv(iv);
3690         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3691
3692         TRACEME(("integer %"IVdf, iv));
3693         TRACEME(("ok (retrieve_integer at 0x%"UVxf")", PTR2UV(sv)));
3694
3695         return sv;
3696 }
3697
3698 /*
3699  * retrieve_netint
3700  *
3701  * Retrieve defined integer in network order.
3702  * Layout is SX_NETINT <data>, whith SX_NETINT already read.
3703  */
3704 static SV *retrieve_netint(stcxt_t *cxt)
3705 {
3706         SV *sv;
3707         I32 iv;
3708
3709         TRACEME(("retrieve_netint (#%d)", cxt->tagnum));
3710
3711         READ_I32(iv);
3712 #ifdef HAS_NTOHL
3713         sv = newSViv((int) ntohl(iv));
3714         TRACEME(("network integer %d", (int) ntohl(iv)));
3715 #else
3716         sv = newSViv(iv);
3717         TRACEME(("network integer (as-is) %d", iv));
3718 #endif
3719         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3720
3721         TRACEME(("ok (retrieve_netint at 0x%"UVxf")", PTR2UV(sv)));
3722
3723         return sv;
3724 }
3725
3726 /*
3727  * retrieve_double
3728  *
3729  * Retrieve defined double.
3730  * Layout is SX_DOUBLE <data>, whith SX_DOUBLE already read.
3731  */
3732 static SV *retrieve_double(stcxt_t *cxt)
3733 {
3734         SV *sv;
3735         NV nv;
3736
3737         TRACEME(("retrieve_double (#%d)", cxt->tagnum));
3738
3739         READ(&nv, sizeof(nv));
3740         sv = newSVnv(nv);
3741         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3742
3743         TRACEME(("double %"NVff, nv));
3744         TRACEME(("ok (retrieve_double at 0x%"UVxf")", PTR2UV(sv)));
3745
3746         return sv;
3747 }
3748
3749 /*
3750  * retrieve_byte
3751  *
3752  * Retrieve defined byte (small integer within the [-128, +127] range).
3753  * Layout is SX_BYTE <data>, whith SX_BYTE already read.
3754  */
3755 static SV *retrieve_byte(stcxt_t *cxt)
3756 {
3757         SV *sv;
3758         int siv;
3759
3760         TRACEME(("retrieve_byte (#%d)", cxt->tagnum));
3761
3762         GETMARK(siv);
3763         TRACEME(("small integer read as %d", (unsigned char) siv));
3764         sv = newSViv((unsigned char) siv - 128);
3765         SEEN(sv);                       /* Associate this new scalar with tag "tagnum" */
3766
3767         TRACEME(("byte %d", (unsigned char) siv - 128));
3768         TRACEME(("ok (retrieve_byte at 0x%"UVxf")", PTR2UV(sv)));
3769
3770         return sv;
3771 }
3772
3773 /*
3774  * retrieve_undef
3775  *
3776  * Return the undefined value.
3777  */
3778 static SV *retrieve_undef(stcxt_t *cxt)
3779 {
3780         SV* sv;
3781
3782         TRACEME(("retrieve_undef"));
3783
3784         sv = newSV(0);
3785         SEEN(sv);
3786
3787         return sv;
3788 }
3789
3790 /*
3791  * retrieve_sv_undef
3792  *
3793  * Return the immortal undefined value.
3794  */
3795 static SV *retrieve_sv_undef(stcxt_t *cxt)
3796 {
3797         SV *sv = &PL_sv_undef;
3798
3799         TRACEME(("retrieve_sv_undef"));
3800
3801         SEEN(sv);
3802         return sv;
3803 }
3804
3805 /*
3806  * retrieve_sv_yes
3807  *
3808  * Return the immortal yes value.
3809  */
3810 static SV *retrieve_sv_yes(stcxt_t *cxt)
3811 {
3812         SV *sv = &PL_sv_yes;
3813
3814         TRACEME(("retrieve_sv_yes"));
3815
3816         SEEN(sv);
3817         return sv;
3818 }
3819
3820 /*
3821  * retrieve_sv_no
3822  *
3823  * Return the immortal no value.
3824  */
3825 static SV *retrieve_sv_no(stcxt_t *cxt)
3826 {
3827         SV *sv = &PL_sv_no;
3828
3829         TRACEME(("retrieve_sv_no"));
3830
3831         SEEN(sv);
3832         return sv;
3833 }
3834
3835 /*
3836  * retrieve_array
3837  *
3838  * Retrieve a whole array.
3839  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
3840  * Each item is stored as <object>.
3841  *
3842  * When we come here, SX_ARRAY has been read already.
3843  */
3844 static SV *retrieve_array(stcxt_t *cxt)
3845 {
3846         I32 len;
3847         I32 i;
3848         AV *av;
3849         SV *sv;
3850
3851         TRACEME(("retrieve_array (#%d)", cxt->tagnum));
3852
3853         /*
3854          * Read length, and allocate array, then pre-extend it.
3855          */
3856
3857         RLEN(len);
3858         TRACEME(("size = %d", len));
3859         av = newAV();
3860         SEEN(av);                                       /* Will return if array not allocated nicely */
3861         if (len)
3862                 av_extend(av, len);
3863         else
3864                 return (SV *) av;               /* No data follow if array is empty */
3865
3866         /*
3867          * Now get each item in turn...
3868          */
3869
3870         for (i = 0; i < len; i++) {
3871                 TRACEME(("(#%d) item", i));
3872                 sv = retrieve(cxt);                             /* Retrieve item */
3873                 if (!sv)
3874                         return (SV *) 0;
3875                 if (av_store(av, i, sv) == 0)
3876                         return (SV *) 0;
3877         }
3878
3879         TRACEME(("ok (retrieve_array at 0x%"UVxf")", PTR2UV(av)));
3880
3881         return (SV *) av;
3882 }
3883
3884 /*
3885  * retrieve_hash
3886  *
3887  * Retrieve a whole hash table.
3888  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
3889  * Keys are stored as <length> <data>, the <data> section being omitted
3890  * if length is 0.
3891  * Values are stored as <object>.
3892  *
3893  * When we come here, SX_HASH has been read already.
3894  */
3895 static SV *retrieve_hash(stcxt_t *cxt)
3896 {
3897         I32 len;
3898         I32 size;
3899         I32 i;
3900         HV *hv;
3901         SV *sv;
3902         static SV *sv_h_undef = (SV *) 0;               /* hv_store() bug */
3903
3904         TRACEME(("retrieve_hash (#%d)", cxt->tagnum));
3905
3906         /*
3907          * Read length, allocate table.
3908          */
3909
3910         RLEN(len);
3911         TRACEME(("size = %d", len));
3912         hv = newHV();
3913         SEEN(hv);                       /* Will return if table not allocated properly */
3914         if (len == 0)
3915                 return (SV *) hv;       /* No data follow if table empty */
3916
3917         /*
3918          * Now get each key/value pair in turn...
3919          */
3920
3921         for (i = 0; i < len; i++) {
3922                 /*
3923                  * Get value first.
3924                  */
3925
3926                 TRACEME(("(#%d) value", i));
3927                 sv = retrieve(cxt);
3928                 if (!sv)
3929                         return (SV *) 0;
3930
3931                 /*
3932                  * Get key.
3933                  * Since we're reading into kbuf, we must ensure we're not
3934                  * recursing between the read and the hv_store() where it's used.
3935                  * Hence the key comes after the value.
3936                  */
3937
3938                 RLEN(size);                                             /* Get key size */
3939                 KBUFCHK(size);                                  /* Grow hash key read pool if needed */
3940                 if (size)
3941                         READ(kbuf, size);
3942                 kbuf[size] = '\0';                              /* Mark string end, just in case */
3943                 TRACEME(("(#%d) key '%s'", i, kbuf));
3944
3945                 /*
3946                  * Enter key/value pair into hash table.
3947                  */
3948
3949                 if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
3950                         return (SV *) 0;
3951         }
3952
3953         TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv)));
3954
3955         return (SV *) hv;
3956 }
3957
3958 /*
3959  * old_retrieve_array
3960  *
3961  * Retrieve a whole array in pre-0.6 binary format.
3962  *
3963  * Layout is SX_ARRAY <size> followed by each item, in increading index order.
3964  * Each item is stored as SX_ITEM <object> or SX_IT_UNDEF for "holes".
3965  *
3966  * When we come here, SX_ARRAY has been read already.
3967  */
3968 static SV *old_retrieve_array(stcxt_t *cxt)
3969 {
3970         I32 len;
3971         I32 i;
3972         AV *av;
3973         SV *sv;
3974         int c;
3975
3976         TRACEME(("old_retrieve_array (#%d)", cxt->tagnum));
3977
3978         /*
3979          * Read length, and allocate array, then pre-extend it.
3980          */
3981
3982         RLEN(len);
3983         TRACEME(("size = %d", len));
3984         av = newAV();
3985         SEEN(av);                                       /* Will return if array not allocated nicely */
3986         if (len)
3987                 av_extend(av, len);
3988         else
3989                 return (SV *) av;               /* No data follow if array is empty */
3990
3991         /*
3992          * Now get each item in turn...
3993          */
3994
3995         for (i = 0; i < len; i++) {
3996                 GETMARK(c);
3997                 if (c == SX_IT_UNDEF) {
3998                         TRACEME(("(#%d) undef item", i));
3999                         continue;                       /* av_extend() already filled us with undef */
4000                 }
4001                 if (c != SX_ITEM)
4002                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
4003                 TRACEME(("(#%d) item", i));
4004                 sv = retrieve(cxt);                                                     /* Retrieve item */
4005                 if (!sv)
4006                         return (SV *) 0;
4007                 if (av_store(av, i, sv) == 0)
4008                         return (SV *) 0;
4009         }
4010
4011         TRACEME(("ok (old_retrieve_array at 0x%"UVxf")", PTR2UV(av)));
4012
4013         return (SV *) av;
4014 }
4015
4016 /*
4017  * old_retrieve_hash
4018  *
4019  * Retrieve a whole hash table in pre-0.6 binary format.
4020  *
4021  * Layout is SX_HASH <size> followed by each key/value pair, in random order.
4022  * Keys are stored as SX_KEY <length> <data>, the <data> section being omitted
4023  * if length is 0.
4024  * Values are stored as SX_VALUE <object> or SX_VL_UNDEF for "holes".
4025  *
4026  * When we come here, SX_HASH has been read already.
4027  */
4028 static SV *old_retrieve_hash(stcxt_t *cxt)
4029 {
4030         I32 len;
4031         I32 size;
4032         I32 i;
4033         HV *hv;
4034         SV *sv;
4035         int c;
4036         static SV *sv_h_undef = (SV *) 0;               /* hv_store() bug */
4037
4038         TRACEME(("old_retrieve_hash (#%d)", cxt->tagnum));
4039
4040         /*
4041          * Read length, allocate table.
4042          */
4043
4044         RLEN(len);
4045         TRACEME(("size = %d", len));
4046         hv = newHV();
4047         SEEN(hv);                               /* Will return if table not allocated properly */
4048         if (len == 0)
4049                 return (SV *) hv;       /* No data follow if table empty */
4050
4051         /*
4052          * Now get each key/value pair in turn...
4053          */
4054
4055         for (i = 0; i < len; i++) {
4056                 /*
4057                  * Get value first.
4058                  */
4059
4060                 GETMARK(c);
4061                 if (c == SX_VL_UNDEF) {
4062                         TRACEME(("(#%d) undef value", i));
4063                         /*
4064                          * Due to a bug in hv_store(), it's not possible to pass
4065                          * &PL_sv_undef to hv_store() as a value, otherwise the
4066                          * associated key will not be creatable any more. -- RAM, 14/01/97
4067                          */
4068                         if (!sv_h_undef)
4069                                 sv_h_undef = newSVsv(&PL_sv_undef);
4070                         sv = SvREFCNT_inc(sv_h_undef);
4071                 } else if (c == SX_VALUE) {
4072                         TRACEME(("(#%d) value", i));
4073                         sv = retrieve(cxt);
4074                         if (!sv)
4075                                 return (SV *) 0;
4076                 } else
4077                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
4078
4079                 /*
4080                  * Get key.
4081                  * Since we're reading into kbuf, we must ensure we're not
4082                  * recursing between the read and the hv_store() where it's used.
4083                  * Hence the key comes after the value.
4084                  */
4085
4086                 GETMARK(c);
4087                 if (c != SX_KEY)
4088                         (void) retrieve_other((stcxt_t *) 0);   /* Will croak out */
4089                 RLEN(size);                                             /* Get key size */
4090                 KBUFCHK(size);                                  /* Grow hash key read pool if needed */
4091                 if (size)
4092                         READ(kbuf, size);
4093                 kbuf[size] = '\0';                              /* Mark string end, just in case */
4094                 TRACEME(("(#%d) key '%s'", i, kbuf));
4095
4096                 /*
4097                  * Enter key/value pair into hash table.
4098                  */
4099
4100                 if (hv_store(hv, kbuf, (U32) size, sv, 0) == 0)
4101                         return (SV *) 0;
4102         }
4103
4104         TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv)));
4105
4106         return (SV *) hv;
4107 }
4108
4109 /***
4110  *** Retrieval engine.
4111  ***/
4112
4113 /*
4114  * magic_check
4115  *
4116  * Make sure the stored data we're trying to retrieve has been produced
4117  * on an ILP compatible system with the same byteorder. It croaks out in
4118  * case an error is detected. [ILP = integer-long-pointer sizes]
4119  * Returns null if error is detected, &PL_sv_undef otherwise.
4120  *
4121  * Note that there's no byte ordering info emitted when network order was
4122  * used at store time.
4123  */
4124 static SV *magic_check(stcxt_t *cxt)
4125 {
4126         char buf[256];
4127         char byteorder[256];
4128         int c;
4129         int use_network_order;
4130         int version_major;
4131         int version_minor = 0;
4132
4133         TRACEME(("magic_check"));
4134
4135         /*
4136          * The "magic number" is only for files, not when freezing in memory.
4137          */
4138
4139         if (cxt->fio) {
4140                 STRLEN len = sizeof(magicstr) - 1;
4141                 STRLEN old_len;
4142
4143                 READ(buf, len);                                 /* Not null-terminated */
4144                 buf[len] = '\0';                                /* Is now */
4145
4146                 if (0 == strcmp(buf, magicstr))
4147                         goto magic_ok;
4148
4149                 /*
4150                  * Try to read more bytes to check for the old magic number, which
4151                  * was longer.
4152                  */
4153
4154                 old_len = sizeof(old_magicstr) - 1;
4155                 READ(&buf[len], old_len - len);
4156                 buf[old_len] = '\0';                    /* Is now null-terminated */
4157
4158                 if (strcmp(buf, old_magicstr))
4159                         CROAK(("File is not a perl storable"));
4160         }
4161
4162 magic_ok:
4163         /*
4164          * Starting with 0.6, the "use_network_order" byte flag is also used to
4165          * indicate the version number of the binary, and therefore governs the
4166          * setting of sv_retrieve_vtbl. See magic_write().
4167          */
4168
4169         GETMARK(use_network_order);
4170         version_major = use_network_order >> 1;
4171         cxt->retrieve_vtbl = version_major ? sv_retrieve : sv_old_retrieve;
4172
4173         TRACEME(("magic_check: netorder = 0x%x", use_network_order));
4174
4175
4176         /*
4177          * Starting with 0.7 (binary major 2), a full byte is dedicated to the
4178          * minor version of the protocol.  See magic_write().
4179          */
4180
4181         if (version_major > 1)
4182                 GETMARK(version_minor);
4183
4184         cxt->ver_major = version_major;
4185         cxt->ver_minor = version_minor;
4186
4187         TRACEME(("binary image version is %d.%d", version_major, version_minor));
4188
4189         /*
4190          * Inter-operability sanity check: we can't retrieve something stored
4191          * using a format more recent than ours, because we have no way to
4192          * know what has changed, and letting retrieval go would mean a probable
4193          * failure reporting a "corrupted" storable file.
4194          */
4195
4196         if (
4197                 version_major > STORABLE_BIN_MAJOR ||
4198                         (version_major == STORABLE_BIN_MAJOR &&
4199                         version_minor > STORABLE_BIN_MINOR)
4200         )
4201                 CROAK(("Storable binary image v%d.%d more recent than I am (v%d.%d)",
4202                         version_major, version_minor,
4203                         STORABLE_BIN_MAJOR, STORABLE_BIN_MINOR));
4204
4205         /*
4206          * If they stored using network order, there's no byte ordering
4207          * information to check.
4208          */
4209
4210         if (cxt->netorder = (use_network_order & 0x1))
4211                 return &PL_sv_undef;                    /* No byte ordering info */
4212
4213         sprintf(byteorder, "%lx", (unsigned long) BYTEORDER);
4214         GETMARK(c);
4215         READ(buf, c);                                           /* Not null-terminated */
4216         buf[c] = '\0';                                          /* Is now */
4217
4218         if (strcmp(buf, byteorder))
4219                 CROAK(("Byte order is not compatible"));
4220
4221         GETMARK(c);             /* sizeof(int) */
4222         if ((int) c != sizeof(int))
4223                 CROAK(("Integer size is not compatible"));
4224
4225         GETMARK(c);             /* sizeof(long) */
4226         if ((int) c != sizeof(long))
4227                 CROAK(("Long integer size is not compatible"));
4228
4229         GETMARK(c);             /* sizeof(char *) */
4230         if ((int) c != sizeof(char *))
4231                 CROAK(("Pointer integer size is not compatible"));
4232
4233         if (version_major >= 2 && version_minor >= 2) {
4234                 GETMARK(c);             /* sizeof(NV) */
4235                 if ((int) c != sizeof(NV))
4236                         CROAK(("Double size is not compatible"));
4237         }
4238
4239         return &PL_sv_undef;    /* OK */
4240 }
4241
4242 /*
4243  * retrieve
4244  *
4245  * Recursively retrieve objects from the specified file and return their
4246  * root SV (which may be an AV or an HV for what we care).
4247  * Returns null if there is a problem.
4248  */
4249 static SV *retrieve(stcxt_t *cxt)
4250 {
4251         int type;
4252         SV **svh;
4253         SV *sv;
4254
4255         TRACEME(("retrieve"));
4256
4257         /*
4258          * Grab address tag which identifies the object if we are retrieving
4259          * an older format. Since the new binary format counts objects and no
4260          * longer explicitely tags them, we must keep track of the correspondance
4261          * ourselves.
4262          *
4263          * The following section will disappear one day when the old format is
4264          * no longer supported, hence the final "goto" in the "if" block.
4265          */
4266
4267         if (cxt->hseen) {                                               /* Retrieving old binary */
4268                 stag_t tag;
4269                 if (cxt->netorder) {
4270                         I32 nettag;
4271                         READ(&nettag, sizeof(I32));             /* Ordered sequence of I32 */
4272                         tag = (stag_t) nettag;
4273                 } else
4274                         READ(&tag, sizeof(stag_t));             /* Original address of the SV */
4275
4276                 GETMARK(type);
4277                 if (type == SX_OBJECT) {
4278                         I32 tagn;
4279                         svh = hv_fetch(cxt->hseen, (char *) &tag, sizeof(tag), FALSE);
4280                         if (!svh)
4281                                 CROAK(("Old tag 0x%x should have been mapped already", (unsigned)tag));
4282                         tagn = SvIV(*svh);      /* Mapped tag number computed earlier below */
4283
4284                         /*
4285                          * The following code is common with the SX_OBJECT case below.
4286                          */
4287
4288                         svh = av_fetch(cxt->aseen, tagn, FALSE);
4289                         if (!svh)
4290                                 CROAK(("Object #%d should have been retrieved already", (int)tagn));
4291                         sv = *svh;
4292                         TRACEME(("has retrieved #%d at 0x%"UVxf, tagn, PTR2UV(sv)));
4293                         SvREFCNT_inc(sv);       /* One more reference to this same sv */
4294                         return sv;                      /* The SV pointer where object was retrieved */
4295                 }
4296
4297                 /*
4298                  * Map new object, but don't increase tagnum. This will be done
4299                  * by each of the retrieve_* functions when they call SEEN().
4300                  *
4301                  * The mapping associates the "tag" initially present with a unique
4302                  * tag number. See test for SX_OBJECT above to see how this is perused.
4303                  */
4304
4305                 if (!hv_store(cxt->hseen, (char *) &tag, sizeof(tag),
4306                                 newSViv(cxt->tagnum), 0))
4307                         return (SV *) 0;
4308
4309                 goto first_time;
4310         }
4311
4312         /*
4313          * Regular post-0.6 binary format.
4314          */
4315
4316 again:
4317         GETMARK(type);
4318
4319         TRACEME(("retrieve type = %d", type));
4320
4321         /*
4322          * Are we dealing with an object we should have already retrieved?
4323          */
4324
4325         if (type == SX_OBJECT) {
4326                 I32 tag;
4327                 READ_I32(tag);
4328                 tag = ntohl(tag);
4329                 svh = av_fetch(cxt->aseen, tag, FALSE);
4330                 if (!svh)
4331                         CROAK(("Object #%d should have been retrieved already", (int)tag));
4332                 sv = *svh;
4333                 TRACEME(("had retrieved #%d at 0x%"UVxf, tag, PTR2UV(sv)));
4334                 SvREFCNT_inc(sv);       /* One more reference to this same sv */
4335                 return sv;                      /* The SV pointer where object was retrieved */
4336         }
4337
4338 first_time:             /* Will disappear when support for old format is dropped */
4339
4340         /*
4341          * Okay, first time through for this one.
4342          */
4343
4344         sv = RETRIEVE(cxt, type)(cxt);
4345         if (!sv)
4346                 return (SV *) 0;                        /* Failed */
4347
4348         /*
4349          * Old binary formats (pre-0.7).
4350          *
4351          * Final notifications, ended by SX_STORED may now follow.
4352          * Currently, the only pertinent notification to apply on the
4353          * freshly retrieved object is either:
4354          *    SX_CLASS <char-len> <classname> for short classnames.
4355          *    SX_LG_CLASS <int-len> <classname> for larger one (rare!).
4356          * Class name is then read into the key buffer pool used by
4357          * hash table key retrieval.
4358          */
4359
4360         if (cxt->ver_major < 2) {
4361                 while ((type = GETCHAR()) != SX_STORED) {
4362                         I32 len;
4363                         switch (type) {
4364                         case SX_CLASS:
4365                                 GETMARK(len);                   /* Length coded on a single char */
4366                                 break;
4367                         case SX_LG_CLASS:                       /* Length coded on a regular integer */
4368                                 RLEN(len);
4369                                 break;
4370                         case EOF:
4371                         default:
4372                                 return (SV *) 0;                /* Failed */
4373                         }
4374                         KBUFCHK(len);                           /* Grow buffer as necessary */
4375                         if (len)
4376                                 READ(kbuf, len);
4377                         kbuf[len] = '\0';                       /* Mark string end */
4378                         BLESS(sv, kbuf);
4379                 }
4380         }
4381
4382         TRACEME(("ok (retrieved 0x%"UVxf", refcnt=%d, %s)", PTR2UV(sv),
4383                 SvREFCNT(sv) - 1, sv_reftype(sv, FALSE)));
4384
4385         return sv;      /* Ok */
4386 }
4387
4388 /*
4389  * do_retrieve
4390  *
4391  * Retrieve data held in file and return the root object.
4392  * Common routine for pretrieve and mretrieve.
4393  */
4394 static SV *do_retrieve(
4395         PerlIO *f,
4396         SV *in,
4397         int optype)
4398 {
4399         dSTCXT;
4400         SV *sv;
4401         int is_tainted;                         /* Is input source tainted? */
4402         struct extendable msave;        /* Where potentially valid mbuf is saved */
4403
4404         TRACEME(("do_retrieve (optype = 0x%x)", optype));
4405
4406         optype |= ST_RETRIEVE;
4407
4408         /*
4409          * Sanity assertions for retrieve dispatch tables.
4410          */
4411
4412         ASSERT(sizeof(sv_old_retrieve) == sizeof(sv_retrieve),
4413                 ("old and new retrieve dispatch table have same size"));
4414         ASSERT(sv_old_retrieve[SX_ERROR] == retrieve_other,
4415                 ("SX_ERROR entry correctly initialized in old dispatch table"));
4416         ASSERT(sv_retrieve[SX_ERROR] == retrieve_other,
4417                 ("SX_ERROR entry correctly initialized in new dispatch table"));
4418
4419         /*
4420          * Workaround for CROAK leak: if they enter with a "dirty" context,
4421          * free up memory for them now.
4422          */
4423
4424         if (cxt->s_dirty)
4425                 clean_context(cxt);
4426
4427         /*
4428          * Now that STORABLE_xxx hooks exist, it is possible that they try to
4429          * re-enter retrieve() via the hooks.
4430          */
4431
4432         if (cxt->entry)
4433                 cxt = allocate_context(cxt);
4434
4435         cxt->entry++;
4436
4437         ASSERT(cxt->entry == 1, ("starting new recursion"));
4438         ASSERT(!cxt->s_dirty, ("clean context"));
4439
4440         /*
4441          * Prepare context.
4442          *
4443          * Data is loaded into the memory buffer when f is NULL, unless `in' is
4444          * also NULL, in which case we're expecting the data to already lie
4445          * in the buffer (dclone case).
4446          */
4447
4448         KBUFINIT();                                     /* Allocate hash key reading pool once */
4449
4450         if (!f && in) {
4451                 StructCopy(&cxt->membuf, &msave, struct extendable);
4452                 MBUF_LOAD(in);
4453         }
4454
4455
4456         /*
4457          * Magic number verifications.
4458          *
4459          * This needs to be done before calling init_retrieve_context()
4460          * since the format indication in the file are necessary to conduct
4461          * some of the initializations.
4462          */
4463
4464         cxt->fio = f;                           /* Where I/O are performed */
4465
4466         if (!magic_check(cxt))
4467                 CROAK(("Magic number checking on storable %s failed",
4468                         cxt->fio ? "file" : "string"));
4469
4470         TRACEME(("data stored in %s format",
4471                 cxt->netorder ? "net order" : "native"));
4472
4473         /*
4474          * Check whether input source is tainted, so that we don't wrongly
4475          * taint perfectly good values...
4476          *
4477          * We assume file input is always tainted.  If both `f' and `in' are
4478          * NULL, then we come from dclone, and tainted is already filled in
4479          * the context.  That's a kludge, but the whole dclone() thing is
4480          * already quite a kludge anyway! -- RAM, 15/09/2000.
4481          */
4482
4483         is_tainted = f ? 1 : (in ? SvTAINTED(in) : cxt->s_tainted);
4484         TRACEME(("input source is %s", is_tainted ? "tainted" : "trusted"));
4485         init_retrieve_context(cxt, optype, is_tainted);
4486
4487         ASSERT(is_retrieving(), ("within retrieve operation"));
4488
4489         sv = retrieve(cxt);             /* Recursively retrieve object, get root SV */
4490
4491         /*
4492          * Final cleanup.
4493          */
4494
4495         if (!f && in)
4496                 StructCopy(&msave, &cxt->membuf, struct extendable);
4497
4498         /*
4499          * The "root" context is never freed.
4500          */
4501
4502         clean_retrieve_context(cxt);
4503         if (cxt->prev)                          /* This context was stacked */
4504                 free_context(cxt);              /* It was not the "root" context */
4505
4506         /*
4507          * Prepare returned value.
4508          */
4509
4510         if (!sv) {
4511                 TRACEME(("retrieve ERROR"));
4512                 return &PL_sv_undef;            /* Something went wrong, return undef */
4513         }
4514
4515         TRACEME(("retrieve got %s(0x%"UVxf")",
4516                 sv_reftype(sv, FALSE), PTR2UV(sv)));
4517
4518         /*
4519          * Backward compatibility with Storable-0.5@9 (which we know we
4520          * are retrieving if hseen is non-null): don't create an extra RV
4521          * for objects since we special-cased it at store time.
4522          *
4523          * Build a reference to the SV returned by pretrieve even if it is
4524          * already one and not a scalar, for consistency reasons.
4525          *
4526          * NB: although context might have been cleaned, the value of `cxt->hseen'
4527          * remains intact, and can be used as a flag.
4528          */
4529
4530         if (cxt->hseen) {                       /* Was not handling overloading by then */
4531                 SV *rv;
4532                 if (sv_type(sv) == svis_REF && (rv = SvRV(sv)) && SvOBJECT(rv))
4533                         return sv;
4534         }
4535
4536         /*
4537          * If reference is overloaded, restore behaviour.
4538          *
4539          * NB: minor glitch here: normally, overloaded refs are stored specially
4540          * so that we can croak when behaviour cannot be re-installed, and also
4541          * avoid testing for overloading magic at each reference retrieval.
4542          *
4543          * Unfortunately, the root reference is implicitely stored, so we must
4544          * check for possible overloading now.  Furthermore, if we don't restore
4545          * overloading, we cannot croak as if the original ref was, because we
4546          * have no way to determine whether it was an overloaded ref or not in
4547          * the first place.
4548          *
4549          * It's a pity that overloading magic is attached to the rv, and not to
4550          * the underlying sv as blessing is.
4551          */
4552
4553         if (SvOBJECT(sv)) {
4554                 HV *stash = (HV *) SvSTASH (sv);
4555                 SV *rv = newRV_noinc(sv);
4556                 if (stash && Gv_AMG(stash)) {
4557                         SvAMAGIC_on(rv);
4558                         TRACEME(("restored overloading on root reference"));
4559                 }
4560                 return rv;
4561         }
4562
4563         return newRV_noinc(sv);
4564 }
4565
4566 /*
4567  * pretrieve
4568  *
4569  * Retrieve data held in file and return the root object, undef on error.
4570  */
4571 SV *pretrieve(PerlIO *f)
4572 {
4573         TRACEME(("pretrieve"));
4574         return do_retrieve(f, Nullsv, 0);
4575 }
4576
4577 /*
4578  * mretrieve
4579  *
4580  * Retrieve data held in scalar and return the root object, undef on error.
4581  */
4582 SV *mretrieve(SV *sv)
4583 {
4584         TRACEME(("mretrieve"));
4585         return do_retrieve((PerlIO*) 0, sv, 0);
4586 }
4587
4588 /***
4589  *** Deep cloning
4590  ***/
4591
4592 /*
4593  * dclone
4594  *
4595  * Deep clone: returns a fresh copy of the original referenced SV tree.
4596  *
4597  * This is achieved by storing the object in memory and restoring from
4598  * there. Not that efficient, but it should be faster than doing it from
4599  * pure perl anyway.
4600  */
4601 SV *dclone(SV *sv)
4602 {
4603         dSTCXT;
4604         int size;
4605         stcxt_t *real_context;
4606         SV *out;
4607
4608         TRACEME(("dclone"));
4609
4610         /*
4611          * Workaround for CROAK leak: if they enter with a "dirty" context,
4612          * free up memory for them now.
4613          */
4614
4615         if (cxt->s_dirty)
4616                 clean_context(cxt);
4617
4618         /*
4619          * do_store() optimizes for dclone by not freeing its context, should
4620          * we need to allocate one because we're deep cloning from a hook.
4621          */
4622
4623         if (!do_store((PerlIO*) 0, sv, ST_CLONE, FALSE, (SV**) 0))
4624                 return &PL_sv_undef;                            /* Error during store */
4625
4626         /*
4627          * Because of the above optimization, we have to refresh the context,
4628          * since a new one could have been allocated and stacked by do_store().
4629          */
4630
4631         { dSTCXT; real_context = cxt; }         /* Sub-block needed for macro */
4632         cxt = real_context;                                     /* And we need this temporary... */
4633
4634         /*
4635          * Now, `cxt' may refer to a new context.
4636          */
4637
4638         ASSERT(!cxt->s_dirty, ("clean context"));
4639         ASSERT(!cxt->entry, ("entry will not cause new context allocation"));
4640
4641         size = MBUF_SIZE();
4642         TRACEME(("dclone stored %d bytes", size));
4643         MBUF_INIT(size);
4644
4645         /*
4646          * Since we're passing do_retrieve() both a NULL file and sv, we need
4647          * to pre-compute the taintedness of the input by setting cxt->tainted
4648          * to whatever state our own input string was.  -- RAM, 15/09/2000
4649          *
4650          * do_retrieve() will free non-root context.
4651          */
4652
4653         cxt->s_tainted = SvTAINTED(sv);
4654         out = do_retrieve((PerlIO*) 0, Nullsv, ST_CLONE);
4655
4656         TRACEME(("dclone returns 0x%"UVxf, PTR2UV(out)));
4657
4658         return out;
4659 }
4660
4661 /***
4662  *** Glue with perl.
4663  ***/
4664
/*
 * The Perl IO GV object distinguishes between input and output for sockets
 * but not for plain files. To allow Storable to work transparently on both
 * plain files and sockets, we have to ask xsubpp to fetch the right object
 * for us. Hence the OutputStream and InputStream declarations.
 *
 * Before perl 5.004_05, those entries in the standard typemap are not
 * defined in perl include files, so we do that here.
 *
 * Note that only OutputStream is tested: InputStream is defined alongside
 * it, both as plain PerlIO pointers.
 */

#ifndef OutputStream
#define OutputStream    PerlIO *
#define InputStream             PerlIO *
#endif  /* !OutputStream */
4679
MODULE = Storable       PACKAGE = Storable

# XS glue section.  None of the XSUBs below has a CODE: body, so xsubpp
# generates a wrapper that calls the C function bearing the same name
# with the listed arguments and returns its result to Perl.

PROTOTYPES: ENABLE

# Executed once when the module is bootstrapped.  init_perinterp() is
# defined elsewhere in this file (presumably sets up the per-interpreter
# context -- confirm against its definition).

BOOT:
    init_perinterp();

# Store entry points; the C implementations are outside this section.
# The OutputStream type makes xsubpp pick the output side of the handle.

int
pstore(f,obj)
OutputStream    f
SV *    obj

int
net_pstore(f,obj)
OutputStream    f
SV *    obj

SV *
mstore(obj)
SV *    obj

SV *
net_mstore(obj)
SV *    obj

# Retrieve entry points: pretrieve() reads from a stream (input side of the
# handle, via InputStream), mretrieve() reads from a scalar.

SV *
pretrieve(f)
InputStream     f

SV *
mretrieve(sv)
SV *    sv

# Deep copy of the given SV tree, done by storing it to memory and
# retrieving it back.

SV *
dclone(sv)
SV *    sv

# State queries; the C implementations are outside this section.

int
last_op_in_netorder()

int
is_storing()

int
is_retrieving()
4725