From: Nicholas Clark Date: Sat, 13 Apr 2002 02:58:07 +0000 (+0100) Subject: [dodgy PATCH] Re: [Storable] utf8 keys started working! X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e16e2ff836b177c52744755a35f12e818d01328b;p=p5sagit%2Fp5-mst-13.2.git [dodgy PATCH] Re: [Storable] utf8 keys started working! Message-ID: <20020413015806.GA371@Bagpuss.unfortu.net> p4raw-id: //depot/perl@15893 --- diff --git a/MANIFEST b/MANIFEST index 42c2809..0ea3148 100644 --- a/MANIFEST +++ b/MANIFEST @@ -590,12 +590,14 @@ ext/Storable/t/freeze.t See if Storable works ext/Storable/t/lock.t See if Storable works ext/Storable/t/overload.t See if Storable works ext/Storable/t/recurse.t See if Storable works +ext/Storable/t/restrict.t See if Storable works ext/Storable/t/retrieve.t See if Storable works ext/Storable/t/store.t See if Storable works ext/Storable/t/tied.t See if Storable works ext/Storable/t/tied_hook.t See if Storable works ext/Storable/t/tied_items.t See if Storable works ext/Storable/t/utf8.t See if Storable works +ext/Storable/t/utf8hash.t See if Storable works ext/Sys/Hostname/Hostname.pm Sys::Hostname extension Perl module ext/Sys/Hostname/Hostname.t See if Sys::Hostname works ext/Sys/Hostname/Hostname.xs Sys::Hostname extension external subroutines diff --git a/dump.c b/dump.c index 240d1c2..e3ece94 100644 --- a/dump.c +++ b/dump.c @@ -1238,7 +1238,8 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo int count = maxnest - nest; hv_iterinit(hv); - while ((he = hv_iternext(hv)) && count--) { + while ((he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS)) + && count--) { SV *elt, *keysv; char *keypv; STRLEN len; diff --git a/embed.fnc b/embed.fnc index bf0ecd5..e431c3c 100644 --- a/embed.fnc +++ b/embed.fnc @@ -278,12 +278,15 @@ Apd |char* |hv_iterkey |HE* entry|I32* retlen Apd |SV* |hv_iterkeysv |HE* entry Apd |HE* |hv_iternext |HV* tb Apd |SV* |hv_iternextsv |HV* hv|char** key|I32* retlen +ApM |HE* |hv_iternext_flags|HV* tb|I32 flags Apd |SV* |hv_iterval |HV* tb|HE* entry Ap |void |hv_ksplit |HV* hv|IV newmax Apd |void |hv_magic |HV* hv|GV* gv|int how Apd |SV** |hv_store |HV* tb|const char* key|I32 klen|SV* val \ |U32 hash Apd |HE* |hv_store_ent |HV* tb|SV* key|SV* val|U32 hash +ApM |SV** |hv_store_flags |HV* tb|const char* key|I32 klen|SV* val \ + |U32 hash|int flags Apd |void |hv_undef |HV* tb Ap |I32 |ibcmp |const char* a|const char* b|I32 len Ap |I32 |ibcmp_locale |const char* a|const char* b|I32 len @@ -990,8 +993,6 @@ s |HEK* |save_hek_flags |const char *str|I32 len|U32 hash|int flags s |void |hv_magic_check |HV *hv|bool *needs_copy|bool *needs_store s |void |unshare_hek_or_pvn|HEK* hek|const char* sv|I32 len|U32 hash s |HEK* |share_hek_flags|const char* sv|I32 len|U32 hash|int flags -s |SV** |hv_store_flags |HV* tb|const char* key|I32 klen|SV* val \ - |U32 hash|int flags s |SV** |hv_fetch_flags |HV* tb|const char* key|I32 klen|I32 lval \ |int flags s |void |hv_notallowed |int flags|const char *key|I32 klen|const char *msg diff --git a/embed.h b/embed.h index 3dc9e1f..5df6a20 100644 --- a/embed.h +++ b/embed.h @@ -263,11 +263,13 @@ #define hv_iterkeysv Perl_hv_iterkeysv #define hv_iternext Perl_hv_iternext #define hv_iternextsv Perl_hv_iternextsv +#define hv_iternext_flags Perl_hv_iternext_flags #define hv_iterval Perl_hv_iterval #define hv_ksplit Perl_hv_ksplit #define hv_magic Perl_hv_magic #define hv_store Perl_hv_store #define hv_store_ent Perl_hv_store_ent +#define hv_store_flags Perl_hv_store_flags #define hv_undef Perl_hv_undef #define ibcmp Perl_ibcmp #define ibcmp_locale Perl_ibcmp_locale @@ -926,7 +928,6 @@ #define hv_magic_check S_hv_magic_check #define unshare_hek_or_pvn S_unshare_hek_or_pvn #define share_hek_flags S_share_hek_flags -#define hv_store_flags S_hv_store_flags #define hv_fetch_flags S_hv_fetch_flags #define hv_notallowed S_hv_notallowed #endif @@ -1829,11 +1830,13 @@ #define hv_iterkeysv(a) Perl_hv_iterkeysv(aTHX_ a) #define hv_iternext(a) Perl_hv_iternext(aTHX_ a) #define hv_iternextsv(a,b,c) Perl_hv_iternextsv(aTHX_ a,b,c) +#define hv_iternext_flags(a,b) Perl_hv_iternext_flags(aTHX_ a,b) #define hv_iterval(a,b) Perl_hv_iterval(aTHX_ a,b) #define hv_ksplit(a,b) Perl_hv_ksplit(aTHX_ a,b) #define hv_magic(a,b,c) Perl_hv_magic(aTHX_ a,b,c) #define hv_store(a,b,c,d,e) Perl_hv_store(aTHX_ a,b,c,d,e) #define hv_store_ent(a,b,c,d) Perl_hv_store_ent(aTHX_ a,b,c,d) +#define hv_store_flags(a,b,c,d,e,f) Perl_hv_store_flags(aTHX_ a,b,c,d,e,f) #define hv_undef(a) Perl_hv_undef(aTHX_ a) #define ibcmp(a,b,c) Perl_ibcmp(aTHX_ a,b,c) #define ibcmp_locale(a,b,c) Perl_ibcmp_locale(aTHX_ a,b,c) @@ -2480,7 +2483,6 @@ #define hv_magic_check(a,b,c) S_hv_magic_check(aTHX_ a,b,c) #define unshare_hek_or_pvn(a,b,c,d) S_unshare_hek_or_pvn(aTHX_ a,b,c,d) #define share_hek_flags(a,b,c,d) S_share_hek_flags(aTHX_ a,b,c,d) -#define hv_store_flags(a,b,c,d,e,f) S_hv_store_flags(aTHX_ a,b,c,d,e,f) #define hv_fetch_flags(a,b,c,d,e) S_hv_fetch_flags(aTHX_ a,b,c,d,e) #define hv_notallowed(a,b,c,d) S_hv_notallowed(aTHX_ a,b,c,d) #endif diff --git a/ext/Storable/Storable.xs b/ext/Storable/Storable.xs index 279cd1f..c87ad92 100644 --- a/ext/Storable/Storable.xs +++ b/ext/Storable/Storable.xs @@ -58,7 +58,7 @@ #include /* Perl's one, needed since 5.6 */ #include -#if 0 +#if 1 #define DEBUGME /* Debug mode, turns assertions on as well */ #define DASSERT /* Assertion mode */ #endif @@ -184,7 +184,8 @@ typedef double NV; /* Older perls lack the NV type */ #define SX_TIED_IDX C(22) /* Tied magic index forthcoming */ #define SX_UTF8STR C(23) /* UTF-8 string forthcoming (small) */ #define SX_LUTF8STR C(24) /* UTF-8 string forthcoming (large) */ -#define SX_ERROR C(25) /* Error */ +#define SX_FLAG_HASH C(25) /* Hash with flags forthcoming (size, flags, key/flags/value triplet list) */ +#define SX_ERROR C(26) /* Error */ /* * Those are only used to retrieve "old" pre-0.6 binary images. @@ -316,8 +317,8 @@ typedef struct stcxt { #endif /* < perl5.004_68 */ #define dSTCXT_PTR(T,name) \ - T name = (perinterp_sv && SvIOK(perinterp_sv) \ - ? INT2PTR(T, SvIVX(perinterp_sv)) : (T) 0) + T name = ((perinterp_sv && SvIOK(perinterp_sv) \ + ? INT2PTR(T, SvIVX(perinterp_sv)) : (T) 0)) #define dSTCXT \ dSTCXT_SV; \ dSTCXT_PTR(stcxt_t *, cxt) @@ -623,6 +624,22 @@ static stcxt_t *Context_ptr = &Context; #define SHT_THASH 6 /* 4 + 2 -- tied hash */ /* + * per hash flags for flagged hashes + */ + +#define SHV_RESTRICTED 0x01 + +/* + * per key flags for flagged hashes + */ + +#define SHV_K_UTF8 0x01 +#define SHV_K_WASUTF8 0x02 +#define SHV_K_LOCKED 0x04 +#define SHV_K_ISSV 0x08 +#define SHV_K_PLACEHOLDER 0x10 + +/* * Before 0.6, the magic string was "perl-store" (binary version number 0). * * Since 0.6 introduced many binary incompatibilities, the magic string has @@ -641,8 +658,16 @@ static stcxt_t *Context_ptr = &Context; static char old_magicstr[] = "perl-store"; /* Magic number before 0.6 */ static char magicstr[] = "pst0"; /* Used as a magic number */ -#define STORABLE_BIN_MAJOR 2 /* Binary major "version" */ -#define STORABLE_BIN_MINOR 4 /* Binary minor "version" */ +#define STORABLE_BIN_MAJOR 2 /* Binary major "version" */ +#if (PATCHLEVEL <= 6) +#define STORABLE_BIN_MINOR 4 /* Binary minor "version" */ +#else +/* + * As of perl 5.7.3, utf8 hash key is introduced. + * So this must change -- dankogai +*/ +#define STORABLE_BIN_MINOR 5 /* Binary minor "version" */ +#endif /* (PATCHLEVEL <= 6) */ /* * Useful store shortcuts... @@ -897,6 +922,7 @@ static SV *(*sv_old_retrieve[])(stcxt_t *cxt, char *cname) = { retrieve_other, /* SX_TIED_IDX not supported */ retrieve_other, /* SX_UTF8STR not supported */ retrieve_other, /* SX_LUTF8STR not supported */ + retrieve_other, /* SX_FLAG_HASH not supported */ retrieve_other, /* SX_ERROR */ }; @@ -911,6 +937,7 @@ static SV *retrieve_hook(stcxt_t *cxt, char *cname); static SV *retrieve_overloaded(stcxt_t *cxt, char *cname); static SV *retrieve_tied_key(stcxt_t *cxt, char *cname); static SV *retrieve_tied_idx(stcxt_t *cxt, char *cname); +static SV *retrieve_flag_hash(stcxt_t *cxt, char *cname); static SV *(*sv_retrieve[])(stcxt_t *cxt, char *cname) = { 0, /* SX_OBJECT -- entry unused dynamically */ @@ -938,6 +965,7 @@ static SV *(*sv_retrieve[])(stcxt_t *cxt, char *cname) = { retrieve_tied_idx, /* SX_TIED_IDX */ retrieve_utf8str, /* SX_UTF8STR */ retrieve_lutf8str, /* SX_LUTF8STR */ + retrieve_flag_hash, /* SX_HASH */ retrieve_other, /* SX_ERROR */ }; @@ -1165,7 +1193,7 @@ static void init_retrieve_context(stcxt_t *cxt, int optype, int is_tainted) * new retrieve routines. */ - cxt->hseen = (cxt->retrieve_vtbl == sv_old_retrieve) ? newHV() : 0; + cxt->hseen = ((cxt->retrieve_vtbl == sv_old_retrieve) ? newHV() : 0); cxt->aseen = newAV(); /* Where retrieved objects are kept */ cxt->aclass = newAV(); /* Where seen classnames are kept */ @@ -1632,7 +1660,7 @@ static int store_ref(stcxt_t *cxt, SV *sv) * * Store a scalar. * - * Layout is SX_LSCALAR , SX_SCALAR or SX_UNDEF. + * Layout is SX_LSCALAR , SX_SCALAR or SX_UNDEF. * The section is omitted if is 0. * * If integer or double, the layout is SX_INTEGER or SX_DOUBLE . @@ -1855,26 +1883,49 @@ sortcmp(const void *a, const void *b) * * Store a hash table. * + * For a "normal" hash (not restricted, no utf8 keys): + * * Layout is SX_HASH followed by each key/value pair, in random order. * Values are stored as . * Keys are stored as , the section being omitted * if length is 0. + + * Layout is SX_HASH followed by each key/value pair, + * in random order. + * Values are stored as . + * Keys are stored as , the section being omitted + * if length is 0. + * Currently the only hash flag is "restriced" + * Key flags are as for hv.h */ static int store_hash(stcxt_t *cxt, HV *hv) { - I32 len = HvKEYS(hv); + I32 len = HvTOTALKEYS(hv); I32 i; int ret = 0; I32 riter; HE *eiter; + int flagged_hash = ((SvREADONLY(hv) || HvHASKFLAGS(hv)) ? 1 : 0); + unsigned char hash_flags = (SvREADONLY(hv) ? SHV_RESTRICTED : 0); - TRACEME(("store_hash (0x%"UVxf")", PTR2UV(hv))); + if (flagged_hash) { + /* needs int cast for C++ compilers, doesn't it? */ + TRACEME(("store_hash (0x%"UVxf") (flags %x)", PTR2UV(hv), + (int) hash_flags)); + } else { + TRACEME(("store_hash (0x%"UVxf")", PTR2UV(hv))); + } /* * Signal hash by emitting SX_HASH, followed by the table length. */ - PUTMARK(SX_HASH); + if (flagged_hash) { + PUTMARK(SX_FLAG_HASH); + PUTMARK(hash_flags); + } else { + PUTMARK(SX_HASH); + } WLEN(len); TRACEME(("size = %d", len)); @@ -1900,7 +1951,7 @@ static int store_hash(stcxt_t *cxt, HV *hv) if ( !(cxt->optype & ST_CLONE) && (cxt->canonical == 1 || (cxt->canonical < 0 && (cxt->canonical = - SvTRUE(perl_get_sv("Storable::canonical", TRUE)) ? 1 : 0))) + (SvTRUE(perl_get_sv("Storable::canonical", TRUE)) ? 1 : 0)))) ) { /* * Storing in order, sorted by key. @@ -1911,10 +1962,12 @@ static int store_hash(stcxt_t *cxt, HV *hv) AV *av = newAV(); + /*av_extend (av, len);*/ + TRACEME(("using canonical order")); for (i = 0; i < len; i++) { - HE *he = hv_iternext(hv); + HE *he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS); SV *key = hv_iterkeysv(he); av_store(av, AvFILLp(av)+1, key); /* av_push(), really */ } @@ -1922,8 +1975,10 @@ static int store_hash(stcxt_t *cxt, HV *hv) qsort((char *) AvARRAY(av), len, sizeof(SV *), sortcmp); for (i = 0; i < len; i++) { + unsigned char flags; char *keyval; - I32 keylen; + STRLEN keylen_tmp; + I32 keylen; SV *key = av_shift(av); HE *he = hv_fetch_ent(hv, key, 0, 0); SV *val = HeVAL(he); @@ -1947,11 +2002,54 @@ static int store_hash(stcxt_t *cxt, HV *hv) * See retrieve_hash() for details. */ - keyval = hv_iterkey(he, &keylen); - TRACEME(("(#%d) key '%s'", i, keyval)); + /* Implementation of restricted hashes isn't nicely + abstracted: */ + flags + = (((hash_flags & SHV_RESTRICTED) + && SvREADONLY(val)) + ? SHV_K_LOCKED : 0); + if (val == &PL_sv_undef) + flags |= SHV_K_PLACEHOLDER; + + keyval = SvPV(key, keylen_tmp); + keylen = keylen_tmp; + if (SvUTF8(key)) { + const char *keysave = keyval; + bool is_utf8 = TRUE; + + /* Just casting the &klen to (STRLEN) won't work + well if STRLEN and I32 are of different widths. + --jhi */ + keyval = (char*)bytes_from_utf8((U8*)keyval, + &keylen_tmp, + &is_utf8); + + /* If we were able to downgrade here, then than + means that we have a key which only had chars + 0-255, but was utf8 encoded. */ + + if (keyval != keysave) { + keylen = keylen_tmp; + flags |= SHV_K_WASUTF8; + } else { + /* keylen_tmp can't have changed, so no need + to assign back to keylen. */ + flags |= SHV_K_UTF8; + } + } + + if (flagged_hash) { + PUTMARK(flags); + TRACEME(("(#%d) key '%s' flags %x %u", i, keyval, flags, *keyval)); + } else { + assert (flags == 0); + TRACEME(("(#%d) key '%s'", i, keyval)); + } WLEN(keylen); if (keylen) WRITE(keyval, keylen); + if (flags & SHV_K_WASUTF8) + Safefree (keyval); } /* @@ -1971,7 +2069,11 @@ static int store_hash(stcxt_t *cxt, HV *hv) for (i = 0; i < len; i++) { char *key; I32 len; - SV *val = hv_iternextsv(hv, &key, &len); + unsigned char flags; + HE *he = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS); + SV *val = (he ? hv_iterval(hv, he) : 0); + SV *key_sv = NULL; + HEK *hek; if (val == 0) return 1; /* Internal error, not I/O error */ @@ -1985,6 +2087,34 @@ static int store_hash(stcxt_t *cxt, HV *hv) if ((ret = store(cxt, val))) /* Extra () for -Wall, grr... */ goto out; + /* Implementation of restricted hashes isn't nicely + abstracted: */ + flags + = (((hash_flags & SHV_RESTRICTED) + && SvREADONLY(val)) + ? SHV_K_LOCKED : 0); + if (val == &PL_sv_undef) + flags |= SHV_K_PLACEHOLDER; + + hek = HeKEY_hek(he); + len = HEK_LEN(hek); + if (len == HEf_SVKEY) { + /* This is somewhat sick, but the internal APIs are + * such that XS code could put one of these in in + * a regular hash. + * Maybe we should be capable of storing one if + * found. + */ + key_sv = HeKEY_sv(he); + flags |= SHV_K_ISSV; + } else { + /* Regular string key. */ + if (HEK_UTF8(hek)) + flags |= SHV_K_UTF8; + if (HEK_WASUTF8(hek)) + flags |= SHV_K_WASUTF8; + key = HEK_KEY(hek); + } /* * Write key string. * Keys are written after values to make sure retrieval @@ -1993,10 +2123,20 @@ static int store_hash(stcxt_t *cxt, HV *hv) * See retrieve_hash() for details. */ - TRACEME(("(#%d) key '%s'", i, key)); - WLEN(len); - if (len) + if (flagged_hash) { + PUTMARK(flags); + TRACEME(("(#%d) key '%s' flags %x", i, key, flags)); + } else { + assert (flags == 0); + TRACEME(("(#%d) key '%s'", i, key)); + } + if (flags & SHV_K_ISSV) { + store(cxt, key_sv); + } else { + WLEN(len); + if (len) WRITE(key, len); + } } } @@ -2847,7 +2987,8 @@ static int magic_write(stcxt_t *cxt) unsigned char c; int use_network_order = cxt->netorder; - TRACEME(("magic_write on fd=%d", cxt->fio ? fileno(cxt->fio) : -1)); + TRACEME(("magic_write on fd=%d", cxt->fio ? PerlIO_fileno(cxt->fio) + : -1)); if (cxt->fio) WRITE(magicstr, strlen(magicstr)); /* Don't write final \0 */ @@ -4271,6 +4412,115 @@ static SV *retrieve_hash(stcxt_t *cxt, char *cname) } /* + * retrieve_hash + * + * Retrieve a whole hash table. + * Layout is SX_HASH followed by each key/value pair, in random order. + * Keys are stored as , the section being omitted + * if length is 0. + * Values are stored as . + * + * When we come here, SX_HASH has been read already. + */ +static SV *retrieve_flag_hash(stcxt_t *cxt, char *cname) +{ + I32 len; + I32 size; + I32 i; + HV *hv; + SV *sv; + int hash_flags; + + GETMARK(hash_flags); + TRACEME(("retrieve_flag_hash (#%d)", cxt->tagnum)); + /* + * Read length, allocate table. + */ + + RLEN(len); + TRACEME(("size = %d, flags = %d", len, hash_flags)); + hv = newHV(); + SEEN(hv, cname); /* Will return if table not allocated properly */ + if (len == 0) + return (SV *) hv; /* No data follow if table empty */ + hv_ksplit(hv, len); /* pre-extend hash to save multiple splits */ + + /* + * Now get each key/value pair in turn... + */ + + for (i = 0; i < len; i++) { + int flags; + int store_flags = 0; + /* + * Get value first. + */ + + TRACEME(("(#%d) value", i)); + sv = retrieve(cxt, 0); + if (!sv) + return (SV *) 0; + + GETMARK(flags); + if ((hash_flags & SHV_RESTRICTED) && (flags & SHV_K_LOCKED)) + SvREADONLY_on(sv); + + if (flags & SHV_K_ISSV) { + /* XXX you can't set a placeholder with an SV key. + Then again, you can't get an SV key. + Without messing around beyond what the API is supposed to do. + */ + SV *keysv; + TRACEME(("(#%d) keysv, flags=%d", i, flags)); + keysv = retrieve(cxt, 0); + if (!keysv) + return (SV *) 0; + + if (!hv_store_ent(hv, keysv, sv, 0)) + return (SV *) 0; + } else { + /* + * Get key. + * Since we're reading into kbuf, we must ensure we're not + * recursing between the read and the hv_store() where it's used. + * Hence the key comes after the value. + */ + + if (flags & SHV_K_PLACEHOLDER) { + SvREFCNT_dec (sv); + sv = &PL_sv_undef; + store_flags |= HVhek_PLACEHOLD; + } + if (flags & SHV_K_UTF8) + store_flags |= HVhek_UTF8; + if (flags & SHV_K_WASUTF8) + store_flags |= HVhek_WASUTF8; + + RLEN(size); /* Get key size */ + KBUFCHK(size); /* Grow hash key read pool if needed */ + if (size) + READ(kbuf, size); + kbuf[size] = '\0'; /* Mark string end, just in case */ + TRACEME(("(#%d) key '%s' flags %X store_flags %X", i, kbuf, + flags, store_flags)); + + /* + * Enter key/value pair into hash table. + */ + + if (hv_store_flags(hv, kbuf, size, sv, 0, flags) == 0) + return (SV *) 0; + } + } + if (hash_flags & SHV_RESTRICTED) + SvREADONLY_on(hv); + + TRACEME(("ok (retrieve_hash at 0x%"UVxf")", PTR2UV(hv))); + + return (SV *) hv; +} + +/* * old_retrieve_array * * Retrieve a whole array in pre-0.6 binary format. diff --git a/ext/Storable/t/restrict.t b/ext/Storable/t/restrict.t new file mode 100644 index 0000000..0eb299f --- /dev/null +++ b/ext/Storable/t/restrict.t @@ -0,0 +1,89 @@ +#!./perl + +# +# Copyright 2002, Larry Wall. +# +# You may redistribute only under the same terms as Perl 5, as specified +# in the README file that comes with the distribution. +# + +sub BEGIN { + chdir('t') if -d 't'; + @INC = '.'; + push @INC, '../lib'; + require Config; import Config; + if ($Config{'extensions'} !~ /\bStorable\b/) { + print "1..0 # Skip: Storable was not built\n"; + exit 0; + } + require 'lib/st-dump.pl'; +} + + +use Storable qw(dclone); +use Hash::Util qw(lock_hash unlock_value); + +print "1..16\n"; + +my %hash = (question => '?', answer => 42, extra => 'junk', undef => undef); +lock_hash %hash; +unlock_value %hash, 'answer'; +unlock_value %hash, 'extra'; +delete $hash{'extra'}; + +my $test; + +package Restrict_Test; + +sub me_second { + return (undef, $_[0]); +} + +package main; + +sub testit { + my $hash = shift; + my $copy = dclone $hash; + + my @in_keys = sort keys %$hash; + my @out_keys = sort keys %$copy; + unless (ok ++$test, "@in_keys" eq "@out_keys") { + print "# Failed: keys mis-match after deep clone.\n"; + print "# Original keys: @in_keys\n"; + print "# Copy's keys: @out_keys\n"; + } + + # $copy = $hash; # used in initial debug of the tests + + ok ++$test, Internals::SvREADONLY(%$copy), "cloned hash restricted?"; + + ok ++$test, Internals::SvREADONLY($copy->{question}), + "key 'question' not locked in copy?"; + + ok ++$test, !Internals::SvREADONLY($copy->{answer}), + "key 'answer' not locked in copy?"; + + eval { $copy->{extra} = 15 } ; + unless (ok ++$test, !$@, "Can assign to reserved key 'extra'?") { + my $diag = $@; + $diag =~ s/\n.*\z//s; + print "# \$@: $diag\n"; + } + + eval { $copy->{nono} = 7 } ; + ok ++$test, $@, "Can not assign to invalid key 'nono'?"; + + ok ++$test, exists $copy->{undef}, + "key 'undef' exists"; + + ok ++$test, !defined $copy->{undef}, + "value for key 'undef' is undefined"; +} + +for $Storable::canonical (0, 1) { + print "# \$Storable::canonical = $Storable::canonical\n"; + testit (\%hash); + my $object = \%hash; + # bless {}, "Restrict_Test"; +} + diff --git a/ext/Storable/t/utf8.t b/ext/Storable/t/utf8.t index 607478a..600bcf2 100644 --- a/ext/Storable/t/utf8.t +++ b/ext/Storable/t/utf8.t @@ -1,8 +1,11 @@ -#!./perl +#!./perl -w # $Id: utf8.t,v 1.0.1.2 2000/09/28 21:44:17 ram Exp $ # -# @COPYRIGHT@ +# Copyright (c) 1995-2000, Raphael Manfredi +# +# You may redistribute only under the same terms as Perl 5, as specified +# in the README file that comes with the distribution. # # $Log: utf8.t,v $ # Revision 1.0.1.2 2000/09/28 21:44:17 ram @@ -31,12 +34,21 @@ sub BEGIN { require 'lib/st-dump.pl'; } +use strict; sub ok; use Storable qw(thaw freeze); -print "1..1\n"; +print "1..3\n"; -$x = chr(1234); +my $x = chr(1234); ok 1, $x eq ${thaw freeze \$x}; +# Long scalar +$x = join '', map {chr $_} (0..1023); +ok 2, $x eq ${thaw freeze \$x}; + +# Char in the range 127-255 (probably) in utf8 +$x = chr (175) . chr (256); +chop $x; +ok 3, $x eq ${thaw freeze \$x}; diff --git a/ext/Storable/t/utf8hash.t b/ext/Storable/t/utf8hash.t new file mode 100644 index 0000000..5e93914 --- /dev/null +++ b/ext/Storable/t/utf8hash.t @@ -0,0 +1,204 @@ +#!./perl +# +# $Id: utf8.t,v 1.0.1.2 2000/09/28 21:44:17 ram Exp $ +# +# + +sub BEGIN { + if ($] < 5.007) { + print "1..0 # Skip: no utf8 hash key support\n"; + exit 0; + } + if ($ENV{PERL_CORE}){ + chdir('t') if -d 't'; + @INC = '.'; + push @INC, '../lib'; + } + require Config; import Config; + if ($ENV{PERL_CORE}){ + if($Config{'extensions'} !~ /\bStorable\b/) { + print "1..0 # Skip: Storable was not built\n"; + exit 0; + } + } + # require 'lib/st-dump.pl'; +} + +use strict; +our $DEBUGME = shift || 0; +use Storable qw(store nstore retrieve thaw freeze); +{ + no warnings; + $Storable::DEBUGME = ($DEBUGME > 1); +} +# Better than no plan, because I was getting out of memory errors, at which +# point Test::More tidily prints up 1..79 as if I meant to finish there. +use Test::More tests=>148; +use bytes (); +use Encode qw(is_utf8); +my %utf8hash; + +for $Storable::canonical (0, 1) { + +# first we generate a nasty hash which keys include both utf8 +# on and off with identical PVs + +my @ords = ( + 0xc0, # LATIN CAPITAL LETTER A WITH GRAVE + 0x3000, #IDEOGRAPHIC SPACE + ); + +foreach my $i (@ords){ + my $u = chr($i); utf8::upgrade($u); + # warn sprintf "%d,%d", bytes::length($u), is_utf8($u); + my $b = pack("C*", unpack("C*", $u)); + # warn sprintf "%d,%d" ,bytes::length($b), is_utf8($b); + + isnt($u, $b, + "equivalence - with utf8flag"); + is (pack("C*", unpack("C*", $u)), pack("C*", unpack("C*", $b)), + "equivalence - without utf8flag"); + + $utf8hash{$u} = $utf8hash{$b} = $i; +} + +sub nkeys($){ + my $href = shift; + return scalar keys %$href; +} + +my $nk; +is($nk = nkeys(\%utf8hash), scalar(@ords)*2, + "nasty hash generated (nkeys=$nk)"); + +# now let the show begin! + +my $thawed = thaw(freeze(\%utf8hash)); + +is($nk = nkeys($thawed), + nkeys(\%utf8hash), + "scalar keys \%{\$thawed} (nkeys=$nk)"); +for my $k (sort keys %$thawed){ + is($utf8hash{$k}, $thawed->{$k}, "frozen item chr($utf8hash{$k})"); +} + +my $storage = "utfhash.po"; # po = perl object! +my $retrieved; + +ok((nstore \%utf8hash, $storage), "nstore to $storage"); +ok(($retrieved = retrieve($storage)), "retrieve from $storage"); + +is($nk = nkeys($retrieved), + nkeys(\%utf8hash), + "scalar keys \%{\$retrieved} (nkeys=$nk)"); +for my $k (sort keys %$retrieved){ + is($utf8hash{$k}, $retrieved->{$k}, "nstored item chr($utf8hash{$k})"); +} +unlink $storage; + + +ok((store \%utf8hash, $storage), "store to $storage"); +ok(($retrieved = retrieve($storage)), "retrieve from $storage"); +is($nk = nkeys($retrieved), + nkeys(\%utf8hash), + "scalar keys \%{\$retrieved} (nkeys=$nk)"); +for my $k (sort keys %$retrieved){ + is($utf8hash{$k}, $retrieved->{$k}, "stored item chr($utf8hash{$k})"); +} +$DEBUGME or unlink $storage; + +# On the premis that more tests are good, here are NWC's tests: + +package Hash_Test; + +sub me_second { + return (undef, $_[0]); +} + +package main; + +my $utf8 = "Schlo\xdf" . chr 256; +chop $utf8; + +# Set this to 1 to test the test by bypassing Storable. +my $bypass = 0; + +sub class_test { + my ($object, $package) = @_; + unless ($package) { + is ref $object, 'HASH', "$object is unblessed"; + return; + } + isa_ok ($object, $package); + my ($garbage, $copy) = eval {$object->me_second}; + is $@, "", "check it has correct method"; + cmp_ok $copy, '==', $object, "and that it returns the same object"; +} + +# Thanks to Dan Kogai for the Kanji for "castle" (which he informs me also +# means 'a city' in Mandarin). +my %hash = (map {$_, $_} 'castle', "ch\xe5teau", $utf8, "\x{57CE}"); + +for my $package ('', 'Hash_Test') { + # Run through and sanity check these. + if ($package) { + bless \%hash, $package; + } + for (keys %hash) { + my $l = 0 + /^\w+$/; + my $r = 0 + $hash{$_} =~ /^\w+$/; + cmp_ok ($l, '==', $r); + } + + # Grr. This cperl mode thinks that ${ is a punctuation variable. + # I presume it's punishment for using xemacs rather than emacs. Or OS/2 :-) + my $copy = $bypass ? \%hash : ${thaw freeze \\%hash}; + class_test ($copy, $package); + + for (keys %$copy) { + my $l = 0 + /^\w+$/; + my $r = 0 + $copy->{$_} =~ /^\w+$/; + cmp_ok ($l, '==', $r, sprintf "key length %d", length $_); + } + + + my $bytes = my $char = chr 27182; + utf8::encode ($bytes); + + my $orig = {$char => 1}; + if ($package) { + bless $orig, $package; + } + my $just_utf8 = $bypass ? $orig : ${thaw freeze \$orig}; + class_test ($just_utf8, $package); + cmp_ok (scalar keys %$just_utf8, '==', 1, "1 key in utf8?"); + cmp_ok ($just_utf8->{$char}, '==', 1, "utf8 key present?"); + ok (!exists $just_utf8->{$bytes}, "bytes key absent?"); + + $orig = {$bytes => 1}; + if ($package) { + bless $orig, $package; + } + my $just_bytes = $bypass ? $orig : ${thaw freeze \$orig}; + class_test ($just_bytes, $package); + + cmp_ok (scalar keys %$just_bytes, '==', 1, "1 key in bytes?"); + cmp_ok ($just_bytes->{$bytes}, '==', 1, "bytes key present?"); + ok (!exists $just_bytes->{$char}, "utf8 key absent?"); + + die sprintf "Both have length %d, which is crazy", length $char + if length $char == length $bytes; + + $orig = {$bytes => length $bytes, $char => length $char}; + if ($package) { + bless $orig, $package; + } + my $both = $bypass ? $orig : ${thaw freeze \$orig}; + class_test ($both, $package); + + cmp_ok (scalar keys %$both, '==', 2, "2 keys?"); + cmp_ok ($both->{$bytes}, '==', length $bytes, "bytes key present?"); + cmp_ok ($both->{$char}, '==', length $char, "utf8 key present?"); +} + +} diff --git a/global.sym b/global.sym index 3eb8d34..cc4d2a1 100644 --- a/global.sym +++ b/global.sym @@ -153,11 +153,13 @@ Perl_hv_iterkey Perl_hv_iterkeysv Perl_hv_iternext Perl_hv_iternextsv +Perl_hv_iternext_flags Perl_hv_iterval Perl_hv_ksplit Perl_hv_magic Perl_hv_store Perl_hv_store_ent +Perl_hv_store_flags Perl_hv_undef Perl_ibcmp Perl_ibcmp_locale diff --git a/hv.c b/hv.c index 1d967ce..51f47fd 100644 --- a/hv.c +++ b/hv.c @@ -516,6 +516,11 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, U32 hash) const char *keysave = key; int flags = 0; + if (klen < 0) { + klen = -klen; + is_utf8 = TRUE; + } + if (is_utf8) { STRLEN tmplen = klen; /* Just casting the &klen to (STRLEN) won't work well @@ -536,7 +541,7 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, U32 hash) } SV** -S_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, +Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 hash, int flags) { register XPVHV* xhv; @@ -597,7 +602,13 @@ S_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, xhv->xhv_placeholders--; /* yes, can store into placeholder slot */ else SvREFCNT_dec(HeVAL(entry)); - HeVAL(entry) = val; + if (flags & HVhek_PLACEHOLD) { + /* We have been requested to insert a placeholder. Currently + only Storable is allowed to do this. */ + xhv->xhv_placeholders++; + HeVAL(entry) = &PL_sv_undef; + } else + HeVAL(entry) = val; if (HeKFLAGS(entry) != flags) { /* We match if HVhek_UTF8 bit in our flags and hash key's match. @@ -634,7 +645,13 @@ S_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, HeKEY_hek(entry) = share_hek_flags(key, klen, hash, flags); else /* gotta do the real thing */ HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); - HeVAL(entry) = val; + if (flags & HVhek_PLACEHOLD) { + /* We have been requested to insert a placeholder. Currently + only Storable is allowed to do this. */ + xhv->xhv_placeholders++; + HeVAL(entry) = &PL_sv_undef; + } else + HeVAL(entry) = val; HeNEXT(entry) = *oentry; *oentry = entry; @@ -1551,7 +1568,7 @@ Perl_newHVhv(pTHX_ HV *ohv) HvMAX(hv) = hv_max; hv_iterinit(ohv); - while ((entry = hv_iternext(ohv))) { + while ((entry = hv_iternext_flags(ohv, 0))) { hv_store_flags(hv, HeKEY(entry), HeKLEN(entry), newSVsv(HeVAL(entry)), HeHASH(entry), HeKFLAGS(entry)); @@ -1713,6 +1730,7 @@ NOTE: Before version 5.004_65, C used to return the number of hash buckets that happen to be in use. If you still need that esoteric value, you can get it through the macro C. + =cut */ @@ -1735,7 +1753,6 @@ Perl_hv_iterinit(pTHX_ HV *hv) /* used to be xhv->xhv_fill before 5.004_65 */ return XHvTOTALKEYS(xhv); } - /* =for apidoc hv_iternext @@ -1747,6 +1764,20 @@ Returns entries from a hash iterator. See C. HE * Perl_hv_iternext(pTHX_ HV *hv) { + return hv_iternext_flags(hv, 0); +} + +/* +XXX=for apidoc hv_iternext + +Returns entries from a hash iterator. See C. + +XXX=cut +*/ + +HE * +Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) +{ register XPVHV* xhv; register HE *entry; HE *oldentry; @@ -1800,12 +1831,14 @@ Perl_hv_iternext(pTHX_ HV *hv) if (entry) { entry = HeNEXT(entry); - /* - * Skip past any placeholders -- don't want to include them in - * any iteration. - */ - while (entry && HeVAL(entry) == &PL_sv_undef) { - entry = HeNEXT(entry); + if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { + /* + * Skip past any placeholders -- don't want to include them in + * any iteration. + */ + while (entry && HeVAL(entry) == &PL_sv_undef) { + entry = HeNEXT(entry); + } } } while (!entry) { @@ -1817,10 +1850,11 @@ Perl_hv_iternext(pTHX_ HV *hv) /* entry = (HvARRAY(hv))[HvRITER(hv)]; */ entry = ((HE**)xhv->xhv_array)[xhv->xhv_riter]; - /* if we have an entry, but it's a placeholder, don't count it */ - if (entry && HeVAL(entry) == &PL_sv_undef) - entry = 0; - + if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { + /* if we have an entry, but it's a placeholder, don't count it */ + if (entry && HeVAL(entry) == &PL_sv_undef) + entry = 0; + } } if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ @@ -1931,7 +1965,7 @@ SV * Perl_hv_iternextsv(pTHX_ HV *hv, char **key, I32 *retlen) { HE *he; - if ( (he = hv_iternext(hv)) == NULL) + if ( (he = hv_iternext_flags(hv, 0)) == NULL) return NULL; *key = hv_iterkey(he, retlen); return hv_iterval(hv, he); diff --git a/hv.h b/hv.h index 3746b60..6dc0a88 100644 --- a/hv.h +++ b/hv.h @@ -231,6 +231,8 @@ C. #define HVhek_UTF8 0x01 /* Key is utf8 encoded. */ #define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */ #define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */ +#define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder. + * (may change, but Storable is a core module) */ #define HVhek_MASK 0xFF #define HEK_UTF8(hek) (HEK_FLAGS(hek) & HVhek_UTF8) @@ -251,6 +253,9 @@ C. : (size) * sizeof(HE*) * 2 - MALLOC_OVERHEAD) #endif +/* Flags for hv_iternext_flags. */ +#define HV_ITERNEXT_WANTPLACEHOLDERS 0x01 /* Don't skip placeholders. */ + /* available as a function in hv.c */ #define Perl_sharepvn(sv, len, hash) HEK_KEY(share_hek(sv, len, hash)) #define sharepvn(sv, len, hash) Perl_sharepvn(sv, len, hash) diff --git a/proto.h b/proto.h index 3bd1a61..1b55ae9 100644 --- a/proto.h +++ b/proto.h @@ -315,11 +315,13 @@ PERL_CALLCONV char* Perl_hv_iterkey(pTHX_ HE* entry, I32* retlen); PERL_CALLCONV SV* Perl_hv_iterkeysv(pTHX_ HE* entry); PERL_CALLCONV HE* Perl_hv_iternext(pTHX_ HV* tb); PERL_CALLCONV SV* Perl_hv_iternextsv(pTHX_ HV* hv, char** key, I32* retlen); +PERL_CALLCONV HE* Perl_hv_iternext_flags(pTHX_ HV* tb, I32 flags); PERL_CALLCONV SV* Perl_hv_iterval(pTHX_ HV* tb, HE* entry); PERL_CALLCONV void Perl_hv_ksplit(pTHX_ HV* hv, IV newmax); PERL_CALLCONV void Perl_hv_magic(pTHX_ HV* hv, GV* gv, int how); PERL_CALLCONV SV** Perl_hv_store(pTHX_ HV* tb, const char* key, I32 klen, SV* val, U32 hash); PERL_CALLCONV HE* Perl_hv_store_ent(pTHX_ HV* tb, SV* key, SV* val, U32 hash); +PERL_CALLCONV SV** Perl_hv_store_flags(pTHX_ HV* tb, const char* key, I32 klen, SV* val, U32 hash, int flags); PERL_CALLCONV void Perl_hv_undef(pTHX_ HV* tb); PERL_CALLCONV I32 Perl_ibcmp(pTHX_ const char* a, const char* b, I32 len); PERL_CALLCONV I32 Perl_ibcmp_locale(pTHX_ const char* a, const char* b, I32 len); @@ -660,9 +662,9 @@ PERL_CALLCONV void Perl_rxres_save(pTHX_ void** rsp, REGEXP* prx); #if !defined(HAS_RENAME) PERL_CALLCONV I32 Perl_same_dirent(pTHX_ char* a, char* b); #endif -PERL_CALLCONV char* Perl_savepv(pTHX_ const char* sv); -PERL_CALLCONV char* Perl_savesharedpv(pTHX_ const char* sv); -PERL_CALLCONV char* Perl_savepvn(pTHX_ const char* sv, I32 len); +PERL_CALLCONV char* Perl_savepv(pTHX_ const char* pv); +PERL_CALLCONV char* Perl_savesharedpv(pTHX_ const char* pv); +PERL_CALLCONV char* Perl_savepvn(pTHX_ const char* pv, I32 len); PERL_CALLCONV void Perl_savestack_grow(pTHX); PERL_CALLCONV void Perl_save_aelem(pTHX_ AV* av, I32 idx, SV **sptr); PERL_CALLCONV I32 Perl_save_alloc(pTHX_ I32 size, I32 pad); @@ -1037,7 +1039,6 @@ STATIC HEK* S_save_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags STATIC void S_hv_magic_check(pTHX_ HV *hv, bool *needs_copy, bool *needs_store); STATIC void S_unshare_hek_or_pvn(pTHX_ HEK* hek, const char* sv, I32 len, U32 hash); STATIC HEK* S_share_hek_flags(pTHX_ const char* sv, I32 len, U32 hash, int flags); -STATIC SV** S_hv_store_flags(pTHX_ HV* tb, const char* key, I32 klen, SV* val, U32 hash, int flags); STATIC SV** S_hv_fetch_flags(pTHX_ HV* tb, const char* key, I32 klen, I32 lval, int flags); STATIC void S_hv_notallowed(pTHX_ int flags, const char *key, I32 klen, const char *msg); #endif diff --git a/t/lib/st-dump.pl b/t/lib/st-dump.pl index 9b1f3d1..05028f3 100644 --- a/t/lib/st-dump.pl +++ b/t/lib/st-dump.pl @@ -11,9 +11,27 @@ ;# sub ok { - my ($num, $ok) = @_; - print "not " unless $ok; - print "ok $num\n"; + my ($num, $ok, $name) = @_; + $num .= " - $name" if defined $name and length $name; + print $ok ? "ok $num\n" : "not ok $num\n"; + $ok; +} + +sub num_equal { + my ($num, $left, $right, $name) = @_; + my $ok = ((defined $left) ? $left == $right : undef); + unless (ok ($num, $ok, $name)) { + print "# Expected $right\n"; + if (!defined $left) { + print "# Got undef\n"; + } elsif ($left !~ tr/0-9//c) { + print "# Got $left\n"; + } else { + $left =~ s/([^-a-zA-Z0-9_+])/sprintf "\\%03o", ord $1/ge; + print "# Got \"$left\"\n"; + } + } + $ok; } package dump;