From: Nicholas Clark Date: Sun, 16 Nov 2003 20:20:58 +0000 (+0000) Subject: utf8 keys now work for tied hashes via hv_fetch, hv_store, hv_delete X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b60cf05ab72950309ce22f1294b53484e06a00ac;p=p5sagit%2Fp5-mst-13.2.git utf8 keys now work for tied hashes via hv_fetch, hv_store, hv_delete (pp functions use the _ent variants, and as the implementation is duplicated, these bugs aren't tested, and aren't noticed) p4raw-id: //depot/perl@21735 --- diff --git a/ext/XS/APItest/APItest.xs b/ext/XS/APItest/APItest.xs index 1fac6cb..b346588 100644 --- a/ext/XS/APItest/APItest.xs +++ b/ext/XS/APItest/APItest.xs @@ -19,6 +19,69 @@ exists(hash, key_sv) OUTPUT: RETVAL +SV * +delete(hash, key_sv) + PREINIT: + STRLEN len; + const char *key; + INPUT: + HV *hash + SV *key_sv + CODE: + key = SvPV(key_sv, len); + /* It's already mortal, so need to increase reference count. */ + RETVAL = SvREFCNT_inc(hv_delete(hash, key, + SvUTF8(key_sv) ? -len : len, 0)); + OUTPUT: + RETVAL + +SV * +store(hash, key_sv, value) + PREINIT: + STRLEN len; + const char *key; + SV *copy; + SV **result; + INPUT: + HV *hash + SV *key_sv + SV *value + CODE: + key = SvPV(key_sv, len); + copy = newSV(0); + result = hv_store(hash, key, SvUTF8(key_sv) ? -len : len, copy, 0); + SvSetMagicSV(*result, value); + if (!result) { + SvREFCNT_dec(copy); + XSRETURN_EMPTY; + } + /* It's about to become mortal, so need to increase reference count. + */ + RETVAL = SvREFCNT_inc(*result); + OUTPUT: + RETVAL + + +SV * +fetch(hash, key_sv) + PREINIT: + STRLEN len; + const char *key; + SV **result; + INPUT: + HV *hash + SV *key_sv + CODE: + key = SvPV(key_sv, len); + result = hv_fetch(hash, key, SvUTF8(key_sv) ? -len : len, 0); + if (!result) { + XSRETURN_EMPTY; + } + /* Force mg_get */ + RETVAL = newSVsv(*result); + OUTPUT: + RETVAL + =pod sub TIEHASH { bless {}, $_[0] } diff --git a/ext/XS/APItest/t/hash.t b/ext/XS/APItest/t/hash.t index 0fc5eae..2a2a4e0 100644 --- a/ext/XS/APItest/t/hash.t +++ b/ext/XS/APItest/t/hash.t @@ -17,15 +17,15 @@ use Tie::Hash; my @testkeys = ('N', chr 256); -my $temp = chr 258; -utf8::encode $temp; +my $utf8_for_258 = chr 258; +utf8::encode $utf8_for_258; -my @keys = (@testkeys, $temp); +my @keys = (@testkeys, $utf8_for_258); my (%hash, %tiehash); tie %tiehash, 'Tie::StdHash'; -@hash{@keys} = (); -@tiehash{@keys} = (); +@hash{@keys} = @keys; +@tiehash{@keys} = @keys; use Test::More 'no_plan'; @@ -56,16 +56,137 @@ sub test_absent { "hv_exists tie absent $printable"); } +sub test_delete_present { + my $key = shift; + my $printable = join ',', map {ord} split //, $key; + + my $copy = {%hash}; + is (delete $copy->{$key}, $key, "hv_delete_ent present $printable"); + $copy = {%hash}; + is (XS::APItest::Hash::delete ($copy, $key), $key, + "hv_delete present $printable"); + + $copy = {}; + tie %$copy, 'Tie::StdHash'; + %$copy = %tiehash; + is (delete $copy->{$key}, $key, "hv_delete_ent tie present $printable"); + + %$copy = %tiehash; + is (XS::APItest::Hash::delete ($copy, $key), $key, + "hv_delete tie present $printable"); +} + +sub test_delete_absent { + my $key = shift; + my $printable = join ',', map {ord} split //, $key; + + my $copy = {%hash}; + is (delete $copy->{$key}, undef, "hv_delete_ent absent $printable"); + $copy = {%hash}; + is (XS::APItest::Hash::delete ($copy, $key), undef, + "hv_delete absent $printable"); + + $copy = {}; + tie %$copy, 'Tie::StdHash'; + %$copy = %tiehash; + is (delete $copy->{$key}, undef, "hv_delete_ent tie absent $printable"); + + %$copy = %tiehash; + is (XS::APItest::Hash::delete ($copy, $key), undef, + "hv_delete tie absent $printable"); +} + +sub brute_force_exists { + my ($hash, $key) = @_; + foreach (keys %$hash) { + return 1 if $key eq $_; + } + return 0; +} + +sub test_store { + my $key = shift; + my $printable = join ',', map {ord} split //, $key; + + # We are cheating - hv_store returns NULL for a store into an empty + # tied hash. This isn't helpful here. + + my %h1 = (a=>'cheat'); + is ($h1{$key} = 1, 1); + ok (brute_force_exists (\%h1, $key), "hv_store_ent $printable"); + my %h2 = (a=>'cheat'); + is (XS::APItest::Hash::store(\%h2, $key, 1), 1); + ok (brute_force_exists (\%h2, $key), "hv_store $printable"); + my %h3 = (a=>'cheat'); + tie %h3, 'Tie::StdHash'; + is ($h3{$key} = 1, 1); + ok (brute_force_exists (\%h3, $key), "hv_store_ent tie $printable"); + + my %h4 = (a=>'cheat'); + tie %h4, 'Tie::StdHash'; + is (XS::APItest::Hash::store(\%h4, $key, 1), 1); + ok (brute_force_exists (\%h4, $key), "hv_store tie $printable"); +} + +sub test_fetch_present { + my $key = shift; + my $printable = join ',', map {ord} split //, $key; + + is ($hash{$key}, $key, "hv_fetch_ent present $printable"); + is (XS::APItest::Hash::fetch (\%hash, $key), $key, + "hv_fetch present $printable"); + + is ($tiehash{$key}, $key, "hv_fetch_ent tie present $printable"); + is (XS::APItest::Hash::fetch (\%tiehash, $key), $key, + "hv_fetch tie present $printable"); +} + +sub test_fetch_absent { + my $key = shift; + my $printable = join ',', map {ord} split //, $key; + + is ($hash{$key}, undef, "hv_fetch_ent absent $printable"); + is (XS::APItest::Hash::fetch (\%hash, $key), undef, + "hv_fetch absent $printable"); + + is ($tiehash{$key}, undef, "hv_fetch_ent tie absent $printable"); + is (XS::APItest::Hash::fetch (\%tiehash, $key), undef, + "hv_fetch tie absent $printable"); +} + foreach my $key (@testkeys) { test_present ($key); + test_fetch_present ($key); + test_delete_present ($key); + + test_store ($key); my $lckey = lc $key; test_absent ($lckey); + test_fetch_absent ($lckey); + test_delete_absent ($lckey); my $unikey = $key; utf8::encode $unikey; - test_absent ($unikey) unless $unikey eq $key; + next if $unikey eq $key; + + test_absent ($unikey); + test_fetch_absent ($unikey); + test_delete_absent ($unikey); } +# hv_exists was buggy for tied hashes, in that the raw utf8 key was being +# used - the utf8 flag was being lost. test_absent (chr 258); +test_fetch_absent (chr 258); +test_delete_absent (chr 258); + +{ + my %h = (a=>'cheat'); + tie %h, 'Tie::StdHash'; + is (XS::APItest::Hash::store(\%h, chr 258, 1), 1); + + ok (!exists $h{$utf8_for_258}, + "hv_store doesn't insert a key with the raw utf8 on a tied hash"); +} diff --git a/hv.c b/hv.c index 2d9b06e..dd30035 100644 --- a/hv.c +++ b/hv.c @@ -226,15 +226,19 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags) return 0; if (SvRMAGICAL(hv)) { - /* All this clause seems to be utf8 unaware. - By moving the utf8 stuff out to hv_fetch_flags I need to ensure - key doesn't leak. I've not tried solving the utf8-ness. - NWC. - */ if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) { sv = sv_newmortal(); sv_upgrade(sv, SVt_PVLV); - mg_copy((SV*)hv, sv, key, klen); + if (flags & HVhek_UTF8) { + /* This hack based on the code in hv_exists_ent seems to be + the easiest way to pass the utf8 flag through and fix + the bug in hv_exists for tied hashes with utf8 keys. */ + SV *keysv = sv_2mortal(newSVpvn(key, klen)); + SvUTF8_on(keysv); + mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY); + } else { + mg_copy((SV*)hv, sv, key, klen); + } if (flags & HVhek_FREEKEY) Safefree(key); LvTYPE(sv) = 't'; @@ -627,7 +631,16 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, bool needs_store; hv_magic_check (hv, &needs_copy, &needs_store); if (needs_copy) { - mg_copy((SV*)hv, val, key, klen); + if (flags & HVhek_UTF8) { + /* This hack based on the code in hv_exists_ent seems to be + the easiest way to pass the utf8 flag through and fix + the bug in hv_exists for tied hashes with utf8 keys. */ + SV *keysv = sv_2mortal(newSVpvn(key, klen)); + SvUTF8_on(keysv); + mg_copy((SV*)hv, val, (char *)keysv, HEf_SVKEY); + } else { + mg_copy((SV*)hv, val, key, klen); + } if (!xhv->xhv_array /* !HvARRAY */ && !needs_store) { if (flags & HVhek_FREEKEY) Safefree(key); @@ -957,7 +970,9 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags) bool needs_store; hv_magic_check (hv, &needs_copy, &needs_store); - if (needs_copy && (svp = hv_fetch(hv, key, klen, TRUE))) { +// XXX PerlIO_printf(PerlIO_stderr(), "%d %d\n", is_utf8, klen); + if (needs_copy + && (svp = hv_fetch(hv, key, is_utf8 ? -klen : klen, TRUE))) { sv = *svp; if (SvMAGICAL(sv)) { mg_clear(sv);