From: Anno Siegel Date: Sun, 17 Jun 2007 17:13:06 +0000 (+0200) Subject: [patch] Hash::Util::FieldHash v1.01 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d74d639bc8e61d741d9a79acc1bd92b4db9c8347;p=p5sagit%2Fp5-mst-13.2.git [patch] Hash::Util::FieldHash v1.01 Message-Id: <1750E68D-4F6E-48B9-A255-BCC0DA5C886E@mailbox.tu-berlin.de> p4raw-id: //depot/perl@31406 --- diff --git a/ext/Hash/Util/FieldHash/Changes b/ext/Hash/Util/FieldHash/Changes index 5ffc28f..dfddfb1 100644 --- a/ext/Hash/Util/FieldHash/Changes +++ b/ext/Hash/Util/FieldHash/Changes @@ -10,3 +10,14 @@ Revision history for Perl extension Hash::Util::FieldHash. 0.02 Fri Apr 20 22:22:57 CEST 2007 - Bugfix: string keys are now checked whether they represent an object, so %fieldhash_clone = %fieldhash_orig works. + +1.01 Thu May 31 11:12:20 CEST 2007 + - Introduced magic id chaching after a suggestion by Jerry Hedden + - Added functions id, id_2obj, register, idhash, idhashes + + Sun Jun 17 15:10:45 CEST 2007 + In preparation for release + - added tests for new functions + - pod partially re-written to describe the multi-level + interface + - updated pod part of lib/Hash/Util.pm diff --git a/ext/Hash/Util/FieldHash/FieldHash.xs b/ext/Hash/Util/FieldHash/FieldHash.xs index 25d06f5..972f99d 100644 --- a/ext/Hash/Util/FieldHash/FieldHash.xs +++ b/ext/Hash/Util/FieldHash/FieldHash.xs @@ -9,6 +9,8 @@ /* Magic cookies to recognize object id's. Hi, Eva, David */ #define HUF_COOKIE 2805.1980 #define HUF_REFADDR_COOKIE 1811.1976 +/* Identifier for PERL_MAGIC_ext magic */ +#define HUF_IDCACHE 0x4944 /* For global cache of object registry */ #define MY_CXT_KEY "Hash::Util::FieldHash::_guts" XS_VERSION @@ -58,29 +60,34 @@ void HUF_global(I32 how) { } } -/* the id as an SV, optionally marked in the nv (unused feature) */ -SV* HUF_id(SV* ref, NV cookie) { - SV* id = sv_newmortal(); - if (cookie == 0 ) { - SvUPGRADE(id, SVt_PVIV); - } else { - SvUPGRADE(id, SVt_PVNV); - SvNV_set(id, cookie); - SvNOK_on(id); - } - SvIV_set(id, PTR2UV(SvRV(ref))); - SvIOK_on(id); - return id; -} +/* Object id */ -/* plain id, only used for field hash entries in field lists */ -SV* HUF_field_id(SV* obj) { - return HUF_id(obj, 0.0); -} +/* definition of id transformation */ +#define HUF_OBJ_ID(x) newSVuv(PTR2UV(x)) -/* object id (same as plain, may be different in future) */ SV* HUF_obj_id(SV* obj) { - return HUF_id(obj, 0.0); + SV *item = SvRV(obj); + MAGIC *mg; + SV *id; + + /* Get cached object ID, if it exists */ + if (SvTYPE(item) >= SVt_PVMG) { + for ( mg = SvMAGIC(item); mg; mg = mg->mg_moremagic ) { + if ((mg->mg_type == PERL_MAGIC_ext) && + (mg->mg_private == HUF_IDCACHE) + ) { + return mg->mg_obj; + } + } + } + + /* Create an object ID, cache it */ + id = HUF_OBJ_ID(item); + mg = sv_magicext(item, id, PERL_MAGIC_ext, NULL, NULL, 0); + mg->mg_private = HUF_IDCACHE; + + /* Return the object ID */ + return id; } /* set up uvar magic for any sv */ @@ -127,17 +134,19 @@ I32 HUF_destroy_obj(pTHX_ IV index, SV* trigger) { } /* make it safe in case we must run in global clenaup, after all */ if (PL_in_clean_all) - HUF_global(HUF_RESET); + HUF_global(HUF_RESET); /* shoudn't be needed */ hv_delete_ent(MY_CXT.ob_reg, ob_id, G_DISCARD, 0); } return 0; } -/* Create a trigger for an object. The trigger is a magical weak ref - * that fires when the weak ref expires. it holds the original id of - * the object, and a list of field hashes from which the object may - * have to be deleted. The trigger is stored in the object registry - * and also deleted when the object expires. +/* Create a trigger for an object. The trigger is a magical SV + * that holds a weak ref to the object. The magic fires when the object + * expires and takes care of garbage collection in registred hashes. + * For that purpose, the magic structure holds the original id of + * the object, and a list (a hash, really) of hashes from which the + * object may * have to be deleted. The trigger is stored in the + * object registry and is also deleted when the object expires. */ SV* HUF_new_trigger(SV* obj, SV* ob_id) { dMY_CXT; @@ -161,46 +170,68 @@ SV* HUF_ask_trigger(SV* ob_id) { } /* get the trigger for an object, creating it if necessary */ -SV* HUF_get_trigger(SV* obj, SV* ob_id) { +SV* HUF_get_trigger0(SV* obj, SV* ob_id) { SV* trigger; if (!(trigger = HUF_ask_trigger(ob_id))) trigger = HUF_new_trigger(obj, ob_id); return trigger; } -/* mark an object (trigger) as having been used with a field */ +SV* HUF_get_trigger(SV* obj, SV* ob_id) { + SV* trigger = HUF_ask_trigger(ob_id); + if (!trigger) + trigger = HUF_new_trigger(obj, ob_id); + return( trigger); +} + +/* mark an object (trigger) as having been used with a field + (a clenup-liability) +*/ void HUF_mark_field(SV* trigger, SV* field) { AV* cont = HUF_get_trigger_content(trigger); HV* field_tab = (HV*) *av_fetch(cont, 1, 0); SV* field_ref = newRV_inc(field); - SV* field_id = HUF_field_id(field_ref); - hv_store_ent(field_tab, field_id, field_ref, 0); + UV field_addr = PTR2UV(field); + hv_store(field_tab, (char *)&field_addr, sizeof(field_addr), field_ref, 0); } +/* Determine, from the value of action, whether this call may create a new + * hash key */ #define HUF_WOULD_CREATE_KEY(x) ((x) != HV_DELETE && ((x) & (HV_FETCH_ISSTORE | HV_FETCH_LVALUE))) -/* The key exchange function. It communicates with S_hv_magic_uvar_xkey +/* The key exchange functions. They communicate with S_hv_magic_uvar_xkey * in hv.c */ -I32 HUF_watch_key(pTHX_ IV action, SV* field) { +I32 HUF_watch_key_safe(pTHX_ IV action, SV* field) { MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); SV* keysv; - if (mg) { - keysv = mg->mg_obj; - if (keysv && !SvROK(keysv)) { /* is string an object-id? */ - SV* obj = HUF_ask_trigger(keysv); - if (obj) - keysv = obj; /* use the object instead, so registry happens */ - } - if (keysv && SvROK(keysv)) { + if (mg && (keysv = mg->mg_obj)) { + if (SvROK(keysv)) { /* ref key */ SV* ob_id = HUF_obj_id(keysv); mg->mg_obj = ob_id; /* key replacement */ if (HUF_WOULD_CREATE_KEY(action)) { SV* trigger = HUF_get_trigger(keysv, ob_id); HUF_mark_field(trigger, field); } + } else if (HUF_WOULD_CREATE_KEY(action)) { /* string key */ + /* registered as object id? */ + SV* trigger; + if ( trigger = HUF_ask_trigger(keysv)) + HUF_mark_field( trigger, field); } } else { - Perl_die(aTHX_ "Rogue call of 'HUF_watch_key'"); + Perl_die(aTHX_ "Rogue call of 'HUF_watch_key_safe'"); + } + return 0; +} + +I32 HUF_watch_key_id(pTHX_ IV action, SV* field) { + MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); + SV* keysv; + if (mg && (keysv = mg->mg_obj)) { + if (SvROK(keysv)) /* ref key */ + mg->mg_obj = HUF_obj_id(keysv); /* key replacement */ + } else { + Perl_die(aTHX_ "Rogue call of 'HUF_watch_key_id'"); } return 0; } @@ -211,14 +242,38 @@ int HUF_get_status(HV* hash) { if (hash && (SvTYPE(hash) == SVt_PVHV)) { MAGIC* mg; struct ufuncs* uf; - ans = (mg = mg_find((SV*)hash, PERL_MAGIC_uvar)) && + if ((mg = mg_find((SV*)hash, PERL_MAGIC_uvar)) && (uf = (struct ufuncs *)mg->mg_ptr) && - (uf->uf_val == &HUF_watch_key) && - (uf->uf_set == NULL); + (uf->uf_set == NULL) + ) { + ans = HUF_func_2mode(uf->uf_val); + } } return ans; } +int HUF_func_2mode( I32(* val)(pTHX_ IV, SV*)) { + int ans = 0; + if (val == &HUF_watch_key_id) + ans = 1; + if (val == &HUF_watch_key_safe) + ans = 2; + return(ans); +} + +I32(* HUF_mode_2func( int mode))(pTHX_ IV, SV*) { + I32(* ans)(pTHX_ IV, SV*) = NULL; + switch (mode) { + case 1: + ans = &HUF_watch_key_id; + break; + case 2: + ans = &HUF_watch_key_safe; + break; + } + return(ans); +} + /* Thread support. These routines are called by CLONE (and nothing else) */ /* Fix entries for one object in all field hashes */ @@ -231,11 +286,11 @@ void HUF_fix_trigger(SV* trigger, SV* new_id) { hv_iterinit(field_tab); while (ent = hv_iternext(field_tab)) { SV* field_ref = HeVAL(ent); - SV* field_id = HUF_field_id(field_ref); HV* field = (HV*)SvRV(field_ref); + UV field_addr = PTR2UV(field); SV* val; /* recreate field tab entry */ - hv_store_ent(new_tab, field_id, SvREFCNT_inc(field_ref), 0); + hv_store(new_tab, (char *)&field_addr, sizeof(field_addr), SvREFCNT_inc(field_ref), 0); /* recreate field entry, if any */ if (val = hv_delete_ent(field, old_id, 0, 0)) hv_store_ent(field, new_id, SvREFCNT_inc(val), 0); @@ -260,7 +315,20 @@ void HUF_fix_objects(void) { for (i = 0; i <= len; ++i) { SV* old_id = *av_fetch(oblist, i, 0); SV* trigger = hv_delete_ent(MY_CXT.ob_reg, old_id, 0, 0); - SV* new_id = HUF_obj_id(trigger); + SV* obj = SvRV(trigger); + MAGIC *mg; + + SV* new_id = HUF_OBJ_ID(obj); + + /* Replace cached object ID with this new one */ + for (mg = SvMAGIC(obj); mg; mg = mg->mg_moremagic) { + if ((mg->mg_type == PERL_MAGIC_ext) && + (mg->mg_private == HUF_IDCACHE) + ) { + mg->mg_obj = new_id; + } + } + HUF_fix_trigger(trigger, new_id); hv_store_ent(MY_CXT.ob_reg, new_id, SvREFCNT_inc(trigger), 0); } @@ -292,9 +360,10 @@ CODE: (field = (HV*)SvRV(href)) && SvTYPE(field) == SVt_PVHV ) { + HUF_add_uvar_magic( SvRV(href), - &HUF_watch_key, + HUF_mode_2func( mode), NULL, 0, NULL @@ -305,6 +374,50 @@ OUTPUT: RETVAL void +id(SV* ref) +PPCODE: + if (SvROK(ref)) { + XPUSHs(HUF_obj_id(ref)); + } else { + XPUSHs(ref); + } + +SV* +id_2obj(SV* id) +PROTOTYPE: $ +CODE: + SV* obj = HUF_ask_trigger(id); + if (obj) { + RETVAL = newRV_inc(SvRV(obj)); + } else { + RETVAL = &PL_sv_undef; + } +OUTPUT: + RETVAL + +SV* +register(SV* obj, ...) +PROTOTYPE: $@ +CODE: + SV* trigger; + int i; + RETVAL = NULL; + if (!SvROK(obj)) { + Perl_die(aTHX_ "Attempt to register a non-ref"); + } else { + RETVAL = newRV_inc(SvRV(obj)); + } + trigger = HUF_get_trigger(obj, HUF_obj_id(obj)); + for (i = 1; i < items; ++ i) { + SV* field_ref = POPs; + if (SvROK(field_ref) && (SvTYPE(SvRV(field_ref)) == SVt_PVHV)) { + HUF_mark_field(trigger, SvRV(field_ref)); + } + } +OUTPUT: + RETVAL + +void CLONE(char* classname) CODE: if (0 == strcmp(classname, "Hash::Util::FieldHash")) { diff --git a/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm b/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm index be1ab9a..b025400 100644 --- a/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm +++ b/ext/Hash/Util/FieldHash/lib/Hash/Util/FieldHash.pm @@ -11,11 +11,16 @@ our %EXPORT_TAGS = ( 'all' => [ qw( fieldhash fieldhashes + idhash + idhashes + id + id_2obj + register )], ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); -our $VERSION = '0.02'; +our $VERSION = '1.01'; { require XSLoader; @@ -28,22 +33,33 @@ sub fieldhash (\%) { for ( shift ) { return unless ref() && reftype( $_) eq 'HASH'; return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); - return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1); + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2; + return; + } +} + +sub idhash (\%) { + for ( shift ) { + return unless ref() && reftype( $_) eq 'HASH'; + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); + return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1; return; } } sub fieldhashes { map &fieldhash( $_), @_ } +sub idhashes { map &idhash( $_), @_ } 1; __END__ =head1 NAME -Hash::Util::FieldHash - Associate references with data +Hash::Util::FieldHash - Support for Inside-Out Classes -=head1 SYNOPSIS +=head1 Synopsis + ### Create fieldhashes use Hash::Util qw(fieldhash fieldhashes); # Create a single field hash @@ -54,123 +70,299 @@ Hash::Util::FieldHash - Associate references with data # ...or any number fieldhashes @hashrefs; + ### Create an idhash and register it for garbage collection + use Hash::Util::FieldHash qw( idhash register); + idhash my %name; + my $obj = \ do { my $o }; + # register the idhash for garbage collection with $object + register( $obj, \ %name); + # the following entry wil be deleted when $obj goes out of scope + $name{ $obj} = 'John Doe'; + + ### Register an ordinary hash for garbage collection + use Hash::Util::FieldHash qw( id register); + my %name; + my $obj = \ do { my $o }; + # register the hash %name for garbage collection of $obj's id + register $obj, \ %name; + # the following entry wil be deleted when $obj goes out of scope + $name{id $obj} = 'John Doe'; + + =head1 Functions -Two functions generate field hashes: +C offers a number of functions in support of +L of class construction. =over -=item fieldhash +=item id - fieldhash %hash; + id($obj) -Creates a single field hash. The argument must be a hash. Returns -a reference to the given hash if successful, otherwise nothing. +Returns the reference address of a reference $obj. If $obj is +not a reference, returns $obj. -=item fieldhashes +This function is a stand-in replacement for +L, that is, it returns +the reference address of its argument as a numeric value. The only +difference is that C returns C when given a +non-reference while C returns its argument unchanged. - fieldhashes @hashrefs; +C also uses a caching technique that makes it faster when +the id of an object is requested often, but slower if it is needed +only once or twice. -Creates any number of field hashes. Arguments must be hash references. -Returns the converted hashrefs in list context, their number in scalar -context. +=item id_2obj -=back + $obj = id_2obj($id) -=head1 Description - -=head2 Features - -Field hashes have three basic features: - -=over - -=item Key exchange - -If a I is used as a field hash key, it is replaced by -the integer value of the reference address. - -=item Thread support +If C<$id> is the id of a registered object (see L), returns +the object, otherwise an undefined value. For registered objects this +is the inverse function of C. -In a new I a field hash is updated so that its keys reflect -the new reference addresses of the original objects. +=item register -=item Garbage collection + register($obj) + register($obj, @hashrefs) -When a reference goes I after having been used as a field hash key, -the hash entry will be deleted. +In the first form, registers an object to work with for the function +C. In the second form, it additionally marks the given +hashrefs down for garbage collection. This means that when the object +goes out of scope, any entries in the given hashes under the key of +C will be deleted from the hashes. -=back +It is a fatal error to register a non-reference $obj. Any non-hashrefs +among the following arguments are silently ignored. -Field hashes are designed to maintain an association of a reference -with a value. The association is independent of the bless status of -the key, it is thread safe and garbage-collected. These properties -are desirable in the construction of inside-out classes. +It is I an error to register the same object multiple times with +varying sets of hashrefs. Any hashrefs that are not registered yet +will be added, others ignored. -When used with keys that are plain scalars (not references), field -hashes behave like normal hashes. +Registry also implies thread support. When a new thread is created, +all references are replaced with new ones, including all objects. +If a hash uses the reference address of an object as a key, that +connection would be broken. With a registered object, its id will +be updated in all hashes registered with it. -=head2 Rationale +=item idhash -The association of a reference (namely an object) with a value is -central to the concept of inside-out classes. These classes don't -store the values of object variables (fields) inside the object itself, -but outside, as it were, in private hashes keyed by the object. + idhash my %hash -Normal hashes can be used for the purpose, but turn out to have -some disadvantages: +Makes an idhash from the argument, which must be a hash. -=over +An I works like a normal hash, except that it stringifies a +I differently. A reference is stringified +as if the C function had been invoked on it, that is, its +reference address in decimal is used as the key. -=item Stringification +=item idhashes -The stringification of references depends on the bless status of the -reference. A plain hash reference C<$ref> may stringify as C, -but after being blessed into class C the same reference will look like -as C, unless class C overloads stringification, -in which case it may show up as C. In a normal hash, the -stringified reference wouldn't be found again after the blessing. + idhashes \ my( %hash, %gnash, %trash) + idhashes \ @hashrefs -Bypassing stringification by use of C has been -used to correct this. Field hashes automatically stringify their -keys to the reference address in decimal. +Creates many idhashes from its hashref arguments. Returns those +arguments that could be converted or their number in scalar context. -=item Thread Dependency +=item fieldhash -When a new thread is created, the Perl interpreter is cloned, which -implies that all variables change their reference address. Thus, -in a daughter thread, the "same" reference C<$ref> contains a different -address, but the cloned hash still holds the key based on the original -address. Again, the association is broken. + fieldhash %hash; -A C method is required to update the hash on thread creation. -Field hashes come with an appropriate C. +Creates a single fieldhash. The argument must be a hash. Returns +a reference to the given hash if successful, otherwise nothing. -=item Garbage Collection +A I is, in short, an idhash with auto-registry. When an +object (or, indeed, any reference) is used as a fieldhash key, the +fieldhash is automatically registered for garbage collection with +the object, as if C had been called. -When a reference (an object) is used as a hash key, the entry stays -in the hash when the object eventually goes out of scope. That can result -in a memory leak because the data associated with the object is not -freed. Worse than that, it can lead to a false association if the -reference address of the original object is later re-used. This -is not a remote possibility, address re-use happens all the time and -is a certainty under many conditions. +=item fieldhashes -If the references in question are indeed objects, a C method -I clean up hashes that the object uses for storage. Special -methods are needed when unblessed references can occur. + fieldhashes @hashrefs; -Field hashes have garbage collection built in. If a reference -(blessed or unblessed) goes out of scope, corresponding entries -will be deleted from all field hashes. +Creates any number of field hashes. Arguments must be hash references. +Returns the converted hashrefs in list context, their number in scalar +context. =back -Thus, an inside-out class based on field hashes doesn't need a C -method, nor a C method for thread support. That facilitates the -construction considerably. +=head1 Description -=head2 How to use +A word on terminology: I shall use the term I for a scalar +piece of data that a class associates with an object. Other terms that +have been used for this concept are "object variable", "(object) property", +"(object) attribute" and more. Especally "attribute" has some currency +among Perl programmer, but that clashes with the C pragma. The +term "field" also has some currency in this sense and doesn't seem +to conflict with other Perl terminology. + +In Perl, an object is a blessed reference. The standard way of associating +data with an object ist to store the data inside the object's body, that is, +the piece of data pointed to by the reference. + +In consequence, if two or more classes want to access an object they +I agree on the type of refrerence and also on the organization of +data within the object body. Failure to agree on the type results in +immediate death when the wrong method tries to access an object. Failure +to agree on data organization may lead to one class trampling over the +data of another. + +This object model leads to a tight coupling between subclasses. +If one class wants to inherit from another (and both classes access +object data), the classes must agree about implementation details. +Inheritance can only be used among classes that are maintained together, +in a single source or not. + +In particular, it is not possible to write general-purpose classes +in this technique, classes that can advertise themselves as "Put me +on your @ISA list and use my methods". If the other class has different +ideas about how the object body is used, there is trouble. + +For reference L in L shows the standard implementation of +a simple class C in the well-known hash based way. It also demonstrates +the predictable failure to construct a common subclass C +of C and the class C (whose objects I be globrefs). + +Thus, techniques are of interest that store object data I in +the object body but some other place. + +=head2 The Inside-out Technique + +With I classes, each class declares a (typically lexical) +hash for each field it wants to use. The reference address of an +object is used as the hash key. By definition, the reference address +is unique to each object so this guarantees a place for each field that +is private to the class and unique to each object. See L in +L for a simple example. + +In comparison to the standard implementation where the object is a +hash and the fields correspond to hash keys, here the fields correspond +to hashes, and the object determines the hash key. Thus the hashes +appear to be turned I. + +The body of an object is never examined by an inside-out class, only +its reference address is used. This allows for the body of an actual +object to be I while the object methods of the class +still work as designed. This is a key feature of inside-out classes. + +=head2 Problems of Inside-out + +Inside-out classes give us freedom of inheritance, but as usual there +is a price. + +Most obviously, there is the necessity of retrieving the reference +address of an object for each data access. It's a minor inconvenience, +but it does clutter the code. + +More important (and less obvious) is the necessity of garbage +collection. When a normal object dies, anything stored in the +object body is garbage-collected by perl. With inside-out objects, +Perl knows nothing about the data stored in field hashes by a class, +but these must be deleted when the object goes out of scope. Thus +the class must provide a C method to take care of that. + +In the presence of multiple classes it can be non-trivial +to make sure that every relevant destructor is called for +every object. Perl calls the first one it finds on the +inheritance tree (if any) and that's it. + +A related issue is thread-safety. When a new thread is created, +the Perl interpreter is cloned, which implies that all reference +addresses in use will be replaced with new ones. Thus, if a class +tries to access a field of a cloned object its (cloned) data will +still be stored under the now invalid reference address of the +original in the parent thread. A general C method must +be provided to re-establish the association. + +=head2 Solutions + +C addresses these issues on several +levels. + +The C function is provided in addition to the +existing C. Besides its short name +it can be a little faster under some circumstances (and a +bit slower under others). Benchmark if it matters. The +working of C also allows the use of the class name +as a I as described L. + +The C function is incorporated in I in the sense +that it is called automatically on every key that is used with +the hash. No explicit call is necessary. + +The problems of garbage collection and thread safety are both +addressed by the function C. It registers an object +together with any number of hashes. Registry means that when the +object dies, an entry in any of the hashes under the reference +address of this object will be deleted. This guarantees garbage +collection in these hashes. It also means that on thread +cloning the object's entries in registered hashes will be +replaced with updated entries whose key is the cloned object's +reference address. Thus the object-data association becomes +thread-safe. + +Object registry is best done when the object is initialized +for use with a class. That way, garbage collection and thread +safety are established for every object and every field that is +initialized. + +Finally, I incorporate all these functions in one +package. Besides automatically calling the C function +on every object used as a key, the object is registered with +the field hash on first use. Classes based on field hashes +are fully garbage-collected and thread safe without further +measures. + +=head2 More Problems + +Another problem that occurs with inside-out classes is serialization. +Since the object data is not in its usual place, standard routines +like C, C and +C can't deal with it on their own. Both +C and C provide the necessary hooks to +make things work, but the functions or methods used by the hooks +must be provided by each inside-out class. + +A general solution to the serialization problem would require another +level of registry, one that that associates I and fields. +So far, he functions of C are unaware of +any classes, which I consider a feature. Therefore C +doesn't address the serialization problems. + +=head2 The Generic Object + +Classes based on the C function (and hence classes based on +C and C) show a peculiar behavior in that +the class name can be used like an object. Specifically, methods +that set or read data associated with an object continue to work as +class methods, just as if the class name were an object, distinct from +all other objects, with its own data. This object may be called +the I of the class. + +This works because field hashes respond to keys that are not refrences +like a normal hash would and use the string offered as the hash key. +Thus, if a method is called as a class method, the field hash is presented +with the class name instead of an object and blithely uses it as a key. +Since the keys of real objects are decimal numbers, there is no +conflict and the slot in the field hash can be used like any other. +The C function behaves correspondingly with respect to non-reference +arguments. + +Two possible uses (besides ignoring the property) come to mind. +A singleton class could be implemented this using the generic object. +If necessary, an C method could die or ignore calls with +actual objects (references), so only the generic object will ever exist. + +Another use of the generic object would be as a template. It is +a convenient place to store class-specific defaults for various +fields to be used in actual object initialization. + +Usually, the feature can be entirely ignored. Calling I as I normally leads to an error and isn't used +routinely anywhere. It may be a problem that this error isn't +indicated by a class with a generic object. + +=head2 How to use Field Hashes Traditionally, the definition of an inside-out class contains a bare block inside which a number of lexical hashes are declared and the @@ -214,7 +406,7 @@ early and call the functions qualified: Hash::Util::FieldHash::fieldhash my %foo; Otherwise, import the functions into a convenient package like -C or, more generic, C +C or, more general, C { package Aux; @@ -227,112 +419,338 @@ and call as needed. -=head2 Examples +=head2 Garbage-Collected Hashes + +Garbage collection in a field hash means that entries will "spontaneously" +disappear when the object that created them disappears. That must be +borne in mind, especially when looping over a field hash. If anything +you do inside the loop could cause an object to go out of scope, a +random key may be deleted from the hash you are looping over. That +can throw the loop iterator, so it's best to cache a consistent snapshot +of the keys and/or values and loop over that. You will still have to +check that a cached entry still exists when you get to it. + +Garbage collection can be confusing when keys are created in a field hash +from normal scalars as well as references. Once a reference is I with +a field hash, the entry will be collected, even if it was later overwritten +with a plain scalar key (every positive integer is a candidate). This +is true even if the original entry was deleted in the meantime. In fact, +deletion from a field hash, and also a test for existence constitute +I in this sense and create a liability to delete the entry when +the reference goes out of scope. If you happen to create an entry +with an identical key from a string or integer, that will be collected +instead. Thus, mixed use of references and plain scalars as field hash +keys is not entirely supported. + +=head1 Examples -Well... really only one example, and a rather trivial one at that. -There isn't much to exemplify. +The examples show a very simple class that implements a I, consisting +of a first and last name (no middle initial). The name class has four +methods: -=head3 A simple class... +=over -The following example shows an utterly simple inside-out class -C, created using field hashes. It has a single field, -incorporated as the field hash C<%time>. Besides C it has only -two methods: an initializer called C that sets the field to -the current time, and a read-only accessor C that returns the -time in C format. +=item * C - # The class TimeStamp +An object method that initializes the first and last name to its +two arguments. If called as a class method, C creates an +object in the given class and initializes that. - use Hash::Util::FieldHash; - { - package TimeStamp; +=item * C - Hash::Util::FieldHash::fieldhash my %time; +Retrieve the first name - sub stamp { $time{ $_[ 0]} = time; shift } # initializer - sub when { scalar localtime $time{ shift()} } # read accessor - sub new { bless( do { \ my $x }, shift)->stamp } # creator - } +=item * C - # See if it works - my $ts = TimeStamp->new; - print $ts->when, "\n"; +Retrieve the last name -Remarkable about this class definition is what isn't there: there -is no C method, inherited or local, and no C method -is needed to make it thread-safe. Not to mention no need to call -C or something similar in the accessors. +=item * C -=head3 ...in action +Retrieve the full name, the first and last name joined by a blank. -The outstanding property of inside-out classes is their "inheritability". -Like all inside-out classes, C is a I. -We can put it on the C<@ISA> list of arbitrary classes and its methods -will just work, no matter how the host class is constructed. No traditional -Perl class allows that. The following program demonstrates the feat: +=back - # Make a sample of objects to add time stamps to. +The examples show this class implemented with different levels of +support by C. All supported combinations +are shown. The difference between implementations is often quite +small. The implementations are: - use Math::Complex; - use IO::Handle; +=over - my @objects = ( - Math::Complex->new( 12, 13), - IO::Handle->new(), - qr/abc/, # in class Regexp - bless( [], 'Boing'), # made up on the spot - # add more - ); +=item * C - # Prepare for use with TimeStamp +A conventional (not inside-out) implementation where an object is +a hash that stores the field values, without support by +C. This implementation doesn't allow +arbitrary inheritance. + +=item * C + +Inside-out implementation based on the C function. It needs +a C method. For thread support a C method (not shown) +would also be needed. Instead of C the +function C could be used with very little +functional difference. This is the basic pattern of an inside-out +class. + +=item * C + +Idhash-based inside-out implementation. Like L it needs +a C method and would need C for thread support. + +=item * C + +Inside-out implementation based on the C function with explicit +object registry. No destructor is needed and objects are thread safe. + +=item * C + +Idhash-based inside-out implementation with explicit object registry. +No destructor is needed and objects are thread safe. + +=item * C + +Fieldhash-based inside-out implementation. Object registry happens +automatically. No destructor is needed and objects are thread safe. + +=back + +These examples are realized in the code below, which could be copied +to a file F. + +=head2 Example 1 + + use strict; use warnings; + + { + package Name_hash; # standard implementation: the object is a hash + + sub init { + my $obj = shift; + my ( $first, $last) = @_; + # create an object if called as class method + $obj = bless {}, $obj unless ref $obj; + $obj->{ first} = $first; + $obj->{ last} = $last; + $obj; + } + + sub first { shift()->{ first} } + sub last { shift()->{ last} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } - for ( @objects ) { - no strict 'refs'; - push @{ ref() . '::ISA' }, 'TimeStamp'; } - # Now apply TimeStamp methods to all objects and show the result + { + package Name_id; + use Hash::Util::FieldHash qw( id); + + my ( %first, %last); + + sub init { + my $obj = shift; + my ( $first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + $first{ id $obj} = $first; + $last{ id $obj} = $last; + $obj; + } + + sub first { $first{ id shift()} } + sub last { $last{ id shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + + sub DESTROY { + my $id = id shift; + delete $first{ $id}; + delete $last{ $id}; + } - for my $obj ( @objects ) { - $obj->stamp; - report( $obj, $obj->when); } - # print a description of the object and the result of ->when - - use Scalar::Util qw( reftype); - sub report { - my ( $obj, $when) = @_; - my $msg = sprintf "This is a %s object(a %s), its time is %s", - ref $obj, - reftype $obj, - $when; - $msg =~ s/\ba(?= [aeiouAEIOU])/an/g; # grammar matters :) - print "$msg\n"; + { + package Name_idhash; + use Hash::Util::FieldHash; + + Hash::Util::FieldHash::idhashes( \ my ( %first, %last)); + + sub init { + my $obj = shift; + my ( $first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + $first{ $obj} = $first; + $last{ $obj} = $last; + $obj; + } + + sub first { $first{ shift()} } + sub last { $last{ shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + + sub DESTROY { + my $n = shift; + delete $first{ $n}; + delete $last{ $n}; + } + } -=head2 Garbage-Collected Hashes + { + package Name_id_reg; + use Hash::Util::FieldHash qw( id register); + + my ( %first, %last); + + sub init { + my $obj = shift; + my ( $first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + register( $obj, \ ( %first, %last)); + $first{ id $obj} = $first; + $last{ id $obj} = $last; + $obj; + } + + sub first { $first{ id shift()} } + sub last { $last{ id shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + } + + { + package Name_idhash_reg; + use Hash::Util::FieldHash qw( register); + + Hash::Util::FieldHash::idhashes \ my ( %first, %last); + + sub init { + my $obj = shift; + my ( $first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + register( $obj, \ ( %first, %last)); + $first{ $obj} = $first; + $last{ $obj} = $last; + $obj; + } + + sub first { $first{ shift()} } + sub last { $last{ shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + } + + { + package Name_fieldhash; + use Hash::Util::FieldHash; + + Hash::Util::FieldHash::fieldhashes \ my ( %first, %last); + + sub init { + my $obj = shift; + my ( $first, $last) = @_; + # create an object if called as class method + $obj = bless \ my $o, $obj unless ref $obj; + $first{ $obj} = $first; + $last{ $obj} = $last; + $obj; + } + + sub first { $first{ shift()} } + sub last { $last{ shift()} } + + sub name { + my $n = shift; + join ' ' => $n->first, $n->last; + } + } + + 1; + +To exercise the various implementations the script L can +be used. + +It sets up a class C that is a mirror of one of the implementation +classes C, C, ..., C. That determines +which implementation is run. + +The script first verifies the function of the C class. + +In the second step, the free inheritablility of the implementation +(or lack thereof) is demonstrated. For this purpose it constructs +a class called C which is a common subclass of C and +the standard class C. This puts inheritability to the test +because objects of C I be globrefs. Objects of C +should behave like a file opened for reading and also support the C +method. This class juncture works with exception of the C +implementation, where object initialization fails because of the +incompatibility of object bodies. + +=head2 Example 2 + + use strict; use warnings; $| = 1; + + use Example; + + { + package Name; + use base 'Name_id'; # define here which implementation to run + } + + + # Verify that the base package works + my $n = Name->init( qw( Albert Einstein)); + print $n->name, "\n"; + print "\n"; + + # Create a named file handle (See definition below) + my $nf = NamedFile->init( qw( /tmp/x Filomena File)); + # use as a file handle... + for ( 1 .. 3 ) { + my $l = <$nf>; + print "line $_: $l"; + } + # ...and as a Name object + print "...brought to you by ", $nf->name, "\n"; + exit; + + + # Definition of NamedFile + package NamedFile; + use base 'Name'; + use base 'IO::File'; + + sub init { + my $obj = shift; + my ( $file, $first, $last) = @_; + $obj = $obj->IO::File::new() unless ref $obj; + $obj->open( $file) or die "Can't read '$file': $!"; + $obj->Name::init( $first, $last); + } + __END__ -Garbage collection in a field hash means that entries will "spontaneously" -disappear when the object that created them disappears. That must be -borne in mind, especially when looping over a field hash. If anything -you do inside the loop could cause an object to go out of scope, a -random key may be deleted from the hash you are looping over. That -can throw the loop iterator, so it's best to cache a consistent snapshot -of the keys and/or values and loop over that. You will still have to -check that a cached entry still exists when you get to it. -Garbage collection can be confusing when keys are created in a field hash -from normal scalars as well as references. Once a reference is I with -a field hash, the entry will be collected, even if it was later overwritten -with a plain scalar key (every positive integer is a candidate). This -is true even if the original entry was deleted in the meantime. In fact, -deletion from a field hash, and also a test for existence constitute -I in this sense and create a liability to delete the entry when -the reference goes out of scope. If you happen to create an entry -with an identical key from a string or integer, that will be collected -instead. Thus, mixed use of references and plain scalars as field hash -keys is not entirely supported. =head1 Guts diff --git a/ext/Hash/Util/FieldHash/t/02_function.t b/ext/Hash/Util/FieldHash/t/02_function.t index 8ffbae6..012ada7 100644 --- a/ext/Hash/Util/FieldHash/t/02_function.t +++ b/ext/Hash/Util/FieldHash/t/02_function.t @@ -16,6 +16,8 @@ my $ob_reg = Hash::Util::FieldHash::_ob_reg; ######################### +my $fieldhash_mode = 2; + # define ref types to use with some tests my @test_types; BEGIN { @@ -23,15 +25,66 @@ BEGIN { @test_types = qw( SCALAR ARRAY HASH GLOB); } -### Object registry +### The id() function +{ + BEGIN { $n_tests += 4 } + my $ref = []; + is id( $ref), refaddr( $ref), "id is refaddr"; + my %h; + Hash::Util::FieldHash::_fieldhash \ %h, $fieldhash_mode; + $h{ $ref} = (); + my ( $key) = keys %h; + is id( $ref), $key, "id is FieldHash key"; + my $scalar = 'string'; + is id( $scalar), $scalar, "string passes unchanged"; + $scalar = 1234; + is id( $scalar), $scalar, "number passes unchanged"; +} + +### idhash functionality +{ + BEGIN { $n_tests += 3 } + Hash::Util::FieldHash::idhash my %h; + my $ref = sub {}; + my $val = 123; + $h{ $ref} = $val; + my ( $key) = keys %h; + is $key, id( $ref), "idhash key correct"; + is $h{ $ref}, $val, "value retrieved through ref"; + is scalar keys %$ob_reg, 0, "no auto-registry in idhash"; +} + +### the register() and id_2obj functions +{ + BEGIN { $n_tests += 9 } + my $obj = {}; + my $id = id( $obj); + is id_2obj( $id), undef, "unregistered object not retrieved"; + is scalar keys %$ob_reg, 0, "object registry empty"; + is register( $obj), $obj, "object returned by register"; + is scalar keys %$ob_reg, 1, "object registry nonempty"; + is id_2obj( $id), $obj, "registered object retrieved"; + my %hash; + register( $obj, \ %hash); + $hash{ $id} = 123; + is scalar keys %hash, 1, "key present in registered hash"; + undef $obj; + is scalar keys %hash, 0, "key collected from registered hash"; + is scalar keys %$ob_reg, 0, "object registry empty again"; + eval { register( 1234) }; + like $@, qr/^Attempt to register/, "registering non-ref is fatal"; + +} + +### Object auto-registry BEGIN { $n_tests += 3 } { { my $obj = {}; { - my $h; - fieldhash %$h; + my $h = {}; + Hash::Util::FieldHash::_fieldhash $h, $fieldhash_mode; $h->{ $obj} = 123; is( keys %$ob_reg, 1, "one object registered"); } @@ -46,7 +99,7 @@ BEGIN { $n_tests += 6 } { no warnings 'misc'; my $val = 123; - fieldhash my %h; + Hash::Util::FieldHash::_fieldhash \ my( %h), $fieldhash_mode; for ( [ str => 'abc'], [ ref => {}] ) { my ( $keytype, $key) = @$_; $h{ $key} = $val; @@ -58,15 +111,19 @@ BEGIN { $n_tests += 6 } } ### id-action (stringification independent of bless) -BEGIN { $n_tests += 4 } +BEGIN { $n_tests += 5 } +# use Scalar::Util qw( refaddr); { my( %f, %g, %h, %i); - fieldhash %f; - fieldhash %g; + Hash::Util::FieldHash::_fieldhash \ %f, $fieldhash_mode; + Hash::Util::FieldHash::_fieldhash \ %g, $fieldhash_mode; my $val = 123; my $key = []; $f{ $key} = $val; is( $f{ $key}, $val, "plain key set in field"); + my ( $id) = keys %f; + my $refaddr = hex +($key =~ /\(0x([[:xdigit:]]+)\)$/)[ 0]; + is $id, $refaddr, "key is refaddr"; bless $key; is( $f{ $key}, $val, "access through blessed"); $key = []; @@ -77,17 +134,19 @@ BEGIN { $n_tests += 4 } } # Garbage collection -BEGIN { $n_tests += 1 + 2*( 3*@test_types + 5) + 1 } +BEGIN { $n_tests += 1 + 2*( 3*@test_types + 5) + 1 + 2 } { - fieldhash my %h; + my %h; + Hash::Util::FieldHash::_fieldhash \ %h, $fieldhash_mode; $h{ []} = 123; is( keys %h, 0, "blip"); } for my $preload ( [], [ map {}, 1 .. 3] ) { my $pre = @$preload ? ' (preloaded)' : ''; - fieldhash my %f; + my %f; + Hash::Util::FieldHash::_fieldhash \ %f, $fieldhash_mode; my @preval = map "$_", @$preload; @f{ @$preload} = @preval; # Garbage collection separately @@ -127,11 +186,23 @@ for my $preload ( [], [ map {}, 1 .. 3] ) { } is( keys %$ob_reg, 0, "preload gone after loop"); +# autovivified key +{ + my %h; + Hash::Util::FieldHash::_fieldhash \ %h, $fieldhash_mode; + my $ref = {}; + my $x = $h{ $ref}->[ 0]; + is keys %h, 1, "autovivified key present"; + undef $ref; + is keys %h, 0, "autovivified key collected"; +} + # big key sets BEGIN { $n_tests += 8 } { my $size = 10_000; - fieldhash( my %f); + my %f; + Hash::Util::FieldHash::_fieldhash \ %f, $fieldhash_mode; { my @refs = map [], 1 .. $size; $f{ $_} = 1 for @refs; @@ -171,7 +242,8 @@ BEGIN { $n_tests += 8 } BEGIN { $n_tests += 6 } { my $n_fields = 1000; - my @fields = map &fieldhash( {}), 1 .. $n_fields; + my @fields = map {}, $n_fields; + Hash::Util::FieldHash::_fieldhash( $_, $fieldhash_mode) for @fields; my @obs = map gen_ref( $_), @test_types; my $n_obs = @obs; for my $field ( @fields ) { @@ -194,7 +266,7 @@ BEGIN { $n_tests += 6 } # direct hash assignment BEGIN { $n_tests += 4 } { - fieldhashes \ my( %f, %g, %h); + Hash::Util::FieldHash::_fieldhash( $_, $fieldhash_mode) for \ my( %f, %g, %h); my $size = 6; my @obs = map [], 1 .. $size; @f{ @obs} = ( 1) x $size; @@ -208,9 +280,8 @@ BEGIN { $n_tests += 4 } } { - BEGIN { $n_tests += 1 } - fieldhash my %h; + Hash::Util::FieldHash::_fieldhash \ my( %h), $fieldhash_mode; bless \ %h, 'abc'; # this bus-errors with a certain bug ok( 1, "no bus error on bless") } @@ -219,11 +290,16 @@ BEGIN { plan tests => $n_tests } ####################################################################### +sub refaddr { + my $ref = shift; + hex +($ref =~ /\(0x([[:xdigit:]]+)\)$/)[ 0]; +} + use Symbol qw( gensym); BEGIN { my %gen = ( - SCALAR => sub { \ my $x }, + SCALAR => sub { \ my $o }, ARRAY => sub { [] }, HASH => sub { {} }, GLOB => sub { gensym }, diff --git a/ext/Hash/Util/lib/Hash/Util.pm b/ext/Hash/Util/lib/Hash/Util.pm index a4f143e..49f78e4 100644 --- a/ext/Hash/Util/lib/Hash/Util.pm +++ b/ext/Hash/Util/lib/Hash/Util.pm @@ -50,18 +50,6 @@ Hash::Util - A selection of general-utility hash subroutines =head1 SYNOPSIS - # Field hashes - - use Hash::Util qw(fieldhash fieldhashes); - - # Create a single field hash - fieldhash my %foo; - - # Create three at once... - fieldhashes \ my(%foo, %bar, %baz); - # ...or any number - fieldhashes @hashrefs; - # Restricted hashes use Hash::Util qw( @@ -100,23 +88,16 @@ Hash::Util - A selection of general-utility hash subroutines =head1 DESCRIPTION -C contains special functions for manipulating hashes that -don't really warrant a keyword. - -By default C does not export anything. - -=head2 Field hashes - -Field hashes are designed to maintain an association of a reference -with a value. The association is independent of the bless status of -the key, it is thread safe and garbage-collected. These properties -are desirable in the construction of inside-out classes. +C and C contain special functions +for manipulating hashes that don't really warrant a keyword. -When used with keys that are plain scalars (not references), field -hashes behave like normal hashes. +C contains as set of functions that support +L. These are described in +this document. C contains an (unrelated) +set of functions that support the use of hashes in +I, described in L. -Field hashes are defined in a separate module for which C -is a front end. For a detailed description see L. +By default C does not export anything. =head2 Restricted hashes @@ -530,8 +511,9 @@ Additional code by Yves Orton. =head1 SEE ALSO -L, L, L, -and L. +L, L and L. + +L. =cut