Commit | Line | Data |
1e73acc8 |
1 | package Hash::Util::FieldHash; |
2 | |
3 | use 5.009004; |
4 | use strict; |
5 | use warnings; |
1e73acc8 |
6 | use Scalar::Util qw( reftype); |
7 | |
8 | require Exporter; |
9 | our @ISA = qw(Exporter); |
10 | our %EXPORT_TAGS = ( |
11 | 'all' => [ qw( |
12 | fieldhash |
13 | fieldhashes |
d74d639b |
14 | idhash |
15 | idhashes |
16 | id |
17 | id_2obj |
18 | register |
1e73acc8 |
19 | )], |
20 | ); |
21 | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
1e73acc8 |
22 | |
d74d639b |
23 | our $VERSION = '1.01'; |
1e73acc8 |
24 | |
25 | { |
26 | require XSLoader; |
6ff38c27 |
27 | my %ob_reg; # private object registry |
28 | sub _ob_reg { \ %ob_reg } |
1e73acc8 |
29 | XSLoader::load('Hash::Util::FieldHash', $VERSION); |
30 | } |
31 | |
32 | sub fieldhash (\%) { |
33 | for ( shift ) { |
34 | return unless ref() && reftype( $_) eq 'HASH'; |
35 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); |
d74d639b |
36 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2; |
37 | return; |
38 | } |
39 | } |
40 | |
41 | sub idhash (\%) { |
42 | for ( shift ) { |
43 | return unless ref() && reftype( $_) eq 'HASH'; |
44 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); |
45 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1; |
1e73acc8 |
46 | return; |
47 | } |
48 | } |
49 | |
50 | sub fieldhashes { map &fieldhash( $_), @_ } |
d74d639b |
51 | sub idhashes { map &idhash( $_), @_ } |
1e73acc8 |
52 | |
53 | 1; |
54 | __END__ |
55 | |
56 | =head1 NAME |
57 | |
d74d639b |
58 | Hash::Util::FieldHash - Support for Inside-Out Classes |
1e73acc8 |
59 | |
b299c47c |
60 | =head1 SYNOPSIS |
1e73acc8 |
61 | |
d74d639b |
62 | ### Create fieldhashes |
1e73acc8 |
63 | use Hash::Util qw(fieldhash fieldhashes); |
6ff38c27 |
64 | |
1e73acc8 |
65 | # Create a single field hash |
66 | fieldhash my %foo; |
6ff38c27 |
67 | |
1e73acc8 |
68 | # Create three at once... |
69 | fieldhashes \ my(%foo, %bar, %baz); |
70 | # ...or any number |
71 | fieldhashes @hashrefs; |
72 | |
d74d639b |
73 | ### Create an idhash and register it for garbage collection |
74 | use Hash::Util::FieldHash qw( idhash register); |
75 | idhash my %name; |
76 | my $obj = \ do { my $o }; |
77 | # register the idhash for garbage collection with $object |
78 | register( $obj, \ %name); |
79 | # the following entry wil be deleted when $obj goes out of scope |
80 | $name{ $obj} = 'John Doe'; |
b299c47c |
81 | |
d74d639b |
82 | ### Register an ordinary hash for garbage collection |
83 | use Hash::Util::FieldHash qw( id register); |
84 | my %name; |
85 | my $obj = \ do { my $o }; |
86 | # register the hash %name for garbage collection of $obj's id |
87 | register $obj, \ %name; |
88 | # the following entry wil be deleted when $obj goes out of scope |
89 | $name{id $obj} = 'John Doe'; |
d74d639b |
90 | |
b299c47c |
91 | =head1 FUNCTIONS |
1e73acc8 |
92 | |
d74d639b |
93 | C<Hash::Util::FieldHash> offers a number of functions in support of |
b299c47c |
94 | L<The Inside-out Technique> of class construction. |
1e73acc8 |
95 | |
96 | =over |
97 | |
d74d639b |
98 | =item id |
1e73acc8 |
99 | |
d74d639b |
100 | id($obj) |
1e73acc8 |
101 | |
d74d639b |
102 | Returns the reference address of a reference $obj. If $obj is |
103 | not a reference, returns $obj. |
1e73acc8 |
104 | |
d74d639b |
105 | This function is a stand-in replacement for |
106 | L<Scalar::Util::refaddr|Scalar::Util/refaddr>, that is, it returns |
107 | the reference address of its argument as a numeric value. The only |
108 | difference is that C<refaddr()> returns C<undef> when given a |
109 | non-reference while C<id()> returns its argument unchanged. |
1e73acc8 |
110 | |
d74d639b |
111 | C<id()> also uses a caching technique that makes it faster when |
112 | the id of an object is requested often, but slower if it is needed |
113 | only once or twice. |
1e73acc8 |
114 | |
d74d639b |
115 | =item id_2obj |
1e73acc8 |
116 | |
d74d639b |
117 | $obj = id_2obj($id) |
1e73acc8 |
118 | |
d74d639b |
119 | If C<$id> is the id of a registered object (see L</register>), returns |
120 | the object, otherwise an undefined value. For registered objects this |
121 | is the inverse function of C<id()>. |
1e73acc8 |
122 | |
d74d639b |
123 | =item register |
1e73acc8 |
124 | |
d74d639b |
125 | register($obj) |
126 | register($obj, @hashrefs) |
1e73acc8 |
127 | |
d74d639b |
128 | In the first form, registers an object to work with for the function |
129 | C<id_2obj()>. In the second form, it additionally marks the given |
130 | hashrefs down for garbage collection. This means that when the object |
131 | goes out of scope, any entries in the given hashes under the key of |
132 | C<id($obj)> will be deleted from the hashes. |
1e73acc8 |
133 | |
d74d639b |
134 | It is a fatal error to register a non-reference $obj. Any non-hashrefs |
135 | among the following arguments are silently ignored. |
1e73acc8 |
136 | |
d74d639b |
137 | It is I<not> an error to register the same object multiple times with |
138 | varying sets of hashrefs. Any hashrefs that are not registered yet |
139 | will be added, others ignored. |
1e73acc8 |
140 | |
d74d639b |
141 | Registry also implies thread support. When a new thread is created, |
142 | all references are replaced with new ones, including all objects. |
143 | If a hash uses the reference address of an object as a key, that |
144 | connection would be broken. With a registered object, its id will |
145 | be updated in all hashes registered with it. |
1e73acc8 |
146 | |
d74d639b |
147 | =item idhash |
1e73acc8 |
148 | |
d74d639b |
149 | idhash my %hash |
1e73acc8 |
150 | |
d74d639b |
151 | Makes an idhash from the argument, which must be a hash. |
1e73acc8 |
152 | |
d74d639b |
153 | An I<idhash> works like a normal hash, except that it stringifies a |
154 | I<reference used as a key> differently. A reference is stringified |
155 | as if the C<id()> function had been invoked on it, that is, its |
156 | reference address in decimal is used as the key. |
1e73acc8 |
157 | |
d74d639b |
158 | =item idhashes |
1e73acc8 |
159 | |
d74d639b |
160 | idhashes \ my( %hash, %gnash, %trash) |
161 | idhashes \ @hashrefs |
1e73acc8 |
162 | |
d74d639b |
163 | Creates many idhashes from its hashref arguments. Returns those |
164 | arguments that could be converted or their number in scalar context. |
1e73acc8 |
165 | |
d74d639b |
166 | =item fieldhash |
1e73acc8 |
167 | |
d74d639b |
168 | fieldhash %hash; |
1e73acc8 |
169 | |
d74d639b |
170 | Creates a single fieldhash. The argument must be a hash. Returns |
171 | a reference to the given hash if successful, otherwise nothing. |
1e73acc8 |
172 | |
d74d639b |
173 | A I<fieldhash> is, in short, an idhash with auto-registry. When an |
174 | object (or, indeed, any reference) is used as a fieldhash key, the |
175 | fieldhash is automatically registered for garbage collection with |
176 | the object, as if C<register $obj, \ %fieldhash> had been called. |
1e73acc8 |
177 | |
d74d639b |
178 | =item fieldhashes |
1e73acc8 |
179 | |
d74d639b |
180 | fieldhashes @hashrefs; |
1e73acc8 |
181 | |
d74d639b |
182 | Creates any number of field hashes. Arguments must be hash references. |
183 | Returns the converted hashrefs in list context, their number in scalar |
184 | context. |
1e73acc8 |
185 | |
186 | =back |
187 | |
b299c47c |
188 | =head1 DESCRIPTION |
1e73acc8 |
189 | |
d74d639b |
190 | A word on terminology: I shall use the term I<field> for a scalar |
191 | piece of data that a class associates with an object. Other terms that |
192 | have been used for this concept are "object variable", "(object) property", |
193 | "(object) attribute" and more. Especally "attribute" has some currency |
194 | among Perl programmer, but that clashes with the C<attributes> pragma. The |
195 | term "field" also has some currency in this sense and doesn't seem |
196 | to conflict with other Perl terminology. |
197 | |
198 | In Perl, an object is a blessed reference. The standard way of associating |
199 | data with an object ist to store the data inside the object's body, that is, |
200 | the piece of data pointed to by the reference. |
201 | |
202 | In consequence, if two or more classes want to access an object they |
203 | I<must> agree on the type of refrerence and also on the organization of |
204 | data within the object body. Failure to agree on the type results in |
205 | immediate death when the wrong method tries to access an object. Failure |
206 | to agree on data organization may lead to one class trampling over the |
207 | data of another. |
208 | |
209 | This object model leads to a tight coupling between subclasses. |
210 | If one class wants to inherit from another (and both classes access |
211 | object data), the classes must agree about implementation details. |
212 | Inheritance can only be used among classes that are maintained together, |
213 | in a single source or not. |
214 | |
215 | In particular, it is not possible to write general-purpose classes |
216 | in this technique, classes that can advertise themselves as "Put me |
217 | on your @ISA list and use my methods". If the other class has different |
218 | ideas about how the object body is used, there is trouble. |
219 | |
220 | For reference L<Name_hash> in L<Example 1> shows the standard implementation of |
221 | a simple class C<Name> in the well-known hash based way. It also demonstrates |
222 | the predictable failure to construct a common subclass C<NamedFile> |
223 | of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs). |
224 | |
225 | Thus, techniques are of interest that store object data I<not> in |
226 | the object body but some other place. |
227 | |
228 | =head2 The Inside-out Technique |
229 | |
230 | With I<inside-out> classes, each class declares a (typically lexical) |
231 | hash for each field it wants to use. The reference address of an |
232 | object is used as the hash key. By definition, the reference address |
233 | is unique to each object so this guarantees a place for each field that |
234 | is private to the class and unique to each object. See L<Name_id> in |
235 | L<Example 1> for a simple example. |
236 | |
237 | In comparison to the standard implementation where the object is a |
238 | hash and the fields correspond to hash keys, here the fields correspond |
239 | to hashes, and the object determines the hash key. Thus the hashes |
240 | appear to be turned I<inside out>. |
241 | |
242 | The body of an object is never examined by an inside-out class, only |
243 | its reference address is used. This allows for the body of an actual |
244 | object to be I<anything at all> while the object methods of the class |
245 | still work as designed. This is a key feature of inside-out classes. |
246 | |
247 | =head2 Problems of Inside-out |
248 | |
249 | Inside-out classes give us freedom of inheritance, but as usual there |
250 | is a price. |
251 | |
252 | Most obviously, there is the necessity of retrieving the reference |
253 | address of an object for each data access. It's a minor inconvenience, |
254 | but it does clutter the code. |
255 | |
256 | More important (and less obvious) is the necessity of garbage |
257 | collection. When a normal object dies, anything stored in the |
258 | object body is garbage-collected by perl. With inside-out objects, |
259 | Perl knows nothing about the data stored in field hashes by a class, |
260 | but these must be deleted when the object goes out of scope. Thus |
261 | the class must provide a C<DESTROY> method to take care of that. |
262 | |
263 | In the presence of multiple classes it can be non-trivial |
264 | to make sure that every relevant destructor is called for |
265 | every object. Perl calls the first one it finds on the |
266 | inheritance tree (if any) and that's it. |
267 | |
268 | A related issue is thread-safety. When a new thread is created, |
269 | the Perl interpreter is cloned, which implies that all reference |
270 | addresses in use will be replaced with new ones. Thus, if a class |
271 | tries to access a field of a cloned object its (cloned) data will |
272 | still be stored under the now invalid reference address of the |
273 | original in the parent thread. A general C<CLONE> method must |
274 | be provided to re-establish the association. |
275 | |
276 | =head2 Solutions |
277 | |
278 | C<Hash::Util::FieldHash> addresses these issues on several |
279 | levels. |
280 | |
281 | The C<id()> function is provided in addition to the |
282 | existing C<Scalar::Util::refaddr()>. Besides its short name |
283 | it can be a little faster under some circumstances (and a |
284 | bit slower under others). Benchmark if it matters. The |
285 | working of C<id()> also allows the use of the class name |
b299c47c |
286 | as a I<generic object> as described L<further down|/"The Generic Object">. |
d74d639b |
287 | |
288 | The C<id()> function is incorporated in I<id hashes> in the sense |
289 | that it is called automatically on every key that is used with |
290 | the hash. No explicit call is necessary. |
291 | |
292 | The problems of garbage collection and thread safety are both |
293 | addressed by the function C<register()>. It registers an object |
294 | together with any number of hashes. Registry means that when the |
295 | object dies, an entry in any of the hashes under the reference |
296 | address of this object will be deleted. This guarantees garbage |
297 | collection in these hashes. It also means that on thread |
298 | cloning the object's entries in registered hashes will be |
299 | replaced with updated entries whose key is the cloned object's |
300 | reference address. Thus the object-data association becomes |
301 | thread-safe. |
302 | |
303 | Object registry is best done when the object is initialized |
304 | for use with a class. That way, garbage collection and thread |
305 | safety are established for every object and every field that is |
306 | initialized. |
307 | |
308 | Finally, I<field hashes> incorporate all these functions in one |
309 | package. Besides automatically calling the C<id()> function |
310 | on every object used as a key, the object is registered with |
311 | the field hash on first use. Classes based on field hashes |
312 | are fully garbage-collected and thread safe without further |
313 | measures. |
314 | |
315 | =head2 More Problems |
316 | |
317 | Another problem that occurs with inside-out classes is serialization. |
318 | Since the object data is not in its usual place, standard routines |
319 | like C<Storable::freeze()>, C<Storable::thaw()> and |
320 | C<Data::Dumper::Dumper()> can't deal with it on their own. Both |
321 | C<Data::Dumper> and C<Storable> provide the necessary hooks to |
322 | make things work, but the functions or methods used by the hooks |
323 | must be provided by each inside-out class. |
324 | |
325 | A general solution to the serialization problem would require another |
326 | level of registry, one that that associates I<classes> and fields. |
327 | So far, he functions of C<Hash::Util::Fieldhash> are unaware of |
328 | any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash> |
329 | doesn't address the serialization problems. |
330 | |
331 | =head2 The Generic Object |
332 | |
333 | Classes based on the C<id()> function (and hence classes based on |
334 | C<idhash()> and C<fieldhash()>) show a peculiar behavior in that |
335 | the class name can be used like an object. Specifically, methods |
336 | that set or read data associated with an object continue to work as |
337 | class methods, just as if the class name were an object, distinct from |
338 | all other objects, with its own data. This object may be called |
339 | the I<generic object> of the class. |
340 | |
341 | This works because field hashes respond to keys that are not refrences |
342 | like a normal hash would and use the string offered as the hash key. |
343 | Thus, if a method is called as a class method, the field hash is presented |
344 | with the class name instead of an object and blithely uses it as a key. |
345 | Since the keys of real objects are decimal numbers, there is no |
346 | conflict and the slot in the field hash can be used like any other. |
347 | The C<id()> function behaves correspondingly with respect to non-reference |
348 | arguments. |
349 | |
350 | Two possible uses (besides ignoring the property) come to mind. |
351 | A singleton class could be implemented this using the generic object. |
352 | If necessary, an C<init()> method could die or ignore calls with |
353 | actual objects (references), so only the generic object will ever exist. |
354 | |
355 | Another use of the generic object would be as a template. It is |
356 | a convenient place to store class-specific defaults for various |
357 | fields to be used in actual object initialization. |
358 | |
359 | Usually, the feature can be entirely ignored. Calling I<object |
360 | methods> as I<class methods> normally leads to an error and isn't used |
361 | routinely anywhere. It may be a problem that this error isn't |
362 | indicated by a class with a generic object. |
363 | |
364 | =head2 How to use Field Hashes |
1e73acc8 |
365 | |
366 | Traditionally, the definition of an inside-out class contains a bare |
367 | block inside which a number of lexical hashes are declared and the |
368 | basic accessor methods defined, usually through C<Scalar::Util::refaddr>. |
369 | Further methods may be defined outside this block. There has to be |
370 | a DESTROY method and, for thread support, a CLONE method. |
371 | |
372 | When field hashes are used, the basic structure reamins the same. |
373 | Each lexical hash will be made a field hash. The call to C<refaddr> |
374 | can be omitted from the accessor methods. DESTROY and CLONE methods |
375 | are not necessary. |
376 | |
377 | If you have an existing inside-out class, simply making all hashes |
378 | field hashes with no other change should make no difference. Through |
379 | the calls to C<refaddr> or equivalent, the field hashes never get to |
380 | see a reference and work like normal hashes. Your DESTROY (and |
381 | CLONE) methods are still needed. |
382 | |
383 | To make the field hashes kick in, it is easiest to redefine C<refaddr> |
384 | as |
385 | |
386 | sub refaddr { shift } |
387 | |
388 | instead of importing it from C<Scalar::Util>. It should now be possible |
389 | to disable DESTROY and CLONE. Note that while it isn't disabled, |
390 | DESTROY will be called before the garbage collection of field hashes, |
6ff38c27 |
391 | so it will be invoked with a functional object and will continue to |
392 | function. |
393 | |
394 | It is not desirable to import the functions C<fieldhash> and/or |
395 | C<fieldhashes> into every class that is going to use them. They |
396 | are only used once to set up the class. When the class is up and running, |
397 | these functions serve no more purpose. |
1e73acc8 |
398 | |
1e73acc8 |
399 | If there are only a few field hashes to declare, it is simplest to |
400 | |
401 | use Hash::Util::FieldHash; |
402 | |
403 | early and call the functions qualified: |
404 | |
405 | Hash::Util::FieldHash::fieldhash my %foo; |
406 | |
407 | Otherwise, import the functions into a convenient package like |
d74d639b |
408 | C<HUF> or, more general, C<Aux> |
1e73acc8 |
409 | |
410 | { |
411 | package Aux; |
412 | use Hash::Util::FieldHash ':all'; |
413 | } |
414 | |
415 | and call |
416 | |
417 | Aux::fieldhash my %foo; |
418 | |
419 | as needed. |
420 | |
d74d639b |
421 | =head2 Garbage-Collected Hashes |
422 | |
423 | Garbage collection in a field hash means that entries will "spontaneously" |
424 | disappear when the object that created them disappears. That must be |
425 | borne in mind, especially when looping over a field hash. If anything |
426 | you do inside the loop could cause an object to go out of scope, a |
427 | random key may be deleted from the hash you are looping over. That |
428 | can throw the loop iterator, so it's best to cache a consistent snapshot |
429 | of the keys and/or values and loop over that. You will still have to |
430 | check that a cached entry still exists when you get to it. |
431 | |
432 | Garbage collection can be confusing when keys are created in a field hash |
433 | from normal scalars as well as references. Once a reference is I<used> with |
434 | a field hash, the entry will be collected, even if it was later overwritten |
435 | with a plain scalar key (every positive integer is a candidate). This |
436 | is true even if the original entry was deleted in the meantime. In fact, |
437 | deletion from a field hash, and also a test for existence constitute |
438 | I<use> in this sense and create a liability to delete the entry when |
439 | the reference goes out of scope. If you happen to create an entry |
440 | with an identical key from a string or integer, that will be collected |
441 | instead. Thus, mixed use of references and plain scalars as field hash |
442 | keys is not entirely supported. |
443 | |
b299c47c |
444 | =head1 EXAMPLES |
1e73acc8 |
445 | |
d74d639b |
446 | The examples show a very simple class that implements a I<name>, consisting |
447 | of a first and last name (no middle initial). The name class has four |
448 | methods: |
1e73acc8 |
449 | |
d74d639b |
450 | =over |
1e73acc8 |
451 | |
d74d639b |
452 | =item * C<init()> |
1e73acc8 |
453 | |
d74d639b |
454 | An object method that initializes the first and last name to its |
455 | two arguments. If called as a class method, C<init()> creates an |
456 | object in the given class and initializes that. |
1e73acc8 |
457 | |
d74d639b |
458 | =item * C<first()> |
1e73acc8 |
459 | |
d74d639b |
460 | Retrieve the first name |
1e73acc8 |
461 | |
d74d639b |
462 | =item * C<last()> |
1e73acc8 |
463 | |
d74d639b |
464 | Retrieve the last name |
1e73acc8 |
465 | |
d74d639b |
466 | =item * C<name()> |
1e73acc8 |
467 | |
d74d639b |
468 | Retrieve the full name, the first and last name joined by a blank. |
1e73acc8 |
469 | |
d74d639b |
470 | =back |
1e73acc8 |
471 | |
d74d639b |
472 | The examples show this class implemented with different levels of |
473 | support by C<Hash::Util::FieldHash>. All supported combinations |
474 | are shown. The difference between implementations is often quite |
475 | small. The implementations are: |
1e73acc8 |
476 | |
d74d639b |
477 | =over |
1e73acc8 |
478 | |
d74d639b |
479 | =item * C<Name_hash> |
1e73acc8 |
480 | |
d74d639b |
481 | A conventional (not inside-out) implementation where an object is |
482 | a hash that stores the field values, without support by |
483 | C<Hash::Util::FieldHash>. This implementation doesn't allow |
484 | arbitrary inheritance. |
485 | |
486 | =item * C<Name_id> |
487 | |
488 | Inside-out implementation based on the C<id()> function. It needs |
489 | a C<DESTROY> method. For thread support a C<CLONE> method (not shown) |
490 | would also be needed. Instead of C<Hash::Util::FieldHash::id()> the |
491 | function C<Scalar::Util::refaddr> could be used with very little |
492 | functional difference. This is the basic pattern of an inside-out |
493 | class. |
494 | |
495 | =item * C<Name_idhash> |
496 | |
497 | Idhash-based inside-out implementation. Like L<Name_id> it needs |
498 | a C<DESTROY> method and would need C<CLONE> for thread support. |
499 | |
500 | =item * C<Name_id_reg> |
501 | |
502 | Inside-out implementation based on the C<id()> function with explicit |
503 | object registry. No destructor is needed and objects are thread safe. |
504 | |
505 | =item * C<Name_idhash_reg> |
506 | |
507 | Idhash-based inside-out implementation with explicit object registry. |
508 | No destructor is needed and objects are thread safe. |
509 | |
510 | =item * C<Name_fieldhash> |
511 | |
512 | Fieldhash-based inside-out implementation. Object registry happens |
513 | automatically. No destructor is needed and objects are thread safe. |
514 | |
515 | =back |
516 | |
517 | These examples are realized in the code below, which could be copied |
518 | to a file F<Example.pm>. |
519 | |
520 | =head2 Example 1 |
521 | |
522 | use strict; use warnings; |
523 | |
524 | { |
525 | package Name_hash; # standard implementation: the object is a hash |
526 | |
527 | sub init { |
528 | my $obj = shift; |
529 | my ( $first, $last) = @_; |
530 | # create an object if called as class method |
531 | $obj = bless {}, $obj unless ref $obj; |
532 | $obj->{ first} = $first; |
533 | $obj->{ last} = $last; |
534 | $obj; |
535 | } |
536 | |
537 | sub first { shift()->{ first} } |
538 | sub last { shift()->{ last} } |
539 | |
540 | sub name { |
541 | my $n = shift; |
542 | join ' ' => $n->first, $n->last; |
543 | } |
6ff38c27 |
544 | |
1e73acc8 |
545 | } |
546 | |
d74d639b |
547 | { |
548 | package Name_id; |
549 | use Hash::Util::FieldHash qw( id); |
550 | |
551 | my ( %first, %last); |
552 | |
553 | sub init { |
554 | my $obj = shift; |
555 | my ( $first, $last) = @_; |
556 | # create an object if called as class method |
557 | $obj = bless \ my $o, $obj unless ref $obj; |
558 | $first{ id $obj} = $first; |
559 | $last{ id $obj} = $last; |
560 | $obj; |
561 | } |
562 | |
563 | sub first { $first{ id shift()} } |
564 | sub last { $last{ id shift()} } |
565 | |
566 | sub name { |
567 | my $n = shift; |
568 | join ' ' => $n->first, $n->last; |
569 | } |
570 | |
571 | sub DESTROY { |
572 | my $id = id shift; |
573 | delete $first{ $id}; |
574 | delete $last{ $id}; |
575 | } |
1e73acc8 |
576 | |
1e73acc8 |
577 | } |
578 | |
d74d639b |
579 | { |
580 | package Name_idhash; |
581 | use Hash::Util::FieldHash; |
582 | |
583 | Hash::Util::FieldHash::idhashes( \ my ( %first, %last)); |
584 | |
585 | sub init { |
586 | my $obj = shift; |
587 | my ( $first, $last) = @_; |
588 | # create an object if called as class method |
589 | $obj = bless \ my $o, $obj unless ref $obj; |
590 | $first{ $obj} = $first; |
591 | $last{ $obj} = $last; |
592 | $obj; |
593 | } |
594 | |
595 | sub first { $first{ shift()} } |
596 | sub last { $last{ shift()} } |
597 | |
598 | sub name { |
599 | my $n = shift; |
600 | join ' ' => $n->first, $n->last; |
601 | } |
602 | |
603 | sub DESTROY { |
604 | my $n = shift; |
605 | delete $first{ $n}; |
606 | delete $last{ $n}; |
607 | } |
608 | |
1e73acc8 |
609 | } |
610 | |
d74d639b |
611 | { |
612 | package Name_id_reg; |
613 | use Hash::Util::FieldHash qw( id register); |
614 | |
615 | my ( %first, %last); |
616 | |
617 | sub init { |
618 | my $obj = shift; |
619 | my ( $first, $last) = @_; |
620 | # create an object if called as class method |
621 | $obj = bless \ my $o, $obj unless ref $obj; |
622 | register( $obj, \ ( %first, %last)); |
623 | $first{ id $obj} = $first; |
624 | $last{ id $obj} = $last; |
625 | $obj; |
626 | } |
627 | |
628 | sub first { $first{ id shift()} } |
629 | sub last { $last{ id shift()} } |
630 | |
631 | sub name { |
632 | my $n = shift; |
633 | join ' ' => $n->first, $n->last; |
634 | } |
635 | } |
636 | |
637 | { |
638 | package Name_idhash_reg; |
639 | use Hash::Util::FieldHash qw( register); |
640 | |
641 | Hash::Util::FieldHash::idhashes \ my ( %first, %last); |
642 | |
643 | sub init { |
644 | my $obj = shift; |
645 | my ( $first, $last) = @_; |
646 | # create an object if called as class method |
647 | $obj = bless \ my $o, $obj unless ref $obj; |
648 | register( $obj, \ ( %first, %last)); |
649 | $first{ $obj} = $first; |
650 | $last{ $obj} = $last; |
651 | $obj; |
652 | } |
653 | |
654 | sub first { $first{ shift()} } |
655 | sub last { $last{ shift()} } |
656 | |
657 | sub name { |
658 | my $n = shift; |
659 | join ' ' => $n->first, $n->last; |
660 | } |
661 | } |
662 | |
663 | { |
664 | package Name_fieldhash; |
665 | use Hash::Util::FieldHash; |
666 | |
667 | Hash::Util::FieldHash::fieldhashes \ my ( %first, %last); |
668 | |
669 | sub init { |
670 | my $obj = shift; |
671 | my ( $first, $last) = @_; |
672 | # create an object if called as class method |
673 | $obj = bless \ my $o, $obj unless ref $obj; |
674 | $first{ $obj} = $first; |
675 | $last{ $obj} = $last; |
676 | $obj; |
677 | } |
678 | |
679 | sub first { $first{ shift()} } |
680 | sub last { $last{ shift()} } |
681 | |
682 | sub name { |
683 | my $n = shift; |
684 | join ' ' => $n->first, $n->last; |
685 | } |
686 | } |
687 | |
688 | 1; |
689 | |
b299c47c |
690 | To exercise the various implementations the script L<below|/"Example 2"> can |
d74d639b |
691 | be used. |
692 | |
693 | It sets up a class C<Name> that is a mirror of one of the implementation |
694 | classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines |
695 | which implementation is run. |
696 | |
697 | The script first verifies the function of the C<Name> class. |
698 | |
699 | In the second step, the free inheritablility of the implementation |
700 | (or lack thereof) is demonstrated. For this purpose it constructs |
701 | a class called C<NamedFile> which is a common subclass of C<Name> and |
702 | the standard class C<IO::File>. This puts inheritability to the test |
703 | because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile> |
704 | should behave like a file opened for reading and also support the C<name()> |
705 | method. This class juncture works with exception of the C<Name_hash> |
706 | implementation, where object initialization fails because of the |
707 | incompatibility of object bodies. |
708 | |
709 | =head2 Example 2 |
710 | |
711 | use strict; use warnings; $| = 1; |
712 | |
713 | use Example; |
714 | |
715 | { |
716 | package Name; |
717 | use base 'Name_id'; # define here which implementation to run |
718 | } |
719 | |
720 | |
721 | # Verify that the base package works |
722 | my $n = Name->init( qw( Albert Einstein)); |
723 | print $n->name, "\n"; |
724 | print "\n"; |
725 | |
726 | # Create a named file handle (See definition below) |
727 | my $nf = NamedFile->init( qw( /tmp/x Filomena File)); |
728 | # use as a file handle... |
729 | for ( 1 .. 3 ) { |
730 | my $l = <$nf>; |
731 | print "line $_: $l"; |
732 | } |
733 | # ...and as a Name object |
734 | print "...brought to you by ", $nf->name, "\n"; |
735 | exit; |
736 | |
737 | |
738 | # Definition of NamedFile |
739 | package NamedFile; |
740 | use base 'Name'; |
741 | use base 'IO::File'; |
742 | |
743 | sub init { |
744 | my $obj = shift; |
745 | my ( $file, $first, $last) = @_; |
746 | $obj = $obj->IO::File::new() unless ref $obj; |
747 | $obj->open( $file) or die "Can't read '$file': $!"; |
748 | $obj->Name::init( $first, $last); |
749 | } |
750 | __END__ |
1e73acc8 |
751 | |
1e73acc8 |
752 | |
1e73acc8 |
753 | |
b299c47c |
754 | =head1 GUTS |
1e73acc8 |
755 | |
756 | To make C<Hash::Util::FieldHash> work, there were two changes to |
757 | F<perl> itself. C<PERL_MAGIC_uvar> was made avaliable for hashes, |
758 | and weak references now call uvar C<get> magic after a weakref has been |
759 | cleared. The first feature is used to make field hashes intercept |
760 | their keys upon access. The second one triggers garbage collection. |
761 | |
762 | =head2 The C<PERL_MAGIC_uvar> interface for hashes |
763 | |
764 | C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and |
765 | C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which |
766 | defines the interface. The call happens for hashes with "uvar" magic |
767 | if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set> |
768 | fields. Hashes are unaffected if (and as long as) these fields |
769 | hold different values. |
770 | |
771 | Upon the call, the C<mg_obj> field will hold the hash key to be accessed. |
772 | Upon return, the C<SV*> value in C<mg_obj> will be used in place of the |
773 | original key in the hash access. The integer index value in the first |
774 | parameter will be the C<action> value from C<hv_fetch_common>, or -1 |
775 | if the call is from C<hv_delete_common>. |
776 | |
777 | This is a template for a function suitable for the C<uf_val> field in |
778 | a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index> |
779 | fields are irrelevant. |
780 | |
781 | IV watch_key(pTHX_ IV action, SV* field) { |
782 | MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); |
783 | SV* keysv = mg->mg_obj; |
784 | /* Do whatever you need to. If you decide to |
785 | supply a different key newkey, return it like this |
786 | */ |
787 | sv_2mortal(newkey); |
788 | mg->mg_obj = newkey; |
789 | return 0; |
790 | } |
791 | |
792 | =head2 Weakrefs call uvar magic |
793 | |
794 | When a weak reference is stored in an C<SV> that has "uvar" magic, C<set> |
795 | magic is called after the reference has gone stale. This hook can be |
796 | used to trigger further garbage-collection activities associated with |
797 | the referenced object. |
798 | |
799 | =head2 How field hashes work |
800 | |
801 | The three features of key hashes, I<key replacement>, I<thread support>, |
802 | and I<garbage collection> are supported by a data structure called |
6ff38c27 |
803 | the I<object registry>. This is a private hash where every object |
804 | is stored. An "object" in this sense is any reference (blessed or |
805 | unblessed) that has been used as a field hash key. |
1e73acc8 |
806 | |
807 | The object registry keeps track of references that have been used as |
808 | field hash keys. The keys are generated from the reference address |
809 | like in a field hash (though the registry isn't a field hash). Each |
810 | value is a weak copy of the original reference, stored in an C<SV> that |
811 | is itself magical (C<PERL_MAGIC_uvar> again). The magical structure |
812 | holds a list (another hash, really) of field hashes that the reference |
813 | has been used with. When the weakref becomes stale, the magic is |
814 | activated and uses the list to delete the reference from all field |
815 | hashes it has been used with. After that, the entry is removed from |
816 | the object registry itself. Implicitly, that frees the magic structure |
817 | and the storage it has been using. |
818 | |
819 | Whenever a reference is used as a field hash key, the object registry |
820 | is checked and a new entry is made if necessary. The field hash is |
821 | then added to the list of fields this reference has used. |
822 | |
823 | The object registry is also used to repair a field hash after thread |
824 | cloning. Here, the entire object registry is processed. For every |
825 | reference found there, the field hashes it has used are visited and |
826 | the entry is updated. |
827 | |
828 | =head2 Internal function Hash::Util::FieldHash::_fieldhash |
829 | |
830 | # test if %hash is a field hash |
831 | my $result = _fieldhash \ %hash, 0; |
832 | |
833 | # make %hash a field hash |
834 | my $result = _fieldhash \ %hash, 1; |
835 | |
836 | C<_fieldhash> is the internal function used to create field hashes. |
837 | It takes two arguments, a hashref and a mode. If the mode is boolean |
838 | false, the hash is not changed but tested if it is a field hash. If |
839 | the hash isn't a field hash the return value is boolean false. If it |
840 | is, the return value indicates the mode of field hash. When called with |
841 | a boolean true mode, it turns the given hash into a field hash of this |
842 | mode, returning the mode of the created field hash. C<_fieldhash> |
843 | does not erase the given hash. |
844 | |
845 | Currently there is only one type of field hash, and only the boolean |
846 | value of the mode makes a difference, but that may change. |
847 | |
848 | =head1 AUTHOR |
849 | |
850 | Anno Siegel, E<lt>anno4000@zrz.tu-berlin.deE<gt> |
851 | |
852 | =head1 COPYRIGHT AND LICENSE |
853 | |
b299c47c |
854 | Copyright (C) 2006-2007 by (Anno Siegel) |
1e73acc8 |
855 | |
856 | This library is free software; you can redistribute it and/or modify |
857 | it under the same terms as Perl itself, either Perl version 5.8.7 or, |
858 | at your option, any later version of Perl 5 you may have available. |
859 | |
860 | =cut |