Commit | Line | Data |
0e9b1cbd |
1 | package DBM_Filter ; |
2 | |
3 | use strict; |
4 | use warnings; |
902fde96 |
5 | our $VERSION = '0.03'; |
0e9b1cbd |
6 | |
7 | package Tie::Hash ; |
8 | |
9 | use strict; |
10 | use warnings; |
11 | |
12 | use Carp; |
13 | |
14 | |
15 | our %LayerStack = (); |
16 | our %origDESTROY = (); |
17 | |
18 | our %Filters = map { $_, undef } qw( |
19 | Fetch_Key |
20 | Fetch_Value |
21 | Store_Key |
22 | Store_Value |
23 | ); |
24 | |
25 | our %Options = map { $_, 1 } qw( |
26 | fetch |
27 | store |
28 | ); |
29 | |
30 | #sub Filter_Enable |
31 | #{ |
32 | #} |
33 | # |
34 | #sub Filter_Disable |
35 | #{ |
36 | #} |
37 | |
38 | sub Filtered |
39 | { |
40 | my $this = shift; |
41 | return defined $LayerStack{$this} ; |
42 | } |
43 | |
44 | sub Filter_Pop |
45 | { |
46 | my $this = shift; |
47 | my $stack = $LayerStack{$this} || return undef ; |
48 | my $filter = pop @{ $stack }; |
49 | |
50 | # remove the filter hooks if this is the last filter to pop |
51 | if ( @{ $stack } == 0 ) { |
52 | $this->filter_store_key ( undef ); |
53 | $this->filter_store_value( undef ); |
54 | $this->filter_fetch_key ( undef ); |
55 | $this->filter_fetch_value( undef ); |
56 | delete $LayerStack{$this}; |
57 | } |
58 | |
59 | return $filter; |
60 | } |
61 | |
62 | sub Filter_Key_Push |
63 | { |
64 | &_do_Filter_Push; |
65 | } |
66 | |
67 | sub Filter_Value_Push |
68 | { |
69 | &_do_Filter_Push; |
70 | } |
71 | |
72 | |
73 | sub Filter_Push |
74 | { |
75 | &_do_Filter_Push; |
76 | } |
77 | |
78 | sub _do_Filter_Push |
79 | { |
80 | my $this = shift; |
81 | my %callbacks = (); |
82 | my $caller = (caller(1))[3]; |
83 | $caller =~ s/^.*:://; |
84 | |
85 | croak "$caller: no parameters present" unless @_ ; |
86 | |
87 | if ( ! $Options{lc $_[0]} ) { |
88 | my $class = shift; |
89 | my @params = @_; |
90 | |
91 | # if $class already contains "::", don't prefix "DBM_Filter::" |
92 | $class = "DBM_Filter::$class" unless $class =~ /::/; |
93 | |
d9f30342 |
94 | no strict 'refs'; |
0e9b1cbd |
95 | # does the "DBM_Filter::$class" exist? |
902fde96 |
96 | if ( ! %{ "${class}::"} ) { |
0e9b1cbd |
97 | # Nope, so try to load it. |
98 | eval " require $class ; " ; |
99 | croak "$caller: Cannot Load DBM Filter '$class': $@" if $@; |
100 | } |
101 | |
0e9b1cbd |
102 | my $fetch = *{ "${class}::Fetch" }{CODE}; |
103 | my $store = *{ "${class}::Store" }{CODE}; |
104 | my $filter = *{ "${class}::Filter" }{CODE}; |
105 | use strict 'refs'; |
106 | |
107 | my $count = defined($filter) + defined($store) + defined($fetch) ; |
108 | |
109 | if ( $count == 0 ) |
110 | { croak "$caller: No methods (Filter, Fetch or Store) found in class '$class'" } |
111 | elsif ( $count == 1 && ! defined $filter) { |
112 | my $need = defined($fetch) ? 'Store' : 'Fetch'; |
113 | croak "$caller: Missing method '$need' in class '$class'" ; |
114 | } |
115 | elsif ( $count >= 2 && defined $filter) |
116 | { croak "$caller: Can't mix Filter with Store and Fetch in class '$class'" } |
117 | |
118 | if (defined $filter) { |
119 | my $callbacks = &{ $filter }(@params); |
120 | croak "$caller: '${class}::Filter' did not return a hash reference" |
121 | unless ref $callbacks && ref $callbacks eq 'HASH'; |
122 | %callbacks = %{ $callbacks } ; |
123 | } |
124 | else { |
125 | $callbacks{Fetch} = $fetch; |
126 | $callbacks{Store} = $store; |
127 | } |
128 | } |
129 | else { |
130 | croak "$caller: not even params" unless @_ % 2 == 0; |
131 | %callbacks = @_; |
132 | } |
133 | |
134 | my %filters = %Filters ; |
135 | my @got = (); |
136 | while (my ($k, $v) = each %callbacks ) |
137 | { |
138 | my $key = $k; |
139 | $k = lc $k; |
140 | if ($k eq 'fetch') { |
141 | push @got, 'Fetch'; |
142 | if ($caller eq 'Filter_Push') |
143 | { $filters{Fetch_Key} = $filters{Fetch_Value} = $v } |
144 | elsif ($caller eq 'Filter_Key_Push') |
145 | { $filters{Fetch_Key} = $v } |
146 | elsif ($caller eq 'Filter_Value_Push') |
147 | { $filters{Fetch_Value} = $v } |
148 | } |
149 | elsif ($k eq 'store') { |
150 | push @got, 'Store'; |
151 | if ($caller eq 'Filter_Push') |
152 | { $filters{Store_Key} = $filters{Store_Value} = $v } |
153 | elsif ($caller eq 'Filter_Key_Push') |
154 | { $filters{Store_Key} = $v } |
155 | elsif ($caller eq 'Filter_Value_Push') |
156 | { $filters{Store_Value} = $v } |
157 | } |
158 | else |
159 | { croak "$caller: Unknown key '$key'" } |
160 | |
161 | croak "$caller: value associated with key '$key' is not a code reference" |
162 | unless ref $v && ref $v eq 'CODE'; |
163 | } |
164 | |
165 | if ( @got != 2 ) { |
166 | push @got, 'neither' if @got == 0 ; |
167 | croak "$caller: expected both Store & Fetch - got @got"; |
168 | } |
169 | |
170 | # remember the class |
171 | push @{ $LayerStack{$this} }, \%filters ; |
172 | |
173 | my $str_this = "$this" ; # Avoid a closure with $this in the subs below |
174 | |
175 | $this->filter_store_key ( sub { store_hook($str_this, 'Store_Key') }); |
176 | $this->filter_store_value( sub { store_hook($str_this, 'Store_Value') }); |
177 | $this->filter_fetch_key ( sub { fetch_hook($str_this, 'Fetch_Key') }); |
178 | $this->filter_fetch_value( sub { fetch_hook($str_this, 'Fetch_Value') }); |
179 | |
180 | # Hijack the callers DESTROY method |
181 | $this =~ /^(.*)=/; |
182 | my $type = $1 ; |
183 | no strict 'refs'; |
184 | if ( *{ "${type}::DESTROY" }{CODE} ne \&MyDESTROY ) |
185 | { |
186 | $origDESTROY{$type} = *{ "${type}::DESTROY" }{CODE}; |
187 | no warnings 'redefine'; |
188 | *{ "${type}::DESTROY" } = \&MyDESTROY ; |
189 | } |
190 | } |
191 | |
192 | sub store_hook |
193 | { |
194 | my $this = shift ; |
195 | my $type = shift ; |
196 | foreach my $layer (@{ $LayerStack{$this} }) |
197 | { |
198 | &{ $layer->{$type} }() if defined $layer->{$type} ; |
199 | } |
200 | } |
201 | |
202 | sub fetch_hook |
203 | { |
204 | my $this = shift ; |
205 | my $type = shift ; |
206 | foreach my $layer (reverse @{ $LayerStack{$this} }) |
207 | { |
208 | &{ $layer->{$type} }() if defined $layer->{$type} ; |
209 | } |
210 | } |
211 | |
212 | sub MyDESTROY |
213 | { |
214 | my $this = shift ; |
215 | delete $LayerStack{$this} ; |
216 | |
217 | # call real DESTROY |
218 | $this =~ /^(.*)=/; |
219 | &{ $origDESTROY{$1} }($this); |
220 | } |
221 | |
222 | 1; |
223 | |
224 | __END__ |
225 | |
226 | =head1 NAME |
227 | |
228 | DBM_Filter -- Filter DBM keys/values |
229 | |
230 | =head1 SYNOPSIS |
231 | |
232 | use DBM_Filter ; |
233 | use SDBM_File; # or DB_File, or GDBM_File, or NDBM_File, or ODBM_File |
234 | |
235 | $db = tie %hash, ... |
236 | |
237 | $db->Filter_Push(Fetch => sub {...}, |
238 | Store => sub {...}); |
239 | |
240 | $db->Filter_Push('my_filter1'); |
241 | $db->Filter_Push('my_filter2', params...); |
242 | |
243 | $db->Filter_Key_Push(...) ; |
244 | $db->Filter_Value_Push(...) ; |
245 | |
246 | $db->Filter_Pop(); |
247 | $db->Filtered(); |
248 | |
249 | package DBM_Filter::my_filter1; |
250 | |
251 | sub Store { ... } |
252 | sub Fetch { ... } |
253 | |
254 | 1; |
255 | |
256 | package DBM_Filter::my_filter2; |
257 | |
258 | sub Filter |
259 | { |
260 | my @opts = @_; |
261 | ... |
262 | return ( |
263 | sub Store { ... }, |
264 | sub Fetch { ... } ); |
265 | } |
266 | |
267 | 1; |
268 | |
269 | =head1 DESCRIPTION |
270 | |
271 | This module provides an interface that allows filters to be applied |
272 | to tied Hashes associated with DBM files. It builds on the DBM Filter |
273 | hooks that are present in all the *DB*_File modules included with the |
274 | standard Perl source distribution from version 5.6.1 onwards. In addition |
275 | to the *DB*_File modules distributed with Perl, the BerkeleyDB module, |
276 | available on CPAN, supports the DBM Filter hooks. See L<perldbmfilter> |
277 | for more details on the DBM Filter hooks. |
278 | |
279 | =head1 What is a DBM Filter? |
280 | |
281 | A DBM Filter allows the keys and/or values in a tied hash to be modified |
282 | by some user-defined code just before it is written to the DBM file and |
283 | just after it is read back from the DBM file. For example, this snippet |
284 | of code |
285 | |
286 | $some_hash{"abc"} = 42; |
287 | |
288 | could potentially trigger two filters, one for the writing of the key |
289 | "abc" and another for writing the value 42. Similarly, this snippet |
290 | |
291 | my ($key, $value) = each %some_hash |
292 | |
293 | will trigger two filters, one for the reading of the key and one for |
294 | the reading of the value. |
295 | |
296 | Like the existing DBM Filter functionality, this module arranges for the |
297 | C<$_> variable to be populated with the key or value that a filter will |
298 | check. This usually means that most DBM filters tend to be very short. |
299 | |
300 | =head2 So what's new? |
301 | |
302 | The main enhancements over the standard DBM Filter hooks are: |
303 | |
304 | =over 4 |
305 | |
306 | =item * |
307 | |
308 | A cleaner interface. |
309 | |
310 | =item * |
311 | |
312 | The ability to easily apply multiple filters to a single DBM file. |
313 | |
314 | =item * |
315 | |
316 | The ability to create "canned" filters. These allow commonly used filters |
317 | to be packaged into a stand-alone module. |
318 | |
319 | =back |
320 | |
321 | =head1 METHODS |
322 | |
323 | This module will arrange for the following methods to be available via |
324 | the object returned from the C<tie> call. |
325 | |
326 | =head2 $db->Filter_Push() |
327 | |
328 | =head2 $db->Filter_Key_Push() |
329 | |
330 | =head2 $db->Filter_Value_Push() |
331 | |
332 | Add a filter to filter stack for the database, C<$db>. The three formats |
333 | vary only in whether they apply to the DBM key, the DBM value or both. |
334 | |
335 | =over 5 |
336 | |
337 | =item Filter_Push |
338 | |
339 | The filter is applied to I<both> keys and values. |
340 | |
341 | =item Filter_Key_Push |
342 | |
343 | The filter is applied to the key I<only>. |
344 | |
345 | =item Filter_Value_Push |
346 | |
347 | The filter is applied to the value I<only>. |
348 | |
349 | =back |
350 | |
351 | |
352 | =head2 $db->Filter_Pop() |
353 | |
354 | Removes the last filter that was applied to the DBM file associated with |
355 | C<$db>, if present. |
356 | |
357 | =head2 $db->Filtered() |
358 | |
359 | Returns TRUE if there are any filters applied to the DBM associated |
360 | with C<$db>. Otherwise returns FALSE. |
361 | |
362 | |
363 | |
364 | =head1 Writing a Filter |
365 | |
366 | Filters can be created in two main ways |
367 | |
368 | =head2 Immediate Filters |
369 | |
370 | An immediate filter allows you to specify the filter code to be used |
371 | at the point where the filter is applied to a dbm. In this mode the |
372 | Filter_*_Push methods expects to receive exactly two parameters. |
373 | |
374 | my $db = tie %hash, 'SDBM_File', ... |
375 | $db->Filter_Push( Store => sub { }, |
376 | Fetch => sub { }); |
377 | |
378 | The code reference associated with C<Store> will be called before any |
379 | key/value is written to the database and the code reference associated |
380 | with C<Fetch> will be called after any key/value is read from the |
381 | database. |
382 | |
383 | For example, here is a sample filter that adds a trailing NULL character |
384 | to all strings before they are written to the DBM file, and removes the |
385 | trailing NULL when they are read from the DBM file |
386 | |
387 | my $db = tie %hash, 'SDBM_File', ... |
388 | $db->Filter_Push( Store => sub { $_ .= "\x00" ; }, |
389 | Fetch => sub { s/\x00$// ; }); |
390 | |
391 | |
392 | Points to note: |
393 | |
394 | =over 5 |
395 | |
396 | =item 1. |
397 | |
398 | Both the Store and Fetch filters manipulate C<$_>. |
399 | |
400 | =back |
401 | |
402 | =head2 Canned Filters |
403 | |
404 | Immediate filters are useful for one-off situations. For more generic |
405 | problems it can be useful to package the filter up in its own module. |
406 | |
407 | The usage is for a canned filter is: |
408 | |
409 | $db->Filter_Push("name", params) |
410 | |
411 | where |
412 | |
413 | =over 5 |
414 | |
415 | =item "name" |
416 | |
417 | is the name of the module to load. If the string specified does not |
418 | contain the package separator characters "::", it is assumed to refer to |
419 | the full module name "DBM_Filter::name". This means that the full names |
420 | for canned filters, "null" and "utf8", included with this module are: |
421 | |
422 | DBM_Filter::null |
423 | DBM_Filter::utf8 |
424 | |
425 | =item params |
426 | |
427 | any optional parameters that need to be sent to the filter. See the |
428 | encode filter for an example of a module that uses parameters. |
429 | |
430 | =back |
431 | |
432 | The module that implements the canned filter can take one of two |
433 | forms. Here is a template for the first |
434 | |
435 | package DBM_Filter::null ; |
436 | |
437 | use strict; |
438 | use warnings; |
439 | |
440 | sub Store |
441 | { |
442 | # store code here |
443 | } |
444 | |
445 | sub Fetch |
446 | { |
447 | # fetch code here |
448 | } |
449 | |
450 | 1; |
451 | |
452 | |
453 | Notes: |
454 | |
455 | =over 5 |
456 | |
457 | =item 1. |
458 | |
459 | The package name uses the C<DBM_Filter::> prefix. |
460 | |
461 | =item 2. |
462 | |
463 | The module I<must> have both a Store and a Fetch method. If only one is |
464 | present, or neither are present, a fatal error will be thrown. |
465 | |
466 | =back |
467 | |
468 | The second form allows the filter to hold state information using a |
469 | closure, thus: |
470 | |
471 | package DBM_Filter::encoding ; |
472 | |
473 | use strict; |
474 | use warnings; |
475 | |
476 | sub Filter |
477 | { |
478 | my @params = @_ ; |
479 | |
480 | ... |
481 | return { |
482 | Store => sub { $_ = $encoding->encode($_) }, |
483 | Fetch => sub { $_ = $encoding->decode($_) } |
484 | } ; |
485 | } |
486 | |
487 | 1; |
488 | |
489 | |
490 | In this instance the "Store" and "Fetch" methods are encapsulated inside a |
491 | "Filter" method. |
492 | |
493 | |
494 | =head1 Filters Included |
495 | |
496 | A number of canned filers are provided with this module. They cover a |
497 | number of the main areas that filters are needed when interfacing with |
498 | DBM files. They also act as templates for your own filters. |
499 | |
500 | The filter included are: |
501 | |
502 | =over 5 |
503 | |
504 | =item * utf8 |
505 | |
506 | This module will ensure that all data written to the DBM will be encoded |
507 | in UTF-8. |
508 | |
509 | This module needs the Encode module. |
510 | |
511 | =item * encode |
512 | |
513 | Allows you to choose the character encoding will be store in the DBM file. |
514 | |
515 | =item * compress |
516 | |
517 | This filter will compress all data before it is written to the database |
518 | and uncompressed it on reading. |
519 | |
520 | This module needs Compress::Zlib. |
521 | |
522 | =item * int32 |
523 | |
524 | This module is used when interoperating with a C/C++ application that |
525 | uses a C int as either the key and/or value in the DBM file. |
526 | |
527 | =item * null |
528 | |
529 | This module ensures that all data written to the DBM file is null |
530 | terminated. This is useful when you have a perl script that needs |
531 | to interoperate with a DBM file that a C program also uses. A fairly |
532 | common issue is for the C application to include the terminating null |
533 | in a string when it writes to the DBM file. This filter will ensure that |
534 | all data written to the DBM file can be read by the C application. |
535 | |
536 | =back |
537 | |
538 | =head1 NOTES |
539 | |
540 | =head2 Maintain Round Trip Integrity |
541 | |
542 | When writing a DBM filter it is I<very> important to ensure that it is |
543 | possible to retrieve all data that you have written when the DBM filter |
544 | is in place. In practice, this means that whatever transformation is |
545 | applied to the data in the Store method, the I<exact> inverse operation |
546 | should be applied in the Fetch method. |
547 | |
548 | If you don't provide an exact inverse transformation, you will find that |
549 | code like this will not behave as you expect. |
550 | |
551 | while (my ($k, $v) = each %hash) |
552 | { |
553 | ... |
554 | } |
555 | |
556 | Depending on the transformation, you will find that one or more of the |
557 | following will happen |
558 | |
559 | =over 5 |
560 | |
561 | =item 1 |
562 | |
563 | The loop will never terminate. |
564 | |
565 | =item 2 |
566 | |
567 | Too few records will be retrieved. |
568 | |
569 | =item 3 |
570 | |
571 | Too many will be retrieved. |
572 | |
573 | =item 4 |
574 | |
575 | The loop will do the right thing for a while, but it will unexpectedly fail. |
576 | |
577 | =back |
578 | |
579 | =head2 Don't mix filtered & non-filtered data in the same database file. |
580 | |
581 | This is just a restatement of the previous section. Unless you are |
582 | completely certain you know what you are doing, avoid mixing filtered & |
583 | non-filtered data. |
584 | |
585 | =head1 EXAMPLE |
586 | |
587 | Say you need to interoperate with a legacy C application that stores |
588 | keys as C ints and the values and null terminated UTF-8 strings. Here |
589 | is how you would set that up |
590 | |
591 | my $db = tie %hash, 'SDBM_File', ... |
592 | |
593 | $db->Filter_Key_Push('int32') ; |
594 | |
595 | $db->Filter_Value_Push('utf8'); |
596 | $db->Filter_Value_Push('null'); |
597 | |
598 | =head1 SEE ALSO |
599 | |
600 | <DB_File>, L<GDBM_File>, L<NDBM_File>, L<ODBM_File>, L<SDBM_File>, L<perldbmfilter> |
601 | |
602 | =head1 AUTHOR |
603 | |
604 | Paul Marquess <pmqs@cpan.org> |
605 | |