X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSearch%2FDict.pm;h=199fa5f9b41274bdbaf474515b447d59221096eb;hb=446eaa427e017001f2d47e21b0ad20ce965cd808;hp=58c7543ced6539d4402d5ad6ad94cb21797c324d;hpb=0e06870bf080a38cda51c06c6612359afc2334e1;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Search/Dict.pm b/lib/Search/Dict.pm index 58c7543..199fa5f 100644 --- a/lib/Search/Dict.pm +++ b/lib/Search/Dict.pm @@ -4,7 +4,7 @@ require Exporter; use strict; -our $VERSION = '1.00'; +our $VERSION = '1.02'; our @ISA = qw(Exporter); our @EXPORT = qw(look); @@ -17,6 +17,9 @@ Search::Dict, look - search for key in dictionary file use Search::Dict; look *FILEHANDLE, $key, $dict, $fold; + use Search::Dict; + look *FILEHANDLE, $params; + =head1 DESCRIPTION Sets file position in FILEHANDLE to be first line greater than or equal @@ -26,38 +29,64 @@ occurs. The flags specify dictionary order and case folding: If I<$dict> is true, search by dictionary order (ignore anything but word -characters and whitespace). +characters and whitespace). The default is to honour all characters. + +If I<$fold> is true, ignore case. The default is to honour case. -If I<$fold> is true, ignore case. +If there are only three arguments and the third argument is a hash +reference, the keys of that hash can have values C<dict>, C<fold>, and +C<comp> or C<xfrm> (see below), and their corresponding values will be +used as the parameters. + +If a comparison subroutine (comp) is defined, it must return less than zero, +zero, or greater than zero, if the first comparand is less than, +equal, or greater than the second comparand. + +If a transformation subroutine (xfrm) is defined, its value is used to +transform the lines read from the filehandle before their comparison. 
=cut sub look { my($fh,$key,$dict,$fold) = @_; + my ($comp, $xfrm); + if (@_ == 3 && ref $dict eq 'HASH') { + my $params = $dict; + $dict = 0; + $dict = $params->{dict} if exists $params->{dict}; + $fold = $params->{fold} if exists $params->{fold}; + $comp = $params->{comp} if exists $params->{comp}; + $xfrm = $params->{xfrm} if exists $params->{xfrm}; + } + $comp = sub { $_[0] cmp $_[1] } unless defined $comp; local($_); my(@stat) = stat($fh) or return -1; my($size, $blksize) = @stat[7,11]; $blksize ||= 8192; $key =~ s/[^\w\s]//g if $dict; - $key = lc $key if $fold; - my($min, $max, $mid) = (0, int($size / $blksize)); + $key = lc $key if $fold; + # find the right block + my($min, $max) = (0, int($size / $blksize)); + my $mid; while ($max - $min > 1) { $mid = int(($max + $min) / 2); seek($fh, $mid * $blksize, 0) or return -1; <$fh> if $mid; # probably a partial line $_ = <$fh>; - chop; + $_ = $xfrm->($_) if defined $xfrm; + chomp; s/[^\w\s]//g if $dict; - $_ = lc $_ if $fold; - if (defined($_) && $_ lt $key) { + $_ = lc $_ if $fold; + if (defined($_) && $comp->($_, $key) < 0) { $min = $mid; } else { $max = $mid; } } + # find the right line $min *= $blksize; seek($fh,$min,0) or return -1; @@ -66,10 +95,11 @@ sub look { $min = tell($fh); defined($_ = <$fh>) or last; - chop; + $_ = $xfrm->($_) if defined $xfrm; + chomp; s/[^\w\s]//g if $dict; - $_ = lc $_ if $fold; - last if $_ ge $key; + $_ = lc $_ if $fold; + last if $comp->($_, $key) >= 0; } seek($fh,$min,0); $min;