X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSearch%2FDict.pm;h=199fa5f9b41274bdbaf474515b447d59221096eb;hb=2fb44b4522b8956ab337b2f83a5fe619b0773788;hp=10aa4ff583b44819618e205189d06ba69e2eeaca;hpb=a0d0e21ea6ea90a22318550944fe6cb09ae10cda;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Search/Dict.pm b/lib/Search/Dict.pm index 10aa4ff..199fa5f 100644 --- a/lib/Search/Dict.pm +++ b/lib/Search/Dict.pm @@ -2,51 +2,107 @@ package Search::Dict; require 5.000; require Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(look); +use strict; -# Usage: look(*FILEHANDLE,$key,$dict,$fold) +our $VERSION = '1.02'; +our @ISA = qw(Exporter); +our @EXPORT = qw(look); -# Sets file position in FILEHANDLE to be first line greater than or equal -# (stringwise) to $key. Pass flags for dictionary order and case folding. +=head1 NAME + +Search::Dict, look - search for key in dictionary file + +=head1 SYNOPSIS + + use Search::Dict; + look *FILEHANDLE, $key, $dict, $fold; + + use Search::Dict; + look *FILEHANDLE, $params; + +=head1 DESCRIPTION + +Sets file position in FILEHANDLE to be first line greater than or equal +(stringwise) to I<$key>. Returns the new file position, or -1 if an error +occurs. + +The flags specify dictionary order and case folding: + +If I<$dict> is true, search by dictionary order (ignore anything but word +characters and whitespace). The default is honour all characters. + +If I<$fold> is true, ignore case. The default is to honour case. + +If there are only three arguments and the third argument is a hash +reference, the keys of that hash can have values C, C, and +C or C (see below), and their correponding values will be +used as the parameters. + +If a comparison subroutine (comp) is defined, it must return less than zero, +zero, or greater than zero, if the first comparand is less than, +equal, or greater than the second comparand. + +If a transformation subroutine (xfrm) is defined, its value is used to +transform the lines read from the filehandle before their comparison. + +=cut sub look { - local(*FH,$key,$dict,$fold) = @_; - local($max,$min,$mid,$_); - local($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime, - $blksize,$blocks) = stat(FH); - $blksize = 8192 unless $blksize; + my($fh,$key,$dict,$fold) = @_; + my ($comp, $xfrm); + if (@_ == 3 && ref $dict eq 'HASH') { + my $params = $dict; + $dict = 0; + $dict = $params->{dict} if exists $params->{dict}; + $fold = $params->{fold} if exists $params->{fold}; + $comp = $params->{comp} if exists $params->{comp}; + $xfrm = $params->{xfrm} if exists $params->{xfrm}; + } + $comp = sub { $_[0] cmp $_[1] } unless defined $comp; + local($_); + my(@stat) = stat($fh) + or return -1; + my($size, $blksize) = @stat[7,11]; + $blksize ||= 8192; $key =~ s/[^\w\s]//g if $dict; - $key =~ tr/A-Z/a-z/ if $fold; - $max = int($size / $blksize); + $key = lc $key if $fold; + # find the right block + my($min, $max) = (0, int($size / $blksize)); + my $mid; while ($max - $min > 1) { $mid = int(($max + $min) / 2); - seek(FH,$mid * $blksize,0); - $_ = if $mid; # probably a partial line - $_ = ; - chop; + seek($fh, $mid * $blksize, 0) + or return -1; + <$fh> if $mid; # probably a partial line + $_ = <$fh>; + $_ = $xfrm->($_) if defined $xfrm; + chomp; s/[^\w\s]//g if $dict; - tr/A-Z/a-z/ if $fold; - if ($_ lt $key) { + $_ = lc $_ if $fold; + if (defined($_) && $comp->($_, $key) < 0) { $min = $mid; } else { $max = $mid; } } + # find the right line $min *= $blksize; - seek(FH,$min,0); - if $min; - while () { - chop; + seek($fh,$min,0) + or return -1; + <$fh> if $min; + for (;;) { + $min = tell($fh); + defined($_ = <$fh>) + or last; + $_ = $xfrm->($_) if defined $xfrm; + chomp; s/[^\w\s]//g if $dict; - y/A-Z/a-z/ if $fold; - last if $_ ge $key; - $min = tell(FH); + $_ = lc $_ if $fold; + last if $comp->($_, $key) >= 0; } - seek(FH,$min,0); + seek($fh,$min,0); $min; } 1; -