Commit | Line | Data |
a0d0e21e |
1 | package Text::Soundex; |
2 | require 5.000; |
3 | require Exporter; |
4 | |
5 | @ISA = qw(Exporter); |
6 | @EXPORT = qw(&soundex $soundex_nocode); |
7 | |
8 | # $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $ |
9 | # |
10 | # Implementation of soundex algorithm as described by Knuth in volume |
11 | # 3 of The Art of Computer Programming, with ideas stolen from Ian |
12 | # Phillips <ian@pipex.net>. |
13 | # |
14 | # Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994. |
15 | # |
16 | # Knuth's test cases are: |
17 | # |
18 | # Euler, Ellery -> E460 |
19 | # Gauss, Ghosh -> G200 |
20 | # Hilbert, Heilbronn -> H416 |
21 | # Knuth, Kant -> K530 |
22 | # Lloyd, Ladd -> L300 |
23 | # Lukasiewicz, Lissajous -> L222 |
24 | # |
25 | # $Log: soundex.pl,v $ |
26 | # Revision 1.2 1994/03/24 00:30:27 mike |
27 | # Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu> |
28 | # in the way I handles leasing characters which were different but had |
29 | # the same soundex code. This showed up comparing it with Oracle's |
30 | # soundex output. |
31 | # |
32 | # Revision 1.1 1994/03/02 13:01:30 mike |
33 | # Initial revision |
34 | # |
35 | # |
36 | ############################################################################## |
37 | |
38 | # $soundex_nocode is used to indicate a string doesn't have a soundex |
39 | # code, I like undef other people may want to set it to 'Z000'. |
40 | |
41 | $soundex_nocode = undef; |
42 | |
43 | # soundex |
44 | # |
45 | # usage: |
46 | # |
47 | # @codes = &soundex (@wordList); |
48 | # $code = &soundex ($word); |
49 | # |
50 | # This strenuously avoids 0 |
51 | |
52 | sub soundex |
53 | { |
54 | local (@s, $f, $fc, $_) = @_; |
55 | |
56 | foreach (@s) |
57 | { |
58 | tr/a-z/A-Z/; |
59 | tr/A-Z//cd; |
60 | |
61 | if ($_ eq '') |
62 | { |
63 | $_ = $soundex_nocode; |
64 | } |
65 | else |
66 | { |
67 | ($f) = /^(.)/; |
68 | tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/; |
69 | ($fc) = /^(.)/; |
70 | s/^$fc+//; |
71 | tr///cs; |
72 | tr/0//d; |
73 | $_ = $f . $_ . '000'; |
74 | s/^(.{4}).*/$1/; |
75 | } |
76 | } |
77 | |
78 | wantarray ? @s : shift @s; |
79 | } |
80 | |
81 | 1; |
82 | |