This is patch.2b1f to perl5.002beta1.
[p5sagit/p5-mst-13.2.git] / lib / Text / Soundex.pm
CommitLineData
a0d0e21e 1package Text::Soundex;
2require 5.000;
3require Exporter;
4
5@ISA = qw(Exporter);
6@EXPORT = qw(&soundex $soundex_nocode);
7
8# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
9#
10# Implementation of soundex algorithm as described by Knuth in volume
11# 3 of The Art of Computer Programming, with ideas stolen from Ian
12# Phillips <ian@pipex.net>.
13#
14# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994.
15#
16# Knuth's test cases are:
17#
18# Euler, Ellery -> E460
19# Gauss, Ghosh -> G200
20# Hilbert, Heilbronn -> H416
21# Knuth, Kant -> K530
22# Lloyd, Ladd -> L300
23# Lukasiewicz, Lissajous -> L222
24#
25# $Log: soundex.pl,v $
26# Revision 1.2 1994/03/24 00:30:27 mike
27# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
28# in the way I handles leasing characters which were different but had
29# the same soundex code. This showed up comparing it with Oracle's
30# soundex output.
31#
32# Revision 1.1 1994/03/02 13:01:30 mike
33# Initial revision
34#
35#
36##############################################################################
37
38# $soundex_nocode is used to indicate a string doesn't have a soundex
39# code, I like undef other people may want to set it to 'Z000'.
40
41$soundex_nocode = undef;
42
43# soundex
44#
45# usage:
46#
47# @codes = &soundex (@wordList);
48# $code = &soundex ($word);
49#
50# This strenuously avoids 0
51
52sub soundex
53{
54 local (@s, $f, $fc, $_) = @_;
55
56 foreach (@s)
57 {
58 tr/a-z/A-Z/;
59 tr/A-Z//cd;
60
61 if ($_ eq '')
62 {
63 $_ = $soundex_nocode;
64 }
65 else
66 {
67 ($f) = /^(.)/;
68 tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
69 ($fc) = /^(.)/;
70 s/^$fc+//;
71 tr///cs;
72 tr/0//d;
73 $_ = $f . $_ . '000';
74 s/^(.{4}).*/$1/;
75 }
76 }
77
78 wantarray ? @s : shift @s;
79}
80
811;
82