perl 5.0 alpha 9
[p5sagit/p5-mst-13.2.git] / lib / soundex.pl.art
CommitLineData
2304df62 1Article 20106 of comp.lang.perl:
2Path: netlabs!news.cerf.net!ihnp4.ucsd.edu!mvb.saic.com!MathWorks.Com!noc.near.net!newshost.meiko.com!not-for-mail
3From: mike@meiko.com (Mike Stok)
4Newsgroups: comp.lang.perl
5Subject: Soundex (again :-)
6Date: 23 Mar 1994 19:44:35 -0500
7Organization: Meiko Scientific, Inc., MA
8Lines: 272
9Message-ID: <2mqnpj$qk4@hibbert.meiko.com>
10NNTP-Posting-Host: hibbert.meiko.com
11
12Thanks to Rich Pinder <rpinder@hsc.usc.edu> for finding a little bug in my
13soundex code I posted a while back. This showed up when he compared it
14with the output from Oracle's soundex function, and were caused by leading
15characters which were different but shared the same soundex code.
16
17Here's a fixed shar file...
18
19Mike
20
21#!/bin/sh
22# This is a shell archive (produced by shar 3.49)
23# To extract the files from this archive, save it to a file, remove
24# everything above the "!/bin/sh" line above, and type "sh file_name".
25#
26# made 03/24/1994 00:35 UTC by Mike.Stok@meiko.concord.ma.us
27# Source directory /tmp_mnt/develop/sw/misc/mike/soundex
28#
29# existing files will NOT be overwritten unless -c is specified
30#
31# This shar contains:
32# length mode name
33# ------ ---------- ------------------------------------------
34# 1677 -r--r--r-- soundex.pl
35# 2408 -r-xr-xr-x soundex.t
36#
37# ============= soundex.pl ==============
38if test -f 'soundex.pl' -a X"$1" != X"-c"; then
39 echo 'x - skipping soundex.pl (File already exists)'
40else
41echo 'x - extracting soundex.pl (Text)'
42sed 's/^X//' << 'SHAR_EOF' > 'soundex.pl' &&
43package soundex;
44X
45;# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
46;#
47;# Implementation of soundex algorithm as described by Knuth in volume
48;# 3 of The Art of Computer Programming, with ideas stolen from Ian
49;# Phillips <ian@pipex.net>.
50;#
51;# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994.
52;#
53;# Knuth's test cases are:
54;#
55;# Euler, Ellery -> E460
56;# Gauss, Ghosh -> G200
57;# Hilbert, Heilbronn -> H416
58;# Knuth, Kant -> K530
59;# Lloyd, Ladd -> L300
60;# Lukasiewicz, Lissajous -> L222
61;#
62;# $Log: soundex.pl,v $
63;# Revision 1.2 1994/03/24 00:30:27 mike
64;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
65;# in the way I handles leasing characters which were different but had
66;# the same soundex code. This showed up comparing it with Oracle's
67;# soundex output.
68;#
69;# Revision 1.1 1994/03/02 13:01:30 mike
70;# Initial revision
71;#
72;#
73;##############################################################################
74X
75;# $soundex'noCode is used to indicate a string doesn't have a soundex
76;# code, I like undef other people may want to set it to 'Z000'.
77X
78$noCode = undef;
79X
80;# main'soundex
81;#
82;# usage:
83;#
84;# @codes = &main'soundex (@wordList);
85;# $code = &main'soundex ($word);
86;#
87;# This strenuously avoids $[
88X
89sub main'soundex
90{
91X local (@s, $f, $fc, $_) = @_;
92X
93X foreach (@s)
94X {
95X tr/a-z/A-Z/;
96X tr/A-Z//cd;
97X
98X if ($_ eq '')
99X {
100X $_ = $noCode;
101X }
102X else
103X {
104X ($f) = /^(.)/;
105X tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
106X ($fc) = /^(.)/;
107X s/^$fc+//;
108X tr///cs;
109X tr/0//d;
110X $_ = $f . $_ . '000';
111X s/^(.{4}).*/$1/;
112X }
113X }
114X
115X wantarray ? @s : shift @s;
116}
117X
1181;
119SHAR_EOF
120chmod 0444 soundex.pl ||
121echo 'restore of soundex.pl failed'
122Wc_c="`wc -c < 'soundex.pl'`"
123test 1677 -eq "$Wc_c" ||
124 echo 'soundex.pl: original size 1677, current size' "$Wc_c"
125fi
126# ============= soundex.t ==============
127if test -f 'soundex.t' -a X"$1" != X"-c"; then
128 echo 'x - skipping soundex.t (File already exists)'
129else
130echo 'x - extracting soundex.t (Text)'
131sed 's/^X//' << 'SHAR_EOF' > 'soundex.t' &&
132#!./perl
133;#
134;# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $
135;#
136;# test module for soundex.pl
137;#
138;# $Log: soundex.t,v $
139;# Revision 1.2 1994/03/24 00:30:27 mike
140;# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
141;# in the way I handles leasing characters which were different but had
142;# the same soundex code. This showed up comparing it with Oracle's
143;# soundex output.
144;#
145;# Revision 1.1 1994/03/02 13:03:02 mike
146;# Initial revision
147;#
148;#
149X
150require '../lib/soundex.pl';
151X
152$test = 0;
153print "1..13\n";
154X
155while (<DATA>)
156{
157X chop;
158X next if /^\s*;?#/;
159X next if /^\s*$/;
160X
161X ++$test;
162X $bad = 0;
163X
164X if (/^eval\s+/)
165X {
166X ($try = $_) =~ s/^eval\s+//;
167X
168X eval ($try);
169X if ($@)
170X {
171X $bad++;
172X print "not ok $test\n";
173X print "# eval '$try' returned $@";
174X }
175X }
176X elsif (/^\(/)
177X {
178X ($in, $out) = split (':');
179X
180X $try = "\@expect = $out; \@got = &soundex $in;";
181X eval ($try);
182X
183X if (@expect != @got)
184X {
185X $bad++;
186X print "not ok $test\n";
187X print "# expected ", scalar @expect, " results, got ", scalar @got, "\n";
188X print "# expected (", join (', ', @expect),
189X ") got (", join (', ', @got), ")\n";
190X }
191X else
192X {
193X while (@got)
194X {
195X $expect = shift @expect;
196X $got = shift @got;
197X
198X if ($expect ne $got)
199X {
200X $bad++;
201X print "not ok $test\n";
202X print "# expected $expect, got $got\n";
203X }
204X }
205X }
206X }
207X else
208X {
209X ($in, $out) = split (':');
210X
211X $try = "\$expect = $out; \$got = &soundex ($in);";
212X eval ($try);
213X
214X if ($expect ne $got)
215X {
216X $bad++;
217X print "not ok $test\n";
218X print "# expected $expect, got $got\n";
219X }
220X }
221X
222X print "ok $test\n" unless $bad;
223}
224X
225__END__
226#
227# 1..6
228#
229# Knuth's test cases, scalar in, scalar out
230#
231'Euler':'E460'
232'Gauss':'G200'
233'Hilbert':'H416'
234'Knuth':'K530'
235'Lloyd':'L300'
236'Lukasiewicz':'L222'
237#
238# 7..8
239#
240# check default bad code
241#
242'2 + 2 = 4':undef
243undef:undef
244#
245# 9
246#
247# check array in, array out
248#
249('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222')
250#
251# 10
252#
253# check array with explicit undef
254#
255('Mike', undef, 'Stok'):('M200', undef, 'S320')
256#
257# 11..12
258#
259# check setting $soundex'noCode
260#
261eval $soundex'noCode = 'Z000';
262('Mike', undef, 'Stok'):('M200', 'Z000', 'S320')
263#
264# 13
265#
266# a subtle difference between me & oracle, spotted by Rich Pinder
267# <rpinder@hsc.usc.edu>
268#
269CZARKOWSKA:C622
270SHAR_EOF
271chmod 0555 soundex.t ||
272echo 'restore of soundex.t failed'
273Wc_c="`wc -c < 'soundex.t'`"
274test 2408 -eq "$Wc_c" ||
275 echo 'soundex.t: original size 2408, current size' "$Wc_c"
276fi
277exit 0
278
279--
280The "usual disclaimers" apply. | Meiko
281Mike Stok | 130C Baker Ave. Ext
282Mike.Stok@meiko.concord.ma.us | Concord, MA 01742
283Meiko tel: (508) 371 0088 |
284
285