3 /* (c) Copyright 1998-2003 by Mark Mielke
5 * Freedom to use these sources for whatever you want, as long as credit
6 * is given where credit is due, is hereby granted. You may make modifications
7 * where you see fit but leave this copyright somewhere visible. As well try
8 * to initial any changes you make so that if i like the changes i can
9 * incorporate them into any later versions of mine.
11 * - Mark Mielke <mark@mielke.cc>
18 #define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */
20 #if !(PERL_REVISION >= 5 && PERL_VERSION >= 8)
21 # define utf8n_to_uvchr utf8_to_uv
24 static char sv_soundex_table[0x100];
25 static void sv_soundex_initialize (void)
27 memset(&sv_soundex_table[0], '\0', sizeof(sv_soundex_table));
28 sv_soundex_table['A'] = '0';
29 sv_soundex_table['a'] = '0';
30 sv_soundex_table['E'] = '0';
31 sv_soundex_table['e'] = '0';
32 sv_soundex_table['H'] = '0';
33 sv_soundex_table['h'] = '0';
34 sv_soundex_table['I'] = '0';
35 sv_soundex_table['i'] = '0';
36 sv_soundex_table['O'] = '0';
37 sv_soundex_table['o'] = '0';
38 sv_soundex_table['U'] = '0';
39 sv_soundex_table['u'] = '0';
40 sv_soundex_table['W'] = '0';
41 sv_soundex_table['w'] = '0';
42 sv_soundex_table['Y'] = '0';
43 sv_soundex_table['y'] = '0';
44 sv_soundex_table['B'] = '1';
45 sv_soundex_table['b'] = '1';
46 sv_soundex_table['F'] = '1';
47 sv_soundex_table['f'] = '1';
48 sv_soundex_table['P'] = '1';
49 sv_soundex_table['p'] = '1';
50 sv_soundex_table['V'] = '1';
51 sv_soundex_table['v'] = '1';
52 sv_soundex_table['C'] = '2';
53 sv_soundex_table['c'] = '2';
54 sv_soundex_table['G'] = '2';
55 sv_soundex_table['g'] = '2';
56 sv_soundex_table['J'] = '2';
57 sv_soundex_table['j'] = '2';
58 sv_soundex_table['K'] = '2';
59 sv_soundex_table['k'] = '2';
60 sv_soundex_table['Q'] = '2';
61 sv_soundex_table['q'] = '2';
62 sv_soundex_table['S'] = '2';
63 sv_soundex_table['s'] = '2';
64 sv_soundex_table['X'] = '2';
65 sv_soundex_table['x'] = '2';
66 sv_soundex_table['Z'] = '2';
67 sv_soundex_table['z'] = '2';
68 sv_soundex_table['D'] = '3';
69 sv_soundex_table['d'] = '3';
70 sv_soundex_table['T'] = '3';
71 sv_soundex_table['t'] = '3';
72 sv_soundex_table['L'] = '4';
73 sv_soundex_table['l'] = '4';
74 sv_soundex_table['M'] = '5';
75 sv_soundex_table['m'] = '5';
76 sv_soundex_table['N'] = '5';
77 sv_soundex_table['n'] = '5';
78 sv_soundex_table['R'] = '6';
79 sv_soundex_table['r'] = '6';
82 static SV *sv_soundex (SV *source)
89 source_p = SvPV(source, source_len);
90 source_end = &source_p[source_len];
93 while (source_p != source_end)
95 char codepart_last = sv_soundex_table[(unsigned char) *source_p];
97 if (codepart_last != '\0')
99 SV *code = newSV(SOUNDEX_ACCURACY);
100 char *code_p = SvPVX(code);
101 char *code_end = &code_p[SOUNDEX_ACCURACY];
103 SvCUR_set(code, SOUNDEX_ACCURACY);
106 *code_p++ = toupper(*source_p++);
108 while (source_p != source_end && code_p != code_end)
110 char c = *source_p++;
111 char codepart = sv_soundex_table[(unsigned char) c];
113 if (codepart != '\0')
114 if (codepart != codepart_last && (codepart_last = codepart) != '0')
115 *code_p++ = codepart;
118 while (code_p != code_end)
129 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
132 static SV *sv_soundex_utf8 (SV* source)
139 source_p = (U8 *) SvPV(source, source_len);
140 source_end = &source_p[source_len];
143 while (source_p < source_end)
146 UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
147 char codepart_last = (c <= 0xFF) ? sv_soundex_table[c] : '\0';
148 source_p = (offset >= 1) ? &source_p[offset] : source_end;
150 if (codepart_last != '\0')
152 SV *code = newSV(SOUNDEX_ACCURACY);
153 char *code_p = SvPVX(code);
154 char *code_end = &code_p[SOUNDEX_ACCURACY];
156 SvCUR_set(code, SOUNDEX_ACCURACY);
159 *code_p++ = toupper(c);
161 while (source_p != source_end && code_p != code_end)
164 c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
165 codepart = (c <= 0xFF) ? sv_soundex_table[c] : '\0';
166 source_p = (offset >= 1) ? &source_p[offset] : source_end;
168 if (codepart != '\0')
169 if (codepart != codepart_last && (codepart_last = codepart) != '0')
170 *code_p++ = codepart;
173 while (code_p != code_end)
184 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
187 MODULE = Text::Soundex PACKAGE = Text::Soundex
195 sv_soundex_initialize();
200 for (i = 0; i < items; i++)
205 sv = sv_soundex_utf8(sv);
209 PUSHs(sv_2mortal(sv));