3 /* (c) Copyright 1998-2003 by Mark Mielke
5 * Freedom to use these sources for whatever you want, as long as credit
6 * is given where credit is due, is hereby granted. You may make modifications
7 * where you see fit but leave this copyright somewhere visible. As well try
8 * to initial any changes you make so that if i like the changes i can
9 * incorporate them into any later versions of mine.
11 * - Mark Mielke <mark@mielke.cc>
/* Length of every generated code: the encoders in this file pass this value
 * to newSV() and SvCUR_set() and use it to locate the end of the output. */
18 #define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */
/* Compatibility shim: when the PERL_REVISION/PERL_VERSION test for 5.8 or
 * newer fails, utf8n_to_uvchr is defined as an alias for utf8_to_uv so the
 * UTF-8 encoder can call a single name.
 * NOTE(review): the matching #endif is not visible in this excerpt. */
20 #if !(PERL_REVISION >= 5 && PERL_VERSION >= 8)
21 # define utf8n_to_uvchr utf8_to_uv
/* Soundex digit for each uppercase ASCII letter, indexed by (letter - 'A').
 * The comment row inside the initializer names the letter that each position
 * of the 26-character string belongs to. The encoders compare a looked-up
 * digit against '0' before keeping it, so letters mapped to '0' here
 * (A, E, H, I, O, U, W, Y) do not contribute a digit of their own. */
24 static char *soundex_table =
25 /*ABCDEFGHIJKLMNOPQRSTUVWXYZ*/
26 "01230120022455012623010202";
/* Builds the Soundex code for the byte string held in `source`.
 * Once a 7-bit ASCII letter is found, a new SV of SOUNDEX_ACCURACY characters
 * is created: the letter itself (uppercased) followed by soundex_table digits
 * of the letters after it.
 * NOTE(review): this excerpt omits lines (parameter/local declarations,
 * braces, some loop bodies). Comments saying "presumably" describe those
 * gaps and should be confirmed against the full file. */
28 static SV *sv_soundex (source)
/* Fetch the string buffer and its byte length, then the one-past-end pointer. */
36 source_p = SvPV(source, source_len);
37 source_end = &source_p[source_len];
/* Scan the input for the first usable letter (presumably stepping over
 * every other byte; the non-matching path is not visible here). */
40 while (source_p != source_end)
/* The mask rejects anything above 0x7F before isalpha() is consulted. */
42 if ((*source_p & ~((UV) 0x7F)) == 0 && isalpha(*source_p))
/* Result SV plus a write cursor and the end of the fixed-size output. */
44 SV *code = newSV(SOUNDEX_ACCURACY);
45 char *code_p = SvPVX(code);
46 char *code_end = &code_p[SOUNDEX_ACCURACY];
/* The string length is fixed up front; the loops below fill the buffer. */
49 SvCUR_set(code, SOUNDEX_ACCURACY);
/* Store the uppercased first letter as the first output character, advance
 * both cursors, and remember that letter's digit in code_last so an
 * immediately following letter with the same digit is not emitted. */
52 code_last = soundex_table[(*code_p++ = toupper(*source_p++)) - 'A'];
/* Consume input until it runs out or the output is full. */
54 while (source_p != source_end && code_p != code_end)
/* `c` is assigned on a line not visible here (presumably the next input byte). */
58 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
/* Write the candidate digit at the current output position. */
60 *code_p = soundex_table[toupper(c) - 'A'];
/* Keep the digit only if it differs from the previous one and is not '0'.
 * code_last is updated whenever the digit differs, including to '0', so a
 * '0' letter between two letters with equal digits lets the second through.
 * The statement that commits the digit (presumably advancing code_p) is not
 * visible here. */
61 if (*code_p != code_last && (code_last = *code_p) != '0')
/* Runs until the output buffer is full; the loop body is not visible here
 * (presumably it pads the remaining positions). */
66 while (code_p != code_end)
/* Looks up $Text::Soundex::nocode and returns it with its reference count
 * incremented. NOTE(review): presumably the fallback taken when no ASCII
 * letter was found; the statement returning `code` is not visible here. */
77 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
/* UTF-8 aware variant of the Soundex encoder: the input is decoded one code
 * point at a time with utf8n_to_uvchr(), and only code points in the 7-bit
 * ASCII range that are letters take part in the code. The output has the same
 * layout as in sv_soundex: first letter uppercased, then soundex_table digits,
 * SOUNDEX_ACCURACY characters in total.
 * NOTE(review): this excerpt omits lines (parameter/local declarations,
 * braces, some loop bodies). Comments saying "presumably" describe those
 * gaps and should be confirmed against the full file. */
80 static SV *sv_soundex_utf8 (source)
/* Fetch the buffer as unsigned bytes, then the one-past-end pointer. */
88 source_p = (U8 *) SvPV(source, source_len);
89 source_end = &source_p[source_len];
/* Scan the input for the first usable letter. */
92 while (source_p < source_end)
/* Decode one code point from the remaining bytes; `offset` receives the
 * number of bytes consumed. */
95 UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
/* Advance past the decoded sequence; if nothing was consumed, jump to the
 * end so that scanning stops instead of looping on the same bytes. */
96 source_p = (offset >= 1) ? &source_p[offset] : source_end;
/* The mask rejects every code point above 0x7F before isalpha() is consulted. */
98 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
/* Result SV plus a write cursor and the end of the fixed-size output. */
100 SV *code = newSV(SOUNDEX_ACCURACY);
101 char *code_p = SvPVX(code);
102 char *code_end = &code_p[SOUNDEX_ACCURACY];
/* The string length is fixed up front; the loops below fill the buffer. */
105 SvCUR_set(code, SOUNDEX_ACCURACY);
/* Store the uppercased first letter and remember its digit in code_last so
 * an immediately following letter with the same digit is not emitted.
 * The input cursor was already advanced by the decode step above. */
108 code_last = soundex_table[(*code_p++ = toupper(c)) - 'A'];
/* Consume input until it runs out or the output is full. */
110 while (source_p != source_end && code_p != code_end)
/* Same decode-and-advance step as in the outer scan. */
112 c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
113 source_p = (offset >= 1) ? &source_p[offset] : source_end;
115 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
/* Write the candidate digit at the current output position. */
117 *code_p = soundex_table[toupper(c) - 'A'];
/* Keep the digit only if it differs from the previous one and is not '0'.
 * code_last is updated whenever the digit differs, including to '0'.
 * The statement that commits the digit (presumably advancing code_p) is not
 * visible here. */
118 if (*code_p != code_last && (code_last = *code_p) != '0')
/* Runs until the output buffer is full; the loop body is not visible here
 * (presumably it pads the remaining positions). */
123 while (code_p != code_end)
/* Looks up $Text::Soundex::nocode and returns it with its reference count
 * incremented. NOTE(review): presumably the fallback taken when no ASCII
 * letter was found; the statement returning `code` is not visible here. */
134 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
/* XS section for module and package Text::Soundex.
 * The visible fragment of the XS body loops over every argument (`items`),
 * can replace `sv` with the result of sv_soundex_utf8(), and pushes each
 * result onto the return stack as a mortal SV.
 * NOTE(review): the XS function header, the code that loads `sv` from the
 * stack, and the condition choosing between sv_soundex_utf8() and (presumably)
 * sv_soundex() are not visible in this excerpt; confirm against the full file. */
137 MODULE = Text::Soundex PACKAGE = Text::Soundex
146 for (i = 0; i < items; i++)
151 sv = sv_soundex_utf8(sv);
155 PUSHs(sv_2mortal(sv));