3 /* (c) Copyright 1998-2003 by Mark Mielke
5 * Freedom to use these sources for whatever you want, as long as credit
6 * is given where credit is due, is hereby granted. You may make modifications
7 * where you see fit but leave this copyright somewhere visible. As well try
8 * to initial any changes you make so that if i like the changes i can
9 * incorporate them into any later versions of mine.
11 * - Mark Mielke <mark@mielke.cc>
/* Width of a generated code: the encoders below pass this value to newSV()
 * and SvCUR_set(), and use it to locate the end of the code buffer. */
18 #define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */
/* Compatibility shim: unless PERL_REVISION >= 5 && PERL_VERSION >= 8 holds,
 * utf8n_to_uvchr is aliased to utf8_to_uv.
 * NOTE(review): presumably such perls do not provide utf8n_to_uvchr --
 * confirm against perlapi for the perl versions being targeted. */
20 #if !(PERL_REVISION >= 5 && PERL_VERSION >= 8)
21 # define utf8n_to_uvchr utf8_to_uv
/* Digit assigned to each letter 'A'..'Z'; the commented alphabet row lines
 * up with the digit row beneath it.  The encoders index this table with
 * toupper(c) - 'A'.  A '0' entry never satisfies the `!= '0'` test that the
 * encoders apply to each freshly looked-up digit. */
24 static char *soundex_table =
25 /*ABCDEFGHIJKLMNOPQRSTUVWXYZ*/
26 "01230120022455012623010202";
/* Build a soundex code from the raw bytes of `source`.
 *
 * The input is scanned for an ASCII alphabetic byte; when one is found a new
 * SV is created and set up as a SOUNDEX_ACCURACY-character code.  The last
 * statement returns the package variable Text::Soundex::nocode with its
 * reference count incremented.
 *
 * NOTE(review): several lines of this function (local declarations, braces,
 * the success-path return) are not visible in this excerpt -- confirm the
 * details below against the full file. */
28 static SV *sv_soundex (SV *source)
/* Fetch the string buffer and its length, then compute the one-past-end
 * pointer used as the loop sentinel. */
35 source_p = SvPV(source, source_len);
36 source_end = &source_p[source_len];
/* Walk the input until the end of the buffer is reached. */
39 while (source_p != source_end)
/* Only bytes with no bits set above 0x7F that are also alphabetic qualify. */
41 if ((*source_p & ~((UV) 0x7F)) == 0 && isalpha(*source_p))
/* Create the result SV and set up write/end pointers into its buffer. */
43 SV *code = newSV(SOUNDEX_ACCURACY);
44 char *code_p = SvPVX(code);
45 char *code_end = &code_p[SOUNDEX_ACCURACY];
/* The result's string length is fixed at the full code width. */
48 SvCUR_set(code, SOUNDEX_ACCURACY);
/* The upper-cased first letter is stored verbatim as the first code
 * character; its table digit is remembered in code_last so that an
 * immediately repeated digit can be detected. */
51 code_last = soundex_table[(*code_p++ = toupper(*source_p++)) - 'A'];
/* Keep consuming input until it runs out or the code buffer is full. */
53 while (source_p != source_end && code_p != code_end)
/* Same ASCII-letter filter as above, applied to the current character c. */
57 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
/* Tentatively write this letter's digit at the current code position. */
59 *code_p = soundex_table[toupper(c) - 'A'];
/* True only for a digit that differs from the previous one and is not '0'.
 * code_last is updated whenever the digit differs, even if it is '0'. */
60 if (*code_p != code_last && (code_last = *code_p) != '0')
/* Runs while unfilled positions remain in the code buffer.
 * NOTE(review): loop body not visible here; presumably pads the code. */
65 while (code_p != code_end)
/* Look up Text::Soundex::nocode (create flag FALSE) and return it with an
 * extra reference. */
76 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
/* Variant of sv_soundex that decodes `source` one character at a time with
 * utf8n_to_uvchr instead of reading single bytes.
 *
 * Decoded characters with any bit set above 0x7F are rejected by the same
 * filter as in the byte version, so only ASCII letters contribute to the
 * code.  The last statement returns Text::Soundex::nocode with its reference
 * count incremented.
 *
 * NOTE(review): several lines of this function (local declarations, braces,
 * the success-path return) are not visible in this excerpt -- confirm the
 * details below against the full file. */
79 static SV *sv_soundex_utf8 (SV* source)
/* Fetch the string buffer as unsigned bytes plus its length, then compute
 * the one-past-end pointer. */
86 source_p = (U8 *) SvPV(source, source_len);
87 source_end = &source_p[source_len];
/* NOTE(review): this loop tests with `<` while the inner loop below tests
 * with `!=`; confirm the difference is intentional. */
90 while (source_p < source_end)
/* Decode one character from the remaining bytes; `offset` receives the
 * number of bytes it occupied. */
93 UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
/* Advance past the decoded character; if no length was reported
 * (offset < 1), jump to the end so the scan terminates. */
94 source_p = (offset >= 1) ? &source_p[offset] : source_end;
/* Only code points with no bits above 0x7F that are alphabetic qualify. */
96 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
/* Create the result SV and set up write/end pointers into its buffer. */
98 SV *code = newSV(SOUNDEX_ACCURACY);
99 char *code_p = SvPVX(code);
100 char *code_end = &code_p[SOUNDEX_ACCURACY];
/* The result's string length is fixed at the full code width. */
103 SvCUR_set(code, SOUNDEX_ACCURACY);
/* The upper-cased first letter is stored verbatim as the first code
 * character; its table digit is remembered in code_last. */
106 code_last = soundex_table[(*code_p++ = toupper(c)) - 'A'];
/* Keep decoding until the input runs out or the code buffer is full. */
108 while (source_p != source_end && code_p != code_end)
/* Same decode-and-advance step as in the outer loop. */
110 c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
111 source_p = (offset >= 1) ? &source_p[offset] : source_end;
113 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
/* Tentatively write this letter's digit at the current code position. */
115 *code_p = soundex_table[toupper(c) - 'A'];
/* True only for a digit that differs from the previous one and is not '0'.
 * code_last is updated whenever the digit differs, even if it is '0'. */
116 if (*code_p != code_last && (code_last = *code_p) != '0')
/* Runs while unfilled positions remain in the code buffer.
 * NOTE(review): loop body not visible here; presumably pads the code. */
121 while (code_p != code_end)
/* Look up Text::Soundex::nocode (create flag FALSE) and return it with an
 * extra reference. */
132 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
/* Start of the XS section; the XSUBs that follow are bound to the Perl
 * package Text::Soundex.
 * NOTE(review): the XSUB declaration and the test that selects between the
 * two encoders are not visible in this excerpt. */
135 MODULE = Text::Soundex PACKAGE = Text::Soundex
144 for (i = 0; i < items; i++)
/* Replace the argument with the SV returned by the UTF-8 encoder. */
149 sv = sv_soundex_utf8(sv);
/* Push the result onto the Perl stack as a mortal SV. */
153 PUSHs(sv_2mortal(sv));