Commit | Line | Data |
11f885b5 |
1 | /* -*- c -*- */ |
2 | |
3 | /* (c) Copyright 1998-2003 by Mark Mielke |
4 | * |
5 | * Freedom to use these sources for whatever you want, as long as credit |
6 | * is given where credit is due, is hereby granted. You may make modifications |
7 | * where you see fit but leave this copyright somewhere visible. As well try |
8 | * to initial any changes you make so that if i like the changes i can |
9 | * incorporate them into any later versions of mine. |
10 | * |
11 | * - Mark Mielke <mark@mielke.cc> |
12 | */ |
13 | |
14 | #include "EXTERN.h" |
15 | #include "perl.h" |
16 | #include "XSUB.h" |
17 | |
18 | #define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */ |
19 | |
20 | #if !(PERL_REVISION >= 5 && PERL_VERSION >= 8) |
21 | # define utf8n_to_uvchr utf8_to_uv |
22 | #endif |
23 | |
24 | static char *soundex_table = |
25 | /*ABCDEFGHIJKLMNOPQRSTUVWXYZ*/ |
26 | "01230120022455012623010202"; |
27 | |
81a4c762 |
28 | static SV *sv_soundex (SV *source) |
11f885b5 |
29 | { |
30 | char *source_p; |
31 | char *source_end; |
32 | |
33 | { |
34 | STRLEN source_len; |
35 | source_p = SvPV(source, source_len); |
36 | source_end = &source_p[source_len]; |
37 | } |
38 | |
39 | while (source_p != source_end) |
40 | { |
41 | if ((*source_p & ~((UV) 0x7F)) == 0 && isalpha(*source_p)) |
42 | { |
43 | SV *code = newSV(SOUNDEX_ACCURACY); |
44 | char *code_p = SvPVX(code); |
45 | char *code_end = &code_p[SOUNDEX_ACCURACY]; |
46 | char code_last; |
47 | |
48 | SvCUR_set(code, SOUNDEX_ACCURACY); |
49 | SvPOK_only(code); |
50 | |
51 | code_last = soundex_table[(*code_p++ = toupper(*source_p++)) - 'A']; |
52 | |
53 | while (source_p != source_end && code_p != code_end) |
54 | { |
55 | char c = *source_p++; |
56 | |
57 | if ((c & ~((UV) 0x7F)) == 0 && isalpha(c)) |
58 | { |
59 | *code_p = soundex_table[toupper(c) - 'A']; |
60 | if (*code_p != code_last && (code_last = *code_p) != '0') |
61 | code_p++; |
62 | } |
63 | } |
64 | |
65 | while (code_p != code_end) |
66 | *code_p++ = '0'; |
67 | |
68 | *code_end = '\0'; |
69 | |
70 | return code; |
71 | } |
72 | |
73 | source_p++; |
74 | } |
75 | |
76 | return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE)); |
77 | } |
78 | |
81a4c762 |
79 | static SV *sv_soundex_utf8 (SV* source) |
11f885b5 |
80 | { |
81 | U8 *source_p; |
82 | U8 *source_end; |
83 | |
84 | { |
85 | STRLEN source_len; |
86 | source_p = (U8 *) SvPV(source, source_len); |
87 | source_end = &source_p[source_len]; |
88 | } |
89 | |
90 | while (source_p < source_end) |
91 | { |
92 | STRLEN offset; |
93 | UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0); |
94 | source_p = (offset >= 1) ? &source_p[offset] : source_end; |
95 | |
96 | if ((c & ~((UV) 0x7F)) == 0 && isalpha(c)) |
97 | { |
98 | SV *code = newSV(SOUNDEX_ACCURACY); |
99 | char *code_p = SvPVX(code); |
100 | char *code_end = &code_p[SOUNDEX_ACCURACY]; |
101 | char code_last; |
102 | |
103 | SvCUR_set(code, SOUNDEX_ACCURACY); |
104 | SvPOK_only(code); |
105 | |
106 | code_last = soundex_table[(*code_p++ = toupper(c)) - 'A']; |
107 | |
108 | while (source_p != source_end && code_p != code_end) |
109 | { |
110 | c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0); |
111 | source_p = (offset >= 1) ? &source_p[offset] : source_end; |
112 | |
113 | if ((c & ~((UV) 0x7F)) == 0 && isalpha(c)) |
114 | { |
115 | *code_p = soundex_table[toupper(c) - 'A']; |
116 | if (*code_p != code_last && (code_last = *code_p) != '0') |
117 | code_p++; |
118 | } |
119 | } |
120 | |
121 | while (code_p != code_end) |
122 | *code_p++ = '0'; |
123 | |
124 | *code_end = '\0'; |
125 | |
126 | return code; |
127 | } |
128 | |
129 | source_p++; |
130 | } |
131 | |
132 | return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE)); |
133 | } |
134 | |
135 | MODULE = Text::Soundex PACKAGE = Text::Soundex |
136 | |
137 | PROTOTYPES: DISABLE |
138 | |
139 | void |
140 | soundex_xs (...) |
141 | PPCODE: |
142 | { |
143 | int i; |
144 | for (i = 0; i < items; i++) |
145 | { |
146 | SV *sv = ST(i); |
147 | |
148 | if (DO_UTF8(sv)) |
149 | sv = sv_soundex_utf8(sv); |
150 | else |
151 | sv = sv_soundex(sv); |
152 | |
153 | PUSHs(sv_2mortal(sv)); |
154 | } |
155 | } |