/* -*- buffer-read-only: t -*-
 *
 *    Copyright (C) 2007, by Larry Wall and others
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 *
 * !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
 * This file is built by Porting/regcharclass.pl.
 * (Generated at: Tue Apr 24 12:19:13 2007 GMT)
 * Any changes made here will be lost!
 */

/*
	LNBREAK: Line Break: \R

	"\x0D\x0A"      # CRLF - Network (Windows) line ending
	0x0A            # LF  | LINE FEED
	0x0B            # VT  | VERTICAL TAB
	0x0C            # FF  | FORM FEED
	0x0D            # CR  | CARRIAGE RETURN
	0x85            # NEL | NEXT LINE
	0x2028          # LINE SEPARATOR
	0x2029          # PARAGRAPH SEPARATOR
*/
28 /*** GENERATED CODE ***/
/*
 * is_LNBREAK(s, is_utf8)
 *
 * Tests whether the bytes at s begin a line break (\R).  Evaluates to the
 * number of bytes matched (1, 2 or 3), or 0 when nothing matches.  CR LF is
 * matched as one two-byte break.  When is_utf8 is true, NEL, LINE SEPARATOR
 * and PARAGRAPH SEPARATOR are matched by their UTF-8 byte sequences;
 * otherwise NEL is the single byte 0x85.
 *
 * No bounds checking is done: s[1] and s[2] are read once the preceding
 * bytes match.  s is expanded several times, so it must have no side
 * effects.
 */
#define is_LNBREAK(s,is_utf8) \
( ( ((U8*)(s))[0] == 0x0D ) ? \
    ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
: ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
: ( is_utf8 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE2 && ((U8*)(s))[1] == 0x80 && \
        ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0x85 ) )
39 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_safe(s, e, is_utf8)
 *
 * Bounds-checked form of the line break (\R) test.  e marks the end of the
 * readable bytes; only bytes in [s, e) are examined.  Evaluates to the
 * number of bytes matched (1, 2 or 3), or 0 when nothing matches or when
 * too few bytes remain for a sequence.  A CR that is the last available
 * byte matches with length 1.
 *
 * When is_utf8 is true the multi-byte UTF-8 forms are matched; otherwise
 * the single byte 0x85 (NEL) is matched.  s, e and is_utf8 are expanded
 * several times, so they must have no side effects.
 */
#define is_LNBREAK_safe(s,e,is_utf8) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((U8*)(s))[0] == 0x0D ) ? \
        ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
    : ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC2 ) ? \
            ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
        : ( ((U8*)(s))[0] == 0xE2 && ((U8*)(s))[1] == 0x80 && \
            ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0x85 ) ) \
: ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0x0D ) ? \
        ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
    : ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC2 && ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0x85 ) ) \
: ( (e) - (s) > 0 ) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : !( is_utf8 ) ? \
        ( ((U8*)(s))[0] == 0x85 ) \
    : 0 ) \
: 0 )
62 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_utf8(s)
 *
 * Line break (\R) test for UTF-8 encoded bytes at s.  Evaluates to the
 * number of bytes matched: 2 for NEL (C2 85) or CR LF, 3 for LINE SEPARATOR
 * / PARAGRAPH SEPARATOR (E2 80 A8 / E2 80 A9), 1 for a lone CR or for
 * 0x0A..0x0C, and 0 otherwise.
 *
 * No bounds checking is done: s[1] and s[2] are read once the preceding
 * bytes match.  s is expanded several times, so it must have no side
 * effects.
 */
#define is_LNBREAK_utf8(s) \
( ( ((U8*)(s))[0] == 0xC2 ) ? \
    ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
: ( ((U8*)(s))[0] == 0xE2 ) ? \
    ( ( ((U8*)(s))[1] == 0x80 && \
        ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0x0D ) ? \
    ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
: ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) )
72 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_utf8_safe(s, e)
 *
 * Bounds-checked line break (\R) test for UTF-8 encoded bytes.  Only bytes
 * in [s, e) are examined.  Evaluates to the number of bytes matched (1, 2
 * or 3), or 0 when nothing matches or too few bytes remain for a sequence.
 * A CR that is the last available byte matches with length 1.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_LNBREAK_utf8_safe(s,e) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE2 ) ? \
        ( ( ((U8*)(s))[1] == 0x80 && \
            ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0x0D ) ? \
        ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
    : ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ) \
: ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0x0D ) ? \
        ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
    : ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ) \
: ( (e) - (s) > 0 ) ? \
    ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) \
: 0 )
91 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_latin1(s)
 *
 * Line break (\R) test for single-byte (Latin-1) data at s.  Evaluates to
 * 2 for CR LF, 1 for a lone CR, for 0x0A..0x0C or for NEL (0x85), and 0
 * otherwise.
 *
 * s[1] is read without a bounds check whenever s[0] is CR.  s is expanded
 * several times, so it must have no side effects.
 */
#define is_LNBREAK_latin1(s) \
( ( ((U8*)(s))[0] == 0x0D ) ? \
    ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
: ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) || \
    ((U8*)(s))[0] == 0x85 ) )
97 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_latin1_safe(s, e)
 *
 * Bounds-checked line break (\R) test for single-byte (Latin-1) data.  Only
 * bytes in [s, e) are examined.  Evaluates to 2 for CR LF, 1 for a lone CR,
 * 0x0A..0x0C or NEL (0x85), and 0 otherwise or when no bytes remain.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_LNBREAK_latin1_safe(s,e) \
( ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0x0D ) ? \
        ( ( ((U8*)(s))[1] == 0x0A ) ? 2 : 1 ) \
    : ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) || \
        ((U8*)(s))[0] == 0x85 ) ) \
: ( (e) - (s) > 0 ) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || \
      ((U8*)(s))[0] == 0x85 ) \
: 0 )
106 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_cp(cp)
 *
 * True (1) when the code point cp is a single-character line break:
 * 0x0A..0x0D, 0x85, 0x2028 or 0x2029; otherwise 0.  The tests are chained
 * in ascending order so small code points are rejected after few
 * comparisons.
 *
 * cp is expanded several times, so it must have no side effects.
 */
#define is_LNBREAK_cp(cp) \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || ( (cp) > 0x0D && \
( (cp) == 0x85 || ( (cp) > 0x85 && \
( (cp) == 0x2028 || ( (cp) > 0x2028 && \
(cp) == 0x2029 ) ) ) ) ) )
/*
	HORIZWS: Horizontal Whitespace: \h \H

	0x09            # HT
	0x20            # SPACE
	0xa0            # NBSP
	0x1680          # OGHAM SPACE MARK
	0x180e          # MONGOLIAN VOWEL SEPARATOR
	0x2000          # EN QUAD
	0x2001          # EM QUAD
	0x2002          # EN SPACE
	0x2003          # EM SPACE
	0x2004          # THREE-PER-EM SPACE
	0x2005          # FOUR-PER-EM SPACE
	0x2006          # SIX-PER-EM SPACE
	0x2007          # FIGURE SPACE
	0x2008          # PUNCTUATION SPACE
	0x2009          # THIN SPACE
	0x200A          # HAIR SPACE
	0x202f          # NARROW NO-BREAK SPACE
	0x205f          # MEDIUM MATHEMATICAL SPACE
	0x3000          # IDEOGRAPHIC SPACE
*/
136 /*** GENERATED CODE ***/
/*
 * is_HORIZWS(s, is_utf8)
 *
 * Tests whether the bytes at s begin a horizontal whitespace character
 * (\h).  Evaluates to the number of bytes matched (1, 2 or 3), or 0 when
 * nothing matches.  TAB (0x09) and SPACE (0x20) match in either encoding.
 * When is_utf8 is true, U+00A0, U+1680, U+180E, U+2000..U+200A, U+202F,
 * U+205F and U+3000 are matched by their UTF-8 byte sequences; otherwise
 * only the single byte 0xA0 is matched in addition.
 *
 * No bounds checking is done: s[1] and s[2] are read once the preceding
 * bytes match.  s is expanded several times, so it must have no side
 * effects.
 */
#define is_HORIZWS(s,is_utf8) \
( ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) ? 1 \
: ( is_utf8 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0xA0 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE1 ) ? \
        ( ( ((U8*)(s))[1] == 0xA0 ) ? \
            ( ( ((U8*)(s))[2] == 0x8E ) ? 3 : 0 ) \
        : ( ((U8*)(s))[1] == 0x9A && ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE2 ) ? \
        ( ( ((U8*)(s))[1] == 0x81 ) ? \
            ( ( ((U8*)(s))[2] == 0x9F ) ? 3 : 0 ) \
        : ( ((U8*)(s))[1] == 0x80 && \
            ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || \
              ((U8*)(s))[2] == 0xAF ) ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE3 && ((U8*)(s))[1] == 0x80 && \
        ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0xA0 ) )
153 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_safe(s, e, is_utf8)
 *
 * Bounds-checked horizontal whitespace (\h) test.  Only bytes in [s, e) are
 * examined.  Evaluates to the number of bytes matched (1, 2 or 3), or 0
 * when nothing matches or too few bytes remain for a sequence.
 *
 * TAB and SPACE match in either encoding.  When is_utf8 is true the
 * multi-byte UTF-8 forms are matched; otherwise the single byte 0xA0 is
 * matched.  s, e and is_utf8 are expanded several times, so they must have
 * no side effects.
 */
#define is_HORIZWS_safe(s,e,is_utf8) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC2 ) ? \
            ( ( ((U8*)(s))[1] == 0xA0 ) ? 2 : 0 ) \
        : ( ((U8*)(s))[0] == 0xE1 ) ? \
            ( ( ((U8*)(s))[1] == 0xA0 ) ? \
                ( ( ((U8*)(s))[2] == 0x8E ) ? 3 : 0 ) \
            : ( ((U8*)(s))[1] == 0x9A && ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
        : ( ((U8*)(s))[0] == 0xE2 ) ? \
            ( ( ((U8*)(s))[1] == 0x81 ) ? \
                ( ( ((U8*)(s))[2] == 0x9F ) ? 3 : 0 ) \
            : ( ((U8*)(s))[1] == 0x80 && \
                ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || \
                  ((U8*)(s))[2] == 0xAF ) ) ? 3 : 0 ) \
        : ( ((U8*)(s))[0] == 0xE3 && ((U8*)(s))[1] == 0x80 && \
            ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0xA0 ) ) \
: ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC2 && ((U8*)(s))[1] == 0xA0 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xA0 ) ) \
: ( (e) - (s) > 0 ) ? \
    ( ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) ? 1 \
    : !( is_utf8 ) ? \
        ( ((U8*)(s))[0] == 0xA0 ) \
    : 0 ) \
: 0 )
180 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_utf8(s)
 *
 * Horizontal whitespace (\h) test for UTF-8 encoded bytes at s.  Evaluates
 * to 2 for U+00A0 (C2 A0); 3 for U+1680, U+180E, U+2000..U+200A, U+202F,
 * U+205F or U+3000; 1 for TAB or SPACE; and 0 otherwise.
 *
 * No bounds checking is done: s[1] and s[2] are read once the preceding
 * bytes match.  s is expanded several times, so it must have no side
 * effects.
 */
#define is_HORIZWS_utf8(s) \
( ( ((U8*)(s))[0] == 0xC2 ) ? \
    ( ( ((U8*)(s))[1] == 0xA0 ) ? 2 : 0 ) \
: ( ((U8*)(s))[0] == 0xE1 ) ? \
    ( ( ((U8*)(s))[1] == 0xA0 ) ? \
        ( ( ((U8*)(s))[2] == 0x8E ) ? 3 : 0 ) \
    : ( ((U8*)(s))[1] == 0x9A && ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0xE2 ) ? \
    ( ( ((U8*)(s))[1] == 0x81 ) ? \
        ( ( ((U8*)(s))[2] == 0x9F ) ? 3 : 0 ) \
    : ( ((U8*)(s))[1] == 0x80 && \
        ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || \
          ((U8*)(s))[2] == 0xAF ) ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0xE3 ) ? \
    ( ( ((U8*)(s))[1] == 0x80 && ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) )
196 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_utf8_safe(s, e)
 *
 * Bounds-checked horizontal whitespace (\h) test for UTF-8 encoded bytes.
 * Only bytes in [s, e) are examined.  Evaluates to the number of bytes
 * matched (1, 2 or 3), or 0 when nothing matches or too few bytes remain
 * for a sequence.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_HORIZWS_utf8_safe(s,e) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0xA0 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE1 ) ? \
        ( ( ((U8*)(s))[1] == 0xA0 ) ? \
            ( ( ((U8*)(s))[2] == 0x8E ) ? 3 : 0 ) \
        : ( ((U8*)(s))[1] == 0x9A && ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE2 ) ? \
        ( ( ((U8*)(s))[1] == 0x81 ) ? \
            ( ( ((U8*)(s))[2] == 0x9F ) ? 3 : 0 ) \
        : ( ((U8*)(s))[1] == 0x80 && \
            ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || \
              ((U8*)(s))[2] == 0xAF ) ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE3 ) ? \
        ( ( ((U8*)(s))[1] == 0x80 && ((U8*)(s))[2] == 0x80 ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) ) \
: ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0xA0 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) ) \
: ( (e) - (s) > 0 ) ? \
    ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 ) \
: 0 )
219 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_latin1(s)
 *
 * True (1) when the single byte at s is horizontal whitespace in Latin-1:
 * TAB (0x09), SPACE (0x20) or NO-BREAK SPACE (0xA0); otherwise 0.
 *
 * s is expanded several times, so it must have no side effects.
 */
#define is_HORIZWS_latin1(s) \
( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 || \
  ((U8*)(s))[0] == 0xA0 )
223 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_latin1_safe(s, e)
 *
 * Bounds-checked Latin-1 horizontal whitespace test.  Evaluates to 1 when
 * at least one byte lies in [s, e) and that byte is TAB (0x09), SPACE
 * (0x20) or NO-BREAK SPACE (0xA0); otherwise 0.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_HORIZWS_latin1_safe(s,e) \
( ( (e) - (s) > 0 ) ? \
    ( ((U8*)(s))[0] == 0x09 || ((U8*)(s))[0] == 0x20 || \
      ((U8*)(s))[0] == 0xA0 ) \
: 0 )
228 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_cp(cp)
 *
 * True (1) when the code point cp is horizontal whitespace: 0x09, 0x20,
 * 0xA0, 0x1680, 0x180E, 0x2000..0x200A, 0x202F, 0x205F or 0x3000;
 * otherwise 0.  The tests are chained in ascending order so small code
 * points are rejected after few comparisons.
 *
 * cp is expanded several times, so it must have no side effects.
 */
#define is_HORIZWS_cp(cp) \
( (cp) == 0x09 || ( (cp) > 0x09 && \
( (cp) == 0x20 || ( (cp) > 0x20 && \
( (cp) == 0xA0 || ( (cp) > 0xA0 && \
( (cp) == 0x1680 || ( (cp) > 0x1680 && \
( (cp) == 0x180E || ( (cp) > 0x180E && \
( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( (cp) > 0x200A && \
( (cp) == 0x202F || ( (cp) > 0x202F && \
( (cp) == 0x205F || ( (cp) > 0x205F && \
(cp) == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
/*
	VERTWS: Vertical Whitespace: \v \V

	0x0A            # LF
	0x0B            # VT
	0x0C            # FF
	0x0D            # CR
	0x85            # NEL
	0x2028          # LINE SEPARATOR
	0x2029          # PARAGRAPH SEPARATOR
*/
251 /*** GENERATED CODE ***/
/*
 * is_VERTWS(s, is_utf8)
 *
 * Tests whether the bytes at s begin a vertical whitespace character (\v).
 * Evaluates to the number of bytes matched (1, 2 or 3), or 0 when nothing
 * matches.  0x0A..0x0D match in either encoding; unlike the line-break
 * test, CR LF is not combined, so a CR always matches with length 1.
 * When is_utf8 is true, NEL, LINE SEPARATOR and PARAGRAPH SEPARATOR are
 * matched by their UTF-8 byte sequences; otherwise NEL is the byte 0x85.
 *
 * No bounds checking is done: s[1] and s[2] are read once the preceding
 * bytes match.  s is expanded several times, so it must have no side
 * effects.
 */
#define is_VERTWS(s,is_utf8) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
: ( is_utf8 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE2 && ((U8*)(s))[1] == 0x80 && \
        ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
: ( ((U8*)(s))[0] == 0x85 ) )
260 /*** GENERATED CODE ***/
/*
 * is_VERTWS_safe(s, e, is_utf8)
 *
 * Bounds-checked vertical whitespace (\v) test.  Only bytes in [s, e) are
 * examined.  Evaluates to the number of bytes matched (1, 2 or 3), or 0
 * when nothing matches or too few bytes remain for a sequence.
 *
 * 0x0A..0x0D match in either encoding.  When is_utf8 is true the
 * multi-byte UTF-8 forms are matched; otherwise the single byte 0x85 (NEL)
 * is matched.  s, e and is_utf8 are expanded several times, so they must
 * have no side effects.
 */
#define is_VERTWS_safe(s,e,is_utf8) \
( ( (e) - (s) > 2 ) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC2 ) ? \
            ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
        : ( ((U8*)(s))[0] == 0xE2 && ((U8*)(s))[1] == 0x80 && \
            ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
    : ( ((U8*)(s))[0] == 0x85 ) ) \
: ( (e) - (s) > 1 ) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC2 && ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0x85 ) ) \
: ( (e) - (s) > 0 ) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : !( is_utf8 ) ? \
        ( ((U8*)(s))[0] == 0x85 ) \
    : 0 ) \
: 0 )
279 /*** GENERATED CODE ***/
/*
 * is_VERTWS_utf8(s)
 *
 * Vertical whitespace (\v) test for UTF-8 encoded bytes at s.  Evaluates to
 * 2 for NEL (C2 85), 3 for LINE SEPARATOR / PARAGRAPH SEPARATOR
 * (E2 80 A8 / E2 80 A9), 1 for 0x0A..0x0D, and 0 otherwise.
 *
 * No bounds checking is done: s[1] and s[2] are read once the preceding
 * bytes match.  s is expanded several times, so it must have no side
 * effects.
 */
#define is_VERTWS_utf8(s) \
( ( ((U8*)(s))[0] == 0xC2 ) ? \
    ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
: ( ((U8*)(s))[0] == 0xE2 ) ? \
    ( ( ((U8*)(s))[1] == 0x80 && \
        ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
: ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) )
287 /*** GENERATED CODE ***/
/*
 * is_VERTWS_utf8_safe(s, e)
 *
 * Bounds-checked vertical whitespace (\v) test for UTF-8 encoded bytes.
 * Only bytes in [s, e) are examined.  Evaluates to the number of bytes
 * matched (1, 2 or 3), or 0 when nothing matches or too few bytes remain
 * for a sequence.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_VERTWS_utf8_safe(s,e) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xE2 ) ? \
        ( ( ((U8*)(s))[1] == 0x80 && \
            ( ((U8*)(s))[2] == 0xA8 || ((U8*)(s))[2] == 0xA9 ) ) ? 3 : 0 ) \
    : ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ) \
: ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0xC2 ) ? \
        ( ( ((U8*)(s))[1] == 0x85 ) ? 2 : 0 ) \
    : ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ) \
: ( (e) - (s) > 0 ) ? \
    ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) \
: 0 )
302 /*** GENERATED CODE ***/
/*
 * is_VERTWS_latin1(s)
 *
 * True (1) when the single byte at s is vertical whitespace in Latin-1:
 * 0x0A..0x0D or NEL (0x85); otherwise 0.
 *
 * s is expanded several times, so it must have no side effects.
 */
#define is_VERTWS_latin1(s) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || \
  ((U8*)(s))[0] == 0x85 )
306 /*** GENERATED CODE ***/
/*
 * is_VERTWS_latin1_safe(s, e)
 *
 * Bounds-checked Latin-1 vertical whitespace test.  Evaluates to 1 when at
 * least one byte lies in [s, e) and that byte is 0x0A..0x0D or NEL (0x85);
 * otherwise 0.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_VERTWS_latin1_safe(s,e) \
( ( (e) - (s) > 0 ) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || \
      ((U8*)(s))[0] == 0x85 ) \
: 0 )
311 /*** GENERATED CODE ***/
/*
 * is_VERTWS_cp(cp)
 *
 * True (1) when the code point cp is vertical whitespace: 0x0A..0x0D,
 * 0x85, 0x2028 or 0x2029; otherwise 0.  The tests are chained in ascending
 * order so small code points are rejected after few comparisons.
 *
 * cp is expanded several times, so it must have no side effects.
 */
#define is_VERTWS_cp(cp) \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || ( (cp) > 0x0D && \
( (cp) == 0x85 || ( (cp) > 0x85 && \
( (cp) == 0x2028 || ( (cp) > 0x2028 && \
(cp) == 0x2029 ) ) ) ) ) )
/*
	TRICKYFOLD: Problematic fold case letters.

	0x00DF  # LATIN SMALL LETTER SHARP S
	0x0390  # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
	0x03B0  # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
*/
325 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD(s, is_utf8)
 *
 * Tests whether the bytes at s begin one of the problematic case-fold
 * letters U+00DF, U+0390 or U+03B0.  When is_utf8 is true, evaluates to 2
 * if s holds the UTF-8 sequence C3 9F, CE 90 or CE B0, and 0 otherwise.
 * When is_utf8 is false, evaluates to 1 if s[0] is the byte 0xDF, and 0
 * otherwise.
 *
 * No bounds checking is done: in the UTF-8 case s[1] is read once s[0]
 * matches.  s is expanded several times, so it must have no side effects.
 */
#define is_TRICKYFOLD(s,is_utf8) \
( ( is_utf8 ) ? \
    ( ( ((U8*)(s))[0] == 0xC3 ) ? \
        ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xCE && \
        ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 ) ) ? 2 : 0 ) \
: ( ((U8*)(s))[0] == 0xDF ) )
333 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD_safe(s, e, is_utf8)
 *
 * Bounds-checked test for the problematic case-fold letters U+00DF, U+0390
 * and U+03B0.  Only bytes in [s, e) are examined.  When is_utf8 is true,
 * evaluates to 2 if at least two bytes are available and they are C3 9F,
 * CE 90 or CE B0.  When is_utf8 is false, evaluates to 1 if at least one
 * byte is available and it is 0xDF.  Evaluates to 0 in every other case.
 *
 * s, e and is_utf8 are expanded several times, so they must have no side
 * effects.
 */
#define is_TRICKYFOLD_safe(s,e,is_utf8) \
( ( (e) - (s) > 1 ) ? \
    ( ( is_utf8 ) ? \
        ( ( ((U8*)(s))[0] == 0xC3 ) ? \
            ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) \
        : ( ((U8*)(s))[0] == 0xCE && \
            ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 ) ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xDF ) ) \
: ( (e) - (s) > 0 && !( is_utf8 ) ) ? \
    ( ((U8*)(s))[0] == 0xDF ) \
: 0 )
343 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD_utf8(s)
 *
 * Tests whether the UTF-8 bytes at s encode U+00DF (C3 9F), U+0390 (CE 90)
 * or U+03B0 (CE B0).  Evaluates to 2 on a match and 0 otherwise.
 *
 * No bounds checking is done: s[1] is read once s[0] matches.  s is
 * expanded several times, so it must have no side effects.
 */
#define is_TRICKYFOLD_utf8(s) \
( ( ((U8*)(s))[0] == 0xC3 ) ? \
    ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) \
: ( ((U8*)(s))[0] == 0xCE && \
    ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 ) ) ? 2 : 0 )
349 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD_utf8_safe(s, e)
 *
 * Bounds-checked UTF-8 test for U+00DF (C3 9F), U+0390 (CE 90) and U+03B0
 * (CE B0).  Evaluates to 2 when at least two bytes lie in [s, e) and they
 * form one of those sequences; otherwise 0.
 *
 * s and e are expanded several times, so they must have no side effects.
 */
#define is_TRICKYFOLD_utf8_safe(s,e) \
( ( (e) - (s) > 1 ) ? \
    ( ( ((U8*)(s))[0] == 0xC3 ) ? \
        ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) \
    : ( ((U8*)(s))[0] == 0xCE && \
        ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 ) ) ? 2 : 0 ) \
: 0 )
356 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD_latin1(s)
 *
 * True (1) when the single byte at s is 0xDF (LATIN SMALL LETTER SHARP S
 * in Latin-1); otherwise 0.
 */
#define is_TRICKYFOLD_latin1(s) \
( ((U8*)(s))[0] == 0xDF )
360 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD_latin1_safe(s, e)
 *
 * Bounds-checked Latin-1 test: evaluates to 1 when at least one byte lies
 * in [s, e) and that byte is 0xDF (LATIN SMALL LETTER SHARP S); otherwise
 * 0.
 *
 * s is expanded more than once, so it must have no side effects.
 */
#define is_TRICKYFOLD_latin1_safe(s,e) \
( ( (e) - (s) > 0 ) ? \
    ( ((U8*)(s))[0] == 0xDF ) \
: 0 )
365 /*** GENERATED CODE ***/
366 #define is_TRICKYFOLD_cp(cp) \
367 ( cp == 0xDF ||( cp > 0xDF && \
368 ( cp == 0x390 ||( cp > 0x390 && \