1 /* -*- buffer-read-only: t -*-
5 * Copyright (C) 2007, by Larry Wall and others
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
10 * !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
11 * This file is built by Porting/regcharclass.pl.
12 * (Generated at: Mon Apr 23 12:29:30 2007 GMT)
13 * Any changes made here will be lost!
17 LNBREAK: Line Break: \R
19 "\x0D\x0A" # CRLF - Network (Windows) line ending
21 0x0B # VT | VERTICAL TAB
23 0x0D # CR | CARRIAGE RETURN
24 0x85 # NEL | NEXT LINE
25 0x2028 # LINE SEPARATOR
26 0x2029 # PARAGRAPH SEPARATOR
/*** GENERATED CODE ***/
/* is_LNBREAK(s,is_utf8): length in bytes of the LNBREAK (\R) sequence that
 * starts at s, or 0 if none starts there.
 *
 *   s        pointer to the first byte to examine (read as const U8*)
 *   is_utf8  nonzero: match the UTF-8 encoded forms; zero: match single bytes
 *
 * Results: 2 for CR LF, 1 for a lone CR and for bytes 10..12.  With is_utf8
 * set, C2 85 yields 2 and E2 80 A8 / E2 80 A9 yield 3; without it the single
 * byte 0x85 yields 1.
 *
 * No bounds checking is done: s[1] is read after a CR or a 0xC2 byte and
 * s[2] after E2 80.  s is expanded several times, so it must not have side
 * effects.
 */
#define is_LNBREAK(s,is_utf8) \
( ( ((const U8*)(s))[0]==13 ) ? \
    ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
: ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) ? 1 \
: ( (is_utf8) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==226 && ((const U8*)(s))[1]==128 \
          && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) ) \
  : ( ((const U8*)(s))[0]==133 ) ) ) )
/*** GENERATED CODE ***/
/* is_LNBREAK_safe(s,e,is_utf8): bounds-checked form of the LNBREAK (\R)
 * matcher.  Returns the length in bytes of the sequence starting at s, or 0.
 *
 *   s        pointer to the first byte to examine (read as const U8*)
 *   e        pointer one past the last readable byte
 *   is_utf8  nonzero: match the UTF-8 encoded forms; zero: match single bytes
 *
 * The amount of input left, (e) - (s), selects which sequences can be tried:
 * more than 2 bytes allows the 3-byte forms, more than 1 allows CR LF and
 * C2 85, exactly 1 allows only the single-byte forms (a lone 0x85 counts
 * only when is_utf8 is zero), and an empty range yields 0.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_LNBREAK_safe(s,e,is_utf8) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((const U8*)(s))[0]==13 ) ? \
        ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
    : ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) ? 1 \
    : ( (is_utf8) ? \
        ( ( ((const U8*)(s))[0]==194 ) ? \
            ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
        : ( ( ((const U8*)(s))[0]==226 && ((const U8*)(s))[1]==128 \
              && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) ) \
      : ( ((const U8*)(s))[0]==133 ) ) ) ) \
: ( ( (e) - (s) > 1 ) ? \
    ( ( ((const U8*)(s))[0]==13 ) ? \
        ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
    : ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) ? 1 \
    : ( (is_utf8) ? \
        ( ( ((const U8*)(s))[0]==194 && ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
      : ( ((const U8*)(s))[0]==133 ) ) ) ) \
: ( ( (e) - (s) > 0 ) ? \
    ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ? 1 \
    : ( ( !(is_utf8) ) ? ( ((const U8*)(s))[0]==133 ) : 0 ) ) \
: 0 ) ) )
/*** GENERATED CODE ***/
/* is_LNBREAK_utf8(s): length in bytes of the UTF-8 encoded LNBREAK (\R)
 * sequence starting at s, or 0 if none starts there.
 *
 * Results: 2 for C2 85 and for CR LF, 3 for E2 80 A8 / E2 80 A9, 1 for a
 * lone CR and for bytes 10..12.
 *
 * No bounds checking is done: s[1] is read after 0xC2, 0xE2 or CR, and s[2]
 * after E2 80.  s is expanded several times, so it must not have side
 * effects; it is parenthesized so that any pointer expression may be passed.
 */
#define is_LNBREAK_utf8(s) \
( ( ((const U8*)(s))[0]==194 ) ? \
    ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
: ( ( ((const U8*)(s))[0]==226 ) ? \
    ( ( ((const U8*)(s))[1]==128 \
        && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) \
: ( ( ((const U8*)(s))[0]==13 ) ? \
    ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
: (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) ) ) )
/*** GENERATED CODE ***/
/* is_LNBREAK_utf8_safe(s,e): bounds-checked UTF-8 LNBREAK (\R) matcher.
 * Returns the length in bytes of the sequence starting at s, or 0.
 *
 *   s  pointer to the first byte to examine (read as const U8*)
 *   e  pointer one past the last readable byte
 *
 * With more than 2 bytes left every form is tried; with exactly 2 the 3-byte
 * forms are skipped; with exactly 1 only bytes 10..13 match; an empty range
 * yields 0.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_LNBREAK_utf8_safe(s,e) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==226 ) ? \
        ( ( ((const U8*)(s))[1]==128 \
            && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) \
    : ( ( ((const U8*)(s))[0]==13 ) ? \
        ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
    : (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) ) ) ) \
: ( ( (e) - (s) > 1 ) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==13 ) ? \
        ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
    : (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) ) ) \
: ( ( (e) - (s) > 0 ) ? \
    (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) \
: 0 ) ) )
/*** GENERATED CODE ***/
/* is_LNBREAK_latin1(s): length in bytes of the single-byte-encoded LNBREAK
 * (\R) sequence starting at s, or 0 if none starts there.
 *
 * Results: 2 for CR LF, 1 for a lone CR, for bytes 10..12 and for 0x85.
 *
 * No bounds checking is done: s[1] is read whenever s[0] is CR.  s is
 * expanded several times, so it must not have side effects; it is
 * parenthesized so that any pointer expression may be passed.
 */
#define is_LNBREAK_latin1(s) \
( ( ((const U8*)(s))[0]==13 ) ? \
    ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
: ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) \
    || ((const U8*)(s))[0]==133 ) )
/*** GENERATED CODE ***/
/* is_LNBREAK_latin1_safe(s,e): bounds-checked single-byte LNBREAK (\R)
 * matcher.  Returns the length in bytes of the sequence starting at s, or 0.
 *
 *   s  pointer to the first byte to examine (read as const U8*)
 *   e  pointer one past the last readable byte
 *
 * With more than 1 byte left CR LF yields 2; with exactly 1 byte a CR can
 * only yield 1; an empty range yields 0.  Bytes 10..12 and 0x85 yield 1.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_LNBREAK_latin1_safe(s,e) \
( ( (e) - (s) > 1 ) ? \
    ( ( ((const U8*)(s))[0]==13 ) ? \
        ( ( ((const U8*)(s))[1]==10 ) ? 2 : 1 ) \
    : ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=12) \
        || ((const U8*)(s))[0]==133 ) ) \
: ( ( (e) - (s) > 0 ) ? \
    ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) \
      || ((const U8*)(s))[0]==133 ) \
: 0 ) )
/*** GENERATED CODE ***/
/* is_LNBREAK_cp(cp): nonzero when the code point cp is one of the single
 * code points of the LNBREAK (\R) class: 10..13, 133 (0x85), 8232 (0x2028)
 * or 8233 (0x2029).  The two-character CR LF sequence cannot be expressed as
 * one code point and is not handled here.
 *
 * The comparisons are ordered by ascending value so that evaluation stops as
 * soon as cp is known to be below the next candidate.  cp is expanded
 * several times, so it must not have side effects; it is parenthesized so
 * that any integer expression may be passed.
 */
#define is_LNBREAK_cp(cp) \
( (10<=(cp) && (cp)<=13) || ( (cp) > 13 && \
( (cp)==133 || ( (cp) > 133 && \
( (cp)==8232 || ( (cp) > 8232 && \
(cp)==8233 ) ) ) ) ) )
114 HORIZWS: Horizontal Whitespace: \h \H
119 0x1680 # OGHAM SPACE MARK
120 0x180e # MONGOLIAN VOWEL SEPARATOR
125 0x2004 # THREE-PER-EM SPACE
126 0x2005 # FOUR-PER-EM SPACE
127 0x2006 # SIX-PER-EM SPACE
128 0x2007 # FIGURE SPACE
129 0x2008 # PUNCTUATION SPACE
132 0x202f # NARROW NO-BREAK SPACE
133 0x205f # MEDIUM MATHEMATICAL SPACE
134 0x3000 # IDEOGRAPHIC SPACE
/*** GENERATED CODE ***/
/* is_HORIZWS(s,is_utf8): length in bytes of the horizontal-whitespace
 * (\h) character starting at s, or 0 if none starts there.
 *
 *   s        pointer to the first byte to examine (read as const U8*)
 *   is_utf8  nonzero: match the UTF-8 encoded forms; zero: match single bytes
 *
 * Results: 1 for bytes 9 and 32.  With is_utf8 set, C2 A0 yields 2 and the
 * three-byte forms E1 9A 80, E1 A0 8E, E2 80 80..8A, E2 80 AF, E2 81 9F and
 * E3 80 80 yield 3; without it the single byte 0xA0 yields 1.
 *
 * No bounds checking is done: up to three bytes are read depending on the
 * lead byte.  s is expanded several times, so it must not have side effects.
 */
#define is_HORIZWS(s,is_utf8) \
( ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ? 1 \
: ( (is_utf8) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==160 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==225 ) ? \
        ( ( ((const U8*)(s))[1]==160 ) ? \
            ( ( ((const U8*)(s))[2]==142 ) ? 3 : 0 ) \
        : ( ( ((const U8*)(s))[1]==154 && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) ) \
    : ( ( ((const U8*)(s))[0]==226 ) ? \
        ( ( ((const U8*)(s))[1]==129 ) ? \
            ( ( ((const U8*)(s))[2]==159 ) ? 3 : 0 ) \
        : ( ( ((const U8*)(s))[1]==128 \
              && ( (128<=((const U8*)(s))[2] && ((const U8*)(s))[2]<=138) \
                   || ((const U8*)(s))[2]==175 ) ) ? 3 : 0 ) ) \
    : ( ( ((const U8*)(s))[0]==227 && ((const U8*)(s))[1]==128 \
          && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) ) ) ) \
  : ( ((const U8*)(s))[0]==160 ) ) )
/*** GENERATED CODE ***/
/* is_HORIZWS_safe(s,e,is_utf8): bounds-checked horizontal-whitespace (\h)
 * matcher.  Returns the length in bytes of the character starting at s,
 * or 0.
 *
 *   s        pointer to the first byte to examine (read as const U8*)
 *   e        pointer one past the last readable byte
 *   is_utf8  nonzero: match the UTF-8 encoded forms; zero: match single bytes
 *
 * The amount of input left, (e) - (s), selects which forms can be tried:
 * more than 2 bytes allows the 3-byte forms, more than 1 allows C2 A0,
 * exactly 1 allows only the single-byte forms (a lone 0xA0 counts only when
 * is_utf8 is zero), and an empty range yields 0.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_HORIZWS_safe(s,e,is_utf8) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ? 1 \
    : ( (is_utf8) ? \
        ( ( ((const U8*)(s))[0]==194 ) ? \
            ( ( ((const U8*)(s))[1]==160 ) ? 2 : 0 ) \
        : ( ( ((const U8*)(s))[0]==225 ) ? \
            ( ( ((const U8*)(s))[1]==160 ) ? \
                ( ( ((const U8*)(s))[2]==142 ) ? 3 : 0 ) \
            : ( ( ((const U8*)(s))[1]==154 && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) ) \
        : ( ( ((const U8*)(s))[0]==226 ) ? \
            ( ( ((const U8*)(s))[1]==129 ) ? \
                ( ( ((const U8*)(s))[2]==159 ) ? 3 : 0 ) \
            : ( ( ((const U8*)(s))[1]==128 \
                  && ( (128<=((const U8*)(s))[2] && ((const U8*)(s))[2]<=138) \
                       || ((const U8*)(s))[2]==175 ) ) ? 3 : 0 ) ) \
        : ( ( ((const U8*)(s))[0]==227 && ((const U8*)(s))[1]==128 \
              && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) ) ) ) \
      : ( ((const U8*)(s))[0]==160 ) ) ) \
: ( ( (e) - (s) > 1 ) ? \
    ( ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ? 1 \
    : ( (is_utf8) ? \
        ( ( ((const U8*)(s))[0]==194 && ((const U8*)(s))[1]==160 ) ? 2 : 0 ) \
      : ( ((const U8*)(s))[0]==160 ) ) ) \
: ( ( (e) - (s) > 0 ) ? \
    ( ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ? 1 \
    : ( ( !(is_utf8) ) ? ( ((const U8*)(s))[0]==160 ) : 0 ) ) \
: 0 ) ) )
/*** GENERATED CODE ***/
/* is_HORIZWS_utf8(s): length in bytes of the UTF-8 encoded
 * horizontal-whitespace (\h) character starting at s, or 0 if none starts
 * there.
 *
 * Results: 2 for C2 A0; 3 for E1 9A 80, E1 A0 8E, E2 80 80..8A, E2 80 AF,
 * E2 81 9F and E3 80 80; 1 for bytes 9 and 32.
 *
 * No bounds checking is done: up to three bytes are read depending on the
 * lead byte.  s is expanded several times, so it must not have side effects;
 * it is parenthesized so that any pointer expression may be passed.
 */
#define is_HORIZWS_utf8(s) \
( ( ((const U8*)(s))[0]==194 ) ? \
    ( ( ((const U8*)(s))[1]==160 ) ? 2 : 0 ) \
: ( ( ((const U8*)(s))[0]==225 ) ? \
    ( ( ((const U8*)(s))[1]==160 ) ? \
        ( ( ((const U8*)(s))[2]==142 ) ? 3 : 0 ) \
    : ( ( ((const U8*)(s))[1]==154 && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) ) \
: ( ( ((const U8*)(s))[0]==226 ) ? \
    ( ( ((const U8*)(s))[1]==129 ) ? \
        ( ( ((const U8*)(s))[2]==159 ) ? 3 : 0 ) \
    : ( ( ((const U8*)(s))[1]==128 \
          && ( (128<=((const U8*)(s))[2] && ((const U8*)(s))[2]<=138) \
               || ((const U8*)(s))[2]==175 ) ) ? 3 : 0 ) ) \
: ( ( ((const U8*)(s))[0]==227 ) ? \
    ( ( ((const U8*)(s))[1]==128 && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) \
: ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ) ) ) )
/*** GENERATED CODE ***/
/* is_HORIZWS_utf8_safe(s,e): bounds-checked UTF-8 horizontal-whitespace
 * (\h) matcher.  Returns the length in bytes of the character starting at
 * s, or 0.
 *
 *   s  pointer to the first byte to examine (read as const U8*)
 *   e  pointer one past the last readable byte
 *
 * With more than 2 bytes left every form is tried; with exactly 2 only
 * C2 A0 and the single bytes 9 and 32 can match; with exactly 1 only bytes
 * 9 and 32 match; an empty range yields 0.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_HORIZWS_utf8_safe(s,e) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==160 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==225 ) ? \
        ( ( ((const U8*)(s))[1]==160 ) ? \
            ( ( ((const U8*)(s))[2]==142 ) ? 3 : 0 ) \
        : ( ( ((const U8*)(s))[1]==154 && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) ) \
    : ( ( ((const U8*)(s))[0]==226 ) ? \
        ( ( ((const U8*)(s))[1]==129 ) ? \
            ( ( ((const U8*)(s))[2]==159 ) ? 3 : 0 ) \
        : ( ( ((const U8*)(s))[1]==128 \
              && ( (128<=((const U8*)(s))[2] && ((const U8*)(s))[2]<=138) \
                   || ((const U8*)(s))[2]==175 ) ) ? 3 : 0 ) ) \
    : ( ( ((const U8*)(s))[0]==227 ) ? \
        ( ( ((const U8*)(s))[1]==128 && ((const U8*)(s))[2]==128 ) ? 3 : 0 ) \
    : ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ) ) ) ) \
: ( ( (e) - (s) > 1 ) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==160 ) ? 2 : 0 ) \
    : ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) ) \
: ( ( (e) - (s) > 0 ) ? \
    ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 ) \
: 0 ) ) )
/*** GENERATED CODE ***/
/* is_HORIZWS_latin1(s): 1 when the byte at s is a single-byte
 * horizontal-whitespace (\h) character -- 9, 32 or 160 (0xA0) -- else 0.
 *
 * Exactly one byte is read.  s is expanded several times, so it must not
 * have side effects; it is parenthesized so that any pointer expression may
 * be passed.
 */
#define is_HORIZWS_latin1(s) \
( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 \
  || ((const U8*)(s))[0]==160 )
/*** GENERATED CODE ***/
/* is_HORIZWS_latin1_safe(s,e): bounds-checked single-byte
 * horizontal-whitespace (\h) test.
 *
 *   s  pointer to the byte to examine (read as const U8*)
 *   e  pointer one past the last readable byte
 *
 * Returns 1 when at least one byte is available and it is 9, 32 or 160
 * (0xA0); returns 0 otherwise, including for an empty range.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_HORIZWS_latin1_safe(s,e) \
( ( (e) - (s) > 0 ) ? \
    ( ((const U8*)(s))[0]==9 || ((const U8*)(s))[0]==32 \
      || ((const U8*)(s))[0]==160 ) \
: 0 )
/*** GENERATED CODE ***/
/* is_HORIZWS_cp(cp): nonzero when the code point cp is horizontal
 * whitespace (\h): 9, 32, 160 (0xA0), 5760 (0x1680), 6158 (0x180E),
 * 8192..8202 (0x2000..0x200A), 8239 (0x202F), 8287 (0x205F) or
 * 12288 (0x3000).
 *
 * The comparisons are ordered by ascending value so that evaluation stops as
 * soon as cp is known to be below the next candidate.  cp is expanded
 * several times, so it must not have side effects; it is parenthesized so
 * that any integer expression may be passed.
 */
#define is_HORIZWS_cp(cp) \
( (cp)==9 || ( (cp) > 9 && \
( (cp)==32 || ( (cp) > 32 && \
( (cp)==160 || ( (cp) > 160 && \
( (cp)==5760 || ( (cp) > 5760 && \
( (cp)==6158 || ( (cp) > 6158 && \
( (8192<=(cp) && (cp)<=8202) || ( (cp) > 8202 && \
( (cp)==8239 || ( (cp) > 8239 && \
( (cp)==8287 || ( (cp) > 8287 && \
(cp)==12288 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
241 VERTWS: Vertical Whitespace: \v \V
248 0x2028 # LINE SEPARATOR
249 0x2029 # PARAGRAPH SEPARATOR
/*** GENERATED CODE ***/
/* is_VERTWS(s,is_utf8): length in bytes of the vertical-whitespace (\v)
 * character starting at s, or 0 if none starts there.
 *
 *   s        pointer to the first byte to examine (read as const U8*)
 *   is_utf8  nonzero: match the UTF-8 encoded forms; zero: match single bytes
 *
 * Results: 1 for bytes 10..13.  With is_utf8 set, C2 85 yields 2 and
 * E2 80 A8 / E2 80 A9 yield 3; without it the single byte 0x85 yields 1.
 *
 * No bounds checking is done: s[1] is read after a 0xC2 or 0xE2 byte and
 * s[2] after E2 80.  s is expanded several times, so it must not have side
 * effects.
 */
#define is_VERTWS(s,is_utf8) \
( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ? 1 \
: ( (is_utf8) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==226 && ((const U8*)(s))[1]==128 \
          && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) ) \
  : ( ((const U8*)(s))[0]==133 ) ) )
/*** GENERATED CODE ***/
/* is_VERTWS_safe(s,e,is_utf8): bounds-checked vertical-whitespace (\v)
 * matcher.  Returns the length in bytes of the character starting at s,
 * or 0.
 *
 *   s        pointer to the first byte to examine (read as const U8*)
 *   e        pointer one past the last readable byte
 *   is_utf8  nonzero: match the UTF-8 encoded forms; zero: match single bytes
 *
 * The amount of input left, (e) - (s), selects which forms can be tried:
 * more than 2 bytes allows the 3-byte forms, more than 1 allows C2 85,
 * exactly 1 allows only the single-byte forms (a lone 0x85 counts only when
 * is_utf8 is zero), and an empty range yields 0.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_VERTWS_safe(s,e,is_utf8) \
( ( (e) - (s) > 2 ) ? \
    ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ? 1 \
    : ( (is_utf8) ? \
        ( ( ((const U8*)(s))[0]==194 ) ? \
            ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
        : ( ( ((const U8*)(s))[0]==226 && ((const U8*)(s))[1]==128 \
              && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) ) \
      : ( ((const U8*)(s))[0]==133 ) ) ) \
: ( ( (e) - (s) > 1 ) ? \
    ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ? 1 \
    : ( (is_utf8) ? \
        ( ( ((const U8*)(s))[0]==194 && ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
      : ( ((const U8*)(s))[0]==133 ) ) ) \
: ( ( (e) - (s) > 0 ) ? \
    ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ? 1 \
    : ( ( !(is_utf8) ) ? ( ((const U8*)(s))[0]==133 ) : 0 ) ) \
: 0 ) ) )
/*** GENERATED CODE ***/
/* is_VERTWS_utf8(s): length in bytes of the UTF-8 encoded
 * vertical-whitespace (\v) character starting at s, or 0 if none starts
 * there.
 *
 * Results: 2 for C2 85, 3 for E2 80 A8 / E2 80 A9, 1 for bytes 10..13.
 *
 * No bounds checking is done: s[1] is read after a 0xC2 or 0xE2 byte and
 * s[2] after E2 80.  s is expanded several times, so it must not have side
 * effects; it is parenthesized so that any pointer expression may be passed.
 */
#define is_VERTWS_utf8(s) \
( ( ((const U8*)(s))[0]==194 ) ? \
    ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
: ( ( ((const U8*)(s))[0]==226 ) ? \
    ( ( ((const U8*)(s))[1]==128 \
        && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) \
: (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ) )
/*** GENERATED CODE ***/
/* is_VERTWS_utf8_safe(s,e): bounds-checked UTF-8 vertical-whitespace (\v)
 * matcher.  Returns the length in bytes of the character starting at s,
 * or 0.
 *
 *   s  pointer to the first byte to examine (read as const U8*)
 *   e  pointer one past the last readable byte
 *
 * With more than 2 bytes left every form is tried; with exactly 2 only
 * C2 85 and bytes 10..13 can match; with exactly 1 only bytes 10..13 match;
 * an empty range yields 0.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_VERTWS_utf8_safe(s,e) \
( ( (e) - (s) > 2 ) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
    : ( ( ((const U8*)(s))[0]==226 ) ? \
        ( ( ((const U8*)(s))[1]==128 \
            && ( ((const U8*)(s))[2]==168 || ((const U8*)(s))[2]==169 ) ) ? 3 : 0 ) \
    : (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ) ) \
: ( ( (e) - (s) > 1 ) ? \
    ( ( ((const U8*)(s))[0]==194 ) ? \
        ( ( ((const U8*)(s))[1]==133 ) ? 2 : 0 ) \
    : (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) ) \
: ( ( (e) - (s) > 0 ) ? \
    (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) \
: 0 ) ) )
/*** GENERATED CODE ***/
/* is_VERTWS_latin1(s): 1 when the byte at s is a single-byte
 * vertical-whitespace (\v) character -- 10..13 or 133 (0x85) -- else 0.
 *
 * Exactly one byte is read.  s is expanded several times, so it must not
 * have side effects; it is parenthesized so that any pointer expression may
 * be passed.
 */
#define is_VERTWS_latin1(s) \
( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) \
  || ((const U8*)(s))[0]==133 )
/*** GENERATED CODE ***/
/* is_VERTWS_latin1_safe(s,e): bounds-checked single-byte
 * vertical-whitespace (\v) test.
 *
 *   s  pointer to the byte to examine (read as const U8*)
 *   e  pointer one past the last readable byte
 *
 * Returns 1 when at least one byte is available and it is 10..13 or 133
 * (0x85); returns 0 otherwise, including for an empty range.
 *
 * s and e are expanded several times, so they must not have side effects.
 */
#define is_VERTWS_latin1_safe(s,e) \
( ( (e) - (s) > 0 ) ? \
    ( (10<=((const U8*)(s))[0] && ((const U8*)(s))[0]<=13) \
      || ((const U8*)(s))[0]==133 ) \
: 0 )
311 /*** GENERATED CODE ***/
312 #define is_VERTWS_cp(cp) \
313 ( (10<=cp && cp<=13) || ( cp > 13 && \
314 ( cp==133 || ( cp > 133 && \
315 ( cp==8232 || ( cp > 8232 && \