1 /* -*- buffer-read-only: t -*-
5 * Copyright (C) 2007, by Larry Wall and others
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
10 * !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
11 * This file is built by Porting/regcharclass.pl.
13 * Any changes made here will be lost!
/*
	LNBREAK: Line Break: \R

	"\x0D\x0A"      # CRLF - Network (Windows) line ending
	0x0A            # LF  | LINE FEED
	0x0B            # VT  | VERTICAL TAB
	0x0C            # FF  | FORM FEED
	0x0D            # CR  | CARRIAGE RETURN
	0x85            # NEL | NEXT LINE
	0x2028          # LINE SEPARATOR
	0x2029          # PARAGRAPH SEPARATOR
*/
29 /*** GENERATED CODE ***/
/*
 * is_LNBREAK(s,is_utf8): does the byte sequence at s start with a line
 * break?
 *
 * Evaluates to the number of bytes matched, or 0 for no match:
 *   - 2 for CR LF, 1 for a lone LF, VT, FF or CR;
 *   - if is_utf8 is true: 2 for C2 85, 3 for E2 80 A8 / E2 80 A9;
 *   - if is_utf8 is false: 1 for the single byte 0x85.
 *
 * No bounds checking is done; the expansion may read s[0]..s[2].
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_LNBREAK(s,is_utf8) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
: ( 0x0D == ((U8*)(s))[0] ) ? \
    ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
: ( is_utf8 ) ? \
    ( ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ( 0x85 == ((U8*)(s))[0] ) )
42 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_safe(s,e,is_utf8): bounds-checked form of the line-break test.
 *
 * e marks the end of the readable bytes; only bytes in [s, e) are
 * examined.  The expression selects a matcher by the number of bytes
 * available (more than 2, exactly 2, exactly 1) so that no index at or
 * beyond e - s is read.  Evaluates to the number of bytes matched
 * (1, 2 or 3), or 0 when nothing matches or when e - s <= 0.
 *
 * With is_utf8 true, C2 85 (2 bytes) and E2 80 A8/A9 (3 bytes) match;
 * with is_utf8 false, the single byte 0x85 matches.
 * s and e are evaluated more than once.
 */
#define is_LNBREAK_safe(s,e,is_utf8) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((U8*)(s))[0] ) ? \
        ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
    : ( is_utf8 ) ? \
        ( ( 0xC2 == ((U8*)(s))[0] ) ? \
            ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xE2 == ((U8*)(s))[0] ) ? \
            ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0x85 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((U8*)(s))[0] ) ? \
        ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
    : ( is_utf8 ) ? \
        ( ( ( 0xC2 == ((U8*)(s))[0] ) && ( 0x85 == ((U8*)(s))[1] ) ) ? 2 : 0 ) \
    : ( 0x85 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( !( is_utf8 ) ) ? \
        ( 0x85 == ((U8*)(s))[0] ) \
    : 0 ) \
: 0 )
69 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_utf8(s): line-break test for UTF-8 encoded input.
 *
 * Evaluates to the number of bytes matched: 2 for CR LF, 1 for a lone
 * LF, VT, FF or CR, 2 for C2 85, 3 for E2 80 A8 / E2 80 A9; 0 otherwise.
 *
 * No bounds checking is done; the expansion may read s[0]..s[2].
 * s is evaluated more than once.
 */
#define is_LNBREAK_utf8(s) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
: ( 0x0D == ((U8*)(s))[0] ) ? \
    ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
: ( 0xC2 == ((U8*)(s))[0] ) ? \
    ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
: ( 0xE2 == ((U8*)(s))[0] ) ? \
    ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
: 0 )
80 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_utf8_safe(s,e): bounds-checked line-break test for UTF-8
 * encoded input.
 *
 * Only bytes in [s, e) are examined; the matcher is chosen by how many
 * bytes are available (more than 2, exactly 2, exactly 1).  Evaluates to
 * the number of bytes matched (1, 2 or 3), or 0 when nothing matches or
 * when e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_LNBREAK_utf8_safe(s,e) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((U8*)(s))[0] ) ? \
        ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
    : ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((U8*)(s))[0] ) ? \
        ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
    : ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ((e)-(s) > 0) ? \
    ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) \
: 0 )
102 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_latin1(s): line-break test for single-byte (non-UTF-8) input.
 *
 * Evaluates to 2 for CR LF, 1 for a lone LF, VT, FF, CR or byte 0x85,
 * and 0 otherwise.
 *
 * No bounds checking is done; the expansion may read s[0] and s[1].
 * s is parenthesized in the expansion so that pointer-arithmetic
 * arguments are cast as a whole; it is evaluated more than once.
 */
#define is_LNBREAK_latin1(s) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
: ( 0x0D == ((U8*)(s))[0] ) ? \
    ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
: ( 0x85 == ((U8*)(s))[0] ) )
109 /*** GENERATED CODE ***/
/*
 * is_LNBREAK_latin1_safe(s,e): bounds-checked line-break test for
 * single-byte (non-UTF-8) input.
 *
 * Only bytes in [s, e) are examined.  With at least two bytes available
 * CR LF evaluates to 2; otherwise a lone LF, VT, FF, CR or byte 0x85
 * evaluates to 1.  Evaluates to 0 when nothing matches or e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_LNBREAK_latin1_safe(s,e) \
( ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((U8*)(s))[0] ) ? \
        ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 ) \
    : ( 0x85 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || 0x85 == ((U8*)(s))[0] ) \
: 0 )
/*
	HORIZWS: Horizontal Whitespace: \h \H

	0x09		# HT
	0x20		# SPACE
	0xa0		# NBSP
	0x1680		# OGHAM SPACE MARK
	0x180e		# MONGOLIAN VOWEL SEPARATOR
	0x2000		# EN QUAD
	0x2001		# EM QUAD
	0x2002		# EN SPACE
	0x2003		# EM SPACE
	0x2004		# THREE-PER-EM SPACE
	0x2005		# FOUR-PER-EM SPACE
	0x2006		# SIX-PER-EM SPACE
	0x2007		# FIGURE SPACE
	0x2008		# PUNCTUATION SPACE
	0x2009		# THIN SPACE
	0x200A		# HAIR SPACE
	0x202f		# NARROW NO-BREAK SPACE
	0x205f		# MEDIUM MATHEMATICAL SPACE
	0x3000		# IDEOGRAPHIC SPACE
*/
143 /*** GENERATED CODE ***/
/*
 * is_HORIZWS(s,is_utf8): does the byte sequence at s start with a
 * horizontal whitespace character?
 *
 * Evaluates to the number of bytes matched, or 0 for no match:
 *   - 1 for HT (0x09) or SPACE (0x20);
 *   - if is_utf8 is true: 2 for C2 A0; 3 for E1 9A 80, E1 A0 8E,
 *     E2 80 80..8A, E2 80 AF, E2 81 9F and E3 80 80;
 *   - if is_utf8 is false: 1 for the single byte 0xA0.
 *
 * No bounds checking is done; the expansion may read s[0]..s[2].
 * s is evaluated more than once.
 */
#define is_HORIZWS(s,is_utf8) \
( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
: ( is_utf8 ) ? \
    ( ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE1 == ((U8*)(s))[0] ) ? \
        ( ( 0x9A == ((U8*)(s))[1] ) ? \
            ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0xA0 == ((U8*)(s))[1] ) ? \
            ( ( 0x8E == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE2 == ((U8*)(s))[0] ) ? \
        ( ( 0x80 == ((U8*)(s))[1] ) ? \
            ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0x81 == ((U8*)(s))[1] ) ? \
            ( ( 0x9F == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE3 == ((U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ( 0xA0 == ((U8*)(s))[0] ) )
166 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_safe(s,e,is_utf8): bounds-checked horizontal whitespace test.
 *
 * Only bytes in [s, e) are examined; the matcher is chosen by how many
 * bytes are available (more than 2, exactly 2, exactly 1).  Evaluates to
 * the number of bytes matched (1, 2 or 3), or 0 when nothing matches or
 * when e - s <= 0.
 *
 * With is_utf8 true the multi-byte sequences C2 A0, E1 9A 80, E1 A0 8E,
 * E2 80 80..8A, E2 80 AF, E2 81 9F and E3 80 80 match; with is_utf8
 * false the single byte 0xA0 matches.
 * s and e are evaluated more than once.
 */
#define is_HORIZWS_safe(s,e,is_utf8) \
( ((e)-(s) > 2) ? \
    ( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( 0xC2 == ((U8*)(s))[0] ) ? \
            ( ( 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xE1 == ((U8*)(s))[0] ) ? \
            ( ( 0x9A == ((U8*)(s))[1] ) ? \
                ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 ) \
            : ( 0xA0 == ((U8*)(s))[1] ) ? \
                ( ( 0x8E == ((U8*)(s))[2] ) ? 3 : 0 ) \
            : 0 ) \
        : ( 0xE2 == ((U8*)(s))[0] ) ? \
            ( ( 0x80 == ((U8*)(s))[1] ) ? \
                ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 ) \
            : ( 0x81 == ((U8*)(s))[1] ) ? \
                ( ( 0x9F == ((U8*)(s))[2] ) ? 3 : 0 ) \
            : 0 ) \
        : ( 0xE3 == ((U8*)(s))[0] ) ? \
            ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xA0 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 1) ? \
    ( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ( 0xC2 == ((U8*)(s))[0] ) && ( 0xA0 == ((U8*)(s))[1] ) ) ? 2 : 0 ) \
    : ( 0xA0 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
    : ( !( is_utf8 ) ) ? \
        ( 0xA0 == ((U8*)(s))[0] ) \
    : 0 ) \
: 0 )
201 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_utf8(s): horizontal whitespace test for UTF-8 encoded input.
 *
 * Evaluates to the number of bytes matched: 1 for HT or SPACE, 2 for
 * C2 A0, 3 for E1 9A 80, E1 A0 8E, E2 80 80..8A, E2 80 AF, E2 81 9F and
 * E3 80 80; 0 otherwise.
 *
 * No bounds checking is done; the expansion may read s[0]..s[2].
 * s is evaluated more than once.
 */
#define is_HORIZWS_utf8(s) \
( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
: ( 0xC2 == ((U8*)(s))[0] ) ? \
    ( ( 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
: ( 0xE1 == ((U8*)(s))[0] ) ? \
    ( ( 0x9A == ((U8*)(s))[1] ) ? \
        ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 ) \
    : ( 0xA0 == ((U8*)(s))[1] ) ? \
        ( ( 0x8E == ((U8*)(s))[2] ) ? 3 : 0 ) \
    : 0 ) \
: ( 0xE2 == ((U8*)(s))[0] ) ? \
    ( ( 0x80 == ((U8*)(s))[1] ) ? \
        ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 ) \
    : ( 0x81 == ((U8*)(s))[1] ) ? \
        ( ( 0x9F == ((U8*)(s))[2] ) ? 3 : 0 ) \
    : 0 ) \
: ( 0xE3 == ((U8*)(s))[0] ) ? \
    ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
: 0 )
222 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_utf8_safe(s,e): bounds-checked horizontal whitespace test
 * for UTF-8 encoded input.
 *
 * Only bytes in [s, e) are examined; the matcher is chosen by how many
 * bytes are available (more than 2, exactly 2, exactly 1).  Evaluates to
 * the number of bytes matched (1, 2 or 3), or 0 when nothing matches or
 * when e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_HORIZWS_utf8_safe(s,e) \
( ((e)-(s) > 2) ? \
    ( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
    : ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE1 == ((U8*)(s))[0] ) ? \
        ( ( 0x9A == ((U8*)(s))[1] ) ? \
            ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0xA0 == ((U8*)(s))[1] ) ? \
            ( ( 0x8E == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE2 == ((U8*)(s))[0] ) ? \
        ( ( 0x80 == ((U8*)(s))[1] ) ? \
            ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0x81 == ((U8*)(s))[1] ) ? \
            ( ( 0x9F == ((U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE3 == ((U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ((e)-(s) > 1) ? \
    ( ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) ? 1 \
    : ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ((e)-(s) > 0) ? \
    ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] ) \
: 0 )
252 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_latin1(s): horizontal whitespace test for single-byte
 * (non-UTF-8) input.
 *
 * Evaluates to 1 when s[0] is HT (0x09), SPACE (0x20) or 0xA0, else 0.
 * Only s[0] is read.  s is parenthesized in the expansion so that
 * pointer-arithmetic arguments are cast as a whole; it is evaluated more
 * than once.
 */
#define is_HORIZWS_latin1(s) \
( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] || 0xA0 == ((U8*)(s))[0] )
256 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_latin1_safe(s,e): bounds-checked horizontal whitespace test
 * for single-byte (non-UTF-8) input.
 *
 * Evaluates to 1 when at least one byte is available in [s, e) and s[0]
 * is HT (0x09), SPACE (0x20) or 0xA0; evaluates to 0 otherwise,
 * including when e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_HORIZWS_latin1_safe(s,e) \
( ((e)-(s) > 0) ? \
    ( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] || 0xA0 == ((U8*)(s))[0] ) \
: 0 )
262 /*** GENERATED CODE ***/
/*
 * is_HORIZWS_cp(cp): is the code point cp a horizontal whitespace
 * character?
 *
 * Evaluates to 1 for 0x09, 0x20, 0xA0, 0x1680, 0x180E, 0x2000..0x200A,
 * 0x202F, 0x205F and 0x3000, and to 0 for every other value.  The
 * comparisons are ordered by ascending code point so that evaluation
 * stops as soon as cp is below the next candidate.
 *
 * cp is parenthesized in the expansion so that an expression argument
 * (e.g. a | b) is compared as a whole; it is evaluated more than once.
 */
#define is_HORIZWS_cp(cp) \
( 0x09 == (cp) || ( 0x09 < (cp) && \
( 0x20 == (cp) || ( 0x20 < (cp) && \
( 0xA0 == (cp) || ( 0xA0 < (cp) && \
( 0x1680 == (cp) || ( 0x1680 < (cp) && \
( 0x180E == (cp) || ( 0x180E < (cp) && \
( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( 0x200A < (cp) && \
( 0x202F == (cp) || ( 0x202F < (cp) && \
( 0x205F == (cp) || ( 0x205F < (cp) && \
0x3000 == (cp) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
/*
	VERTWS: Vertical Whitespace: \v \V

	0x0A		# LF
	0x0B		# VT
	0x0C		# FF
	0x0D		# CR
	0x85		# NEL
	0x2028		# LINE SEPARATOR
	0x2029		# PARAGRAPH SEPARATOR
*/
285 /*** GENERATED CODE ***/
/*
 * is_VERTWS(s,is_utf8): does the byte sequence at s start with a
 * vertical whitespace character?
 *
 * Evaluates to the number of bytes matched, or 0 for no match:
 *   - 1 for any byte in 0x0A..0x0D (CR LF is not treated as a unit);
 *   - if is_utf8 is true: 2 for C2 85, 3 for E2 80 A8 / E2 80 A9;
 *   - if is_utf8 is false: 1 for the single byte 0x85.
 *
 * No bounds checking is done; the expansion may read s[0]..s[2].
 * s is evaluated more than once.
 */
#define is_VERTWS(s,is_utf8) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
: ( is_utf8 ) ? \
    ( ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ( 0x85 == ((U8*)(s))[0] ) )
296 /*** GENERATED CODE ***/
/*
 * is_VERTWS_safe(s,e,is_utf8): bounds-checked vertical whitespace test.
 *
 * Only bytes in [s, e) are examined; the matcher is chosen by how many
 * bytes are available (more than 2, exactly 2, exactly 1).  Evaluates to
 * the number of bytes matched (1, 2 or 3), or 0 when nothing matches or
 * when e - s <= 0.
 *
 * With is_utf8 true, C2 85 (2 bytes) and E2 80 A8/A9 (3 bytes) match;
 * with is_utf8 false, the single byte 0x85 matches.
 * s and e are evaluated more than once.
 */
#define is_VERTWS_safe(s,e,is_utf8) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( 0xC2 == ((U8*)(s))[0] ) ? \
            ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xE2 == ((U8*)(s))[0] ) ? \
            ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0x85 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ( 0xC2 == ((U8*)(s))[0] ) && ( 0x85 == ((U8*)(s))[1] ) ) ? 2 : 0 ) \
    : ( 0x85 == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( !( is_utf8 ) ) ? \
        ( 0x85 == ((U8*)(s))[0] ) \
    : 0 ) \
: 0 )
319 /*** GENERATED CODE ***/
/*
 * is_VERTWS_utf8(s): vertical whitespace test for UTF-8 encoded input.
 *
 * Evaluates to the number of bytes matched: 1 for any byte in
 * 0x0A..0x0D, 2 for C2 85, 3 for E2 80 A8 / E2 80 A9; 0 otherwise.
 *
 * No bounds checking is done; the expansion may read s[0]..s[2].
 * s is evaluated more than once.
 */
#define is_VERTWS_utf8(s) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
: ( 0xC2 == ((U8*)(s))[0] ) ? \
    ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
: ( 0xE2 == ((U8*)(s))[0] ) ? \
    ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
: 0 )
328 /*** GENERATED CODE ***/
/*
 * is_VERTWS_utf8_safe(s,e): bounds-checked vertical whitespace test for
 * UTF-8 encoded input.
 *
 * Only bytes in [s, e) are examined; the matcher is chosen by how many
 * bytes are available (more than 2, exactly 2, exactly 1).  Evaluates to
 * the number of bytes matched (1, 2 or 3), or 0 when nothing matches or
 * when e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_VERTWS_utf8_safe(s,e) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((U8*)(s))[1] ) && ( 0xA8 == ((U8*)(s))[2] || 0xA9 == ((U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( 0xC2 == ((U8*)(s))[0] ) ? \
        ( ( 0x85 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ((e)-(s) > 0) ? \
    ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) \
: 0 )
346 /*** GENERATED CODE ***/
/*
 * is_VERTWS_latin1(s): vertical whitespace test for single-byte
 * (non-UTF-8) input.
 *
 * Evaluates to 1 when s[0] is in 0x0A..0x0D or equals 0x85, else 0.
 * Only s[0] is read.  s is parenthesized in the expansion so that
 * pointer-arithmetic arguments are cast as a whole; it is evaluated more
 * than once.
 */
#define is_VERTWS_latin1(s) \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || 0x85 == ((U8*)(s))[0] )
350 /*** GENERATED CODE ***/
/*
 * is_VERTWS_latin1_safe(s,e): bounds-checked vertical whitespace test
 * for single-byte (non-UTF-8) input.
 *
 * Evaluates to 1 when at least one byte is available in [s, e) and s[0]
 * is in 0x0A..0x0D or equals 0x85; evaluates to 0 otherwise, including
 * when e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_VERTWS_latin1_safe(s,e) \
( ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || 0x85 == ((U8*)(s))[0] ) \
: 0 )
356 /*** GENERATED CODE ***/
/*
 * is_VERTWS_cp(cp): is the code point cp a vertical whitespace character?
 *
 * Evaluates to 1 for 0x0A..0x0D, 0x85, 0x2028 and 0x2029, and to 0 for
 * every other value.  The comparisons are ordered by ascending code
 * point so that evaluation stops as soon as cp is below the next
 * candidate.
 *
 * cp is parenthesized in the expansion so that an expression argument
 * (e.g. a | b) is compared as a whole; it is evaluated more than once.
 */
#define is_VERTWS_cp(cp) \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || ( 0x0D < (cp) && \
( 0x85 == (cp) || ( 0x85 < (cp) && \
( 0x2028 == (cp) || ( 0x2028 < (cp) && \
0x2029 == (cp) ) ) ) ) ) )
/*
	TRICKYFOLD: Problematic fold case letters.

	0x00DF	# LATIN SMALL LETTER SHARP S
	0x0390	# GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
	0x03B0	# GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
*/
370 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD(s,is_utf8): does the byte sequence at s start with one
 * of the problematic case-folding letters (0xDF, 0x390, 0x3B0)?
 *
 * If is_utf8 is true, evaluates to 2 for C3 9F, CE 90 or CE B0 and 0
 * otherwise.  If is_utf8 is false, evaluates to 1 when s[0] is 0xDF and
 * 0 otherwise.
 *
 * No bounds checking is done; the expansion may read s[0] and s[1].
 * s is evaluated more than once.
 */
#define is_TRICKYFOLD(s,is_utf8) \
( ( is_utf8 ) ? \
    ( ( 0xC3 == ((U8*)(s))[0] ) ? \
        ( ( 0x9F == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xCE == ((U8*)(s))[0] ) ? \
        ( ( 0x90 == ((U8*)(s))[1] || 0xB0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ( 0xDF == ((U8*)(s))[0] ) )
380 /*** GENERATED CODE ***/
/*
 * is_TRICKYFOLD_safe(s,e,is_utf8): bounds-checked test for the
 * problematic case-folding letters (0xDF, 0x390, 0x3B0).
 *
 * Only bytes in [s, e) are examined.  With at least two bytes available
 * and is_utf8 true, evaluates to 2 for C3 9F, CE 90 or CE B0.  With
 * is_utf8 false and at least one byte available, evaluates to 1 when
 * s[0] is 0xDF.  Evaluates to 0 in every other case, including when
 * e - s <= 0.
 * s and e are evaluated more than once.
 */
#define is_TRICKYFOLD_safe(s,e,is_utf8) \
( ((e)-(s) > 1) ? \
    ( ( is_utf8 ) ? \
        ( ( 0xC3 == ((U8*)(s))[0] ) ? \
            ( ( 0x9F == ((U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xCE == ((U8*)(s))[0] ) ? \
            ( ( 0x90 == ((U8*)(s))[1] || 0xB0 == ((U8*)(s))[1] ) ? 2 : 0 ) \
        : 0 ) \
    : ( 0xDF == ((U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( !( is_utf8 ) ) ? \
        ( 0xDF == ((U8*)(s))[0] ) \
    : 0 ) \
: 0 )
396 /*** GENERATED CODE ***/
397 #define is_TRICKYFOLD_cp(cp) \
398 ( 0xDF == cp || ( 0xDF < cp && \
399 ( 0x390 == cp || ( 0x390 < cp && \
402 /*** GENERATED CODE ***/
/*
 * what_TRICKYFOLD(s,is_utf8): identify which problematic case-folding
 * letter, if any, starts at s.
 *
 * Evaluates to the matched code point, or 0 for no match:
 *   - if is_utf8 is true: 0xDF for C3 9F, 0x390 for CE 90, 0x3B0 for
 *     CE B0;
 *   - if is_utf8 is false: 0xDF when s[0] is 0xDF.
 *
 * No bounds checking is done; the expansion may read s[0] and s[1].
 * s is evaluated more than once.
 */
#define what_TRICKYFOLD(s,is_utf8) \
( ( is_utf8 ) ? \
    ( ( 0xC3 == ((U8*)(s))[0] ) ? \
        ( ( 0x9F == ((U8*)(s))[1] ) ? 0xDF : 0 ) \
    : ( 0xCE == ((U8*)(s))[0] ) ? \
        ( ( 0x90 == ((U8*)(s))[1] ) ? 0x390 \
        : ( 0xB0 == ((U8*)(s))[1] ) ? 0x3B0 : 0 ) \
    : 0 ) \
: ( 0xDF == ((U8*)(s))[0] ) ? 0xDF : 0 )
413 /*** GENERATED CODE ***/
/*
 * what_TRICKYFOLD_safe(s,e,is_utf8): bounds-checked identification of
 * the problematic case-folding letter starting at s.
 *
 * Only bytes in [s, e) are examined.  Evaluates to the matched code
 * point (0xDF, 0x390 or 0x3B0), or 0 when nothing matches, when the
 * two-byte UTF-8 form does not fit in the available bytes, or when
 * e - s <= 0.  With is_utf8 false only the single byte 0xDF matches.
 * s and e are evaluated more than once.
 */
#define what_TRICKYFOLD_safe(s,e,is_utf8) \
( ((e)-(s) > 1) ? \
    ( ( is_utf8 ) ? \
        ( ( 0xC3 == ((U8*)(s))[0] ) ? \
            ( ( 0x9F == ((U8*)(s))[1] ) ? 0xDF : 0 ) \
        : ( 0xCE == ((U8*)(s))[0] ) ? \
            ( ( 0x90 == ((U8*)(s))[1] ) ? 0x390 \
            : ( 0xB0 == ((U8*)(s))[1] ) ? 0x3B0 : 0 ) \
        : 0 ) \
    : ( 0xDF == ((U8*)(s))[0] ) ? 0xDF : 0 ) \
: ((e)-(s) > 0) ? \
    ( ( ( !( is_utf8 ) ) && ( 0xDF == ((U8*)(s))[0] ) ) ? 0xDF : 0 ) \
: 0 )
428 /*** GENERATED CODE ***/
/*
 * what_len_TRICKYFOLD(s,is_utf8,len): identify which problematic
 * case-folding letter, if any, starts at s, and report its byte length.
 *
 * Evaluates to the matched code point, or 0 for no match.  On a match,
 * len (which must be a modifiable lvalue) is assigned the number of
 * bytes matched:
 *   - if is_utf8 is true: 0xDF for C3 9F, 0x390 for CE 90, 0x3B0 for
 *     CE B0, each with len set to 2;
 *   - if is_utf8 is false: 0xDF when s[0] is 0xDF, with len set to 1.
 * When nothing matches, len is left unmodified.
 *
 * No bounds checking is done; the expansion may read s[0] and s[1].
 * s is evaluated more than once.
 */
#define what_len_TRICKYFOLD(s,is_utf8,len) \
( ( is_utf8 ) ? \
    ( ( 0xC3 == ((U8*)(s))[0] ) ? \
        ( ( 0x9F == ((U8*)(s))[1] ) ? (len)=2, 0xDF : 0 ) \
    : ( 0xCE == ((U8*)(s))[0] ) ? \
        ( ( 0x90 == ((U8*)(s))[1] ) ? (len)=2, 0x390 \
        : ( 0xB0 == ((U8*)(s))[1] ) ? (len)=2, 0x3B0 : 0 ) \
    : 0 ) \
: ( 0xDF == ((U8*)(s))[0] ) ? (len)=1, 0xDF : 0 )
439 /*** GENERATED CODE ***/
440 #define what_len_TRICKYFOLD_safe(s,e,is_utf8,len) \
443 ( ( 0xC3 == ((U8*)s)[0] ) ? \
444 ( ( 0x9F == ((U8*)s)[1] ) ? len=2, 0xDF : 0 ) \
445 : ( 0xCE == ((U8*)s)[0] ) ? \
446 ( ( 0x90 == ((U8*)s)[1] ) ? len=2, 0x390 \
447 : ( 0xB0 == ((U8*)s)[1] ) ? len=2, 0x3B0 : 0 ) \
449 : ( 0xDF == ((U8*)s)[0] ) ? len=1, 0xDF : 0 ) \
451 ( ( ( !( is_utf8 ) ) && ( 0xDF == ((U8*)s)[0] ) ) ? len=1, 0xDF : 0 ) \