Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.
[p5sagit/p5-mst-13.2.git] / regcharclass.h
index 40d21bf..8425693 100644 (file)
@@ -9,7 +9,7 @@
  *
  * !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
  * This file is built by Porting/regcharclass.pl.
- * (Generated at: Mon Apr 23 15:30:51 2007 GMT)
+ * (Generated at: Tue Apr 24 12:19:13 2007 GMT)
  * Any changes made here will be lost!
  */
 
 
 /*** GENERATED CODE ***/
 #define is_LNBREAK_cp(cp)                                                   \
-( (0x0A <= cp && cp <= 0x0D) || ( cp > 13 &&                                \
-( cp == 0x85 || ( cp > 133 &&                                               \
-( cp == 0x2028 || ( cp > 8232 &&                                            \
+( (0x0A <= cp && cp <= 0x0D) ||( cp > 0x0D &&                               \
+( cp == 0x85 ||( cp > 0x85 &&                                               \
+( cp == 0x2028 ||( cp > 0x2028 &&                                           \
 cp == 0x2029 ) ) ) ) ) )
 
 /*
@@ -227,14 +227,14 @@ cp == 0x2029 ) ) ) ) ) )
 
 /*** GENERATED CODE ***/
 #define is_HORIZWS_cp(cp)                                                   \
-( cp == 0x09 || ( cp > 9 &&                                                 \
-( cp == 0x20 || ( cp > 32 &&                                                \
-( cp == 0xA0 || ( cp > 160 &&                                               \
-( cp == 0x1680 || ( cp > 5760 &&                                            \
-( cp == 0x180E || ( cp > 6158 &&                                            \
-( (0x2000 <= cp && cp <= 0x200A) || ( cp > 8202 &&                          \
-( cp == 0x202F || ( cp > 8239 &&                                            \
-( cp == 0x205F || ( cp > 8287 &&                                            \
+( cp == 0x09 ||( cp > 0x09 &&                                               \
+( cp == 0x20 ||( cp > 0x20 &&                                               \
+( cp == 0xA0 ||( cp > 0xA0 &&                                               \
+( cp == 0x1680 ||( cp > 0x1680 &&                                           \
+( cp == 0x180E ||( cp > 0x180E &&                                           \
+( (0x2000 <= cp && cp <= 0x200A) ||( cp > 0x200A &&                         \
+( cp == 0x202F ||( cp > 0x202F &&                                           \
+( cp == 0x205F ||( cp > 0x205F &&                                           \
 cp == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
@@ -310,9 +310,62 @@ cp == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*** GENERATED CODE ***/
 #define is_VERTWS_cp(cp)                                                    \
-( (0x0A <= cp && cp <= 0x0D) || ( cp > 13 &&                                \
-( cp == 0x85 || ( cp > 133 &&                                               \
-( cp == 0x2028 || ( cp > 8232 &&                                            \
+( (0x0A <= cp && cp <= 0x0D) ||( cp > 0x0D &&                               \
+( cp == 0x85 ||( cp > 0x85 &&                                               \
+( cp == 0x2028 ||( cp > 0x2028 &&                                           \
 cp == 0x2029 ) ) ) ) ) )
 
+/*
+       TRICKYFOLD: Problematic fold case letters.
+
+       0x00DF  # LATIN SMALL LETTER SHARP S
+       0x0390  # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+       0x03B0  # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+*/
+/*** GENERATED CODE ***/ /* Nonzero if the bytes at s begin a TRICKYFOLD char: 2 for a UTF-8 match, 1 for a Latin-1 match, else 0. No bounds check; s is evaluated several times. */
+#define is_TRICKYFOLD(s,is_utf8)                                            \
+( (is_utf8) ?                          /* UTF-8: match a 2-byte sequence */ \
+  ( ( ((U8*)(s))[0] == 0xC3 ) ?                                             \
+    ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) :             /* C3 9F: U+00DF */ \
+  ((( ((U8*)(s))[0] == 0xCE ) && ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 )) ? 2 : 0) ) : /* CE 90: U+0390, CE B0: U+03B0 */ \
+  ( ((U8*)(s))[0] == 0xDF ) )          /* Latin-1: the single byte 0xDF */
+
+/*** GENERATED CODE ***/ /* Bounds-checked is_TRICKYFOLD: reads s[1] only when e - s > 1 and s[0] only when e - s > 0. Yields 2 (UTF-8 match), 1 (Latin-1 match) or 0. Arguments are evaluated several times. */
+#define is_TRICKYFOLD_safe(s,e,is_utf8)                                     \
+( ( (e) - (s) > 1 ) ?                   /* at least two bytes are readable */ \
+( (is_utf8) ?                                                               \
+  ( ( ((U8*)(s))[0] == 0xC3 ) ?                                             \
+    ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) :                                 \
+  ((( ((U8*)(s))[0] == 0xCE ) && ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 )) ? 2 : 0) ) :\
+  ( ((U8*)(s))[0] == 0xDF ) ) :                                             \
+((( (e) - (s) > 0 ) && (!(is_utf8))) ? ( ((U8*)(s))[0] == 0xDF ) : 0) ) /* one byte left: only the Latin-1 form fits */
+
+/*** GENERATED CODE ***/ /* UTF-8 input: 2 if the two bytes at s are C3 9F (U+00DF), CE 90 (U+0390) or CE B0 (U+03B0), else 0. No bounds check; s is evaluated several times. */
+#define is_TRICKYFOLD_utf8(s)                                               \
+( ( ((U8*)(s))[0] == 0xC3 ) ?                                               \
+    ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) :                                 \
+  ((( ((U8*)(s))[0] == 0xCE ) && ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 )) ? 2 : 0) )
+
+/*** GENERATED CODE ***/ /* Bounds-checked is_TRICKYFOLD_utf8: yields 0 unless e - s > 1, so s[0] and s[1] are only read when both lie before e. Arguments are evaluated several times. */
+#define is_TRICKYFOLD_utf8_safe(s,e)                                        \
+( ( (e) - (s) > 1 ) ?                                                       \
+  ( ( ((U8*)(s))[0] == 0xC3 ) ?                                             \
+    ( ( ((U8*)(s))[1] == 0x9F ) ? 2 : 0 ) :                                 \
+  ((( ((U8*)(s))[0] == 0xCE ) && ( ((U8*)(s))[1] == 0x90 || ((U8*)(s))[1] == 0xB0 )) ? 2 : 0) ) : 0 )
+
+/*** GENERATED CODE ***/ /* Latin-1 input: 1 if the byte at s is 0xDF, else 0. No bounds check. */
+#define is_TRICKYFOLD_latin1(s)                                             \
+( ((U8*)(s))[0] == 0xDF )
+
+/*** GENERATED CODE ***/ /* Bounds-checked is_TRICKYFOLD_latin1: 1 if e - s > 0 and the byte at s is 0xDF, else 0. s is evaluated twice. */
+#define is_TRICKYFOLD_latin1_safe(s,e)                                      \
+( ( (e) - (s) > 0 ) ?                                                       \
+  ( ((U8*)(s))[0] == 0xDF ) : 0 )
+
+/*** GENERATED CODE ***/ /* Nonzero if code point cp is 0xDF, 0x390 or 0x3B0; each "cp > X" guard skips the remaining comparisons for smaller values. cp is evaluated several times. */
+#define is_TRICKYFOLD_cp(cp)                                                \
+( (cp) == 0xDF ||( (cp) > 0xDF &&                                           \
+( (cp) == 0x390 ||( (cp) > 0x390 &&                                         \
+(cp) == 0x3B0 ) ) ) )
+
 /* ex: set ro: */