Jarkko Hietaniemi [Tue, 6 Jul 1999 21:47:04 +0000 (21:47 +0000)]
POSIX [[:character class:]] support for standard, locale,
and utf8.  If both utf8 and locale are on, utf8 wins.
I don't fully understand why so many tables changed in
lib/unicode because of "make" -- maybe it was just overdue.

p4raw-id: //depot/cfgperl@3624

46 files changed:
MANIFEST
Todo-5.005
embed.h
embed.pl
embedvar.h
global.sym
handy.h
intrpvar.h
lib/unicode/Bidirectional.pl
lib/unicode/Block.pl
lib/unicode/Category.pl
lib/unicode/Is/ASCII.pl [new file with mode: 0644]
lib/unicode/Is/Alnum.pl
lib/unicode/Is/Alpha.pl
lib/unicode/Is/BidiL.pl
lib/unicode/Is/Cntrl.pl [new file with mode: 0644]
lib/unicode/Is/Digit.pl
lib/unicode/Is/Graph.pl [new file with mode: 0644]
lib/unicode/Is/L.pl
lib/unicode/Is/Lo.pl
lib/unicode/Is/Print.pl
lib/unicode/Is/Punct.pl [new file with mode: 0644]
lib/unicode/Is/Space.pl
lib/unicode/Is/Word.pl [new file with mode: 0644]
lib/unicode/Is/XDigit.pl [new file with mode: 0644]
lib/unicode/Is/Z.pl
lib/unicode/Is/Zs.pl
lib/unicode/Name.pl
lib/unicode/To/Digit.pl
lib/unicode/mktables.PL
objXSUB.h
pod/perldelta.pod
pod/perldiag.pod
pod/perlre.pod
proto.h
regcomp.c
regcomp.h
regcomp.sym
regexec.c
regnodes.h
t/op/pat.t
t/op/re_tests
t/op/regexp.t
t/pragma/utf8.t
t/pragma/warn/regcomp
utf8.c

index 11543e1..6aa3d5f 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -826,6 +826,7 @@ lib/unicode/Is/So.pl                                Unicode character database
 lib/unicode/Is/Space.pl                                Unicode character database
 lib/unicode/Is/Syllable.pl                     Unicode character database
 lib/unicode/Is/Upper.pl                                Unicode character database
+lib/unicode/Is/Word.pl                         Unicode character database
 lib/unicode/Is/Z.pl                            Unicode character database
 lib/unicode/Is/Zl.pl                           Unicode character database
 lib/unicode/Is/Zp.pl                           Unicode character database
index a8831b1..b700603 100644 (file)
@@ -36,8 +36,8 @@ Locales
     decimal separator (3,1415927 is Europeanese for an approximation of pi)
 
 Regexen
-   POSIX [:foo:] character classes
-       ([=bar=] and [.zap.] would nice too but there's no API for them)
+   POSIX [=bar=] and [.zap.] would nice too but there's no API for them
+   (=bar= could be done with Unicode, though)
    approximate matching
 
 Reliable Signals
diff --git a/embed.h b/embed.h
index 0871c6f..ba07096 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define io_close               Perl_io_close
 #define invert                 Perl_invert
 #define is_uni_alnum           Perl_is_uni_alnum
+#define is_uni_alnumc          Perl_is_uni_alnumc
 #define is_uni_idfirst         Perl_is_uni_idfirst
 #define is_uni_alpha           Perl_is_uni_alpha
+#define is_uni_ascii           Perl_is_uni_ascii
 #define is_uni_space           Perl_is_uni_space
+#define is_uni_cntrl           Perl_is_uni_cntrl
+#define is_uni_graph           Perl_is_uni_graph
 #define is_uni_digit           Perl_is_uni_digit
 #define is_uni_upper           Perl_is_uni_upper
 #define is_uni_lower           Perl_is_uni_lower
 #define is_uni_print           Perl_is_uni_print
+#define is_uni_punct           Perl_is_uni_punct
+#define is_uni_xdigit          Perl_is_uni_xdigit
 #define to_uni_upper           Perl_to_uni_upper
 #define to_uni_title           Perl_to_uni_title
 #define to_uni_lower           Perl_to_uni_lower
 #define is_uni_alnum_lc                Perl_is_uni_alnum_lc
+#define is_uni_alnumc_lc       Perl_is_uni_alnumc_lc
 #define is_uni_idfirst_lc      Perl_is_uni_idfirst_lc
 #define is_uni_alpha_lc                Perl_is_uni_alpha_lc
+#define is_uni_ascii_lc                Perl_is_uni_ascii_lc
 #define is_uni_space_lc                Perl_is_uni_space_lc
+#define is_uni_cntrl_lc                Perl_is_uni_cntrl_lc
+#define is_uni_graph_lc                Perl_is_uni_graph_lc
 #define is_uni_digit_lc                Perl_is_uni_digit_lc
 #define is_uni_upper_lc                Perl_is_uni_upper_lc
 #define is_uni_lower_lc                Perl_is_uni_lower_lc
 #define is_uni_print_lc                Perl_is_uni_print_lc
+#define is_uni_punct_lc                Perl_is_uni_punct_lc
+#define is_uni_xdigit_lc       Perl_is_uni_xdigit_lc
 #define to_uni_upper_lc                Perl_to_uni_upper_lc
 #define to_uni_title_lc                Perl_to_uni_title_lc
 #define to_uni_lower_lc                Perl_to_uni_lower_lc
 #define is_utf8_alnum          Perl_is_utf8_alnum
+#define is_utf8_alnumc         Perl_is_utf8_alnumc
 #define is_utf8_idfirst                Perl_is_utf8_idfirst
 #define is_utf8_alpha          Perl_is_utf8_alpha
+#define is_utf8_ascii          Perl_is_utf8_ascii
 #define is_utf8_space          Perl_is_utf8_space
+#define is_utf8_cntrl          Perl_is_utf8_cntrl
 #define is_utf8_digit          Perl_is_utf8_digit
+#define is_utf8_graph          Perl_is_utf8_graph
 #define is_utf8_upper          Perl_is_utf8_upper
 #define is_utf8_lower          Perl_is_utf8_lower
 #define is_utf8_print          Perl_is_utf8_print
+#define is_utf8_punct          Perl_is_utf8_punct
+#define is_utf8_xdigit         Perl_is_utf8_xdigit
 #define is_utf8_mark           Perl_is_utf8_mark
 #define jmaybe                 Perl_jmaybe
 #define keyword                        Perl_keyword
 #define add_data               S_add_data
 #define re_croak2              S_re_croak2
 #define regpposixcc            S_regpposixcc
+#define checkposixcc           S_checkposixcc
 #define clear_re               S_clear_re
 #endif
 #if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
 #define ck_require             Perl_ck_require
 #define ck_rfun                        Perl_ck_rfun
 #define ck_rvconst             Perl_ck_rvconst
+#define ck_sassign             Perl_ck_sassign
 #define ck_scmp                        Perl_ck_scmp
 #define ck_select              Perl_ck_select
 #define ck_shift               Perl_ck_shift
 #define io_close(a)            Perl_io_close(aTHX_ a)
 #define invert(a)              Perl_invert(aTHX_ a)
 #define is_uni_alnum(a)                Perl_is_uni_alnum(aTHX_ a)
+#define is_uni_alnumc(a)       Perl_is_uni_alnumc(aTHX_ a)
 #define is_uni_idfirst(a)      Perl_is_uni_idfirst(aTHX_ a)
 #define is_uni_alpha(a)                Perl_is_uni_alpha(aTHX_ a)
+#define is_uni_ascii(a)                Perl_is_uni_ascii(aTHX_ a)
 #define is_uni_space(a)                Perl_is_uni_space(aTHX_ a)
+#define is_uni_cntrl(a)                Perl_is_uni_cntrl(aTHX_ a)
+#define is_uni_graph(a)                Perl_is_uni_graph(aTHX_ a)
 #define is_uni_digit(a)                Perl_is_uni_digit(aTHX_ a)
 #define is_uni_upper(a)                Perl_is_uni_upper(aTHX_ a)
 #define is_uni_lower(a)                Perl_is_uni_lower(aTHX_ a)
 #define is_uni_print(a)                Perl_is_uni_print(aTHX_ a)
+#define is_uni_punct(a)                Perl_is_uni_punct(aTHX_ a)
+#define is_uni_xdigit(a)       Perl_is_uni_xdigit(aTHX_ a)
 #define to_uni_upper(a)                Perl_to_uni_upper(aTHX_ a)
 #define to_uni_title(a)                Perl_to_uni_title(aTHX_ a)
 #define to_uni_lower(a)                Perl_to_uni_lower(aTHX_ a)
 #define is_uni_alnum_lc(a)     Perl_is_uni_alnum_lc(aTHX_ a)
+#define is_uni_alnumc_lc(a)    Perl_is_uni_alnumc_lc(aTHX_ a)
 #define is_uni_idfirst_lc(a)   Perl_is_uni_idfirst_lc(aTHX_ a)
 #define is_uni_alpha_lc(a)     Perl_is_uni_alpha_lc(aTHX_ a)
+#define is_uni_ascii_lc(a)     Perl_is_uni_ascii_lc(aTHX_ a)
 #define is_uni_space_lc(a)     Perl_is_uni_space_lc(aTHX_ a)
+#define is_uni_cntrl_lc(a)     Perl_is_uni_cntrl_lc(aTHX_ a)
+#define is_uni_graph_lc(a)     Perl_is_uni_graph_lc(aTHX_ a)
 #define is_uni_digit_lc(a)     Perl_is_uni_digit_lc(aTHX_ a)
 #define is_uni_upper_lc(a)     Perl_is_uni_upper_lc(aTHX_ a)
 #define is_uni_lower_lc(a)     Perl_is_uni_lower_lc(aTHX_ a)
 #define is_uni_print_lc(a)     Perl_is_uni_print_lc(aTHX_ a)
+#define is_uni_punct_lc(a)     Perl_is_uni_punct_lc(aTHX_ a)
+#define is_uni_xdigit_lc(a)    Perl_is_uni_xdigit_lc(aTHX_ a)
 #define to_uni_upper_lc(a)     Perl_to_uni_upper_lc(aTHX_ a)
 #define to_uni_title_lc(a)     Perl_to_uni_title_lc(aTHX_ a)
 #define to_uni_lower_lc(a)     Perl_to_uni_lower_lc(aTHX_ a)
 #define is_utf8_alnum(a)       Perl_is_utf8_alnum(aTHX_ a)
+#define is_utf8_alnumc(a)      Perl_is_utf8_alnumc(aTHX_ a)
 #define is_utf8_idfirst(a)     Perl_is_utf8_idfirst(aTHX_ a)
 #define is_utf8_alpha(a)       Perl_is_utf8_alpha(aTHX_ a)
+#define is_utf8_ascii(a)       Perl_is_utf8_ascii(aTHX_ a)
 #define is_utf8_space(a)       Perl_is_utf8_space(aTHX_ a)
+#define is_utf8_cntrl(a)       Perl_is_utf8_cntrl(aTHX_ a)
 #define is_utf8_digit(a)       Perl_is_utf8_digit(aTHX_ a)
+#define is_utf8_graph(a)       Perl_is_utf8_graph(aTHX_ a)
 #define is_utf8_upper(a)       Perl_is_utf8_upper(aTHX_ a)
 #define is_utf8_lower(a)       Perl_is_utf8_lower(aTHX_ a)
 #define is_utf8_print(a)       Perl_is_utf8_print(aTHX_ a)
+#define is_utf8_punct(a)       Perl_is_utf8_punct(aTHX_ a)
+#define is_utf8_xdigit(a)      Perl_is_utf8_xdigit(aTHX_ a)
 #define is_utf8_mark(a)                Perl_is_utf8_mark(aTHX_ a)
 #define jmaybe(a)              Perl_jmaybe(aTHX_ a)
 #define keyword(a,b)           Perl_keyword(aTHX_ a,b)
 #define study_chunk(a,b,c,d,e) S_study_chunk(aTHX_ a,b,c,d,e)
 #define add_data(a,b)          S_add_data(aTHX_ a,b)
 #define regpposixcc(a)         S_regpposixcc(aTHX_ a)
+#define checkposixcc()         S_checkposixcc(aTHX)
 #define clear_re(a)            S_clear_re(aTHX_ a)
 #endif
 #if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
 #define ck_require(a)          Perl_ck_require(aTHX_ a)
 #define ck_rfun(a)             Perl_ck_rfun(aTHX_ a)
 #define ck_rvconst(a)          Perl_ck_rvconst(aTHX_ a)
+#define ck_sassign(a)          Perl_ck_sassign(aTHX_ a)
 #define ck_scmp(a)             Perl_ck_scmp(aTHX_ a)
 #define ck_select(a)           Perl_ck_select(aTHX_ a)
 #define ck_shift(a)            Perl_ck_shift(aTHX_ a)
 #define invert                 Perl_invert
 #define Perl_is_uni_alnum      CPerlObj::Perl_is_uni_alnum
 #define is_uni_alnum           Perl_is_uni_alnum
+#define Perl_is_uni_alnumc     CPerlObj::Perl_is_uni_alnumc
+#define is_uni_alnumc          Perl_is_uni_alnumc
 #define Perl_is_uni_idfirst    CPerlObj::Perl_is_uni_idfirst
 #define is_uni_idfirst         Perl_is_uni_idfirst
 #define Perl_is_uni_alpha      CPerlObj::Perl_is_uni_alpha
 #define is_uni_alpha           Perl_is_uni_alpha
+#define Perl_is_uni_ascii      CPerlObj::Perl_is_uni_ascii
+#define is_uni_ascii           Perl_is_uni_ascii
 #define Perl_is_uni_space      CPerlObj::Perl_is_uni_space
 #define is_uni_space           Perl_is_uni_space
+#define Perl_is_uni_cntrl      CPerlObj::Perl_is_uni_cntrl
+#define is_uni_cntrl           Perl_is_uni_cntrl
+#define Perl_is_uni_graph      CPerlObj::Perl_is_uni_graph
+#define is_uni_graph           Perl_is_uni_graph
 #define Perl_is_uni_digit      CPerlObj::Perl_is_uni_digit
 #define is_uni_digit           Perl_is_uni_digit
 #define Perl_is_uni_upper      CPerlObj::Perl_is_uni_upper
 #define is_uni_lower           Perl_is_uni_lower
 #define Perl_is_uni_print      CPerlObj::Perl_is_uni_print
 #define is_uni_print           Perl_is_uni_print
+#define Perl_is_uni_punct      CPerlObj::Perl_is_uni_punct
+#define is_uni_punct           Perl_is_uni_punct
+#define Perl_is_uni_xdigit     CPerlObj::Perl_is_uni_xdigit
+#define is_uni_xdigit          Perl_is_uni_xdigit
 #define Perl_to_uni_upper      CPerlObj::Perl_to_uni_upper
 #define to_uni_upper           Perl_to_uni_upper
 #define Perl_to_uni_title      CPerlObj::Perl_to_uni_title
 #define to_uni_lower           Perl_to_uni_lower
 #define Perl_is_uni_alnum_lc   CPerlObj::Perl_is_uni_alnum_lc
 #define is_uni_alnum_lc                Perl_is_uni_alnum_lc
+#define Perl_is_uni_alnumc_lc  CPerlObj::Perl_is_uni_alnumc_lc
+#define is_uni_alnumc_lc       Perl_is_uni_alnumc_lc
 #define Perl_is_uni_idfirst_lc CPerlObj::Perl_is_uni_idfirst_lc
 #define is_uni_idfirst_lc      Perl_is_uni_idfirst_lc
 #define Perl_is_uni_alpha_lc   CPerlObj::Perl_is_uni_alpha_lc
 #define is_uni_alpha_lc                Perl_is_uni_alpha_lc
+#define Perl_is_uni_ascii_lc   CPerlObj::Perl_is_uni_ascii_lc
+#define is_uni_ascii_lc                Perl_is_uni_ascii_lc
 #define Perl_is_uni_space_lc   CPerlObj::Perl_is_uni_space_lc
 #define is_uni_space_lc                Perl_is_uni_space_lc
+#define Perl_is_uni_cntrl_lc   CPerlObj::Perl_is_uni_cntrl_lc
+#define is_uni_cntrl_lc                Perl_is_uni_cntrl_lc
+#define Perl_is_uni_graph_lc   CPerlObj::Perl_is_uni_graph_lc
+#define is_uni_graph_lc                Perl_is_uni_graph_lc
 #define Perl_is_uni_digit_lc   CPerlObj::Perl_is_uni_digit_lc
 #define is_uni_digit_lc                Perl_is_uni_digit_lc
 #define Perl_is_uni_upper_lc   CPerlObj::Perl_is_uni_upper_lc
 #define is_uni_lower_lc                Perl_is_uni_lower_lc
 #define Perl_is_uni_print_lc   CPerlObj::Perl_is_uni_print_lc
 #define is_uni_print_lc                Perl_is_uni_print_lc
+#define Perl_is_uni_punct_lc   CPerlObj::Perl_is_uni_punct_lc
+#define is_uni_punct_lc                Perl_is_uni_punct_lc
+#define Perl_is_uni_xdigit_lc  CPerlObj::Perl_is_uni_xdigit_lc
+#define is_uni_xdigit_lc       Perl_is_uni_xdigit_lc
 #define Perl_to_uni_upper_lc   CPerlObj::Perl_to_uni_upper_lc
 #define to_uni_upper_lc                Perl_to_uni_upper_lc
 #define Perl_to_uni_title_lc   CPerlObj::Perl_to_uni_title_lc
 #define to_uni_lower_lc                Perl_to_uni_lower_lc
 #define Perl_is_utf8_alnum     CPerlObj::Perl_is_utf8_alnum
 #define is_utf8_alnum          Perl_is_utf8_alnum
+#define Perl_is_utf8_alnumc    CPerlObj::Perl_is_utf8_alnumc
+#define is_utf8_alnumc         Perl_is_utf8_alnumc
 #define Perl_is_utf8_idfirst   CPerlObj::Perl_is_utf8_idfirst
 #define is_utf8_idfirst                Perl_is_utf8_idfirst
 #define Perl_is_utf8_alpha     CPerlObj::Perl_is_utf8_alpha
 #define is_utf8_alpha          Perl_is_utf8_alpha
+#define Perl_is_utf8_ascii     CPerlObj::Perl_is_utf8_ascii
+#define is_utf8_ascii          Perl_is_utf8_ascii
 #define Perl_is_utf8_space     CPerlObj::Perl_is_utf8_space
 #define is_utf8_space          Perl_is_utf8_space
+#define Perl_is_utf8_cntrl     CPerlObj::Perl_is_utf8_cntrl
+#define is_utf8_cntrl          Perl_is_utf8_cntrl
 #define Perl_is_utf8_digit     CPerlObj::Perl_is_utf8_digit
 #define is_utf8_digit          Perl_is_utf8_digit
+#define Perl_is_utf8_graph     CPerlObj::Perl_is_utf8_graph
+#define is_utf8_graph          Perl_is_utf8_graph
 #define Perl_is_utf8_upper     CPerlObj::Perl_is_utf8_upper
 #define is_utf8_upper          Perl_is_utf8_upper
 #define Perl_is_utf8_lower     CPerlObj::Perl_is_utf8_lower
 #define is_utf8_lower          Perl_is_utf8_lower
 #define Perl_is_utf8_print     CPerlObj::Perl_is_utf8_print
 #define is_utf8_print          Perl_is_utf8_print
+#define Perl_is_utf8_punct     CPerlObj::Perl_is_utf8_punct
+#define is_utf8_punct          Perl_is_utf8_punct
+#define Perl_is_utf8_xdigit    CPerlObj::Perl_is_utf8_xdigit
+#define is_utf8_xdigit         Perl_is_utf8_xdigit
 #define Perl_is_utf8_mark      CPerlObj::Perl_is_utf8_mark
 #define is_utf8_mark           Perl_is_utf8_mark
 #define Perl_jmaybe            CPerlObj::Perl_jmaybe
 #define re_croak2              S_re_croak2
 #define S_regpposixcc          CPerlObj::S_regpposixcc
 #define regpposixcc            S_regpposixcc
+#define S_checkposixcc         CPerlObj::S_checkposixcc
+#define checkposixcc           S_checkposixcc
 #define S_clear_re             CPerlObj::S_clear_re
 #define clear_re               S_clear_re
 #endif
 #define ck_rfun                        Perl_ck_rfun
 #define Perl_ck_rvconst                CPerlObj::Perl_ck_rvconst
 #define ck_rvconst             Perl_ck_rvconst
+#define Perl_ck_sassign                CPerlObj::Perl_ck_sassign
+#define ck_sassign             Perl_ck_sassign
 #define Perl_ck_scmp           CPerlObj::Perl_ck_scmp
 #define ck_scmp                        Perl_ck_scmp
 #define Perl_ck_select         CPerlObj::Perl_ck_select
index ed7f3e4..206dbbf 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -947,35 +947,53 @@ p |char*  |instr          |const char* big|const char* little
 p      |bool   |io_close       |IO* io
 p      |OP*    |invert         |OP* cmd
 p      |bool   |is_uni_alnum   |U32 c
+p      |bool   |is_uni_alnumc  |U32 c
 p      |bool   |is_uni_idfirst |U32 c
 p      |bool   |is_uni_alpha   |U32 c
+p      |bool   |is_uni_ascii   |U32 c
 p      |bool   |is_uni_space   |U32 c
+p      |bool   |is_uni_cntrl   |U32 c
+p      |bool   |is_uni_graph   |U32 c
 p      |bool   |is_uni_digit   |U32 c
 p      |bool   |is_uni_upper   |U32 c
 p      |bool   |is_uni_lower   |U32 c
 p      |bool   |is_uni_print   |U32 c
+p      |bool   |is_uni_punct   |U32 c
+p      |bool   |is_uni_xdigit  |U32 c
 p      |U32    |to_uni_upper   |U32 c
 p      |U32    |to_uni_title   |U32 c
 p      |U32    |to_uni_lower   |U32 c
 p      |bool   |is_uni_alnum_lc|U32 c
+p      |bool   |is_uni_alnumc_lc|U32 c
 p      |bool   |is_uni_idfirst_lc|U32 c
 p      |bool   |is_uni_alpha_lc|U32 c
+p      |bool   |is_uni_ascii_lc|U32 c
 p      |bool   |is_uni_space_lc|U32 c
+p      |bool   |is_uni_cntrl_lc|U32 c
+p      |bool   |is_uni_graph_lc|U32 c
 p      |bool   |is_uni_digit_lc|U32 c
 p      |bool   |is_uni_upper_lc|U32 c
 p      |bool   |is_uni_lower_lc|U32 c
 p      |bool   |is_uni_print_lc|U32 c
+p      |bool   |is_uni_punct_lc|U32 c
+p      |bool   |is_uni_xdigit_lc|U32 c
 p      |U32    |to_uni_upper_lc|U32 c
 p      |U32    |to_uni_title_lc|U32 c
 p      |U32    |to_uni_lower_lc|U32 c
 p      |bool   |is_utf8_alnum  |U8 *p
+p      |bool   |is_utf8_alnumc |U8 *p
 p      |bool   |is_utf8_idfirst|U8 *p
 p      |bool   |is_utf8_alpha  |U8 *p
+p      |bool   |is_utf8_ascii  |U8 *p
 p      |bool   |is_utf8_space  |U8 *p
+p      |bool   |is_utf8_cntrl  |U8 *p
 p      |bool   |is_utf8_digit  |U8 *p
+p      |bool   |is_utf8_graph  |U8 *p
 p      |bool   |is_utf8_upper  |U8 *p
 p      |bool   |is_utf8_lower  |U8 *p
 p      |bool   |is_utf8_print  |U8 *p
+p      |bool   |is_utf8_punct  |U8 *p
+p      |bool   |is_utf8_xdigit |U8 *p
 p      |bool   |is_utf8_mark   |U8 *p
 p      |OP*    |jmaybe         |OP* arg
 p      |I32    |keyword        |char* d|I32 len
@@ -1646,7 +1664,8 @@ s |I32    |study_chunk    |regnode **scanp|I32 *deltap \
                                |regnode *last|scan_data_t *data|U32 flags
 s      |I32    |add_data       |I32 n|char *s
 rs     |void|re_croak2 |const char* pat1|const char* pat2|...
-s      |char*|regpposixcc      |I32 value
+s      |I32    |regpposixcc    |I32 value
+s      |void   |checkposixcc
 s      |void   |clear_re       |void *r
 #endif
 
index f759b63..74e7ca5 100644 (file)
 #define PL_uid                 (PL_curinterp->Iuid)
 #define PL_unsafe              (PL_curinterp->Iunsafe)
 #define PL_utf8_alnum          (PL_curinterp->Iutf8_alnum)
+#define PL_utf8_alnumc         (PL_curinterp->Iutf8_alnumc)
 #define PL_utf8_alpha          (PL_curinterp->Iutf8_alpha)
+#define PL_utf8_ascii          (PL_curinterp->Iutf8_ascii)
+#define PL_utf8_cntrl          (PL_curinterp->Iutf8_cntrl)
 #define PL_utf8_digit          (PL_curinterp->Iutf8_digit)
+#define PL_utf8_graph          (PL_curinterp->Iutf8_graph)
 #define PL_utf8_lower          (PL_curinterp->Iutf8_lower)
 #define PL_utf8_mark           (PL_curinterp->Iutf8_mark)
 #define PL_utf8_print          (PL_curinterp->Iutf8_print)
+#define PL_utf8_punct          (PL_curinterp->Iutf8_punct)
 #define PL_utf8_space          (PL_curinterp->Iutf8_space)
 #define PL_utf8_tolower                (PL_curinterp->Iutf8_tolower)
 #define PL_utf8_totitle                (PL_curinterp->Iutf8_totitle)
 #define PL_utf8_toupper                (PL_curinterp->Iutf8_toupper)
 #define PL_utf8_upper          (PL_curinterp->Iutf8_upper)
+#define PL_utf8_xdigit         (PL_curinterp->Iutf8_xdigit)
 #define PL_uudmap              (PL_curinterp->Iuudmap)
 #define PL_warnhook            (PL_curinterp->Iwarnhook)
 #define PL_xiv_arenaroot       (PL_curinterp->Ixiv_arenaroot)
 #define PL_Iuid                        PL_uid
 #define PL_Iunsafe             PL_unsafe
 #define PL_Iutf8_alnum         PL_utf8_alnum
+#define PL_Iutf8_alnumc                PL_utf8_alnumc
 #define PL_Iutf8_alpha         PL_utf8_alpha
+#define PL_Iutf8_ascii         PL_utf8_ascii
+#define PL_Iutf8_cntrl         PL_utf8_cntrl
 #define PL_Iutf8_digit         PL_utf8_digit
+#define PL_Iutf8_graph         PL_utf8_graph
 #define PL_Iutf8_lower         PL_utf8_lower
 #define PL_Iutf8_mark          PL_utf8_mark
 #define PL_Iutf8_print         PL_utf8_print
+#define PL_Iutf8_punct         PL_utf8_punct
 #define PL_Iutf8_space         PL_utf8_space
 #define PL_Iutf8_tolower       PL_utf8_tolower
 #define PL_Iutf8_totitle       PL_utf8_totitle
 #define PL_Iutf8_toupper       PL_utf8_toupper
 #define PL_Iutf8_upper         PL_utf8_upper
+#define PL_Iutf8_xdigit                PL_utf8_xdigit
 #define PL_Iuudmap             PL_uudmap
 #define PL_Iwarnhook           PL_warnhook
 #define PL_Ixiv_arenaroot      PL_xiv_arenaroot
index 87ece3c..c5597d1 100644 (file)
@@ -177,35 +177,53 @@ Perl_instr
 Perl_io_close
 Perl_invert
 Perl_is_uni_alnum
+Perl_is_uni_alnumc
 Perl_is_uni_idfirst
 Perl_is_uni_alpha
+Perl_is_uni_ascii
 Perl_is_uni_space
+Perl_is_uni_cntrl
+Perl_is_uni_graph
 Perl_is_uni_digit
 Perl_is_uni_upper
 Perl_is_uni_lower
 Perl_is_uni_print
+Perl_is_uni_punct
+Perl_is_uni_xdigit
 Perl_to_uni_upper
 Perl_to_uni_title
 Perl_to_uni_lower
 Perl_is_uni_alnum_lc
+Perl_is_uni_alnumc_lc
 Perl_is_uni_idfirst_lc
 Perl_is_uni_alpha_lc
+Perl_is_uni_ascii_lc
 Perl_is_uni_space_lc
+Perl_is_uni_cntrl_lc
+Perl_is_uni_graph_lc
 Perl_is_uni_digit_lc
 Perl_is_uni_upper_lc
 Perl_is_uni_lower_lc
 Perl_is_uni_print_lc
+Perl_is_uni_punct_lc
+Perl_is_uni_xdigit_lc
 Perl_to_uni_upper_lc
 Perl_to_uni_title_lc
 Perl_to_uni_lower_lc
 Perl_is_utf8_alnum
+Perl_is_utf8_alnumc
 Perl_is_utf8_idfirst
 Perl_is_utf8_alpha
+Perl_is_utf8_ascii
 Perl_is_utf8_space
+Perl_is_utf8_cntrl
 Perl_is_utf8_digit
+Perl_is_utf8_graph
 Perl_is_utf8_upper
 Perl_is_utf8_lower
 Perl_is_utf8_print
+Perl_is_utf8_punct
+Perl_is_utf8_xdigit
 Perl_is_utf8_mark
 Perl_jmaybe
 Perl_keyword
diff --git a/handy.h b/handy.h
index 851f348..95bcec7 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -215,13 +215,25 @@ typedef unsigned short    U16;
     /* In EBCDIC we do not do locales: therefore() isupper() is fine. */
 #   define isUPPER(c)  isupper(c)
 #   define isLOWER(c)  islower(c)
+#   define isALNUMC(c) isalnum(c)
+#   define isASCII(c)  isascii(c)
+#   define isCNTRL(c)  iscntrl(c)
+#   define isGRAPH(c)  isgraph(c)
 #   define isPRINT(c)  isprint(c)
+#   define isPUNCT(c)  ispunct(c)
+#   define isXDIGIT(c) isxdigit(c)
 #   define toUPPER(c)  toupper(c)
 #   define toLOWER(c)  tolower(c)
 #else
 #   define isUPPER(c)  ((c) >= 'A' && (c) <= 'Z')
 #   define isLOWER(c)  ((c) >= 'a' && (c) <= 'z')
+#   define isALNUMC(c) (isALPHA(c) || isDIGIT(c))
+#   define isASCII(c)  ((c) <= 127)
+#   define isCNTRL(c)  ((c) < ' ')
+#   define isGRAPH(c)  (isALNUM(c) || isPUNCT(c))
 #   define isPRINT(c)  (((c) > 32 && (c) < 127) || isSPACE(c))
+#   define isPUNCT(c)  (((c) >= 33 && (c) <= 47) || ((c) >= 58 && (c) <= 64)  || ((c) >= 91 && (c) <= 96) || ((c) >= 123 && (c) <= 126))
+#   define isXDIGIT(c)  (isdigit(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
 #   define toUPPER(c)  (isLOWER(c) ? (c) - ('a' - 'A') : (c))
 #   define toLOWER(c)  (isUPPER(c) ? (c) + ('a' - 'A') : (c))
 #endif
@@ -229,8 +241,7 @@ typedef unsigned short      U16;
 #ifdef USE_NEXT_CTYPE
 
 #  define isALNUM_LC(c) \
-       (NXIsAlpha((unsigned int)(c)) || NXIsDigit((unsigned int)(c)) || \
-        (char)(c) == '_')
+       (NXIsAlnum((unsigned int)(c)) || (char)(c) == '_')
 #  define isIDFIRST_LC(c) \
        (NXIsAlpha((unsigned int)(c)) || (char)(c) == '_')
 #  define isALPHA_LC(c)                NXIsAlpha((unsigned int)(c))
@@ -238,37 +249,47 @@ typedef unsigned short    U16;
 #  define isDIGIT_LC(c)                NXIsDigit((unsigned int)(c))
 #  define isUPPER_LC(c)                NXIsUpper((unsigned int)(c))
 #  define isLOWER_LC(c)                NXIsLower((unsigned int)(c))
+#  define isALNUMC_LC(c)       NXIsAlnum((unsigned int)(c))
+#  define isCNTRL_LC(c)                NXIsCntrl((unsigned int)(c))
+#  define isGRAPH_LC(c)                NXIsGraph((unsigned int)(c))
 #  define isPRINT_LC(c)                NXIsPrint((unsigned int)(c))
+#  define isPUNCT_LC(c)                NXIsPunct((unsigned int)(c))
 #  define toUPPER_LC(c)                NXToUpper((unsigned int)(c))
 #  define toLOWER_LC(c)                NXToLower((unsigned int)(c))
 
 #else /* !USE_NEXT_CTYPE */
+
 #  if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
 
-#    define isALNUM_LC(c) \
-       (isalpha((unsigned char)(c)) || \
-        isdigit((unsigned char)(c)) || (char)(c) == '_')
+#    define isALNUM_LC(c)   (isalnum((unsigned char)(c)) || (char)(c) == '_')
 #    define isIDFIRST_LC(c) (isalpha((unsigned char)(c)) || (char)(c) == '_')
 #    define isALPHA_LC(c)      isalpha((unsigned char)(c))
 #    define isSPACE_LC(c)      isspace((unsigned char)(c))
 #    define isDIGIT_LC(c)      isdigit((unsigned char)(c))
 #    define isUPPER_LC(c)      isupper((unsigned char)(c))
 #    define isLOWER_LC(c)      islower((unsigned char)(c))
+#    define isALNUMC_LC(c)     isalnum((unsigned char)(c))
+#    define isCNTRL_LC(c)      iscntrl((unsigned char)(c))
+#    define isGRAPH_LC(c)      isgraph((unsigned char)(c))
 #    define isPRINT_LC(c)      isprint((unsigned char)(c))
+#    define isPUNCT_LC(c)      ispunct((unsigned char)(c))
 #    define toUPPER_LC(c)      toupper((unsigned char)(c))
 #    define toLOWER_LC(c)      tolower((unsigned char)(c))
 
 #  else
 
-#    define isALNUM_LC(c) \
-       (isascii(c) && (isalpha(c) || isdigit(c) || (c) == '_'))
+#    define isALNUM_LC(c)      (isascii(c) && (isalnum(c) || (c) == '_'))
 #    define isIDFIRST_LC(c)    (isascii(c) && (isalpha(c) || (c) == '_'))
 #    define isALPHA_LC(c)      (isascii(c) && isalpha(c))
 #    define isSPACE_LC(c)      (isascii(c) && isspace(c))
 #    define isDIGIT_LC(c)      (isascii(c) && isdigit(c))
 #    define isUPPER_LC(c)      (isascii(c) && isupper(c))
 #    define isLOWER_LC(c)      (isascii(c) && islower(c))
+#    define isALNUMC_LC(c)     (isascii(c) && isalnum(c))
+#    define isCNTRL_LC(c)      (isascii(c) && iscntrl(c))
+#    define isGRAPH_LC(c)      (isascii(c) && isgraph(c))
 #    define isPRINT_LC(c)      (isascii(c) && isprint(c))
+#    define isPUNCT_LC(c)      (isascii(c) && ispunct(c))
 #    define toUPPER_LC(c)      toupper(c)
 #    define toLOWER_LC(c)      tolower(c)
 
@@ -282,7 +303,13 @@ typedef unsigned short     U16;
 #define isDIGIT_uni(c)         is_uni_digit(c)
 #define isUPPER_uni(c)         is_uni_upper(c)
 #define isLOWER_uni(c)         is_uni_lower(c)
+#define isALNUMC_uni(c)                is_uni_alnumc(c)
+#define isASCII_uni(c)         is_uni_ascii(c)
+#define isCNTRL_uni(c)         is_uni_cntrl(c)
+#define isGRAPH_uni(c)         is_uni_graph(c)
 #define isPRINT_uni(c)         is_uni_print(c)
+#define isPUNCT_uni(c)         is_uni_punct(c)
+#define isXDIGIT_uni(c)                is_uni_xdigit(c)
 #define toUPPER_uni(c)         to_uni_upper(c)
 #define toTITLE_uni(c)         to_uni_title(c)
 #define toLOWER_uni(c)         to_uni_lower(c)
@@ -294,7 +321,11 @@ typedef unsigned short     U16;
 #define isDIGIT_LC_uni(c)      (c < 256 ? isDIGIT_LC(c) : is_uni_digit_lc(c))
 #define isUPPER_LC_uni(c)      (c < 256 ? isUPPER_LC(c) : is_uni_upper_lc(c))
 #define isLOWER_LC_uni(c)      (c < 256 ? isLOWER_LC(c) : is_uni_lower_lc(c))
+#define isALNUMC_LC_uni(c)     (c < 256 ? isALNUMC_LC(c) : is_uni_alnumc_lc(c))
+#define isCNTRL_LC_uni(c)      (c < 256 ? isCNTRL_LC(c) : is_uni_cntrl_lc(c))
+#define isGRAPH_LC_uni(c)      (c < 256 ? isGRAPH_LC(c) : is_uni_graph_lc(c))
 #define isPRINT_LC_uni(c)      (c < 256 ? isPRINT_LC(c) : is_uni_print_lc(c))
+#define isPUNCT_LC_uni(c)      (c < 256 ? isPUNCT_LC(c) : is_uni_punct_lc(c))
 #define toUPPER_LC_uni(c)      (c < 256 ? toUPPER_LC(c) : to_uni_upper_lc(c))
 #define toTITLE_LC_uni(c)      (c < 256 ? toUPPER_LC(c) : to_uni_title_lc(c))
 #define toLOWER_LC_uni(c)      (c < 256 ? toLOWER_LC(c) : to_uni_lower_lc(c))
@@ -306,7 +337,13 @@ typedef unsigned short     U16;
 #define isDIGIT_utf8(p)                is_utf8_digit(p)
 #define isUPPER_utf8(p)                is_utf8_upper(p)
 #define isLOWER_utf8(p)                is_utf8_lower(p)
+#define isALNUMC_utf8(p)       is_utf8_alnumc(p)
+#define isASCII_utf8(p)                is_utf8_ascii(p)
+#define isCNTRL_utf8(p)                is_utf8_cntrl(p)
+#define isGRAPH_utf8(p)                is_utf8_graph(p)
 #define isPRINT_utf8(p)                is_utf8_print(p)
+#define isPUNCT_utf8(p)                is_utf8_punct(p)
+#define isXDIGIT_utf8(p)       is_utf8_xdigit(p)
 #define toUPPER_utf8(p)                to_utf8_upper(p)
 #define toTITLE_utf8(p)                to_utf8_title(p)
 #define toLOWER_utf8(p)                to_utf8_lower(p)
@@ -318,7 +355,11 @@ typedef unsigned short     U16;
 #define isDIGIT_LC_utf8(p)     isDIGIT_LC_uni(utf8_to_uv(p, 0))
 #define isUPPER_LC_utf8(p)     isUPPER_LC_uni(utf8_to_uv(p, 0))
 #define isLOWER_LC_utf8(p)     isLOWER_LC_uni(utf8_to_uv(p, 0))
+#define isALNUMC_LC_utf8(p)    isALNUMC_LC_uni(utf8_to_uv(p, 0))
+#define isCNTRL_LC_utf8(p)     isCNTRL_LC_uni(utf8_to_uv(p, 0))
+#define isGRAPH_LC_utf8(p)     isGRAPH_LC_uni(utf8_to_uv(p, 0))
 #define isPRINT_LC_utf8(p)     isPRINT_LC_uni(utf8_to_uv(p, 0))
+#define isPUNCT_LC_utf8(p)     isPUNCT_LC_uni(utf8_to_uv(p, 0))
 #define toUPPER_LC_utf8(p)     toUPPER_LC_uni(utf8_to_uv(p, 0))
 #define toTITLE_LC_utf8(p)     toTITLE_LC_uni(utf8_to_uv(p, 0))
 #define toLOWER_LC_utf8(p)     toLOWER_LC_uni(utf8_to_uv(p, 0))
index 5cff858..e2d1d5f 100644 (file)
@@ -322,12 +322,18 @@ PERLVAR(Inumeric_radix,           char)
 
 /* utf8 character classes */
 PERLVAR(Iutf8_alnum,   SV *)
+PERLVAR(Iutf8_alnumc,  SV *)
+PERLVAR(Iutf8_ascii,   SV *)
 PERLVAR(Iutf8_alpha,   SV *)
 PERLVAR(Iutf8_space,   SV *)
+PERLVAR(Iutf8_cntrl,   SV *)
+PERLVAR(Iutf8_graph,   SV *)
 PERLVAR(Iutf8_digit,   SV *)
 PERLVAR(Iutf8_upper,   SV *)
 PERLVAR(Iutf8_lower,   SV *)
 PERLVAR(Iutf8_print,   SV *)
+PERLVAR(Iutf8_punct,   SV *)
+PERLVAR(Iutf8_xdigit,  SV *)
 PERLVAR(Iutf8_mark,    SV *)
 PERLVAR(Iutf8_toupper, SV *)
 PERLVAR(Iutf8_totitle, SV *)
index 1523d50..e10210d 100644 (file)
@@ -233,6 +233,32 @@ return <<'END';
 1100   1159    L
 115f   11a2    L
 11a8   11f9    L
+1200   1206    L
+1208   1246    L
+1248           L
+124a   124d    L
+1250   1256    L
+1258           L
+125a   125d    L
+1260   1286    L
+1288           L
+128a   128d    L
+1290   12ae    L
+12b0           L
+12b2   12b5    L
+12b8   12be    L
+12c0           L
+12c2   12c5    L
+12c8   12ce    L
+12d0   12d6    L
+12d8   12ee    L
+12f0   130e    L
+1310           L
+1312   1315    L
+1318   131e    L
+1320   1346    L
+1348   135a    L
+1361   137c    L
 1e00   1e9b    L
 1ea0   1ef9    L
 1f00   1f15    L
index ce9289a..1c0b280 100644 (file)
@@ -27,7 +27,6 @@ return <<'END';
 1100   11FF    Hangul Jamo
 1E00   1EFF    Latin Extended Additional
 1F00   1FFF    Greek Extended
-1200   137F    Ethiopic
 2000   206F    General Punctuation
 2070   209F    Superscripts and Subscripts
 20A0   20CF    Currency Symbols
index 3b47570..5c0842c 100644 (file)
@@ -762,7 +762,31 @@ return <<'END';
 1100   1159    Lo
 115f   11a2    Lo
 11a8   11f9    Lo
-1200   135a    Lo
+1200   1206    Lo
+1208   1246    Lo
+1248           Lo
+124a   124d    Lo
+1250   1256    Lo
+1258           Lo
+125a   125d    Lo
+1260   1286    Lo
+1288           Lo
+128a   128d    Lo
+1290   12ae    Lo
+12b0           Lo
+12b2   12b5    Lo
+12b8   12be    Lo
+12c0           Lo
+12c2   12c5    Lo
+12c8   12ce    Lo
+12d0   12d6    Lo
+12d8   12ee    Lo
+12f0   130e    Lo
+1310           Lo
+1312   1315    Lo
+1318   131e    Lo
+1320   1346    Lo
+1348   135a    Lo
 1361   1368    Po
 1369   1371    Nd
 1372   137c    No
diff --git a/lib/unicode/Is/ASCII.pl b/lib/unicode/Is/ASCII.pl
new file mode 100644 (file)
index 0000000..b7843e9
--- /dev/null
@@ -0,0 +1,3 @@
+return <<'END';
+0000   007f
+END
index aa82e4f..ac48257 100644 (file)
@@ -1,7 +1,6 @@
 return <<'END';
 0030   0039
 0041   005a
-005f   
 0061   007a
 00aa   
 00b5   
@@ -156,8 +155,32 @@ return <<'END';
 1100   1159
 115f   11a2
 11a8   11f9
-1200   135a
-1369   137c
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
+1369   1371
 1e00   1e9b
 1ea0   1ef9
 1f00   1f15
index ea6fa7f..9de0521 100644 (file)
@@ -141,7 +141,31 @@ return <<'END';
 1100   1159
 115f   11a2
 11a8   11f9
-1200   135a
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
 1e00   1e9b
 1ea0   1ef9
 1f00   1f15
index 85de325..c17ef10 100644 (file)
@@ -186,6 +186,32 @@ return <<'END';
 1100   1159
 115f   11a2
 11a8   11f9
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
+1361   137c
 1e00   1e9b
 1ea0   1ef9
 1f00   1f15
diff --git a/lib/unicode/Is/Cntrl.pl b/lib/unicode/Is/Cntrl.pl
new file mode 100644 (file)
index 0000000..274239f
--- /dev/null
@@ -0,0 +1,12 @@
+return <<'END';
+0000   001f
+007f   009f
+200c   200f
+202a   202e
+206a   206f
+d800   db7f
+db80   dbff
+dc00   dfff
+e000   f8ff
+feff   
+END
index 2181f15..a25e28f 100644 (file)
@@ -14,6 +14,6 @@ return <<'END';
 0e50   0e59
 0ed0   0ed9
 0f20   0f29
-1369   137c
+1369   1371
 ff10   ff19
 END
diff --git a/lib/unicode/Is/Graph.pl b/lib/unicode/Is/Graph.pl
new file mode 100644 (file)
index 0000000..7a8c225
--- /dev/null
@@ -0,0 +1,327 @@
+return <<'END';
+0021   007e
+00a0   01f5
+01fa   0217
+0250   02a8
+02b0   02de
+02e0   02e9
+0300   0345
+0360   0361
+0374   0375
+037a   
+037e   
+0384   038a
+038c   
+038e   03a1
+03a3   03ce
+03d0   03d6
+03da   
+03dc   
+03de   
+03e0   
+03e2   03f3
+0401   040c
+040e   044f
+0451   045c
+045e   0486
+0490   04c4
+04c7   04c8
+04cb   04cc
+04d0   04eb
+04ee   04f5
+04f8   04f9
+0531   0556
+0559   055f
+0561   0587
+0589   
+0591   05a1
+05a3   05b9
+05bb   05c4
+05d0   05ea
+05f0   05f4
+060c   
+061b   
+061f   
+0621   063a
+0640   0652
+0660   066d
+0670   06b7
+06ba   06be
+06c0   06ce
+06d0   06ed
+06f0   06f9
+0901   0903
+0905   0939
+093c   094d
+0950   0954
+0958   0970
+0981   0983
+0985   098c
+098f   0990
+0993   09a8
+09aa   09b0
+09b2   
+09b6   09b9
+09bc   
+09be   09c4
+09c7   09c8
+09cb   09cd
+09d7   
+09dc   09dd
+09df   09e3
+09e6   09fa
+0a02   
+0a05   0a0a
+0a0f   0a10
+0a13   0a28
+0a2a   0a30
+0a32   0a33
+0a35   0a36
+0a38   0a39
+0a3c   
+0a3e   0a42
+0a47   0a48
+0a4b   0a4d
+0a59   0a5c
+0a5e   
+0a66   0a74
+0a81   0a83
+0a85   0a8b
+0a8d   
+0a8f   0a91
+0a93   0aa8
+0aaa   0ab0
+0ab2   0ab3
+0ab5   0ab9
+0abc   0ac5
+0ac7   0ac9
+0acb   0acd
+0ad0   
+0ae0   
+0ae6   0aef
+0b01   0b03
+0b05   0b0c
+0b0f   0b10
+0b13   0b28
+0b2a   0b30
+0b32   0b33
+0b36   0b39
+0b3c   0b43
+0b47   0b48
+0b4b   0b4d
+0b56   0b57
+0b5c   0b5d
+0b5f   0b61
+0b66   0b70
+0b82   0b83
+0b85   0b8a
+0b8e   0b90
+0b92   0b95
+0b99   0b9a
+0b9c   
+0b9e   0b9f
+0ba3   0ba4
+0ba8   0baa
+0bae   0bb5
+0bb7   0bb9
+0bbe   0bc2
+0bc6   0bc8
+0bca   0bcd
+0bd7   
+0be7   0bf2
+0c01   0c03
+0c05   0c0c
+0c0e   0c10
+0c12   0c28
+0c2a   0c33
+0c35   0c39
+0c3e   0c44
+0c46   0c48
+0c4a   0c4d
+0c55   0c56
+0c60   0c61
+0c66   0c6f
+0c82   0c83
+0c85   0c8c
+0c8e   0c90
+0c92   0ca8
+0caa   0cb3
+0cb5   0cb9
+0cbe   0cc4
+0cc6   0cc8
+0cca   0ccd
+0cd5   0cd6
+0cde   
+0ce0   0ce1
+0ce6   0cef
+0d02   0d03
+0d05   0d0c
+0d0e   0d10
+0d12   0d28
+0d2a   0d39
+0d3e   0d43
+0d46   0d48
+0d4a   0d4d
+0d57   
+0d60   0d61
+0d66   0d6f
+0e01   0e3a
+0e3f   0e5b
+0e81   0e82
+0e84   
+0e87   0e88
+0e8a   
+0e8d   
+0e94   0e97
+0e99   0e9f
+0ea1   0ea3
+0ea5   
+0ea7   
+0eaa   0eab
+0ead   0eb9
+0ebb   0ebd
+0ec0   0ec4
+0ec6   
+0ec8   0ecd
+0ed0   0ed9
+0edc   0edd
+0f00   0f47
+0f49   0f69
+0f71   0f8b
+0f90   0f95
+0f97   
+0f99   0fad
+0fb1   0fb7
+0fb9   
+10a0   10c5
+10d0   10f6
+10fb   
+1100   1159
+115f   11a2
+11a8   11f9
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
+1361   137c
+1e00   1e9b
+1ea0   1ef9
+1f00   1f15
+1f18   1f1d
+1f20   1f45
+1f48   1f4d
+1f50   1f57
+1f59   
+1f5b   
+1f5d   
+1f5f   1f7d
+1f80   1fb4
+1fb6   1fc4
+1fc6   1fd3
+1fd6   1fdb
+1fdd   1fef
+1ff2   1ff4
+1ff6   1ffe
+2000   200b
+2010   2029
+2030   2046
+2070   
+2074   208e
+20a0   20ac
+20d0   20e1
+2100   2138
+2153   2182
+2190   21ea
+2200   22f1
+2300   
+2302   237a
+2400   2424
+2440   244a
+2460   24ea
+2500   2595
+25a0   25ef
+2600   2613
+261a   266f
+2701   2704
+2706   2709
+270c   2727
+2729   274b
+274d   
+274f   2752
+2756   
+2758   275e
+2761   2767
+2776   2794
+2798   27af
+27b1   27be
+3000   3037
+303f   
+3041   3094
+3099   309e
+30a1   30fe
+3105   312c
+3131   318e
+3190   319f
+3200   321c
+3220   3243
+3260   327b
+327f   32b0
+32c0   32cb
+32d0   32fe
+3300   3376
+337b   33dd
+33e0   33fe
+4e00   9fa5
+ac00   d7a3
+f900   fa2d
+fb00   fb06
+fb13   fb17
+fb1e   fb36
+fb38   fb3c
+fb3e   
+fb40   fb41
+fb43   fb44
+fb46   fbb1
+fbd3   fd3f
+fd50   fd8f
+fd92   fdc7
+fdf0   fdfb
+fe20   fe23
+fe30   fe44
+fe49   fe52
+fe54   fe66
+fe68   fe6b
+fe70   fe72
+fe74   
+fe76   fefc
+ff01   ff5e
+ff61   ffbe
+ffc2   ffc7
+ffca   ffcf
+ffd2   ffd7
+ffda   ffdc
+ffe0   ffe6
+ffe8   ffee
+fffc   fffd
+END
index 9c8e3cf..06796fd 100644 (file)
@@ -145,7 +145,31 @@ return <<'END';
 1100   1159
 115f   11a2
 11a8   11f9
-1200   135a
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
 1e00   1e9b
 1ea0   1ef9
 1f00   1f15
index 30f776d..07da29e 100644 (file)
@@ -107,7 +107,31 @@ return <<'END';
 1100   1159
 115f   11a2
 11a8   11f9
-1200   135a
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
 2135   2138
 3041   3094
 30a1   30fa
index eef2d31..c2e6070 100644 (file)
@@ -199,6 +199,32 @@ return <<'END';
 1100   1159
 115f   11a2
 11a8   11f9
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
+1361   137c
 1e00   1e9b
 1ea0   1ef9
 1f00   1f15
diff --git a/lib/unicode/Is/Punct.pl b/lib/unicode/Is/Punct.pl
new file mode 100644 (file)
index 0000000..0d52205
--- /dev/null
@@ -0,0 +1,70 @@
+return <<'END';
+0021   0023
+0025   002a
+002c   002f
+003a   003b
+003f   0040
+005b   005d
+005f   
+007b   
+007d   
+00a1   
+00ab   
+00ad   
+00b7   
+00bb   
+00bf   
+0374   0375
+037e   
+0387   
+055a   055f
+0589   
+05be   
+05c0   
+05c3   
+05f3   05f4
+060c   
+061b   
+061f   
+066a   066d
+06d4   
+0964   0965
+0970   
+0e2f   
+0e5a   0e5b
+0eaf   
+0f04   0f12
+0f3a   0f3d
+0f85   
+10fb   
+1361   1368
+2010   2027
+2030   2043
+2045   2046
+207d   207e
+208d   208e
+2329   232a
+3001   3003
+3006   
+3008   3011
+3014   301f
+3030   
+30fb   
+fd3e   fd3f
+fe30   fe44
+fe49   fe52
+fe54   fe61
+fe63   
+fe68   
+fe6a   fe6b
+ff01   ff03
+ff05   ff0a
+ff0c   ff0f
+ff1a   ff1b
+ff1f   ff20
+ff3b   ff3d
+ff3f   
+ff5b   
+ff5d   
+ff61   ff65
+END
index 903f854..715afc3 100644 (file)
@@ -3,7 +3,6 @@ return <<'END';
 000c   000d
 0020   
 00a0   
-1361
 2000   200b
 2028   2029
 3000   
diff --git a/lib/unicode/Is/Word.pl b/lib/unicode/Is/Word.pl
new file mode 100644 (file)
index 0000000..6a30246
--- /dev/null
@@ -0,0 +1,250 @@
+return <<'END';
+0030   0039
+0041   005a
+005f   
+0061   007a
+00aa   
+00b5   
+00ba   
+00c0   00d6
+00d8   00f6
+00f8   01c4
+01c6   01c7
+01c9   01ca
+01cc   01f1
+01f3   01f5
+01fa   0217
+0250   02a8
+0386   
+0388   038a
+038c   
+038e   03a1
+03a3   03ce
+03d0   03d6
+03da   
+03dc   
+03de   
+03e0   
+03e2   03f3
+0401   040c
+040e   044f
+0451   045c
+045e   0481
+0490   04c4
+04c7   04c8
+04cb   04cc
+04d0   04eb
+04ee   04f5
+04f8   04f9
+0531   0556
+0561   0587
+05d0   05ea
+05f0   05f2
+0621   063a
+0641   064a
+0660   0669
+0671   06b7
+06ba   06be
+06c0   06ce
+06d0   06d3
+06d5   
+06f0   06f9
+0905   0939
+093d   
+0958   0961
+0966   096f
+0985   098c
+098f   0990
+0993   09a8
+09aa   09b0
+09b2   
+09b6   09b9
+09dc   09dd
+09df   09e1
+09e6   09f1
+0a05   0a0a
+0a0f   0a10
+0a13   0a28
+0a2a   0a30
+0a32   0a33
+0a35   0a36
+0a38   0a39
+0a59   0a5c
+0a5e   
+0a66   0a6f
+0a72   0a74
+0a85   0a8b
+0a8d   
+0a8f   0a91
+0a93   0aa8
+0aaa   0ab0
+0ab2   0ab3
+0ab5   0ab9
+0abd   
+0ae0   
+0ae6   0aef
+0b05   0b0c
+0b0f   0b10
+0b13   0b28
+0b2a   0b30
+0b32   0b33
+0b36   0b39
+0b3d   
+0b5c   0b5d
+0b5f   0b61
+0b66   0b6f
+0b85   0b8a
+0b8e   0b90
+0b92   0b95
+0b99   0b9a
+0b9c   
+0b9e   0b9f
+0ba3   0ba4
+0ba8   0baa
+0bae   0bb5
+0bb7   0bb9
+0be7   0bef
+0c05   0c0c
+0c0e   0c10
+0c12   0c28
+0c2a   0c33
+0c35   0c39
+0c60   0c61
+0c66   0c6f
+0c85   0c8c
+0c8e   0c90
+0c92   0ca8
+0caa   0cb3
+0cb5   0cb9
+0cde   
+0ce0   0ce1
+0ce6   0cef
+0d05   0d0c
+0d0e   0d10
+0d12   0d28
+0d2a   0d39
+0d60   0d61
+0d66   0d6f
+0e01   0e2e
+0e30   
+0e32   0e33
+0e40   0e45
+0e50   0e59
+0e81   0e82
+0e84   
+0e87   0e88
+0e8a   
+0e8d   
+0e94   0e97
+0e99   0e9f
+0ea1   0ea3
+0ea5   
+0ea7   
+0eaa   0eab
+0ead   0eae
+0eb0   
+0eb2   0eb3
+0ebd   
+0ec0   0ec4
+0ed0   0ed9
+0edc   0edd
+0f20   0f29
+0f40   0f47
+0f49   0f69
+10a0   10c5
+10d0   10f6
+1100   1159
+115f   11a2
+11a8   11f9
+1200   1206
+1208   1246
+1248   
+124a   124d
+1250   1256
+1258   
+125a   125d
+1260   1286
+1288   
+128a   128d
+1290   12ae
+12b0   
+12b2   12b5
+12b8   12be
+12c0   
+12c2   12c5
+12c8   12ce
+12d0   12d6
+12d8   12ee
+12f0   130e
+1310   
+1312   1315
+1318   131e
+1320   1346
+1348   135a
+1369   1371
+1e00   1e9b
+1ea0   1ef9
+1f00   1f15
+1f18   1f1d
+1f20   1f45
+1f48   1f4d
+1f50   1f57
+1f59   
+1f5b   
+1f5d   
+1f5f   1f7d
+1f80   1fb4
+1fb6   1fbc
+1fbe   
+1fc2   1fc4
+1fc6   1fcc
+1fd0   1fd3
+1fd6   1fdb
+1fe0   1fec
+1ff2   1ff4
+1ff6   1ffc
+207f   
+2102   
+2107   
+210a   2113
+2115   
+2118   211d
+2124   
+2126   
+2128   
+212a   2131
+2133   2138
+3041   3094
+30a1   30fa
+3105   312c
+3131   318e
+4e00   9fa5
+ac00   d7a3
+f900   fa2d
+fb00   fb06
+fb13   fb17
+fb1f   fb28
+fb2a   fb36
+fb38   fb3c
+fb3e   
+fb40   fb41
+fb43   fb44
+fb46   fbb1
+fbd3   fd3d
+fd50   fd8f
+fd92   fdc7
+fdf0   fdfb
+fe70   fe72
+fe74   
+fe76   fefc
+ff10   ff19
+ff21   ff3a
+ff41   ff5a
+ff66   ff6f
+ff71   ff9d
+ffa0   ffbe
+ffc2   ffc7
+ffca   ffcf
+ffd2   ffd7
+ffda   ffdc
+END
diff --git a/lib/unicode/Is/XDigit.pl b/lib/unicode/Is/XDigit.pl
new file mode 100644 (file)
index 0000000..f0b7044
--- /dev/null
@@ -0,0 +1,5 @@
+return <<'END';
+0030   0039
+0041   0046
+0061   0066
+END
index af595da..9e83d94 100644 (file)
@@ -1,7 +1,6 @@
 return <<'END';
 0020   
 00a0   
-1361
 2000   200b
 2028   2029
 3000   
index 403728c..87d4455 100644 (file)
@@ -1,7 +1,6 @@
 return <<'END';
 0020   
 00a0   
-1361
 2000   200b
 3000   
 END
index 0925bad..45099ac 100644 (file)
@@ -2740,352 +2740,351 @@ return <<'END';
 11f7           HANGUL JONGSEONG HIEUH-MIEUM
 11f8           HANGUL JONGSEONG HIEUH-PIEUP
 11f9           HANGUL JONGSEONG YEORINHIEUH
-1200           ETHIOPIC SYLLABLE HA
-1201           ETHIOPIC SYLLABLE HU
-1202           ETHIOPIC SYLLABLE HI
-1203           ETHIOPIC SYLLABLE HAA
-1204           ETHIOPIC SYLLABLE HEE
-1205           ETHIOPIC SYLLABLE HE
-1206           ETHIOPIC SYLLABLE HO
-1208           ETHIOPIC SYLLABLE LA
-1209           ETHIOPIC SYLLABLE LU
-120A           ETHIOPIC SYLLABLE LI
-120B           ETHIOPIC SYLLABLE LAA
-120C           ETHIOPIC SYLLABLE LEE
-120D           ETHIOPIC SYLLABLE LE
-120E           ETHIOPIC SYLLABLE LO
-120F           ETHIOPIC SYLLABLE LWA
-1210           ETHIOPIC SYLLABLE HHA
-1211           ETHIOPIC SYLLABLE HHU
-1212           ETHIOPIC SYLLABLE HHI
-1213           ETHIOPIC SYLLABLE HHAA
-1214           ETHIOPIC SYLLABLE HHEE
-1215           ETHIOPIC SYLLABLE HHE
-1217           ETHIOPIC SYLLABLE HHWA
-1218           ETHIOPIC SYLLABLE MA
-1219           ETHIOPIC SYLLABLE MU
-121A           ETHIOPIC SYLLABLE MI
-121B           ETHIOPIC SYLLABLE MAA
-121C           ETHIOPIC SYLLABLE MEE
-121D           ETHIOPIC SYLLABLE ME
-121E           ETHIOPIC SYLLABLE MO
-121F           ETHIOPIC SYLLABLE MWAA
-1220           ETHIOPIC SYLLABLE SZA
-1221           ETHIOPIC SYLLABLE SZU
-1222           ETHIOPIC SYLLABLE SZI
-1223           ETHIOPIC SYLLABLE SZAA
-1224           ETHIOPIC SYLLABLE SZEE
-1225           ETHIOPIC SYLLABLE SZE
-1226           ETHIOPIC SYLLABLE SZO
-1227           ETHIOPIC SYLLABLE SZWA
-1228           ETHIOPIC SYLLABLE RA
-1229           ETHIOPIC SYLLABLE RU
-122A           ETHIOPIC SYLLABLE RI
-122B           ETHIOPIC SYLLABLE RAA
-122C           ETHIOPIC SYLLABLE REE
-122D           ETHIOPIC SYLLABLE RE
-122E           ETHIOPIC SYLLABLE RO
-122F           ETHIOPIC SYLLABLE RWA
-1230           ETHIOPIC SYLLABLE SA
-1231           ETHIOPIC SYLLABLE SU
-1232           ETHIOPIC SYLLABLE SI
-1233           ETHIOPIC SYLLABLE SAA
-1234           ETHIOPIC SYLLABLE SEE
-1235           ETHIOPIC SYLLABLE SE
-1236           ETHIOPIC SYLLABLE SO
-1237           ETHIOPIC SYLLABLE SWA
-1238           ETHIOPIC SYLLABLE SHA
-1239           ETHIOPIC SYLLABLE SHU
-123A           ETHIOPIC SYLLABLE SHI
-123B           ETHIOPIC SYLLABLE SHAA
-123C           ETHIOPIC SYLLABLE SHEE
-123D           ETHIOPIC SYLLABLE SHE
-123E           ETHIOPIC SYLLABLE SHO
-123F           ETHIOPIC SYLLABLE SHWA
-1240           ETHIOPIC SYLLABLE QA 
-1241           ETHIOPIC SYLLABLE QU 
-1242           ETHIOPIC SYLLABLE QI 
-1243           ETHIOPIC SYLLABLE QAA
-1244           ETHIOPIC SYLLABLE QEE
-1245           ETHIOPIC SYLLABLE QE 
-1246           ETHIOPIC SYLLABLE QO 
-1248           ETHIOPIC SYLLABLE QWA
-124A           ETHIOPIC SYLLABLE QWI
-124B           ETHIOPIC SYLLABLE QWAA
-124C           ETHIOPIC SYLLABLE QWEE
-124D           ETHIOPIC SYLLABLE QWE
-1250           ETHIOPIC SYLLABLE QHA 
-1251           ETHIOPIC SYLLABLE QHU  
-1252           ETHIOPIC SYLLABLE QHI 
-1253           ETHIOPIC SYLLABLE QHAA
-1254           ETHIOPIC SYLLABLE QHEE
-1255           ETHIOPIC SYLLABLE QHE 
-1256           ETHIOPIC SYLLABLE QHO 
-1258           ETHIOPIC SYLLABLE QHWA 
-125A           ETHIOPIC SYLLABLE QHWI
-125B           ETHIOPIC SYLLABLE QHWAA
-125C           ETHIOPIC SYLLABLE QHWEE
-125D           ETHIOPIC SYLLABLE QHWE
-1260           ETHIOPIC SYLLABLE BA
-1261           ETHIOPIC SYLLABLE BU
-1262           ETHIOPIC SYLLABLE BI
-1263           ETHIOPIC SYLLABLE BAA
-1264           ETHIOPIC SYLLABLE BEE
-1265           ETHIOPIC SYLLABLE BE
-1266           ETHIOPIC SYLLABLE BO
-1267           ETHIOPIC SYLLABLE BWAA
-1268           ETHIOPIC SYLLABLE VA
-1269           ETHIOPIC SYLLABLE VU
-126A           ETHIOPIC SYLLABLE VI
-126B           ETHIOPIC SYLLABLE VAA
-126C           ETHIOPIC SYLLABLE VEE
-126D           ETHIOPIC SYLLABLE VE
-126E           ETHIOPIC SYLLABLE VO
-126F           ETHIOPIC SYLLABLE VWA
-1270           ETHIOPIC SYLLABLE TA
-1271           ETHIOPIC SYLLABLE TU
-1272           ETHIOPIC SYLLABLE TI
-1273           ETHIOPIC SYLLABLE TAA
-1274           ETHIOPIC SYLLABLE TEE
-1275           ETHIOPIC SYLLABLE TE
-1276           ETHIOPIC SYLLABLE TO
-1277           ETHIOPIC SYLLABLE TWA
-1278           ETHIOPIC SYLLABLE CA
-1279           ETHIOPIC SYLLABLE CU
-127A           ETHIOPIC SYLLABLE CI
-127B           ETHIOPIC SYLLABLE CAA
-127C           ETHIOPIC SYLLABLE CEE
-127D           ETHIOPIC SYLLABLE CE
-127E           ETHIOPIC SYLLABLE CO
-127F           ETHIOPIC SYLLABLE CWA
-1280           ETHIOPIC SYLLABLE XA
-1281           ETHIOPIC SYLLABLE XU
-1282           ETHIOPIC SYLLABLE XI
-1283           ETHIOPIC SYLLABLE XAA
-1284           ETHIOPIC SYLLABLE XEE
-1285           ETHIOPIC SYLLABLE XE
-1286           ETHIOPIC SYLLABLE XO
-1288           ETHIOPIC SYLLABLE XWA
-128A           ETHIOPIC SYLLABLE XWI
-128B           ETHIOPIC SYLLABLE XWAA
-128C           ETHIOPIC SYLLABLE XWEE
-128D           ETHIOPIC SYLLABLE XWE
-1290           ETHIOPIC SYLLABLE NA
-1291           ETHIOPIC SYLLABLE NU
-1292           ETHIOPIC SYLLABLE NI
-1293           ETHIOPIC SYLLABLE NAA
-1294           ETHIOPIC SYLLABLE NEE
-1295           ETHIOPIC SYLLABLE NE
-1296           ETHIOPIC SYLLABLE NO
-1297           ETHIOPIC SYLLABLE NWA
-1298           ETHIOPIC SYLLABLE NYA
-1299           ETHIOPIC SYLLABLE NYU
-129A           ETHIOPIC SYLLABLE NYI
-129B           ETHIOPIC SYLLABLE NYAA
-129C           ETHIOPIC SYLLABLE NYEE
-129D           ETHIOPIC SYLLABLE NYE
-129E           ETHIOPIC SYLLABLE NYO
-129F           ETHIOPIC SYLLABLE NYWA
-12A0           ETHIOPIC SYLLABLE GLOTTAL A
-12A1           ETHIOPIC SYLLABLE GLOTTAL U
-12A2           ETHIOPIC SYLLABLE GLOTTAL I
-12A3           ETHIOPIC SYLLABLE GLOTTAL AA
-12A4           ETHIOPIC SYLLABLE GLOTTAL EE
-12A5           ETHIOPIC SYLLABLE GLOTTAL E
-12A6           ETHIOPIC SYLLABLE GLOTTAL O
-12A7           ETHIOPIC SYLLABLE GLOTTAL WA
-12A8           ETHIOPIC SYLLABLE KA
-12A9           ETHIOPIC SYLLABLE KU
-12AA           ETHIOPIC SYLLABLE KI
-12AB           ETHIOPIC SYLLABLE KAA
-12AC           ETHIOPIC SYLLABLE KEE
-12AD           ETHIOPIC SYLLABLE KE
-12AE           ETHIOPIC SYLLABLE KO
-12B0           ETHIOPIC SYLLABLE KWA
-12B2           ETHIOPIC SYLLABLE KWI
-12B3           ETHIOPIC SYLLABLE KWAA
-12B4           ETHIOPIC SYLLABLE KWEE
-12B5           ETHIOPIC SYLLABLE KWE
-12B8           ETHIOPIC SYLLABLE KXA
-12B9           ETHIOPIC SYLLABLE KXU
-12BA           ETHIOPIC SYLLABLE KXI
-12BB           ETHIOPIC SYLLABLE KXAA
-12BC           ETHIOPIC SYLLABLE KXEE
-12BD           ETHIOPIC SYLLABLE KXE
-12BE           ETHIOPIC SYLLABLE KXO
-12C0           ETHIOPIC SYLLABLE KXWA
-12C2           ETHIOPIC SYLLABLE KXWI
-12C3           ETHIOPIC SYLLABLE KXWAA
-12C4           ETHIOPIC SYLLABLE KXWEE
-12C5           ETHIOPIC SYLLABLE KXWE
-12C8           ETHIOPIC SYLLABLE WA
-12C9           ETHIOPIC SYLLABLE WU
-12CA           ETHIOPIC SYLLABLE WI
-12CB           ETHIOPIC SYLLABLE WAA
-12CC           ETHIOPIC SYLLABLE WEE
-12CD           ETHIOPIC SYLLABLE WE
-12CE           ETHIOPIC SYLLABLE WO
-12D0           ETHIOPIC SYLLABLE PHARYNGEAL A
-12D1           ETHIOPIC SYLLABLE PHARYNGEAL U
-12D2           ETHIOPIC SYLLABLE PHARYNGEAL I
-12D3           ETHIOPIC SYLLABLE PHARYNGEAL AA
-12D4           ETHIOPIC SYLLABLE PHARYNGEAL EE
-12D5           ETHIOPIC SYLLABLE PHARYNGEAL E
-12D6           ETHIOPIC SYLLABLE PHARYNGEAL O
-12D8           ETHIOPIC SYLLABLE ZA
-12D9           ETHIOPIC SYLLABLE ZU
-12DA           ETHIOPIC SYLLABLE ZI
-12DB           ETHIOPIC SYLLABLE ZAA
-12DC           ETHIOPIC SYLLABLE ZEE
-12DD           ETHIOPIC SYLLABLE ZE
-12DE           ETHIOPIC SYLLABLE ZO
-12DF           ETHIOPIC SYLLABLE ZWA
-12E0           ETHIOPIC SYLLABLE ZHA
-12E1           ETHIOPIC SYLLABLE ZHU
-12E2           ETHIOPIC SYLLABLE ZHI
-12E3           ETHIOPIC SYLLABLE ZHAA
-12E4           ETHIOPIC SYLLABLE ZHEE
-12E5           ETHIOPIC SYLLABLE ZHE
-12E6           ETHIOPIC SYLLABLE ZHO
-12E7           ETHIOPIC SYLLABLE ZHWA
-12E8           ETHIOPIC SYLLABLE YA
-12E9           ETHIOPIC SYLLABLE YU
-12EA           ETHIOPIC SYLLABLE YI
-12EB           ETHIOPIC SYLLABLE YAA
-12EC           ETHIOPIC SYLLABLE YEE
-12ED           ETHIOPIC SYLLABLE YE
-12EE           ETHIOPIC SYLLABLE YO
-12EF           ETHIOPIC SYLLABLE YWA
-12F0           ETHIOPIC SYLLABLE DA
-12F1           ETHIOPIC SYLLABLE DU
-12F2           ETHIOPIC SYLLABLE DI
-12F3           ETHIOPIC SYLLABLE DAA
-12F4           ETHIOPIC SYLLABLE DEE
-12F5           ETHIOPIC SYLLABLE DE
-12F6           ETHIOPIC SYLLABLE DO
-12F7           ETHIOPIC SYLLABLE DWA
-12F8           ETHIOPIC SYLLABLE DDA
-12F9           ETHIOPIC SYLLABLE DDU
-12FA           ETHIOPIC SYLLABLE DDI
-12FB           ETHIOPIC SYLLABLE DDAA
-12FC           ETHIOPIC SYLLABLE DDEE
-12FD           ETHIOPIC SYLLABLE DDE
-12FE           ETHIOPIC SYLLABLE DDO
-12FF           ETHIOPIC SYLLABLE DDWA
-1300           ETHIOPIC SYLLABLE JA
-1301           ETHIOPIC SYLLABLE JU
-1302           ETHIOPIC SYLLABLE JI
-1303           ETHIOPIC SYLLABLE JAA
-1304           ETHIOPIC SYLLABLE JEE
-1305           ETHIOPIC SYLLABLE JE
-1306           ETHIOPIC SYLLABLE JO
-1307           ETHIOPIC SYLLABLE JWA
-1308           ETHIOPIC SYLLABLE GA 
-1309           ETHIOPIC SYLLABLE GU 
-130A           ETHIOPIC SYLLABLE GI 
-130B           ETHIOPIC SYLLABLE GAA
-130C           ETHIOPIC SYLLABLE GEE
-130D           ETHIOPIC SYLLABLE GE 
-130E           ETHIOPIC SYLLABLE GO 
-1310           ETHIOPIC SYLLABLE GWA
-1312           ETHIOPIC SYLLABLE GWI
-1313           ETHIOPIC SYLLABLE GWAA
-1314           ETHIOPIC SYLLABLE GWEE
-1315           ETHIOPIC SYLLABLE GWE
-1318           ETHIOPIC SYLLABLE GGA
-1319           ETHIOPIC SYLLABLE GGU
-131A           ETHIOPIC SYLLABLE GGI
-131B           ETHIOPIC SYLLABLE GGAA
-131C           ETHIOPIC SYLLABLE GGEE
-131D           ETHIOPIC SYLLABLE GGE
-131E           ETHIOPIC SYLLABLE GGO
-131F           ETHIOPIC SYLLABLE GGWAA
-1320           ETHIOPIC SYLLABLE THA
-1321           ETHIOPIC SYLLABLE THU
-1322           ETHIOPIC SYLLABLE THI
-1323           ETHIOPIC SYLLABLE THAA
-1324           ETHIOPIC SYLLABLE THEE
-1325           ETHIOPIC SYLLABLE THE
-1326           ETHIOPIC SYLLABLE THO
-1327           ETHIOPIC SYLLABLE THWA
-1328           ETHIOPIC SYLLABLE CHA
-1329           ETHIOPIC SYLLABLE CHU
-132A           ETHIOPIC SYLLABLE CHI
-132B           ETHIOPIC SYLLABLE CHAA
-132C           ETHIOPIC SYLLABLE CHEE
-132D           ETHIOPIC SYLLABLE CHE
-132E           ETHIOPIC SYLLABLE CHO
-132F           ETHIOPIC SYLLABLE CHWA
-1330           ETHIOPIC SYLLABLE PHA
-1331           ETHIOPIC SYLLABLE PHU
-1332           ETHIOPIC SYLLABLE PHI
-1333           ETHIOPIC SYLLABLE PHAA
-1334           ETHIOPIC SYLLABLE PHEE
-1335           ETHIOPIC SYLLABLE PHE
-1336           ETHIOPIC SYLLABLE PHO
-1337           ETHIOPIC SYLLABLE PHWA
-1338           ETHIOPIC SYLLABLE TSA
-1339           ETHIOPIC SYLLABLE TSU
-133A           ETHIOPIC SYLLABLE TSI
-133B           ETHIOPIC SYLLABLE TSAA
-133C           ETHIOPIC SYLLABLE TSEE
-133D           ETHIOPIC SYLLABLE TSE
-133E           ETHIOPIC SYLLABLE TSO
-133F           ETHIOPIC SYLLABLE TSWA
-1340           ETHIOPIC SYLLABLE TZA
-1341           ETHIOPIC SYLLABLE TZU
-1342           ETHIOPIC SYLLABLE TZI
-1343           ETHIOPIC SYLLABLE TZAA
-1344           ETHIOPIC SYLLABLE TZEE
-1345           ETHIOPIC SYLLABLE TZE
-1346           ETHIOPIC SYLLABLE TZO
-1348           ETHIOPIC SYLLABLE FA
-1349           ETHIOPIC SYLLABLE FU
-134A           ETHIOPIC SYLLABLE FI
-134B           ETHIOPIC SYLLABLE FAA
-134C           ETHIOPIC SYLLABLE FEE
-134D           ETHIOPIC SYLLABLE FE
-134E           ETHIOPIC SYLLABLE FO
-134F           ETHIOPIC SYLLABLE FWAA
-1350           ETHIOPIC SYLLABLE PA
-1351           ETHIOPIC SYLLABLE PU
-1352           ETHIOPIC SYLLABLE PI
-1353           ETHIOPIC SYLLABLE PAA
-1354           ETHIOPIC SYLLABLE PEE
-1355           ETHIOPIC SYLLABLE PE
-1356           ETHIOPIC SYLLABLE PO
-1357           ETHIOPIC SYLLABLE PWAA
-1358           ETHIOPIC SYLLABLE MYA
-1359           ETHIOPIC SYLLABLE RYA
-135A           ETHIOPIC SYLLABLE FYA
-1361           ETHIOPIC WORDSPACE
-1362           ETHIOPIC FULL STOP
-1363           ETHIOPIC COMMA
-1364           ETHIOPIC SEMICOLON
-1365           ETHIOPIC COLON
-1366           ETHIOPIC PREFACE COLON
-1367           ETHIOPIC QUESTION MARK
-1368           ETHIOPIC PARAGRAPH SEPARATOR
-1369           ETHIOPIC DIGIT ONE
-136A           ETHIOPIC DIGIT TWO
-136B           ETHIOPIC DIGIT THREE
-136C           ETHIOPIC DIGIT FOUR
-136D           ETHIOPIC DIGIT FIVE
-136E           ETHIOPIC DIGIT SIX
-136F           ETHIOPIC DIGIT SEVEN
-1370           ETHIOPIC DIGIT EIGHT
-1371           ETHIOPIC DIGIT NINE
-1372           ETHIOPIC NUMBER TEN
-1373           ETHIOPIC NUMBER TWENTY
-1374           ETHIOPIC NUMBER THIRTY
-1375           ETHIOPIC NUMBER FORTY
-1376           ETHIOPIC NUMBER FIFTY
-1377           ETHIOPIC NUMBER SIXTY
-1378           ETHIOPIC NUMBER SEVENTY
-1379           ETHIOPIC NUMBER EIGHTY
-137A           ETHIOPIC NUMBER NINETY
-137B           ETHIOPIC NUMBER HUNDRED
-137C           ETHIOPIC NUMBER TEN THOUSAND
+1200           ETHIOPIC SYLLABLE HA
+1201           ETHIOPIC SYLLABLE HU
+1202           ETHIOPIC SYLLABLE HI
+1203           ETHIOPIC SYLLABLE HAA
+1204           ETHIOPIC SYLLABLE HEE
+1205           ETHIOPIC SYLLABLE HE
+1206           ETHIOPIC SYLLABLE HO
+1208           ETHIOPIC SYLLABLE LA
+1209           ETHIOPIC SYLLABLE LU
+120a           ETHIOPIC SYLLABLE LI
+120b           ETHIOPIC SYLLABLE LAA
+120c           ETHIOPIC SYLLABLE LEE
+120d           ETHIOPIC SYLLABLE LE
+120e           ETHIOPIC SYLLABLE LO
+120f           ETHIOPIC SYLLABLE LWA
+1210           ETHIOPIC SYLLABLE HHA
+1211           ETHIOPIC SYLLABLE HHU
+1212           ETHIOPIC SYLLABLE HHI
+1213           ETHIOPIC SYLLABLE HHAA
+1214           ETHIOPIC SYLLABLE HHEE
+1215           ETHIOPIC SYLLABLE HHE
+1216           ETHIOPIC SYLLABLE HHO
+1217           ETHIOPIC SYLLABLE HHWA
+1218           ETHIOPIC SYLLABLE MA
+1219           ETHIOPIC SYLLABLE MU
+121a           ETHIOPIC SYLLABLE MI
+121b           ETHIOPIC SYLLABLE MAA
+121c           ETHIOPIC SYLLABLE MEE
+121d           ETHIOPIC SYLLABLE ME
+121e           ETHIOPIC SYLLABLE MO
+121f           ETHIOPIC SYLLABLE MWA
+1220           ETHIOPIC SYLLABLE SZA
+1221           ETHIOPIC SYLLABLE SZU
+1222           ETHIOPIC SYLLABLE SZI
+1223           ETHIOPIC SYLLABLE SZAA
+1224           ETHIOPIC SYLLABLE SZEE
+1225           ETHIOPIC SYLLABLE SZE
+1226           ETHIOPIC SYLLABLE SZO
+1227           ETHIOPIC SYLLABLE SZWA
+1228           ETHIOPIC SYLLABLE RA
+1229           ETHIOPIC SYLLABLE RU
+122a           ETHIOPIC SYLLABLE RI
+122b           ETHIOPIC SYLLABLE RAA
+122c           ETHIOPIC SYLLABLE REE
+122d           ETHIOPIC SYLLABLE RE
+122e           ETHIOPIC SYLLABLE RO
+122f           ETHIOPIC SYLLABLE RWA
+1230           ETHIOPIC SYLLABLE SA
+1231           ETHIOPIC SYLLABLE SU
+1232           ETHIOPIC SYLLABLE SI
+1233           ETHIOPIC SYLLABLE SAA
+1234           ETHIOPIC SYLLABLE SEE
+1235           ETHIOPIC SYLLABLE SE
+1236           ETHIOPIC SYLLABLE SO
+1237           ETHIOPIC SYLLABLE SWA
+1238           ETHIOPIC SYLLABLE SHA
+1239           ETHIOPIC SYLLABLE SHU
+123a           ETHIOPIC SYLLABLE SHI
+123b           ETHIOPIC SYLLABLE SHAA
+123c           ETHIOPIC SYLLABLE SHEE
+123d           ETHIOPIC SYLLABLE SHE
+123e           ETHIOPIC SYLLABLE SHO
+123f           ETHIOPIC SYLLABLE SHWA
+1240           ETHIOPIC SYLLABLE QA
+1241           ETHIOPIC SYLLABLE QU
+1242           ETHIOPIC SYLLABLE QI
+1243           ETHIOPIC SYLLABLE QAA
+1244           ETHIOPIC SYLLABLE QEE
+1245           ETHIOPIC SYLLABLE QE
+1246           ETHIOPIC SYLLABLE QO
+1248           ETHIOPIC SYLLABLE QWA
+124a           ETHIOPIC SYLLABLE QWI
+124b           ETHIOPIC SYLLABLE QWAA
+124c           ETHIOPIC SYLLABLE QWEE
+124d           ETHIOPIC SYLLABLE QWE
+1250           ETHIOPIC SYLLABLE QHA
+1251           ETHIOPIC SYLLABLE QHU
+1252           ETHIOPIC SYLLABLE QHI
+1253           ETHIOPIC SYLLABLE QHAA
+1254           ETHIOPIC SYLLABLE QHEE
+1255           ETHIOPIC SYLLABLE QHE
+1256           ETHIOPIC SYLLABLE QHO
+1258           ETHIOPIC SYLLABLE QHWA
+125a           ETHIOPIC SYLLABLE QHWI
+125b           ETHIOPIC SYLLABLE QHWAA
+125c           ETHIOPIC SYLLABLE QHWEE
+125d           ETHIOPIC SYLLABLE QHWE
+1260           ETHIOPIC SYLLABLE BA
+1261           ETHIOPIC SYLLABLE BU
+1262           ETHIOPIC SYLLABLE BI
+1263           ETHIOPIC SYLLABLE BAA
+1264           ETHIOPIC SYLLABLE BEE
+1265           ETHIOPIC SYLLABLE BE
+1266           ETHIOPIC SYLLABLE BO
+1267           ETHIOPIC SYLLABLE BWA
+1268           ETHIOPIC SYLLABLE VA
+1269           ETHIOPIC SYLLABLE VU
+126a           ETHIOPIC SYLLABLE VI
+126b           ETHIOPIC SYLLABLE VAA
+126c           ETHIOPIC SYLLABLE VEE
+126d           ETHIOPIC SYLLABLE VE
+126e           ETHIOPIC SYLLABLE VO
+126f           ETHIOPIC SYLLABLE VWA
+1270           ETHIOPIC SYLLABLE TA
+1271           ETHIOPIC SYLLABLE TU
+1272           ETHIOPIC SYLLABLE TI
+1273           ETHIOPIC SYLLABLE TAA
+1274           ETHIOPIC SYLLABLE TEE
+1275           ETHIOPIC SYLLABLE TE
+1276           ETHIOPIC SYLLABLE TO
+1277           ETHIOPIC SYLLABLE TWA
+1278           ETHIOPIC SYLLABLE CA
+1279           ETHIOPIC SYLLABLE CU
+127a           ETHIOPIC SYLLABLE CI
+127b           ETHIOPIC SYLLABLE CAA
+127c           ETHIOPIC SYLLABLE CEE
+127d           ETHIOPIC SYLLABLE CE
+127e           ETHIOPIC SYLLABLE CO
+127f           ETHIOPIC SYLLABLE CWA
+1280           ETHIOPIC SYLLABLE XA
+1281           ETHIOPIC SYLLABLE XU
+1282           ETHIOPIC SYLLABLE XI
+1283           ETHIOPIC SYLLABLE XAA
+1284           ETHIOPIC SYLLABLE XEE
+1285           ETHIOPIC SYLLABLE XE
+1286           ETHIOPIC SYLLABLE XO
+1288           ETHIOPIC SYLLABLE XWA
+128a           ETHIOPIC SYLLABLE XWI
+128b           ETHIOPIC SYLLABLE XWAA
+128c           ETHIOPIC SYLLABLE XWEE
+128d           ETHIOPIC SYLLABLE XWE
+1290           ETHIOPIC SYLLABLE NA
+1291           ETHIOPIC SYLLABLE NU
+1292           ETHIOPIC SYLLABLE NI
+1293           ETHIOPIC SYLLABLE NAA
+1294           ETHIOPIC SYLLABLE NEE
+1295           ETHIOPIC SYLLABLE NE
+1296           ETHIOPIC SYLLABLE NO
+1297           ETHIOPIC SYLLABLE NWA
+1298           ETHIOPIC SYLLABLE NYA
+1299           ETHIOPIC SYLLABLE NYU
+129a           ETHIOPIC SYLLABLE NYI
+129b           ETHIOPIC SYLLABLE NYAA
+129c           ETHIOPIC SYLLABLE NYEE
+129d           ETHIOPIC SYLLABLE NYE
+129e           ETHIOPIC SYLLABLE NYO
+129f           ETHIOPIC SYLLABLE NYWA
+12a0           ETHIOPIC SYLLABLE GLOTTAL A
+12a1           ETHIOPIC SYLLABLE GLOTTAL U
+12a2           ETHIOPIC SYLLABLE GLOTTAL I
+12a3           ETHIOPIC SYLLABLE GLOTTAL AA
+12a4           ETHIOPIC SYLLABLE GLOTTAL EE
+12a5           ETHIOPIC SYLLABLE GLOTTAL E
+12a6           ETHIOPIC SYLLABLE GLOTTAL O
+12a7           ETHIOPIC SYLLABLE GLOTTAL WA
+12a8           ETHIOPIC SYLLABLE KA
+12a9           ETHIOPIC SYLLABLE KU
+12aa           ETHIOPIC SYLLABLE KI
+12ab           ETHIOPIC SYLLABLE KAA
+12ac           ETHIOPIC SYLLABLE KEE
+12ad           ETHIOPIC SYLLABLE KE
+12ae           ETHIOPIC SYLLABLE KO
+12b0           ETHIOPIC SYLLABLE KWA
+12b2           ETHIOPIC SYLLABLE KWI
+12b3           ETHIOPIC SYLLABLE KWAA
+12b4           ETHIOPIC SYLLABLE KWEE
+12b5           ETHIOPIC SYLLABLE KWE
+12b8           ETHIOPIC SYLLABLE KXA
+12b9           ETHIOPIC SYLLABLE KXU
+12ba           ETHIOPIC SYLLABLE KXI
+12bb           ETHIOPIC SYLLABLE KXAA
+12bc           ETHIOPIC SYLLABLE KXEE
+12bd           ETHIOPIC SYLLABLE KXE
+12be           ETHIOPIC SYLLABLE KXO
+12c0           ETHIOPIC SYLLABLE KXWA
+12c2           ETHIOPIC SYLLABLE KXWI
+12c3           ETHIOPIC SYLLABLE KXWAA
+12c4           ETHIOPIC SYLLABLE KXWEE
+12c5           ETHIOPIC SYLLABLE KXWE
+12c8           ETHIOPIC SYLLABLE WA
+12c9           ETHIOPIC SYLLABLE WU
+12ca           ETHIOPIC SYLLABLE WI
+12cb           ETHIOPIC SYLLABLE WAA
+12cc           ETHIOPIC SYLLABLE WEE
+12cd           ETHIOPIC SYLLABLE WE
+12ce           ETHIOPIC SYLLABLE WO
+12d0           ETHIOPIC SYLLABLE PHARYNGEAL A
+12d1           ETHIOPIC SYLLABLE PHARYNGEAL U
+12d2           ETHIOPIC SYLLABLE PHARYNGEAL I
+12d3           ETHIOPIC SYLLABLE PHARYNGEAL AA
+12d4           ETHIOPIC SYLLABLE PHARYNGEAL EE
+12d5           ETHIOPIC SYLLABLE PHARYNGEAL E
+12d6           ETHIOPIC SYLLABLE PHARYNGEAL O
+12d8           ETHIOPIC SYLLABLE ZA
+12d9           ETHIOPIC SYLLABLE ZU
+12da           ETHIOPIC SYLLABLE ZI
+12db           ETHIOPIC SYLLABLE ZAA
+12dc           ETHIOPIC SYLLABLE ZEE
+12dd           ETHIOPIC SYLLABLE ZE
+12de           ETHIOPIC SYLLABLE ZO
+12df           ETHIOPIC SYLLABLE ZWA
+12e0           ETHIOPIC SYLLABLE ZHA
+12e1           ETHIOPIC SYLLABLE ZHU
+12e2           ETHIOPIC SYLLABLE ZHI
+12e3           ETHIOPIC SYLLABLE ZHAA
+12e4           ETHIOPIC SYLLABLE ZHEE
+12e5           ETHIOPIC SYLLABLE ZHE
+12e6           ETHIOPIC SYLLABLE ZHO
+12e7           ETHIOPIC SYLLABLE ZHWA
+12e8           ETHIOPIC SYLLABLE YA
+12e9           ETHIOPIC SYLLABLE YU
+12ea           ETHIOPIC SYLLABLE YI
+12eb           ETHIOPIC SYLLABLE YAA
+12ec           ETHIOPIC SYLLABLE YEE
+12ed           ETHIOPIC SYLLABLE YE
+12ee           ETHIOPIC SYLLABLE YO
+12f0           ETHIOPIC SYLLABLE DA
+12f1           ETHIOPIC SYLLABLE DU
+12f2           ETHIOPIC SYLLABLE DI
+12f3           ETHIOPIC SYLLABLE DAA
+12f4           ETHIOPIC SYLLABLE DEE
+12f5           ETHIOPIC SYLLABLE DE
+12f6           ETHIOPIC SYLLABLE DO
+12f7           ETHIOPIC SYLLABLE DWA
+12f8           ETHIOPIC SYLLABLE DDA
+12f9           ETHIOPIC SYLLABLE DDU
+12fa           ETHIOPIC SYLLABLE DDI
+12fb           ETHIOPIC SYLLABLE DDAA
+12fc           ETHIOPIC SYLLABLE DDEE
+12fd           ETHIOPIC SYLLABLE DDE
+12fe           ETHIOPIC SYLLABLE DDO
+12ff           ETHIOPIC SYLLABLE DDWA
+1300           ETHIOPIC SYLLABLE JA
+1301           ETHIOPIC SYLLABLE JU
+1302           ETHIOPIC SYLLABLE JI
+1303           ETHIOPIC SYLLABLE JAA
+1304           ETHIOPIC SYLLABLE JEE
+1305           ETHIOPIC SYLLABLE JE
+1306           ETHIOPIC SYLLABLE JO
+1307           ETHIOPIC SYLLABLE JWA
+1308           ETHIOPIC SYLLABLE GA
+1309           ETHIOPIC SYLLABLE GU
+130a           ETHIOPIC SYLLABLE GI
+130b           ETHIOPIC SYLLABLE GAA
+130c           ETHIOPIC SYLLABLE GEE
+130d           ETHIOPIC SYLLABLE GE
+130e           ETHIOPIC SYLLABLE GO
+1310           ETHIOPIC SYLLABLE GWA
+1312           ETHIOPIC SYLLABLE GWI
+1313           ETHIOPIC SYLLABLE GWAA
+1314           ETHIOPIC SYLLABLE GWEE
+1315           ETHIOPIC SYLLABLE GWE
+1318           ETHIOPIC SYLLABLE GGA
+1319           ETHIOPIC SYLLABLE GGU
+131a           ETHIOPIC SYLLABLE GGI
+131b           ETHIOPIC SYLLABLE GGAA
+131c           ETHIOPIC SYLLABLE GGEE
+131d           ETHIOPIC SYLLABLE GGE
+131e           ETHIOPIC SYLLABLE GGO
+1320           ETHIOPIC SYLLABLE THA
+1321           ETHIOPIC SYLLABLE THU
+1322           ETHIOPIC SYLLABLE THI
+1323           ETHIOPIC SYLLABLE THAA
+1324           ETHIOPIC SYLLABLE THEE
+1325           ETHIOPIC SYLLABLE THE
+1326           ETHIOPIC SYLLABLE THO
+1327           ETHIOPIC SYLLABLE THWA
+1328           ETHIOPIC SYLLABLE CHA
+1329           ETHIOPIC SYLLABLE CHU
+132a           ETHIOPIC SYLLABLE CHI
+132b           ETHIOPIC SYLLABLE CHAA
+132c           ETHIOPIC SYLLABLE CHEE
+132d           ETHIOPIC SYLLABLE CHE
+132e           ETHIOPIC SYLLABLE CHO
+132f           ETHIOPIC SYLLABLE CHWA
+1330           ETHIOPIC SYLLABLE PHA
+1331           ETHIOPIC SYLLABLE PHU
+1332           ETHIOPIC SYLLABLE PHI
+1333           ETHIOPIC SYLLABLE PHAA
+1334           ETHIOPIC SYLLABLE PHEE
+1335           ETHIOPIC SYLLABLE PHE
+1336           ETHIOPIC SYLLABLE PHO
+1337           ETHIOPIC SYLLABLE PHWA
+1338           ETHIOPIC SYLLABLE TSA
+1339           ETHIOPIC SYLLABLE TSU
+133a           ETHIOPIC SYLLABLE TSI
+133b           ETHIOPIC SYLLABLE TSAA
+133c           ETHIOPIC SYLLABLE TSEE
+133d           ETHIOPIC SYLLABLE TSE
+133e           ETHIOPIC SYLLABLE TSO
+133f           ETHIOPIC SYLLABLE TSWA
+1340           ETHIOPIC SYLLABLE TZA
+1341           ETHIOPIC SYLLABLE TZU
+1342           ETHIOPIC SYLLABLE TZI
+1343           ETHIOPIC SYLLABLE TZAA
+1344           ETHIOPIC SYLLABLE TZEE
+1345           ETHIOPIC SYLLABLE TZE
+1346           ETHIOPIC SYLLABLE TZO
+1348           ETHIOPIC SYLLABLE FA
+1349           ETHIOPIC SYLLABLE FU
+134a           ETHIOPIC SYLLABLE FI
+134b           ETHIOPIC SYLLABLE FAA
+134c           ETHIOPIC SYLLABLE FEE
+134d           ETHIOPIC SYLLABLE FE
+134e           ETHIOPIC SYLLABLE FO
+134f           ETHIOPIC SYLLABLE FWA
+1350           ETHIOPIC SYLLABLE PA
+1351           ETHIOPIC SYLLABLE PU
+1352           ETHIOPIC SYLLABLE PI
+1353           ETHIOPIC SYLLABLE PAA
+1354           ETHIOPIC SYLLABLE PEE
+1355           ETHIOPIC SYLLABLE PE
+1356           ETHIOPIC SYLLABLE PO
+1357           ETHIOPIC SYLLABLE PWA
+1358           ETHIOPIC SYLLABLE RYA
+1359           ETHIOPIC SYLLABLE MYA
+135a           ETHIOPIC SYLLABLE FYA
+1361           ETHIOPIC WORDSPACE
+1362           ETHIOPIC FULL STOP
+1363           ETHIOPIC COMMA
+1364           ETHIOPIC SEMICOLON
+1365           ETHIOPIC COLON
+1366           ETHIOPIC PREFACE COLON
+1367           ETHIOPIC QUESTION MARK
+1368           ETHIOPIC PARAGRAPH SEPARATOR
+1369           ETHIOPIC DIGIT ONE
+136a           ETHIOPIC DIGIT TWO
+136b           ETHIOPIC DIGIT THREE
+136c           ETHIOPIC DIGIT FOUR
+136d           ETHIOPIC DIGIT FIVE
+136e           ETHIOPIC DIGIT SIX
+136f           ETHIOPIC DIGIT SEVEN
+1370           ETHIOPIC DIGIT EIGHT
+1371           ETHIOPIC DIGIT NINE
+1372           ETHIOPIC NUMBER TEN
+1373           ETHIOPIC NUMBER TWENTY
+1374           ETHIOPIC NUMBER THIRTY
+1375           ETHIOPIC NUMBER FORTY
+1376           ETHIOPIC NUMBER FIFTY
+1377           ETHIOPIC NUMBER SIXTY
+1378           ETHIOPIC NUMBER SEVENTY
+1379           ETHIOPIC NUMBER EIGHTY
+137a           ETHIOPIC NUMBER NINETY
+137b           ETHIOPIC NUMBER HUNDRED
+137c           ETHIOPIC NUMBER TEN THOUSAND
 1e00           LATIN CAPITAL LETTER A WITH RING BELOW
 1e01           LATIN SMALL LETTER A WITH RING BELOW
 1e02           LATIN CAPITAL LETTER B WITH DOT ABOVE
index 8f60c4f..7ccd849 100644 (file)
@@ -16,6 +16,7 @@ return <<'END';
 0e50   0e59    0000
 0ed0   0ed9    0000
 0f20   0f29    0000
+1369   1371    0001
 2070           0000
 2074   2079    0004
 2080   2089    0000
index 306f2a4..82d8307 100755 (executable)
@@ -9,17 +9,23 @@ mkdir "To", 0777;
 @todo = (
 # typical
 
-    ['IsAlnum', '$cat =~ /^L[ulo]|^Nd/ or $code eq "005F"',    ''],
-    ['IsAlpha', '$cat =~ /^L[ulo]/',   ''],
-    ['IsSpace', '$cat =~ /^Z/ or $code lt "0020" and chr(hex $code) =~ /^\s/', ''],
-    ['IsDigit', '$cat =~ /^Nd$/',      ''],
-    ['IsUpper', '$cat =~ /^Lu$/',      ''],
-    ['IsLower', '$cat =~ /^Ll$/',      ''],
-    ['IsPrint', '$cat =~ /^[^C]/',     ''],
-    ['ToUpper', '$up',                 '$up'],
-    ['ToLower', '$down',               '$down'],
-    ['ToTitle', '$title',              '$title'],
-    ['ToDigit', '$dec ne ""',          '$dec'],
+    ['IsWord',  '$cat =~ /^L[ulo]|^Nd/ or $code eq "005F"',    ''],
+    ['IsAlnum', '$cat =~ /^L[ulo]|^Nd/',       ''],
+    ['IsAlpha',  '$cat =~ /^L[ulo]/',  ''],
+    ['IsSpace',  '$cat =~ /^Z/ or $code lt "0020" and chr(hex $code) =~ /^\s/',        ''],
+    ['IsDigit',  '$cat =~ /^Nd$/',     ''],
+    ['IsUpper',  '$cat =~ /^Lu$/',     ''],
+    ['IsLower',  '$cat =~ /^Ll$/',     ''],
+    ['IsASCII',  'hex $code <= 127',   ''],
+    ['IsCntrl',  '$cat =~ /^C/',       ''],
+    ['IsGraph',  '$cat =~ /^[^C]/ and $code ne "0020"',        ''],
+    ['IsPrint',  '$cat =~ /^[^C]/',    ''],
+    ['IsPunct',  '$cat =~ /^P/',       ''],
+    ['IsXDigit', '$code =~ /^00(3[0-9]|[46][1-6])$/',  ''],
+    ['ToUpper',  '$up',                        '$up'],
+    ['ToLower',  '$down',              '$down'],
+    ['ToTitle',  '$title',             '$title'],
+    ['ToDigit',  '$dec ne ""',         '$dec'],
 
 # Name
 
index d91f84d..6f201dc 100644 (file)
--- a/objXSUB.h
+++ b/objXSUB.h
 #define PL_unsafe              pPerl->PL_unsafe
 #undef  PL_utf8_alnum
 #define PL_utf8_alnum          pPerl->PL_utf8_alnum
+#undef  PL_utf8_alnumc
+#define PL_utf8_alnumc         pPerl->PL_utf8_alnumc
 #undef  PL_utf8_alpha
 #define PL_utf8_alpha          pPerl->PL_utf8_alpha
+#undef  PL_utf8_ascii
+#define PL_utf8_ascii          pPerl->PL_utf8_ascii
+#undef  PL_utf8_cntrl
+#define PL_utf8_cntrl          pPerl->PL_utf8_cntrl
 #undef  PL_utf8_digit
 #define PL_utf8_digit          pPerl->PL_utf8_digit
+#undef  PL_utf8_graph
+#define PL_utf8_graph          pPerl->PL_utf8_graph
 #undef  PL_utf8_lower
 #define PL_utf8_lower          pPerl->PL_utf8_lower
 #undef  PL_utf8_mark
 #define PL_utf8_mark           pPerl->PL_utf8_mark
 #undef  PL_utf8_print
 #define PL_utf8_print          pPerl->PL_utf8_print
+#undef  PL_utf8_punct
+#define PL_utf8_punct          pPerl->PL_utf8_punct
 #undef  PL_utf8_space
 #define PL_utf8_space          pPerl->PL_utf8_space
 #undef  PL_utf8_tolower
 #define PL_utf8_toupper                pPerl->PL_utf8_toupper
 #undef  PL_utf8_upper
 #define PL_utf8_upper          pPerl->PL_utf8_upper
+#undef  PL_utf8_xdigit
+#define PL_utf8_xdigit         pPerl->PL_utf8_xdigit
 #undef  PL_uudmap
 #define PL_uudmap              pPerl->PL_uudmap
 #undef  PL_warnhook
 #define Perl_is_uni_alnum      pPerl->Perl_is_uni_alnum
 #undef  is_uni_alnum
 #define is_uni_alnum           Perl_is_uni_alnum
+#undef  Perl_is_uni_alnumc
+#define Perl_is_uni_alnumc     pPerl->Perl_is_uni_alnumc
+#undef  is_uni_alnumc
+#define is_uni_alnumc          Perl_is_uni_alnumc
 #undef  Perl_is_uni_idfirst
 #define Perl_is_uni_idfirst    pPerl->Perl_is_uni_idfirst
 #undef  is_uni_idfirst
 #define Perl_is_uni_alpha      pPerl->Perl_is_uni_alpha
 #undef  is_uni_alpha
 #define is_uni_alpha           Perl_is_uni_alpha
+#undef  Perl_is_uni_ascii
+#define Perl_is_uni_ascii      pPerl->Perl_is_uni_ascii
+#undef  is_uni_ascii
+#define is_uni_ascii           Perl_is_uni_ascii
 #undef  Perl_is_uni_space
 #define Perl_is_uni_space      pPerl->Perl_is_uni_space
 #undef  is_uni_space
 #define is_uni_space           Perl_is_uni_space
+#undef  Perl_is_uni_cntrl
+#define Perl_is_uni_cntrl      pPerl->Perl_is_uni_cntrl
+#undef  is_uni_cntrl
+#define is_uni_cntrl           Perl_is_uni_cntrl
+#undef  Perl_is_uni_graph
+#define Perl_is_uni_graph      pPerl->Perl_is_uni_graph
+#undef  is_uni_graph
+#define is_uni_graph           Perl_is_uni_graph
 #undef  Perl_is_uni_digit
 #define Perl_is_uni_digit      pPerl->Perl_is_uni_digit
 #undef  is_uni_digit
 #define Perl_is_uni_print      pPerl->Perl_is_uni_print
 #undef  is_uni_print
 #define is_uni_print           Perl_is_uni_print
+#undef  Perl_is_uni_punct
+#define Perl_is_uni_punct      pPerl->Perl_is_uni_punct
+#undef  is_uni_punct
+#define is_uni_punct           Perl_is_uni_punct
+#undef  Perl_is_uni_xdigit
+#define Perl_is_uni_xdigit     pPerl->Perl_is_uni_xdigit
+#undef  is_uni_xdigit
+#define is_uni_xdigit          Perl_is_uni_xdigit
 #undef  Perl_to_uni_upper
 #define Perl_to_uni_upper      pPerl->Perl_to_uni_upper
 #undef  to_uni_upper
 #define Perl_is_uni_alnum_lc   pPerl->Perl_is_uni_alnum_lc
 #undef  is_uni_alnum_lc
 #define is_uni_alnum_lc                Perl_is_uni_alnum_lc
+#undef  Perl_is_uni_alnumc_lc
+#define Perl_is_uni_alnumc_lc  pPerl->Perl_is_uni_alnumc_lc
+#undef  is_uni_alnumc_lc
+#define is_uni_alnumc_lc       Perl_is_uni_alnumc_lc
 #undef  Perl_is_uni_idfirst_lc
 #define Perl_is_uni_idfirst_lc pPerl->Perl_is_uni_idfirst_lc
 #undef  is_uni_idfirst_lc
 #define Perl_is_uni_alpha_lc   pPerl->Perl_is_uni_alpha_lc
 #undef  is_uni_alpha_lc
 #define is_uni_alpha_lc                Perl_is_uni_alpha_lc
+#undef  Perl_is_uni_ascii_lc
+#define Perl_is_uni_ascii_lc   pPerl->Perl_is_uni_ascii_lc
+#undef  is_uni_ascii_lc
+#define is_uni_ascii_lc                Perl_is_uni_ascii_lc
 #undef  Perl_is_uni_space_lc
 #define Perl_is_uni_space_lc   pPerl->Perl_is_uni_space_lc
 #undef  is_uni_space_lc
 #define is_uni_space_lc                Perl_is_uni_space_lc
+#undef  Perl_is_uni_cntrl_lc
+#define Perl_is_uni_cntrl_lc   pPerl->Perl_is_uni_cntrl_lc
+#undef  is_uni_cntrl_lc
+#define is_uni_cntrl_lc                Perl_is_uni_cntrl_lc
+#undef  Perl_is_uni_graph_lc
+#define Perl_is_uni_graph_lc   pPerl->Perl_is_uni_graph_lc
+#undef  is_uni_graph_lc
+#define is_uni_graph_lc                Perl_is_uni_graph_lc
 #undef  Perl_is_uni_digit_lc
 #define Perl_is_uni_digit_lc   pPerl->Perl_is_uni_digit_lc
 #undef  is_uni_digit_lc
 #define Perl_is_uni_print_lc   pPerl->Perl_is_uni_print_lc
 #undef  is_uni_print_lc
 #define is_uni_print_lc                Perl_is_uni_print_lc
+#undef  Perl_is_uni_punct_lc
+#define Perl_is_uni_punct_lc   pPerl->Perl_is_uni_punct_lc
+#undef  is_uni_punct_lc
+#define is_uni_punct_lc                Perl_is_uni_punct_lc
+#undef  Perl_is_uni_xdigit_lc
+#define Perl_is_uni_xdigit_lc  pPerl->Perl_is_uni_xdigit_lc
+#undef  is_uni_xdigit_lc
+#define is_uni_xdigit_lc       Perl_is_uni_xdigit_lc
 #undef  Perl_to_uni_upper_lc
 #define Perl_to_uni_upper_lc   pPerl->Perl_to_uni_upper_lc
 #undef  to_uni_upper_lc
 #define Perl_is_utf8_alnum     pPerl->Perl_is_utf8_alnum
 #undef  is_utf8_alnum
 #define is_utf8_alnum          Perl_is_utf8_alnum
+#undef  Perl_is_utf8_alnumc
+#define Perl_is_utf8_alnumc    pPerl->Perl_is_utf8_alnumc
+#undef  is_utf8_alnumc
+#define is_utf8_alnumc         Perl_is_utf8_alnumc
 #undef  Perl_is_utf8_idfirst
 #define Perl_is_utf8_idfirst   pPerl->Perl_is_utf8_idfirst
 #undef  is_utf8_idfirst
 #define Perl_is_utf8_alpha     pPerl->Perl_is_utf8_alpha
 #undef  is_utf8_alpha
 #define is_utf8_alpha          Perl_is_utf8_alpha
+#undef  Perl_is_utf8_ascii
+#define Perl_is_utf8_ascii     pPerl->Perl_is_utf8_ascii
+#undef  is_utf8_ascii
+#define is_utf8_ascii          Perl_is_utf8_ascii
 #undef  Perl_is_utf8_space
 #define Perl_is_utf8_space     pPerl->Perl_is_utf8_space
 #undef  is_utf8_space
 #define is_utf8_space          Perl_is_utf8_space
+#undef  Perl_is_utf8_cntrl
+#define Perl_is_utf8_cntrl     pPerl->Perl_is_utf8_cntrl
+#undef  is_utf8_cntrl
+#define is_utf8_cntrl          Perl_is_utf8_cntrl
 #undef  Perl_is_utf8_digit
 #define Perl_is_utf8_digit     pPerl->Perl_is_utf8_digit
 #undef  is_utf8_digit
 #define is_utf8_digit          Perl_is_utf8_digit
+#undef  Perl_is_utf8_graph
+#define Perl_is_utf8_graph     pPerl->Perl_is_utf8_graph
+#undef  is_utf8_graph
+#define is_utf8_graph          Perl_is_utf8_graph
 #undef  Perl_is_utf8_upper
 #define Perl_is_utf8_upper     pPerl->Perl_is_utf8_upper
 #undef  is_utf8_upper
 #define Perl_is_utf8_print     pPerl->Perl_is_utf8_print
 #undef  is_utf8_print
 #define is_utf8_print          Perl_is_utf8_print
+#undef  Perl_is_utf8_punct
+#define Perl_is_utf8_punct     pPerl->Perl_is_utf8_punct
+#undef  is_utf8_punct
+#define is_utf8_punct          Perl_is_utf8_punct
+#undef  Perl_is_utf8_xdigit
+#define Perl_is_utf8_xdigit    pPerl->Perl_is_utf8_xdigit
+#undef  is_utf8_xdigit
+#define is_utf8_xdigit         Perl_is_utf8_xdigit
 #undef  Perl_is_utf8_mark
 #define Perl_is_utf8_mark      pPerl->Perl_is_utf8_mark
 #undef  is_utf8_mark
 #define Perl_ck_rvconst                pPerl->Perl_ck_rvconst
 #undef  ck_rvconst
 #define ck_rvconst             Perl_ck_rvconst
+#undef  Perl_ck_sassign
+#define Perl_ck_sassign                pPerl->Perl_ck_sassign
+#undef  ck_sassign
+#define ck_sassign             Perl_ck_sassign
 #undef  Perl_ck_scmp
 #define Perl_ck_scmp           pPerl->Perl_ck_scmp
 #undef  ck_scmp
index de727db..2278a54 100644 (file)
@@ -121,6 +121,13 @@ Unix and UNICOS also have 64-bit support.
 
 =head2 Better syntax checks on parenthesized unary operators
 
+TODO
+
+=head2 POSIX character class syntax [: :] supported
+
+For example, to match alphabetic characters, use /[[:alpha:]]/.
+See L<perlre> for details.
+
 Expressions such as:
 
     print defined(&foo,&bar,&baz);
index d7b9024..b352e9c 100644 (file)
@@ -1000,21 +1000,23 @@ there is no builtin with the name C<word>.
 opposed to a subroutine reference): no such method callable via the
 package. If method name is C<???>, this is an internal error.
 
-=item Character class syntax [. .] is reserved for future extensions
+=item Character class [:%s:] unknown
 
-(W) Within regular expression character classes ([]) the syntax beginning
-with "[." and ending with ".]" is reserved for future extensions.
-If you need to represent those character sequences inside a regular
-expression character class, just quote the square brackets with the
-backslash: "\[." and ".\]".
+(F) The class in the character class [: :] syntax is unknown.
 
-=item Character class syntax [: :] is reserved for future extensions
+=item Character class syntax [%s] belongs inside character classes
+
+(W) The character class constructs [: :], [= =], and [. .]  go
+I<inside> character classes; the [] are part of the construct.  For
+example: /[[:alpha:]]/
+
+=item Character class syntax [. .] is reserved for future extensions
 
 (W) Within regular expression character classes ([]) the syntax beginning
-with "[:" and ending with ":]" is reserved for future extensions.
+with "[." and ending with ".]" is reserved for future extensions.
 If you need to represent those character sequences inside a regular
 expression character class, just quote the square brackets with the
-backslash: "\[:" and ":\]".
+backslash: "\[." and ".\]".
 
 =item Character class syntax [= =] is reserved for future extensions
 
index ca95638..470c593 100644 (file)
@@ -186,6 +186,100 @@ current locale.  See L<perllocale>.  You may use C<\w>, C<\W>, C<\s>, C<\S>,
 C<\d>, and C<\D> within character classes (though not as either end of
 a range).  See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
 
+The POSIX character class syntax
+
+       [:class:]
+
+is also available.  The available classes and their \-equivalents
+(if any) are as follows:
+
+    alpha
+    alnum
+    ascii
+    cntrl
+    digit       \d
+    graph
+    lower
+    print
+    punct
+    space       \s
+    upper
+    word        \w
+    xdigit
+
+Note that the [] are part of the [::] construct, not part of the whole
+character class.  For example:
+
+       [01[:alpha:]%]
+
+matches one, zero, any alphabetic character, and the percentage sign.
+
+The exact meanings of the above classes depend on several things:
+if the C<utf8> pragma is used, the following equivalences to Unicode
+\p{} constructs hold:
+
+    alpha       IsAlpha
+    alnum       IsAlnum
+    ascii       IsASCII
+    cntrl       IsCntrl
+    digit       IsDigit
+    graph       IsGraph
+    lower       IsLower
+    print       IsPrint
+    punct       IsPunct
+    space       IsSpace
+    upper       IsUpper
+    word        IsWord
+    xdigit      IsXDigit
+
+For example, [:lower:] and \p{IsLower} are equivalent.
+
+If the C<utf8> pragma is not used but the C<locale> pragma is, the
+classes correlate with the isalpha(3) interface (except for `word',
+which is a Perl extension).
+
+The classes whose names are presumably less obvious are:
+
+=over 4
+
+=item cntrl
+
+        Any control character.  Usually characters that don't produce
+        output as such but instead control the terminal somehow:
+        for example newline and backspace are control characters.
+
+=item graph
+
+        Any alphanumeric or punctuation character.
+
+=item print
+
+        Any alphanumeric or punctuation character or space.
+
+=item punct
+
+        Any punctuation character.
+
+=item xdigit
+
+        Any hexadecimal digit.  Though this may feel silly
+        (/[0-9a-f]/i would work just fine), it is included
+        for completeness.
+
+=item 
+
+=back
+
+You can negate the [::] character classes by prefixing the class name
+with a '^'. This is a Perl extension.  For example:
+
+    ^digit      \D      \P{IsDigit}
+    ^space     \S      \P{IsSpace}
+    ^word      \W      \P{IsWord}
+
+The POSIX character classes [.cc.] and [=cc=] are B<not> supported
+and trying to use them will cause a warning.
+
 Perl defines the following zero-width assertions:
 
     \b Match a word boundary
diff --git a/proto.h b/proto.h
index 7fa6424..402876a 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -196,35 +196,53 @@ VIRTUAL char*     Perl_instr(pTHX_ const char* big, const char* little);
 VIRTUAL bool   Perl_io_close(pTHX_ IO* io);
 VIRTUAL OP*    Perl_invert(pTHX_ OP* cmd);
 VIRTUAL bool   Perl_is_uni_alnum(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_alnumc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_idfirst(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_alpha(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_ascii(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_space(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_cntrl(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_graph(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_digit(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_upper(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_lower(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_print(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_punct(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_xdigit(pTHX_ U32 c);
 VIRTUAL U32    Perl_to_uni_upper(pTHX_ U32 c);
 VIRTUAL U32    Perl_to_uni_title(pTHX_ U32 c);
 VIRTUAL U32    Perl_to_uni_lower(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_alnum_lc(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_alnumc_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_idfirst_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_alpha_lc(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_ascii_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_space_lc(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_cntrl_lc(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_graph_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_digit_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_upper_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_lower_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_uni_print_lc(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_punct_lc(pTHX_ U32 c);
+VIRTUAL bool   Perl_is_uni_xdigit_lc(pTHX_ U32 c);
 VIRTUAL U32    Perl_to_uni_upper_lc(pTHX_ U32 c);
 VIRTUAL U32    Perl_to_uni_title_lc(pTHX_ U32 c);
 VIRTUAL U32    Perl_to_uni_lower_lc(pTHX_ U32 c);
 VIRTUAL bool   Perl_is_utf8_alnum(pTHX_ U8 *p);
+VIRTUAL bool   Perl_is_utf8_alnumc(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_idfirst(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_alpha(pTHX_ U8 *p);
+VIRTUAL bool   Perl_is_utf8_ascii(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_space(pTHX_ U8 *p);
+VIRTUAL bool   Perl_is_utf8_cntrl(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_digit(pTHX_ U8 *p);
+VIRTUAL bool   Perl_is_utf8_graph(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_upper(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_lower(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_print(pTHX_ U8 *p);
+VIRTUAL bool   Perl_is_utf8_punct(pTHX_ U8 *p);
+VIRTUAL bool   Perl_is_utf8_xdigit(pTHX_ U8 *p);
 VIRTUAL bool   Perl_is_utf8_mark(pTHX_ U8 *p);
 VIRTUAL OP*    Perl_jmaybe(pTHX_ OP* arg);
 VIRTUAL I32    Perl_keyword(pTHX_ char* d, I32 len);
@@ -854,7 +872,8 @@ STATIC void S_scan_commit(pTHX_ scan_data_t *data);
 STATIC I32     S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags);
 STATIC I32     S_add_data(pTHX_ I32 n, char *s);
 STATIC void    S_re_croak2(pTHX_ const char* pat1, const char* pat2, ...) __attribute__((noreturn));
-STATIC char*   S_regpposixcc(pTHX_ I32 value);
+STATIC I32     S_regpposixcc(pTHX_ I32 value);
+STATIC void    S_checkposixcc(pTHX);
 STATIC void    S_clear_re(pTHX_ void *r);
 #endif
 #if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
index 59fe5a7..3569b3b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -163,6 +163,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 #define LOC (PL_regflags & PMf_LOCALE)
 #define FOLD (PL_regflags & PMf_FOLD)
 
+#define OOB_CHAR8              1234
+#define OOB_UTF8               123456
+
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
 
@@ -2093,12 +2096,17 @@ S_regwhite(pTHX_ char *p, char *e)
     return p;
 }
 
-/* parse POSIX character classes like [[:foo:]] */
-STATIC char*
+/* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
+   Character classes ([:foo:]) can also be negated ([:^foo:]).
+   Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
+   Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
+   but trigger warnings because they are currently unimplemented. */
+STATIC I32
 S_regpposixcc(pTHX_ I32 value)
 {
     dTHR;
     char *posixcc = 0;
+    I32 namedclass = -1;
 
     if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
        /* I smell either [: or [= or [. -- POSIX has been here, right? */
@@ -2114,26 +2122,120 @@ S_regpposixcc(pTHX_ I32 value)
            /* Grandfather lone [:, [=, [. */
            PL_regcomp_parse = s;
        else {
-           PL_regcomp_parse++; /* skip over the c */
-           if (*PL_regcomp_parse == ']') {
-               /* Not Implemented Yet.
-                * (POSIX Extended Character Classes, that is)
-                * The text between e.g. [: and :] would start
-                * at s + 1 and stop at regcomp_parse - 2. */
-               if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+           char* t = PL_regcomp_parse++; /* skip over the c */
+
+           if (*PL_regcomp_parse == ']') {
+               PL_regcomp_parse++; /* skip over the ending ] */
+               posixcc = s + 1;
+               if (*s == ':') {
+                   I32 complement = *posixcc == '^' ? *posixcc++ : 0;
+                   I32 skip = 5; /* the most common skip */
+
+                   switch (*posixcc) {
+                   case 'a':
+                       if (strnEQ(posixcc, "alnum", 5))
+                           namedclass =
+                               complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
+                       else if (strnEQ(posixcc, "alpha", 5))
+                           namedclass =
+                               complement ? ANYOF_NALPHA : ANYOF_ALPHA;
+                       else if (strnEQ(posixcc, "ascii", 5))
+                           namedclass =
+                               complement ? ANYOF_NASCII : ANYOF_ASCII;
+                       break;
+                   case 'c':
+                       if (strnEQ(posixcc, "cntrl", 5))
+                           namedclass =
+                               complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
+                       break;
+                   case 'd':
+                       if (strnEQ(posixcc, "digit", 5))
+                           namedclass =
+                               complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
+                       break;
+                   case 'g':
+                       if (strnEQ(posixcc, "graph", 5))
+                           namedclass =
+                               complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
+                       break;
+                   case 'l':
+                       if (strnEQ(posixcc, "lower", 5))
+                           namedclass =
+                               complement ? ANYOF_NLOWER : ANYOF_LOWER;
+                       break;
+                   case 'p':
+                       if (strnEQ(posixcc, "print", 5))
+                           namedclass =
+                               complement ? ANYOF_NPRINT : ANYOF_PRINT;
+                       else if (strnEQ(posixcc, "punct", 5))
+                           namedclass =
+                               complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
+                       break;
+                   case 's':
+                       if (strnEQ(posixcc, "space", 5))
+                           namedclass =
+                               complement ? ANYOF_NSPACE : ANYOF_SPACE;
+                   case 'u':
+                       if (strnEQ(posixcc, "upper", 5))
+                           namedclass =
+                               complement ? ANYOF_NUPPER : ANYOF_UPPER;
+                       break;
+                   case 'w': /* this is not POSIX, this is the Perl \w */
+                       if (strnEQ(posixcc, "word", 4)) {
+                           namedclass =
+                               complement ? ANYOF_NALNUM : ANYOF_ALNUM;
+                           skip = 4;
+                       }
+                       break;
+                   case 'x':
+                       if (strnEQ(posixcc, "xdigit", 6)) {
+                           namedclass =
+                               complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
+                           skip = 6;
+                       }
+                       break;
+                   }
+                   if ((namedclass == -1 ||
+                        !(posixcc + skip + 2 < PL_regxend &&
+                          (posixcc[skip] == ':' &&
+                           posixcc[skip + 1] == ']'))))
+                       Perl_croak(aTHX_ "Character class [:%.*s:] unknown",
+                                  t - s - 1, s + 1); 
+               } else if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+                   /* [[=foo=]] and [[.foo.]] are still future. */
                    Perl_warner(aTHX_ WARN_UNSAFE,
-                          "Character class syntax [%c %c] is reserved for future extensions", c, c);
-               PL_regcomp_parse++; /* skip over the ending ] */
-               posixcc = s + 1;
-           }
-           else {
-               /* maternal grandfather */
+                               "Character class syntax [%c %c] is reserved for future extensions", c, c);
+           } else {
+               /* Maternal grandfather:
+                * "[:" ending in ":" but not in ":]" */
                PL_regcomp_parse = s;
            }
        }
     }
 
-    return posixcc;
+    return namedclass;
+}
+
+STATIC void
+S_checkposixcc(pTHX)
+{
+    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY &&
+       (*PL_regcomp_parse == ':' ||
+        *PL_regcomp_parse == '=' ||
+        *PL_regcomp_parse == '.')) {
+       char *s = PL_regcomp_parse;
+       char  c = *s++;
+
+       while(*s && isALNUM(*s))
+           s++;
+       if (*s && c == *s && s[1] == ']') {
+           Perl_warner(aTHX_ WARN_UNSAFE,
+                       "Character class syntax [%c %c] belongs inside character classes", c, c);
+           if (c == '=' || c == '.')
+               Perl_warner(aTHX_ WARN_UNSAFE,
+                           "Character class syntax [%c %c] is reserved for future extensions", c, c);
+       }
+    }
 }
 
 STATIC regnode *
@@ -2142,142 +2244,319 @@ S_regclass(pTHX)
     dTHR;
     register char *opnd, *s;
     register I32 value;
-    register I32 lastvalue = 1234;
+    register I32 lastvalue = OOB_CHAR8;
     register I32 range = 0;
     register regnode *ret;
     register I32 def;
     I32 numlen;
+    I32 namedclass;            /* ANYOF_* class index of the current element, or -1 for a plain character */
 
     s = opnd = (char *) OPERAND(PL_regcode);
     ret = reg_node(ANYOF);
-    for (value = 0; value < 33; value++)
+    for (value = 0; value < ANYOF_SIZE; value++)
        regc(0, s++);
     if (*PL_regcomp_parse == '^') {    /* Complement of range. */
        PL_regnaughty++;
        PL_regcomp_parse++;
        if (!SIZE_ONLY)
-           *opnd |= ANYOF_INVERT;
+           ANYOF_FLAGS(opnd) |= ANYOF_INVERT;
     }
     if (!SIZE_ONLY) {
        PL_regcode += ANY_SKIP;
        if (FOLD)
-           *opnd |= ANYOF_FOLD;
+           ANYOF_FLAGS(opnd) |= ANYOF_FOLD;
        if (LOC)
-           *opnd |= ANYOF_LOCALE;
+           ANYOF_FLAGS(opnd) |= ANYOF_LOCALE;
     }
     else {
        PL_regsize += ANY_SKIP;
     }
+
+    checkposixcc();            /* may warn when the whole class looks like [:x:], [=x=] or [.x.] */
+
     if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
        goto skipcond;          /* allow 1st char to be ] or - */
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
+       namedclass = -1;
        value = UCHARAT(PL_regcomp_parse++);
        if (value == '[')
-           (void)regpposixcc(value); /* ignore the return value for now */
+           namedclass = regpposixcc(value);
        else if (value == '\\') {
            value = UCHARAT(PL_regcomp_parse++);
            switch (value) {
-           case 'w':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       *opnd |= ANYOF_ALNUML;
-                   else {
-                       for (value = 0; value < 256; value++)
-                           if (isALNUM(value))
-                               ANYOF_SET(opnd, value);
-                   }
+           case 'w':   namedclass = ANYOF_ALNUM;       break;
+           case 'W':   namedclass = ANYOF_NALNUM;      break;
+           case 's':   namedclass = ANYOF_SPACE;       break;
+           case 'S':   namedclass = ANYOF_NSPACE;      break;
+           case 'd':   namedclass = ANYOF_DIGIT;       break;
+           case 'D':   namedclass = ANYOF_NDIGIT;      break;
+           case 'n':   value = '\n';                   break;
+           case 'r':   value = '\r';                   break;
+           case 't':   value = '\t';                   break;
+           case 'f':   value = '\f';                   break;
+           case 'b':   value = '\b';                   break;
+           case 'e':   value = '\033';                 break;
+           case 'a':   value = '\007';                 break;
+           case 'x':
+               value = scan_hex(PL_regcomp_parse, 2, &numlen);
+               PL_regcomp_parse += numlen;
+               break;
+           case 'c':
+               value = UCHARAT(PL_regcomp_parse++);
+               value = toCTRL(value);
+               break;
+           case '0': case '1': case '2': case '3': case '4':
+           case '5': case '6': case '7': case '8': case '9':
+               value = scan_oct(--PL_regcomp_parse, 3, &numlen);
+               PL_regcomp_parse += numlen;
+               break;
+           }
+       }
+       if (namedclass > -1) {  /* taken on both passes, so sizing and emitting parse the class identically */
+           if (!SIZE_ONLY)     /* ...but only the emitting pass may write the bitmap/class bits into opnd */
+           switch (namedclass) {
+           case ANYOF_ALNUM:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isALNUM(value))
+                           ANYOF_BITMAP_SET(opnd, value);
                }
-               lastvalue = 1234;
-               continue;
-           case 'W':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       *opnd |= ANYOF_NALNUML;
-                   else {
-                       for (value = 0; value < 256; value++)
-                           if (!isALNUM(value))
-                               ANYOF_SET(opnd, value);
-                   }
+               break;
+           case ANYOF_NALNUM:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isALNUM(value))
+                           ANYOF_BITMAP_SET(opnd, value);
                }
-               lastvalue = 1234;
-               continue;
-           case 's':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       *opnd |= ANYOF_SPACEL;
-                   else {
-                       for (value = 0; value < 256; value++)
-                           if (isSPACE(value))
-                               ANYOF_SET(opnd, value);
-                   }
+               break;
+           case ANYOF_SPACE:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isSPACE(value))
+                           ANYOF_BITMAP_SET(opnd, value);
                }
-               lastvalue = 1234;
-               continue;
-           case 'S':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       *opnd |= ANYOF_NSPACEL;
-                   else {
-                       for (value = 0; value < 256; value++)
-                           if (!isSPACE(value))
-                               ANYOF_SET(opnd, value);
-                   }
+               break;
+           case ANYOF_NSPACE:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isSPACE(value))
+                           ANYOF_BITMAP_SET(opnd, value);
                }
-               lastvalue = 1234;
-               continue;
-           case 'd':
-               if (!SIZE_ONLY) {
+               break;
+           case ANYOF_DIGIT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
+               else {
                    for (value = '0'; value <= '9'; value++)
-                       ANYOF_SET(opnd, value);
+                       ANYOF_BITMAP_SET(opnd, value);
                }
-               lastvalue = 1234;
-               continue;
-           case 'D':
-               if (!SIZE_ONLY) {
+               break;
+           case ANYOF_NDIGIT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
+               else {
                    for (value = 0; value < '0'; value++)
-                       ANYOF_SET(opnd, value);
+                       ANYOF_BITMAP_SET(opnd, value);
                    for (value = '9' + 1; value < 256; value++)
-                       ANYOF_SET(opnd, value);
+                       ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_NALNUMC:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isALNUMC(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_ALNUMC:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isALNUMC(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_ALPHA:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isALPHA(value))
+                           ANYOF_BITMAP_SET(opnd, value);
                }
-               lastvalue = 1234;
-               continue;
-           case 'n':
-               value = '\n';
                break;
-           case 'r':
-               value = '\r';
+           case ANYOF_NALPHA:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isALPHA(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 't':
-               value = '\t';
+           case ANYOF_ASCII:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
+               else {
+                   for (value = 0; value < 128; value++)
+                       ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 'f':
-               value = '\f';
+           case ANYOF_NASCII:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
+               else {
+                   for (value = 128; value < 256; value++)
+                       ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 'b':
-               value = '\b';
+           case ANYOF_CNTRL:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isCNTRL(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 'e':
-               value = '\033';
+           case ANYOF_NCNTRL:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isCNTRL(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 'a':
-               value = '\007';
+           case ANYOF_GRAPH:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isGRAPH(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 'x':
-               value = scan_hex(PL_regcomp_parse, 2, &numlen);
-               PL_regcomp_parse += numlen;
+           case ANYOF_NGRAPH:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isGRAPH(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case 'c':
-               value = UCHARAT(PL_regcomp_parse++);
-               value = toCTRL(value);
+           case ANYOF_LOWER:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isLOWER(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
                break;
-           case '0': case '1': case '2': case '3': case '4':
-           case '5': case '6': case '7': case '8': case '9':
-               value = scan_oct(--PL_regcomp_parse, 3, &numlen);
-               PL_regcomp_parse += numlen;
+           case ANYOF_NLOWER:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isLOWER(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_PRINT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isPRINT(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_NPRINT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isPRINT(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_PUNCT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isPUNCT(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_NPUNCT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isPUNCT(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_UPPER:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isUPPER(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_NUPPER:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isUPPER(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_XDIGIT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (isXDIGIT(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           case ANYOF_NXDIGIT:
+               if (LOC)
+                   ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
+               else {
+                   for (value = 0; value < 256; value++)
+                       if (!isXDIGIT(value))
+                           ANYOF_BITMAP_SET(opnd, value);
+               }
+               break;
+           default:
+               FAIL("invalid [::] class in regexp");
                break;
            }
+           if (!SIZE_ONLY && LOC)
+               ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
+           lastvalue = OOB_CHAR8;      /* a named class is never the low end of a range */
        }
+        else
        if (range) {
            if (lastvalue > value)
                FAIL("invalid [] range in regexp");
@@ -2301,35 +2580,36 @@ S_regclass(pTHX)
                if (isLOWER(lastvalue)) {
                    for (i = lastvalue; i <= value; i++)
                        if (isLOWER(i))
-                           ANYOF_SET(opnd, i);
+                           ANYOF_BITMAP_SET(opnd, i);
                } else {
                    for (i = lastvalue; i <= value; i++)
                        if (isUPPER(i))
-                           ANYOF_SET(opnd, i);
+                           ANYOF_BITMAP_SET(opnd, i);
                }
            }
            else
 #endif
                for ( ; lastvalue <= value; lastvalue++)
-                   ANYOF_SET(opnd, lastvalue);
+                   ANYOF_BITMAP_SET(opnd, lastvalue);
         }
        lastvalue = value;
     }
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
-    if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+    if (!SIZE_ONLY &&  /* applies only when FOLD is the sole flag set, ignoring INVERT */
+       (ANYOF_FLAGS(opnd) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
        for (value = 0; value < 256; ++value) {
-           if (ANYOF_TEST(opnd, value)) {
+           if (ANYOF_BITMAP_TEST(opnd, value)) {
                I32 cf = PL_fold[value];
-               ANYOF_SET(opnd, cf);
+               ANYOF_BITMAP_SET(opnd, cf);
            }
        }
-       *opnd &= ~ANYOF_FOLD;
+       ANYOF_FLAGS(opnd) &= ~ANYOF_FOLD;
     }
     /* optimize inverted simple patterns (e.g. [^a-z]) */
-    if (!SIZE_ONLY && (*opnd & 0xFF) == ANYOF_INVERT) {
-       for (value = 0; value < 32; ++value)
-           opnd[1 + value] ^= 0xFF;
-       *opnd = 0;
+    if (!SIZE_ONLY && (ANYOF_FLAGS(opnd) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
+       for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
+           opnd[ANYOF_BITMAP_OFFSET + value] ^= 0xFF;  /* flip every bit of a bitmap byte; this is a data mask, not ANYOF_FLAGS_ALL */
+       ANYOF_FLAGS(opnd) = 0;
     }
     return ret;
 }
@@ -2337,16 +2617,17 @@ S_regclass(pTHX)
 STATIC regnode *
 S_regclassutf8(pTHX)
 {
+    dTHR;
     register char *opnd, *e;
     register U32 value;
-    register U32 lastvalue = 123456;
+    register U32 lastvalue = OOB_UTF8;
     register I32 range = 0;
     register regnode *ret;
     I32 numlen;
     I32 n;
     SV *listsv;
     U8 flags = 0;
-    dTHR;
+    I32 namedclass;
 
     if (*PL_regcomp_parse == '^') {    /* Complement of range. */
        PL_regnaughty++;
@@ -2362,75 +2643,29 @@ S_regclassutf8(pTHX)
        listsv = newSVpvn("# comment\n",10);
     }
 
+    checkposixcc();
+
     if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
        goto skipcond;          /* allow 1st char to be ] or - */
 
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
+       namedclass = -1;
        value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
        PL_regcomp_parse += numlen;
 
        if (value == '[')
-           (void)regpposixcc(value); /* ignore the return value for now */
+           namedclass = regpposixcc(value);
        else if (value == '\\') {
            value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
            PL_regcomp_parse += numlen;
            switch (value) {
-           case 'w':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       flags |= ANYOF_ALNUML;
-
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
-               }
-               lastvalue = 123456;
-               continue;
-           case 'W':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       flags |= ANYOF_NALNUML;
-
-                   Perl_sv_catpvf(aTHX_ listsv,
-                       "-utf8::IsAlpha\n-utf8::IsDigit\n0000\t%04x\n%04x\tffff\n",
-                       '_' - 1,
-                       '_' + 1);
-               }
-               lastvalue = 123456;
-               continue;
-           case 's':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       flags |= ANYOF_SPACEL;
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
-                   if (!PL_utf8_space)
-                       is_utf8_space((U8*)" ");
-               }
-               lastvalue = 123456;
-               continue;
-           case 'S':
-               if (!SIZE_ONLY) {
-                   if (LOC)
-                       flags |= ANYOF_NSPACEL;
-                   Perl_sv_catpvf(aTHX_ listsv,
-                       "!utf8::IsSpace\n");
-                   if (!PL_utf8_space)
-                       is_utf8_space((U8*)" ");
-               }
-               lastvalue = 123456;
-               continue;
-           case 'd':
-               if (!SIZE_ONLY) {
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
-               }
-               lastvalue = 123456;
-               continue;
-           case 'D':
-               if (!SIZE_ONLY) {
-                   Perl_sv_catpvf(aTHX_ listsv,
-                       "!utf8::IsDigit\n");
-               }
-               lastvalue = 123456;
-               continue;
+           case 'w':           namedclass = ANYOF_ALNUM;               break;
+           case 'W':           namedclass = ANYOF_NALNUM;              break;
+           case 's':           namedclass = ANYOF_SPACE;               break;
+           case 'S':           namedclass = ANYOF_NSPACE;              break;
+           case 'd':           namedclass = ANYOF_DIGIT;               break;
+           case 'D':           namedclass = ANYOF_NDIGIT;              break;
            case 'p':
            case 'P':
                if (*PL_regcomp_parse == '{') {
@@ -2445,41 +2680,30 @@ S_regclassutf8(pTHX)
                }
                if (!SIZE_ONLY) {
                    if (value == 'p')
-                       Perl_sv_catpvf(aTHX_ listsv, "+utf8::%.*s\n", n, PL_regcomp_parse);
+                       Perl_sv_catpvf(aTHX_ listsv,
+                                      "+utf8::%.*s\n", n, PL_regcomp_parse);
                    else
                        Perl_sv_catpvf(aTHX_ listsv,
-                           "!utf8::%.*s\n", n, PL_regcomp_parse);
+                                      "!utf8::%.*s\n", n, PL_regcomp_parse);
                }
                PL_regcomp_parse = e + 1;
-               lastvalue = 123456;
+               lastvalue = OOB_UTF8;
                continue;
-           case 'n':
-               value = '\n';
-               break;
-           case 'r':
-               value = '\r';
-               break;
-           case 't':
-               value = '\t';
-               break;
-           case 'f':
-               value = '\f';
-               break;
-           case 'b':
-               value = '\b';
-               break;
-           case 'e':
-               value = '\033';
-               break;
-           case 'a':
-               value = '\007';
-               break;
+           case 'n':           value = '\n';           break;
+           case 'r':           value = '\r';           break;
+           case 't':           value = '\t';           break;
+           case 'f':           value = '\f';           break;
+           case 'b':           value = '\b';           break;
+           case 'e':           value = '\033';         break;
+           case 'a':           value = '\007';         break;
            case 'x':
                if (*PL_regcomp_parse == '{') {
                    e = strchr(PL_regcomp_parse++, '}');
                     if (!e)
                         FAIL("Missing right brace on \\x{}");
-                   value = scan_hex(PL_regcomp_parse, e - PL_regcomp_parse, &numlen);
+                   value = scan_hex(PL_regcomp_parse,
+                                    e - PL_regcomp_parse,
+                                    &numlen);
                    PL_regcomp_parse = e + 1;
                }
                else {
@@ -2498,7 +2722,66 @@ S_regclassutf8(pTHX)
                break;
            }
        }
-       if (range) {
+       if (namedclass > -1) {  /* taken on both passes, so sizing and emitting parse the class identically */
+           if (!SIZE_ONLY)     /* listsv is only appended to when not SIZE_ONLY */
+           switch (namedclass) {
+           case ANYOF_ALNUM:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");        break;
+           case ANYOF_NALNUM:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");        break;
+           case ANYOF_ALNUMC:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");       break;
+           case ANYOF_NALNUMC:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");       break;
+           case ANYOF_ALPHA:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");       break;
+           case ANYOF_NALPHA:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");       break;
+           case ANYOF_ASCII:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");       break;
+           case ANYOF_NASCII:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");       break;
+           case ANYOF_CNTRL:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");       break;
+           case ANYOF_NCNTRL:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");       break;
+           case ANYOF_GRAPH:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");       break;
+           case ANYOF_NGRAPH:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");       break;
+           case ANYOF_DIGIT:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");       break;
+           case ANYOF_NDIGIT:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");       break;
+           case ANYOF_LOWER:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");       break;
+           case ANYOF_NLOWER:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");       break;
+           case ANYOF_PRINT:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");       break;
+           case ANYOF_NPRINT:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");       break;
+           case ANYOF_PUNCT:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");       break;
+           case ANYOF_NPUNCT:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");       break;
+           case ANYOF_SPACE:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");       break;
+           case ANYOF_NSPACE:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");       break;
+           case ANYOF_UPPER:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");       break;
+           case ANYOF_NUPPER:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");       break;
+           case ANYOF_XDIGIT:
+               Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");      break;
+           case ANYOF_NXDIGIT:
+               Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");      break;
+           }
+           lastvalue = OOB_UTF8;       /* as the replaced \w/\s/\d cases did: a class is never the low end of a range */
+       }
+        else
+        if (range) {
            if (lastvalue > value)
                FAIL("invalid [] range in regexp");
            if (!SIZE_ONLY)
index 518add0..c679ca4 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -154,24 +154,76 @@ struct regnode_2 {
 
 #define SIZE_ONLY (PL_regcode == &PL_regdummy)
 
-/* Flags for first parameter byte of ANYOF */
-#define ANYOF_INVERT   0x40
-#define ANYOF_FOLD     0x20
-#define ANYOF_LOCALE   0x10
-#define ANYOF_ISA      0x0F
-#define ANYOF_ALNUML    0x08
-#define ANYOF_NALNUML   0x04
-#define ANYOF_SPACEL    0x02
-#define ANYOF_NSPACEL   0x01
-
-/* Utility macros for bitmap of ANYOF */
-#define ANYOF_BYTE(p,c)     (p)[1 + (((c) >> 3) & 31)]
-#define ANYOF_BIT(c)        (1 << ((c) & 7))
-#define ANYOF_SET(p,c)      (ANYOF_BYTE(p,c) |=  ANYOF_BIT(c))
-#define ANYOF_CLEAR(p,c)    (ANYOF_BYTE(p,c) &= ~ANYOF_BIT(c))
-#define ANYOF_TEST(p,c)     (ANYOF_BYTE(p,c) &   ANYOF_BIT(c))
-
-#define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1)
+/* Flags for first parameter byte [0] of ANYOF (read and written through ANYOF_FLAGS) */
+
+#define ANYOF_CLASS    0x08    /* set by S_regclass() when a named class is recorded under LOC */
+#define ANYOF_INVERT   0x04    /* set when the class starts with '^' */
+#define ANYOF_FOLD     0x02    /* set when FOLD is in effect */
+#define ANYOF_LOCALE   0x01    /* set when LOC is in effect */
+
+/* Character classes for bytes [1..4] of ANYOF: each value is a bit index, and N<name> is <name> + 1 */
+
+#define ANYOF_ALNUM     0      /* \w, utf8::IsWord, isALNUM() */
+#define ANYOF_NALNUM    1
+#define ANYOF_SPACE     2
+#define ANYOF_NSPACE    3
+#define ANYOF_DIGIT     4
+#define ANYOF_NDIGIT    5
+#define ANYOF_ALNUMC    6      /* isalnum(3), utf8::IsAlnum, isALNUMC() */
+#define ANYOF_NALNUMC   7
+#define ANYOF_ALPHA     8
+#define ANYOF_NALPHA    9
+#define ANYOF_ASCII    10
+#define ANYOF_NASCII   11
+#define ANYOF_CNTRL    12
+#define ANYOF_NCNTRL   13
+#define ANYOF_GRAPH    14
+#define ANYOF_NGRAPH   15
+#define ANYOF_LOWER    16
+#define ANYOF_NLOWER   17
+#define ANYOF_PRINT    18
+#define ANYOF_NPRINT   19
+#define ANYOF_PUNCT    20
+#define ANYOF_NPUNCT   21
+#define ANYOF_UPPER    22
+#define ANYOF_NUPPER   23
+#define ANYOF_XDIGIT   24
+#define ANYOF_NXDIGIT  25
+
+#define ANYOF_MAX      31      /* presumably the highest index the 4 class bytes (32 bits) can hold */
+
+/* Backward source code compatibility: the old names were flag bits, the new values are bit indices. */
+
+#define ANYOF_ALNUML    ANYOF_ALNUM
+#define ANYOF_NALNUML   ANYOF_NALNUM
+#define ANYOF_SPACEL    ANYOF_SPACE
+#define ANYOF_NSPACEL   ANYOF_NSPACE
+
+/* Utility macros for the bitmap and classes of ANYOF. Layout: [0] flags, [1..4] classes, [5..36] bitmap. */
+
+#define ANYOF_OPND_SIZE                 1       /* the flags byte */
+#define ANYOF_CLASS_SIZE        4
+#define ANYOF_BITMAP_SIZE      32      /* 256 b/(8 b/B) */
+#define ANYOF_SIZE     (ANYOF_OPND_SIZE+ANYOF_CLASS_SIZE+ANYOF_BITMAP_SIZE)
+
+#define ANYOF_FLAGS(p)         ((p)[0])
+#define ANYOF_FLAGS_ALL                0xff
+
+#define ANYOF_BIT(c)           (1 << ((c) & 7))        /* bit within its byte; the _SET/_CLEAR/_TEST macros evaluate c twice */
+
+#define ANYOF_CLASS_OFFSET     ANYOF_OPND_SIZE
+#define ANYOF_CLASS_BYTE(p, c) ((p)[ANYOF_CLASS_OFFSET + (((c) >> 3) & 3)])
+#define ANYOF_CLASS_SET(p, c)  (ANYOF_CLASS_BYTE(p, c) |=  ANYOF_BIT(c))
+#define ANYOF_CLASS_CLEAR(p, c)        (ANYOF_CLASS_BYTE(p, c) &= ~ANYOF_BIT(c))
+#define ANYOF_CLASS_TEST(p, c) (ANYOF_CLASS_BYTE(p, c) &   ANYOF_BIT(c))
+
+#define ANYOF_BITMAP_OFFSET    (ANYOF_CLASS_OFFSET+ANYOF_CLASS_SIZE)
+#define ANYOF_BITMAP_BYTE(p, c)        ((p)[ANYOF_BITMAP_OFFSET + (((c) >> 3) & 31)])
+#define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |=  ANYOF_BIT(c))
+#define ANYOF_BITMAP_CLEAR(p,c)        (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c))
+#define ANYOF_BITMAP_TEST(p, c)        (ANYOF_BITMAP_BYTE(p, c) &   ANYOF_BIT(c))
+
+#define ANY_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode) + 1)        /* ANYOF_SIZE bytes in regnode units, rounded up */
 
 /*
  * Utility definitions.
index 1391dfb..4e5c1c1 100644 (file)
@@ -50,8 +50,80 @@ NSPACEL              NSPACE, no      Match any non-whitespace char in locale
 NSPACELUTF8    NSPACE, no      Match any non-whitespace char in locale
 DIGIT          DIGIT,  no      Match any numeric character
 DIGITUTF8      DIGIT,  no      Match any numeric character
+DIGITL         DIGIT,  no      Match any numeric character in locale
+DIGITLUTF8     DIGIT,  no      Match any numeric character in locale
 NDIGIT         NDIGIT, no      Match any non-numeric character
 NDIGITUTF8     NDIGIT, no      Match any non-numeric character
+NDIGITL                NDIGIT, no      Match any non-numeric character in locale
+NDIGITLUTF8    NDIGIT, no      Match any non-numeric character in locale
+ALNUMC         ALNUMC,  no     Match any alphanumeric character
+ALNUMCUTF8     ALNUMC,  no     Match any alphanumeric character
+ALNUMCL                ALNUMC,  no     Match any alphanumeric character in locale
+ALNUMCLUTF8    ALNUMC,  no     Match any alphanumeric character in locale
+NALNUMC                NALNUMC, no     Match any non-alphanumeric character
+NALNUMCUTF8    NALNUMC, no     Match any non-alphanumeric character
+NALNUMCL       NALNUMC, no     Match any non-alphanumeric character in locale
+NALNUMCLUTF8   NALNUMC, no     Match any non-alphanumeric character in locale
+ALPHA          ALPHA,  no      Match any alphabetic character
+ALPHAUTF8      ALPHA,  no      Match any alphabetic character
+ALPHAL         ALPHA,  no      Match any alphabetic character in locale
+ALPHALUTF8     ALPHA,  no      Match any alphabetic character in locale
+NALPHA         NALPHA, no      Match any non-alphabetic character
+NALPHAUTF8     NALPHA, no      Match any non-alphabetic character
+NALPHAL                NALPHA, no      Match any non-alphabetic character in locale
+NALPHALUTF8    NALPHA, no      Match any non-alphabetic character in locale
+ASCII          ASCII,  no      Match any ASCII character
+NASCII         NASCII, no      Match any non-ASCII character
+CNTRL          CNTRL,  no      Match any control character
+CNTRLUTF8      CNTRL,  no      Match any control character
+CNTRLL         CNTRL,  no      Match any control character in locale
+CNTRLLUTF8     CNTRL,  no      Match any control character in locale
+NCNTRL         NCNTRL, no      Match any non-control character
+NCNTRLUTF8     NCNTRL, no      Match any non-control character
+NCNTRLL                NCNTRL, no      Match any non-control character in locale
+NCNTRLLUTF8    NCNTRL, no      Match any non-control character in locale
+GRAPH          GRAPH,  no      Match any graphical character
+GRAPHUTF8      GRAPH,  no      Match any graphical character
+GRAPHL         GRAPH,  no      Match any graphical character in locale
+GRAPHLUTF8     GRAPH,  no      Match any graphical character in locale
+NGRAPH         NGRAPH, no      Match any non-graphical character
+NGRAPHUTF8     NGRAPH, no      Match any non-graphical character
+NGRAPHL                NGRAPH, no      Match any non-graphical character in locale
+NGRAPHLUTF8    NGRAPH, no      Match any non-graphical character in locale
+LOWER          LOWER,  no      Match any lowercase character
+LOWERUTF8      LOWER,  no      Match any lowercase character
+LOWERL         LOWER,  no      Match any lowercase character in locale
+LOWERLUTF8     LOWER,  no      Match any lowercase character in locale
+NLOWER         NLOWER, no      Match any non-lowercase character
+NLOWERUTF8     NLOWER, no      Match any non-lowercase character
+NLOWERL                NLOWER, no      Match any non-lowercase character in locale
+NLOWERLUTF8    NLOWER, no      Match any non-lowercase character in locale
+PRINT          PRINT,  no      Match any printable character
+PRINTUTF8      PRINT,  no      Match any printable character
+PRINTL         PRINT,  no      Match any printable character in locale
+PRINTLUTF8     PRINT,  no      Match any printable character in locale
+NPRINT         NPRINT, no      Match any non-printable character
+NPRINTUTF8     NPRINT, no      Match any non-printable character
+NPRINTL                NPRINT, no      Match any non-printable character in locale
+NPRINTLUTF8    NPRINT, no      Match any non-printable character in locale
+PUNCT          PUNCT,  no      Match any punctuation character
+PUNCTUTF8      PUNCT,  no      Match any punctuation character
+PUNCTL         PUNCT,  no      Match any punctuation character in locale
+PUNCTLUTF8     PUNCT,  no      Match any punctuation character in locale
+NPUNCT         NPUNCT, no      Match any non-punctuation character
+NPUNCTUTF8     NPUNCT, no      Match any non-punctuation character
+NPUNCTL                NPUNCT, no      Match any non-punctuation character in locale
+NPUNCTLUTF8    NPUNCT, no      Match any non-punctuation character in locale
+UPPER          UPPER,  no      Match any uppercase character
+UPPERUTF8      UPPER,  no      Match any uppercase character
+UPPERL         UPPER,  no      Match any uppercase character in locale
+UPPERLUTF8     UPPER,  no      Match any uppercase character in locale
+NUPPER         NUPPER, no      Match any non-uppercase character
+NUPPERUTF8     NUPPER, no      Match any non-uppercase character
+NUPPERL                NUPPER, no      Match any non-uppercase character in locale
+NUPPERLUTF8    NUPPER, no      Match any non-uppercase character in locale
+XDIGIT         XDIGIT,  no     Match any hexdigit character
+NXDIGIT                NXDIGIT, no     Match any non-hexdigit character
 CLUMP          CLUMP,  no      Match any combining character sequence
 
 # BRANCH       The set of branches constituting a single choice are hooked
index c97f89e..75f3873 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -97,7 +97,7 @@
  * Forwards.
  */
 
-#define REGINCLASS(p,c)  (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
+#define REGINCLASS(p,c)  (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
 #define REGINCLASSUTF8(f,p)  (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
 
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
@@ -1062,6 +1062,34 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                s += UTF8SKIP(s);
            }
            break;
+       case DIGITL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isDIGIT_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case DIGITLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isDIGIT_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
        case NDIGIT:
            while (s < strend) {
                if (!isDIGIT(*s)) {
@@ -1088,197 +1116,1033 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                s += UTF8SKIP(s);
            }
            break;
-       }
-    }
-    else {
-       dontbother = 0;
-       if (prog->float_substr != Nullsv) {     /* Trim the end. */
-           char *last;
-           I32 oldpos = scream_pos;
-
-           if (flags & REXEC_SCREAM) {
-               last = screaminstr(sv, prog->float_substr, s - strbeg,
-                                  end_shift, &scream_pos, 1); /* last one */
-               if (!last)
-                   last = scream_olds; /* Only one occurence. */
+       case NDIGITL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isDIGIT_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
            }
-           else {
-               STRLEN len;
-               char *little = SvPV(prog->float_substr, len);
-
-               if (SvTAIL(prog->float_substr)) {
-                   if (memEQ(strend - len + 1, little, len - 1))
-                       last = strend - len + 1;
-                   else if (!PL_multiline)
-                       last = memEQ(strend - len, little, len) 
-                           ? strend - len : Nullch;
+           break;
+       case NDIGITLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isDIGIT_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
                    else
-                       goto find_last;
-               } else {
-                 find_last:
-                   if (len) 
-                       last = rninstr(s, strend, little, little + len);
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case ALNUMC:
+           while (s < strend) {
+               if (isALNUMC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
                    else
-                       last = strend;  /* matching `$' */
+                       tmp = doevery;
                }
+               else
+                   tmp = 1;
+               s++;
            }
-           if (last == NULL) goto phooey; /* Should not happen! */
-           dontbother = strend - last + prog->float_min_offset;
-       }
-       if (minlen && (dontbother < minlen))
-           dontbother = minlen - 1;
-       strend -= dontbother;              /* this one's always in bytes! */
-       /* We don't know much -- general case. */
-       if (UTF) {
-           for (;;) {
-               if (regtry(prog, s))
-                   goto got_it;
-               if (s >= strend)
-                   break;
+           break;
+       case ALNUMCUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
                s += UTF8SKIP(s);
-           };
-       }
-       else {
-           do {
-               if (regtry(prog, s))
-                   goto got_it;
-           } while (s++ < strend);
-       }
-    }
-
-    /* Failure. */
-    goto phooey;
-
-got_it:
-    RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
-
-    if (PL_reg_eval_set) {
-       /* Preserve the current value of $^R */
-       if (oreplsv != GvSV(PL_replgv))
-           sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
-                                                 restored, the value remains
-                                                 the same. */
-       restore_pos(0);
-    }
-
-    /* make sure $`, $&, $', and $digit will work later */
-    if ( !(flags & REXEC_NOT_FIRST) ) {
-       if (RX_MATCH_COPIED(prog)) {
-           Safefree(prog->subbeg);
-           RX_MATCH_COPIED_off(prog);
-       }
-       if (flags & REXEC_COPY_STR) {
-           I32 i = PL_regeol - startpos + (stringarg - strbeg);
-
-           s = savepvn(strbeg, i);
-           prog->subbeg = s;
-           prog->sublen = i;
-           RX_MATCH_COPIED_on(prog);
-       }
-       else {
-           prog->subbeg = strbeg;
-           prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
-       }
-    }
-    
-    return 1;
-
-phooey:
-    if (PL_reg_eval_set)
-       restore_pos(0);
-    return 0;
-}
-
-/*
- - regtry - try match at specific point
- */
-STATIC I32                     /* 0 failure, 1 success */
-S_regtry(pTHX_ regexp *prog, char *startpos)
-{
-    dTHR;
-    register I32 i;
-    register I32 *sp;
-    register I32 *ep;
-    CHECKPOINT lastcp;
-
-    if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
-       MAGIC *mg;
-
-       PL_reg_eval_set = RS_init;
-       DEBUG_r(DEBUG_s(
-           PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n",
-                         PL_stack_sp - PL_stack_base);
-           ));
-       SAVEINT(cxstack[cxstack_ix].blk_oldsp);
-       cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
-       /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
-       SAVETMPS;
-       /* Apparently this is not needed, judging by wantarray. */
-       /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
-          cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
-
-       if (PL_reg_sv) {
-           /* Make $_ available to executed code. */
-           if (PL_reg_sv != DEFSV) {
-               /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
-               SAVESPTR(DEFSV);
-               DEFSV = PL_reg_sv;
            }
-       
-           if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) 
-                 && (mg = mg_find(PL_reg_sv, 'g')))) {
-               /* prepare for quick setting of pos */
-               sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
-               mg = mg_find(PL_reg_sv, 'g');
-               mg->mg_len = -1;
+           break;
+       case ALNUMCL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isALNUMC_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
            }
-           PL_reg_magic    = mg;
-           PL_reg_oldpos   = mg->mg_len;
-           SAVEDESTRUCTOR(S_restore_pos, 0);
-        }
-       if (!PL_reg_curpm)
-           New(22,PL_reg_curpm, 1, PMOP);
-       PL_reg_curpm->op_pmregexp = prog;
-       PL_reg_oldcurpm = PL_curpm;
-       PL_curpm = PL_reg_curpm;
-       if (RX_MATCH_COPIED(prog)) {
-           /*  Here is a serious problem: we cannot rewrite subbeg,
-               since it may be needed if this match fails.  Thus
-               $` inside (?{}) could fail... */
-           PL_reg_oldsaved = prog->subbeg;
-           PL_reg_oldsavedlen = prog->sublen;
-           RX_MATCH_COPIED_off(prog);
-       }
-       else
-           PL_reg_oldsaved = Nullch;
-       prog->subbeg = PL_bostr;
-       prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
-    }
-    prog->startp[0] = startpos - PL_bostr;
-    PL_reginput = startpos;
-    PL_regstartp = prog->startp;
-    PL_regendp = prog->endp;
-    PL_reglastparen = &prog->lastparen;
-    prog->lastparen = 0;
-    PL_regsize = 0;
-    DEBUG_r(PL_reg_starttry = startpos);
-    if (PL_reg_start_tmpl <= prog->nparens) {
-       PL_reg_start_tmpl = prog->nparens*3/2 + 3;
-        if(PL_reg_start_tmp)
-            Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
-        else
-            New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
-    }
-
-    /* XXXX What this code is doing here?!!!  There should be no need
-       to do this again and again, PL_reglastparen should take care of
-       this!  */
-    sp = prog->startp;
-    ep = prog->endp;
-    if (prog->nparens) {
-       for (i = prog->nparens; i >= 1; i--) {
-           *++sp = -1;
-           *++ep = -1;
+           break;
+       case ALNUMCLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isALNUMC_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NALNUMC:
+           while (s < strend) {
+               if (!isALNUMC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NALNUMCUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NALNUMCL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isALNUMC_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NALNUMCLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isALNUMC_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case ASCII:
+           while (s < strend) {
+               if (isASCII(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NASCII:
+           while (s < strend) {
+               if (!isASCII(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case CNTRL:
+           while (s < strend) {
+               if (isCNTRL(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case CNTRLUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case CNTRLL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isCNTRL_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case CNTRLLUTF8:	/* scan for a locale control character, stepping one UTF-8 character at a time */
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isCNTRL_LC_utf8((U8*)s)) {	/* no ' ' shortcut: a blank is not a control character */
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;	/* after a try, later positions of this run are tried only if doevery */
+               }
+               else
+                   tmp = 1;	/* run ended: the next class character is tried again */
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NCNTRL:
+           while (s < strend) {
+               if (!isCNTRL(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NCNTRLUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NCNTRLL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isCNTRL_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NCNTRLLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isCNTRL_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case GRAPH:
+           while (s < strend) {
+               if (isGRAPH(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case GRAPHUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_graph,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case GRAPHL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isGRAPH_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case GRAPHLUTF8:	/* scan for a locale graphical character, stepping one UTF-8 character at a time */
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isGRAPH_LC_utf8((U8*)s)) {	/* no ' ' shortcut: a blank is not a graphical character */
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;	/* after a try, later positions of this run are tried only if doevery */
+               }
+               else
+                   tmp = 1;	/* run ended: the next class character is tried again */
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NGRAPH:
+           while (s < strend) {
+               if (!isGRAPH(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NGRAPHUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NGRAPHL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isGRAPH_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NGRAPHLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isGRAPH_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case LOWER:
+           while (s < strend) {
+               if (isLOWER(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case LOWERUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_lower,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case LOWERL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isLOWER_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case LOWERLUTF8:	/* scan for a locale lowercase character, stepping one UTF-8 character at a time */
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isLOWER_LC_utf8((U8*)s)) {	/* no ' ' shortcut: a blank is not a lowercase character */
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;	/* after a try, later positions of this run are tried only if doevery */
+               }
+               else
+                   tmp = 1;	/* run ended: the next class character is tried again */
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NLOWER:
+           while (s < strend) {
+               if (!isLOWER(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NLOWERUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NLOWERL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isLOWER_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NLOWERLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isLOWER_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case PRINT:
+           while (s < strend) {
+               if (isPRINT(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case PRINTUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_print,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case PRINTL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isPRINT_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case PRINTLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (*s == ' ' || isPRINT_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NPRINT:
+           while (s < strend) {
+               if (!isPRINT(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NPRINTUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_print,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NPRINTL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isPRINT_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NPRINTLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isPRINT_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case PUNCT:
+           while (s < strend) {
+               if (isPUNCT(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case PUNCTUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_punct,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case PUNCTL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isPUNCT_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case PUNCTLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (*s == ' ' || isPUNCT_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NPUNCT:
+           while (s < strend) {
+               if (!isPUNCT(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NPUNCTUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NPUNCTL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isPUNCT_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NPUNCTLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isPUNCT_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case UPPER:
+           while (s < strend) {
+               if (isUPPER(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case UPPERUTF8:
+           while (s < strend) {
+               if (swash_fetch(PL_utf8_upper,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case UPPERL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (isUPPER_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case UPPERLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NUPPER:
+           while (s < strend) {
+               if (!isUPPER(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NUPPERUTF8:
+           while (s < strend) {
+               if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case NUPPERL:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isUPPER_LC(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NUPPERLUTF8:
+           PL_reg_flags |= RF_tainted;
+           while (s < strend) {
+               if (!isUPPER_LC_utf8((U8*)s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s += UTF8SKIP(s);
+           }
+           break;
+       case XDIGIT:
+           while (s < strend) {
+               if (isXDIGIT(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       case NXDIGIT:
+           while (s < strend) {
+               if (!isXDIGIT(*s)) {
+                   if (tmp && regtry(prog, s))
+                       goto got_it;
+                   else
+                       tmp = doevery;
+               }
+               else
+                   tmp = 1;
+               s++;
+           }
+           break;
+       }
+    }
+    else {
+       dontbother = 0;
+       if (prog->float_substr != Nullsv) {     /* Trim the end. */
+           char *last;
+           I32 oldpos = scream_pos;
+
+           if (flags & REXEC_SCREAM) {
+               last = screaminstr(sv, prog->float_substr, s - strbeg,
+                                  end_shift, &scream_pos, 1); /* last one */
+               if (!last)
+                   last = scream_olds; /* Only one occurrence. */
+           }
+           else {
+               STRLEN len;
+               char *little = SvPV(prog->float_substr, len);
+
+               if (SvTAIL(prog->float_substr)) {
+                   if (memEQ(strend - len + 1, little, len - 1))
+                       last = strend - len + 1;
+                   else if (!PL_multiline)
+                       last = memEQ(strend - len, little, len) 
+                           ? strend - len : Nullch;
+                   else
+                       goto find_last;
+               } else {
+                 find_last:
+                   if (len) 
+                       last = rninstr(s, strend, little, little + len);
+                   else
+                       last = strend;  /* matching `$' */
+               }
+           }
+           if (last == NULL) goto phooey; /* Should not happen! */
+           dontbother = strend - last + prog->float_min_offset;
+       }
+       if (minlen && (dontbother < minlen))
+           dontbother = minlen - 1;
+       strend -= dontbother;              /* this one's always in bytes! */
+       /* We don't know much -- general case. */
+       if (UTF) {
+           for (;;) {
+               if (regtry(prog, s))
+                   goto got_it;
+               if (s >= strend)
+                   break;
+               s += UTF8SKIP(s);
+           };
+       }
+       else {
+           do {
+               if (regtry(prog, s))
+                   goto got_it;
+           } while (s++ < strend);
+       }
+    }
+
+    /* Failure. */
+    goto phooey;
+
+got_it:
+    RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
+
+    if (PL_reg_eval_set) {
+       /* Preserve the current value of $^R */
+       if (oreplsv != GvSV(PL_replgv))
+           sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
+                                                 restored, the value remains
+                                                 the same. */
+       restore_pos(0);
+    }
+
+    /* make sure $`, $&, $', and $digit will work later */
+    if ( !(flags & REXEC_NOT_FIRST) ) {
+       if (RX_MATCH_COPIED(prog)) {
+           Safefree(prog->subbeg);
+           RX_MATCH_COPIED_off(prog);
+       }
+       if (flags & REXEC_COPY_STR) {
+           I32 i = PL_regeol - startpos + (stringarg - strbeg);
+
+           s = savepvn(strbeg, i);
+           prog->subbeg = s;
+           prog->sublen = i;
+           RX_MATCH_COPIED_on(prog);
+       }
+       else {
+           prog->subbeg = strbeg;
+           prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
+       }
+    }
+    
+    return 1;
+
+phooey:
+    if (PL_reg_eval_set)
+       restore_pos(0);
+    return 0;
+}
+
+/*
+ - regtry - try match at specific point
+ */
+STATIC I32                     /* 0 failure, 1 success */
+S_regtry(pTHX_ regexp *prog, char *startpos)
+{
+    dTHR;
+    register I32 i;
+    register I32 *sp;
+    register I32 *ep;
+    CHECKPOINT lastcp;
+
+    if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
+       MAGIC *mg;
+
+       PL_reg_eval_set = RS_init;
+       DEBUG_r(DEBUG_s(
+           PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %i\n",
+                         PL_stack_sp - PL_stack_base);
+           ));
+       SAVEINT(cxstack[cxstack_ix].blk_oldsp);
+       cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
+       /* Otherwise OP_NEXTSTATE will free whatever is on the stack now.  */
+       SAVETMPS;
+       /* Apparently this is not needed, judging by wantarray. */
+       /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
+          cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
+
+       if (PL_reg_sv) {
+           /* Make $_ available to executed code. */
+           if (PL_reg_sv != DEFSV) {
+               /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
+               SAVESPTR(DEFSV);
+               DEFSV = PL_reg_sv;
+           }
+       
+           if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv) 
+                 && (mg = mg_find(PL_reg_sv, 'g')))) {
+               /* prepare for quick setting of pos */
+               sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
+               mg = mg_find(PL_reg_sv, 'g');
+               mg->mg_len = -1;
+           }
+           PL_reg_magic    = mg;
+           PL_reg_oldpos   = mg->mg_len;
+           SAVEDESTRUCTOR(S_restore_pos, 0);
+        }
+       if (!PL_reg_curpm)
+           New(22,PL_reg_curpm, 1, PMOP);
+       PL_reg_curpm->op_pmregexp = prog;
+       PL_reg_oldcurpm = PL_curpm;
+       PL_curpm = PL_reg_curpm;
+       if (RX_MATCH_COPIED(prog)) {
+           /*  Here is a serious problem: we cannot rewrite subbeg,
+               since it may be needed if this match fails.  Thus
+               $` inside (?{}) could fail... */
+           PL_reg_oldsaved = prog->subbeg;
+           PL_reg_oldsavedlen = prog->sublen;
+           RX_MATCH_COPIED_off(prog);
+       }
+       else
+           PL_reg_oldsaved = Nullch;
+       prog->subbeg = PL_bostr;
+       prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
+    }
+    prog->startp[0] = startpos - PL_bostr;
+    PL_reginput = startpos;
+    PL_regstartp = prog->startp;
+    PL_regendp = prog->endp;
+    PL_reglastparen = &prog->lastparen;
+    prog->lastparen = 0;
+    PL_regsize = 0;
+    DEBUG_r(PL_reg_starttry = startpos);
+    if (PL_reg_start_tmpl <= prog->nparens) {
+       PL_reg_start_tmpl = prog->nparens*3/2 + 3;
+        if(PL_reg_start_tmp)
+            Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
+        else
+            New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
+    }
+
+    /* XXXX What is this code doing here?!!!  There should be no need
+       to do this again and again; PL_reglastparen should take care of
+       this!  */
+    sp = prog->startp;
+    ep = prog->endp;
+    if (prog->nparens) {
+       for (i = prog->nparens; i >= 1; i--) {
+           *++sp = -1;
+           *++ep = -1;
        }
     }
     REGCP_SET;
@@ -1390,173 +2254,723 @@ S_regmatch(pTHX_ regnode *prog)
                /* regtill = regbol; */
                break;
            }
-           sayNO;
-       case MBOL:
-           if (locinput == PL_bostr
-               ? PL_regprev == '\n'
-               : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
-           {
+           sayNO;
+       case MBOL:
+           if (locinput == PL_bostr
+               ? PL_regprev == '\n'
+               : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
+           {
+               break;
+           }
+           sayNO;
+       case SBOL:
+           if (locinput == PL_regbol && PL_regprev == '\n')
+               break;
+           sayNO;
+       case GPOS:
+           if (locinput == PL_reg_ganch)
+               break;
+           sayNO;
+       case EOL:
+           if (PL_multiline)
+               goto meol;
+           else
+               goto seol;
+       case MEOL:
+         meol:
+           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+               sayNO;
+           break;
+       case SEOL:
+         seol:
+           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+               sayNO;
+           if (PL_regeol - locinput > 1)
+               sayNO;
+           break;
+       case EOS:
+           if (PL_regeol != locinput)
+               sayNO;
+           break;
+       case SANYUTF8:
+           if (nextchr & 0x80) {
+               locinput += PL_utf8skip[nextchr];
+               if (locinput > PL_regeol)
+                   sayNO;
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case SANY:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ANYUTF8:
+           if (nextchr & 0x80) {
+               locinput += PL_utf8skip[nextchr];
+               if (locinput > PL_regeol)
+                   sayNO;
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case REG_ANY:
+           if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case EXACT:
+           s = (char *) OPERAND(scan);
+           ln = UCHARAT(s++);
+           /* Inline the first character, for speed. */
+           if (UCHARAT(s) != nextchr)
+               sayNO;
+           if (PL_regeol - locinput < ln)
+               sayNO;
+           if (ln > 1 && memNE(s, locinput, ln))
+               sayNO;
+           locinput += ln;
+           nextchr = UCHARAT(locinput);
+           break;
+       case EXACTFL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case EXACTF:
+           s = (char *) OPERAND(scan);
+           ln = UCHARAT(s++);
+
+           if (UTF) {
+               char *l = locinput;
+               char *e = s + ln;
+               c1 = OP(scan) == EXACTF;
+               while (s < e) {
+                   if (l >= PL_regeol)
+                       sayNO;
+                   if (utf8_to_uv((U8*)s, 0) != (c1 ?
+                                                 toLOWER_utf8((U8*)l) :
+                                                 toLOWER_LC_utf8((U8*)l)))
+                   {
+                       sayNO;
+                   }
+                   s += UTF8SKIP(s);
+                   l += UTF8SKIP(l);
+               }
+               locinput = l;
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+
+           /* Inline the first character, for speed. */
+           if (UCHARAT(s) != nextchr &&
+               UCHARAT(s) != ((OP(scan) == EXACTF)
+                              ? PL_fold : PL_fold_locale)[nextchr])
+               sayNO;
+           if (PL_regeol - locinput < ln)
+               sayNO;
+           if (ln > 1 && (OP(scan) == EXACTF
+                          ? ibcmp(s, locinput, ln)
+                          : ibcmp_locale(s, locinput, ln)))
+               sayNO;
+           locinput += ln;
+           nextchr = UCHARAT(locinput);
+           break;
+       case ANYOFUTF8:
+           s = (char *) OPERAND(scan);
+           if (!REGINCLASSUTF8(scan, (U8*)locinput))
+               sayNO;
+           if (locinput >= PL_regeol)
+               sayNO;
+           locinput += PL_utf8skip[nextchr];
+           nextchr = UCHARAT(locinput);
+           break;
+       case ANYOF:
+           s = (char *) OPERAND(scan);
+           if (nextchr < 0)
+               nextchr = UCHARAT(locinput);
+           if (!REGINCLASS(s, nextchr))
+               sayNO;
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ALNUML:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case ALNUM:
+           if (!nextchr)
+               sayNO;
+           if (!(OP(scan) == ALNUM
+                 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ALNUMLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case ALNUMUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == ALNUMUTF8
+                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+                     : isALNUM_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!(OP(scan) == ALNUMUTF8
+                 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NALNUML:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NALNUM:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (OP(scan) == NALNUM
+               ? isALNUM(nextchr) : isALNUM_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NALNUMLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NALNUMUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (OP(scan) == NALNUMUTF8
+                   ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+                   : isALNUM_LC_utf8((U8*)locinput))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (OP(scan) == NALNUMUTF8
+               ? isALNUM(nextchr) : isALNUM_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case BOUNDL:
+       case NBOUNDL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case BOUND:
+       case NBOUND:
+           /* was last char in word? */
+           ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
+           if (OP(scan) == BOUND || OP(scan) == NBOUND) {
+               ln = isALNUM(ln);
+               n = isALNUM(nextchr);
+           }
+           else {
+               ln = isALNUM_LC(ln);
+               n = isALNUM_LC(nextchr);
+           }
+           if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
+               sayNO;
+           break;
+       case BOUNDLUTF8:
+       case NBOUNDLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case BOUNDUTF8:
+       case NBOUNDUTF8:
+           /* was last char in word? */
+           ln = (locinput != PL_regbol)
+               ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
+           if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
+               ln = isALNUM_uni(ln);
+               n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
+           }
+           else {
+               ln = isALNUM_LC_uni(ln);
+               n = isALNUM_LC_utf8((U8*)locinput);
+           }
+           if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
+               sayNO;
+           break;
+       case SPACEL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case SPACE:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (!(OP(scan) == SPACE
+                 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case SPACELUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case SPACEUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == SPACEUTF8
+                     ? swash_fetch(PL_utf8_space,(U8*)locinput)
+                     : isSPACE_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!(OP(scan) == SPACEUTF8
+                 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NSPACEL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NSPACE:
+           if (!nextchr)
+               sayNO;
+           if (OP(scan) == SPACE
+               ? isSPACE(nextchr) : isSPACE_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NSPACELUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NSPACEUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (OP(scan) == NSPACEUTF8
+                   ? swash_fetch(PL_utf8_space,(U8*)locinput)
+                   : isSPACE_LC_utf8((U8*)locinput))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
                break;
            }
-           sayNO;
-       case SBOL:
-           if (locinput == PL_regbol && PL_regprev == '\n')
+           if (OP(scan) == NSPACEUTF8
+               ? isSPACE(nextchr) : isSPACE_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case DIGITL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case DIGIT:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (!(OP(scan) == DIGIT
+                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case DIGITLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case DIGITUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (OP(scan) == NDIGITUTF8
+                   ? swash_fetch(PL_utf8_digit,(U8*)locinput)
+                   : isDIGIT_LC_utf8((U8*)locinput))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
                break;
-           sayNO;
-       case GPOS:
-           if (locinput == PL_reg_ganch)
+           }
+           if (!isDIGIT(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NDIGITL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NDIGIT:
+           if (!nextchr)
+               sayNO;
+           if (OP(scan) == DIGIT
+               ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NDIGITLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NDIGITUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (swash_fetch(PL_utf8_digit,(U8*)locinput))
+                   sayNO;
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
                break;
-           sayNO;
-       case EOL:
-           if (PL_multiline)
-               goto meol;
-           else
-               goto seol;
-       case MEOL:
-         meol:
-           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+           }
+           if (isDIGIT(nextchr))
                sayNO;
+           nextchr = UCHARAT(++locinput);
            break;
-       case SEOL:
-         seol:
-           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+       case ALNUMCL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case ALNUMC:
+           if (!nextchr)
                sayNO;
-           if (PL_regeol - locinput > 1)
+           if (!(OP(scan) == ALNUMC
+                 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ALNUMCLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case ALNUMCUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == ALNUMCUTF8
+                     ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
+                     : isALNUMC_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!(OP(scan) == ALNUMCUTF8
+                 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NALNUMCL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NALNUMC:
+           if (!nextchr)
+               sayNO;
+           if (OP(scan) == ALNUMC
+               ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NALNUMCLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NALNUMCUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
+                   sayNO;
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (isALNUMC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ALPHAL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case ALPHA:
+           if (!nextchr)
+               sayNO;
+           if (!(OP(scan) == ALPHA
+                 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ALPHALUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case ALPHAUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == ALPHAUTF8
+                     ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
+                     : isALPHA_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!(OP(scan) == ALPHAUTF8
+                 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NALPHAL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NALPHA:
+           if (!nextchr)
+               sayNO;
+           if (OP(scan) == ALPHA
+               ? isALPHA(nextchr) : isALPHA_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NALPHALUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NALPHAUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
+                   sayNO;
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (isALPHA(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case ASCII:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (!isASCII(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case NASCII:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
+           if (isASCII(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case CNTRLL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case CNTRL:
+           if (!nextchr)
+               sayNO;
+           if (!(OP(scan) == CNTRL
+                 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
+           break;
+       case CNTRLLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case CNTRLUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == CNTRLUTF8
+                     ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
+                     : isCNTRL_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (!(OP(scan) == CNTRLUTF8
+                 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
                sayNO;
+           nextchr = UCHARAT(++locinput);
            break;
-       case EOS:
-           if (PL_regeol != locinput)
+       case NCNTRLL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH: NCNTRLL shares the test below via the _LC predicate */
+       case NCNTRL:
+           if (!nextchr)
                sayNO;
+           if (OP(scan) == NCNTRL  /* NCNTRL: plain test; NCNTRLL: locale test */
+               ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
            break;
-       case SANYUTF8:
+       case NCNTRLLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NCNTRLUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
            if (nextchr & 0x80) {
-               locinput += PL_utf8skip[nextchr];
-               if (locinput > PL_regeol)
+               if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
                    sayNO;
+               locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (!nextchr && locinput >= PL_regeol)
+           if (isCNTRL(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case SANY:
-           if (!nextchr && locinput >= PL_regeol)
+       case GRAPHL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case GRAPH:
+           if (!nextchr)
+               sayNO;
+           if (!(OP(scan) == GRAPH
+                 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case ANYUTF8:
+       case GRAPHLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case GRAPHUTF8:
+           if (!nextchr)
+               sayNO;
            if (nextchr & 0x80) {
-               locinput += PL_utf8skip[nextchr];
-               if (locinput > PL_regeol)
+               if (!(OP(scan) == GRAPHUTF8
+                     ? swash_fetch(PL_utf8_graph, (U8*)locinput)
+                     : isGRAPH_LC_utf8((U8*)locinput)))
+               {
                    sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
+           if (!(OP(scan) == GRAPHUTF8
+                 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case REG_ANY:
-           if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
+       case NGRAPHL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH: NGRAPHL shares the test below via the _LC predicate */
+       case NGRAPH:
+           if (!nextchr && locinput >= PL_regeol) /* fail only at end of input; an embedded NUL is a character */
+               sayNO;
+           if (OP(scan) == NGRAPH  /* NGRAPH: plain test; NGRAPHL: locale test */
+               ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case EXACT:
-           s = (char *) OPERAND(scan);
-           ln = UCHARAT(s++);
-           /* Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchr)
+       case NGRAPHLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NGRAPHUTF8:
+           if (!nextchr && locinput >= PL_regeol)
                sayNO;
-           if (PL_regeol - locinput < ln)
+           if (nextchr & 0x80) {
+               if (swash_fetch(PL_utf8_graph,(U8*)locinput))
+                   sayNO;
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (isGRAPH(nextchr))
                sayNO;
-           if (ln > 1 && memNE(s, locinput, ln))
+           nextchr = UCHARAT(++locinput);
+           break;
+       case LOWERL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case LOWER:
+           if (!nextchr)
                sayNO;
-           locinput += ln;
-           nextchr = UCHARAT(locinput);
+           if (!(OP(scan) == LOWER
+                 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
+               sayNO;
+           nextchr = UCHARAT(++locinput);
            break;
-       case EXACTFL:
+       case LOWERLUTF8:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case EXACTF:
-           s = (char *) OPERAND(scan);
-           ln = UCHARAT(s++);
-
-           if (UTF) {
-               char *l = locinput;
-               char *e = s + ln;
-               c1 = OP(scan) == EXACTF;
-               while (s < e) {
-                   if (l >= PL_regeol)
-                       sayNO;
-                   if (utf8_to_uv((U8*)s, 0) != (c1 ?
-                                                 toLOWER_utf8((U8*)l) :
-                                                 toLOWER_LC_utf8((U8*)l)))
-                   {
-                       sayNO;
-                   }
-                   s += UTF8SKIP(s);
-                   l += UTF8SKIP(l);
+       case LOWERUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == LOWERUTF8
+                     ? swash_fetch(PL_utf8_lower, (U8*)locinput)
+                     : isLOWER_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
                }
-               locinput = l;
+               locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
                break;
            }
-
-           /* Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchr &&
-               UCHARAT(s) != ((OP(scan) == EXACTF)
-                              ? PL_fold : PL_fold_locale)[nextchr])
-               sayNO;
-           if (PL_regeol - locinput < ln)
-               sayNO;
-           if (ln > 1 && (OP(scan) == EXACTF
-                          ? ibcmp(s, locinput, ln)
-                          : ibcmp_locale(s, locinput, ln)))
+           if (!(OP(scan) == LOWERUTF8
+                 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
                sayNO;
-           locinput += ln;
-           nextchr = UCHARAT(locinput);
+           nextchr = UCHARAT(++locinput);
            break;
-       case ANYOFUTF8:
-           s = (char *) OPERAND(scan);
-           if (!REGINCLASSUTF8(scan, (U8*)locinput))
+       case NLOWERL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH: NLOWERL shares the test below via the _LC predicate */
+       case NLOWER:
+           if (!nextchr && locinput >= PL_regeol) /* fail only at end of input; an embedded NUL is a character */
                sayNO;
-           if (locinput >= PL_regeol)
+           if (OP(scan) == NLOWER  /* NLOWER: plain test; NLOWERL: locale test */
+               ? isLOWER(nextchr) : isLOWER_LC(nextchr))
                sayNO;
-           locinput += PL_utf8skip[nextchr];
-           nextchr = UCHARAT(locinput);
+           nextchr = UCHARAT(++locinput);
            break;
-       case ANYOF:
-           s = (char *) OPERAND(scan);
-           if (nextchr < 0)
-               nextchr = UCHARAT(locinput);
-           if (!REGINCLASS(s, nextchr))
-               sayNO;
+       case NLOWERLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NLOWERUTF8:
            if (!nextchr && locinput >= PL_regeol)
                sayNO;
+           if (nextchr & 0x80) {
+               if (swash_fetch(PL_utf8_lower,(U8*)locinput))
+                   sayNO;
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
+           }
+           if (isLOWER(nextchr))
+               sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case ALNUML:
+       case PRINTL:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case ALNUM:
+       case PRINT:
            if (!nextchr)
                sayNO;
-           if (!(OP(scan) == ALNUM
-                 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
+           if (!(OP(scan) == PRINT
+                 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case ALNUMLUTF8:
+       case PRINTLUTF8:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case ALNUMUTF8:
+       case PRINTUTF8:
            if (!nextchr)
                sayNO;
            if (nextchr & 0x80) {
-               if (!(OP(scan) == ALNUMUTF8
-                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
-                     : isALNUM_LC_utf8((U8*)locinput)))
+               if (!(OP(scan) == PRINTUTF8
+                     ? swash_fetch(PL_utf8_print, (U8*)locinput)
+                     : isPRINT_LC_utf8((U8*)locinput)))
                {
                    sayNO;
                }
@@ -1564,137 +2978,121 @@ S_regmatch(pTHX_ regnode *prog)
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (!(OP(scan) == ALNUMUTF8
-                 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
+           if (!(OP(scan) == PRINTUTF8
+                 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case NALNUML:
+       case NPRINTL:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case NALNUM:
-           if (!nextchr && locinput >= PL_regeol)
+       case NPRINT:
+           if (!nextchr && locinput >= PL_regeol) /* fail only at end of input; an embedded NUL is a character */
                sayNO;
-           if (OP(scan) == NALNUM
-               ? isALNUM(nextchr) : isALNUM_LC(nextchr))
+           if (OP(scan) == NPRINT  /* NPRINT: plain test; NPRINTL: locale test */
+               ? isPRINT(nextchr) : isPRINT_LC(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case NALNUMLUTF8:
+       case NPRINTLUTF8:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case NALNUMUTF8:
+       case NPRINTUTF8:
            if (!nextchr && locinput >= PL_regeol)
                sayNO;
            if (nextchr & 0x80) {
-               if (OP(scan) == NALNUMUTF8
-                   ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
-                   : isALNUM_LC_utf8((U8*)locinput))
-               {
+               if (swash_fetch(PL_utf8_print,(U8*)locinput))
                    sayNO;
-               }
                locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (OP(scan) == NALNUMUTF8
-               ? isALNUM(nextchr) : isALNUM_LC(nextchr))
+           if (isPRINT(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case BOUNDL:
-       case NBOUNDL:
+       case PUNCTL:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case BOUND:
-       case NBOUND:
-           /* was last char in word? */
-           ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
-           if (OP(scan) == BOUND || OP(scan) == NBOUND) {
-               ln = isALNUM(ln);
-               n = isALNUM(nextchr);
-           }
-           else {
-               ln = isALNUM_LC(ln);
-               n = isALNUM_LC(nextchr);
-           }
-           if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
+       case PUNCT:
+           if (!nextchr)
+               sayNO;
+           if (!(OP(scan) == PUNCT
+                 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
                sayNO;
+           nextchr = UCHARAT(++locinput);
            break;
-       case BOUNDLUTF8:
-       case NBOUNDLUTF8:
+       case PUNCTLUTF8:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case BOUNDUTF8:
-       case NBOUNDUTF8:
-           /* was last char in word? */
-           ln = (locinput != PL_regbol)
-               ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
-           if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
-               ln = isALNUM_uni(ln);
-               n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
-           }
-           else {
-               ln = isALNUM_LC_uni(ln);
-               n = isALNUM_LC_utf8((U8*)locinput);
+       case PUNCTUTF8:
+           if (!nextchr)
+               sayNO;
+           if (nextchr & 0x80) {
+               if (!(OP(scan) == PUNCTUTF8
+                     ? swash_fetch(PL_utf8_punct, (U8*)locinput)
+                     : isPUNCT_LC_utf8((U8*)locinput)))
+               {
+                   sayNO;
+               }
+               locinput += PL_utf8skip[nextchr];
+               nextchr = UCHARAT(locinput);
+               break;
            }
-           if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
+           if (!(OP(scan) == PUNCTUTF8
+                 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
                sayNO;
+           nextchr = UCHARAT(++locinput);
            break;
-       case SPACEL:
+       case NPUNCTL:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case SPACE:
-           if (!nextchr && locinput >= PL_regeol)
+       case NPUNCT:
+           if (!nextchr && locinput >= PL_regeol) /* fail only at end of input; an embedded NUL is a character */
                sayNO;
-           if (!(OP(scan) == SPACE
-                 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
+           if (OP(scan) == NPUNCT  /* NPUNCT: plain test; NPUNCTL: locale test */
+               ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case SPACELUTF8:
+       case NPUNCTLUTF8:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case SPACEUTF8:
+       case NPUNCTUTF8:
            if (!nextchr && locinput >= PL_regeol)
                sayNO;
            if (nextchr & 0x80) {
-               if (!(OP(scan) == SPACEUTF8
-                     ? swash_fetch(PL_utf8_space,(U8*)locinput)
-                     : isSPACE_LC_utf8((U8*)locinput)))
-               {
+               if (swash_fetch(PL_utf8_punct,(U8*)locinput))
                    sayNO;
-               }
                locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (!(OP(scan) == SPACEUTF8
-                 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
+           if (isPUNCT(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case NSPACEL:
+       case UPPERL:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case NSPACE:
+       case UPPER:
            if (!nextchr)
                sayNO;
-           if (OP(scan) == SPACE
-               ? isSPACE(nextchr) : isSPACE_LC(nextchr))
+           if (!(OP(scan) == UPPER
+                 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case NSPACELUTF8:
+       case UPPERLUTF8:
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case NSPACEUTF8:
+       case UPPERUTF8:
            if (!nextchr)
                sayNO;
            if (nextchr & 0x80) {
-               if (OP(scan) == NSPACEUTF8
-                   ? swash_fetch(PL_utf8_space,(U8*)locinput)
-                   : isSPACE_LC_utf8((U8*)locinput))
+               if (!(OP(scan) == UPPERUTF8
+                     ? swash_fetch(PL_utf8_upper, (U8*)locinput)
+                     : isUPPER_LC_utf8((U8*)locinput)))
                {
                    sayNO;
                }
@@ -1702,46 +3100,50 @@ S_regmatch(pTHX_ regnode *prog)
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (OP(scan) == NSPACEUTF8
-               ? isSPACE(nextchr) : isSPACE_LC(nextchr))
+           if (!(OP(scan) == UPPERUTF8
+                 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case DIGIT:
-           if (!isDIGIT(nextchr))
+       case NUPPERL:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH: NUPPERL shares the test below via the _LC predicate */
+       case NUPPER:
+           if (!nextchr && locinput >= PL_regeol) /* fail only at end of input; an embedded NUL is a character */
+               sayNO;
+           if (OP(scan) == NUPPER  /* NUPPER: plain test; NUPPERL: locale test */
+               ? isUPPER(nextchr) : isUPPER_LC(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case DIGITUTF8:
+       case NUPPERLUTF8:
+           PL_reg_flags |= RF_tainted;
+           /* FALL THROUGH */
+       case NUPPERUTF8:
+           if (!nextchr && locinput >= PL_regeol)
+               sayNO;
            if (nextchr & 0x80) {
-               if (!(swash_fetch(PL_utf8_digit,(U8*)locinput)))
+               if (swash_fetch(PL_utf8_upper,(U8*)locinput))
                    sayNO;
                locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
                break;
            }
-           if (!isDIGIT(nextchr))
+           if (isUPPER(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case NDIGIT:
+       case XDIGIT:
            if (!nextchr && locinput >= PL_regeol)
                sayNO;
-           if (isDIGIT(nextchr))
+           if (!isXDIGIT(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
-       case NDIGITUTF8:
+       case NXDIGIT:
            if (!nextchr && locinput >= PL_regeol)
                sayNO;
-           if (nextchr & 0x80) {
-               if (swash_fetch(PL_utf8_digit,(U8*)locinput))
-                   sayNO;
-               locinput += PL_utf8skip[nextchr];
-               nextchr = UCHARAT(locinput);
-               break;
-           }
-           if (isDIGIT(nextchr))
+           if (isXDIGIT(nextchr))
                sayNO;
            nextchr = UCHARAT(++locinput);
            break;
@@ -2920,11 +4322,11 @@ STATIC bool
 S_reginclass(pTHX_ register char *p, register I32 c)
 {
     dTHR;
-    char flags = *p;
+    char flags = ANYOF_FLAGS(p);
     bool match = FALSE;
 
     c &= 0xFF;
-    if (ANYOF_TEST(p, c))
+    if (ANYOF_BITMAP_TEST(p, c))
        match = TRUE;
     else if (flags & ANYOF_FOLD) {
        I32 cf;
@@ -2934,17 +4336,40 @@ S_reginclass(pTHX_ register char *p, register I32 c)
        }
        else
            cf = PL_fold[c];
-       if (ANYOF_TEST(p, cf))
+       if (ANYOF_BITMAP_TEST(p, cf))
            match = TRUE;
     }
 
-    if (!match && (flags & ANYOF_ISA)) {
+    if (!match && (flags & ANYOF_CLASS)) {
        PL_reg_flags |= RF_tainted;
-
-       if (((flags & ANYOF_ALNUML)  && isALNUM_LC(c))  ||
-           ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
-           ((flags & ANYOF_SPACEL)  && isSPACE_LC(c))  ||
-           ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
+       if (
+           (ANYOF_CLASS_TEST(p, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
+           (ANYOF_CLASS_TEST(p, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_ASCII)   &&  isASCII(c))     ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NASCII)  && !isASCII(c))     ||
+           (ANYOF_CLASS_TEST(p, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
+           (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
+           (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
+           ) /* How's that for a conditional? */
        {
            match = TRUE;
        }
@@ -2976,17 +4401,7 @@ S_reginclassutf8(pTHX_ regnode *f, U8 *p)
            match = TRUE;
     }
 
-    if (!match && (flags & ANYOF_ISA)) {
-       PL_reg_flags |= RF_tainted;
-
-       if (((flags & ANYOF_ALNUML)  && isALNUM_LC_utf8(p))  ||
-           ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
-           ((flags & ANYOF_SPACEL)  && isSPACE_LC_utf8(p))  ||
-           ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
-       {
-           match = TRUE;
-       }
-    }
+    /* UTF8 combined with ANYOF_CLASS is ill-defined. */
 
     return (flags & ANYOF_INVERT) ? !match : match;
 }
index 030fa1a..cdc6dd4 100644 (file)
 #define        NSPACELUTF8     39      /* 0x27 Match any non-whitespace char in locale */
 #define        DIGIT   40      /* 0x28 Match any numeric character */
 #define        DIGITUTF8       41      /* 0x29 Match any numeric character */
-#define        NDIGIT  42      /* 0x2a Match any non-numeric character */
-#define        NDIGITUTF8      43      /* 0x2b Match any non-numeric character */
-#define        CLUMP   44      /* 0x2c Match any combining character sequence */
-#define        BRANCH  45      /* 0x2d Match this alternative, or the next... */
-#define        BACK    46      /* 0x2e Match "", "next" ptr points backward. */
-#define        EXACT   47      /* 0x2f Match this string (preceded by length). */
-#define        EXACTF  48      /* 0x30 Match this string, folded (prec. by length). */
-#define        EXACTFL 49      /* 0x31 Match this string, folded in locale (w/len). */
-#define        NOTHING 50      /* 0x32 Match empty string. */
-#define        TAIL    51      /* 0x33 Match empty string. Can jump here from outside. */
-#define        STAR    52      /* 0x34 Match this (simple) thing 0 or more times. */
-#define        PLUS    53      /* 0x35 Match this (simple) thing 1 or more times. */
-#define        CURLY   54      /* 0x36 Match this simple thing {n,m} times. */
-#define        CURLYN  55      /* 0x37 Match next-after-this simple thing  */
-#define        CURLYM  56      /* 0x38 Match this medium-complex thing {n,m} times. */
-#define        CURLYX  57      /* 0x39 Match this complex thing {n,m} times. */
-#define        WHILEM  58      /* 0x3a Do curly processing and see if rest matches. */
-#define        OPEN    59      /* 0x3b Mark this point in input as start of #n. */
-#define        CLOSE   60      /* 0x3c Analogous to OPEN. */
-#define        REF     61      /* 0x3d Match some already matched string */
-#define        REFF    62      /* 0x3e Match already matched string, folded */
-#define        REFFL   63      /* 0x3f Match already matched string, folded in loc. */
-#define        IFMATCH 64      /* 0x40 Succeeds if the following matches. */
-#define        UNLESSM 65      /* 0x41 Fails if the following matches. */
-#define        SUSPEND 66      /* 0x42 "Independent" sub-RE. */
-#define        IFTHEN  67      /* 0x43 Switch, should be preceeded by switcher . */
-#define        GROUPP  68      /* 0x44 Whether the group matched. */
-#define        LONGJMP 69      /* 0x45 Jump far away. */
-#define        BRANCHJ 70      /* 0x46 BRANCH with long offset. */
-#define        EVAL    71      /* 0x47 Execute some Perl code. */
-#define        MINMOD  72      /* 0x48 Next operator is not greedy. */
-#define        LOGICAL 73      /* 0x49 Next opcode should set the flag only. */
-#define        RENUM   74      /* 0x4a Group with independently numbered parens. */
-#define        OPTIMIZED       75      /* 0x4b Placeholder for dump. */
+#define        DIGITL  42      /* 0x2a Match any numeric character in locale */
+#define        DIGITLUTF8      43      /* 0x2b Match any numeric character in locale */
+#define        NDIGIT  44      /* 0x2c Match any non-numeric character */
+#define        NDIGITUTF8      45      /* 0x2d Match any non-numeric character */
+#define        NDIGITL 46      /* 0x2e Match any non-numeric character in locale */
+#define        NDIGITLUTF8     47      /* 0x2f Match any non-numeric character in locale */
+#define        ALNUMC  48      /* 0x30 Match any alphanumeric character */
+#define        ALNUMCUTF8      49      /* 0x31 Match any alphanumeric character */
+#define        ALNUMCL 50      /* 0x32 Match any alphanumeric character in locale */
+#define        ALNUMCLUTF8     51      /* 0x33 Match any alphanumeric character in locale */
+#define        NALNUMC 52      /* 0x34 Match any non-alphanumeric character */
+#define        NALNUMCUTF8     53      /* 0x35 Match any non-alphanumeric character */
+#define        NALNUMCL        54      /* 0x36 Match any non-alphanumeric character in locale */
+#define        NALNUMCLUTF8    55      /* 0x37 Match any non-alphanumeric character in locale */
+#define        ALPHA   56      /* 0x38 Match any alphabetic character */
+#define        ALPHAUTF8       57      /* 0x39 Match any alphabetic character */
+#define        ALPHAL  58      /* 0x3a Match any alphabetic character in locale */
+#define        ALPHALUTF8      59      /* 0x3b Match any alphabetic character in locale */
+#define        NALPHA  60      /* 0x3c Match any non-alphabetic character */
+#define        NALPHAUTF8      61      /* 0x3d Match any non-alphabetic character */
+#define        NALPHAL 62      /* 0x3e Match any non-alphabetic character in locale */
+#define        NALPHALUTF8     63      /* 0x3f Match any non-alphabetic character in locale */
+#define        ASCII   64      /* 0x40 Match any ASCII character */
+#define        NASCII  65      /* 0x41 Match any non-ASCII character */
+#define        CNTRL   66      /* 0x42 Match any control character */
+#define        CNTRLUTF8       67      /* 0x43 Match any control character */
+#define        CNTRLL  68      /* 0x44 Match any control character in locale */
+#define        CNTRLLUTF8      69      /* 0x45 Match any control character in locale */
+#define        NCNTRL  70      /* 0x46 Match any non-control character */
+#define        NCNTRLUTF8      71      /* 0x47 Match any non-control character */
+#define        NCNTRLL 72      /* 0x48 Match any non-control character in locale */
+#define        NCNTRLLUTF8     73      /* 0x49 Match any non-control character in locale */
+#define        GRAPH   74      /* 0x4a Match any graphical character */
+#define        GRAPHUTF8       75      /* 0x4b Match any graphical character */
+#define        GRAPHL  76      /* 0x4c Match any graphical character in locale */
+#define        GRAPHLUTF8      77      /* 0x4d Match any graphical character in locale */
+#define        NGRAPH  78      /* 0x4e Match any non-graphical character */
+#define        NGRAPHUTF8      79      /* 0x4f Match any non-graphical character */
+#define        NGRAPHL 80      /* 0x50 Match any non-graphical character in locale */
+#define        NGRAPHLUTF8     81      /* 0x51 Match any non-graphical character in locale */
+#define        LOWER   82      /* 0x52 Match any lowercase character */
+#define        LOWERUTF8       83      /* 0x53 Match any lowercase character */
+#define        LOWERL  84      /* 0x54 Match any lowercase character in locale */
+#define        LOWERLUTF8      85      /* 0x55 Match any lowercase character in locale */
+#define        NLOWER  86      /* 0x56 Match any non-lowercase character */
+#define        NLOWERUTF8      87      /* 0x57 Match any non-lowercase character */
+#define        NLOWERL 88      /* 0x58 Match any non-lowercase character in locale */
+#define        NLOWERLUTF8     89      /* 0x59 Match any non-lowercase character in locale */
+#define        PRINT   90      /* 0x5a Match any printable character */
+#define        PRINTUTF8       91      /* 0x5b Match any printable character */
+#define        PRINTL  92      /* 0x5c Match any printable character in locale */
+#define        PRINTLUTF8      93      /* 0x5d Match any printable character in locale */
+#define        NPRINT  94      /* 0x5e Match any non-printable character */
+#define        NPRINTUTF8      95      /* 0x5f Match any non-printable character */
+#define        NPRINTL 96      /* 0x60 Match any non-printable character in locale */
+#define        NPRINTLUTF8     97      /* 0x61 Match any non-printable character in locale */
+#define        PUNCT   98      /* 0x62 Match any punctuation character */
+#define        PUNCTUTF8       99      /* 0x63 Match any punctuation character */
+#define        PUNCTL  100     /* 0x64 Match any punctuation character in locale */
+#define        PUNCTLUTF8      101     /* 0x65 Match any punctuation character in locale */
+#define        NPUNCT  102     /* 0x66 Match any non-punctuation character */
+#define        NPUNCTUTF8      103     /* 0x67 Match any non-punctuation character */
+#define        NPUNCTL 104     /* 0x68 Match any non-punctuation character in locale */
+#define        NPUNCTLUTF8     105     /* 0x69 Match any non-punctuation character in locale */
+#define        UPPER   106     /* 0x6a Match any uppercase character */
+#define        UPPERUTF8       107     /* 0x6b Match any uppercase character */
+#define        UPPERL  108     /* 0x6c Match any uppercase character in locale */
+#define        UPPERLUTF8      109     /* 0x6d Match any uppercase character in locale */
+#define        NUPPER  110     /* 0x6e Match any non-uppercase character */
+#define        NUPPERUTF8      111     /* 0x6f Match any non-uppercase character */
+#define        NUPPERL 112     /* 0x70 Match any non-uppercase character in locale */
+#define        NUPPERLUTF8     113     /* 0x71 Match any non-uppercase character in locale */
+#define        XDIGIT  114     /* 0x72 Match any hexdigit character */
+#define        NXDIGIT 115     /* 0x73 Match any non-hexdigit character */
+#define        CLUMP   116     /* 0x74 Match any combining character sequence */
+#define        BRANCH  117     /* 0x75 Match this alternative, or the next... */
+#define        BACK    118     /* 0x76 Match "", "next" ptr points backward. */
+#define        EXACT   119     /* 0x77 Match this string (preceded by length). */
+#define        EXACTF  120     /* 0x78 Match this string, folded (prec. by length). */
+#define        EXACTFL 121     /* 0x79 Match this string, folded in locale (w/len). */
+#define        NOTHING 122     /* 0x7a Match empty string. */
+#define        TAIL    123     /* 0x7b Match empty string. Can jump here from outside. */
+#define        STAR    124     /* 0x7c Match this (simple) thing 0 or more times. */
+#define        PLUS    125     /* 0x7d Match this (simple) thing 1 or more times. */
+#define        CURLY   126     /* 0x7e Match this simple thing {n,m} times. */
+#define        CURLYN  127     /* 0x7f Match next-after-this simple thing  */
+#define        CURLYM  128     /* 0x80 Match this medium-complex thing {n,m} times. */
+#define        CURLYX  129     /* 0x81 Match this complex thing {n,m} times. */
+#define        WHILEM  130     /* 0x82 Do curly processing and see if rest matches. */
+#define        OPEN    131     /* 0x83 Mark this point in input as start of #n. */
+#define        CLOSE   132     /* 0x84 Analogous to OPEN. */
+#define        REF     133     /* 0x85 Match some already matched string */
+#define        REFF    134     /* 0x86 Match already matched string, folded */
+#define        REFFL   135     /* 0x87 Match already matched string, folded in loc. */
+#define        IFMATCH 136     /* 0x88 Succeeds if the following matches. */
+#define        UNLESSM 137     /* 0x89 Fails if the following matches. */
+#define        SUSPEND 138     /* 0x8a "Independent" sub-RE. */
+#define        IFTHEN  139     /* 0x8b Switch, should be preceeded by switcher . */
+#define        GROUPP  140     /* 0x8c Whether the group matched. */
+#define        LONGJMP 141     /* 0x8d Jump far away. */
+#define        BRANCHJ 142     /* 0x8e BRANCH with long offset. */
+#define        EVAL    143     /* 0x8f Execute some Perl code. */
+#define        MINMOD  144     /* 0x90 Next operator is not greedy. */
+#define        LOGICAL 145     /* 0x91 Next opcode should set the flag only. */
+#define        RENUM   146     /* 0x92 Group with independently numbered parens. */
+#define        OPTIMIZED       147     /* 0x93 Placeholder for dump. */
 
 #ifndef DOINIT
 EXTCONST U8 PL_regkind[];
@@ -126,8 +198,80 @@ EXTCONST U8 PL_regkind[] = {
        NSPACE,         /* NSPACELUTF8 */
        DIGIT,          /* DIGIT */
        DIGIT,          /* DIGITUTF8 */
+       DIGIT,          /* DIGITL */
+       DIGIT,          /* DIGITLUTF8 */
        NDIGIT,         /* NDIGIT */
        NDIGIT,         /* NDIGITUTF8 */
+       NDIGIT,         /* NDIGITL */
+       NDIGIT,         /* NDIGITLUTF8 */
+       ALNUMC,         /* ALNUMC */
+       ALNUMC,         /* ALNUMCUTF8 */
+       ALNUMC,         /* ALNUMCL */
+       ALNUMC,         /* ALNUMCLUTF8 */
+       NALNUMC,                /* NALNUMC */
+       NALNUMC,                /* NALNUMCUTF8 */
+       NALNUMC,                /* NALNUMCL */
+       NALNUMC,                /* NALNUMCLUTF8 */
+       ALPHA,          /* ALPHA */
+       ALPHA,          /* ALPHAUTF8 */
+       ALPHA,          /* ALPHAL */
+       ALPHA,          /* ALPHALUTF8 */
+       NALPHA,         /* NALPHA */
+       NALPHA,         /* NALPHAUTF8 */
+       NALPHA,         /* NALPHAL */
+       NALPHA,         /* NALPHALUTF8 */
+       ASCII,          /* ASCII */
+       NASCII,         /* NASCII */
+       CNTRL,          /* CNTRL */
+       CNTRL,          /* CNTRLUTF8 */
+       CNTRL,          /* CNTRLL */
+       CNTRL,          /* CNTRLLUTF8 */
+       NCNTRL,         /* NCNTRL */
+       NCNTRL,         /* NCNTRLUTF8 */
+       NCNTRL,         /* NCNTRLL */
+       NCNTRL,         /* NCNTRLLUTF8 */
+       GRAPH,          /* GRAPH */
+       GRAPH,          /* GRAPHUTF8 */
+       GRAPH,          /* GRAPHL */
+       GRAPH,          /* GRAPHLUTF8 */
+       NGRAPH,         /* NGRAPH */
+       NGRAPH,         /* NGRAPHUTF8 */
+       NGRAPH,         /* NGRAPHL */
+       NGRAPH,         /* NGRAPHLUTF8 */
+       LOWER,          /* LOWER */
+       LOWER,          /* LOWERUTF8 */
+       LOWER,          /* LOWERL */
+       LOWER,          /* LOWERLUTF8 */
+       NLOWER,         /* NLOWER */
+       NLOWER,         /* NLOWERUTF8 */
+       NLOWER,         /* NLOWERL */
+       NLOWER,         /* NLOWERLUTF8 */
+       PRINT,          /* PRINT */
+       PRINT,          /* PRINTUTF8 */
+       PRINT,          /* PRINTL */
+       PRINT,          /* PRINTLUTF8 */
+       NPRINT,         /* NPRINT */
+       NPRINT,         /* NPRINTUTF8 */
+       NPRINT,         /* NPRINTL */
+       NPRINT,         /* NPRINTLUTF8 */
+       PUNCT,          /* PUNCT */
+       PUNCT,          /* PUNCTUTF8 */
+       PUNCT,          /* PUNCTL */
+       PUNCT,          /* PUNCTLUTF8 */
+       NPUNCT,         /* NPUNCT */
+       NPUNCT,         /* NPUNCTUTF8 */
+       NPUNCT,         /* NPUNCTL */
+       NPUNCT,         /* NPUNCTLUTF8 */
+       UPPER,          /* UPPER */
+       UPPER,          /* UPPERUTF8 */
+       UPPER,          /* UPPERL */
+       UPPER,          /* UPPERLUTF8 */
+       NUPPER,         /* NUPPER */
+       NUPPER,         /* NUPPERUTF8 */
+       NUPPER,         /* NUPPERL */
+       NUPPER,         /* NUPPERLUTF8 */
+       XDIGIT,         /* XDIGIT */
+       NXDIGIT,                /* NXDIGIT */
        CLUMP,          /* CLUMP */
        BRANCH,         /* BRANCH */
        BACK,           /* BACK */
@@ -208,8 +352,80 @@ const static U8 regarglen[] = {
        0,              /* NSPACELUTF8 */
        0,              /* DIGIT */
        0,              /* DIGITUTF8 */
+       0,              /* DIGITL */
+       0,              /* DIGITLUTF8 */
        0,              /* NDIGIT */
        0,              /* NDIGITUTF8 */
+       0,              /* NDIGITL */
+       0,              /* NDIGITLUTF8 */
+       0,              /* ALNUMC */
+       0,              /* ALNUMCUTF8 */
+       0,              /* ALNUMCL */
+       0,              /* ALNUMCLUTF8 */
+       0,              /* NALNUMC */
+       0,              /* NALNUMCUTF8 */
+       0,              /* NALNUMCL */
+       0,              /* NALNUMCLUTF8 */
+       0,              /* ALPHA */
+       0,              /* ALPHAUTF8 */
+       0,              /* ALPHAL */
+       0,              /* ALPHALUTF8 */
+       0,              /* NALPHA */
+       0,              /* NALPHAUTF8 */
+       0,              /* NALPHAL */
+       0,              /* NALPHALUTF8 */
+       0,              /* ASCII */
+       0,              /* NASCII */
+       0,              /* CNTRL */
+       0,              /* CNTRLUTF8 */
+       0,              /* CNTRLL */
+       0,              /* CNTRLLUTF8 */
+       0,              /* NCNTRL */
+       0,              /* NCNTRLUTF8 */
+       0,              /* NCNTRLL */
+       0,              /* NCNTRLLUTF8 */
+       0,              /* GRAPH */
+       0,              /* GRAPHUTF8 */
+       0,              /* GRAPHL */
+       0,              /* GRAPHLUTF8 */
+       0,              /* NGRAPH */
+       0,              /* NGRAPHUTF8 */
+       0,              /* NGRAPHL */
+       0,              /* NGRAPHLUTF8 */
+       0,              /* LOWER */
+       0,              /* LOWERUTF8 */
+       0,              /* LOWERL */
+       0,              /* LOWERLUTF8 */
+       0,              /* NLOWER */
+       0,              /* NLOWERUTF8 */
+       0,              /* NLOWERL */
+       0,              /* NLOWERLUTF8 */
+       0,              /* PRINT */
+       0,              /* PRINTUTF8 */
+       0,              /* PRINTL */
+       0,              /* PRINTLUTF8 */
+       0,              /* NPRINT */
+       0,              /* NPRINTUTF8 */
+       0,              /* NPRINTL */
+       0,              /* NPRINTLUTF8 */
+       0,              /* PUNCT */
+       0,              /* PUNCTUTF8 */
+       0,              /* PUNCTL */
+       0,              /* PUNCTLUTF8 */
+       0,              /* NPUNCT */
+       0,              /* NPUNCTUTF8 */
+       0,              /* NPUNCTL */
+       0,              /* NPUNCTLUTF8 */
+       0,              /* UPPER */
+       0,              /* UPPERUTF8 */
+       0,              /* UPPERL */
+       0,              /* UPPERLUTF8 */
+       0,              /* NUPPER */
+       0,              /* NUPPERUTF8 */
+       0,              /* NUPPERL */
+       0,              /* NUPPERLUTF8 */
+       0,              /* XDIGIT */
+       0,              /* NXDIGIT */
        0,              /* CLUMP */
        0,              /* BRANCH */
        0,              /* BACK */
@@ -287,8 +503,80 @@ const static char reg_off_by_arg[] = {
        0,              /* NSPACELUTF8 */
        0,              /* DIGIT */
        0,              /* DIGITUTF8 */
+       0,              /* DIGITL */
+       0,              /* DIGITLUTF8 */
        0,              /* NDIGIT */
        0,              /* NDIGITUTF8 */
+       0,              /* NDIGITL */
+       0,              /* NDIGITLUTF8 */
+       0,              /* ALNUMC */
+       0,              /* ALNUMCUTF8 */
+       0,              /* ALNUMCL */
+       0,              /* ALNUMCLUTF8 */
+       0,              /* NALNUMC */
+       0,              /* NALNUMCUTF8 */
+       0,              /* NALNUMCL */
+       0,              /* NALNUMCLUTF8 */
+       0,              /* ALPHA */
+       0,              /* ALPHAUTF8 */
+       0,              /* ALPHAL */
+       0,              /* ALPHALUTF8 */
+       0,              /* NALPHA */
+       0,              /* NALPHAUTF8 */
+       0,              /* NALPHAL */
+       0,              /* NALPHALUTF8 */
+       0,              /* ASCII */
+       0,              /* NASCII */
+       0,              /* CNTRL */
+       0,              /* CNTRLUTF8 */
+       0,              /* CNTRLL */
+       0,              /* CNTRLLUTF8 */
+       0,              /* NCNTRL */
+       0,              /* NCNTRLUTF8 */
+       0,              /* NCNTRLL */
+       0,              /* NCNTRLLUTF8 */
+       0,              /* GRAPH */
+       0,              /* GRAPHUTF8 */
+       0,              /* GRAPHL */
+       0,              /* GRAPHLUTF8 */
+       0,              /* NGRAPH */
+       0,              /* NGRAPHUTF8 */
+       0,              /* NGRAPHL */
+       0,              /* NGRAPHLUTF8 */
+       0,              /* LOWER */
+       0,              /* LOWERUTF8 */
+       0,              /* LOWERL */
+       0,              /* LOWERLUTF8 */
+       0,              /* NLOWER */
+       0,              /* NLOWERUTF8 */
+       0,              /* NLOWERL */
+       0,              /* NLOWERLUTF8 */
+       0,              /* PRINT */
+       0,              /* PRINTUTF8 */
+       0,              /* PRINTL */
+       0,              /* PRINTLUTF8 */
+       0,              /* NPRINT */
+       0,              /* NPRINTUTF8 */
+       0,              /* NPRINTL */
+       0,              /* NPRINTLUTF8 */
+       0,              /* PUNCT */
+       0,              /* PUNCTUTF8 */
+       0,              /* PUNCTL */
+       0,              /* PUNCTLUTF8 */
+       0,              /* NPUNCT */
+       0,              /* NPUNCTUTF8 */
+       0,              /* NPUNCTL */
+       0,              /* NPUNCTLUTF8 */
+       0,              /* UPPER */
+       0,              /* UPPERUTF8 */
+       0,              /* UPPERL */
+       0,              /* UPPERLUTF8 */
+       0,              /* NUPPER */
+       0,              /* NUPPERUTF8 */
+       0,              /* NUPPERL */
+       0,              /* NUPPERLUTF8 */
+       0,              /* XDIGIT */
+       0,              /* NXDIGIT */
        0,              /* CLUMP */
        0,              /* BRANCH */
        0,              /* BACK */
@@ -367,43 +655,115 @@ const static char * const reg_name[] = {
        "NSPACELUTF8",          /* 0x27 */
        "DIGIT",                /* 0x28 */
        "DIGITUTF8",            /* 0x29 */
-       "NDIGIT",               /* 0x2a */
-       "NDIGITUTF8",           /* 0x2b */
-       "CLUMP",                /* 0x2c */
-       "BRANCH",               /* 0x2d */
-       "BACK",         /* 0x2e */
-       "EXACT",                /* 0x2f */
-       "EXACTF",               /* 0x30 */
-       "EXACTFL",              /* 0x31 */
-       "NOTHING",              /* 0x32 */
-       "TAIL",         /* 0x33 */
-       "STAR",         /* 0x34 */
-       "PLUS",         /* 0x35 */
-       "CURLY",                /* 0x36 */
-       "CURLYN",               /* 0x37 */
-       "CURLYM",               /* 0x38 */
-       "CURLYX",               /* 0x39 */
-       "WHILEM",               /* 0x3a */
-       "OPEN",         /* 0x3b */
-       "CLOSE",                /* 0x3c */
-       "REF",          /* 0x3d */
-       "REFF",         /* 0x3e */
-       "REFFL",                /* 0x3f */
-       "IFMATCH",              /* 0x40 */
-       "UNLESSM",              /* 0x41 */
-       "SUSPEND",              /* 0x42 */
-       "IFTHEN",               /* 0x43 */
-       "GROUPP",               /* 0x44 */
-       "LONGJMP",              /* 0x45 */
-       "BRANCHJ",              /* 0x46 */
-       "EVAL",         /* 0x47 */
-       "MINMOD",               /* 0x48 */
-       "LOGICAL",              /* 0x49 */
-       "RENUM",                /* 0x4a */
-       "OPTIMIZED",            /* 0x4b */
+       "DIGITL",               /* 0x2a */
+       "DIGITLUTF8",           /* 0x2b */
+       "NDIGIT",               /* 0x2c */
+       "NDIGITUTF8",           /* 0x2d */
+       "NDIGITL",              /* 0x2e */
+       "NDIGITLUTF8",          /* 0x2f */
+       "ALNUMC",               /* 0x30 */
+       "ALNUMCUTF8",           /* 0x31 */
+       "ALNUMCL",              /* 0x32 */
+       "ALNUMCLUTF8",          /* 0x33 */
+       "NALNUMC",              /* 0x34 */
+       "NALNUMCUTF8",          /* 0x35 */
+       "NALNUMCL",             /* 0x36 */
+       "NALNUMCLUTF8",         /* 0x37 */
+       "ALPHA",                /* 0x38 */
+       "ALPHAUTF8",            /* 0x39 */
+       "ALPHAL",               /* 0x3a */
+       "ALPHALUTF8",           /* 0x3b */
+       "NALPHA",               /* 0x3c */
+       "NALPHAUTF8",           /* 0x3d */
+       "NALPHAL",              /* 0x3e */
+       "NALPHALUTF8",          /* 0x3f */
+       "ASCII",                /* 0x40 */
+       "NASCII",               /* 0x41 */
+       "CNTRL",                /* 0x42 */
+       "CNTRLUTF8",            /* 0x43 */
+       "CNTRLL",               /* 0x44 */
+       "CNTRLLUTF8",           /* 0x45 */
+       "NCNTRL",               /* 0x46 */
+       "NCNTRLUTF8",           /* 0x47 */
+       "NCNTRLL",              /* 0x48 */
+       "NCNTRLLUTF8",          /* 0x49 */
+       "GRAPH",                /* 0x4a */
+       "GRAPHUTF8",            /* 0x4b */
+       "GRAPHL",               /* 0x4c */
+       "GRAPHLUTF8",           /* 0x4d */
+       "NGRAPH",               /* 0x4e */
+       "NGRAPHUTF8",           /* 0x4f */
+       "NGRAPHL",              /* 0x50 */
+       "NGRAPHLUTF8",          /* 0x51 */
+       "LOWER",                /* 0x52 */
+       "LOWERUTF8",            /* 0x53 */
+       "LOWERL",               /* 0x54 */
+       "LOWERLUTF8",           /* 0x55 */
+       "NLOWER",               /* 0x56 */
+       "NLOWERUTF8",           /* 0x57 */
+       "NLOWERL",              /* 0x58 */
+       "NLOWERLUTF8",          /* 0x59 */
+       "PRINT",                /* 0x5a */
+       "PRINTUTF8",            /* 0x5b */
+       "PRINTL",               /* 0x5c */
+       "PRINTLUTF8",           /* 0x5d */
+       "NPRINT",               /* 0x5e */
+       "NPRINTUTF8",           /* 0x5f */
+       "NPRINTL",              /* 0x60 */
+       "NPRINTLUTF8",          /* 0x61 */
+       "PUNCT",                /* 0x62 */
+       "PUNCTUTF8",            /* 0x63 */
+       "PUNCTL",               /* 0x64 */
+       "PUNCTLUTF8",           /* 0x65 */
+       "NPUNCT",               /* 0x66 */
+       "NPUNCTUTF8",           /* 0x67 */
+       "NPUNCTL",              /* 0x68 */
+       "NPUNCTLUTF8",          /* 0x69 */
+       "UPPER",                /* 0x6a */
+       "UPPERUTF8",            /* 0x6b */
+       "UPPERL",               /* 0x6c */
+       "UPPERLUTF8",           /* 0x6d */
+       "NUPPER",               /* 0x6e */
+       "NUPPERUTF8",           /* 0x6f */
+       "NUPPERL",              /* 0x70 */
+       "NUPPERLUTF8",          /* 0x71 */
+       "XDIGIT",               /* 0x72 */
+       "NXDIGIT",              /* 0x73 */
+       "CLUMP",                /* 0x74 */
+       "BRANCH",               /* 0x75 */
+       "BACK",         /* 0x76 */
+       "EXACT",                /* 0x77 */
+       "EXACTF",               /* 0x78 */
+       "EXACTFL",              /* 0x79 */
+       "NOTHING",              /* 0x7a */
+       "TAIL",         /* 0x7b */
+       "STAR",         /* 0x7c */
+       "PLUS",         /* 0x7d */
+       "CURLY",                /* 0x7e */
+       "CURLYN",               /* 0x7f */
+       "CURLYM",               /* 0x80 */
+       "CURLYX",               /* 0x81 */
+       "WHILEM",               /* 0x82 */
+       "OPEN",         /* 0x83 */
+       "CLOSE",                /* 0x84 */
+       "REF",          /* 0x85 */
+       "REFF",         /* 0x86 */
+       "REFFL",                /* 0x87 */
+       "IFMATCH",              /* 0x88 */
+       "UNLESSM",              /* 0x89 */
+       "SUSPEND",              /* 0x8a */
+       "IFTHEN",               /* 0x8b */
+       "GROUPP",               /* 0x8c */
+       "LONGJMP",              /* 0x8d */
+       "BRANCHJ",              /* 0x8e */
+       "EVAL",         /* 0x8f */
+       "MINMOD",               /* 0x90 */
+       "LOGICAL",              /* 0x91 */
+       "RENUM",                /* 0x92 */
+       "OPTIMIZED",            /* 0x93 */
 };
 
-const static int reg_num = 76;
+const static int reg_num = 148;        /* presumably the number of reg_name[] entries (last index above is 0x93 = 147) */
 
 #endif /* DEBUGGING */
 #endif /* REG_COMP_C */
index a086c12..6312c75 100755 (executable)
@@ -282,14 +282,7 @@ eval qq("${context}y" =~ /(?<=$context)y/);
 print "not " if $@ !~ m%^\Q/(?<=\Ex+/: lookbehind longer than 255 not%;
 print "ok 71\n";
 
-# This one will fail when POSIX character classes do get implemented
-{
-       my $w;
-       local $^W = 1;
-       local $SIG{__WARN__} = sub{$w = shift};
-       eval q('a' =~ /[[:alpha:]]/);
-       print "not " if $w !~ /^\QCharacter class syntax [: :] is reserved/;
-}
+# removed test
 print "ok 72\n";
 
 # Long Monsters
index 466fc85..cbcb725 100644 (file)
@@ -474,9 +474,37 @@ $(?<=^(a)) a       y       $1      a
 ([[=]+)        a=[b]=  y       $1      =[
 ([[.]+)        a.[b].  y       $1      .[
 [a[:xyz:       -       c       -       /[a[:xyz:/: unmatched [] in regexp
-[a[:xyz:]      -       c       -       /[a[:xyz:]/: unmatched [] in regexp
+[a[:xyz:]      -       c       -       Character class [:xyz:] unknown
 [a[:]b[:c]     abc     y       $&      abc
-([a[:xyz:]b]+) pbaq    y       $1      ba
+([a[:xyz:]b]+) pbaq    c       -       Character class [:xyz:] unknown
+[a[:]b[:c]     abc     y       $&      abc
+([[:alpha:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd
+([[:alnum:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy
+([[:ascii:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy__--  ${nulnul}
+([[:cntrl:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ${nulnul}
+([[:digit:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      01
+([[:graph:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy__--
+([[:lower:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      cd
+([[:print:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy__--  
+([[:punct:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      __--
+([[:space:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1        
+([[:word:]]+)  ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy__
+([[:upper:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      AB
+([[:xdigit:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01
+([[:^alpha:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      01
+([[:^alnum:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      __--  ${nulnul}${ffff}
+([[:^ascii:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ${ffff}
+([[:^cntrl:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy__--  
+([[:^digit:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd
+([[:^lower:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      AB
+([[:^print:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ${nulnul}${ffff}
+([[:^punct:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy
+([[:^space:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      ABcd01Xy__--
+([[:^word:]]+) ABcd01Xy__--  ${nulnul}${ffff}  y       $1      --  ${nulnul}${ffff}
+([[:^upper:]]+)        ABcd01Xy__--  ${nulnul}${ffff}  y       $1      cd01
+([[:^xdigit:]]+)       ABcd01Xy__--  ${nulnul}${ffff}  y       $1      Xy__--  ${nulnul}${ffff}
+[[:foo:]]      -       c       -       Character class [:foo:] unknown
+[[:^foo:]]     -       c       -       Character class [:^foo:] unknown
 ((?>a+)b)      aaab    y       $1      aaab
 (?>(a+))b      aaab    y       $1      aaa
 ((?>[^()]+)|\([^()]*\))+       ((abc(ade)ufh()()x      y       $&      abc(ade)ufh()()x
index 66b2d1c..4ffe136 100755 (executable)
@@ -47,6 +47,8 @@ seek(TESTS,0,0);
 $. = 0;
 
 $bang = sprintf "\\%03o", ord "!"; # \41 would not be portable.
+$ffff  = chr(0xff) x 2;
+$nulnul = "\0" x 2;
 
 $| = 1;
 print "1..$numtests\n# $iters iterations\n";
@@ -59,12 +61,16 @@ while (<TESTS>) {
     infty_subst(\$pat);
     infty_subst(\$expect);
     $pat = "'$pat'" unless $pat =~ /^[:']/;
-    $pat =~ s/\\n/\n/g;
     $pat =~ s/(\$\{\w+\})/$1/eeg;
+    $pat =~ s/\\n/\n/g;
+    $subject =~ s/(\$\{\w+\})/$1/eeg;
     $subject =~ s/\\n/\n/g;
+    $expect =~ s/(\$\{\w+\})/$1/eeg;
     $expect =~ s/\\n/\n/g;
     $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
     $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
+    # Certain tests don't work with utf8 (the re_test should be in UTF8); only test the utf8 hint bit (0x8) of $^H, never modify it
+    $skip = 1 if ($^H & 0x00000008) && $pat =~ /\[:\^(alnum|print|word):\]/;
     $result =~ s/B//i unless $skip;
     for $study ('', 'study \$subject') {
        $c = $iters;
@@ -75,7 +81,7 @@ while (<TESTS>) {
            last;  # no need to study a syntax error
        }
        elsif ( $skip ) {
-           print "ok $. # Skipped: not fixed yet\n"; next TEST;
+           print "ok $. # skipped\n"; next TEST;
        }
        elsif ($@) {
            print "not ok $. $input => error `$err'\n"; next TEST;
index e5b3bb5..5e467ae 100755 (executable)
@@ -6,7 +6,7 @@ BEGIN {
     $ENV{PERL5LIB} = '../lib';
 }
 
-print "1..3\n";
+print "1..9\n";
 
 my $test = 1;
 
@@ -34,4 +34,37 @@ sub ok {
     s/([$rx])/"&#".ord($1).";"/eg; 
     ok $_, '>&#9786;<';
     $test++;
+
+    $_ = "alpha,numeric"; 
+    m/([[:alpha:]]+)/; 
+    ok $1, 'alpha';
+    $test++;
+
+    $_ = "alphaNUMERICstring";
+    m/([[:^lower:]]+)/; 
+    ok $1, 'NUMERIC';
+    $test++;
+
+    $_ = "alphaNUMERICstring";
+    m/(\p{Ll}+)/; 
+    ok $1, 'alpha';
+    $test++;
+
+    $_ = "alphaNUMERICstring"; 
+    m/(\p{Lu}+)/; 
+    ok $1, 'NUMERIC';
+    $test++;
+
+    $_ = "alpha,numeric"; 
+    m/([\p{IsAlpha}]+)/; 
+    ok $1, 'alpha';
+    $test++;
+
+    $_ = "alphaNUMERICstring";
+    m/([^\p{IsLower}]+)/; 
+    ok $1, 'NUMERIC';
+    $test++;
+
 }
+
+
index 52a163a..0f48c67 100644 (file)
@@ -8,9 +8,6 @@
 
        /(?=a)?/
 
-  Character class syntax [: :] is reserved for future extensions
-       /[a[:xyz:]b]/
-
   Character class syntax [. .] is reserved for future extensions
   Character class syntax [= =] is reserved for future extensions
 
@@ -32,22 +29,21 @@ Strange *+?{} on zero-length expression at - line 4.
 # regcomp.c
 use warning 'unsafe' ;
 $_ = "" ;
-/[a[:xyz:]b]/;
 /[a[.xyz.]b]/;
 /[a[=xyz=]b]/;
 EXPECT
-Character class syntax [: :] is reserved for future extensions at - line 4.
-Character class syntax [. .] is reserved for future extensions at - line 5.
-Character class syntax [= =] is reserved for future extensions at - line 6.
+Character class syntax [. .] is reserved for future extensions at - line 4.
+Character class syntax [= =] is reserved for future extensions at - line 5.
 ########
 # regcomp.c
-use warning 'unsafe' ; 
-# use utf8 ; # Note this line should be uncommented when utf8 gets fixed.
+use warning 'unsafe' ;
 $_ = "" ;
-/[a[:xyz:]b]/;
-/[a[.xyz.]b]/;
-/[a[=xyz=]b]/;
+/[:foo:]/;
+/[.bar.]/;
+/[=zog=]/;
 EXPECT
-Character class syntax [: :] is reserved for future extensions at - line 5.
-Character class syntax [. .] is reserved for future extensions at - line 6.
-Character class syntax [= =] is reserved for future extensions at - line 7.
+Character class syntax [: :] belongs inside character classes at - line 4.
+Character class syntax [. .] belongs inside character classes at - line 5.
+Character class syntax [. .] is reserved for future extensions at - line 5.
+Character class syntax [= =] belongs inside character classes at - line 6.
+Character class syntax [= =] is reserved for future extensions at - line 6.
diff --git a/utf8.c b/utf8.c
index 8c7aee2..0e52f21 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -255,6 +255,14 @@ Perl_is_uni_alnum(pTHX_ U32 c)
 }
 
 bool
+Perl_is_uni_alnumc(pTHX_ U32 c)        /* classify code point c by delegating to is_utf8_alnumc() */
+{
+    U8 tmpbuf[10];                     /* scratch buffer handed to uv_to_utf8() */
+    uv_to_utf8(tmpbuf, (UV)c);         /* presumably writes the UTF-8 encoding of c into tmpbuf */
+    return is_utf8_alnumc(tmpbuf);
+}
+
+bool
 Perl_is_uni_idfirst(pTHX_ U32 c)
 {
     U8 tmpbuf[10];
@@ -303,6 +311,22 @@ Perl_is_uni_lower(pTHX_ U32 c)
 }
 
 bool
+Perl_is_uni_cntrl(pTHX_ U32 c)         /* classify code point c by delegating to is_utf8_cntrl() */
+{
+    U8 tmpbuf[10];                     /* scratch buffer handed to uv_to_utf8() */
+    uv_to_utf8(tmpbuf, (UV)c);         /* presumably writes the UTF-8 encoding of c into tmpbuf */
+    return is_utf8_cntrl(tmpbuf);
+}
+
+bool
+Perl_is_uni_graph(pTHX_ U32 c)         /* classify code point c by delegating to is_utf8_graph() */
+{
+    U8 tmpbuf[10];                     /* scratch buffer handed to uv_to_utf8() */
+    uv_to_utf8(tmpbuf, (UV)c);         /* presumably writes the UTF-8 encoding of c into tmpbuf */
+    return is_utf8_graph(tmpbuf);
+}
+
+bool
 Perl_is_uni_print(pTHX_ U32 c)
 {
     U8 tmpbuf[10];
@@ -310,6 +334,14 @@ Perl_is_uni_print(pTHX_ U32 c)
     return is_utf8_print(tmpbuf);
 }
 
+bool
+Perl_is_uni_punct(pTHX_ U32 c)         /* classify code point c by delegating to is_utf8_punct(); named and declared like its is_uni_* siblings */
+{
+    U8 tmpbuf[10];                     /* scratch buffer handed to uv_to_utf8() */
+    uv_to_utf8(tmpbuf, (UV)c);         /* presumably writes the UTF-8 encoding of c into tmpbuf */
+    return is_utf8_punct(tmpbuf);
+}
+
 U32
 Perl_to_uni_upper(pTHX_ U32 c)
 {
@@ -343,6 +375,12 @@ Perl_is_uni_alnum_lc(pTHX_ U32 c)
 }
 
 bool
+Perl_is_uni_alnumc_lc(pTHX_ U32 c)     /* "_lc" variant: currently just forwards to is_uni_alnumc() */
+{
+    return is_uni_alnumc(c);   /* XXX no locale support yet */
+}
+
+bool
 Perl_is_uni_idfirst_lc(pTHX_ U32 c)
 {
     return is_uni_idfirst(c);  /* XXX no locale support yet */
@@ -379,11 +417,29 @@ Perl_is_uni_lower_lc(pTHX_ U32 c)
 }
 
 bool
+Perl_is_uni_cntrl_lc(pTHX_ U32 c)      /* "_lc" variant: currently just forwards to is_uni_cntrl() */
+{
+    return is_uni_cntrl(c);    /* XXX no locale support yet */
+}
+
+bool
+Perl_is_uni_graph_lc(pTHX_ U32 c)      /* "_lc" variant: currently just forwards to is_uni_graph() */
+{
+    return is_uni_graph(c);    /* XXX no locale support yet */
+}
+
+bool
 Perl_is_uni_print_lc(pTHX_ U32 c)
 {
     return is_uni_print(c);    /* XXX no locale support yet */
 }
 
+bool
+Perl_is_uni_punct_lc(pTHX_ U32 c)      /* "_lc" variant: currently just forwards to is_uni_punct() */
+{
+    return is_uni_punct(c);    /* XXX no locale support yet */
+}
+
 U32
 Perl_to_uni_upper_lc(pTHX_ U32 c)
 {
@@ -402,7 +458,6 @@ Perl_to_uni_lower_lc(pTHX_ U32 c)
     return to_uni_lower(c);    /* XXX no locale support yet */
 }
 
-
 bool
 Perl_is_utf8_alnum(pTHX_ U8 *p)
 {
@@ -419,6 +474,21 @@ Perl_is_utf8_alnum(pTHX_ U8 *p)
 }
 
 bool
+Perl_is_utf8_alnumc(pTHX_ U8 *p)       /* look p up in the swash named "IsAlnumC"; p is presumably a UTF-8 encoded character */
+{
+    if (!PL_utf8_alnum)        /* NOTE(review): the cache slot is PL_utf8_alnum, not an alnumc-specific one; if Perl_is_utf8_alnum shares it, whichever runs first decides the table -- confirm */
+       PL_utf8_alnum = swash_init("utf8", "IsAlnumC", &PL_sv_undef, 0, 0);     /* NOTE(review): no Is/AlnumC.pl table is listed in this change -- confirm the name resolves */
+    return swash_fetch(PL_utf8_alnum, p);      /* unconditional return: nothing below this line is ever executed */
+/*    return is_utf8_alpha(p) || is_utf8_digit(p); */
+#ifdef SURPRISINGLY_SLOWER  /* probably because alpha is usually true */
+    if (!PL_utf8_alnum)
+       PL_utf8_alnum = swash_init("utf8", "",
+           sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);     /* NOTE(review): list also names 005F ('_') -- looks copied from a "word" definition; confirm before reviving */
+    return swash_fetch(PL_utf8_alnum, p);
+#endif
+}
+
+bool
 Perl_is_utf8_idfirst(pTHX_ U8 *p)
 {
     return *p == '_' || is_utf8_alpha(p);
@@ -433,6 +503,14 @@ Perl_is_utf8_alpha(pTHX_ U8 *p)
 }
 
 bool
+Perl_is_utf8_ascii(pTHX_ U8 *p)
+{
+    if (!PL_utf8_ascii)
+       PL_utf8_ascii = swash_init("utf8", "IsAscii", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_ascii, p);
+}
+
+bool
 Perl_is_utf8_space(pTHX_ U8 *p)
 {
     if (!PL_utf8_space)
@@ -465,6 +543,22 @@ Perl_is_utf8_lower(pTHX_ U8 *p)
 }
 
 bool
+Perl_is_utf8_cntrl(pTHX_ U8 *p)        /* look p up in the "IsCntrl" swash; p is presumably a UTF-8 encoded character */
+{
+    if (!PL_utf8_cntrl)                /* nothing cached yet: initialise the swash on first use */
+       PL_utf8_cntrl = swash_init("utf8", "IsCntrl", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_cntrl, p);
+}
+
+bool
+Perl_is_utf8_graph(pTHX_ U8 *p)        /* look p up in the "IsGraph" swash; p is presumably a UTF-8 encoded character */
+{
+    if (!PL_utf8_graph)                /* nothing cached yet: initialise the swash on first use */
+       PL_utf8_graph = swash_init("utf8", "IsGraph", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_graph, p);
+}
+
+bool
 Perl_is_utf8_print(pTHX_ U8 *p)
 {
     if (!PL_utf8_print)
@@ -473,6 +567,22 @@ Perl_is_utf8_print(pTHX_ U8 *p)
 }
 
 bool
+Perl_is_utf8_punct(pTHX_ U8 *p)        /* look p up in the "IsPunct" swash; p is presumably a UTF-8 encoded character */
+{
+    if (!PL_utf8_punct)                /* nothing cached yet: initialise the swash on first use */
+       PL_utf8_punct = swash_init("utf8", "IsPunct", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_punct, p);
+}
+
+bool
+Perl_is_utf8_xdigit(pTHX_ U8 *p)       /* look p up in the "IsXDigit" swash; p is presumably a UTF-8 encoded character */
+{
+    if (!PL_utf8_xdigit)               /* nothing cached yet: initialise the swash on first use */
+       PL_utf8_xdigit = swash_init("utf8", "IsXDigit", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_xdigit, p);
+}
+
+bool
 Perl_is_utf8_mark(pTHX_ U8 *p)
 {
     if (!PL_utf8_mark)