ApR |bool |is_utf8_alpha |NN const U8 *p
ApR |bool |is_utf8_ascii |NN const U8 *p
ApR |bool |is_utf8_space |NN const U8 *p
+ApR |bool |is_utf8_perl_space |NN const U8 *p
+ApR |bool |is_utf8_perl_word |NN const U8 *p
ApR |bool |is_utf8_cntrl |NN const U8 *p
ApR |bool |is_utf8_digit |NN const U8 *p
+ApR |bool |is_utf8_posix_digit |NN const U8 *p
ApR |bool |is_utf8_graph |NN const U8 *p
ApR |bool |is_utf8_upper |NN const U8 *p
ApR |bool |is_utf8_lower |NN const U8 *p
#define is_utf8_alpha Perl_is_utf8_alpha
#define is_utf8_ascii Perl_is_utf8_ascii
#define is_utf8_space Perl_is_utf8_space
+#define is_utf8_perl_space Perl_is_utf8_perl_space
+#define is_utf8_perl_word Perl_is_utf8_perl_word
#define is_utf8_cntrl Perl_is_utf8_cntrl
#define is_utf8_digit Perl_is_utf8_digit
+#define is_utf8_posix_digit Perl_is_utf8_posix_digit
#define is_utf8_graph Perl_is_utf8_graph
#define is_utf8_upper Perl_is_utf8_upper
#define is_utf8_lower Perl_is_utf8_lower
#define is_utf8_alpha(a) Perl_is_utf8_alpha(aTHX_ a)
#define is_utf8_ascii(a) Perl_is_utf8_ascii(aTHX_ a)
#define is_utf8_space(a) Perl_is_utf8_space(aTHX_ a)
+#define is_utf8_perl_space(a) Perl_is_utf8_perl_space(aTHX_ a)
+#define is_utf8_perl_word(a) Perl_is_utf8_perl_word(aTHX_ a)
#define is_utf8_cntrl(a) Perl_is_utf8_cntrl(aTHX_ a)
#define is_utf8_digit(a) Perl_is_utf8_digit(aTHX_ a)
+#define is_utf8_posix_digit(a) Perl_is_utf8_posix_digit(aTHX_ a)
#define is_utf8_graph(a) Perl_is_utf8_graph(aTHX_ a)
#define is_utf8_upper(a) Perl_is_utf8_upper(aTHX_ a)
#define is_utf8_lower(a) Perl_is_utf8_lower(aTHX_ a)
#define PL_utf8_idstart (vTHX->Iutf8_idstart)
#define PL_utf8_lower (vTHX->Iutf8_lower)
#define PL_utf8_mark (vTHX->Iutf8_mark)
+#define PL_utf8_perl_space (vTHX->Iutf8_perl_space)
+#define PL_utf8_perl_word (vTHX->Iutf8_perl_word)
+#define PL_utf8_posix_digit (vTHX->Iutf8_posix_digit)
#define PL_utf8_print (vTHX->Iutf8_print)
#define PL_utf8_punct (vTHX->Iutf8_punct)
#define PL_utf8_space (vTHX->Iutf8_space)
#define PL_Iutf8_idstart PL_utf8_idstart
#define PL_Iutf8_lower PL_utf8_lower
#define PL_Iutf8_mark PL_utf8_mark
+#define PL_Iutf8_perl_space PL_utf8_perl_space
+#define PL_Iutf8_perl_word PL_utf8_perl_word
+#define PL_Iutf8_posix_digit PL_utf8_posix_digit
#define PL_Iutf8_print PL_utf8_print
#define PL_Iutf8_punct PL_utf8_punct
#define PL_Iutf8_space PL_utf8_space
Perl_is_utf8_alpha
Perl_is_utf8_ascii
Perl_is_utf8_space
+Perl_is_utf8_perl_space
+Perl_is_utf8_perl_word
Perl_is_utf8_cntrl
Perl_is_utf8_digit
+Perl_is_utf8_posix_digit
Perl_is_utf8_graph
Perl_is_utf8_upper
Perl_is_utf8_lower
PERLVAR(Iutf8_ascii, SV *)
PERLVAR(Iutf8_alpha, SV *)
PERLVAR(Iutf8_space, SV *)
+PERLVAR(Iutf8_perl_space, SV *)
+PERLVAR(Iutf8_perl_word, SV *)
+PERLVAR(Iutf8_posix_digit, SV *)
PERLVAR(Iutf8_cntrl, SV *)
PERLVAR(Iutf8_graph, SV *)
PERLVAR(Iutf8_digit, SV *)
my ($sym, $ch);
foreach (@imports) {
if (($ch, $sym) = /^([\$\@\%\*\&])(.+)/) {
- if ($sym =~ /\W/) {
+ if ($sym =~ /\P{IsWord}/) {
# time for a more-detailed check-up
if ($sym =~ /^\w+[[{].*[]}]$/) {
require Carp;
#define PL_utf8_lower (*Perl_Iutf8_lower_ptr(aTHX))
#undef PL_utf8_mark
#define PL_utf8_mark (*Perl_Iutf8_mark_ptr(aTHX))
+#undef PL_utf8_perl_space
+#define PL_utf8_perl_space (*Perl_Iutf8_perl_space_ptr(aTHX))
+#undef PL_utf8_perl_word
+#define PL_utf8_perl_word (*Perl_Iutf8_perl_word_ptr(aTHX))
+#undef PL_utf8_posix_digit
+#define PL_utf8_posix_digit (*Perl_Iutf8_posix_digit_ptr(aTHX))
#undef PL_utf8_print
#define PL_utf8_print (*Perl_Iutf8_print_ptr(aTHX))
#undef PL_utf8_punct
#define PERL_ARGS_ASSERT_IS_UTF8_SPACE \
assert(p)
+PERL_CALLCONV bool Perl_is_utf8_perl_space(pTHX_ const U8 *p)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE \
+ assert(p)
+
+PERL_CALLCONV bool Perl_is_utf8_perl_word(pTHX_ const U8 *p)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD \
+ assert(p)
+
PERL_CALLCONV bool Perl_is_utf8_cntrl(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_IS_UTF8_DIGIT \
assert(p)
+PERL_CALLCONV bool Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT \
+ assert(p)
+
PERL_CALLCONV bool Perl_is_utf8_graph(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */
-#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 40 (8*5) named classes */
+#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */
/* also used by trie */
struct regnode_charclass {
#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
+/*
+ We don't use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test,
+ so that it is possible to override the option here without having to
+ rebuild the entire core, as we would be required to do if we changed
+ regcomp.h, which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
+*/
+#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS /* a nonzero value selects the legacy mappings below */
+#define BROKEN_UNICODE_CHARCLASS_MAPPINGS /* local switch actually tested in this file */
+#endif
+ /* Select what the PERL_WORD / PERL_SPACE / POSIX_DIGIT loaders and RE_utf8_* names resolve to. */
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS /* legacy: reuse the alnum / space / digit loaders and variables */
+#define LOAD_UTF8_CHARCLASS_PERL_WORD() LOAD_UTF8_CHARCLASS_ALNUM()
+#define LOAD_UTF8_CHARCLASS_PERL_SPACE() LOAD_UTF8_CHARCLASS_SPACE()
+#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
+#define RE_utf8_perl_word PL_utf8_alnum
+#define RE_utf8_perl_space PL_utf8_space
+#define RE_utf8_posix_digit PL_utf8_digit
+#define perl_word alnum /* NOTE(review): presumably remaps the CLASS argument given to CCC_TRY_AFF/CCC_TRY_NEG; confirm it still takes effect if those macros paste CLASS with ## */
+#define perl_space space /* same remapping, for the space class */
+#define posix_digit digit /* same remapping, for the digit class */
+#else /* default: load the dedicated perl_word / perl_space / posix_digit classes */
+#define LOAD_UTF8_CHARCLASS_PERL_WORD() LOAD_UTF8_CHARCLASS(perl_word,"a")
+#define LOAD_UTF8_CHARCLASS_PERL_SPACE() LOAD_UTF8_CHARCLASS(perl_space," ")
+#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
+#define RE_utf8_perl_word PL_utf8_perl_word
+#define RE_utf8_perl_space PL_utf8_perl_space
+#define RE_utf8_posix_digit PL_utf8_posix_digit
+#endif /* BROKEN_UNICODE_CHARCLASS_MAPPINGS */
+
+
#define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC) \
case NAMEL: \
PL_reg_flags |= RF_tainted; \
break
+
+
+
/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
/* for use after a quantifier and before an EXACT-like node -- japhy */
break;
case ALNUM:
REXEC_FBC_CSCAN_PRELOAD(
- LOAD_UTF8_CHARCLASS_ALNUM(),
- swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+ LOAD_UTF8_CHARCLASS_PERL_WORD(),
+ swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
isALNUM(*s)
);
case ALNUML:
);
case NALNUM:
REXEC_FBC_CSCAN_PRELOAD(
- LOAD_UTF8_CHARCLASS_ALNUM(),
- !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+ LOAD_UTF8_CHARCLASS_PERL_WORD(),
+ !swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
!isALNUM(*s)
);
case NALNUML:
);
case SPACE:
REXEC_FBC_CSCAN_PRELOAD(
- LOAD_UTF8_CHARCLASS_SPACE(),
- *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
+ LOAD_UTF8_CHARCLASS_PERL_SPACE(),
+ *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
isSPACE(*s)
);
case SPACEL:
);
case NSPACE:
REXEC_FBC_CSCAN_PRELOAD(
- LOAD_UTF8_CHARCLASS_SPACE(),
- !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
+ LOAD_UTF8_CHARCLASS_PERL_SPACE(),
+ !(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
!isSPACE(*s)
);
case NSPACEL:
);
case DIGIT:
REXEC_FBC_CSCAN_PRELOAD(
- LOAD_UTF8_CHARCLASS_DIGIT(),
- swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+ LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
+ swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
isDIGIT(*s)
);
case DIGITL:
);
case NDIGIT:
REXEC_FBC_CSCAN_PRELOAD(
- LOAD_UTF8_CHARCLASS_DIGIT(),
- !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+ LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
+ !swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
!isDIGIT(*s)
);
case NDIGITL:
sayNO;
break;
/* Special char classes - The defines start on line 129 or so */
- CCC_TRY_AFF( ALNUM, ALNUML, alnum, "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
- CCC_TRY_NEG(NALNUM, NALNUML, alnum, "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
+ CCC_TRY_AFF( ALNUM, ALNUML, perl_word, "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
+ CCC_TRY_NEG(NALNUM, NALNUML, perl_word, "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
- CCC_TRY_AFF( SPACE, SPACEL, space, " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
- CCC_TRY_NEG(NSPACE, NSPACEL, space, " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
+ CCC_TRY_AFF( SPACE, SPACEL, perl_space, " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
+ CCC_TRY_NEG(NSPACE, NSPACEL, perl_space, " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
- CCC_TRY_AFF( DIGIT, DIGITL, digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
- CCC_TRY_NEG(NDIGIT, NDIGITL, digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
+ CCC_TRY_AFF( DIGIT, DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
+ CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
case CLUMP:
if (locinput >= PL_regeol)
($c = $b) =~ s/(\w+)/lc($1)/ge;
is($c , $a, "Using s///e to change case.");
-($c = $a) =~ s/(\w+)/uc($1)/ge;
+($c = $a) =~ s/(\p{IsWord}+)/uc($1)/ge;
is($c , $b, "Using s///e to change case.");
-($c = $b) =~ s/(\w+)/lcfirst($1)/ge;
+($c = $b) =~ s/(\p{IsWord}+)/lcfirst($1)/ge;
is($c , "\x{3c3}FOO.bAR", "Using s///e to change case.");
-($c = $a) =~ s/(\w+)/ucfirst($1)/ge;
+($c = $a) =~ s/(\p{IsWord}+)/ucfirst($1)/ge;
is($c , "\x{3a3}foo.Bar", "Using s///e to change case.");
# #18931: perl5.8.0 bug in \U..\E processing
}
bool
+Perl_is_utf8_perl_space(pTHX_ const U8 *p)
+{
+ dVAR;
+ /* p must be non-NULL: the argument-assert macro below is defined as assert(p). */
+ PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE;
+ /* Returns whatever is_utf8_common() reports for p, using PL_utf8_perl_space and the name "IsPerlSpace"; presumably that SV caches the loaded property table -- confirm in is_utf8_common(). */
+ return is_utf8_common(p, &PL_utf8_perl_space, "IsPerlSpace");
+}
+
+bool
+Perl_is_utf8_perl_word(pTHX_ const U8 *p)
+{
+ dVAR;
+ /* p must be non-NULL: the argument-assert macro below is defined as assert(p). */
+ PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD;
+ /* Returns whatever is_utf8_common() reports for p, using PL_utf8_perl_word and the name "IsPerlWord"; presumably that SV caches the loaded property table -- confirm in is_utf8_common(). */
+ return is_utf8_common(p, &PL_utf8_perl_word, "IsPerlWord");
+}
+
+bool
Perl_is_utf8_digit(pTHX_ const U8 *p)
{
dVAR;
}
bool
+Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
+{
+ dVAR;
+ /* p must be non-NULL: the argument-assert macro below is defined as assert(p). */
+ PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT;
+ /* Returns whatever is_utf8_common() reports for p, using PL_utf8_posix_digit and the name "IsPosixDigit"; presumably that SV caches the loaded property table -- confirm in is_utf8_common(). */
+ return is_utf8_common(p, &PL_utf8_posix_digit, "IsPosixDigit");
+}
+
+bool
Perl_is_utf8_upper(pTHX_ const U8 *p)
{
dVAR;
PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
- return is_utf8_common(p, &PL_utf8_xdigit, "Isxdigit");
+ return is_utf8_common(p, &PL_utf8_xdigit, "IsXDigit");
}
bool