}
while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none) {
+ if ((uv = swash_fetch(rv, s, TRUE)) < none) {
s += UTF8SKIP(s);
matches++;
d = uvuni_to_utf8(d, uv);
send = s + len;
while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none || uv == extra)
+ if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra)
matches++;
s += UTF8SKIP(s);
}
if (squash) {
UV puv = 0xfeedface;
while (s < send) {
- uv = swash_fetch(rv, s);
+ uv = swash_fetch(rv, s, TRUE);
if (d > dend) {
STRLEN clen = d - dstart;
}
else {
while (s < send) {
- uv = swash_fetch(rv, s);
+ uv = swash_fetch(rv, s, TRUE);
if (d > dend) {
STRLEN clen = d - dstart;
STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
#define sv_vsetpvfn(a,b,c,d,e,f,g) Perl_sv_vsetpvfn(aTHX_ a,b,c,d,e,f,g)
#define str_to_version(a) Perl_str_to_version(aTHX_ a)
#define swash_init(a,b,c,d,e) Perl_swash_init(aTHX_ a,b,c,d,e)
-#define swash_fetch(a,b) Perl_swash_fetch(aTHX_ a,b)
+#define swash_fetch(a,b,c) Perl_swash_fetch(aTHX_ a,b,c)
#define taint_env() Perl_taint_env(aTHX)
#define taint_proper(a,b) Perl_taint_proper(aTHX_ a,b)
#define to_utf8_lower(a) Perl_to_utf8_lower(aTHX_ a)
Ap |NV |str_to_version |SV *sv
Ap |SV* |swash_init |char* pkg|char* name|SV* listsv \
|I32 minbits|I32 none
-Ap |UV |swash_fetch |SV *sv|U8 *ptr
+Ap |UV |swash_fetch |SV *sv|U8 *ptr|bool do_utf8
Ap |void |taint_env
Ap |void |taint_proper |const char* f|const char* s
Ap |UV |to_utf8_lower |U8 *p
#define Perl_init_stacks pPerl->Perl_init_stacks
#undef init_stacks
#define init_stacks Perl_init_stacks
+#undef Perl_init_tm
+#define Perl_init_tm pPerl->Perl_init_tm
+#undef init_tm
+#define init_tm Perl_init_tm
#undef Perl_instr
#define Perl_instr pPerl->Perl_instr
#undef instr
#define Perl_mg_size pPerl->Perl_mg_size
#undef mg_size
#define mg_size Perl_mg_size
+#undef Perl_mini_mktime
+#define Perl_mini_mktime pPerl->Perl_mini_mktime
+#undef mini_mktime
+#define mini_mktime Perl_mini_mktime
#undef Perl_moreswitches
#define Perl_moreswitches pPerl->Perl_moreswitches
#undef moreswitches
#define Perl_my_stat pPerl->Perl_my_stat
#undef my_stat
#define my_stat Perl_my_stat
+#undef Perl_my_strftime
+#define Perl_my_strftime pPerl->Perl_my_strftime
+#undef my_strftime
+#define my_strftime Perl_my_strftime
#if defined(MYSWAP)
#undef Perl_my_swap
#define Perl_my_swap pPerl->Perl_my_swap
PERL_CALLCONV void Perl_sv_vsetpvfn(pTHX_ SV* sv, const char* pat, STRLEN patlen, va_list* args, SV** svargs, I32 svmax, bool *maybe_tainted);
PERL_CALLCONV NV Perl_str_to_version(pTHX_ SV *sv);
PERL_CALLCONV SV* Perl_swash_init(pTHX_ char* pkg, char* name, SV* listsv, I32 minbits, I32 none);
-PERL_CALLCONV UV Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr);
+PERL_CALLCONV UV Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8);
PERL_CALLCONV void Perl_taint_env(pTHX);
PERL_CALLCONV void Perl_taint_proper(pTHX_ const char* f, const char* s);
PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ U8 *p);
break;
case 'p':
case 'P':
- { /* a lovely hack--pretend we saw [\pX] instead */
+ {
char* oldregxend = RExC_end;
char* parse_start = RExC_parse;
if (RExC_parse[1] == '{') {
+ /* a lovely hack--pretend we saw [\pX] instead */
RExC_end = strchr(RExC_parse, '}');
if (!RExC_end) {
RExC_parse += 2;
S_regclass(pTHX_ RExC_state_t *pRExC_state)
{
register UV value;
- register IV lastvalue = OOB_UNICODE;
+ register IV prevvalue = OOB_UNICODE;
register IV range = 0;
register regnode *ret;
STRLEN numlen;
register char *e;
char *parse_start = RExC_parse; /* MJD */
UV n;
- bool dont_optimize_invert = FALSE;
+ bool optimize_invert = TRUE;
ret = reganode(pRExC_state, ANYOF, 0);
rangebegin = RExC_parse;
if (UTF) {
value = utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, 0);
+ RExC_end - RExC_parse,
+ &numlen, 0);
RExC_parse += numlen;
}
else
RExC_parse - rangebegin,
RExC_parse - rangebegin,
rangebegin);
- if (lastvalue < 256) {
- ANYOF_BITMAP_SET(ret, lastvalue);
+ if (prevvalue < 256) {
+ ANYOF_BITMAP_SET(ret, prevvalue);
ANYOF_BITMAP_SET(ret, '-');
}
else {
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
Perl_sv_catpvf(aTHX_ listsv,
- "%04"UVxf"\n%04"UVxf"\n", (UV)lastvalue, (UV) '-');
+ "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
}
}
}
if (!SIZE_ONLY) {
+ if (namedclass > OOB_NAMEDCLASS)
+ optimize_invert = FALSE;
/* Possible truncation here but in some 64-bit environments
* the compiler gets heartburn about switch on 64-bit values.
* A similar issue a little earlier when switching on value.
if (isALNUM(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");
break;
case ANYOF_NALNUM:
if (!isALNUM(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
break;
case ANYOF_ALNUMC:
if (isALNUMC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
break;
case ANYOF_NALNUMC:
if (!isALNUMC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
break;
case ANYOF_ALPHA:
if (isALPHA(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
break;
case ANYOF_NALPHA:
if (!isALPHA(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
break;
case ANYOF_ASCII:
}
#endif /* EBCDIC */
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
break;
case ANYOF_NASCII:
}
#endif /* EBCDIC */
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
break;
case ANYOF_BLANK:
if (isBLANK(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
break;
case ANYOF_NBLANK:
if (!isBLANK(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
break;
case ANYOF_CNTRL:
if (isCNTRL(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
break;
case ANYOF_NCNTRL:
if (!isCNTRL(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
break;
case ANYOF_DIGIT:
for (value = '0'; value <= '9'; value++)
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
break;
case ANYOF_NDIGIT:
for (value = '9' + 1; value < 256; value++)
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
break;
case ANYOF_GRAPH:
if (isGRAPH(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
break;
case ANYOF_NGRAPH:
if (!isGRAPH(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
break;
case ANYOF_LOWER:
if (isLOWER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
break;
case ANYOF_NLOWER:
if (!isLOWER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
break;
case ANYOF_PRINT:
if (isPRINT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
break;
case ANYOF_NPRINT:
if (!isPRINT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
break;
case ANYOF_PSXSPC:
if (isPSXSPC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
break;
case ANYOF_NPSXSPC:
if (!isPSXSPC(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
break;
case ANYOF_PUNCT:
if (isPUNCT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
break;
case ANYOF_NPUNCT:
if (!isPUNCT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
break;
case ANYOF_SPACE:
if (isSPACE(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
break;
case ANYOF_NSPACE:
if (!isSPACE(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
break;
case ANYOF_UPPER:
if (isUPPER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
break;
case ANYOF_NUPPER:
if (!isUPPER(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
break;
case ANYOF_XDIGIT:
if (isXDIGIT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
break;
case ANYOF_NXDIGIT:
if (!isXDIGIT(value))
ANYOF_BITMAP_SET(ret, value);
}
- dont_optimize_invert = TRUE;
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
break;
default:
} /* end of namedclass \blah */
if (range) {
- if (((lastvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
- ((NATIVE_TO_UNI(lastvalue) > NATIVE_TO_UNI(value)) && (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
+ if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
+ ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) &&
+ (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
Simple_vFAIL4("Invalid [] range \"%*.*s\"",
RExC_parse - rangebegin,
RExC_parse - rangebegin,
rangebegin);
+ range = 0; /* not a valid range */
}
- range = 0; /* not a true range */
}
else {
- lastvalue = value; /* save the beginning of the range */
+ prevvalue = value; /* save the beginning of the range */
if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
RExC_parse[1] != ']') {
RExC_parse++;
/* now is the next time */
if (!SIZE_ONLY) {
- if (lastvalue < 256 && value < 256) {
-#ifdef EBCDIC /* EBCDIC, for example. */
- if (PL_hints & HINT_RE_ASCIIR) {
- IV i;
+ IV i;
+
+ if (prevvalue < 256) {
+ IV ceilvalue = value < 256 ? value : 255;
+
+#ifdef EBCDIC
/* New style scheme for ranges:
- * after :
* use re 'asciir';
* do ranges in ASCII/Unicode space
*/
- for (i = NATIVE_TO_ASCII(lastvalue) ; i <= NATIVE_TO_ASCII(value); i++)
- ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
+ for (i = NATIVE_TO_ASCII(prevvalue);
+ i <= NATIVE_TO_ASCII(ceilvalue);
+ i++)
+ ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
}
- else if ((isLOWER(lastvalue) && isLOWER(value)) ||
- (isUPPER(lastvalue) && isUPPER(value)))
+ else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+ (isUPPER(prevvalue) && isUPPER(ceilvalue)))
{
- IV i;
- if (isLOWER(lastvalue)) {
- for (i = lastvalue; i <= value; i++)
+ if (isLOWER(prevvalue)) {
+ for (i = prevvalue; i <= ceilvalue; i++)
if (isLOWER(i))
ANYOF_BITMAP_SET(ret, i);
} else {
- for (i = lastvalue; i <= value; i++)
+ for (i = prevvalue; i <= ceilvalue; i++)
if (isUPPER(i))
ANYOF_BITMAP_SET(ret, i);
}
}
else
#endif
- for ( ; lastvalue <= value; lastvalue++)
- ANYOF_BITMAP_SET(ret, lastvalue);
- } else {
+ for (i = prevvalue; i <= ceilvalue; i++)
+ ANYOF_BITMAP_SET(ret, i);
+ }
+ if (value > 255) {
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
- if (lastvalue < value)
+ if (prevvalue < value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
- (UV)lastvalue, (UV)value);
- else
+ (UV)prevvalue, (UV)value);
+ else if (prevvalue == value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
(UV)value);
}
}
/* optimize inverted simple patterns (e.g. [^a-z]) */
- if (!SIZE_ONLY && !dont_optimize_invert &&
+ if (!SIZE_ONLY && optimize_invert &&
/* If the only flag is inversion. */
(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
for (i = 0; i <= 256; i++) { /* just the first 256 */
U8 *e = uvchr_to_utf8(s, i);
- if (i < 256 && swash_fetch(sw, s)) {
+ if (i < 256 && swash_fetch(sw, s, TRUE)) {
if (rangestart == -1)
rangestart = i;
} else if (rangestart != -1) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (tmp == !(OP(c) == BOUND ?
- swash_fetch(PL_utf8_alnum, (U8*)s) :
+ swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ?
- swash_fetch(PL_utf8_alnum, (U8*)s) :
+ swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
else if ((norun || regtry(prog, s)))
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
- if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
+ if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
while (s < strend) {
- if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
+ if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
while (s < strend) {
- if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
+ if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
while (s < strend) {
- if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
+ if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
while (s < strend) {
- if (swash_fetch(PL_utf8_digit,(U8*)s)) {
+ if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
while (s < strend) {
- if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
+ if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
sayNO;
if (do_utf8) {
if (!(OP(scan) == ALNUM
- ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+ ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput)))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(alnum,"a");
if (OP(scan) == NALNUM
- ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+ ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput))
{
sayNO;
if (OP(scan) == BOUND || OP(scan) == NBOUND) {
ln = isALNUM_uni(ln);
LOAD_UTF8_CHARCLASS(alnum,"a");
- n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
+ n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
}
else {
ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
if (UTF8_IS_CONTINUED(nextchr)) {
LOAD_UTF8_CHARCLASS(space," ");
if (!(OP(scan) == SPACE
- ? swash_fetch(PL_utf8_space, (U8*)locinput)
+ ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput)))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(space," ");
if (OP(scan) == NSPACE
- ? swash_fetch(PL_utf8_space, (U8*)locinput)
+ ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
if (!(OP(scan) == DIGIT
- ? swash_fetch(PL_utf8_digit, (U8*)locinput)
+ ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput)))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS(digit,"0");
if (OP(scan) == NDIGIT
- ? swash_fetch(PL_utf8_digit, (U8*)locinput)
+ ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput))
{
sayNO;
break;
case CLUMP:
LOAD_UTF8_CHARCLASS(mark,"~");
- if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
+ if (locinput >= PL_regeol ||
+ swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
sayNO;
locinput += PL_utf8skip[nextchr];
- while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
+ while (locinput < PL_regeol &&
+ swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
locinput += UTF8SKIP(locinput);
if (locinput > PL_regeol)
sayNO;
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (hardcount < max && scan < loceol &&
- swash_fetch(PL_utf8_alnum, (U8*)scan)) {
+ swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(alnum,"a");
while (hardcount < max && scan < loceol &&
- !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
+ !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(space," ");
while (hardcount < max && scan < loceol &&
- (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
+ (*scan == ' ' ||
+ swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
scan += UTF8SKIP(scan);
hardcount++;
}
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(space," ");
while (hardcount < max && scan < loceol &&
- !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
+ !(*scan == ' ' ||
+ swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
scan += UTF8SKIP(scan);
hardcount++;
}
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(digit,"0");
while (hardcount < max && scan < loceol &&
- swash_fetch(PL_utf8_digit,(U8*)scan)) {
+ swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
loceol = PL_regeol;
LOAD_UTF8_CHARCLASS(digit,"0");
while (hardcount < max && scan < loceol &&
- !swash_fetch(PL_utf8_digit,(U8*)scan)) {
+ !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
scan += UTF8SKIP(scan);
hardcount++;
}
char flags = ANYOF_FLAGS(n);
bool match = FALSE;
UV c;
- STRLEN len;
+ STRLEN len = 0;
- if (do_utf8)
- c = utf8_to_uvchr(p, &len);
- else
- c = *p;
+ c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
if (do_utf8 || (flags & ANYOF_UNICODE)) {
if (do_utf8 && !ANYOF_RUNTIME(n)) {
if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
match = TRUE;
}
- if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
+ if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
match = TRUE;
if (!match) {
SV *sw = regclass_swash(n, TRUE, 0);
if (sw) {
- if (swash_fetch(sw, p))
+ if (swash_fetch(sw, p, do_utf8))
match = TRUE;
else if (flags & ANYOF_FOLD) {
U8 tmpbuf[UTF8_MAXLEN+1];
}
else
uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
- if (swash_fetch(sw, tmpbuf))
+ if (swash_fetch(sw, tmpbuf, do_utf8))
match = TRUE;
}
}
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
- I32 f;
+ I32 f;
if (flags & ANYOF_LOCALE) {
PL_reg_flags |= RF_tainted;
# that does fit that format, add it to op/re_tests, not here.
$| = 1;
-print "1..587\n";
+
+print "1..615\n";
BEGIN {
chdir 't' if -d 't';
{
# the second half of 20001028.003
+ my $X = '';
$X =~ s/^/chr(1488)/e;
print "not " unless length $X == 1 && ord($X) == 1488;
print "ok 260\n";
"\0" => 'Cc',
);
- for my $char (keys %s) {
+ for my $char (map { s/^\S+ //; $_ }
+ sort map { sprintf("%06x", ord($_))." $_" } keys %s) {
my $class = $s{$char};
- my $code = sprintf("%04x", ord($char));
- printf "# 0x$code\n";
+ my $code = sprintf("%06x", ord($char));
+ printf "#\n# 0x$code\n#\n";
print "# IsAlpha\n";
if ($class =~ /^[LM]/) {
print "not " unless $char =~ /\p{IsAlpha}/;
print "ok $test\n"; $test++;
}
print "# IsASCII\n";
- if ($code <= 127) {
+ if ($code le '00007f') {
print "not " unless $char =~ /\p{IsASCII}/;
print "ok $test\n"; $test++;
print "not " if $char =~ /\P{IsASCII}/;
print "not " unless ord($x) == 0x12345678 && length($x) == 1;
print "ok 587\n";
}
+
+{
+ my $x = "\x7f";
+
+ print "not " if $x =~ /[\x80-\xff]/;
+ print "ok 588\n";
+
+ print "not " if $x =~ /[\x80-\x{100}]/;
+ print "ok 589\n";
+
+ print "not " if $x =~ /[\x{100}]/;
+ print "ok 590\n";
+
+ print "not " if $x =~ /\p{InLatin1Supplement}/;
+ print "ok 591\n";
+
+ print "not " unless $x =~ /\P{InLatin1Supplement}/;
+ print "ok 592\n";
+
+ print "not " if $x =~ /\p{InLatinExtendedA}/;
+ print "ok 593\n";
+
+ print "not " unless $x =~ /\P{InLatinExtendedA}/;
+ print "ok 594\n";
+}
+
+{
+ my $x = "\x80";
+
+ print "not " unless $x =~ /[\x80-\xff]/;
+ print "ok 595\n";
+
+ print "not " unless $x =~ /[\x80-\x{100}]/;
+ print "ok 596\n";
+
+ print "not " if $x =~ /[\x{100}]/;
+ print "ok 597\n";
+
+ print "not " unless $x =~ /\p{InLatin1Supplement}/;
+ print "ok 598\n";
+
+ print "not " if $x =~ /\P{InLatin1Supplement}/;
+ print "ok 599\n";
+
+ print "not " if $x =~ /\p{InLatinExtendedA}/;
+ print "ok 600\n";
+
+ print "not " unless $x =~ /\P{InLatinExtendedA}/;
+ print "ok 601\n";
+}
+
+{
+ my $x = "\xff";
+
+ print "not " unless $x =~ /[\x80-\xff]/;
+ print "ok 602\n";
+
+ print "not " unless $x =~ /[\x80-\x{100}]/;
+ print "ok 603\n";
+
+ print "not " if $x =~ /[\x{100}]/;
+ print "ok 604\n";
+
+ print "not " unless $x =~ /\p{InLatin1Supplement}/;
+ print "ok 605\n";
+
+ print "not " if $x =~ /\P{InLatin1Supplement}/;
+ print "ok 606\n";
+
+ print "not " if $x =~ /\p{InLatinExtendedA}/;
+ print "ok 607\n";
+
+ print "not " unless $x =~ /\P{InLatinExtendedA}/;
+ print "ok 608\n";
+}
+
+{
+ my $x = "\x{100}";
+
+ print "not " if $x =~ /[\x80-\xff]/;
+ print "ok 609\n";
+
+ print "not " unless $x =~ /[\x80-\x{100}]/;
+ print "ok 610\n";
+
+ print "not " unless $x =~ /[\x{100}]/;
+ print "ok 611\n";
+
+ print "not " if $x =~ /\p{InLatin1Supplement}/;
+ print "ok 612\n";
+
+ print "not " unless $x =~ /\P{InLatin1Supplement}/;
+ print "ok 613\n";
+
+ print "not " unless $x =~ /\p{InLatinExtendedA}/;
+ print "ok 614\n";
+
+ print "not " if $x =~ /\P{InLatinExtendedA}/;
+ print "ok 615\n";
+}
+
* descendant of isalnum(3), in other words, it doesn't
* contain the '_'. --jhi */
PL_utf8_alnum = swash_init("utf8", "IsWord", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
/* return *p == '_' || is_utf8_alpha(p) || is_utf8_digit(p); */
#ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */
if (!PL_utf8_alnum)
PL_utf8_alnum = swash_init("utf8", "",
sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
#endif
}
return FALSE;
if (!PL_utf8_alnum)
PL_utf8_alnum = swash_init("utf8", "IsAlnumC", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
/* return is_utf8_alpha(p) || is_utf8_digit(p); */
#ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */
if (!PL_utf8_alnum)
PL_utf8_alnum = swash_init("utf8", "",
sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
- return swash_fetch(PL_utf8_alnum, p);
+ return swash_fetch(PL_utf8_alnum, p, TRUE);
#endif
}
return FALSE;
if (!PL_utf8_alpha)
PL_utf8_alpha = swash_init("utf8", "IsAlpha", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_alpha, p);
+ return swash_fetch(PL_utf8_alpha, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_ascii)
PL_utf8_ascii = swash_init("utf8", "IsAscii", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_ascii, p);
+ return swash_fetch(PL_utf8_ascii, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_space)
PL_utf8_space = swash_init("utf8", "IsSpacePerl", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_space, p);
+ return swash_fetch(PL_utf8_space, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_digit)
PL_utf8_digit = swash_init("utf8", "IsDigit", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_digit, p);
+ return swash_fetch(PL_utf8_digit, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_upper)
PL_utf8_upper = swash_init("utf8", "IsUpper", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_upper, p);
+ return swash_fetch(PL_utf8_upper, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_lower)
PL_utf8_lower = swash_init("utf8", "IsLower", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_lower, p);
+ return swash_fetch(PL_utf8_lower, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_cntrl)
PL_utf8_cntrl = swash_init("utf8", "IsCntrl", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_cntrl, p);
+ return swash_fetch(PL_utf8_cntrl, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_graph)
PL_utf8_graph = swash_init("utf8", "IsGraph", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_graph, p);
+ return swash_fetch(PL_utf8_graph, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_print)
PL_utf8_print = swash_init("utf8", "IsPrint", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_print, p);
+ return swash_fetch(PL_utf8_print, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_punct)
PL_utf8_punct = swash_init("utf8", "IsPunct", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_punct, p);
+ return swash_fetch(PL_utf8_punct, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_xdigit)
PL_utf8_xdigit = swash_init("utf8", "IsXDigit", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_xdigit, p);
+ return swash_fetch(PL_utf8_xdigit, p, TRUE);
}
bool
return FALSE;
if (!PL_utf8_mark)
PL_utf8_mark = swash_init("utf8", "IsM", &PL_sv_undef, 0, 0);
- return swash_fetch(PL_utf8_mark, p);
+ return swash_fetch(PL_utf8_mark, p, TRUE);
}
UV
if (!PL_utf8_toupper)
PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
- uv = swash_fetch(PL_utf8_toupper, p);
+ uv = swash_fetch(PL_utf8_toupper, p, TRUE);
return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
}
if (!PL_utf8_totitle)
PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
- uv = swash_fetch(PL_utf8_totitle, p);
+ uv = swash_fetch(PL_utf8_totitle, p, TRUE);
return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
}
if (!PL_utf8_tolower)
PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
- uv = swash_fetch(PL_utf8_tolower, p);
+ uv = swash_fetch(PL_utf8_tolower, p, TRUE);
return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
}
}
UV
-Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
+Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8)
{
HV* hv = (HV*)SvRV(sv);
- /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
- then the "swatch" is a vec() for al the chars which start
- with 0xAA..0xYY
- So the key in the hash is length of encoded char -1
- */
- U32 klen = UTF8SKIP(ptr) - 1;
- U32 off = ptr[klen];
+ U32 klen;
+ U32 off;
STRLEN slen;
STRLEN needents;
U8 *tmps;
U32 bit;
SV *retval;
+ U8 tmputf8[2];
+ UV c = NATIVE_TO_ASCII(*ptr);
+
+ if (!do_utf8 && !UNI_IS_INVARIANT(c)) {
+ tmputf8[0] = UTF8_EIGHT_BIT_HI(c);
+ tmputf8[1] = UTF8_EIGHT_BIT_LO(c);
+ ptr = tmputf8;
+ }
+ /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
+ * then the "swatch" is a vec() for al the chars which start
+ * with 0xAA..0xYY
+ * So the key in the hash (klen) is length of encoded char -1
+ */
+ klen = UTF8SKIP(ptr) - 1;
+ off = ptr[klen];
if (klen == 0)
{
* NB: this code assumes that swatches are never modified, once generated!
*/
- if (hv == PL_last_swash_hv &&
+ if (hv == PL_last_swash_hv &&
klen == PL_last_swash_klen &&
- (!klen || memEQ((char *)ptr,(char *)PL_last_swash_key,klen)) )
+ (!klen || memEQ((char *)ptr, (char *)PL_last_swash_key, klen)) )
{
tmps = PL_last_swash_tmps;
slen = PL_last_swash_slen;
EXTEND(SP,3);
PUSHs((SV*)sv);
/* On EBCDIC & ~(0xA0-1) isn't a useful thing to do */
- PUSHs(sv_2mortal(newSViv((klen) ? (code_point & ~(needents - 1)) : 0)));
+ PUSHs(sv_2mortal(newSViv((klen) ?
+ (code_point & ~(needents - 1)) : 0)));
PUSHs(sv_2mortal(newSViv(needents)));
PUTBACK;
if (call_method("SWASHGET", G_SCALAR))