In character classes one couldn't have 0x80..0xff characters
Jarkko Hietaniemi [Sun, 29 Apr 2001 02:04:46 +0000 (02:04 +0000)]
at the left hand side if there were 0x100.. characters in the
character class.

p4raw-id: //depot/perl@9901

doop.c
embed.h
embed.pl
objXSUB.h
proto.h
regcomp.c
regexec.c
t/op/pat.t
utf8.c

diff --git a/doop.c b/doop.c
index d7baecc..755cbfd 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -344,7 +344,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
     }
 
     while (s < send) {
-       if ((uv = swash_fetch(rv, s)) < none) {
+       if ((uv = swash_fetch(rv, s, TRUE)) < none) {
            s += UTF8SKIP(s);
            matches++;
            d = uvuni_to_utf8(d, uv);
@@ -423,7 +423,7 @@ S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
     send = s + len;
 
     while (s < send) {
-       if ((uv = swash_fetch(rv, s)) < none || uv == extra)
+       if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra)
            matches++;
        s += UTF8SKIP(s);
     }
@@ -491,7 +491,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
     if (squash) {
        UV puv = 0xfeedface;
        while (s < send) {
-           uv = swash_fetch(rv, s);
+           uv = swash_fetch(rv, s, TRUE);
        
            if (d > dend) {
                STRLEN clen = d - dstart;
@@ -546,7 +546,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
     }
     else {
        while (s < send) {
-           uv = swash_fetch(rv, s);
+           uv = swash_fetch(rv, s, TRUE);
            if (d > dend) {
                STRLEN clen = d - dstart;
                STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
diff --git a/embed.h b/embed.h
index 887e9eb..dd5d658 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define sv_vsetpvfn(a,b,c,d,e,f,g)     Perl_sv_vsetpvfn(aTHX_ a,b,c,d,e,f,g)
 #define str_to_version(a)      Perl_str_to_version(aTHX_ a)
 #define swash_init(a,b,c,d,e)  Perl_swash_init(aTHX_ a,b,c,d,e)
-#define swash_fetch(a,b)       Perl_swash_fetch(aTHX_ a,b)
+#define swash_fetch(a,b,c)     Perl_swash_fetch(aTHX_ a,b,c)
 #define taint_env()            Perl_taint_env(aTHX)
 #define taint_proper(a,b)      Perl_taint_proper(aTHX_ a,b)
 #define to_utf8_lower(a)       Perl_to_utf8_lower(aTHX_ a)
index 9e272b8..fcaaaed 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -2089,7 +2089,7 @@ Apd       |void   |sv_vsetpvfn    |SV* sv|const char* pat|STRLEN patlen \
 Ap     |NV     |str_to_version |SV *sv
 Ap     |SV*    |swash_init     |char* pkg|char* name|SV* listsv \
                                |I32 minbits|I32 none
-Ap     |UV     |swash_fetch    |SV *sv|U8 *ptr
+Ap     |UV     |swash_fetch    |SV *sv|U8 *ptr|bool do_utf8
 Ap     |void   |taint_env
 Ap     |void   |taint_proper   |const char* f|const char* s
 Ap     |UV     |to_utf8_lower  |U8 *p
index 99d9a3e..28bed78 100644 (file)
--- a/objXSUB.h
+++ b/objXSUB.h
 #define Perl_init_stacks       pPerl->Perl_init_stacks
 #undef  init_stacks
 #define init_stacks            Perl_init_stacks
+#undef  Perl_init_tm
+#define Perl_init_tm           pPerl->Perl_init_tm
+#undef  init_tm
+#define init_tm                        Perl_init_tm
 #undef  Perl_instr
 #define Perl_instr             pPerl->Perl_instr
 #undef  instr
 #define Perl_mg_size           pPerl->Perl_mg_size
 #undef  mg_size
 #define mg_size                        Perl_mg_size
+#undef  Perl_mini_mktime
+#define Perl_mini_mktime       pPerl->Perl_mini_mktime
+#undef  mini_mktime
+#define mini_mktime            Perl_mini_mktime
 #undef  Perl_moreswitches
 #define Perl_moreswitches      pPerl->Perl_moreswitches
 #undef  moreswitches
 #define Perl_my_stat           pPerl->Perl_my_stat
 #undef  my_stat
 #define my_stat                        Perl_my_stat
+#undef  Perl_my_strftime
+#define Perl_my_strftime       pPerl->Perl_my_strftime
+#undef  my_strftime
+#define my_strftime            Perl_my_strftime
 #if defined(MYSWAP)
 #undef  Perl_my_swap
 #define Perl_my_swap           pPerl->Perl_my_swap
diff --git a/proto.h b/proto.h
index 63fc518..cc4050d 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -808,7 +808,7 @@ PERL_CALLCONV void  Perl_sv_vcatpvfn(pTHX_ SV* sv, const char* pat, STRLEN patlen
 PERL_CALLCONV void     Perl_sv_vsetpvfn(pTHX_ SV* sv, const char* pat, STRLEN patlen, va_list* args, SV** svargs, I32 svmax, bool *maybe_tainted);
 PERL_CALLCONV NV       Perl_str_to_version(pTHX_ SV *sv);
 PERL_CALLCONV SV*      Perl_swash_init(pTHX_ char* pkg, char* name, SV* listsv, I32 minbits, I32 none);
-PERL_CALLCONV UV       Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr);
+PERL_CALLCONV UV       Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8);
 PERL_CALLCONV void     Perl_taint_env(pTHX);
 PERL_CALLCONV void     Perl_taint_proper(pTHX_ const char* f, const char* s);
 PERL_CALLCONV UV       Perl_to_utf8_lower(pTHX_ U8 *p);
index 1cc3a98..20388f1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2799,11 +2799,12 @@ tryagain:
            break;
        case 'p':
        case 'P':
-           {   /* a lovely hack--pretend we saw [\pX] instead */
+           {   
                char* oldregxend = RExC_end;
                 char* parse_start = RExC_parse;
 
                if (RExC_parse[1] == '{') {
+                 /* a lovely hack--pretend we saw [\pX] instead */
                    RExC_end = strchr(RExC_parse, '}');
                    if (!RExC_end) {
                        RExC_parse += 2;
@@ -3259,7 +3260,7 @@ STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state)
 {
     register UV value;
-    register IV lastvalue = OOB_UNICODE;
+    register IV prevvalue = OOB_UNICODE;
     register IV range = 0;
     register regnode *ret;
     STRLEN numlen;
@@ -3270,7 +3271,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     register char *e;
     char *parse_start = RExC_parse; /* MJD */
     UV n;
-    bool dont_optimize_invert = FALSE;
+    bool optimize_invert = TRUE;
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3312,8 +3313,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            rangebegin = RExC_parse;
        if (UTF) {
            value = utf8n_to_uvchr((U8*)RExC_parse,
-                              RExC_end - RExC_parse,
-                              &numlen, 0);
+                                  RExC_end - RExC_parse,
+                                  &numlen, 0);
            RExC_parse += numlen;
        }
        else
@@ -3423,14 +3424,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                               RExC_parse - rangebegin,
                               RExC_parse - rangebegin,
                               rangebegin);
-                   if (lastvalue < 256) {
-                       ANYOF_BITMAP_SET(ret, lastvalue);
+                   if (prevvalue < 256) {
+                       ANYOF_BITMAP_SET(ret, prevvalue);
                        ANYOF_BITMAP_SET(ret, '-');
                    }
                    else {
                        ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
                        Perl_sv_catpvf(aTHX_ listsv,
-                                      "%04"UVxf"\n%04"UVxf"\n", (UV)lastvalue, (UV) '-');
+                                      "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
                    }
                }
 
@@ -3438,6 +3439,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            }
 
            if (!SIZE_ONLY) {
+               if (namedclass > OOB_NAMEDCLASS)
+                   optimize_invert = FALSE;
                /* Possible truncation here but in some 64-bit environments
                 * the compiler gets heartburn about switch on 64-bit values.
                 * A similar issue a little earlier when switching on value.
@@ -3451,7 +3454,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    
                    break;
                case ANYOF_NALNUM:
@@ -3462,7 +3464,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
                    break;
                case ANYOF_ALNUMC:
@@ -3473,7 +3474,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
                    break;
                case ANYOF_NALNUMC:
@@ -3484,7 +3484,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
                    break;
                case ANYOF_ALPHA:
@@ -3495,7 +3494,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
                    break;
                case ANYOF_NALPHA:
@@ -3506,7 +3504,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
                    break;
                case ANYOF_ASCII:
@@ -3529,7 +3526,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        }
 #endif /* EBCDIC */
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
                    break;
                case ANYOF_NASCII:
@@ -3552,7 +3548,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        }
 #endif /* EBCDIC */
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
                    break;
                case ANYOF_BLANK:
@@ -3563,7 +3558,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
                    break;
                case ANYOF_NBLANK:
@@ -3574,7 +3568,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
                    break;
                case ANYOF_CNTRL:
@@ -3585,7 +3578,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
                    break;
                case ANYOF_NCNTRL:
@@ -3596,7 +3588,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
                    break;
                case ANYOF_DIGIT:
@@ -3607,7 +3598,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '0'; value <= '9'; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
                    break;
                case ANYOF_NDIGIT:
@@ -3620,7 +3610,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '9' + 1; value < 256; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
                    break;
                case ANYOF_GRAPH:
@@ -3631,7 +3620,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
                    break;
                case ANYOF_NGRAPH:
@@ -3642,7 +3630,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
                    break;
                case ANYOF_LOWER:
@@ -3653,7 +3640,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
                    break;
                case ANYOF_NLOWER:
@@ -3664,7 +3650,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
                    break;
                case ANYOF_PRINT:
@@ -3675,7 +3660,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
                    break;
                case ANYOF_NPRINT:
@@ -3686,7 +3670,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
                    break;
                case ANYOF_PSXSPC:
@@ -3697,7 +3680,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
                    break;
                case ANYOF_NPSXSPC:
@@ -3708,7 +3690,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
                    break;
                case ANYOF_PUNCT:
@@ -3719,7 +3700,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
                    break;
                case ANYOF_NPUNCT:
@@ -3730,7 +3710,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
                    break;
                case ANYOF_SPACE:
@@ -3741,7 +3720,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
                    break;
                case ANYOF_NSPACE:
@@ -3752,7 +3730,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
                    break;
                case ANYOF_UPPER:
@@ -3763,7 +3740,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
                    break;
                case ANYOF_NUPPER:
@@ -3774,7 +3750,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
                    break;
                case ANYOF_XDIGIT:
@@ -3785,7 +3760,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
                    break;
                case ANYOF_NXDIGIT:
@@ -3796,7 +3770,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
                    break;
                default:
@@ -3810,17 +3783,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        } /* end of namedclass \blah */
 
        if (range) {
-           if (((lastvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
-                ((NATIVE_TO_UNI(lastvalue) > NATIVE_TO_UNI(value)) && (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
+           if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
+                ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) &&
+                (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
                Simple_vFAIL4("Invalid [] range \"%*.*s\"",
                              RExC_parse - rangebegin,
                              RExC_parse - rangebegin,
                              rangebegin);
+               range = 0; /* not a valid range */
            }
-           range = 0; /* not a true range */
        }
        else {
-           lastvalue = value; /* save the beginning of the range */
+           prevvalue = value; /* save the beginning of the range */
            if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
                RExC_parse[1] != ']') {
                RExC_parse++;
@@ -3843,42 +3817,45 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
        /* now is the next time */
        if (!SIZE_ONLY) {
-           if (lastvalue < 256 && value < 256) {
-#ifdef EBCDIC /* EBCDIC, for example. */
-               if (PL_hints & HINT_RE_ASCIIR) {
-                   IV i;
+           IV i;
+
+           if (prevvalue < 256) {
+               IV ceilvalue = value < 256 ? value : 255;
+
+#ifdef EBCDIC
                    /* New style scheme for ranges:
-                    * after :
                     * use re 'asciir';
                     * do ranges in ASCII/Unicode space
                     */
-                   for (i = NATIVE_TO_ASCII(lastvalue) ; i <= NATIVE_TO_ASCII(value); i++)
-                       ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
+                   for (i  = NATIVE_TO_ASCII(prevvalue);
+                        i <= NATIVE_TO_ASCII(ceilvalue);
+                        i++)
+                     ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
                }
-               else if ((isLOWER(lastvalue) && isLOWER(value)) ||
-                   (isUPPER(lastvalue) && isUPPER(value)))
+               else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+                        (isUPPER(prevvalue) && isUPPER(ceilvalue)))
                {
-                   IV i;
-                   if (isLOWER(lastvalue)) {
-                       for (i = lastvalue; i <= value; i++)
+                   if (isLOWER(prevvalue)) {
+                       for (i = prevvalue; i <= ceilvalue; i++)
                            if (isLOWER(i))
                                ANYOF_BITMAP_SET(ret, i);
                    } else {
-                       for (i = lastvalue; i <= value; i++)
+                       for (i = prevvalue; i <= ceilvalue; i++)
                            if (isUPPER(i))
                                ANYOF_BITMAP_SET(ret, i);
                    }
                }
                else
 #endif
-                   for ( ; lastvalue <= value; lastvalue++)
-                       ANYOF_BITMAP_SET(ret, lastvalue);
-           } else {
+                   for (i = prevvalue; i <= ceilvalue; i++)
+                       ANYOF_BITMAP_SET(ret, i);
+         }
+         if (value > 255) {
                ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
-               if (lastvalue < value)
+               if (prevvalue < value)
                    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
-                                  (UV)lastvalue, (UV)value);
-               else
+                                  (UV)prevvalue, (UV)value);
+               else if (prevvalue == value)
                    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
                                   (UV)value);
            }
@@ -3912,7 +3889,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     }
 
     /* optimize inverted simple patterns (e.g. [^a-z]) */
-    if (!SIZE_ONLY && !dont_optimize_invert &&
+    if (!SIZE_ONLY && optimize_invert &&
        /* If the only flag is inversion. */
        (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
        for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
@@ -4448,7 +4425,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
                    for (i = 0; i <= 256; i++) { /* just the first 256 */
                        U8 *e = uvchr_to_utf8(s, i);
                        
-                       if (i < 256 && swash_fetch(sw, s)) {
+                       if (i < 256 && swash_fetch(sw, s, TRUE)) {
                            if (rangestart == -1)
                                rangestart = i;
                        } else if (rangestart != -1) {
index e358d63..c9096f0 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -958,7 +958,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                LOAD_UTF8_CHARCLASS(alnum,"a");
                while (s < strend) {
                    if (tmp == !(OP(c) == BOUND ?
-                                swash_fetch(PL_utf8_alnum, (U8*)s) :
+                                swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
                                 isALNUM_LC_utf8((U8*)s)))
                    {
                        tmp = !tmp;
@@ -1001,7 +1001,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                LOAD_UTF8_CHARCLASS(alnum,"a");
                while (s < strend) {
                    if (tmp == !(OP(c) == NBOUND ?
-                                swash_fetch(PL_utf8_alnum, (U8*)s) :
+                                swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
                                 isALNUM_LC_utf8((U8*)s)))
                        tmp = !tmp;
                    else if ((norun || regtry(prog, s)))
@@ -1029,7 +1029,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(alnum,"a");
                while (s < strend) {
-                   if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
+                   if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
                        if (tmp && (norun || regtry(prog, s)))
                            goto got_it;
                        else
@@ -1087,7 +1087,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(alnum,"a");
                while (s < strend) {
-                   if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
+                   if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
                        if (tmp && (norun || regtry(prog, s)))
                            goto got_it;
                        else
@@ -1145,7 +1145,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(space," ");
                while (s < strend) {
-                   if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
+                   if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
                        if (tmp && (norun || regtry(prog, s)))
                            goto got_it;
                        else
@@ -1203,7 +1203,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(space," ");
                while (s < strend) {
-                   if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
+                   if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
                        if (tmp && (norun || regtry(prog, s)))
                            goto got_it;
                        else
@@ -1261,7 +1261,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(digit,"0");
                while (s < strend) {
-                   if (swash_fetch(PL_utf8_digit,(U8*)s)) {
+                   if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
                        if (tmp && (norun || regtry(prog, s)))
                            goto got_it;
                        else
@@ -1319,7 +1319,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(digit,"0");
                while (s < strend) {
-                   if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
+                   if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
                        if (tmp && (norun || regtry(prog, s)))
                            goto got_it;
                        else
@@ -2214,7 +2214,7 @@ S_regmatch(pTHX_ regnode *prog)
                sayNO;
            if (do_utf8) {
                if (!(OP(scan) == ALNUM
-                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+                     ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
                      : isALNUM_LC_utf8((U8*)locinput)))
                {
                    sayNO;
@@ -2237,7 +2237,7 @@ S_regmatch(pTHX_ regnode *prog)
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(alnum,"a");
                if (OP(scan) == NALNUM
-                   ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
+                   ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
                    : isALNUM_LC_utf8((U8*)locinput))
                {
                    sayNO;
@@ -2269,7 +2269,7 @@ S_regmatch(pTHX_ regnode *prog)
                if (OP(scan) == BOUND || OP(scan) == NBOUND) {
                    ln = isALNUM_uni(ln);
                    LOAD_UTF8_CHARCLASS(alnum,"a");
-                   n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
+                   n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
                }
                else {
                    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
@@ -2302,7 +2302,7 @@ S_regmatch(pTHX_ regnode *prog)
                if (UTF8_IS_CONTINUED(nextchr)) {
                    LOAD_UTF8_CHARCLASS(space," ");
                    if (!(OP(scan) == SPACE
-                         ? swash_fetch(PL_utf8_space, (U8*)locinput)
+                         ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
                          : isSPACE_LC_utf8((U8*)locinput)))
                    {
                        sayNO;
@@ -2332,7 +2332,7 @@ S_regmatch(pTHX_ regnode *prog)
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(space," ");
                if (OP(scan) == NSPACE
-                   ? swash_fetch(PL_utf8_space, (U8*)locinput)
+                   ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
                    : isSPACE_LC_utf8((U8*)locinput))
                {
                    sayNO;
@@ -2355,7 +2355,7 @@ S_regmatch(pTHX_ regnode *prog)
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(digit,"0");
                if (!(OP(scan) == DIGIT
-                     ? swash_fetch(PL_utf8_digit, (U8*)locinput)
+                     ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
                      : isDIGIT_LC_utf8((U8*)locinput)))
                {
                    sayNO;
@@ -2378,7 +2378,7 @@ S_regmatch(pTHX_ regnode *prog)
            if (do_utf8) {
                LOAD_UTF8_CHARCLASS(digit,"0");
                if (OP(scan) == NDIGIT
-                   ? swash_fetch(PL_utf8_digit, (U8*)locinput)
+                   ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
                    : isDIGIT_LC_utf8((U8*)locinput))
                {
                    sayNO;
@@ -2394,10 +2394,12 @@ S_regmatch(pTHX_ regnode *prog)
            break;
        case CLUMP:
            LOAD_UTF8_CHARCLASS(mark,"~");
-           if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
+           if (locinput >= PL_regeol ||
+               swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
                sayNO;
            locinput += PL_utf8skip[nextchr];
-           while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
+           while (locinput < PL_regeol &&
+                  swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
                locinput += UTF8SKIP(locinput);
            if (locinput > PL_regeol)
                sayNO;
@@ -3623,7 +3625,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
            loceol = PL_regeol;
            LOAD_UTF8_CHARCLASS(alnum,"a");
            while (hardcount < max && scan < loceol &&
-                  swash_fetch(PL_utf8_alnum, (U8*)scan)) {
+                  swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
@@ -3651,7 +3653,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
            loceol = PL_regeol;
            LOAD_UTF8_CHARCLASS(alnum,"a");
            while (hardcount < max && scan < loceol &&
-                  !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
+                  !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
@@ -3679,7 +3681,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
            loceol = PL_regeol;
            LOAD_UTF8_CHARCLASS(space," ");
            while (hardcount < max && scan < loceol &&
-                  (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
+                  (*scan == ' ' ||
+                   swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
@@ -3707,7 +3710,8 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
            loceol = PL_regeol;
            LOAD_UTF8_CHARCLASS(space," ");
            while (hardcount < max && scan < loceol &&
-                  !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
+                  !(*scan == ' ' ||
+                    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
@@ -3735,7 +3739,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
            loceol = PL_regeol;
            LOAD_UTF8_CHARCLASS(digit,"0");
            while (hardcount < max && scan < loceol &&
-                  swash_fetch(PL_utf8_digit,(U8*)scan)) {
+                  swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
@@ -3749,7 +3753,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
            loceol = PL_regeol;
            LOAD_UTF8_CHARCLASS(digit,"0");
            while (hardcount < max && scan < loceol &&
-                  !swash_fetch(PL_utf8_digit,(U8*)scan)) {
+                  !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
@@ -3879,25 +3883,22 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
     char flags = ANYOF_FLAGS(n);
     bool match = FALSE;
     UV c;
-    STRLEN len;
+    STRLEN len = 0;
 
-    if (do_utf8)
-       c = utf8_to_uvchr(p, &len);
-    else
-       c = *p;
+    c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
 
     if (do_utf8 || (flags & ANYOF_UNICODE)) {
        if (do_utf8 && !ANYOF_RUNTIME(n)) {
            if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
                match = TRUE;
        }
-       if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
+       if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
            match = TRUE;
        if (!match) {
            SV *sw = regclass_swash(n, TRUE, 0);
        
            if (sw) {
-               if (swash_fetch(sw, p))
+               if (swash_fetch(sw, p, do_utf8))
                    match = TRUE;
                else if (flags & ANYOF_FOLD) {
                    U8 tmpbuf[UTF8_MAXLEN+1];
@@ -3908,7 +3909,7 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
                    }
                    else
                        uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
-                   if (swash_fetch(sw, tmpbuf))
+                   if (swash_fetch(sw, tmpbuf, do_utf8))
                        match = TRUE;
                }
            }
@@ -3918,7 +3919,7 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
        if (ANYOF_BITMAP_TEST(n, c))
            match = TRUE;
        else if (flags & ANYOF_FOLD) {
-           I32 f;
+         I32 f;
 
            if (flags & ANYOF_LOCALE) {
                PL_reg_flags |= RF_tainted;
index 9130454..1be7234 100755 (executable)
@@ -5,7 +5,8 @@
 # that does fit that format, add it to op/re_tests, not here.
 
 $| = 1;
-print "1..587\n";
+
+print "1..615\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1302,6 +1303,7 @@ print "ok 247\n";
 {
     # the second half of 20001028.003
 
+    my $X = '';
     $X =~ s/^/chr(1488)/e;
     print "not " unless length $X == 1 && ord($X) == 1488;
     print "ok 260\n";
@@ -1353,10 +1355,11 @@ print "ok 247\n";
             "\0"                               => 'Cc',
             );
        
-    for my $char (keys %s) {
+    for my $char (map { s/^\S+ //; $_ }
+                    sort map { sprintf("%06x", ord($_))." $_" } keys %s) {
        my $class = $s{$char};
-       my $code  = sprintf("%04x", ord($char));
-       printf "# 0x$code\n";
+       my $code  = sprintf("%06x", ord($char));
+       printf "#\n# 0x$code\n#\n";
        print "# IsAlpha\n";
        if ($class =~ /^[LM]/) {
            print "not " unless $char =~ /\p{IsAlpha}/;
@@ -1382,7 +1385,7 @@ print "ok 247\n";
            print "ok $test\n"; $test++;
        }
        print "# IsASCII\n";
-       if ($code <= 127) {
+       if ($code le '00007f') {
            print "not " unless $char =~ /\p{IsASCII}/;
            print "ok $test\n"; $test++;
            print "not " if     $char =~ /\P{IsASCII}/;
@@ -1583,3 +1586,104 @@ EOT
     print "not " unless ord($x) == 0x12345678 && length($x) == 1;
     print "ok 587\n";
 }
+
+{
+    my $x = "\x7f";
+
+    print "not " if     $x =~ /[\x80-\xff]/;
+    print "ok 588\n";
+
+    print "not " if     $x =~ /[\x80-\x{100}]/;
+    print "ok 589\n";
+
+    print "not " if     $x =~ /[\x{100}]/;
+    print "ok 590\n";
+
+    print "not " if     $x =~ /\p{InLatin1Supplement}/;
+    print "ok 591\n";
+
+    print "not " unless $x =~ /\P{InLatin1Supplement}/;
+    print "ok 592\n";
+
+    print "not " if     $x =~ /\p{InLatinExtendedA}/;
+    print "ok 593\n";
+
+    print "not " unless $x =~ /\P{InLatinExtendedA}/;
+    print "ok 594\n";
+}
+
+{
+    my $x = "\x80";
+
+    print "not " unless $x =~ /[\x80-\xff]/;
+    print "ok 595\n";
+
+    print "not " unless $x =~ /[\x80-\x{100}]/;
+    print "ok 596\n";
+
+    print "not " if     $x =~ /[\x{100}]/;
+    print "ok 597\n";
+
+    print "not " unless $x =~ /\p{InLatin1Supplement}/;
+    print "ok 598\n";
+
+    print "not " if    $x =~ /\P{InLatin1Supplement}/;
+    print "ok 599\n";
+
+    print "not " if     $x =~ /\p{InLatinExtendedA}/;
+    print "ok 600\n";
+
+    print "not " unless $x =~ /\P{InLatinExtendedA}/;
+    print "ok 601\n";
+}
+
+{
+    my $x = "\xff";
+
+    print "not " unless $x =~ /[\x80-\xff]/;
+    print "ok 602\n";
+
+    print "not " unless $x =~ /[\x80-\x{100}]/;
+    print "ok 603\n";
+
+    print "not " if     $x =~ /[\x{100}]/;
+    print "ok 604\n";
+
+    print "not " unless $x =~ /\p{InLatin1Supplement}/;
+    print "ok 605\n";
+
+    print "not " if     $x =~ /\P{InLatin1Supplement}/;
+    print "ok 606\n";
+
+    print "not " if     $x =~ /\p{InLatinExtendedA}/;
+    print "ok 607\n";
+
+    print "not " unless $x =~ /\P{InLatinExtendedA}/;
+    print "ok 608\n";
+}
+
+{
+    my $x = "\x{100}";
+
+    print "not " if     $x =~ /[\x80-\xff]/;
+    print "ok 609\n";
+
+    print "not " unless $x =~ /[\x80-\x{100}]/;
+    print "ok 610\n";
+
+    print "not " unless $x =~ /[\x{100}]/;
+    print "ok 611\n";
+
+    print "not " if     $x =~ /\p{InLatin1Supplement}/;
+    print "ok 612\n";
+
+    print "not " unless $x =~ /\P{InLatin1Supplement}/;
+    print "ok 613\n";
+
+    print "not " unless $x =~ /\p{InLatinExtendedA}/;
+    print "ok 614\n";
+
+    print "not " if     $x =~ /\P{InLatinExtendedA}/;
+    print "ok 615\n";
+}
+
diff --git a/utf8.c b/utf8.c
index fda9920..b682cf6 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1045,13 +1045,13 @@ Perl_is_utf8_alnum(pTHX_ U8 *p)
         * descendant of isalnum(3), in other words, it doesn't
         * contain the '_'. --jhi */
        PL_utf8_alnum = swash_init("utf8", "IsWord", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_alnum, p);
+    return swash_fetch(PL_utf8_alnum, p, TRUE);
 /*    return *p == '_' || is_utf8_alpha(p) || is_utf8_digit(p); */
 #ifdef SURPRISINGLY_SLOWER  /* probably because alpha is usually true */
     if (!PL_utf8_alnum)
        PL_utf8_alnum = swash_init("utf8", "",
            sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
-    return swash_fetch(PL_utf8_alnum, p);
+    return swash_fetch(PL_utf8_alnum, p, TRUE);
 #endif
 }
 
@@ -1062,13 +1062,13 @@ Perl_is_utf8_alnumc(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_alnum)
        PL_utf8_alnum = swash_init("utf8", "IsAlnumC", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_alnum, p);
+    return swash_fetch(PL_utf8_alnum, p, TRUE);
 /*    return is_utf8_alpha(p) || is_utf8_digit(p); */
 #ifdef SURPRISINGLY_SLOWER  /* probably because alpha is usually true */
     if (!PL_utf8_alnum)
        PL_utf8_alnum = swash_init("utf8", "",
            sv_2mortal(newSVpv("+utf8::IsAlpha\n+utf8::IsDigit\n005F\n",0)), 0, 0);
-    return swash_fetch(PL_utf8_alnum, p);
+    return swash_fetch(PL_utf8_alnum, p, TRUE);
 #endif
 }
 
@@ -1085,7 +1085,7 @@ Perl_is_utf8_alpha(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_alpha)
        PL_utf8_alpha = swash_init("utf8", "IsAlpha", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_alpha, p);
+    return swash_fetch(PL_utf8_alpha, p, TRUE);
 }
 
 bool
@@ -1095,7 +1095,7 @@ Perl_is_utf8_ascii(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_ascii)
        PL_utf8_ascii = swash_init("utf8", "IsAscii", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_ascii, p);
+    return swash_fetch(PL_utf8_ascii, p, TRUE);
 }
 
 bool
@@ -1105,7 +1105,7 @@ Perl_is_utf8_space(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_space)
        PL_utf8_space = swash_init("utf8", "IsSpacePerl", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_space, p);
+    return swash_fetch(PL_utf8_space, p, TRUE);
 }
 
 bool
@@ -1115,7 +1115,7 @@ Perl_is_utf8_digit(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_digit)
        PL_utf8_digit = swash_init("utf8", "IsDigit", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_digit, p);
+    return swash_fetch(PL_utf8_digit, p, TRUE);
 }
 
 bool
@@ -1125,7 +1125,7 @@ Perl_is_utf8_upper(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_upper)
        PL_utf8_upper = swash_init("utf8", "IsUpper", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_upper, p);
+    return swash_fetch(PL_utf8_upper, p, TRUE);
 }
 
 bool
@@ -1135,7 +1135,7 @@ Perl_is_utf8_lower(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_lower)
        PL_utf8_lower = swash_init("utf8", "IsLower", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_lower, p);
+    return swash_fetch(PL_utf8_lower, p, TRUE);
 }
 
 bool
@@ -1145,7 +1145,7 @@ Perl_is_utf8_cntrl(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_cntrl)
        PL_utf8_cntrl = swash_init("utf8", "IsCntrl", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_cntrl, p);
+    return swash_fetch(PL_utf8_cntrl, p, TRUE);
 }
 
 bool
@@ -1155,7 +1155,7 @@ Perl_is_utf8_graph(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_graph)
        PL_utf8_graph = swash_init("utf8", "IsGraph", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_graph, p);
+    return swash_fetch(PL_utf8_graph, p, TRUE);
 }
 
 bool
@@ -1165,7 +1165,7 @@ Perl_is_utf8_print(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_print)
        PL_utf8_print = swash_init("utf8", "IsPrint", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_print, p);
+    return swash_fetch(PL_utf8_print, p, TRUE);
 }
 
 bool
@@ -1175,7 +1175,7 @@ Perl_is_utf8_punct(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_punct)
        PL_utf8_punct = swash_init("utf8", "IsPunct", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_punct, p);
+    return swash_fetch(PL_utf8_punct, p, TRUE);
 }
 
 bool
@@ -1185,7 +1185,7 @@ Perl_is_utf8_xdigit(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_xdigit)
        PL_utf8_xdigit = swash_init("utf8", "IsXDigit", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_xdigit, p);
+    return swash_fetch(PL_utf8_xdigit, p, TRUE);
 }
 
 bool
@@ -1195,7 +1195,7 @@ Perl_is_utf8_mark(pTHX_ U8 *p)
        return FALSE;
     if (!PL_utf8_mark)
        PL_utf8_mark = swash_init("utf8", "IsM", &PL_sv_undef, 0, 0);
-    return swash_fetch(PL_utf8_mark, p);
+    return swash_fetch(PL_utf8_mark, p, TRUE);
 }
 
 UV
@@ -1205,7 +1205,7 @@ Perl_to_utf8_upper(pTHX_ U8 *p)
 
     if (!PL_utf8_toupper)
        PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
-    uv = swash_fetch(PL_utf8_toupper, p);
+    uv = swash_fetch(PL_utf8_toupper, p, TRUE);
     return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
 }
 
@@ -1216,7 +1216,7 @@ Perl_to_utf8_title(pTHX_ U8 *p)
 
     if (!PL_utf8_totitle)
        PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
-    uv = swash_fetch(PL_utf8_totitle, p);
+    uv = swash_fetch(PL_utf8_totitle, p, TRUE);
     return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
 }
 
@@ -1227,7 +1227,7 @@ Perl_to_utf8_lower(pTHX_ U8 *p)
 
     if (!PL_utf8_tolower)
        PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
-    uv = swash_fetch(PL_utf8_tolower, p);
+    uv = swash_fetch(PL_utf8_tolower, p, TRUE);
     return uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p,0);
 }
 
@@ -1282,21 +1282,31 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none)
 }
 
 UV
-Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
+Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8)
 {
     HV* hv = (HV*)SvRV(sv);
-    /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
-       then the "swatch" is a vec() for al the chars which start
-       with 0xAA..0xYY
-       So the key in the hash is length of encoded char -1
-     */
-    U32 klen = UTF8SKIP(ptr) - 1;
-    U32 off = ptr[klen];
+    U32 klen;
+    U32 off;
     STRLEN slen;
     STRLEN needents;
     U8 *tmps;
     U32 bit;
     SV *retval;
+    U8 tmputf8[2];
+    UV c = NATIVE_TO_ASCII(*ptr);
+
+    if (!do_utf8 && !UNI_IS_INVARIANT(c)) {
+        tmputf8[0] = UTF8_EIGHT_BIT_HI(c);
+        tmputf8[1] = UTF8_EIGHT_BIT_LO(c);
+        ptr = tmputf8;
+    }
+    /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
+     * then the "swatch" is a vec() for al the chars which start
+     * with 0xAA..0xYY
+     * So the key in the hash (klen) is length of encoded char -1
+     */
+    klen = UTF8SKIP(ptr) - 1;
+    off  = ptr[klen];
 
     if (klen == 0)
      {
@@ -1322,9 +1332,9 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
      * NB: this code assumes that swatches are never modified, once generated!
      */
 
-    if (hv == PL_last_swash_hv &&
+    if (hv   == PL_last_swash_hv &&
        klen == PL_last_swash_klen &&
-       (!klen || memEQ((char *)ptr,(char *)PL_last_swash_key,klen)) )
+       (!klen || memEQ((char *)ptr, (char *)PL_last_swash_key, klen)) )
     {
        tmps = PL_last_swash_tmps;
        slen = PL_last_swash_slen;
@@ -1348,7 +1358,8 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
            EXTEND(SP,3);
            PUSHs((SV*)sv);
            /* On EBCDIC & ~(0xA0-1) isn't a useful thing to do */
-           PUSHs(sv_2mortal(newSViv((klen) ? (code_point & ~(needents - 1)) : 0)));
+           PUSHs(sv_2mortal(newSViv((klen) ?
+                                    (code_point & ~(needents - 1)) : 0)));
            PUSHs(sv_2mortal(newSViv(needents)));
            PUTBACK;
            if (call_method("SWASHGET", G_SCALAR))