Integrate perlio:
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 547f756..997044f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1930,6 +1930,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->reganch |= ROPT_LOOKBEHIND_SEEN;
     if (RExC_seen & REG_SEEN_EVAL)
        r->reganch |= ROPT_EVAL_SEEN;
+    if (RExC_seen & REG_SEEN_SANY)
+       r->reganch |= ROPT_SANY_SEEN;
     Newz(1002, r->startp, RExC_npar, I32);
     Newz(1002, r->endp, RExC_npar, I32);
     PL_regdata = r->data; /* for regprop() */
@@ -2638,6 +2640,7 @@ tryagain:
            break;
        case 'C':
            ret = reg_node(pRExC_state, SANY);
+           RExC_seen |= REG_SEEN_SANY;
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
            break;
@@ -3182,6 +3185,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     register char *e;
     UV n;
     bool dont_optimize_invert = FALSE;
+#ifdef ALPHAS_HAVE_GAPS
+    bool explicit_alpha      = TRUE;
+    bool explicit_alpha_prev = TRUE;
+#endif
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3368,7 +3375,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    
                    break;
                case ANYOF_NALNUM:
@@ -3379,7 +3385,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
                    break;
                case ANYOF_ALNUMC:
@@ -3390,7 +3395,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
                    break;
                case ANYOF_NALNUMC:
@@ -3401,7 +3405,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
                    break;
                case ANYOF_ALPHA:
@@ -3412,7 +3415,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
                    break;
                case ANYOF_NALPHA:
@@ -3423,39 +3425,36 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
                    break;
                case ANYOF_ASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
                    else {
-#ifdef ASCIIish
-                       for (value = 0; value < 128; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-#else  /* EBCDIC */
+#ifdef ALPHAS_HAVE_GAPS
                        for (value = 0; value < 256; value++)
                            if (isASCII(value))
                                ANYOF_BITMAP_SET(ret, value);
-#endif /* EBCDIC */
+#else
+                       for (value = 0; value < 128; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+#endif
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
                    break;
                case ANYOF_NASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_NASCII);
                    else {
-#ifdef ASCIIish
-                       for (value = 128; value < 256; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-#else  /* EBCDIC */
+#ifdef ALPHAS_HAVE_GAPS
                        for (value = 0; value < 256; value++)
                            if (!isASCII(value))
                                ANYOF_BITMAP_SET(ret, value);
-#endif /* EBCDIC */
+#else
+                       for (value = 128; value < 256; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+#endif
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
                    break;
                case ANYOF_BLANK:
@@ -3466,7 +3465,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
                    break;
                case ANYOF_NBLANK:
@@ -3477,7 +3475,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
                    break;
                case ANYOF_CNTRL:
@@ -3488,7 +3485,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
                    break;
                case ANYOF_NCNTRL:
@@ -3499,7 +3495,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
                    break;
                case ANYOF_DIGIT:
@@ -3510,7 +3505,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '0'; value <= '9'; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
                    break;
                case ANYOF_NDIGIT:
@@ -3523,7 +3517,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '9' + 1; value < 256; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
                    break;
                case ANYOF_GRAPH:
@@ -3534,7 +3527,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
                    break;
                case ANYOF_NGRAPH:
@@ -3545,7 +3537,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
                    break;
                case ANYOF_LOWER:
@@ -3556,7 +3547,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
                    break;
                case ANYOF_NLOWER:
@@ -3567,7 +3557,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
                    break;
                case ANYOF_PRINT:
@@ -3578,7 +3567,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
                    break;
                case ANYOF_NPRINT:
@@ -3589,7 +3577,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
                    break;
                case ANYOF_PSXSPC:
@@ -3600,7 +3587,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
                    break;
                case ANYOF_NPSXSPC:
@@ -3611,7 +3597,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
                    break;
                case ANYOF_PUNCT:
@@ -3622,7 +3607,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
                    break;
                case ANYOF_NPUNCT:
@@ -3633,7 +3617,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
                    break;
                case ANYOF_SPACE:
@@ -3644,7 +3627,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
                    break;
                case ANYOF_NSPACE:
@@ -3655,7 +3637,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
                    break;
                case ANYOF_UPPER:
@@ -3666,7 +3647,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
                    break;
                case ANYOF_NUPPER:
@@ -3677,7 +3657,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
                    break;
                case ANYOF_XDIGIT:
@@ -3688,7 +3667,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
                    break;
                case ANYOF_NXDIGIT:
@@ -3699,7 +3677,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
                    break;
                default:
@@ -3708,6 +3685,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                }
                if (LOC)
                    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
+               dont_optimize_invert = TRUE;
                continue;
            }
        } /* end of namedclass \blah */
@@ -3723,6 +3701,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        }
        else {
            lastvalue = value; /* save the beginning of the range */
+#ifdef ALPHAS_HAVE_GAPS
+           explicit_alpha_prev = explicit_alpha;
+           explicit_alpha      = isALPHA(value);
+#endif
            if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
                RExC_parse[1] != ']') {
                RExC_parse++;
@@ -3746,9 +3728,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        /* now is the next time */
        if (!SIZE_ONLY) {
            if (lastvalue < 256 && value < 256) {
-#ifndef ASCIIish /* EBCDIC, for example. */
-               if ((isLOWER(lastvalue) && isLOWER(value)) ||
-                   (isUPPER(lastvalue) && isUPPER(value)))
+#ifdef ALPHAS_HAVE_GAPS
+               /* In EBCDIC the letters are not an unbroken range 
+                * numerically, there's are gaps between i-j, r-s,
+                * I-J, R-S.  We DWIM that if the endpoints of the
+                * range are specified as explicitly alphabetic,
+                * an alphabetic range is requested, otherwise
+                * (the else branch) (say, explicit numeric endpoints
+                * like \xHH are used) we do a straightforward
+                * numeric range. */
+               if (explicit_alpha_prev && explicit_alpha &&
+                   ((isLOWER(lastvalue) && isLOWER(value)) ||
+                   ((isUPPER(lastvalue) && isUPPER(value)))))
                {
                    IV i;
                    if (isLOWER(lastvalue)) {