Re: Unicode/EBCDIC
[p5sagit/p5-mst-13.2.git] / regcomp.c
index bdcea75..9a935f7 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -650,21 +650,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 #endif
                    n = regnext(n);
                }
-               else {
+               else if (stringok) {
                    int oldl = STR_LEN(scan);
                    regnode *nnext = regnext(n);
-               
+
                    if (oldl + STR_LEN(n) > U8_MAX)
                        break;
                    NEXT_OFF(scan) += NEXT_OFF(n);
                    STR_LEN(scan) += STR_LEN(n);
                    next = n + NODE_SZ_STR(n);
                    /* Now we can overwrite *n : */
-                   Move(STRING(n), STRING(scan) + oldl,
-                        STR_LEN(n), char);
+                   Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
 #ifdef DEBUGGING
-                   if (stringok)
-                       stop = next - 1;
+                   stop = next - 1;
 #endif
                    n = nnext;
                }
@@ -1932,6 +1930,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->reganch |= ROPT_LOOKBEHIND_SEEN;
     if (RExC_seen & REG_SEEN_EVAL)
        r->reganch |= ROPT_EVAL_SEEN;
+    if (RExC_seen & REG_SEEN_SANY)
+       r->reganch |= ROPT_SANY_SEEN;
     Newz(1002, r->startp, RExC_npar, I32);
     Newz(1002, r->endp, RExC_npar, I32);
     PL_regdata = r->data; /* for regprop() */
@@ -2640,6 +2640,7 @@ tryagain:
            break;
        case 'C':
            ret = reg_node(pRExC_state, SANY);
+           RExC_seen |= REG_SEEN_SANY;
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
            break;
@@ -2880,6 +2881,8 @@ tryagain:
                            else {
                                numlen = 1;     /* allow underscores */
                                ender = (UV)scan_hex(p + 1, e - p - 1, &numlen);
+                               if (ender > 0xff)
+                                   RExC_utf8 = 1;
                                /* numlen is generous */
                                if (numlen + len >= 127) {
                                    p--;