Various tweaks to Encode
[p5sagit/p5-mst-13.2.git] / regexec.c
index 8db2dc2..4380fd8 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -999,8 +999,10 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
                to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
 
-               c1 = utf8_to_uvchr(tmpbuf1, 0);
-               c2 = utf8_to_uvchr(tmpbuf2, 0);
+               c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC, 
+                                   0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+               c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
+                                   0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
            }
            else {
                c1 = *(U8*)m;
@@ -1013,7 +1015,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            c1 = *(U8*)m;
            c2 = PL_fold_locale[c1];
          do_exactf:
-           e = HOP3c(strend, -ln, s);
+           e = HOP3c(strend, -(I32)ln, s);
 
            if (norun && e < s)
                e = s;                  /* Due to minlen logic of intuit() */
@@ -1037,7 +1039,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                
                if (c1 == c2) {
                    while (s <= e) {
-                       c = utf8_to_uvchr((U8*)s, &len);
+                       c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY);
                        if ( c == c1
                             && (ln == len ||
                                 ibcmp_utf8(s, (char **)0, 0,  do_utf8,
@@ -1062,7 +1066,9 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                }
                else {
                    while (s <= e) {
-                       c = utf8_to_uvchr((U8*)s, &len);
+                     c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY);
 
                        /* Handle some of the three Greek sigmas cases.
                         * Note that not all the possible combinations
@@ -2390,7 +2396,9 @@ S_regmatch(pTHX_ regnode *prog)
                        if (l >= PL_regeol)
                             sayNO;
                        if (NATIVE_TO_UNI(*(U8*)s) !=
-                           utf8_to_uvuni((U8*)l, &ulen))
+                           utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY))
                             sayNO;
                        l += ulen;
                        s ++;
@@ -2402,7 +2410,9 @@ S_regmatch(pTHX_ regnode *prog)
                        if (l >= PL_regeol)
                            sayNO;
                        if (NATIVE_TO_UNI(*((U8*)l)) !=
-                           utf8_to_uvuni((U8*)s, &ulen))
+                           utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
+                                          ckWARN(WARN_UTF8) ?
+                                          0 : UTF8_ALLOW_ANY))
                            sayNO;
                        s += ulen;
                        l ++;
@@ -3545,11 +3555,17 @@ S_regmatch(pTHX_ regnode *prog)
                             to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
                             to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
 
-                            c1 = utf8_to_uvuni(tmpbuf1, 0);
-                            c2 = utf8_to_uvuni(tmpbuf2, 0);
+                            c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXLEN, 0,
+                                                ckWARN(WARN_UTF8) ?
+                                                0 : UTF8_ALLOW_ANY);
+                            c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXLEN, 0,
+                                                ckWARN(WARN_UTF8) ?
+                                                0 : UTF8_ALLOW_ANY);
                        }
                        else {
-                           c2 = c1 = utf8_to_uvchr(s, NULL);
+                           c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXLEN, 0,
+                                                    ckWARN(WARN_UTF8) ?
+                                                    0 : UTF8_ALLOW_ANY);
                        }
                    }
                }
@@ -3605,16 +3621,24 @@ S_regmatch(pTHX_ regnode *prog)
                        else {
                            STRLEN len;
                            if (c1 == c2) {
-                               /* count initialised to utf8_distance(old, locinput) */
+                               /* count initialised to
+                                * utf8_distance(old, locinput) */
                                while (locinput <= e &&
-                                      utf8_to_uvchr((U8*)locinput, &len) != c1) {
+                                      utf8n_to_uvchr((U8*)locinput,
+                                                     UTF8_MAXLEN, &len,
+                                                     ckWARN(WARN_UTF8) ?
+                                                     0 : UTF8_ALLOW_ANY) != c1) {
                                    locinput += len;
                                    count++;
                                }
                            } else {
-                               /* count initialised to utf8_distance(old, locinput) */
+                               /* count initialised to
+                                * utf8_distance(old, locinput) */
                                while (locinput <= e) {
-                                   UV c = utf8_to_uvchr((U8*)locinput, &len);
+                                   UV c = utf8n_to_uvchr((U8*)locinput,
+                                                         UTF8_MAXLEN, &len,
+                                                         ckWARN(WARN_UTF8) ?
+                                                         0 : UTF8_ALLOW_ANY);
                                    if (c == c1 || c == c2)
                                        break;
                                    locinput += len;
@@ -3648,7 +3672,10 @@ S_regmatch(pTHX_ regnode *prog)
                    UV c;
                    if (c1 != -1000) {
                        if (do_utf8)
-                           c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+                           c = utf8n_to_uvchr((U8*)PL_reginput,
+                                              UTF8_MAXLEN, 0,
+                                              ckWARN(WARN_UTF8) ?
+                                              0 : UTF8_ALLOW_ANY);
                        else
                            c = UCHARAT(PL_reginput);
                        /* If it could work, try it. */
@@ -3695,7 +3722,10 @@ S_regmatch(pTHX_ regnode *prog)
                    while (n >= ln) {
                        if (c1 != -1000) {
                            if (do_utf8)
-                               c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+                               c = utf8n_to_uvchr((U8*)PL_reginput,
+                                                  UTF8_MAXLEN, 0,
+                                                  ckWARN(WARN_UTF8) ?
+                                                  0 : UTF8_ALLOW_ANY);
                            else
                                c = UCHARAT(PL_reginput);
                        }
@@ -3715,7 +3745,10 @@ S_regmatch(pTHX_ regnode *prog)
                    while (n >= ln) {
                        if (c1 != -1000) {
                            if (do_utf8)
-                               c = utf8_to_uvchr((U8*)PL_reginput, NULL);
+                               c = utf8n_to_uvchr((U8*)PL_reginput,
+                                                  UTF8_MAXLEN, 0,
+                                                  ckWARN(WARN_UTF8) ?
+                                                  0 : UTF8_ALLOW_ANY);
                            else
                                c = UCHARAT(PL_reginput);
                        }
@@ -4297,7 +4330,8 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register b
     STRLEN len = 0;
     STRLEN plen;
 
-    c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
+    c = do_utf8 ? utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
+                                ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY) : *p;
 
     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
     if (do_utf8 || (flags & ANYOF_UNICODE)) {