provisional MakeMaker patch for VMS

[p5sagit/p5-mst-13.2.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index 4fe7889..7b459e2 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -393,11 +393,22 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
 #endif
 
+    if (prog->reganch & ROPT_UTF8) {
+       DEBUG_r(PerlIO_printf(Perl_debug_log,
+                             "UTF-8 regex...\n"));
+       PL_reg_flags |= RF_utf8;
+    }
+
     DEBUG_r({
-        char*s   = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
-        int  len = UTF ? strlen(s) : strend - strpos;
+        char *s   = PL_reg_match_utf8 ?
+                        sv_uni_display(dsv, sv, 60, 0) : strpos;
+        int   len = PL_reg_match_utf8 ?
+                        strlen(s) : strend - strpos;
         if (!PL_colorset)
              reginitcolors();
+        if (PL_reg_match_utf8)
+            DEBUG_r(PerlIO_printf(Perl_debug_log,
+                                  "UTF-8 target...\n"));
         PerlIO_printf(Perl_debug_log,
                       "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
                       PL_colors[4],PL_colors[5],PL_colors[0],
@@ -411,9 +422,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
              );
     });
 
-    if (prog->reganch & ROPT_UTF8)
-       PL_reg_flags |= RF_utf8;
-
     if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
        DEBUG_r(PerlIO_printf(Perl_debug_log,
                              "String too short... [re_intuit_start]\n"));
@@ -967,7 +975,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                 * Fortunately, not getting this right is allowed
                 * for Unicode Regular Expression Support level 1,
                 * only one-to-one matching is required. --jhi */
-               if (c1 == c2)
+               if (c1 == c2) {
                    while (s <= e) {
                        if ( utf8_to_uvchr((U8*)s, &len) == c1
                             && (ln == len ||
@@ -977,9 +985,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                            goto got_it;
                        s += len;
                    }
-               else
+               }
+               else {
                    while (s <= e) {
                        UV c = utf8_to_uvchr((U8*)s, &len);
+                       if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
+                           c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
+                           c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
                        if ( (c == c1 || c == c2)
                             && (ln == len ||
                                 ibcmp_utf8(s, do_utf8, strend - s,
@@ -988,6 +1000,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                            goto got_it;
                        s += len;
                    }
+               }
            }
            else {
                if (c1 == c2)
@@ -2583,16 +2596,21 @@ S_regmatch(pTHX_ regnode *prog)
            nextchr = UCHARAT(++locinput);
            break;
        case CLUMP:
-           LOAD_UTF8_CHARCLASS(mark,"~");
-           if (locinput >= PL_regeol ||
-               swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
-               sayNO;
-           locinput += PL_utf8skip[nextchr];
-           while (locinput < PL_regeol &&
-                  swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
-               locinput += UTF8SKIP(locinput);
-           if (locinput > PL_regeol)
+           if (locinput >= PL_regeol)
                sayNO;
+           if  (do_utf8) {
+               LOAD_UTF8_CHARCLASS(mark,"~");
+               if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
+                   sayNO;
+               locinput += PL_utf8skip[nextchr];
+               while (locinput < PL_regeol &&
+                      swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
+                   locinput += UTF8SKIP(locinput);
+               if (locinput > PL_regeol)
+                   sayNO;
+           } 
+           else
+              locinput++;
            nextchr = UCHARAT(locinput);
            break;
        case REFFL: