provisional MakeMaker patch for VMS
[p5sagit/p5-mst-13.2.git] / regcomp.c
index c73e815..b5d9860 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -658,6 +658,17 @@ S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, str
     }
 }
 
+/*
+ * There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
+ * These need to be revisited when a newer toolchain becomes available.
+ */
+#if defined(__sparc64__) && defined(__GNUC__)
+#   if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
+#       undef  SPARC64_GCC_WORKAROUND
+#       define SPARC64_GCC_WORKAROUND 1
+#   endif
+#endif
+
 /* REx optimizer.  Converts nodes into quickier variants "in place".
    Finds fixed substrings.  */
 
@@ -1203,11 +1214,28 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    int counted = mincount != 0;
 
                    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
+#if defined(SPARC64_GCC_WORKAROUND)
+                       I32 b = 0;
+                       STRLEN l = 0;
+                       char *s = NULL;
+                       I32 old = 0;
+
+                       if (pos_before >= data->last_start_min)
+                           b = pos_before;
+                       else
+                           b = data->last_start_min;
+
+                       l = 0;
+                       s = SvPV(data->last_found, l);
+                       old = b - data->last_start_min;
+
+#else
                        I32 b = pos_before >= data->last_start_min
                            ? pos_before : data->last_start_min;
                        STRLEN l;
                        char *s = SvPV(data->last_found, l);
                        I32 old = b - data->last_start_min;
+#endif
 
                        if (UTF)
                            old = utf8_hop((U8*)s, old) - (U8*)s;
@@ -1662,17 +1690,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     if (exp == NULL)
        FAIL("NULL regexp argument");
 
-    /* XXXX This looks very suspicious... */
-    if (pm->op_pmdynflags & PMdf_CMP_UTF8)
-        RExC_utf8 = 1;
-    else
-        RExC_utf8 = 0;
+    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
 
     RExC_precomp = exp;
-    DEBUG_r(if (!PL_colorset) reginitcolors());
-    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
-                     PL_colors[4],PL_colors[5],PL_colors[0],
-                     (int)(xend - exp), RExC_precomp, PL_colors[1]));
+    DEBUG_r({
+        if (!PL_colorset) reginitcolors();
+        PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
+                      PL_colors[4],PL_colors[5],PL_colors[0],
+                      (int)(xend - exp), RExC_precomp, PL_colors[1]);
+    });
     RExC_flags16 = pm->op_pmflags;
     RExC_sawback = 0;
 
@@ -2960,7 +2986,7 @@ tryagain:
            char *oldp, *s;
            STRLEN numlen;
            STRLEN ulen;
-           U8 tmpbuf[UTF8_MAXLEN*2+1];
+           U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
 
             parse_start = RExC_parse - 1;
 
@@ -2991,6 +3017,8 @@ tryagain:
                case '\\':
                    switch (*++p) {
                    case 'A':
+                   case 'C':
+                   case 'X':
                    case 'G':
                    case 'Z':
                    case 'z':
@@ -3103,7 +3131,7 @@ tryagain:
                if (RExC_flags16 & PMf_EXTENDED)
                    p = regwhite(p, RExC_end);
                if (UTF && FOLD) {
-                   toLOWER_uni(ender, tmpbuf, &ulen);
+                   toFOLD_uni(ender, tmpbuf, &ulen);
                    ender = utf8_to_uvchr(tmpbuf, 0);
                }
                if (ISMULT2(p)) { /* Back off on ?+*. */
@@ -3152,20 +3180,26 @@ tryagain:
        break;
     }
 
-    if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT && !RExC_utf8) {
+    if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
         STRLEN oldlen = STR_LEN(ret);
         SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));
-        char *s       = Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
-        STRLEN newlen = SvCUR(sv);
-        if (!SIZE_ONLY) {
-             DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
-                                   oldlen, STRING(ret), newlen, s));
-             Copy(s, STRING(ret), newlen, char);
-             STR_LEN(ret) += newlen - oldlen;
-             RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
-        } else
-             RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
-        RExC_utf8 = 1;
+
+        if (RExC_utf8)
+             SvUTF8_on(sv);
+        if (sv_utf8_downgrade(sv, TRUE)) {
+             char *s       = Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
+             STRLEN newlen = SvCUR(sv);
+        
+             if (!SIZE_ONLY) {
+                  DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
+                                        (int)oldlen, STRING(ret),
+                                        (int)newlen, s));
+                  Copy(s, STRING(ret), newlen, char);
+                  STR_LEN(ret) += newlen - oldlen;
+                  RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
+             } else
+                  RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
+        }
     }
 
     return(ret);
@@ -3939,17 +3973,29 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                }
                else
 #endif
-                   for (i = prevvalue; i <= ceilvalue; i++)
-                       ANYOF_BITMAP_SET(ret, i);
+                     for (i = prevvalue; i <= ceilvalue; i++)
+                         ANYOF_BITMAP_SET(ret, i);
          }
-         if (value > 255) {
+         if (value > 255 || UTF) {
                ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
                if (prevvalue < value)
                    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
                                   (UV)prevvalue, (UV)value);
-               else if (prevvalue == value)
+               else if (prevvalue == value) {
                    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
                                   (UV)value);
+                   if (FOLD) {
+                        if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
+                             Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+                                            (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
+                             Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+                                            (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
+                        }
+                        else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
+                             Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+                                            (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
+                   }
+               }
            }
         }