X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=51b3c5d50bf098d382a2a3ca82173c8eff213391;hb=7d824d8e6f7cacfafe95d58fa9ab3d99bd41e854;hp=a223533526967c5bfb425ae2c4542c476eb4859a;hpb=a2a2844f59a5c91f404052ef98a588c171fc29f8;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/regcomp.c b/regcomp.c
index a223533..51b3c5d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -522,11 +522,8 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
 STATIC void
 S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
-    int value;
-
     ANYOF_CLASS_ZERO(cl);
-    for (value = 0; value < 256; ++value)
-	ANYOF_BITMAP_SET(cl, value);
+    ANYOF_BITMAP_SETALL(cl);
     cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
     if (LOC)
 	cl->flags |= ANYOF_LOCALE;
@@ -543,9 +540,8 @@ S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl)
 	    return 1;
     if (!(cl->flags & ANYOF_UNICODE_ALL))
 	return 0;
-    for (value = 0; value < 256; ++value)
-	if (!ANYOF_BITMAP_TEST(cl, value))
-	    return 0;
+    if (!ANYOF_BITMAP_TESTALLSET(cl))
+	return 0;
     return 1;
 }
 
@@ -662,6 +658,17 @@ S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, str
     }
 }
 
+/*
+ * There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
+ * These need to be revisited when a newer toolchain becomes available.
+ */
+#if defined(__sparc64__) && defined(__GNUC__)
+#   if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
+#       undef  SPARC64_GCC_WORKAROUND
+#       define SPARC64_GCC_WORKAROUND 1
+#   endif
+#endif
+
 /* REx optimizer.  Converts nodes into quickier variants "in place".
    Finds fixed substrings.  */
 
@@ -729,6 +736,50 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 		    n = nnext;
 		}
 	    }
+
+	    if (UTF && OP(scan) == EXACTF && STR_LEN(scan) >= 6) {
+/*
+  Two problematic code points in Unicode casefolding of EXACT nodes:
+
+   U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+   U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+
+   which casefold to
+
+   Unicode			UTF-8
+
+   U+03B9 U+0308 U+0301		0xCE 0xB9 0xCC 0x88 0xCC 0x81
+   U+03C5 U+0308 U+0301		0xCF 0x85 0xCC 0x88 0xCC 0x81
+
+   This means that in case-insensitive matching (or "loose matching",
+   as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
+   length of the above casefolded versions) can match a target string
+   of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
+   This would rather mess up the minimum length computation.
+
+   What we'll do is to look for the tail four bytes, and then peek
+   at the preceding two bytes to see whether we need to decrease
+   the minimum length by four (six minus two).
+
+   Thanks to the design of UTF-8, there cannot be false matches:
+   the encoding of one character can never begin in the middle of
+   the encoding of another, so a match falls on character boundaries.
+
+*/
+		 char *s0 = STRING(scan), *s, *t;
+		 char *s1 = s0 + STR_LEN(scan) - 1, *s2 = s1 - 4;
+		 char *t0 = "\xcc\x88\xcc\x81";
+		 char *t1 = t0 + 3;
+		 
+		 for (s = s0 + 2;
+		      s < s2 && (t = ninstr(s, s1, t0, t1));
+		      s = t + 4) {
+		      if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
+			  ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
+			   min -= 4;
+		 }
+	    }
+
 #ifdef DEBUGGING
 	    /* Allow dumping */
 	    n = scan + NODE_SZ_STR(scan);
@@ -1207,11 +1258,28 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 		    int counted = mincount != 0;
 
 		    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
+#if defined(SPARC64_GCC_WORKAROUND)
+			I32 b = 0;
+			STRLEN l = 0;
+			char *s = NULL;
+			I32 old = 0;
+
+			if (pos_before >= data->last_start_min)
+			    b = pos_before;
+			else
+			    b = data->last_start_min;
+
+			l = 0;
+			s = SvPV(data->last_found, l);
+			old = b - data->last_start_min;
+
+#else
 			I32 b = pos_before >= data->last_start_min
 			    ? pos_before : data->last_start_min;
 			STRLEN l;
 			char *s = SvPV(data->last_found, l);
 			I32 old = b - data->last_start_min;
+#endif
 
 			if (UTF)
 			    old = utf8_hop((U8*)s, old) - (U8*)s;
@@ -1666,17 +1734,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     if (exp == NULL)
 	FAIL("NULL regexp argument");
 
-    /* XXXX This looks very suspicious... */
-    if (pm->op_pmdynflags & PMdf_CMP_UTF8)
-        RExC_utf8 = 1;
-    else
-        RExC_utf8 = 0;
+    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
 
     RExC_precomp = exp;
-    DEBUG_r(if (!PL_colorset) reginitcolors());
-    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
-		      PL_colors[4],PL_colors[5],PL_colors[0],
-		      (int)(xend - exp), RExC_precomp, PL_colors[1]));
+    DEBUG_r({
+	 if (!PL_colorset) reginitcolors();
+	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
+		       PL_colors[4],PL_colors[5],PL_colors[0],
+		       (int)(xend - exp), RExC_precomp, PL_colors[1]);
+    });
     RExC_flags16 = pm->op_pmflags;
     RExC_sawback = 0;
 
@@ -1764,7 +1830,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
     pm->op_pmflags = RExC_flags16;
     if (UTF)
-	r->reganch |= ROPT_UTF8;
+        r->reganch |= ROPT_UTF8;	/* Unicode in it? */
     r->regstclass = NULL;
     if (RExC_naughty >= 10)	/* Probably an expensive pattern. */
 	r->reganch |= ROPT_NAUGHTY;
@@ -1826,7 +1892,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 	    first = NEXTOPER(first);
 	    goto again;
 	}
-	else if ((OP(first) == STAR &&
+	else if (!sawopen && (OP(first) == STAR &&
 	    PL_regkind[(U8)OP(NEXTOPER(first))] == REG_ANY) &&
 	    !(r->reganch & ROPT_ANCH) )
 	{
@@ -2339,9 +2405,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     }
     else if (paren != '?')		/* Not Conditional */
 	ret = br;
-    if (flags&HASWIDTH)
-	*flagp |= HASWIDTH;
-    *flagp |= flags&SPSTART;
+    *flagp |= flags & (SPSTART | HASWIDTH);
     lastbr = br;
     while (*RExC_parse == '|') {
 	if (!SIZE_ONLY && RExC_extralen) {
@@ -2795,6 +2859,7 @@ tryagain:
 	case 'Z':
 	    ret = reg_node(pRExC_state, SEOL);
 	    *flagp |= SIMPLE;
+	    RExC_seen_zerolen++;		/* Do not optimize RE away */
 	    nextchar(pRExC_state);
 	    break;
 	case 'z':
@@ -2962,8 +3027,8 @@ tryagain:
 	    register char *p;
 	    char *oldp, *s;
 	    STRLEN numlen;
-	    STRLEN ulen;
-	    U8 tmpbuf[UTF8_MAXLEN*2+1];
+	    STRLEN foldlen;
+	    U8 tmpbuf[UTF8_MAXLEN_FOLD+1], *foldbuf;
 
             parse_start = RExC_parse - 1;
 
@@ -2994,6 +3059,8 @@ tryagain:
 		case '\\':
 		    switch (*++p) {
 		    case 'A':
+		    case 'C':
+		    case 'X':
 		    case 'G':
 		    case 'Z':
 		    case 'z':
@@ -3106,16 +3173,30 @@ tryagain:
 		if (RExC_flags16 & PMf_EXTENDED)
 		    p = regwhite(p, RExC_end);
 		if (UTF && FOLD) {
-		    toLOWER_uni(ender, tmpbuf, &ulen);
-		    ender = utf8_to_uvchr(tmpbuf, 0);
+		    /* Prime the casefolded buffer. */
+		    ender = toFOLD_uni(ender, tmpbuf, &foldlen);
 		}
 		if (ISMULT2(p)) { /* Back off on ?+*. */
 		    if (len)
 			p = oldp;
-		    else if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
-			reguni(pRExC_state, ender, s, &numlen);
-			s += numlen;
-			len += numlen;
+		    else if (UTF) {
+			 if (FOLD) {
+			      /* Emit all the Unicode characters. */
+			      for (foldbuf = tmpbuf;
+				   foldlen;
+				   foldlen -= numlen) {
+				   ender = utf8_to_uvchr(foldbuf, &numlen);
+				   reguni(pRExC_state, ender, s, &numlen);
+				   s       += numlen;
+				   len     += numlen;
+				   foldbuf += numlen;
+			      }
+			 }
+			 else {
+			      reguni(pRExC_state, ender, s, &numlen);
+			      s   += numlen;
+			      len += numlen;
+			 }
 		    }
 		    else {
 			len++;
@@ -3123,10 +3204,25 @@ tryagain:
 		    }
 		    break;
 		}
-		if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
-		    reguni(pRExC_state, ender, s, &numlen);
-		    s += numlen;
-		    len += numlen - 1;
+		if (UTF) {
+		     if (FOLD) {
+		          /* Emit all the Unicode characters. */
+			  for (foldbuf = tmpbuf;
+			       foldlen;
+			       foldlen -= numlen) {
+			       ender = utf8_to_uvchr(foldbuf, &numlen);
+			       reguni(pRExC_state, ender, s, &numlen);
+			       s       += numlen;
+			       len     += numlen;
+			       foldbuf += numlen;
+			  }
+		     }
+		     else {
+			  reguni(pRExC_state, ender, s, &numlen);
+			  s   += numlen;
+			  len += numlen;
+		     }
+		     len--;
 		}
 		else
 		    REGC(ender, s++);
@@ -3155,6 +3251,30 @@ tryagain:
 	break;
     }
 
+    /* If the encoding pragma is in effect recode the text of
+     * any EXACT-kind nodes. */
+    if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
+	 STRLEN oldlen = STR_LEN(ret);
+	 SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));
+
+	 if (RExC_utf8)
+	      SvUTF8_on(sv);
+	 if (sv_utf8_downgrade(sv, TRUE)) {
+	      char *s       = Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
+	      STRLEN newlen = SvCUR(sv);
+	 
+	      if (!SIZE_ONLY) {
+		   DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
+					 (int)oldlen, STRING(ret),
+					 (int)newlen, s));
+		   Copy(s, STRING(ret), newlen, char);
+		   STR_LEN(ret) += newlen - oldlen;
+		   RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
+	      } else
+		   RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
+	 }
+    }
+
     return(ret);
 }
 
@@ -3179,7 +3299,12 @@ S_regwhite(pTHX_ char *p, char *e)
    Character classes ([:foo:]) can also be negated ([:^foo:]).
    Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
    Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
-   but trigger warnings because they are currently unimplemented. */
+   but trigger failures because they are currently unimplemented. */
+
+#define POSIXCC_DONE(c)   ((c) == ':')
+#define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
+#define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
+
 STATIC I32
 S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 {
@@ -3188,13 +3313,11 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 
     if (value == '[' && RExC_parse + 1 < RExC_end &&
 	/* I smell either [: or [= or [. -- POSIX has been here, right? */
-	(*RExC_parse == ':' ||
-	 *RExC_parse == '=' ||
-	 *RExC_parse == '.')) {
-	char  c = *RExC_parse;
+	POSIXCC(UCHARAT(RExC_parse))) {
+	char  c = UCHARAT(RExC_parse);
 	char* s = RExC_parse++;
 	
-	while (RExC_parse < RExC_end && *RExC_parse != c)
+	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
 	    RExC_parse++;
 	if (RExC_parse == RExC_end)
 	    /* Grandfather lone [:, [=, [. */
@@ -3202,7 +3325,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 	else {
 	    char* t = RExC_parse++; /* skip over the c */
 
-  	    if (*RExC_parse == ']') {
+  	    if (UCHARAT(RExC_parse) == ']') {
   		RExC_parse++; /* skip over the ending ] */
   		posixcc = s + 1;
 		if (*s == ':') {
@@ -3291,7 +3414,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 
 		    /* adjust RExC_parse so the warning shows after
 		       the class closes */
-		    while (*RExC_parse && *RExC_parse != ']')
+		    while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
 			RExC_parse++;
 		    Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
 		}
@@ -3310,9 +3433,7 @@ STATIC void
 S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 {
     if (!SIZE_ONLY && ckWARN(WARN_REGEXP) &&
-	(*RExC_parse == ':' ||
-	 *RExC_parse == '=' ||
-	 *RExC_parse == '.')) {
+	POSIXCC(UCHARAT(RExC_parse))) {
 	char *s = RExC_parse;
  	char  c = *s++;
 
@@ -3322,11 +3443,10 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 	    vWARN3(s+2, "POSIX syntax [%c %c] belongs inside character classes", c, c);
 
 	    /* [[=foo=]] and [[.foo.]] are still future. */
-	    if (c == '=' || c == '.')
-	    {
+	    if (POSIXCC_NOTYET(c)) {
 		/* adjust RExC_parse so the error shows after
 		   the class closes */
-		while (*RExC_parse && *RExC_parse++ != ']')
+		while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
 		    ;
 		Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
 	    }
@@ -3338,6 +3458,7 @@ STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state)
 {
     register UV value;
+    register UV nextvalue;
     register IV prevvalue = OOB_UNICODE;
     register IV range = 0;
     register regnode *ret;
@@ -3348,14 +3469,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     SV *listsv = Nullsv;
     register char *e;
     UV n;
-    bool optimize_invert = TRUE;
+    bool optimize_invert   = TRUE;
+    AV* unicode_alternate  = 0;
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
     if (!SIZE_ONLY)
 	ANYOF_FLAGS(ret) = 0;
 
-    if (*RExC_parse == '^') {	/* Complement of range. */
+    if (UCHARAT(RExC_parse) == '^') {	/* Complement of range. */
 	RExC_naughty++;
 	RExC_parse++;
 	if (!SIZE_ONLY)
@@ -3374,13 +3496,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	listsv = newSVpvn("# comment\n", 10);
     }
 
-    if (!SIZE_ONLY && ckWARN(WARN_REGEXP))
+    nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
+
+    if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && POSIXCC(nextvalue))
 	checkposixcc(pRExC_state);
 
-    if (*RExC_parse == ']' || *RExC_parse == '-')
-	goto charclassloop;		/* allow 1st char to be ] or - */
+    /* allow 1st char to be ] (allowing it to be - is dealt with later) */
+    if (UCHARAT(RExC_parse) == ']')
+	goto charclassloop;
 
-    while (RExC_parse < RExC_end && *RExC_parse != ']') {
+    while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
 
     charclassloop:
 
@@ -3396,7 +3521,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	}
 	else
 	    value = UCHARAT(RExC_parse++);
-	if (value == '[')
+	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
+	if (value == '[' && POSIXCC(nextvalue))
 	    namedclass = regpposixcc(pRExC_state, value);
 	else if (value == '\\') {
 	    if (UTF) {
@@ -3922,17 +4048,73 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 		}
 		else
 #endif
-		    for (i = prevvalue; i <= ceilvalue; i++)
-			ANYOF_BITMAP_SET(ret, i);
+		      for (i = prevvalue; i <= ceilvalue; i++)
+			  ANYOF_BITMAP_SET(ret, i);
 	  }
-	  if (value > 255) {
+	  if (value > 255 || UTF) {
 		ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
 		if (prevvalue < value)
 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
 				   (UV)prevvalue, (UV)value);
-		else if (prevvalue == value)
+		else if (prevvalue == value) {
 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
 				   (UV)value);
+		    if (FOLD) {
+			 U8 tmpbuf [UTF8_MAXLEN+1];
+			 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
+			 STRLEN foldlen;
+			 UV f;
+
+			 uvchr_to_utf8(tmpbuf, value);
+			 to_utf8_fold(tmpbuf, foldbuf, &foldlen);
+			 f = utf8_to_uvchr(foldbuf, 0);
+
+			 /* If folding, and the character folds to a
+			  * single different character, also add the
+			  * folded version to the charclass. */
+			 if (f != value) {
+			      if (foldlen == UNISKIP(f))
+				  Perl_sv_catpvf(aTHX_ listsv,
+						 "%04"UVxf"\n", f);
+			      else {
+				  /* Any multicharacter foldings
+				   * require the following transform:
+				   * [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst)
+				   * where E folds into "pq" and F folds
+				   * into "rst", all other characters
+				   * fold to single characters.  We save
+				   * away these multicharacter foldings,
+				   * to be later saved as part of the
+				   * additional "s" data. */
+				  SV *sv;
+
+				  if (!unicode_alternate)
+				      unicode_alternate = newAV();
+				  sv = newSVpvn((char*)foldbuf, foldlen);
+				  SvUTF8_on(sv);
+				  av_push(unicode_alternate, sv);
+			      }
+			 }
+
+			 /* If folding and the value is one of the Greek
+			  * sigmas, insert a few more sigmas to make the
+			  * folding rules of the sigmas work right.
+			  * Note that not all the possible combinations
+			  * are handled here: some of them are handled
+			  * by the standard folding rules, and some of
+			  * them (literal or EXACTF cases) are handled
+			  * during runtime in regexec.c:S_find_byclass(). */
+			 if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
+			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+					     (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
+			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+					     (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
+			 }
+			 else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
+			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+					     (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
+		    }
+		}
 	    }
         }
 
@@ -3976,8 +4158,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	AV *av = newAV();
 	SV *rv;
 
+	/* The 0th element stores the character class description
+	 * in its textual form: used later (regexec.c:Perl_regclass_swash())
+	 * to initialize the appropriate swash (which gets stored in
+	 * the 1st element), and also useful for dumping the regnode.
+	 * The 2nd element stores the multicharacter foldings,
+	 * used later (regexec.c:S_reginclasslen()). */
 	av_store(av, 0, listsv);
 	av_store(av, 1, NULL);
+	av_store(av, 2, (SV*)unicode_alternate);
 	rv = newRV_noinc((SV*)av);
 	n = add_data(pRExC_state, 1, "s");
 	RExC_rx->data->data[n] = (void*)rv;
@@ -4409,9 +4598,24 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
 
     k = PL_regkind[(U8)OP(o)];
 
-    if (k == EXACT)
-	Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>", PL_colors[0],
-		       STR_LEN(o), STRING(o), PL_colors[1]);
+    if (k == EXACT) {
+        SV *dsv = sv_2mortal(newSVpvn("", 0));
+	/* Using is_utf8_string() is a crude hack but it may
+	 * be the best for now since we have no flag "this EXACTish
+	 * node was UTF-8" --jhi */
+	bool do_utf8 = is_utf8_string((U8*)STRING(o), STR_LEN(o));
+	char *s    = do_utf8 ?
+	  pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60,
+			 UNI_DISPLAY_REGEX) :
+	  STRING(o);
+	int len = do_utf8 ?
+	  strlen(s) :
+	  STR_LEN(o);
+	Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>",
+		       PL_colors[0],
+		       len, s,
+		       PL_colors[1]);
+    }
     else if (k == CURLY) {
 	if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
 	    Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
@@ -4492,11 +4696,11 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
 	if (flags & ANYOF_UNICODE)
 	    sv_catpv(sv, "{unicode}");
 	else if (flags & ANYOF_UNICODE_ALL)
-	    sv_catpv(sv, "{all-unicode}");
+	    sv_catpv(sv, "{unicode_all}");
 
 	{
 	    SV *lv;
-	    SV *sw = regclass_swash(o, FALSE, &lv);
+	    SV *sw = regclass_swash(o, FALSE, &lv, 0);
 	
 	    if (lv) {
 		if (sw) {
@@ -4585,16 +4789,25 @@ Perl_re_intuit_string(pTHX_ regexp *prog)
 void
 Perl_pregfree(pTHX_ struct regexp *r)
 {
-    DEBUG_r(if (!PL_colorset) reginitcolors());
+#ifdef DEBUGGING
+    SV *dsv = PERL_DEBUG_PAD_ZERO(0);
+#endif
 
     if (!r || (--r->refcnt > 0))
 	return;
-    DEBUG_r(PerlIO_printf(Perl_debug_log,
-		      "%sFreeing REx:%s `%s%.60s%s%s'\n",
-		      PL_colors[4],PL_colors[5],PL_colors[0],
-		      r->precomp,
-		      PL_colors[1],
-		      (strlen(r->precomp) > 60 ? "..." : "")));
+    DEBUG_r({
+         char *s = pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60,
+				  UNI_DISPLAY_REGEX);
+	 int len = SvCUR(dsv);
+	 if (!PL_colorset)
+	      reginitcolors();
+	 PerlIO_printf(Perl_debug_log,
+		       "%sFreeing REx:%s `%s%*.*s%s%s'\n",
+		       PL_colors[4],PL_colors[5],PL_colors[0],
+		       len, len, s,
+		       PL_colors[1],
+		       len > 60 ? "..." : "");
+    });
 
     if (r->precomp)
 	Safefree(r->precomp);
@@ -4650,7 +4863,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
 		new_comppad = NULL;
 		break;
 	    case 'n':
-		break;
+	        break;
 	    default:
 		Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
 	    }