X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=0f883f3ce88920178b429e5e3ce6182742cdb1e8;hb=7ee8c957e643df1e9e47d243c3269eb47c2da591;hp=f256c4e4d77a96d8bd6efcd46a0697552cfa3baf;hpb=c485e6072d15e92c314a8b9cb6957c3edf13f985;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/regcomp.c b/regcomp.c
index f256c4e..0f883f3 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -38,6 +38,7 @@
 /* *These* symbols are masked to allow static link. */
 #  define Perl_pregfree my_regfree
 #  define Perl_regnext my_regnext
+#  define save_re_context my_save_re_context
 #endif 
 
 /*SUPPRESS 112*/
@@ -319,7 +320,10 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
 
 	}
 	if (OP(scan) != CURLYX) {
-	    int max = (reg_off_by_arg[OP(scan)] ? I32_MAX : U16_MAX);
+	    int max = (reg_off_by_arg[OP(scan)]
+		       ? I32_MAX
+		       /* I32 may be smaller than U16 on CRAYs! */
+		       : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
 	    int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
 	    int noff;
 	    regnode *n = scan;
@@ -486,10 +490,10 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
 					? (flags & ~SCF_DO_SUBSTR) : flags);
 		if (!scan) 		/* It was not CURLYX, but CURLY. */
 		    scan = next;
-		if (PL_dowarn && (minnext + deltanext == 0) 
+		if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) 
 		    && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
 		    && maxcount <= 10000) /* Complement check for big count */
-		    warn("Strange *+?{} on zero-length expression");
+		    warner(WARN_UNSAFE, "Strange *+?{} on zero-length expression");
 		min += minnext * mincount;
 		is_inf |= (maxcount == REG_INFTY && (minnext + deltanext) > 0
 			   || deltanext == I32_MAX);
@@ -1554,8 +1558,8 @@ regpiece(I32 *flagp)
 	goto do_curly;
     }
   nest_check:
-    if (PL_dowarn && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
-	warn("%.*s matches null string many times",
+    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
+	warner(WARN_UNSAFE, "%.*s matches null string many times",
 	    PL_regcomp_parse - origparse, origparse);
     }
 
@@ -1633,6 +1637,9 @@ tryagain:
     case '[':
 	PL_regcomp_parse++;
 	ret = (UTF ? regclassutf8() : regclass());
+	if (*PL_regcomp_parse != ']')
+	    FAIL("unmatched [] in regexp");
+	nextchar();
 	*flagp |= HASWIDTH|SIMPLE;
 	break;
     case '(':
@@ -1700,7 +1707,7 @@ tryagain:
 	    *flagp |= HASWIDTH;
 	    nextchar();
 	    if (UTF && !PL_utf8_mark)
-		is_utf8_mark("~");	/* preload table */
+		is_utf8_mark((U8*)"~");		/* preload table */
 	    break;
 	case 'w':
 	    ret = reg_node(
@@ -1710,7 +1717,7 @@ tryagain:
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_alnum)
-		is_utf8_alnum("a");	/* preload table */
+		is_utf8_alnum((U8*)"a");	/* preload table */
 	    break;
 	case 'W':
 	    ret = reg_node(
@@ -1720,7 +1727,7 @@ tryagain:
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_alnum)
-		is_utf8_alnum("a");	/* preload table */
+		is_utf8_alnum((U8*)"a");	/* preload table */
 	    break;
 	case 'b':
 	    PL_seen_zerolen++;
@@ -1731,7 +1738,7 @@ tryagain:
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_alnum)
-		is_utf8_alnum("a");	/* preload table */
+		is_utf8_alnum((U8*)"a");	/* preload table */
 	    break;
 	case 'B':
 	    PL_seen_zerolen++;
@@ -1742,7 +1749,7 @@ tryagain:
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_alnum)
-		is_utf8_alnum("a");	/* preload table */
+		is_utf8_alnum((U8*)"a");	/* preload table */
 	    break;
 	case 's':
 	    ret = reg_node(
@@ -1752,7 +1759,7 @@ tryagain:
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_space)
-		is_utf8_space(" ");	/* preload table */
+		is_utf8_space((U8*)" ");	/* preload table */
 	    break;
 	case 'S':
 	    ret = reg_node(
@@ -1762,21 +1769,44 @@ tryagain:
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_space)
-		is_utf8_space(" ");	/* preload table */
+		is_utf8_space((U8*)" ");	/* preload table */
 	    break;
 	case 'd':
 	    ret = reg_node(UTF ? DIGITUTF8 : DIGIT);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_digit)
-		is_utf8_digit("1");	/* preload table */
+		is_utf8_digit((U8*)"1");	/* preload table */
 	    break;
 	case 'D':
 	    ret = reg_node(UTF ? NDIGITUTF8 : NDIGIT);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    if (UTF && !PL_utf8_digit)
-		is_utf8_digit("1");	/* preload table */
+		is_utf8_digit((U8*)"1");	/* preload table */
+	    break;
+	case 'p':
+	case 'P':
+	    {	/* a lovely hack--pretend we saw [\pX] instead */
+		char* oldregxend = PL_regxend;
+
+		if (PL_regcomp_parse[1] == '{') {
+		    PL_regxend = strchr(PL_regcomp_parse, '}');
+		    if (!PL_regxend)
+			FAIL("Missing right brace on \\p{}");
+		    PL_regxend++;
+		}
+		else
+		    PL_regxend = PL_regcomp_parse + 2;
+		PL_regcomp_parse--;
+
+		ret = regclassutf8();
+
+		PL_regxend = oldregxend;
+		PL_regcomp_parse--;
+		nextchar();
+		*flagp |= HASWIDTH|SIMPLE;
+	    }
 	    break;
 	case 'n':
 	case 'r':
@@ -1873,6 +1903,8 @@ tryagain:
 		    case 'S':
 		    case 'd':
 		    case 'D':
+		    case 'p':
+		    case 'P':
 			--p;
 			goto loopdone;
 		    case 'n':
@@ -1949,7 +1981,7 @@ tryagain:
 		default:
 		  normal_default:
 		    if ((*p & 0xc0) == 0xc0 && UTF) {
-			ender = utf8_to_uv(p, &numlen);
+			ender = utf8_to_uv((U8*)p, &numlen);
 			p += numlen;
 		    }
 		    else
@@ -2083,8 +2115,9 @@ regclass(void)
 		     * (POSIX Extended Character Classes, that is)
 		     * The text between e.g. [: and :] would start
 		     * at posixccs + 1 and stop at regcomp_parse - 2. */
-		    if (dowarn && !SIZE_ONLY)
-			warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+		    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+			warner(WARN_UNSAFE,
+			    "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
 		    PL_regcomp_parse++; /* skip over the ending ] */
 		}
 	    }
@@ -2212,9 +2245,6 @@ regclass(void)
 	}
 	lastvalue = value;
     }
-    if (*PL_regcomp_parse != ']')
-	FAIL("unmatched [] in regexp");
-    nextchar();
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
     if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
 	for (value = 0; value < 256; ++value) {
@@ -2267,7 +2297,7 @@ regclassutf8(void)
 
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
-	value = utf8_to_uv(PL_regcomp_parse, &numlen);
+	value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
 	PL_regcomp_parse += numlen;
 
 	if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
@@ -2288,15 +2318,16 @@ regclassutf8(void)
 		     * (POSIX Extended Character Classes, that is)
 		     * The text between e.g. [: and :] would start
 		     * at posixccs + 1 and stop at regcomp_parse - 2. */
-		    if (dowarn && !SIZE_ONLY)
-			warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+		    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+			warner(WARN_UNSAFE,
+			    "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
 		    PL_regcomp_parse++; /* skip over the ending ] */
 		}
 	    }
 	}
 
 	if (value == '\\') {
-	    value = utf8_to_uv(PL_regcomp_parse, &numlen);
+	    value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
 	    PL_regcomp_parse += numlen;
 	    switch (value) {
 	    case 'w':
@@ -2326,7 +2357,7 @@ regclassutf8(void)
 			flags |= ANYOF_SPACEL;
 		    sv_catpvf(listsv, "+utf8::IsSpace\n");
 		    if (!PL_utf8_space)
-			is_utf8_space(" ");
+			is_utf8_space((U8*)" ");
 		}
 		lastvalue = 123456;
 		continue;
@@ -2337,7 +2368,7 @@ regclassutf8(void)
 		    sv_catpvf(listsv,
 			"!utf8::IsSpace\n");
 		    if (!PL_utf8_space)
-			is_utf8_space(" ");
+			is_utf8_space((U8*)" ");
 		}
 		lastvalue = 123456;
 		continue;
@@ -2441,9 +2472,6 @@ regclassutf8(void)
 		sv_catpvf(listsv, "%04x\n", value);
 	}
     }
-    if (*PL_regcomp_parse != ']')
-	FAIL("unmatched [] in regexp");
-    nextchar();
 
     ret = reganode(ANYOFUTF8, 0);
 
@@ -2547,11 +2575,11 @@ reguni(UV uv, char* s, I32* lenp)
 {
     dTHR;
     if (SIZE_ONLY) {
-	char tmpbuf[10];
+	U8 tmpbuf[10];
 	*lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf;
     }
     else
-	*lenp = uv_to_utf8(s, uv) - s;
+	*lenp = uv_to_utf8((U8*)s, uv) - (U8*)s;
 
 }