From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sun, 25 Feb 2001 17:25:47 +0000 (+0000)
Subject: (Retracted by #8395.)
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=064fcf7b8c2e751af75a80918f4d92aeaa8e0110;p=p5sagit%2Fp5-mst-13.2.git

(Retracted by #8395.)

Attempt to fix the EBCDIC character range problem with //.

p4raw-id: //depot/perl@8930
---

diff --git a/regcomp.c b/regcomp.c
index 69d114e..997044f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3185,6 +3185,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     register char *e;
     UV n;
     bool dont_optimize_invert = FALSE;
+#ifdef ALPHAS_HAVE_GAPS
+    bool explicit_alpha      = TRUE;
+    bool explicit_alpha_prev = TRUE;
+#endif
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3371,7 +3375,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isALNUM(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");	
 		    break;
 		case ANYOF_NALNUM:
@@ -3382,7 +3385,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isALNUM(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
 		    break;
 		case ANYOF_ALNUMC:
@@ -3393,7 +3395,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isALNUMC(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
 		    break;
 		case ANYOF_NALNUMC:
@@ -3404,7 +3405,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isALNUMC(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
 		    break;
 		case ANYOF_ALPHA:
@@ -3415,7 +3415,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isALPHA(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
 		    break;
 		case ANYOF_NALPHA:
@@ -3426,39 +3425,36 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isALPHA(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
 		    break;
 		case ANYOF_ASCII:
 		    if (LOC)
 			ANYOF_CLASS_SET(ret, ANYOF_ASCII);
 		    else {
-#ifdef ASCIIish
-			for (value = 0; value < 128; value++)
-			    ANYOF_BITMAP_SET(ret, value);
-#else  /* EBCDIC */
+#ifdef ALPHAS_HAVE_GAPS
 			for (value = 0; value < 256; value++)
 			    if (isASCII(value))
 				ANYOF_BITMAP_SET(ret, value);
-#endif /* EBCDIC */
+#else
+			for (value = 0; value < 128; value++)
+			    ANYOF_BITMAP_SET(ret, value);
+#endif
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
 		    break;
 		case ANYOF_NASCII:
 		    if (LOC)
 			ANYOF_CLASS_SET(ret, ANYOF_NASCII);
 		    else {
-#ifdef ASCIIish
-			for (value = 128; value < 256; value++)
-			    ANYOF_BITMAP_SET(ret, value);
-#else  /* EBCDIC */
+#ifdef ALPHAS_HAVE_GAPS
 			for (value = 0; value < 256; value++)
 			    if (!isASCII(value))
 				ANYOF_BITMAP_SET(ret, value);
-#endif /* EBCDIC */
+#else
+			for (value = 128; value < 256; value++)
+			    ANYOF_BITMAP_SET(ret, value);
+#endif
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
 		    break;
 		case ANYOF_BLANK:
@@ -3469,7 +3465,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isBLANK(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
 		    break;
 		case ANYOF_NBLANK:
@@ -3480,7 +3475,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isBLANK(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
 		    break;
 		case ANYOF_CNTRL:
@@ -3491,7 +3485,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isCNTRL(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
 		    break;
 		case ANYOF_NCNTRL:
@@ -3502,7 +3495,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isCNTRL(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
 		    break;
 		case ANYOF_DIGIT:
@@ -3513,7 +3505,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			for (value = '0'; value <= '9'; value++)
 			    ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
 		    break;
 		case ANYOF_NDIGIT:
@@ -3526,7 +3517,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			for (value = '9' + 1; value < 256; value++)
 			    ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
 		    break;
 		case ANYOF_GRAPH:
@@ -3537,7 +3527,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isGRAPH(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
 		    break;
 		case ANYOF_NGRAPH:
@@ -3548,7 +3537,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isGRAPH(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
 		    break;
 		case ANYOF_LOWER:
@@ -3559,7 +3547,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isLOWER(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
 		    break;
 		case ANYOF_NLOWER:
@@ -3570,7 +3557,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isLOWER(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
 		    break;
 		case ANYOF_PRINT:
@@ -3581,7 +3567,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isPRINT(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
 		    break;
 		case ANYOF_NPRINT:
@@ -3592,7 +3577,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isPRINT(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
 		    break;
 		case ANYOF_PSXSPC:
@@ -3603,7 +3587,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isPSXSPC(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
 		    break;
 		case ANYOF_NPSXSPC:
@@ -3614,7 +3597,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isPSXSPC(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
 		    break;
 		case ANYOF_PUNCT:
@@ -3625,7 +3607,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isPUNCT(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
 		    break;
 		case ANYOF_NPUNCT:
@@ -3636,7 +3617,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isPUNCT(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
 		    break;
 		case ANYOF_SPACE:
@@ -3647,7 +3627,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isSPACE(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
 		    break;
 		case ANYOF_NSPACE:
@@ -3658,7 +3637,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isSPACE(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
 		    break;
 		case ANYOF_UPPER:
@@ -3669,7 +3647,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isUPPER(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
 		    break;
 		case ANYOF_NUPPER:
@@ -3680,7 +3657,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isUPPER(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
 		    break;
 		case ANYOF_XDIGIT:
@@ -3691,7 +3667,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (isXDIGIT(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
 		    break;
 		case ANYOF_NXDIGIT:
@@ -3702,7 +3677,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    if (!isXDIGIT(value))
 				ANYOF_BITMAP_SET(ret, value);
 		    }
-		    dont_optimize_invert = TRUE;
 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
 		    break;
 		default:
@@ -3711,6 +3685,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 		}
 		if (LOC)
 		    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
+		dont_optimize_invert = TRUE;
 		continue;
 	    }
 	} /* end of namedclass \blah */
@@ -3726,6 +3701,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	}
 	else {
 	    lastvalue = value; /* save the beginning of the range */
+#ifdef ALPHAS_HAVE_GAPS
+	    explicit_alpha_prev = explicit_alpha;
+	    explicit_alpha      = isALPHA(value);
+#endif
 	    if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
 		RExC_parse[1] != ']') {
 		RExC_parse++;
@@ -3749,9 +3728,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	/* now is the next time */
 	if (!SIZE_ONLY) {
 	    if (lastvalue < 256 && value < 256) {
-#ifndef ASCIIish /* EBCDIC, for example. */
-		if ((isLOWER(lastvalue) && isLOWER(value)) ||
-		    (isUPPER(lastvalue) && isUPPER(value)))
+#ifdef ALPHAS_HAVE_GAPS
+		/* In EBCDIC the letters are not an unbroken range
+		 * numerically: there are gaps between i-j, r-s,
+		 * I-J, R-S.  We DWIM that if the endpoints of the
+		 * range are specified as explicitly alphabetic,
+		 * an alphabetic range is requested; otherwise
+		 * (the else branch; say, explicit numeric endpoints
+		 * like \xHH are used) we do a straightforward
+		 * numeric range. */
+		if (explicit_alpha_prev && explicit_alpha &&
+		    ((isLOWER(lastvalue) && isLOWER(value)) ||
+		    ((isUPPER(lastvalue) && isUPPER(value)))))
 		{
 		    IV i;
 		    if (isLOWER(lastvalue)) {