From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Fri, 11 May 2001 14:08:20 +0000 (+0000)
Subject: Remove the 'asciir' re subpragma.  Should instead implement
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3a3c44472a318717ec0cdc0a7f768125ae0f001f;p=p5sagit%2Fp5-mst-13.2.git

Remove the 'asciir' ('asciirange') re subpragma.  What should be
implemented instead is the 'physical vs logical' range scheme:

\xAA-\xCC is a native physical range: you want that range of
codepoints in your native encoding.  In EBCDIC, the codepoints
in the gaps (between i-j and r-s) should be included.

\x{AA}-\x{CC} is a physical Unicode range: you want that range of
codepoints in Unicode.

a-z is a logical range: you want that range of 'logical' codepoints
in your native encoding.  In EBCDIC, the codepoints in the gaps
(between i-j and r-s) should not be included.

Mixed cases (a-\xAA, etc) should either be errors, or maybe
the 'logical' endpoints should be converted to native/Unicode
codepoints, and the range handled as a physical range.

'Logical endpoints' are to be recognized only in the A-Z, a-z,
and 0-9 ranges.  Probably a warning should be given for mixed
cases like A-z or a-9 (since such expressions are encoding
dependent), with a recommendation to use physical ranges.

p4raw-id: //depot/perl@10085
---

diff --git a/ext/re/re.pm b/ext/re/re.pm
index d66bda5..f1a4e3b 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -79,7 +79,6 @@ See L<perlmodlib/Pragmatic Modules>.
 my %bitmask = (
 taint		=> 0x00100000,
 eval		=> 0x00200000,
-asciirange	=> 0x02000000,
 );
 
 sub setcolor {
diff --git a/perl.h b/perl.h
index 4c8dc55..35629d0 100644
--- a/perl.h
+++ b/perl.h
@@ -2840,7 +2840,6 @@ enum {		/* pass one of these to get_vtbl */
 #define HINT_FILETEST_ACCESS	0x00400000
 #define HINT_UTF8		0x00800000
 #define HINT_UTF8_DISTINCT	0x01000000
-#define HINT_RE_ASCIIR		0x02000000
 
 /* Various states of an input record separator SV (rs, nrs) */
 #define RsSNARF(sv)   (! SvOK(sv))
diff --git a/regcomp.c b/regcomp.c
index bf77ee6..a1defc6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3515,14 +3515,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
 			for (value = 0; value < 256; value++) {
-			    if (PL_hints & HINT_RE_ASCIIR) {
-				if (NATIVE_TO_ASCII(value) < 128)
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
-			    else {
-				if (isASCII(value))
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
+			    if (isASCII(value))
+			        ANYOF_BITMAP_SET(ret, value);
 			}
 #endif /* EBCDIC */
 		    }
@@ -3537,14 +3531,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
 			for (value = 0; value < 256; value++) {
-			    if (PL_hints & HINT_RE_ASCIIR) {
-				if (NATIVE_TO_ASCII(value) >= 128)
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
-			    else {
-				if (!isASCII(value))
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
+			    if (!isASCII(value))
+			        ANYOF_BITMAP_SET(ret, value);
 			}
 #endif /* EBCDIC */
 		    }
@@ -3783,9 +3771,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	} /* end of namedclass \blah */
 
 	if (range) {
-	    if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
-                ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) &&
-		 (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
+	    if (prevvalue > value) /* b-a */ {
 		Simple_vFAIL4("Invalid [] range \"%*.*s\"",
 			      RExC_parse - rangebegin,
 			      RExC_parse - rangebegin,
@@ -3823,18 +3809,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	        IV ceilvalue = value < 256 ? value : 255;
 
 #ifdef EBCDIC
-		if (PL_hints & HINT_RE_ASCIIR) {
-		    /* New style scheme for ranges:
-		     * use re 'asciir';
-		     * do ranges in ASCII/Unicode space
-		     */
-		    for (i  = NATIVE_TO_ASCII(prevvalue);
-			 i <= NATIVE_TO_ASCII(ceilvalue);
-			 i++)
-		      ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
-		}
-		else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-			 (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+		if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+		    (isUPPER(prevvalue) && isUPPER(ceilvalue)))
 		{
 		    if (isLOWER(prevvalue)) {
 			for (i = prevvalue; i <= ceilvalue; i++)
diff --git a/t/op/pat.t b/t/op/pat.t
index 1be7234..0df4d78 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -13,8 +13,6 @@ BEGIN {
     @INC = '../lib';
 }
 
-use re 'asciirange'; # Compute ranges in ASCII space
-
 eval 'use Config';          #  Defaults assumed if this fails
 
 $x = "abc\ndef\n";
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 0b81e71..4a4d42f 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -36,8 +36,6 @@ BEGIN {
     @INC = '../lib';
 }
 
-use re 'asciirange'; # ranges are computed in ASCII
-
 $iters = shift || 1;		# Poor man performance suite, 10000 is OK.
 
 open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') ||