Remove the 'asciirange' re subpragma. Should instead implement
Jarkko Hietaniemi [Fri, 11 May 2001 14:08:20 +0000 (14:08 +0000)]
the 'physical vs logical' range scheme:

\xAA-\xCC is a native physical range: you want that range of
codepoints in your native encoding.  In EBCDIC the codepoints
in the gaps (between i and j, and between r and s) should be included.

\x{AA}-\x{CC} is a physical Unicode range: you want that range of
codepoints in Unicode.

a-z is a logical range: you want that range of 'logical' codepoints
in your native encoding.  In EBCDIC the codepoints in the gaps
(between i and j, and between r and s) should not be included.

Mixed cases (a-\xAA, etc) should either be errors, or maybe
the 'logical' endpoints should be converted to native/Unicode
codepoints, and the range handled as a physical range.

'Logical endpoints' are to be recognized only in the A-Z, a-z,
and 0-9 ranges.  Probably a warning should be given for mixed
cases like A-z or a-9 (since such expressions are encoding
dependent), with a recommendation to use physical ranges.

p4raw-id: //depot/perl@10085

ext/re/re.pm
perl.h
regcomp.c
t/op/pat.t
t/op/regexp.t

diff --git a/ext/re/re.pm b/ext/re/re.pm
index d66bda5..f1a4e3b 100644 (file)
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -79,7 +79,6 @@ See L<perlmodlib/Pragmatic Modules>.
 my %bitmask = (
 taint          => 0x00100000,
 eval           => 0x00200000,
-asciirange     => 0x02000000,
 );
 
 sub setcolor {
diff --git a/perl.h b/perl.h
index 4c8dc55..35629d0 100644 (file)
--- a/perl.h
+++ b/perl.h
@@ -2840,7 +2840,6 @@ enum {            /* pass one of these to get_vtbl */
 #define HINT_FILETEST_ACCESS   0x00400000
 #define HINT_UTF8              0x00800000
 #define HINT_UTF8_DISTINCT     0x01000000
-#define HINT_RE_ASCIIR         0x02000000
 
 /* Various states of an input record separator SV (rs, nrs) */
 #define RsSNARF(sv)   (! SvOK(sv))
diff --git a/regcomp.c b/regcomp.c
index bf77ee6..a1defc6 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3515,14 +3515,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
                        for (value = 0; value < 256; value++) {
-                           if (PL_hints & HINT_RE_ASCIIR) {
-                               if (NATIVE_TO_ASCII(value) < 128)
-                                   ANYOF_BITMAP_SET(ret, value);
-                           }
-                           else {
-                               if (isASCII(value))
-                                   ANYOF_BITMAP_SET(ret, value);
-                           }
+                           if (isASCII(value))
+                               ANYOF_BITMAP_SET(ret, value);
                        }
 #endif /* EBCDIC */
                    }
@@ -3537,14 +3531,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
                        for (value = 0; value < 256; value++) {
-                           if (PL_hints & HINT_RE_ASCIIR) {
-                               if (NATIVE_TO_ASCII(value) >= 128)
-                                   ANYOF_BITMAP_SET(ret, value);
-                           }
-                           else {
-                               if (!isASCII(value))
-                                   ANYOF_BITMAP_SET(ret, value);
-                           }
+                           if (!isASCII(value))
+                               ANYOF_BITMAP_SET(ret, value);
                        }
 #endif /* EBCDIC */
                    }
@@ -3783,9 +3771,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        } /* end of namedclass \blah */
 
        if (range) {
-           if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
-                ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) &&
-                (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
+           if (prevvalue > value) /* b-a */ {
                Simple_vFAIL4("Invalid [] range \"%*.*s\"",
                              RExC_parse - rangebegin,
                              RExC_parse - rangebegin,
@@ -3823,18 +3809,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                IV ceilvalue = value < 256 ? value : 255;
 
 #ifdef EBCDIC
-               if (PL_hints & HINT_RE_ASCIIR) {
-                   /* New style scheme for ranges:
-                    * use re 'asciir';
-                    * do ranges in ASCII/Unicode space
-                    */
-                   for (i  = NATIVE_TO_ASCII(prevvalue);
-                        i <= NATIVE_TO_ASCII(ceilvalue);
-                        i++)
-                     ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
-               }
-               else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-                        (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+               if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+                   (isUPPER(prevvalue) && isUPPER(ceilvalue)))
                {
                    if (isLOWER(prevvalue)) {
                        for (i = prevvalue; i <= ceilvalue; i++)
diff --git a/t/op/pat.t b/t/op/pat.t
index 1be7234..0df4d78 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -13,8 +13,6 @@ BEGIN {
     @INC = '../lib';
 }
 
-use re 'asciirange'; # Compute ranges in ASCII space
-
 eval 'use Config';          #  Defaults assumed if this fails
 
 $x = "abc\ndef\n";
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 0b81e71..4a4d42f 100755 (executable)
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -36,8 +36,6 @@ BEGIN {
     @INC = '../lib';
 }
 
-use re 'asciirange'; # ranges are computed in ASCII
-
 $iters = shift || 1;           # Poor man performance suite, 10000 is OK.
 
 open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') ||