From: Jarkko Hietaniemi Date: Fri, 11 May 2001 14:08:20 +0000 (+0000) Subject: Remove the 'asciir' re subpragma. Should instead implement X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3a3c44472a318717ec0cdc0a7f768125ae0f001f;p=p5sagit%2Fp5-mst-13.2.git Remove the 'asciir' re subpragma. Should instead implement the 'physical vs logical' range scheme: \xAA-\xCC is a native physical range, you want that range of codepoints in your native encoding. In EBCDIC the codepoints in the gaps (between i-j and r-s) should be included. \x{AA}-\x{CC} is a physical Unicode range, you want that range of codepoints in Unicode. a-z is a logical range, you want that range of 'logical' codepoints in your native encoding. In EBCDIC the codepoints in the gaps (between i-j and r-s) should not be included. Mixed cases (a-\xAA, etc) should either be errors, or maybe the 'logical' endpoints should be converted to native/Unicode codepoints, and the range handled as a physical range. 'Logical endpoints' are to be recognized only in the A-Z, a-z, and 0-9 ranges. Probably a warning should be given for mixed cases like A-z or a-9 (since such expressions are encoding dependent), with a recommendation to use physical ranges. p4raw-id: //depot/perl@10085 --- diff --git a/ext/re/re.pm b/ext/re/re.pm index d66bda5..f1a4e3b 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -79,7 +79,6 @@ See L. my %bitmask = ( taint => 0x00100000, eval => 0x00200000, -asciirange => 0x02000000, ); sub setcolor { diff --git a/perl.h b/perl.h index 4c8dc55..35629d0 100644 --- a/perl.h +++ b/perl.h @@ -2840,7 +2840,6 @@ enum { /* pass one of these to get_vtbl */ #define HINT_FILETEST_ACCESS 0x00400000 #define HINT_UTF8 0x00800000 #define HINT_UTF8_DISTINCT 0x01000000 -#define HINT_RE_ASCIIR 0x02000000 /* Various states of an input record separator SV (rs, nrs) */ #define RsSNARF(sv) (! SvOK(sv)) diff --git a/regcomp.c b/regcomp.c index bf77ee6..a1defc6 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3515,14 +3515,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ for (value = 0; value < 256; value++) { - if (PL_hints & HINT_RE_ASCIIR) { - if (NATIVE_TO_ASCII(value) < 128) - ANYOF_BITMAP_SET(ret, value); - } - else { - if (isASCII(value)) - ANYOF_BITMAP_SET(ret, value); - } + if (isASCII(value)) + ANYOF_BITMAP_SET(ret, value); } #endif /* EBCDIC */ } @@ -3537,14 +3531,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ for (value = 0; value < 256; value++) { - if (PL_hints & HINT_RE_ASCIIR) { - if (NATIVE_TO_ASCII(value) >= 128) - ANYOF_BITMAP_SET(ret, value); - } - else { - if (!isASCII(value)) - ANYOF_BITMAP_SET(ret, value); - } + if (!isASCII(value)) + ANYOF_BITMAP_SET(ret, value); } #endif /* EBCDIC */ } @@ -3783,9 +3771,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } /* end of namedclass \blah */ if (range) { - if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) || - ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) && - (PL_hints & HINT_RE_ASCIIR))) /* b-a */ { + if (prevvalue > value) /* b-a */ { Simple_vFAIL4("Invalid [] range \"%*.*s\"", RExC_parse - rangebegin, RExC_parse - rangebegin, @@ -3823,18 +3809,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) IV ceilvalue = value < 256 ? value : 255; #ifdef EBCDIC - if (PL_hints & HINT_RE_ASCIIR) { - /* New style scheme for ranges: - * use re 'asciir'; - * do ranges in ASCII/Unicode space - */ - for (i = NATIVE_TO_ASCII(prevvalue); - i <= NATIVE_TO_ASCII(ceilvalue); - i++) - ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i)); - } - else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || - (isUPPER(prevvalue) && isUPPER(ceilvalue))) + if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || + (isUPPER(prevvalue) && isUPPER(ceilvalue))) { if (isLOWER(prevvalue)) { for (i = prevvalue; i <= ceilvalue; i++) diff --git a/t/op/pat.t b/t/op/pat.t index 1be7234..0df4d78 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -13,8 +13,6 @@ BEGIN { @INC = '../lib'; } -use re 'asciirange'; # Compute ranges in ASCII space - eval 'use Config'; # Defaults assumed if this fails $x = "abc\ndef\n"; diff --git a/t/op/regexp.t b/t/op/regexp.t index 0b81e71..4a4d42f 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -36,8 +36,6 @@ BEGIN { @INC = '../lib'; } -use re 'asciirange'; # ranges are computed in ASCII - $iters = shift || 1; # Poor man performance suite, 10000 is OK. open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') ||