Retract #8929,8930,8932,8933 for now.
Jarkko Hietaniemi [Sun, 25 Feb 2001 18:46:36 +0000 (18:46 +0000)]
p4raw-id: //depot/perl@8935

perl.h
regcomp.c
t/op/pat.t
t/op/tr.t
toke.c

diff --git a/perl.h b/perl.h
index a1ddcf0..2b66473 100644 (file)
--- a/perl.h
+++ b/perl.h
@@ -3517,10 +3517,6 @@ typedef struct am_table_short AMTS;
 #define EXEC_ARGV_CAST(x) x
 #endif
 
-#ifdef EBCDIC
-#define ALPHAS_HAVE_GAPS
-#endif
-
 /* and finally... */
 #define PERL_PATCHLEVEL_H_IMPLICIT
 #include "patchlevel.h"
index 997044f..69d114e 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3185,10 +3185,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     register char *e;
     UV n;
     bool dont_optimize_invert = FALSE;
-#ifdef ALPHAS_HAVE_GAPS
-    bool explicit_alpha      = TRUE;
-    bool explicit_alpha_prev = TRUE;
-#endif
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3375,6 +3371,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    
                    break;
                case ANYOF_NALNUM:
@@ -3385,6 +3382,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
                    break;
                case ANYOF_ALNUMC:
@@ -3395,6 +3393,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
                    break;
                case ANYOF_NALNUMC:
@@ -3405,6 +3404,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
                    break;
                case ANYOF_ALPHA:
@@ -3415,6 +3415,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
                    break;
                case ANYOF_NALPHA:
@@ -3425,36 +3426,39 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
                    break;
                case ANYOF_ASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
                    else {
-#ifdef ALPHAS_HAVE_GAPS
+#ifdef ASCIIish
+                       for (value = 0; value < 128; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+#else  /* EBCDIC */
                        for (value = 0; value < 256; value++)
                            if (isASCII(value))
                                ANYOF_BITMAP_SET(ret, value);
-#else
-                       for (value = 0; value < 128; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-#endif
+#endif /* EBCDIC */
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
                    break;
                case ANYOF_NASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_NASCII);
                    else {
-#ifdef ALPHAS_HAVE_GAPS
+#ifdef ASCIIish
+                       for (value = 128; value < 256; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+#else  /* EBCDIC */
                        for (value = 0; value < 256; value++)
                            if (!isASCII(value))
                                ANYOF_BITMAP_SET(ret, value);
-#else
-                       for (value = 128; value < 256; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-#endif
+#endif /* EBCDIC */
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
                    break;
                case ANYOF_BLANK:
@@ -3465,6 +3469,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
                    break;
                case ANYOF_NBLANK:
@@ -3475,6 +3480,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
                    break;
                case ANYOF_CNTRL:
@@ -3485,6 +3491,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
                    break;
                case ANYOF_NCNTRL:
@@ -3495,6 +3502,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
                    break;
                case ANYOF_DIGIT:
@@ -3505,6 +3513,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '0'; value <= '9'; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
                    break;
                case ANYOF_NDIGIT:
@@ -3517,6 +3526,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                        for (value = '9' + 1; value < 256; value++)
                            ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
                    break;
                case ANYOF_GRAPH:
@@ -3527,6 +3537,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
                    break;
                case ANYOF_NGRAPH:
@@ -3537,6 +3548,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
                    break;
                case ANYOF_LOWER:
@@ -3547,6 +3559,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
                    break;
                case ANYOF_NLOWER:
@@ -3557,6 +3570,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
                    break;
                case ANYOF_PRINT:
@@ -3567,6 +3581,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
                    break;
                case ANYOF_NPRINT:
@@ -3577,6 +3592,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
                    break;
                case ANYOF_PSXSPC:
@@ -3587,6 +3603,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
                    break;
                case ANYOF_NPSXSPC:
@@ -3597,6 +3614,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
                    break;
                case ANYOF_PUNCT:
@@ -3607,6 +3625,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
                    break;
                case ANYOF_NPUNCT:
@@ -3617,6 +3636,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
                    break;
                case ANYOF_SPACE:
@@ -3627,6 +3647,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
                    break;
                case ANYOF_NSPACE:
@@ -3637,6 +3658,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isSPACE(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
                    break;
                case ANYOF_UPPER:
@@ -3647,6 +3669,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
                    break;
                case ANYOF_NUPPER:
@@ -3657,6 +3680,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
                    break;
                case ANYOF_XDIGIT:
@@ -3667,6 +3691,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
                    break;
                case ANYOF_NXDIGIT:
@@ -3677,6 +3702,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   dont_optimize_invert = TRUE;
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
                    break;
                default:
@@ -3685,7 +3711,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                }
                if (LOC)
                    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
-               dont_optimize_invert = TRUE;
                continue;
            }
        } /* end of namedclass \blah */
@@ -3701,10 +3726,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        }
        else {
            lastvalue = value; /* save the beginning of the range */
-#ifdef ALPHAS_HAVE_GAPS
-           explicit_alpha_prev = explicit_alpha;
-           explicit_alpha      = isALPHA(value);
-#endif
            if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
                RExC_parse[1] != ']') {
                RExC_parse++;
@@ -3728,18 +3749,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        /* now is the next time */
        if (!SIZE_ONLY) {
            if (lastvalue < 256 && value < 256) {
-#ifdef ALPHAS_HAVE_GAPS
-               /* In EBCDIC the letters are not an unbroken range 
-                * numerically, there's are gaps between i-j, r-s,
-                * I-J, R-S.  We DWIM that if the endpoints of the
-                * range are specified as explicitly alphabetic,
-                * an alphabetic range is requested, otherwise
-                * (the else branch) (say, explicit numeric endpoints
-                * like \xHH are used) we do a straightforward
-                * numeric range. */
-               if (explicit_alpha_prev && explicit_alpha &&
-                   ((isLOWER(lastvalue) && isLOWER(value)) ||
-                   ((isUPPER(lastvalue) && isUPPER(value)))))
+#ifndef ASCIIish /* EBCDIC, for example. */
+               if ((isLOWER(lastvalue) && isLOWER(value)) ||
+                   (isUPPER(lastvalue) && isUPPER(value)))
                {
                    IV i;
                    if (isLOWER(lastvalue)) {
index 590c268..237ea44 100755 (executable)
@@ -4,7 +4,7 @@
 # the format supported by op/regexp.t.  If you want to add a test
 # that does fit that format, add it to op/re_tests, not here.
 
-print "1..244\n";
+print "1..242\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1183,29 +1183,23 @@ if (/(\C)/g) {
   }
 }
 
-if (ord('i') == 0x89 && ord('j') == 0x91) { # EBCDIC
-  if ("\x8e" =~ /[\x89-\x91]/) {
-    print "ok 241\n";
-  } else {
-    print "not ok 241\n";
-  }
-  if ("\x8e" !~ /[i-j]/) {
-    print "ok 242\n";
-  } else {
-    print "not ok 242\n";
-  }
-  if ("\xce" =~ /[\xc9-\xd1]/) {
-    print "ok 243\n";
-  } else {
-    print "not ok 243\n";
-  }
-  if ("\xce" !~ /[I-J]/) {
-    print "ok 244\n";
-  } else {
-    print "not ok 244\n";
-  }
-} else {
-  for (241..244) {
-    print "ok $_ # Skip: not EBCDIC\n";
-  }
-}
+# 241..242
+#
+# The tr is admittedly NOT a regular expression operator,
+# but this test is more of an EBCDIC test, the background is
+# that \x89 is 'i' and \x90 is 'j', and \x8e is not a letter,
+# not even a printable character.  Now for the trick:
+# if the range is specified using letters, the \x8e should most
+# probably not match, but if the range is specified using explicit
+# numeric endpoints, it probably should match.  The first case,
+# not matching if using letters, is already tested elsewhere,
+# here we test for the matching cases.
+
+$_ = qq/\x8E/;
+
+print "not " unless /[\x89-\x91]/;
+print "ok 241\n";
+
+print "not " unless tr/\x89-\x91//d == 1;
+print "ok 242\n";
+
index 514d15c..75887ab 100755 (executable)
--- a/t/op/tr.t
+++ b/t/op/tr.t
@@ -5,7 +5,7 @@ BEGIN {
     @INC = '../lib';
 }
 
-print "1..58\n";
+print "1..51\n";
 
 $_ = "abcdefghijklmnopqrstuvwxyz";
 
@@ -296,44 +296,3 @@ print "ok 50\n";
 ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
 print "not " unless $a eq v300.300.172.302.301.172;
 print "ok 51\n";
-
-# Tricky on EBCDIC: while [a-z] must not match the gap characters,
-# (i-j, r-s, I-J, R-S), [\x89-\x91] has to match them, from Karsten
-# Sperling.
-
-if (ord('i') == 0x89 & ord('j') == 0x91) {
-
-$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
-print "not " unless $c == 8 and $a eq "XXXXXXXX";
-print "ok 52\n";
-   
-$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
-print "not " unless $c == 2 and $a eq "X\x8a\x8b\x8c\x8d\x8f\x90X";
-print "ok 53\n";
-   
-$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
-print "not " unless $c == 8 and $a eq "XXXXXXXX";
-print "ok 54\n";
-   
-$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
-print "not " unless $c == 2 and $a eq "X\xca\xcb\xcc\xcd\xcf\xd0X";
-print "ok 55\n";
-
-} else {
-  for (52..55) { print "ok $_ # Skip: not EBCDIC\n" }
-}
-
-# some more wide-char tests from Karsten Sperling
-
-($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
-print "not " unless $a eq "X";
-print "ok 56\n";
-
-($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
-print "not " unless $a eq "X";
-print "ok 57\n";
-($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
-print "not " unless $a eq "X";
-print ok "58\n"; 
-
diff --git a/toke.c b/toke.c
index 2cb6407..f8d7145 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1226,9 +1226,9 @@ S_scan_const(pTHX_ char *start)
        if (PL_lex_inwhat == OP_TRANS) {
            /* expand a range A-Z to the full set of characters.  AIE! */
            if (dorange) {
-               UV i;                           /* current expanded character */
-               UV min;                         /* first character in range */
-               UV max;                         /* last character in range */
+               I32 i;                          /* current expanded character */
+               I32 min;                        /* first character in range */
+               I32 max;                        /* last character in range */
 
                i = d - SvPVX(sv);              /* remember current offset */
                SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
@@ -1240,12 +1240,11 @@ S_scan_const(pTHX_ char *start)
 
                 if (min > max) {
                    Perl_croak(aTHX_
-                              "Invalid [] range \"\\x%"UVxf"-\\x%"UVxf"\" in transliteration operator",
-                              min, max);
+                              "Invalid [] range \"%c-%c\" in transliteration operator",
+                              (char)min, (char)max);
                 }
 
-#ifdef ALPHAS_HAVE_GAPS
-               /* BROKEN FOR EBCDIC, see regcomp.c:reglass() */ 
+#ifndef ASCIIish
                if ((isLOWER(min) && isLOWER(max)) ||
                    (isUPPER(min) && isUPPER(max))) {
                    if (isLOWER(min)) {