From: Jarkko Hietaniemi Date: Sun, 12 May 2002 15:29:36 +0000 (+0000) Subject: EBCDIC: make t/op/pat #242 and 243 finally succeed. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1b2d223bc80114ab97e5af7bf5270927ae350ba4;p=p5sagit%2Fp5-mst-13.2.git EBCDIC: make t/op/pat #242 and 243 finally succeed. p4raw-id: //depot/perl@16556 --- diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index a9f1d0f..44ad6b9 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -837,7 +837,11 @@ As of perl 5.005_03 the letter range regular expression such as [A-Z] and [a-z] have been especially coded to not pick up gap characters. For example, characters such as E<ocirc> C<o WITH CIRCUMFLEX> that lie between I and J would not be matched by the -regular expression range C</[H-K]/>. +regular expression range C</[H-K]/>. This works in +the other direction, too, if either of the range end points is +explicitly numeric: C<[\x89-\x91]> will match C<\x8e>, even +though C<\x89> is C<i> and C<\x91 > is C<j>, and C<\x8e> +is a gap character from the alphabetic viewpoint. If you do want to match the alphabet gap characters in a single octet regular expression try matching the hex or octal code such diff --git a/regcomp.c b/regcomp.c index a435be9..6b17be1 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3523,6 +3523,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) UV n; bool optimize_invert = TRUE; AV* unicode_alternate = 0; +#ifdef EBCDIC + UV literal_endpoint = 0; +#endif ret = reganode(pRExC_state, ANYOF, 0); @@ -3685,6 +3688,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) break; } } /* end of \blah */ +#ifdef EBCDIC + else + literal_endpoint++; +#endif if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */ @@ -4087,8 +4094,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) IV ceilvalue = value < 256 ? 
value : 255; #ifdef EBCDIC - if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || - (isUPPER(prevvalue) && isUPPER(ceilvalue))) + /* In EBCDIC [\x89-\x91] should include + * the \x8e but [i-j] should not. */ + if (literal_endpoint == 2 && + ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || + (isUPPER(prevvalue) && isUPPER(ceilvalue)))) { if (isLOWER(prevvalue)) { for (i = prevvalue; i <= ceilvalue; i++) @@ -4168,6 +4178,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } } } +#ifdef EBCDIC + literal_endpoint = 0; +#endif } range = 0; /* this range (if it was one) is done now */