From: Karl Williamson Date: Wed, 12 May 2010 17:50:19 +0000 (-0600) Subject: PATCH: Clean up EBCDIC handling of \cX X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=6ae9f32af6f08fad4023edda45596d5c654483de;p=p5sagit%2Fp5-mst-13.2.git PATCH: Clean up EBCDIC handling of \cX The function perl_ebcdic_control() is unnecessary, as the toCTRL macro that calls it can be changed to just map EBCDIC to ASCII first, and then doing the normal procedure. This means that EBCDIC and ASCII will no longer diverge. Currently, EBCIDIC gives a syntax error for inputs outside its domain, whereas the ASCII version accepts some of them. --- diff --git a/handy.h b/handy.h index 3f44c69..d17b8d3 100644 --- a/handy.h +++ b/handy.h @@ -664,16 +664,9 @@ parameter, casts can silently truncate and yield wrong results. #define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f') #define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */ -#ifdef EBCDIC -# ifdef PERL_IMPLICIT_CONTEXT -# define toCTRL(c) Perl_ebcdic_control(aTHX_ c) -# else -# define toCTRL Perl_ebcdic_control -# endif -#else - /* This conversion works both ways, strangely enough. */ -# define toCTRL(c) (toUPPER(c) ^ 64) -#endif +/* This conversion works both ways, strangely enough. On EBCDIC platforms, + * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII */ +# define toCTRL(c) (toUPPER(NATIVE_TO_UNI(c)) ^ 64) /* Line numbers are unsigned, 32 bits. */ typedef U32 line_t; diff --git a/pod/perlop.pod b/pod/perlop.pod index de687d3..58c0660 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1060,9 +1060,9 @@ L for the complete list of what these sequences mean on both ASCII and EBCDIC platforms. Use of any other character following the "c" besides those listed above is -prohibited on EBCDIC platforms, and discouraged (and may become deprecated or -forbidden) on ASCII ones. What happens for those other characters currently -though, is that the value is derived by inverting the 7th bit (0x40). +discouraged, and may become deprecated or forbidden. What happens for those +other characters currently though, is that the value is derived by inverting +the 7th bit (0x40). To get platform independent controls, you can use C<\N{...}>. diff --git a/t/porting/diag.t b/t/porting/diag.t index 8a40a26..5d30823 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -366,7 +366,6 @@ Unexpected constant lvalue entersub entry via type/targ %d:%d Unicode non-character 0x%04 Unknown PerlIO layer "scalar" Unknown Unicode option letter '%c' -unrecognised control character '%c' Unstable directory path, current directory changed unexpectedly Unsupported script encoding UTF-16BE Unsupported script encoding UTF-16LE diff --git a/util.c b/util.c index 2eb2cc1..f1d7d50 100644 --- a/util.c +++ b/util.c @@ -3832,46 +3832,6 @@ Perl_report_evil_fh(pTHX_ const GV *gv, const IO *io, I32 op) } } -#ifdef EBCDIC -/* in ASCII order, not that it matters */ -static const char controllablechars[] = "?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; - -int -Perl_ebcdic_control(pTHX_ int ch) -{ - if (ch > 'a') { - const char *ctlp; - - if (islower(ch)) - ch = toupper(ch); - - if ((ctlp = strchr(controllablechars, ch)) == 0) { - Perl_die(aTHX_ "unrecognised control character '%c'\n", ch); - } - - if (ctlp == controllablechars) - return('\177'); /* DEL */ - else - return((unsigned char)(ctlp - controllablechars - 1)); - } else { /* Want uncontrol */ - if (ch == '\177' || ch == -1) - return('?'); - else if (ch == '\157') - return('\177'); - else if (ch == '\174') - return('\000'); - else if (ch == '^') /* '\137' in 1047, '\260' in 819 */ - return('\036'); - else if (ch == '\155') - return('\037'); - else if (0 < ch && ch < (sizeof(controllablechars) - 1)) - return(controllablechars[ch+1]); - else - Perl_die(aTHX_ "invalid control request: '\\%03o'\n", ch & 0xFF); - } -} -#endif - /* XXX Add documentation after final interface and behavior is decided */ /* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning) U8 source = *current;