From: Karl Williamson Date: Wed, 5 May 2010 18:03:16 +0000 (-0600) Subject: Fix perlebcdic for 80 column tty; fix C<>s X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c72e675e8b42d658ffa4770f8d9c87ab4870aceb;p=p5sagit%2Fp5-mst-13.2.git Fix perlebcdic for 80 column tty; fix C<>s Two C<>'s were unclosed --- diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index 7c1082d..f690595 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -124,8 +124,8 @@ The problem is: which code points to use for code points less than 256? In EBCDIC, for the low 256 the EBCDIC code points are used. This means that the equivalences - pack("U", ord($character)) eq $character - unpack("U", $character) == ord $character + pack("U", ord($character)) eq $character + unpack("U", $character) == ord $character will hold. (If Unicode code points were applied consistently over all the possible code points, pack("U",ord("A")) would in EBCDIC @@ -182,23 +182,23 @@ to translate from EBCDIC to Latin-1 code points. Encode knows about more EBCDIC character sets than Perl can currently be compiled to run on. 
- use Encode 'from_to'; + use Encode 'from_to'; - my %ebcdic = ( 176 => 'cp37', 95 => 'cp1047', 106 => 'posix-bc' ); + my %ebcdic = ( 176 => 'cp37', 95 => 'cp1047', 106 => 'posix-bc' ); - # $a is in EBCDIC code points - from_to($a, $ebcdic{ord '^'}, 'latin1'); - # $a is ISO 8859-1 code points + # $a is in EBCDIC code points + from_to($a, $ebcdic{ord '^'}, 'latin1'); + # $a is ISO 8859-1 code points and from Latin-1 code points to EBCDIC code points - use Encode 'from_to'; + use Encode 'from_to'; - my %ebcdic = ( 176 => 'cp37', 95 => 'cp1047', 106 => 'posix-bc' ); + my %ebcdic = ( 176 => 'cp37', 95 => 'cp1047', 106 => 'posix-bc' ); - # $a is ISO 8859-1 code points - from_to($a, 'latin1', $ebcdic{ord '^'}); - # $a is in EBCDIC code points + # $a is ISO 8859-1 code points + from_to($a, 'latin1', $ebcdic{ord '^'}); + # $a is in EBCDIC code points For doing I/O it is suggested that you use the autotranslating features of PerlIO, see L<perluniintro>. @@ -265,20 +265,22 @@ might want to write: =back - open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!"; - while (<FH>) { - if (/(.{43})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/) { - if ($7 ne '' && $9 ne '') { - printf("%s%-9o%-9o%-9o%-9o%-3o.%-5o%-3o.%o\n",$1,$2,$3,$4,$5,$6,$7,$8,$9); - } - elsif ($7 ne '') { - printf("%s%-9o%-9o%-9o%-9o%-3o.%-5o%o\n",$1,$2,$3,$4,$5,$6,$7,$8); - } - else { - printf("%s%-9o%-9o%-9o%-9o%-9o%o\n",$1,$2,$3,$4,$5,$6,$8); - } - } - } + open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!"; + while (<FH>) { + if (/(.{43})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/) { + if ($7 ne '' && $9 ne '') { + printf("%s%-9o%-9o%-9o%-9o%-3o.%-5o%-3o.%o\n", + $1,$2,$3,$4,$5,$6,$7,$8,$9); + } + elsif ($7 ne '') { + printf("%s%-9o%-9o%-9o%-9o%-3o.%-5o%o\n", + $1,$2,$3,$4,$5,$6,$7,$8); + } + else { + printf("%s%-9o%-9o%-9o%-9o%-9o%o\n",$1,$2,$3,$4,$5,$6,$8); + } + } + } If you would rather see this table listing hexadecimal values then run the table through: @@ -300,20 +302,22 @@ Or, in order to retain the UTF-x code points in hexadecimal: =back - open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!"; - while (<FH>) { - if 
(/(.{43})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/) { - if ($7 ne '' && $9 ne '') { - printf("%s%-9X%-9X%-9X%-9X%-2X.%-6X%-2X.%X\n",$1,$2,$3,$4,$5,$6,$7,$8,$9); - } - elsif ($7 ne '') { - printf("%s%-9X%-9X%-9X%-9X%-2X.%-6X%X\n",$1,$2,$3,$4,$5,$6,$7,$8); - } - else { - printf("%s%-9X%-9X%-9X%-9X%-9X%X\n",$1,$2,$3,$4,$5,$6,$8); - } - } - } + open(FH,"<perlebcdic.pod") or die "Could not open perlebcdic.pod: $!"; + while (<FH>) { + if (/(.{43})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\.?(\d*)\s+(\d+)\.?(\d*)/) { + if ($7 ne '' && $9 ne '') { + printf("%s%-9X%-9X%-9X%-9X%-2X.%-6X%-2X.%X\n", + $1,$2,$3,$4,$5,$6,$7,$8,$9); + } + elsif ($7 ne '') { + printf("%s%-9X%-9X%-9X%-9X%-2X.%-6X%X\n", + $1,$2,$3,$4,$5,$6,$7,$8); + } + else { + printf("%s%-9X%-9X%-9X%-9X%-9X%X\n",$1,$2,$3,$4,$5,$6,$8); + } + } + } ISO 8859-1 CCSID CCSID CCSID 1047 @@ -585,7 +589,7 @@ ASCII + Latin-1 order then run the table through: =back - perl -ne 'if(/.{43}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ + perl -ne 'if(/.{43}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ -e '{push(@l,$_)}' \ -e 'END{print map{$_->[0]}' \ -e ' sort{$a->[1] <=> $b->[1]}' \ @@ -600,7 +604,7 @@ If you would rather see it in CCSID 1047 order then change the number =back - perl -ne 'if(/.{43}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ + perl -ne 'if(/.{43}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ -e '{push(@l,$_)}' \ -e 'END{print map{$_->[0]}' \ -e ' sort{$a->[1] <=> $b->[1]}' \ @@ -615,7 +619,7 @@ If you would rather see it in POSIX-BC order then change the number =back - perl -ne 'if(/.{43}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ + perl -ne 'if(/.{43}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\ -e '{push(@l,$_)}' \ -e 'END{print map{$_->[0]}' \ -e ' sort{$a->[1] <=> $b->[1]}' \ @@ -758,8 +762,8 @@ an example adapted from the one in L<perlop>: An interesting property of the 32 C0 control characters in the ASCII table is that they can "literally" be constructed -as control characters in perl, e.g. 
C<(chr(0) eq C<\c@>)> -C<(chr(1) eq C<\cA>)>, and so on. Perl on EBCDIC platforms has been +as control characters in perl, e.g. C<(chr(0)> eq C<\c@>)> +C<(chr(1)> eq C<\cA>)>, and so on. Perl on EBCDIC platforms has been ported to take C<\c@> to chr(0) and C<\cA> to chr(1), etc. as well, but the thirty three characters that result depend on which code page you are using. The table below uses the standard acronyms for the controls. @@ -773,7 +777,7 @@ or regex, as it will absorb the terminator. But C<\c\I<X>> is a C<FILE SEPARATOR> concatenated with I<X> for all I<X>. chr ord 8859-1 0037 1047 && POSIX-BC - ------------------------------------------------------------------------ + ----------------------------------------------------------------------- \c? 127 " " \c@ 0 \cA 1 @@ -1032,7 +1036,7 @@ letters compared to the digits. If sorted on an ASCII based platform the two letter abbreviation for a physician comes before the two letter for drive, that is: - @sorted = sort(qw(Dr. dr.)); # @sorted holds ('Dr.','dr.') on ASCII, + @sorted = sort(qw(Dr. dr.)); # @sorted holds ('Dr.','dr.') on ASCII, # but ('dr.','Dr.') on EBCDIC The property of lower case before uppercase letters in EBCDIC is