From: Jarkko Hietaniemi Date: Fri, 7 Dec 2001 21:57:30 +0000 (+0000) Subject: perluniintro tweaks. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1ecefa54ea1334df5ae2e4a9a02bc2fae515a3f6;p=p5sagit%2Fp5-mst-13.2.git perluniintro tweaks. p4raw-id: //depot/perl@13523 --- diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod index 7714d13..41d27ca 100644 --- a/pod/perluniintro.pod +++ b/pod/perluniintro.pod @@ -274,8 +274,8 @@ the C<open> pragma; see L<open>: With the C<open> pragma you can use the C<:locale> discipline - $ENV{LANG} = 'ru_RU.KOI8-R'; - # the :locale will probe the locale environment variables like LANG + $ENV{LC_ALL} = $ENV{LANG} = 'ru_RU.KOI8-R'; + # the :locale will probe the locale environment variables like LC_ALL use open OUT => ':locale'; # russki parusski open(O, ">koi8"); print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 @@ -303,10 +303,10 @@ streams, use explicit disciplines directly in the C<open()> call. You can switch encodings on an already opened stream by using C<binmode()>, see L<perlfunc/binmode>. -The C<:locale> does not currently work with C<open()> and -C<binmode()>, only with the C<open> pragma. The C<:utf8> and -C<:encoding(...)> do work with all of C<open()>, C<binmode()>, -and the C<open> pragma. +The C<:locale> does not currently (as of Perl 5.8.0) work with +C<open()> and C<binmode()>, only with the C<open> pragma. The +C<:utf8> and C<:encoding(...)> do work with all of C<open()>, +C<binmode()>, and the C<open> pragma. Similarly, you may use these I/O disciplines on input streams to automatically convert data from the specified encoding when it is @@ -345,6 +345,27 @@ If you run this code twice, the contents of the F<file> will be twice UTF-8 encoded. A C<use open ':utf8'> would have avoided the bug, or explicitly opening also the F<file> for input as UTF-8. +=head2 Displaying Unicode As Text + +Sometimes you might want to display Perl scalars containing Unicode as +simple ASCII (or EBCDIC) text. 
The following subroutine will convert +its argument so that Unicode characters with code points greater than +255 are displayed as "\x{...}", control characters (like "\n") are +displayed as "\x..", and the rest of the characters as themselves. + +sub nice_string { + join("", + map { $_ > 255 ? # if wide character... + sprintf("\\x{%x}", $_) : # \x{...} + chr($_) =~ /[[:cntrl:]]/ ? # else if control character ... + sprintf("\\x%02x", $_) : # \x.. + chr($_) } # else as themselves + unpack("U*", $_[0])); # unpack Unicode characters +} + +For example, C<nice_string("foo\x{100}bar\n")> will return +C<"foo\x{100}bar\x0a">. + =head2 Special Cases =over 4