From: Rafael Garcia-Suarez Date: Thu, 4 Mar 2004 08:03:54 +0000 (+0000) Subject: Clarify the difference between utf8::downgrade/upgrade X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2c9359a248d51da75ec39822c411d2e97fe5c631;p=p5sagit%2Fp5-mst-13.2.git Clarify the difference between utf8::downgrade/upgrade and utf8::encode/decode (patch by Jarkko). p4raw-id: //depot/perl@22430 --- diff --git a/lib/utf8.pm b/lib/utf8.pm index f5eebe7..ea99dd9 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -31,9 +31,11 @@ utf8 - Perl pragma to enable/disable UTF-8 (or UTF-EBCDIC) in source code use utf8; no utf8; + # Convert a Perl scalar to/from UTF-8. $num_octets = utf8::upgrade($string); $success = utf8::downgrade($string[, FAIL_OK]); + # Change the native bytes of a Perl scalar to/from UTF-8 bytes. utf8::encode($string); utf8::decode($string); @@ -133,18 +135,23 @@ pragma. =item * utf8::encode($string) -Converts (in-place) I<$string> from logical characters to octet -sequence representing it in Perl's I<UTF-X> encoding. Returns -nothing. Same as Encode::encode_utf8(). Note that this should not be -used to convert a legacy byte encoding to Unicode: use Encode for -that. +Converts in-place the octets of the I<$string> to the octet sequence +in Perl's I<UTF-X> encoding. Returns nothing. B<Note that this does not change the "type" of I<$string> to UTF-8>, and that this handles +only ISO 8859-1 (or EBCDIC) as the source character set. Therefore +this should not be used to convert a legacy 8-bit encoding to Unicode: +use Encode::decode() for that. In the very limited case of wanting to +handle just ISO 8859-1 (or EBCDIC), you could use utf8::upgrade(). =item * utf8::decode($string) Attempts to convert I<$string> in-place from Perl's I<UTF-X> encoding -into logical characters. Returns nothing. Same as Encode::decode_utf8(). -Note that this should not be used to convert Unicode back to a legacy -byte encoding: use Encode for that. +into octets. Returns nothing. 
B<Note that this does not change the "type" of I<$string> from UTF-8>, and that this handles only ISO 8859-1 +(or EBCDIC) as the destination character set. Therefore this should +not be used to convert Unicode back to a legacy 8-bit encoding: +use Encode::encode() for that. In the very limited case of wanting +to handle just ISO 8859-1 (or EBCDIC), you could use utf8::downgrade(). =item * $flag = utf8::is_utf8(STRING) diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod index 71d0e57..6a9d4b0 100644 --- a/pod/perluniintro.pod +++ b/pod/perluniintro.pod @@ -299,8 +299,8 @@ If that variable isn't set, the encoding pragma will fail. The C<Encode> module knows about many encodings and has interfaces for doing conversions between those encodings: - use Encode 'from_to'; - from_to($data, "iso-8859-3", "utf-8"); # from legacy to utf-8 + use Encode 'decode'; + $data = decode("iso-8859-3", $data); # convert from legacy to utf-8 =head2 Unicode I/O