Upgrade to Encode 1.57, from Dan Kogai.
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.pm
CommitLineData
2c674647 1package Encode;
51ef4e11 2use strict;
011b2d2f 3our $VERSION = do { my @r = (q$Revision: 1.57 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5129552c 4our $DEBUG = 0;
6d1c0808 5use XSLoader ();
6XSLoader::load 'Encode';
2c674647 7
2c674647 8require Exporter;
6d1c0808 9our @ISA = qw(Exporter);
2c674647 10
4411f3b6 11# Public, encouraged API is exported by default
85982a32 12
13our @EXPORT = qw(
14 decode decode_utf8 encode encode_utf8
15 encodings find_encoding
4411f3b6 16);
17
85982a32 18our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC PERLQQ);
19our @FB_CONSTS = qw(FB_DEFAULT FB_QUIET FB_WARN FB_PERLQQ FB_CROAK);
20
51ef4e11 21our @EXPORT_OK =
6d1c0808 22 (
85982a32 23 qw(
24 _utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
25 is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
26 ),
27 @FB_FLAGS, @FB_CONSTS,
28 );
29
6d1c0808 30our %EXPORT_TAGS =
85982a32 31 (
32 all => [ @EXPORT, @EXPORT_OK ],
33 fallbacks => [ @FB_CONSTS ],
34 fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
35 );
36
4411f3b6 37# Documentation moved after __END__ for speed - NI-S
2c674647 38
bf230f3d 39use Carp;
40
a63c962f 41our $ON_EBCDIC = (ord("A") == 193);
f2a2953c 42
5d030b67 43use Encode::Alias;
44
5129552c 45# Make a %Encoding package variable to allow a certain amount of cheating
46our %Encoding;
aae85ceb 47our %ExtModule;
48require Encode::Config;
49eval { require Encode::ConfigLocal };
5129552c 50
# List the names of the encodings currently registered in %Encoding,
# optionally loading more encoding modules first.
#
# Called as a class method: Encode->encodings(LIST).  The invocant is
# discarded.  Each remaining argument names a module to load; a name that
# contains no "::" is looked up under Encode/.  The single argument ":all"
# loads every module listed in %ExtModule.
#
# Returns the registered names sorted case-insensitively, leaving out the
# entries named "Internal" and "Unicode".
sub encodings
{
    my $class = shift;
    my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;
    for my $mod (@modules){
        # Turn the module name into a file path; bare names live in Encode/.
        $mod =~ s,::,/,g or $mod = "Encode/$mod";
        $mod .= '.pm';
        $DEBUG and warn "about to require $mod;";
        # Best effort: a module that fails to load is silently skipped.
        eval { require $mod; };
    }
    return
        sort { lc($a) cmp lc($b) }
            grep { !/^(?:Internal|Unicode)$/ } keys %Encoding;
}
66
# Report whether an encoding can be used as a PerlIO layer.
#
# The argument is either an encoding object (any reference is taken as one)
# or an encoding name, which is resolved through find_encoding().
#
# Returns whatever the object's own perlio_ok() method returns, or 0 when
# the name cannot be resolved or the object has no such method.
sub perlio_ok{
    my $obj = ref($_[0]) ? $_[0] : find_encoding($_[0]);
    # An unknown name resolves to undef; calling ->can on it would die.
    return 0 unless defined $obj;
    $obj->can("perlio_ok") and return $obj->perlio_ok();
    return 0; # safety net
}
72
# Register an encoding object.
#
#   define_encoding($obj, $name [, @aliases])
#
# Stores $obj in %Encoding under $name.  If the lower-cased name differs
# from $name it is defined as an alias too, as is every additional
# argument.  Returns $obj.
sub define_encoding
{
    my ($obj, $name, @aliases) = @_;

    $Encoding{$name} = $obj;

    my $lowered = lc $name;
    define_alias($lowered => $obj) if $lowered ne $name;

    for my $alias (@aliases) {
        define_alias($alias, $obj);
    }
    return $obj;
}
87
# Resolve an encoding name (or pass through an encoding object).
#
#   $class->getEncoding($name [, $skip_external])
#
# Lookup order:
#   1. a reference that can('new_sequence') is returned as is;
#   2. %Encoding by exact name, then by lower-cased name;
#   3. $class->find_alias by exact name, then by lower-cased name;
#   4. unless $skip_external is true, the module listed for the name in
#      %ExtModule is loaded and %Encoding is consulted again.
#
# Returns the encoding object, or nothing (undef / empty list) when the
# name cannot be resolved.
sub getEncoding
{
    my ($class,$name,$skip_external) = @_;
    if (ref($name) && $name->can('new_sequence'))
    {
        return $name;
    }
    my $lc = lc $name;
    if (exists $Encoding{$name})
    {
        return $Encoding{$name};
    }
    if (exists $Encoding{$lc})
    {
        return $Encoding{$lc};
    }

    my $oc = $class->find_alias($name);
    return $oc if defined $oc;

    $oc = $class->find_alias($lc) if $lc ne $name;
    return $oc if defined $oc;

    unless ($skip_external)
    {
        if (my $mod = $ExtModule{$name} || $ExtModule{$lc}){
            $mod =~ s,::,/,g ; $mod .= '.pm';
            # Best effort: a failed load simply leaves %Encoding unchanged.
            eval{ require $mod; };
            return $Encoding{$name} if exists $Encoding{$name};
            # The module may have been found through the lower-cased key,
            # in which case the encoding is registered under that
            # spelling rather than under $name.
            return $Encoding{$lc} if exists $Encoding{$lc};
        }
    }
    return;
}
122
# Functional front end to getEncoding(): resolve $name to an encoding
# object, with this package as the invocant.  A true second argument is
# passed through as getEncoding()'s $skip_external flag.
sub find_encoding
{
    my $name          = shift;
    my $skip_external = shift;
    return __PACKAGE__->getEncoding($name, $skip_external);
}
128
# Return the name() of the encoding that the given name or alias resolves
# to, or nothing when find_encoding() cannot resolve it.
sub resolve_alias {
    my ($alias) = @_;
    my $enc = find_encoding($alias);
    return unless defined $enc;
    return $enc->name;
}
134
# $octets = encode(ENCODING, $string [, CHECK])
#
# Looks up ENCODING with find_encoding() and croaks if it is unknown, then
# hands a copy of $string and CHECK (0 when omitted or false) to the
# encoding object's encode() method.
#
# Returns the encoder's result.  When CHECK is true and the local copy of
# $string is still non-empty after the encoder has run, undef is returned
# instead (the encoders defined in this file empty that argument when CHECK
# is true).
sub encode($$;$)
{
    my $name   = shift;
    my $string = shift;
    my $check  = shift || 0;

    my $enc = find_encoding($name);
    defined $enc or croak("Unknown encoding '$name'");

    my $octets = $enc->encode($string,$check);
    return ($check && length($string)) ? undef : $octets;
}
145
# $string = decode(ENCODING, $octets [, CHECK])
#
# Looks up ENCODING with find_encoding() and croaks if it is unknown, then
# hands a copy of $octets and CHECK (0 when omitted or false) to the
# encoding object's decode() method and returns its result.
#
# When CHECK is true, the caller's second argument is overwritten with
# whatever the decoder left in its input argument.
sub decode($$;$)
{
    my ($name, $octets, $check) = @_;
    $check = 0 unless $check;

    my $enc = find_encoding($name);
    croak("Unknown encoding '$name'") if !defined $enc;

    my $string = $enc->decode($octets,$check);
    if ($check) {
        # Write through @_ so the caller's variable is updated.
        $_[1] = $octets;
    }
    return $string;
}
156
# [$length =] from_to($string, FROM_ENCODING, TO_ENCODING [, CHECK])
#
# Converts the caller's $string in place: it is decoded with FROM_ENCODING
# and the result is encoded with TO_ENCODING.  Croaks if either encoding is
# unknown.
#
# Returns the length of the converted string, or undef when the conversion
# produced undef.  With a true CHECK, undef is also returned - and the
# caller's variable left untouched - if either step left its input
# argument non-empty.
sub from_to($$$;$)
{
    my ($string,$from,$to,$check) = @_;
    $check = 0 unless $check;

    my $f = find_encoding($from);
    croak("Unknown encoding '$from'") if !defined $f;
    my $t = find_encoding($to);
    croak("Unknown encoding '$to'") if !defined $t;

    my $uni = $f->decode($string,$check);
    if ($check && length($string)) {
        return undef;
    }

    $string = $t->encode($uni,$check);
    if ($check && length($uni)) {
        return undef;
    }

    # Write through @_ so the caller's variable receives the result.
    $_[0] = $string;
    return defined($string) ? length($string) : undef ;
}
171
# $octets = encode_utf8($string)
#
# Returns a copy of $string run through utf8::encode(); the caller's
# variable is not modified.
sub encode_utf8($)
{
    my $octets = shift;
    utf8::encode($octets);
    return $octets;
}
178
# $string = decode_utf8($octets)
#
# Returns a copy of $octets run through utf8::decode(), or undef when
# utf8::decode() reports failure.  The caller's variable is not modified.
sub decode_utf8($)
{
    my $chars = shift;
    return utf8::decode($chars) ? $chars : undef;
}
185
f2a2953c 186predefine_encodings();
187
#
# Install the encodings implemented directly in this file and register
# them in %Encode::Encoding under the keys "Unicode" and "utf8".  Calling
# it again restores those entries if really needed.
#
sub predefine_encodings{
    if ($ON_EBCDIC) {
        # was in Encode::UTF_EBCDIC
        package Encode::UTF_EBCDIC;
        *name         = sub{ shift->{'Name'} };
        *new_sequence = sub{ return $_[0] };
        *decode = sub{
            my ($obj,$str,$chk) = @_;
            # Map every character through utf8::unicode_to_native().
            my $res = join '',
                map { chr(utf8::unicode_to_native(ord($_))) } split //, $str;
            $_[1] = '' if $chk;     # input fully consumed
            return $res;
        };
        *encode = sub{
            my ($obj,$str,$chk) = @_;
            # Map every character through utf8::native_to_unicode().
            my $res = join '',
                map { chr(utf8::native_to_unicode(ord($_))) } split //, $str;
            $_[1] = '' if $chk;     # input fully consumed
            return $res;
        };
        $Encode::Encoding{Unicode} =
            bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
    } else {
        # non-EBCDIC counterpart, in package Encode::Internal
        package Encode::Internal;
        *name         = sub{ shift->{'Name'} };
        *new_sequence = sub{ return $_[0] };
        *decode = sub{
            my ($obj,$str,$chk) = @_;
            utf8::upgrade($str);
            $_[1] = '' if $chk;     # input fully consumed
            return $str;
        };
        # Encoding and decoding are the same operation here.
        *encode = \&decode;
        $Encode::Encoding{Unicode} =
            bless {Name => "Internal"} => "Encode::Internal";
    }

    {
        # was in Encode::utf8
        package Encode::utf8;
        *name         = sub{ shift->{'Name'} };
        *new_sequence = sub{ return $_[0] };
        *decode = sub{
            my ($obj,$octets,$chk) = @_;
            my $str = Encode::decode_utf8($octets);
            # On failure the caller's input is left untouched.
            return undef unless defined $str;
            $_[1] = '' if $chk;
            return $str;
        };
        *encode = sub {
            my ($obj,$string,$chk) = @_;
            my $octets = Encode::encode_utf8($string);
            $_[1] = '' if $chk;     # input fully consumed
            return $octets;
        };
        $Encode::Encoding{utf8} =
            bless {Name => "utf8"} => "Encode::utf8";
    }
}
259
656753f8 2601;
261
2a936312 262__END__
263
4411f3b6 264=head1 NAME
265
266Encode - character encodings
267
268=head1 SYNOPSIS
269
270 use Encode;
271
67d7b5ef 272=head2 Table of Contents
273
0ab8f81e 274Encode consists of a collection of modules whose details are too big
67d7b5ef 275to fit in one document. This POD itself explains the top-level APIs
6d1c0808 276and general topics at a glance. For other topics and more details,
0ab8f81e 277see the PODs below:
67d7b5ef 278
279 Name Description
280 --------------------------------------------------------
6d1c0808 281 Encode::Alias Alias definitions to encodings
67d7b5ef 282 Encode::Encoding Encode Implementation Base Class
283 Encode::Supported List of Supported Encodings
284 Encode::CN Simplified Chinese Encodings
285 Encode::JP Japanese Encodings
286 Encode::KR Korean Encodings
287 Encode::TW Traditional Chinese Encodings
288 --------------------------------------------------------
289
4411f3b6 290=head1 DESCRIPTION
291
47bfe92f 292The C<Encode> module provides the interfaces between Perl's strings
67d7b5ef 293and the rest of the system. Perl strings are sequences of
294B<characters>.
295
296The repertoire of characters that Perl can represent is at least that
297defined by the Unicode Consortium. On most platforms the ordinal
298values of the characters (as returned by C<ord(ch)>) is the "Unicode
299codepoint" for the character (the exceptions are those platforms where
300the legacy encoding is some variant of EBCDIC rather than a super-set
301of ASCII - see L<perlebcdic>).
302
0ab8f81e 303Traditionally, computer data has been moved around in 8-bit chunks
67d7b5ef 304often called "bytes". These chunks are also known as "octets" in
305networking standards. Perl is widely used to manipulate data of many
306types - not only strings of characters representing human or computer
0ab8f81e 307languages but also "binary" data being the machine's representation of
67d7b5ef 308numbers, pixels in an image - or just about anything.
309
0ab8f81e 310When Perl is processing "binary data", the programmer wants Perl to
67d7b5ef 311process "sequences of bytes". This is not a problem for Perl - as a
0ab8f81e 312byte has 256 possible values, it easily fits in Perl's much larger
67d7b5ef 313"logical character".
314
315=head2 TERMINOLOGY
4411f3b6 316
67d7b5ef 317=over 4
21938dfa 318
67d7b5ef 319=item *
320
321I<character>: a character in the range 0..(2**32-1) (or more).
322(What Perl's strings are made of.)
323
324=item *
325
326I<byte>: a character in the range 0..255
327(A special case of a Perl character.)
328
329=item *
330
331I<octet>: 8 bits of data, with ordinal values 0..255
0ab8f81e 332(Term for bytes passed to or from a non-Perl context, e.g. a disk file.)
67d7b5ef 333
334=back
4411f3b6 335
67d7b5ef 336The marker [INTERNAL] marks Internal Implementation Details, in
337general meant only for those who think they know what they are doing,
338and such details may change in future releases.
339
340=head1 PERL ENCODING API
4411f3b6 341
342=over 4
343
f2a2953c 344=item $octets = encode(ENCODING, $string[, CHECK])
4411f3b6 345
0ab8f81e 346Encodes a string from Perl's internal form into I<ENCODING> and returns
67d7b5ef 347a sequence of octets. ENCODING can be either a canonical name or
0ab8f81e 348an alias. For encoding names and aliases, see L</"Defining Aliases">.
349For CHECK, see L</"Handling Malformed Data">.
4411f3b6 350
0ab8f81e 351For example, to convert an (internally UTF-8 encoded) Unicode string to
6d1c0808 352iso-8859-1 (also known as Latin1),
681a7c68 353
67d7b5ef 354 $octets = encode("iso-8859-1", $unicode);
681a7c68 355
f2a2953c 356=item $string = decode(ENCODING, $octets[, CHECK])
4411f3b6 357
0ab8f81e 358Decodes a sequence of octets assumed to be in I<ENCODING> into Perl's
359internal form and returns the resulting string. As in encode(),
360ENCODING can be either a canonical name or an alias. For encoding names
361and aliases, see L</"Defining Aliases">. For CHECK, see
47bfe92f 362L</"Handling Malformed Data">.
363
0ab8f81e 364For example, to convert ISO-8859-1 data to UTF-8:
681a7c68 365
67d7b5ef 366 $utf8 = decode("iso-8859-1", $latin1);
681a7c68 367
f2a2953c 368=item [$length =] from_to($string, FROM_ENCODING, TO_ENCODING [,CHECK])
47bfe92f 369
0ab8f81e 370Converts B<in-place> data between two encodings.
371For example, to convert ISO-8859-1 data to UTF-8:
2b106fbe 372
373 from_to($data, "iso-8859-1", "utf-8");
374
375and to convert it back:
376
377 from_to($data, "utf-8", "iso-8859-1");
4411f3b6 378
ab97ca19 379Note that because the conversion happens in place, the data to be
0ab8f81e 380converted cannot be a string constant; it must be a scalar variable.
ab97ca19 381
0ab8f81e 382from_to() returns the length of the converted string on success, undef
3ef515df 383otherwise.
384
4411f3b6 385=back
386
f2a2953c 387=head2 UTF-8 / utf8
388
0ab8f81e 389The Unicode Consortium defines the UTF-8 transformation format as a
390way of encoding the entire Unicode repertoire as sequences of octets.
391This encoding is expected to become very widespread. Perl can use this
392form internally to represent strings, so conversions to and from this
393form are particularly efficient (as octets in memory do not have to
394change, just the meta-data that tells Perl how to treat them).
f2a2953c 395
396=over 4
397
398=item $octets = encode_utf8($string);
399
0ab8f81e 400The characters that comprise $string are encoded in Perl's superset of
401UTF-8 and the resulting octets are returned as a sequence of bytes. All
402possible characters have a UTF-8 representation so this function cannot
403fail.
f2a2953c 404
405=item $string = decode_utf8($octets [, CHECK]);
406
407The sequence of octets represented by $octets is decoded from UTF-8
408into a sequence of logical characters. Not all sequences of octets
409form valid UTF-8 encodings, so it is possible for this call to fail.
0ab8f81e 410For CHECK, see L</"Handling Malformed Data">.
f2a2953c 411
412=back
413
51ef4e11 414=head2 Listing available encodings
415
5129552c 416 use Encode;
417 @list = Encode->encodings();
418
419Returns a list of the canonical names of the available encodings that
420are loaded. To get a list of all available encodings including the
421ones that are not loaded yet, say
422
423 @all_encodings = Encode->encodings(":all");
424
0ab8f81e 425Or you can give the name of a specific module.
5129552c 426
c731e18e 427 @with_jp = Encode->encodings("Encode::JP");
428
429When "::" is not in the name, "Encode::" is assumed.
51ef4e11 430
c731e18e 431 @ebcdic = Encode->encodings("EBCDIC");
5d030b67 432
0ab8f81e 433To find out in detail which encodings are supported by this package,
5d030b67 434see L<Encode::Supported>.
51ef4e11 435
436=head2 Defining Aliases
437
0ab8f81e 438To add a new alias to a given encoding, use:
67d7b5ef 439
5129552c 440 use Encode;
441 use Encode::Alias;
a63c962f 442 define_alias(newName => ENCODING);
51ef4e11 443
3ef515df 444After that, newName can be used as an alias for ENCODING.
f2a2953c 445ENCODING may be either the name of an encoding or an
446I<encoding object>
51ef4e11 447
fcb875d4 448But before you do so, make sure the alias is nonexistent with
449C<resolve_alias()>, which returns the canonical name thereof.
450i.e.
451
452 Encode::resolve_alias("latin1") eq "iso-8859-1" # true
453 Encode::resolve_alias("iso-8859-12") # false; nonexistent
454 Encode::resolve_alias($name) eq $name # true if $name is canonical
455
0ab8f81e 456resolve_alias() does not need C<use Encode::Alias>; it can be
457exported via C<use Encode qw(resolve_alias)>.
fcb875d4 458
0ab8f81e 459See L<Encode::Alias> for details.
51ef4e11 460
85982a32 461=head1 Encoding via PerlIO
4411f3b6 462
0ab8f81e 463If your perl supports I<PerlIO>, you can use a PerlIO layer to decode
464and encode directly via a filehandle. The following two examples
465are totally identical in their functionality.
4411f3b6 466
85982a32 467 # via PerlIO
468 open my $in, "<:encoding(shiftjis)", $infile or die;
469 open my $out, ">:encoding(euc-jp)", $outfile or die;
470 while(<>){ print; }
8e86646e 471
85982a32 472 # via from_to
0ab8f81e 473 open my $in, "<", $infile or die;
474 open my $out, ">", $outfile or die;
6d1c0808 475 while(<$in>){
0ab8f81e 476 from_to($_, "shiftjis", "euc-jp", 1); print $out $_;
85982a32 477 }
4411f3b6 478
0ab8f81e 479Unfortunately, there may be encodings that are not PerlIO-savvy. You can check
480if your encoding is supported by PerlIO by calling the C<perlio_ok>
481method.
482
483 Encode::perlio_ok("hz"); # False
484 find_encoding("euc-cn")->perlio_ok; # True where PerlIO is available
485
486 use Encode qw(perlio_ok); # exported upon request
487 perlio_ok("euc-jp")
4411f3b6 488
0ab8f81e 489Fortunately, all encodings that come with Encode core are PerlIO-savvy
490except for hz and ISO-2022-kr. See L<Encode::Encoding> for details.
4411f3b6 491
0ab8f81e 492For gory details, see L<Encode::PerlIO>.
4411f3b6 493
85982a32 494=head1 Handling Malformed Data
4411f3b6 495
85982a32 496=over 4
47bfe92f 497
0ab8f81e 498The I<CHECK> argument is used as follows. When you omit it,
499the behaviour is the same as if you had passed a value of 0 for
500I<CHECK>.
47bfe92f 501
85982a32 502=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
47bfe92f 503
0ab8f81e 504If I<CHECK> is 0, (en|de)code will put a I<substitution character>
505in place of a malformed character. For UCM-based encodings,
506E<lt>subcharE<gt> will be used. For Unicode, "\x{FFFD}" is used.
507If the data is supposed to be UTF-8, an optional lexical warning
508(category utf8) is given.
e9692b5b 509
85982a32 510=item I<CHECK> = Encode::FB_CROAK ( == 1)
e9692b5b 511
0ab8f81e 512If I<CHECK> is 1, methods will die immediately with an error
513message. Therefore, when I<CHECK> is set to 1, you should trap the
514fatal error with eval{} unless you really want to let it die on error.
47bfe92f 515
85982a32 516=item I<CHECK> = Encode::FB_QUIET
47bfe92f 517
85982a32 518If I<CHECK> is set to Encode::FB_QUIET, (en|de)code will immediately
0ab8f81e 519return the portion of the data that has been processed so far when
520an error occurs. The data argument will be overwritten with
521everything after that point (that is, the unprocessed part of data).
522This is handy when you have to call decode repeatedly in the case
523where your source data may contain partial multi-byte character
524sequences, for example because you are reading with a fixed-width
525buffer. Here is some sample code that does exactly this:
4411f3b6 526
85982a32 527 my $data = '';
528 while(read $fh, $buffer, 256){
0ab8f81e 529 # buffer may end in a partial character so we append
85982a32 530 $data .= $buffer;
531 $utf8 .= decode($encoding, $data, Encode::FB_QUIET);
0ab8f81e 532 # $data now contains the unprocessed partial character
85982a32 533 }
1768d7eb 534
85982a32 535=item I<CHECK> = Encode::FB_WARN
67d7b5ef 536
0ab8f81e 537This is the same as above, except that it warns on error. Handy when
538you are debugging the mode above.
85982a32 539
540=item perlqq mode (I<CHECK> = Encode::FB_PERLQQ)
541
542For encodings that are implemented by Encode::XS, CHECK ==
543Encode::FB_PERLQQ turns (en|de)code into C<perlqq> fallback mode.
544
0ab8f81e 545When you decode, '\xI<XX>' will be inserted for a malformed character,
546where I<XX> is the hex representation of the octet that could not be
547decoded to utf8. And when you encode, '\x{I<xxxx>}' will be inserted,
548where I<xxxx> is the Unicode ID of the character that cannot be found
549in the character repertoire of the encoding.
85982a32 550
551=item The bitmask
552
0ab8f81e 553These modes are actually set via a bitmask. Here is how the FB_XX
554constants are laid out. You can import the FB_XX constants via
555C<use Encode qw(:fallbacks)>; you can import the generic bitmask
556constants via C<use Encode qw(:fallback_all)>.
85982a32 557
b0b300a3 558 FB_DEFAULT FB_CROAK FB_QUIET FB_WARN FB_PERLQQ
559 DIE_ON_ERR 0x0001 X
560 WARN_ON_ERR 0x0002 X
561 RETURN_ON_ERR 0x0004 X X
562 LEAVE_SRC 0x0008
563 PERLQQ 0x0100 X
67d7b5ef 564
0ab8f81e 565=head2 Unimplemented fallback schemes
67d7b5ef 566
0ab8f81e 567In the future, you will be able to use a code reference to a callback
f2a2953c 568function for the value of I<CHECK> but its API is still undecided.
67d7b5ef 569
570=head1 Defining Encodings
571
572To define a new encoding, use:
573
574 use Encode qw(define_encoding);
575 define_encoding($object, 'canonicalName' [, alias...]);
576
577I<canonicalName> will be associated with I<$object>. The object
0ab8f81e 578should provide the interface described in L<Encode::Encoding>.
67d7b5ef 579If more than two arguments are provided then additional
0ab8f81e 580arguments are taken as aliases for I<$object>, as for C<define_alias>.
67d7b5ef 581
f2a2953c 582See L<Encode::Encoding> for more details.
583
4411f3b6 584=head1 Messing with Perl's Internals
585
47bfe92f 586The following API uses parts of Perl's internals in the current
0ab8f81e 587implementation. As such, they are efficient but may change.
4411f3b6 588
589=over 4
590
a63c962f 591=item is_utf8(STRING [, CHECK])
4411f3b6 592
0ab8f81e 593[INTERNAL] Tests whether the UTF-8 flag is turned on in the STRING.
47bfe92f 594If CHECK is true, also checks the data in STRING for being well-formed
595UTF-8. Returns true if successful, false otherwise.
4411f3b6 596
a63c962f 597=item _utf8_on(STRING)
4411f3b6 598
0ab8f81e 599[INTERNAL] Turns on the UTF-8 flag in STRING. The data in STRING is
4411f3b6 600B<not> checked for being well-formed UTF-8. Do not use unless you
601B<know> that the STRING is well-formed UTF-8. Returns the previous
0ab8f81e 602state of the UTF-8 flag (so please don't treat the return value as
603indicating success or failure), or C<undef> if STRING is not a string.
4411f3b6 604
a63c962f 605=item _utf8_off(STRING)
4411f3b6 606
0ab8f81e 607[INTERNAL] Turns off the UTF-8 flag in STRING. Do not use frivolously.
608Returns the previous state of the UTF-8 flag (so please don't treat the
609return value as indicating success or failure), or C<undef> if STRING is
4411f3b6 610not a string.
611
612=back
613
614=head1 SEE ALSO
615
5d030b67 616L<Encode::Encoding>,
617L<Encode::Supported>,
6d1c0808 618L<Encode::PerlIO>,
5d030b67 619L<encoding>,
6d1c0808 620L<perlebcdic>,
621L<perlfunc/open>,
622L<perlunicode>,
623L<utf8>,
5d030b67 624the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
4411f3b6 625
85982a32 626=head1 MAINTAINER
aae85ceb 627
628This project was originated by Nick Ing-Simmons and later maintained
0ab8f81e 629by Dan Kogai E<lt>dankogai@dan.co.jpE<gt>. See AUTHORS for a full list
aae85ceb 630of people involved. For any questions, use
631E<lt>perl-unicode@perl.orgE<gt> so others can share.
632
4411f3b6 633=cut