Extra guidance for JAPH debuggers.
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.pm
CommitLineData
package Encode;
use strict;

# Kept on one line, verbatim: the version is derived from the RCS
# Revision keyword.
our $VERSION = do { my @r = (q$Revision: 1.60 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

# When true, encodings() warns before each module it is about to require.
our $DEBUG = 0;

use XSLoader ();
XSLoader::load('Encode');

require Exporter;
our @ISA = ('Exporter');

# Public, encouraged API is exported by default
our @EXPORT = qw(
    decode  decode_utf8  encode  encode_utf8
    encodings  find_encoding
);

# Names of the individual CHECK flags and of the FB_* fallback constants.
our @FB_FLAGS = qw(
    DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
    PERLQQ HTMLCREF XMLCREF
);
our @FB_CONSTS = qw(
    FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
    FB_PERLQQ FB_HTMLCREF FB_XMLCREF
);

# Everything here is exported only on request.
our @EXPORT_OK = (
    qw(
        _utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
        is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
    ),
    @FB_FLAGS,
    @FB_CONSTS,
);

our %EXPORT_TAGS = (
    all          => [ @EXPORT, @EXPORT_OK ],
    fallbacks    => [ @FB_CONSTS ],
    fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
);

# Documentation moved after __END__ for speed - NI-S

use Carp;

# True when the native character set places "A" at code 193.
our $ON_EBCDIC = ord("A") == 193;

use Encode::Alias;

# Make a %Encoding package variable to allow a certain amount of cheating
our %Encoding;
our %ExtModule;
require Encode::Config;
# Encode::ConfigLocal is optional; a failure to load it is ignored.
eval { require Encode::ConfigLocal };
5129552c 52
# Return the names of the encodings currently registered in %Encoding,
# sorted case-insensitively, leaving out the "Internal" and "Unicode"
# entries.
#
# Called as a class method: Encode->encodings(LIST).  Before the names are
# collected, each module named in LIST is loaded on a best-effort basis:
#   ":all"     as the first argument loads every module in %ExtModule;
#   "Foo::Bar" is required as Foo/Bar.pm;
#   "Foo"      (no "::") is required as Encode/Foo.pm.
# Modules that fail to load are silently skipped.
sub encodings {
    my $class   = shift;
    my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;

    # %ExtModule maps many encoding names onto the same module, so drop
    # duplicates rather than retrying the same require again and again.
    my %seen;
    for my $mod (grep { !$seen{$_}++ } @modules) {
        $mod =~ s,::,/,g or $mod = "Encode/$mod";
        $mod .= '.pm';
        $DEBUG and warn "about to require $mod;";
        eval { require $mod; };    # deliberate best effort
    }

    return
        sort { lc $a cmp lc $b }
        grep { !/^(?:Internal|Unicode)$/ } keys %Encoding;
}
68
# Report whether an encoding can be used as a PerlIO layer.
#
# The argument is either an encoding object or an encoding name, which is
# looked up with find_encoding().  Returns whatever the object's own
# perlio_ok() method returns, or 0 when the encoding is unknown, the
# argument cannot answer ->can(), or the object has no such method.
sub perlio_ok {
    my $obj = ref($_[0]) ? $_[0] : find_encoding($_[0]);

    # safety net: an unknown name yields undef, which cannot take ->can
    return 0 unless defined $obj;

    # An unblessed reference dies inside ->can(); treat that as "no".
    my $supported = do {
        local $@;
        eval { $obj->can("perlio_ok") };
    };
    return $obj->perlio_ok() if $supported;
    return 0;
}
74
# Register $obj in %Encoding under $name and return $obj.
#
# If $name is not already all lower case, its lower-cased form is also
# defined as an alias for $obj.  Any further arguments are defined as
# aliases for $obj as well.
sub define_encoding {
    my ($obj, $name, @aliases) = @_;

    $Encoding{$name} = $obj;

    my $lower = lc($name);
    if ($lower ne $name) {
        define_alias($lower => $obj);
    }

    for my $alias (@aliases) {
        define_alias($alias, $obj);
    }

    return $obj;
}
89
# Class method: resolve $name to an encoding object.
#
# Lookup order:
#   1. an object that can('new_sequence') is returned unchanged;
#   2. %Encoding under $name, then under lc($name);
#   3. $class->find_alias() for $name, then for lc($name);
#   4. unless $skip_external is true, the module listed in %ExtModule for
#      the name is required and %Encoding is consulted again.
# Returns the encoding object, or nothing (empty list / undef) when the
# name is undefined or cannot be resolved.
sub getEncoding {
    my ($class, $name, $skip_external) = @_;

    # Nothing sensible can be looked up for an undefined name.
    return unless defined $name;

    return $name if ref($name) && $name->can('new_sequence');

    my $lc = lc $name;
    return $Encoding{$name} if exists $Encoding{$name};
    return $Encoding{$lc}   if exists $Encoding{$lc};

    my $oc = $class->find_alias($name);
    return $oc if defined $oc;

    $oc = $class->find_alias($lc) if $lc ne $name;
    return $oc if defined $oc;

    unless ($skip_external) {
        if (my $mod = $ExtModule{$name} || $ExtModule{$lc}) {
            $mod =~ s,::,/,g;
            $mod .= '.pm';
            eval { require $mod; };    # best effort; failure means "not found"

            # Check both spellings, as the lookups before the load do.
            for my $key ($name, $lc) {
                return $Encoding{$key} if exists $Encoding{$key};
            }
        }
    }
    return;
}
124
# Functional front end to getEncoding(): resolve $name to an encoding
# object.  A true second argument is passed on as getEncoding()'s
# $skip_external flag.
sub find_encoding {
    my $name          = shift;
    my $skip_external = shift;
    return __PACKAGE__->getEncoding($name, $skip_external);
}
130
# Return the name() of the encoding that the given name or alias resolves
# to, or nothing when find_encoding() does not know it.
sub resolve_alias {
    my $obj = find_encoding(shift);
    return unless defined $obj;
    return $obj->name;
}
136
# encode(ENCODING, $string [, CHECK])
#
# Encode $string with the encoding found for ENCODING and return the
# result.  Croaks when the encoding is unknown.  When CHECK is true and
# the encoder left part of its input unconsumed, undef is returned
# instead.  The caller's string is never modified: the encoder works on a
# private copy.
sub encode($$;$)
{
    my $name   = $_[0];
    my $string = $_[1];
    my $check  = $_[2] || 0;

    my $enc = find_encoding($name);
    defined $enc or croak("Unknown encoding '$name'");

    my $octets = $enc->encode($string, $check);
    if ($check && length($string)) {
        return undef;
    }
    return $octets;
}
147
# decode(ENCODING, $octets [, CHECK])
#
# Decode $octets with the encoding found for ENCODING and return the
# result.  Croaks when the encoding is unknown.  When CHECK is true, the
# caller's second argument is overwritten with the decoder's working copy
# of the input as it stands after decoding.
sub decode($$;$)
{
    my $name   = $_[0];
    my $octets = $_[1];
    my $check  = $_[2] || 0;

    my $enc = find_encoding($name);
    defined $enc or croak("Unknown encoding '$name'");

    my $string = $enc->decode($octets, $check);
    if ($check) {
        # write through the @_ alias so the caller sees what is left over
        $_[1] = $octets;
    }
    return $string;
}
158
# from_to($string, FROM_ENCODING, TO_ENCODING [, CHECK])
#
# Convert the caller's $string in place from one encoding to another, by
# decoding with FROM_ENCODING and re-encoding with TO_ENCODING.  Croaks
# when either encoding is unknown.  Returns the length of the converted
# string.  Returns undef, leaving the caller's string untouched, when
# CHECK is true and either step left input unconsumed.
sub from_to($$$;$)
{
    my ($string, $from, $to, $check) = @_;
    $check = 0 unless $check;

    my $source = find_encoding($from);
    defined $source or croak("Unknown encoding '$from'");
    my $target = find_encoding($to);
    defined $target or croak("Unknown encoding '$to'");

    my $uni = $source->decode($string, $check);
    if ($check && length($string)) {
        return undef;
    }

    my $converted = $target->encode($uni, $check);
    if ($check && length($uni)) {
        return undef;
    }

    # store the result through the @_ alias: this is the in-place part
    if (defined($_[0] = $converted)) {
        return length($converted);
    }
    return undef;
}
173
# Return a copy of the argument with its characters converted to UTF-8
# octets.  The caller's string is left untouched.
sub encode_utf8($)
{
    my $octets = shift;
    utf8::encode($octets);
    return $octets;
}
180
# decode_utf8($octets [, CHECK])
#
# Return a copy of $octets decoded from UTF-8 into characters, or undef
# when utf8::decode() rejects the input.  The caller's string is left
# untouched.
#
# CHECK is optional, as documented in the POD; the prototype used to be
# ($), which made the documented two-argument call a compile-time error.
# Only the error-reporting bits of CHECK are honoured here:
#   DIE_ON_ERR   croak on malformed input;
#   WARN_ON_ERR  carp on malformed input, then return undef.
# NOTE(review): DIE_ON_ERR/WARN_ON_ERR are assumed to be constants provided
# to this package by the XS half of Encode (they are listed in @FB_FLAGS
# for export) -- confirm.
sub decode_utf8($;$)
{
    my ($str, $check) = @_;
    return $str if utf8::decode($str);

    if ($check) {
        croak("Malformed UTF-8 octet sequence") if $check & DIE_ON_ERR();
        carp("Malformed UTF-8 octet sequence")  if $check & WARN_ON_ERR();
    }
    return undef;
}
187
predefine_encodings();

#
# This is to restore %Encoding if really needed;
#
# Installs the encodings that are implemented here in Perl and registers
# one object for each in %Encode::Encoding:
#   "Unicode" - Encode::UTF_EBCDIC when $ON_EBCDIC is true,
#               Encode::Internal otherwise;
#   "utf8"    - Encode::utf8.
# Every decode/encode method takes ($obj, $data, $chk) and, when $chk is
# true, empties the caller's $data once it has been consumed.
sub predefine_encodings {
    if ($ON_EBCDIC) {
        # was in Encode::UTF_EBCDIC
        _install_common_methods('Encode::UTF_EBCDIC');
        package Encode::UTF_EBCDIC;
        # Map each character's ordinal through utf8::unicode_to_native.
        *decode = sub {
            my ($obj, $str, $chk) = @_;
            my $res = join '',
                map { chr(utf8::unicode_to_native(ord($_))) } split //, $str;
            $_[1] = '' if $chk;
            return $res;
        };
        # The reverse mapping, through utf8::native_to_unicode.
        *encode = sub {
            my ($obj, $str, $chk) = @_;
            my $res = join '',
                map { chr(utf8::native_to_unicode(ord($_))) } split //, $str;
            $_[1] = '' if $chk;
            return $res;
        };
        $Encode::Encoding{Unicode} =
            bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
    }
    else {
        # was in Encode::Internal
        _install_common_methods('Encode::Internal');
        package Encode::Internal;
        *decode = sub {
            my ($obj, $str, $chk) = @_;
            utf8::upgrade($str);
            $_[1] = '' if $chk;
            return $str;
        };
        # Encoding and decoding are the same operation for this class.
        *encode = \&decode;
        $Encode::Encoding{Unicode} =
            bless {Name => "Internal"} => "Encode::Internal";
    }

    {
        # was in Encode::utf8
        _install_common_methods('Encode::utf8');
        package Encode::utf8;
        *decode = sub {
            my ($obj, $octets, $chk) = @_;
            my $str = Encode::decode_utf8($octets);
            if (defined $str) {
                $_[1] = '' if $chk;
                return $str;
            }
            # malformed input: the caller's octets are left in place
            return undef;
        };
        *encode = sub {
            my ($obj, $string, $chk) = @_;
            my $octets = Encode::encode_utf8($string);
            $_[1] = '' if $chk;
            return $octets;
        };
        $Encode::Encoding{utf8} =
            bless {Name => "utf8"} => "Encode::utf8";
    }
}

# Install into $package the four methods that every predefined encoding
# class implements identically: name, new_sequence, needs_lines and
# perlio_ok.
sub _install_common_methods {
    my ($package) = @_;

    my %method = (
        # the name stored in the object at construction time
        name         => sub { shift->{'Name'} },
        # the object serves as its own sequence object
        new_sequence => sub { return $_[0] },
        needs_lines  => sub { 0 },
        # usable as a PerlIO layer only if PerlIO::encoding can be loaded
        perlio_ok    => sub {
            eval { require PerlIO::encoding };
            return $@ ? 0 : 1;
        },
    );

    no strict 'refs';    # the target package is only known at run time
    for my $name (keys %method) {
        *{"${package}::${name}"} = $method{$name};
    }
    return;
}
276
656753f8 2771;
278
2a936312 279__END__
280
4411f3b6 281=head1 NAME
282
283Encode - character encodings
284
285=head1 SYNOPSIS
286
287 use Encode;
288
67d7b5ef 289=head2 Table of Contents
290
0ab8f81e 291Encode consists of a collection of modules whose details are too big
67d7b5ef 292to fit in one document. This POD itself explains the top-level APIs
6d1c0808 293and general topics at a glance. For other topics and more details,
0ab8f81e 294see the PODs below:
67d7b5ef 295
296 Name Description
297 --------------------------------------------------------
6d1c0808 298 Encode::Alias Alias definitions to encodings
67d7b5ef 299 Encode::Encoding Encode Implementation Base Class
300 Encode::Supported List of Supported Encodings
301 Encode::CN Simplified Chinese Encodings
302 Encode::JP Japanese Encodings
303 Encode::KR Korean Encodings
304 Encode::TW Traditional Chinese Encodings
305 --------------------------------------------------------
306
4411f3b6 307=head1 DESCRIPTION
308
47bfe92f 309The C<Encode> module provides the interfaces between Perl's strings
67d7b5ef 310and the rest of the system. Perl strings are sequences of
311B<characters>.
312
313The repertoire of characters that Perl can represent is at least that
314defined by the Unicode Consortium. On most platforms the ordinal
315value of a character (as returned by C<ord(ch)>) is the "Unicode
316codepoint" for the character (the exceptions are those platforms where
317the legacy encoding is some variant of EBCDIC rather than a super-set
318of ASCII - see L<perlebcdic>).
319
0ab8f81e 320Traditionally, computer data has been moved around in 8-bit chunks
67d7b5ef 321often called "bytes". These chunks are also known as "octets" in
322networking standards. Perl is widely used to manipulate data of many
323types - not only strings of characters representing human or computer
0ab8f81e 324languages but also "binary" data being the machine's representation of
67d7b5ef 325numbers, pixels in an image - or just about anything.
326
0ab8f81e 327When Perl is processing "binary data", the programmer wants Perl to
67d7b5ef 328process "sequences of bytes". This is not a problem for Perl - as a
0ab8f81e 329byte has 256 possible values, it easily fits in Perl's much larger
67d7b5ef 330"logical character".
331
332=head2 TERMINOLOGY
4411f3b6 333
67d7b5ef 334=over 4
21938dfa 335
67d7b5ef 336=item *
337
338I<character>: a character in the range 0..(2**32-1) (or more).
339(What Perl's strings are made of.)
340
341=item *
342
343I<byte>: a character in the range 0..255
344(A special case of a Perl character.)
345
346=item *
347
348I<octet>: 8 bits of data, with ordinal values 0..255
0ab8f81e 349(Term for bytes passed to or from a non-Perl context, e.g. a disk file.)
67d7b5ef 350
351=back
4411f3b6 352
67d7b5ef 353The marker [INTERNAL] marks Internal Implementation Details, in
354general meant only for those who think they know what they are doing,
355and such details may change in future releases.
356
357=head1 PERL ENCODING API
4411f3b6 358
359=over 4
360
f2a2953c 361=item $octets = encode(ENCODING, $string[, CHECK])
4411f3b6 362
0ab8f81e 363Encodes a string from Perl's internal form into I<ENCODING> and returns
67d7b5ef 364a sequence of octets. ENCODING can be either a canonical name or
0ab8f81e 365an alias. For encoding names and aliases, see L</"Defining Aliases">.
366For CHECK, see L</"Handling Malformed Data">.
4411f3b6 367
0ab8f81e 368For example, to convert an (internally UTF-8 encoded) Unicode string to
6d1c0808 369iso-8859-1 (also known as Latin1),
681a7c68 370
67d7b5ef 371 $octets = encode("iso-8859-1", $unicode);
681a7c68 372
f2a2953c 373=item $string = decode(ENCODING, $octets[, CHECK])
4411f3b6 374
0ab8f81e 375Decodes a sequence of octets assumed to be in I<ENCODING> into Perl's
376internal form and returns the resulting string. As in encode(),
377ENCODING can be either a canonical name or an alias. For encoding names
378and aliases, see L</"Defining Aliases">. For CHECK, see
47bfe92f 379L</"Handling Malformed Data">.
380
0ab8f81e 381For example, to convert ISO-8859-1 data to UTF-8:
681a7c68 382
67d7b5ef 383 $utf8 = decode("iso-8859-1", $latin1);
681a7c68 384
f2a2953c 385=item [$length =] from_to($string, FROM_ENCODING, TO_ENCODING [,CHECK])
47bfe92f 386
0ab8f81e 387Converts B<in-place> data between two encodings.
388For example, to convert ISO-8859-1 data to UTF-8:
2b106fbe 389
390 from_to($data, "iso-8859-1", "utf-8");
391
392and to convert it back:
393
394 from_to($data, "utf-8", "iso-8859-1");
4411f3b6 395
ab97ca19 396Note that because the conversion happens in place, the data to be
0ab8f81e 397converted cannot be a string constant; it must be a scalar variable.
ab97ca19 398
0ab8f81e 399from_to() returns the length of the converted string on success, undef
3ef515df 400otherwise.
401
4411f3b6 402=back
403
f2a2953c 404=head2 UTF-8 / utf8
405
0ab8f81e 406The Unicode Consortium defines the UTF-8 transformation format as a
407way of encoding the entire Unicode repertoire as sequences of octets.
408This encoding is expected to become very widespread. Perl can use this
409form internally to represent strings, so conversions to and from this
410form are particularly efficient (as octets in memory do not have to
411change, just the meta-data that tells Perl how to treat them).
f2a2953c 412
413=over 4
414
415=item $octets = encode_utf8($string);
416
0ab8f81e 417The characters that comprise $string are encoded in Perl's superset of
418UTF-8 and the resulting octets are returned as a sequence of bytes. All
419possible characters have a UTF-8 representation so this function cannot
420fail.
f2a2953c 421
422=item $string = decode_utf8($octets [, CHECK]);
423
424The sequence of octets represented by $octets is decoded from UTF-8
425into a sequence of logical characters. Not all sequences of octets
426form valid UTF-8 encodings, so it is possible for this call to fail.
0ab8f81e 427For CHECK, see L</"Handling Malformed Data">.
f2a2953c 428
429=back
430
51ef4e11 431=head2 Listing available encodings
432
5129552c 433 use Encode;
434 @list = Encode->encodings();
435
436Returns a list of the canonical names of the available encodings that
437are loaded. To get a list of all available encodings including the
438ones that are not loaded yet, say
439
440 @all_encodings = Encode->encodings(":all");
441
0ab8f81e 442Or you can give the name of a specific module.
5129552c 443
c731e18e 444 @with_jp = Encode->encodings("Encode::JP");
445
446When "::" is not in the name, "Encode::" is assumed.
51ef4e11 447
c731e18e 448 @ebcdic = Encode->encodings("EBCDIC");
5d030b67 449
0ab8f81e 450To find out in detail which encodings are supported by this package,
5d030b67 451see L<Encode::Supported>.
51ef4e11 452
453=head2 Defining Aliases
454
0ab8f81e 455To add a new alias to a given encoding, use:
67d7b5ef 456
5129552c 457 use Encode;
458 use Encode::Alias;
a63c962f 459 define_alias(newName => ENCODING);
51ef4e11 460
3ef515df 461After that, newName can be used as an alias for ENCODING.
f2a2953c 462ENCODING may be either the name of an encoding or an
463I<encoding object>
51ef4e11 464
fcb875d4 465But before you do so, make sure the alias is nonexistent with
466C<resolve_alias()>, which returns the canonical name thereof.
467i.e.
468
469 Encode::resolve_alias("latin1") eq "iso-8859-1" # true
470 Encode::resolve_alias("iso-8859-12") # false; nonexistent
471 Encode::resolve_alias($name) eq $name # true if $name is canonical
472
0ab8f81e 473resolve_alias() does not need C<use Encode::Alias>; it can be
474exported via C<use Encode qw(resolve_alias)>.
fcb875d4 475
0ab8f81e 476See L<Encode::Alias> for details.
51ef4e11 477
85982a32 478=head1 Encoding via PerlIO
4411f3b6 479
0ab8f81e 480If your perl supports I<PerlIO>, you can use a PerlIO layer to decode
481and encode directly via a filehandle. The following two examples
482are totally identical in their functionality.
4411f3b6 483
85982a32 484 # via PerlIO
485 open my $in, "<:encoding(shiftjis)", $infile or die;
486 open my $out, ">:encoding(euc-jp)", $outfile or die;
487 while(<>){ print; }
8e86646e 488
85982a32 489 # via from_to
0ab8f81e 490 open my $in, "<", $infile or die;
491 open my $out, ">", $outfile or die;
6d1c0808 492 while(<$in>){
0ab8f81e 493 from_to($_, "shiftjis", "euc-jp", 1); print $out $_;
85982a32 494 }
4411f3b6 495
0ab8f81e 496Unfortunately, not every encoding is PerlIO-savvy. You can check
497if your encoding is supported by PerlIO by calling the C<perlio_ok>
498method.
499
500 Encode::perlio_ok("hz"); # False
501 find_encoding("euc-cn")->perlio_ok; # True where PerlIO is available
502
503 use Encode qw(perlio_ok); # exported upon request
504 perlio_ok("euc-jp")
4411f3b6 505
0ab8f81e 506Fortunately, all encodings that come with Encode core are PerlIO-savvy
507except for hz and ISO-2022-kr. See L<Encode::Encoding> for details.
4411f3b6 508
0ab8f81e 509For gory details, see L<Encode::PerlIO>.
4411f3b6 510
85982a32 511=head1 Handling Malformed Data
4411f3b6 512
85982a32 513=over 4
47bfe92f 514
0ab8f81e 515The I<CHECK> argument is used as follows. When you omit it,
516the behaviour is the same as if you had passed a value of 0 for
517I<CHECK>.
47bfe92f 518
85982a32 519=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
47bfe92f 520
0ab8f81e 521If I<CHECK> is 0, (en|de)code will put a I<substitution character>
522in place of a malformed character. For UCM-based encodings,
523E<lt>subcharE<gt> will be used. For Unicode, "\x{FFFD}" is used.
524If the data is supposed to be UTF-8, an optional lexical warning
525(category utf8) is given.
e9692b5b 526
85982a32 527=item I<CHECK> = Encode::FB_CROAK ( == 1)
e9692b5b 528
0ab8f81e 529If I<CHECK> is 1, methods will die immediately with an error
530message. Therefore, when I<CHECK> is set to 1, you should trap the
531fatal error with eval{} unless you really want to let it die on error.
47bfe92f 532
85982a32 533=item I<CHECK> = Encode::FB_QUIET
47bfe92f 534
85982a32 535If I<CHECK> is set to Encode::FB_QUIET, (en|de)code will immediately
0ab8f81e 536return the portion of the data that has been processed so far when
537an error occurs. The data argument will be overwritten with
538everything after that point (that is, the unprocessed part of data).
539This is handy when you have to call decode repeatedly in the case
540where your source data may contain partial multi-byte character
541sequences, for example because you are reading with a fixed-width
542buffer. Here is some sample code that does exactly this:
4411f3b6 543
85982a32 544 my $data = '';
545 while(read $fh, $buffer, 256){
0ab8f81e 546 # buffer may end in a partial character so we append
85982a32 547 $data .= $buffer;
548 $utf8 .= decode($encoding, $data, Encode::FB_QUIET);
0ab8f81e 549 # $data now contains the unprocessed partial character
85982a32 550 }
1768d7eb 551
85982a32 552=item I<CHECK> = Encode::FB_WARN
67d7b5ef 553
0ab8f81e 554This is the same as above, except that it warns on error. Handy when
555you are debugging the mode above.
85982a32 556
557=item perlqq mode (I<CHECK> = Encode::FB_PERLQQ)
558
af1f55d9 559=item HTML charref mode (I<CHECK> = Encode::FB_HTMLCREF)
560
561=item XML charref mode (I<CHECK> = Encode::FB_XMLCREF)
562
85982a32 563For encodings that are implemented by Encode::XS, CHECK ==
564Encode::FB_PERLQQ turns (en|de)code into C<perlqq> fallback mode.
565
0ab8f81e 566When you decode, '\xI<XX>' will be inserted for a malformed character,
567where I<XX> is the hex representation of the octet that could not be
568decoded to utf8. And when you encode, '\x{I<xxxx>}' will be inserted,
569where I<xxxx> is the Unicode ID of the character that cannot be found
570in the character repertoire of the encoding.
85982a32 571
af1f55d9 572HTML/XML character reference modes work the same way, except that in place of
573\x{I<xxxx>}, HTML uses &#I<1234>; where I<1234> is a decimal number and
574XML uses &#xI<abcd>; where I<abcd> is a hexadecimal number.
575
85982a32 576=item The bitmask
577
0ab8f81e 578These modes are actually set via a bitmask. Here is how the FB_XX
579constants are laid out. You can import the FB_XX constants via
580C<use Encode qw(:fallbacks)>; you can import the generic bitmask
581constants via C<use Encode qw(:fallback_all)>.
85982a32 582
b0b300a3 583 FB_DEFAULT FB_CROAK FB_QUIET FB_WARN FB_PERLQQ
584 DIE_ON_ERR 0x0001 X
585 WARN_ON_ERR 0x0002 X
586 RETURN_ON_ERR 0x0004 X X
587 LEAVE_SRC 0x0008
588 PERLQQ 0x0100 X
af1f55d9 589 HTMLCREF 0x0200
590 XMLCREF 0x0400
67d7b5ef 591
0ab8f81e 592=head2 Unimplemented fallback schemes
67d7b5ef 593
0ab8f81e 594In the future, you will be able to use a code reference to a callback
f2a2953c 595function for the value of I<CHECK> but its API is still undecided.
67d7b5ef 596
597=head1 Defining Encodings
598
599To define a new encoding, use:
600
601 use Encode qw(define_encoding);
602 define_encoding($object, 'canonicalName' [, alias...]);
603
604I<canonicalName> will be associated with I<$object>. The object
0ab8f81e 605should provide the interface described in L<Encode::Encoding>.
67d7b5ef 606If more than two arguments are provided then additional
0ab8f81e 607arguments are taken as aliases for I<$object>, as for C<define_alias>.
67d7b5ef 608
f2a2953c 609See L<Encode::Encoding> for more details.
610
4411f3b6 611=head1 Messing with Perl's Internals
612
47bfe92f 613The following API uses parts of Perl's internals in the current
0ab8f81e 614implementation. As such, they are efficient but may change.
4411f3b6 615
616=over 4
617
a63c962f 618=item is_utf8(STRING [, CHECK])
4411f3b6 619
0ab8f81e 620[INTERNAL] Tests whether the UTF-8 flag is turned on in the STRING.
47bfe92f 621If CHECK is true, also checks the data in STRING for being well-formed
622UTF-8. Returns true if successful, false otherwise.
4411f3b6 623
a63c962f 624=item _utf8_on(STRING)
4411f3b6 625
0ab8f81e 626[INTERNAL] Turns on the UTF-8 flag in STRING. The data in STRING is
4411f3b6 627B<not> checked for being well-formed UTF-8. Do not use unless you
628B<know> that the STRING is well-formed UTF-8. Returns the previous
0ab8f81e 629state of the UTF-8 flag (so please don't treat the return value as
630indicating success or failure), or C<undef> if STRING is not a string.
4411f3b6 631
a63c962f 632=item _utf8_off(STRING)
4411f3b6 633
0ab8f81e 634[INTERNAL] Turns off the UTF-8 flag in STRING. Do not use frivolously.
635Returns the previous state of the UTF-8 flag (so please don't treat the
636return value as indicating success or failure), or C<undef> if STRING is
4411f3b6 637not a string.
638
639=back
640
641=head1 SEE ALSO
642
5d030b67 643L<Encode::Encoding>,
644L<Encode::Supported>,
6d1c0808 645L<Encode::PerlIO>,
5d030b67 646L<encoding>,
6d1c0808 647L<perlebcdic>,
648L<perlfunc/open>,
649L<perlunicode>,
650L<utf8>,
5d030b67 651the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
4411f3b6 652
85982a32 653=head1 MAINTAINER
aae85ceb 654
655This project was originated by Nick Ing-Simmons and later maintained
0ab8f81e 656by Dan Kogai E<lt>dankogai@dan.co.jpE<gt>. See AUTHORS for a full list
aae85ceb 657of people involved. For any questions, use
658E<lt>perl-unicode@perl.orgE<gt> so others can share.
659
4411f3b6 660=cut