use strict;
our $VERSION = do { my @r = (q$Revision: 1.50 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
+use XSLoader ();
+XSLoader::load 'Encode';
-require DynaLoader;
require Exporter;
-
-our @ISA = qw(Exporter DynaLoader);
+our @ISA = qw(Exporter);
# Public, encouraged API is exported by default
our @FB_CONSTS = qw(FB_DEFAULT FB_QUIET FB_WARN FB_PERLQQ FB_CROAK);
our @EXPORT_OK =
- (
+ (
qw(
_utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
@FB_FLAGS, @FB_CONSTS,
);
-our %EXPORT_TAGS =
+our %EXPORT_TAGS =
(
all => [ @EXPORT, @EXPORT_OK ],
fallbacks => [ @FB_CONSTS ],
fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
);
-
-bootstrap Encode ();
-
# Documentation moved after __END__ for speed - NI-S
use Carp;
my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;
for my $mod (@modules){
$mod =~ s,::,/,g or $mod = "Encode/$mod";
- $mod .= '.pm';
+ $mod .= '.pm';
$DEBUG and warn "about to require $mod;";
eval { require $mod; };
}
# This is to restore %Encoding if really needed;
#
sub predefine_encodings{
- if ($ON_EBCDIC) {
+ if ($ON_EBCDIC) {
# was in Encode::UTF_EBCDIC
package Encode::UTF_EBCDIC;
*name = sub{ shift->{'Name'} };
my ($obj,$str,$chk) = @_;
my $res = '';
for (my $i = 0; $i < length($str); $i++) {
- $res .=
+ $res .=
chr(utf8::unicode_to_native(ord(substr($str,$i,1))));
}
$_[1] = '' if $chk;
my ($obj,$str,$chk) = @_;
my $res = '';
for (my $i = 0; $i < length($str); $i++) {
- $res .=
+ $res .=
chr(utf8::native_to_unicode(ord(substr($str,$i,1))));
}
$_[1] = '' if $chk;
return $res;
};
- $Encode::Encoding{Unicode} =
+ $Encode::Encoding{Unicode} =
bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
- } else {
+ } else {
# was in Encode::UTF_EBCDIC
package Encode::Internal;
*name = sub{ shift->{'Name'} };
return $str;
};
*encode = \&decode;
- $Encode::Encoding{Unicode} =
+ $Encode::Encoding{Unicode} =
bless {Name => "Internal"} => "Encode::Internal";
}
$_[1] = '' if $chk;
return $octets;
};
- $Encode::Encoding{utf8} =
+ $Encode::Encoding{utf8} =
bless {Name => "utf8"} => "Encode::utf8";
}
}
require Encode::Encoding;
+@Encode::XS::ISA = qw(Encode::Encoding);
-eval qq{ use PerlIO::encoding 0.02 };
-# warn $@ if $@;
1;
=head2 Table of Contents
-Encode consists of a collection of modules which details are too big
+Encode consists of a collection of modules whose details are too big
to fit in one document. This POD itself explains the top-level APIs
-and general topics at a glance. For other topics and more details,
+and general topics at a glance. For other topics and more details,
see the PODs below;
Name Description
--------------------------------------------------------
- Encode::Alias Alias defintions to encodings
+ Encode::Alias Alias definitions to encodings
Encode::Encoding Encode Implementation Base Class
Encode::Supported List of Supported Encodings
Encode::CN Simplified Chinese Encodings
For CHECK see L</"Handling Malformed Data">.
For example, to convert an (internally UTF-8 encoded) Unicode string to
-iso-8859-1 (also known as Latin1),
+iso-8859-1 (also known as Latin1),
$octets = encode("iso-8859-1", $unicode);
@ebcdic = Encode->encodings("EBCDIC");
-To find which encodings are supported by this package in details,
+To find which encodings are supported by this package in detail,
see L<Encode::Supported>.
=head2 Defining Aliases
Encode::resolve_alias("iso-8859-12") # false; nonexistent
Encode::resolve_alias($name) eq $name # true if $name is canonical
-This resolve_alias() does not need C<use Encode::Alias> and is
+This resolve_alias() does not need C<use Encode::Alias> and is
exported via C<use Encode qw(resolve_alias)>.
See L<Encode::Alias> for details.
# via from_to
open my $in, $infile or die;
open my $out, $outfile or die;
- while(<>){
+ while(<>){
from_to($_, "shiftjis", "euc", 1);
}
place of the malformed character. For UCM-based encodings,
E<lt>subcharE<gt> will be used. For Unicode, \xFFFD is used. If the
data is supposed to be UTF-8, an optional lexical warning (category
-utf8) is given.
+utf8) is given.
=item I<CHECK> = Encode::DIE_ON_ERROR (== 1)
=item I<CHECK> = Encode::FB_QUIET
If I<CHECK> is set to Encode::FB_QUIET, (en|de)code will immediately
-return proccessed part on error, with data passed via argument
-overwritten with unproccessed part. This is handy when have to
+return processed part on error, with data passed via argument
+overwritten with unprocessed part. This is handy when you have to
repeatedly call because the source data is chopped in the middle for
-some reasons, such as fixed-width buffer. Here is a sample code that
+some reason, such as a fixed-width buffer. Here is sample code that
does just this.
my $data = '';
representation of the octet that could not be decoded to utf8. And
when you encode, '\x{I<xxxx>}' will be placed where I<xxxx> is the
Unicode ID of the character that cannot be found in the character
-repartoire of the encoding.
+repertoire of the encoding.
=item The bitmask
L<Encode::Encoding>,
L<Encode::Supported>,
-L<Encode::PerlIO>,
+L<Encode::PerlIO>,
L<encoding>,
-L<perlebcdic>,
-L<perlfunc/open>,
-L<perlunicode>,
-L<utf8>,
+L<perlebcdic>,
+L<perlfunc/open>,
+L<perlunicode>,
+L<utf8>,
the Perl Unicode Mailing List E<lt>perl-unicode@perl.orgE<gt>
=head1 MAINTAINER
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
+#define U8 U8
+#include "../Encode/encode.h"
#define FBCHAR 0xFFFd
#define BOM_BE 0xFeFF
MODULE = Encode::Unicode PACKAGE = Encode::Unicode
+PROTOTYPES: DISABLE
+
void
-decode_xs(obj, str, chk = &PL_sv_undef)
+decode_xs(obj, str, check = 0)
SV * obj
SV * str
-SV * chk
+IV check
CODE:
{
int size = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
U8 *d;
if (size != 4 && invalid_ucs2(ord)) {
if (ucs2) {
- if (SvTRUE(chk)) {
+ if (check) {
croak("%s:no surrogates allowed %"UVxf,
SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),
ord);
}
if (s+size <= e) {
/* skip the next one as well */
- enc_unpack(aTHX_ &s,e,size,endian);
+ enc_unpack(aTHX_ &s,e,size,endian);
}
ord = FBCHAR;
}
d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
SvCUR_set(result,d - (U8 *)SvPVX(result));
}
- if (SvTRUE(chk)) {
- if (s < e) {
+ if (s < e) {
Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
+ }
+ if (check && !(check & ENCODE_LEAVE_SRC)){
+ if (s < e) {
Move(s,SvPVX(str),e-s,U8);
SvCUR_set(str,(e-s));
}
}
void
-encode_xs(obj, utf8, chk = &PL_sv_undef)
- SV * obj
+encode_xs(obj, utf8, check = 0)
+SV * obj
SV * utf8
-SV * chk
+IV check
CODE:
{
int size = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
if (size != 4 && invalid_ucs2(ord)) {
if (!issurrogate(ord)){
if (ucs2) {
- if (SvTRUE(chk)) {
+ if (check) {
croak("%s:code point \"\\x{"UVxf"}\" too high",
SvPV_nolen(
*hv_fetch((HV *)SvRV(obj),"Name",4,0))
enc_pack(aTHX_ result,size,endian,ord);
}
}
- if (SvTRUE(chk)) {
+ if (s < e) {
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
+ SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
+ }
+ if (check && !(check & ENCODE_LEAVE_SRC)){
if (s < e) {
- Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
- SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
Move(s,SvPVX(utf8),e-s,U8);
SvCUR_set(utf8,(e-s));
}