package Encode::Byte;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
my $name = 'Byte';
my %tables = (
- byte_t =>
- [
- # misc. vendors
- 'gsm0338.ucm',
- 'nextstep.ucm',
- 'hp-roman8.ucm',
- 'viscii.ucm',
- 'adobeStdenc.ucm',
- # koi8
- 'koi8-f.ucm', 'koi8-r.ucm', 'koi8-u.ucm',
- # Mac
- qw(
- macArabic.ucm
- macCentEuro.ucm
- macCroatian.ucm
- macCyrillic.ucm
- macFarsi.ucm
- macGreek.ucm
- macHebrew.ucm
- macIceland.ucm
- macRoman.ucm
- macROMnn.ucm
- macRUMnn.ucm
- macSami.ucm
- macThai.ucm
- macTurkish.ucm
- macUkraine.ucm
- ),
- ],
+ byte_t =>
+ [
+ # misc. vendors
+ 'gsm0338.ucm',
+ 'nextstep.ucm',
+ 'hp-roman8.ucm',
+ 'viscii.ucm',
+ 'adobeStdenc.ucm',
+ # koi8
+ 'koi8-f.ucm', 'koi8-r.ucm', 'koi8-u.ucm',
+ # Mac
+ qw(
+ macArabic.ucm
+ macCentEuro.ucm
+ macCroatian.ucm
+ macCyrillic.ucm
+ macFarsi.ucm
+ macGreek.ucm
+ macHebrew.ucm
+ macIceland.ucm
+ macRoman.ucm
+ macROMnn.ucm
+ macRUMnn.ucm
+ macSami.ucm
+ macThai.ucm
+ macTurkish.ucm
+ macUkraine.ucm
+ ),
+ ],
);
my %not_here =
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q" -"O"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q" -"O"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
package Encode::CN;
+
BEGIN {
- if (ord("A") == 193) {
- die "Encode::CN not supported on EBCDIC\n";
+ if ( ord("A") == 193 ) {
+ die "Encode::CN not supported on EBCDIC\n";
}
}
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode;
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
# Relocated from Encode.pm
use Encode::CN::HZ;
+
# use Encode::CN::2022_CN;
1;
Canonical Alias Description
--------------------------------------------------------------------
euc-cn /\beuc.*cn$/i EUC (Extended Unix Character)
- /\bcn.*euc$/i
+ /\bcn.*euc$/i
/\bGB[-_ ]?2312(?:\D.*$|$)/i (see below)
gb2312-raw The raw (low-bit) GB2312 character map
gb12345-raw Traditional chinese counterpart to
- GB2312 (raw)
+ GB2312 (raw)
iso-ir-165 GB2312 + GB6345 + GB8565 + additions
MacChineseSimp GB2312 + Apple Additions
cp936 Code Page 936, also known as GBK
- (Extended GuoBiao)
+ (Extended GuoBiao)
hz 7-bit escaped GB2312 encoding
--------------------------------------------------------------------
use strict;
my %tables = (euc_cn_t => ['euc-cn.ucm',
- 'cp936.ucm',
- 'macChinsimp.ucm',
- ],
- '2312_t' => ['gb2312.ucm'],
- '12345_t' => ['gb12345.ucm'],
- ir_165_t => ['ir-165.ucm'],
+ 'cp936.ucm',
+ 'macChinsimp.ucm',
+ ],
+ '2312_t' => ['gb2312.ucm'],
+ '12345_t' => ['gb12345.ucm'],
+ ir_165_t => ['ir-165.ucm'],
);
unless ($ENV{AGGREGATE_TABLES}){
my @ucm;
for my $k (keys %tables){
- push @ucm, @{$tables{$k}};
+ push @ucm, @{$tables{$k}};
}
%tables = ();
my $seq = 0;
for my $ucm (sort @ucm){
- # 8.3 compliance !
- my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
- $tables{$t} = [ $ucm ];
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
}
}
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
XSPROTOARG => '-noprototypes',
- );
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $
+# $Id: Changes,v 2.16 2006/05/03 18:24:10 dankogai Exp $
#
+$Revision: 2.16 $ $Date: 2006/05/03 18:24:10 $
+! bin/piconv
+ --xmlcref and --htmlcref added.
+! Encode.pm
+ Copyright Notice Added.
+ http://rt.cpan.org/NoAuth/Bug.html?id=#19056
+! *
+ Replaced remaining ^\t with q( ) x 4. -- Perl Best Practice pp. 20
+ And all .pm's are now perltidy-ed.
-$Revision: 2.15 $ $Date: 2006/04/06 15:44:11 $
+2.15 2006/04/06 15:44:11
! Unicode/Unicode.xs
Addressed: UTF-16, UTF-32, UCS, UTF-7 decoders mishandle illegal characters
http://rt.cpan.org/NoAuth/Bug.html?id=#18556
package Encode::EBCDIC;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
my $name = 'EBCDIC';
my %tables = (
- ebcdic_t =>
- ['posix-bc.ucm',
- qw(cp037.ucm cp1026.ucm cp1047.ucm cp500.ucm cp875.ucm),
- ],
- );
+ ebcdic_t =>
+ ['posix-bc.ucm',
+ qw(cp037.ucm cp1026.ucm cp1047.ucm cp500.ucm cp875.ucm),
+ ],
+ );
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q" -"O"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q" -"O"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
#
-# $Id: Encode.pm,v 2.15 2006/04/06 15:44:11 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.16 2006/05/03 18:32:25 dankogai Exp dankogai $
#
package Encode;
use strict;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.15 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.16 $ =~ /(\d+)/g;
sub DEBUG () { 0 }
use XSLoader ();
-XSLoader::load(__PACKAGE__, $VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
require Exporter;
use base qw/Exporter/;
decode decode_utf8 encode encode_utf8 str2bytes bytes2str
encodings find_encoding clone_encoding
);
-
-our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
- PERLQQ HTMLCREF XMLCREF STOP_AT_PARTIAL);
-our @FB_CONSTS = qw(FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
- FB_PERLQQ FB_HTMLCREF FB_XMLCREF);
-
-our @EXPORT_OK =
- (
- qw(
- _utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
- is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
+our @FB_FLAGS = qw(
+ DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
+ PERLQQ HTMLCREF XMLCREF STOP_AT_PARTIAL
+);
+our @FB_CONSTS = qw(
+ FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
+ FB_PERLQQ FB_HTMLCREF FB_XMLCREF
+);
+our @EXPORT_OK = (
+ qw(
+ _utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
+ is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
),
- @FB_FLAGS, @FB_CONSTS,
- );
+ @FB_FLAGS, @FB_CONSTS,
+);
-our %EXPORT_TAGS =
- (
- all => [ @EXPORT, @EXPORT_OK ],
- fallbacks => [ @FB_CONSTS ],
- fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
- );
+our %EXPORT_TAGS = (
+ all => [ @EXPORT, @EXPORT_OK ],
+ fallbacks => [@FB_CONSTS],
+ fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
+);
# Documentation moved after __END__ for speed - NI-S
-our $ON_EBCDIC = (ord("A") == 193);
+our $ON_EBCDIC = ( ord("A") == 193 );
use Encode::Alias;
require Encode::Config;
eval { require Encode::ConfigLocal };
-sub encodings
-{
+sub encodings {
my $class = shift;
my %enc;
- if (@_ and $_[0] eq ":all"){
- %enc = ( %Encoding, %ExtModule );
- }else{
- %enc = %Encoding;
- for my $mod (map {m/::/o ? $_ : "Encode::$_" } @_){
- DEBUG and warn $mod;
- for my $enc (keys %ExtModule){
- $ExtModule{$enc} eq $mod and $enc{$enc} = $mod;
- }
- }
+ if ( @_ and $_[0] eq ":all" ) {
+ %enc = ( %Encoding, %ExtModule );
}
- return
- sort { lc $a cmp lc $b }
- grep {!/^(?:Internal|Unicode|Guess)$/o} keys %enc;
+ else {
+ %enc = %Encoding;
+ for my $mod ( map { m/::/o ? $_ : "Encode::$_" } @_ ) {
+ DEBUG and warn $mod;
+ for my $enc ( keys %ExtModule ) {
+ $ExtModule{$enc} eq $mod and $enc{$enc} = $mod;
+ }
+ }
+ }
+ return sort { lc $a cmp lc $b }
+ grep { !/^(?:Internal|Unicode|Guess)$/o } keys %enc;
}
-sub perlio_ok{
- my $obj = ref($_[0]) ? $_[0] : find_encoding($_[0]);
+sub perlio_ok {
+ my $obj = ref( $_[0] ) ? $_[0] : find_encoding( $_[0] );
$obj->can("perlio_ok") and return $obj->perlio_ok();
- return 0; # safety net
+ return 0; # safety net
}
-sub define_encoding
-{
+sub define_encoding {
my $obj = shift;
my $name = shift;
$Encoding{$name} = $obj;
my $lc = lc($name);
- define_alias($lc => $obj) unless $lc eq $name;
- while (@_){
- my $alias = shift;
- define_alias($alias, $obj);
+ define_alias( $lc => $obj ) unless $lc eq $name;
+ while (@_) {
+ my $alias = shift;
+ define_alias( $alias, $obj );
}
return $obj;
}
-sub getEncoding
-{
- my ($class, $name, $skip_external) = @_;
+sub getEncoding {
+ my ( $class, $name, $skip_external ) = @_;
ref($name) && $name->can('renew') and return $name;
exists $Encoding{$name} and return $Encoding{$name};
$lc ne $name and $oc = $class->find_alias($lc);
defined($oc) and return $oc;
- unless ($skip_external)
- {
- if (my $mod = $ExtModule{$name} || $ExtModule{$lc}){
- $mod =~ s,::,/,g ; $mod .= '.pm';
- eval{ require $mod; };
- exists $Encoding{$name} and return $Encoding{$name};
- }
+ unless ($skip_external) {
+ if ( my $mod = $ExtModule{$name} || $ExtModule{$lc} ) {
+ $mod =~ s,::,/,g;
+ $mod .= '.pm';
+ eval { require $mod; };
+ exists $Encoding{$name} and return $Encoding{$name};
+ }
}
return;
}
-sub find_encoding($;$)
-{
- my ($name, $skip_external) = @_;
- return __PACKAGE__->getEncoding($name,$skip_external);
+sub find_encoding($;$) {
+ my ( $name, $skip_external ) = @_;
+ return __PACKAGE__->getEncoding( $name, $skip_external );
}
-sub resolve_alias($){
+sub resolve_alias($) {
my $obj = find_encoding(shift);
defined $obj and return $obj->name;
return;
}
-sub clone_encoding($){
+sub clone_encoding($) {
my $obj = find_encoding(shift);
ref $obj or return;
eval { require Storable };
return Storable::dclone($obj);
}
-sub encode($$;$)
-{
- my ($name, $string, $check) = @_;
+sub encode($$;$) {
+ my ( $name, $string, $check ) = @_;
return undef unless defined $string;
- $string .= '' if ref $string; # stringify;
- $check ||=0;
+ $string .= '' if ref $string; # stringify;
+ $check ||= 0;
my $enc = find_encoding($name);
- unless(defined $enc){
- require Carp;
- Carp::croak("Unknown encoding '$name'");
+ unless ( defined $enc ) {
+ require Carp;
+ Carp::croak("Unknown encoding '$name'");
}
- my $octets = $enc->encode($string,$check);
- $_[1] = $string if $check and !($check & LEAVE_SRC());
+ my $octets = $enc->encode( $string, $check );
+ $_[1] = $string if $check and !( $check & LEAVE_SRC() );
return $octets;
}
*str2bytes = \&encode;
-sub decode($$;$)
-{
- my ($name,$octets,$check) = @_;
+sub decode($$;$) {
+ my ( $name, $octets, $check ) = @_;
return undef unless defined $octets;
$octets .= '' if ref $octets;
- $check ||=0;
+ $check ||= 0;
my $enc = find_encoding($name);
- unless(defined $enc){
- require Carp;
- Carp::croak("Unknown encoding '$name'");
+ unless ( defined $enc ) {
+ require Carp;
+ Carp::croak("Unknown encoding '$name'");
}
- my $string = $enc->decode($octets,$check);
- $_[1] = $octets if $check and !($check & LEAVE_SRC());
+ my $string = $enc->decode( $octets, $check );
+ $_[1] = $octets if $check and !( $check & LEAVE_SRC() );
return $string;
}
*bytes2str = \&decode;
-sub from_to($$$;$)
-{
- my ($string,$from,$to,$check) = @_;
+sub from_to($$$;$) {
+ my ( $string, $from, $to, $check ) = @_;
return undef unless defined $string;
- $check ||=0;
+ $check ||= 0;
my $f = find_encoding($from);
- unless (defined $f){
- require Carp;
- Carp::croak("Unknown encoding '$from'");
+ unless ( defined $f ) {
+ require Carp;
+ Carp::croak("Unknown encoding '$from'");
}
my $t = find_encoding($to);
- unless (defined $t){
- require Carp;
- Carp::croak("Unknown encoding '$to'");
+ unless ( defined $t ) {
+ require Carp;
+ Carp::croak("Unknown encoding '$to'");
}
my $uni = $f->decode($string);
- $_[0] = $string = $t->encode($uni,$check);
- return undef if ($check && length($uni));
- return defined($_[0]) ? length($string) : undef ;
+ $_[0] = $string = $t->encode( $uni, $check );
+ return undef if ( $check && length($uni) );
+ return defined( $_[0] ) ? length($string) : undef;
}
-sub encode_utf8($)
-{
+sub encode_utf8($) {
my ($str) = @_;
utf8::encode($str);
return $str;
}
-sub decode_utf8($;$)
-{
- my ($str, $check) = @_;
+sub decode_utf8($;$) {
+ my ( $str, $check ) = @_;
return $str if is_utf8($str);
- if ($check){
- return decode("utf8", $str, $check);
- }else{
- return decode("utf8", $str);
- return $str;
+ if ($check) {
+ return decode( "utf8", $str, $check );
+ }
+ else {
+ return decode( "utf8", $str );
+ return $str;
}
}
# This is to restore %Encoding if really needed;
#
-sub predefine_encodings{
+sub predefine_encodings {
use Encode::Encoding;
no warnings 'redefine';
my $use_xs = shift;
if ($ON_EBCDIC) {
- # was in Encode::UTF_EBCDIC
- package Encode::UTF_EBCDIC;
- push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding';
- *decode = sub{
- my ($obj,$str,$chk) = @_;
- my $res = '';
- for (my $i = 0; $i < length($str); $i++) {
- $res .=
- chr(utf8::unicode_to_native(ord(substr($str,$i,1))));
- }
- $_[1] = '' if $chk;
- return $res;
- };
- *encode = sub{
- my ($obj,$str,$chk) = @_;
- my $res = '';
- for (my $i = 0; $i < length($str); $i++) {
- $res .=
- chr(utf8::native_to_unicode(ord(substr($str,$i,1))));
- }
- $_[1] = '' if $chk;
- return $res;
- };
- $Encode::Encoding{Unicode} =
- bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
- } else {
- package Encode::Internal;
- push @Encode::Internal::ISA, 'Encode::Encoding';
- *decode = sub{
- my ($obj,$str,$chk) = @_;
- utf8::upgrade($str);
- $_[1] = '' if $chk;
- return $str;
- };
- *encode = \&decode;
- $Encode::Encoding{Unicode} =
- bless {Name => "Internal"} => "Encode::Internal";
+
+ # was in Encode::UTF_EBCDIC
+ package Encode::UTF_EBCDIC;
+ push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding';
+ *decode = sub {
+ my ( $obj, $str, $chk ) = @_;
+ my $res = '';
+ for ( my $i = 0 ; $i < length($str) ; $i++ ) {
+ $res .=
+ chr(
+ utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) )
+ );
+ }
+ $_[1] = '' if $chk;
+ return $res;
+ };
+ *encode = sub {
+ my ( $obj, $str, $chk ) = @_;
+ my $res = '';
+ for ( my $i = 0 ; $i < length($str) ; $i++ ) {
+ $res .=
+ chr(
+ utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) )
+ );
+ }
+ $_[1] = '' if $chk;
+ return $res;
+ };
+ $Encode::Encoding{Unicode} =
+ bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
+ }
+ else {
+
+ package Encode::Internal;
+ push @Encode::Internal::ISA, 'Encode::Encoding';
+ *decode = sub {
+ my ( $obj, $str, $chk ) = @_;
+ utf8::upgrade($str);
+ $_[1] = '' if $chk;
+ return $str;
+ };
+ *encode = \&decode;
+ $Encode::Encoding{Unicode} =
+ bless { Name => "Internal" } => "Encode::Internal";
}
{
- # was in Encode::utf8
- package Encode::utf8;
- push @Encode::utf8::ISA, 'Encode::Encoding';
- #
- if ($use_xs){
- Encode::DEBUG and warn __PACKAGE__, " XS on";
- *decode = \&decode_xs;
- *encode = \&encode_xs;
- }else{
- Encode::DEBUG and warn __PACKAGE__, " XS off";
- *decode = sub{
- my ($obj,$octets,$chk) = @_;
- my $str = Encode::decode_utf8($octets);
- if (defined $str) {
- $_[1] = '' if $chk;
- return $str;
- }
- return undef;
- };
- *encode = sub {
- my ($obj,$string,$chk) = @_;
- my $octets = Encode::encode_utf8($string);
- $_[1] = '' if $chk;
- return $octets;
- };
- }
- *cat_decode = sub{ # ($obj, $dst, $src, $pos, $trm, $chk)
- my ($obj, undef, undef, $pos, $trm) = @_; # currently ignores $chk
- my ($rdst, $rsrc, $rpos) = \@_[1,2,3];
- use bytes;
- if ((my $npos = index($$rsrc, $trm, $pos)) >= 0) {
- $$rdst .= substr($$rsrc, $pos, $npos - $pos + length($trm));
- $$rpos = $npos + length($trm);
- return 1;
- }
- $$rdst .= substr($$rsrc, $pos);
- $$rpos = length($$rsrc);
- return '';
- };
- $Encode::Encoding{utf8} =
- bless {Name => "utf8"} => "Encode::utf8";
- $Encode::Encoding{"utf-8-strict"} =
- bless {Name => "utf-8-strict", strict_utf8 => 1 } => "Encode::utf8";
+
+ # was in Encode::utf8
+ package Encode::utf8;
+ push @Encode::utf8::ISA, 'Encode::Encoding';
+
+ #
+ if ($use_xs) {
+ Encode::DEBUG and warn __PACKAGE__, " XS on";
+ *decode = \&decode_xs;
+ *encode = \&encode_xs;
+ }
+ else {
+ Encode::DEBUG and warn __PACKAGE__, " XS off";
+ *decode = sub {
+ my ( $obj, $octets, $chk ) = @_;
+ my $str = Encode::decode_utf8($octets);
+ if ( defined $str ) {
+ $_[1] = '' if $chk;
+ return $str;
+ }
+ return undef;
+ };
+ *encode = sub {
+ my ( $obj, $string, $chk ) = @_;
+ my $octets = Encode::encode_utf8($string);
+ $_[1] = '' if $chk;
+ return $octets;
+ };
+ }
+ *cat_decode = sub { # ($obj, $dst, $src, $pos, $trm, $chk)
+ # currently ignores $chk
+ my ( $obj, undef, undef, $pos, $trm ) = @_;
+ my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
+ use bytes;
+ if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) {
+ $$rdst .=
+ substr( $$rsrc, $pos, $npos - $pos + length($trm) );
+ $$rpos = $npos + length($trm);
+ return 1;
+ }
+ $$rdst .= substr( $$rsrc, $pos );
+ $$rpos = length($$rsrc);
+ return '';
+ };
+ $Encode::Encoding{utf8} =
+ bless { Name => "utf8" } => "Encode::utf8";
+ $Encode::Encoding{"utf-8-strict"} =
+ bless { Name => "utf-8-strict", strict_utf8 => 1 } =>
+ "Encode::utf8";
}
}
list of people involved. For any questions, use
E<lt>perl-unicode@perl.orgE<gt> so we can all share.
+While Dan Kogai retains the copyright as a maintainer, the credit
+should go to all those involoved. See AUTHORS for those submitted
+codes.
+
+=head1 COPYRIGHT
+
+Copyright 2002-2006 Dan Kogai E<lt>dankogai@dan.co.jpE<gt>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
=cut
/*
- $Id: Encode.xs,v 2.8 2006/04/06 15:44:11 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.9 2006/05/03 18:24:10 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
Perl_croak(aTHX_ "panic_unimplemented"); \
- return (y)0; /* fool picky compilers */ \
+ return (y)0; /* fool picky compilers */ \
}
/**/
PUSHMARK(sp);
XPUSHs(sv);
while (enc->name[i]) {
- const char *name = enc->name[i++];
- XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
+ const char *name = enc->name[i++];
+ XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
}
PUTBACK;
call_pv("Encode::define_encoding", G_DISCARD);
argc = call_sv(fallback_cb, G_SCALAR);
SPAGAIN;
if (argc != 1){
- croak("fallback sub must return scalar!");
+ croak("fallback sub must return scalar!");
}
retval = newSVsv(POPs);
PUTBACK;
static SV *
encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
- int check, STRLEN * offset, SV * term, int * retcode)
+ int check, STRLEN * offset, SV * term, int * retcode)
{
STRLEN slen;
U8 *s = (U8 *) SvPV(src, slen);
if (offset) {
s += *offset;
if (slen > *offset){ /* safeguard against slen overflow */
- slen -= *offset;
+ slen -= *offset;
}else{
- slen = 0;
+ slen = 0;
}
tlen = slen;
}
if (slen == 0){
- SvCUR_set(dst, 0);
- SvPOK_only(dst);
- goto ENCODE_END;
+ SvCUR_set(dst, 0);
+ SvPOK_only(dst);
+ goto ENCODE_END;
}
while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check,
- trm, trmlen)) )
+ trm, trmlen)) )
{
- SvCUR_set(dst, dlen+ddone);
- SvPOK_only(dst);
-
- if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL ||
- code == ENCODE_FOUND_TERM) {
- break;
- }
- switch (code) {
- case ENCODE_NOSPACE:
- {
- STRLEN more = 0; /* make sure you initialize! */
- STRLEN sleft;
- sdone += slen;
- ddone += dlen;
- sleft = tlen - sdone;
+ SvCUR_set(dst, dlen+ddone);
+ SvPOK_only(dst);
+
+ if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL ||
+ code == ENCODE_FOUND_TERM) {
+ break;
+ }
+ switch (code) {
+ case ENCODE_NOSPACE:
+ {
+ STRLEN more = 0; /* make sure you initialize! */
+ STRLEN sleft;
+ sdone += slen;
+ ddone += dlen;
+ sleft = tlen - sdone;
#if ENCODE_XS_PROFILE >= 2
- Perl_warn(aTHX_
- "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
- more, sdone, sleft, SvLEN(dst));
+ Perl_warn(aTHX_
+ "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
+ more, sdone, sleft, SvLEN(dst));
#endif
- if (sdone != 0) { /* has src ever been processed ? */
+ if (sdone != 0) { /* has src ever been processed ? */
#if ENCODE_XS_USEFP == 2
- more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
- - SvLEN(dst);
+ more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
+ - SvLEN(dst);
#elif ENCODE_XS_USEFP
- more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft);
+ more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft);
#else
- /* safe until SvLEN(dst) == MAX_INT/16 */
- more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
+ /* safe until SvLEN(dst) == MAX_INT/16 */
+ more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
#endif
- }
- more += UTF8_MAXLEN; /* insurance policy */
- d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
- /* dst need to grow need MORE bytes! */
- if (ddone >= SvLEN(dst)) {
- Perl_croak(aTHX_ "Destination couldn't be grown.");
- }
- dlen = SvLEN(dst)-ddone-1;
- d += ddone;
- s += slen;
- slen = tlen-sdone;
- continue;
- }
- case ENCODE_NOREP:
- /* encoding */
- if (dir == enc->f_utf8) {
- STRLEN clen;
- UV ch =
- utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
- &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
- /* if non-representable multibyte prefix at end of current buffer - break*/
- if (clen > tlen - sdone) break;
- if (check & ENCODE_DIE_ON_ERR) {
- Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
- (UV)ch, enc->name[0]);
- return &PL_sv_undef; /* never reaches but be safe */
- }
- if (check & ENCODE_WARN_ON_ERR){
- Perl_warner(aTHX_ packWARN(WARN_UTF8),
- ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]);
- }
- if (check & ENCODE_RETURN_ON_ERR){
- goto ENCODE_SET_SRC;
- }
- if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar =
- (fallback_cb != (SV*)NULL) ? do_fallback_cb(aTHX_ ch) :
- newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
- check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
- "&#x%" UVxf ";", (UV)ch);
- sdone += slen + clen;
- ddone += dlen + SvCUR(subchar);
- sv_catsv(dst, subchar);
- SvREFCNT_dec(subchar);
- } else {
- /* fallback char */
- sdone += slen + clen;
- ddone += dlen + enc->replen;
- sv_catpvn(dst, (char*)enc->rep, enc->replen);
- }
- }
- /* decoding */
- else {
- if (check & ENCODE_DIE_ON_ERR){
- Perl_croak(aTHX_ ERR_DECODE_NOMAP,
+ }
+ more += UTF8_MAXLEN; /* insurance policy */
+ d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
+ /* dst need to grow need MORE bytes! */
+ if (ddone >= SvLEN(dst)) {
+ Perl_croak(aTHX_ "Destination couldn't be grown.");
+ }
+ dlen = SvLEN(dst)-ddone-1;
+ d += ddone;
+ s += slen;
+ slen = tlen-sdone;
+ continue;
+ }
+ case ENCODE_NOREP:
+ /* encoding */
+ if (dir == enc->f_utf8) {
+ STRLEN clen;
+ UV ch =
+ utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
+ &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
+ /* if non-representable multibyte prefix at end of current buffer - break*/
+ if (clen > tlen - sdone) break;
+ if (check & ENCODE_DIE_ON_ERR) {
+ Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
+ (UV)ch, enc->name[0]);
+ return &PL_sv_undef; /* never reaches but be safe */
+ }
+ if (check & ENCODE_WARN_ON_ERR){
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]);
+ }
+ if (check & ENCODE_RETURN_ON_ERR){
+ goto ENCODE_SET_SRC;
+ }
+ if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ SV* subchar =
+ (fallback_cb != (SV*)NULL) ? do_fallback_cb(aTHX_ ch) :
+ newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
+ check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
+ "&#x%" UVxf ";", (UV)ch);
+ sdone += slen + clen;
+ ddone += dlen + SvCUR(subchar);
+ sv_catsv(dst, subchar);
+ SvREFCNT_dec(subchar);
+ } else {
+ /* fallback char */
+ sdone += slen + clen;
+ ddone += dlen + enc->replen;
+ sv_catpvn(dst, (char*)enc->rep, enc->replen);
+ }
+ }
+ /* decoding */
+ else {
+ if (check & ENCODE_DIE_ON_ERR){
+ Perl_croak(aTHX_ ERR_DECODE_NOMAP,
enc->name[0], (UV)s[slen]);
- return &PL_sv_undef; /* never reaches but be safe */
- }
- if (check & ENCODE_WARN_ON_ERR){
- Perl_warner(
- aTHX_ packWARN(WARN_UTF8),
- ERR_DECODE_NOMAP,
+ return &PL_sv_undef; /* never reaches but be safe */
+ }
+ if (check & ENCODE_WARN_ON_ERR){
+ Perl_warner(
+ aTHX_ packWARN(WARN_UTF8),
+ ERR_DECODE_NOMAP,
enc->name[0], (UV)s[slen]);
- }
- if (check & ENCODE_RETURN_ON_ERR){
- goto ENCODE_SET_SRC;
- }
- if (check &
- (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar =
- (fallback_cb != (SV*)NULL) ?
- do_fallback_cb(aTHX_ (UV)s[slen]) :
- newSVpvf("\\x%02" UVXf, (UV)s[slen]);
- sdone += slen + 1;
- ddone += dlen + SvCUR(subchar);
- sv_catsv(dst, subchar);
- SvREFCNT_dec(subchar);
- } else {
- sdone += slen + 1;
- ddone += dlen + strlen(FBCHAR_UTF8);
- sv_catpv(dst, FBCHAR_UTF8);
- }
- }
- /* settle variables when fallback */
- d = (U8 *)SvEND(dst);
+ }
+ if (check & ENCODE_RETURN_ON_ERR){
+ goto ENCODE_SET_SRC;
+ }
+ if (check &
+ (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ SV* subchar =
+ (fallback_cb != (SV*)NULL) ?
+ do_fallback_cb(aTHX_ (UV)s[slen]) :
+ newSVpvf("\\x%02" UVXf, (UV)s[slen]);
+ sdone += slen + 1;
+ ddone += dlen + SvCUR(subchar);
+ sv_catsv(dst, subchar);
+ SvREFCNT_dec(subchar);
+ } else {
+ sdone += slen + 1;
+ ddone += dlen + strlen(FBCHAR_UTF8);
+ sv_catpv(dst, FBCHAR_UTF8);
+ }
+ }
+ /* settle variables when fallback */
+ d = (U8 *)SvEND(dst);
dlen = SvLEN(dst) - ddone - 1;
- s = (U8*)SvPVX(src) + sdone;
- slen = tlen - sdone;
- break;
-
- default:
- Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
- code, (dir == enc->f_utf8) ? "to" : "from",
- enc->name[0]);
- return &PL_sv_undef;
- }
+ s = (U8*)SvPVX(src) + sdone;
+ slen = tlen - sdone;
+ break;
+
+ default:
+ Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
+ code, (dir == enc->f_utf8) ? "to" : "from",
+ enc->name[0]);
+ return &PL_sv_undef;
+ }
}
ENCODE_SET_SRC:
if (check && !(check & ENCODE_LEAVE_SRC)){
- sdone = SvCUR(src) - (slen+sdone);
- if (sdone) {
- sv_setpvn(src, (char*)s+slen, sdone);
- }
- SvCUR_set(src, sdone);
+ sdone = SvCUR(src) - (slen+sdone);
+ if (sdone) {
+ sv_setpvn(src, (char*)s+slen, sdone);
+ }
+ SvCUR_set(src, sdone);
}
/* warn("check = 0x%X, code = 0x%d\n", check, code); */
#if ENCODE_XS_PROFILE
if (SvCUR(dst) > SvCUR(src)){
- Perl_warn(aTHX_
- "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n",
- SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
- (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
+ Perl_warn(aTHX_
+ "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n",
+ SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
+ (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
}
#endif
UTF8_ALLOW_NONSTRICT)
);
#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */
- if (strict && uv > PERL_UNICODE_MAX)
- ulen = -1;
+ if (strict && uv > PERL_UNICODE_MAX)
+ ulen = -1;
#endif
if (ulen == -1) {
if (strict) {
XPUSHs(obj);
PUTBACK;
if (call_method("renewed",G_SCALAR) == 1) {
- SPAGAIN;
- renewed = (bool)POPi;
- PUTBACK;
+ SPAGAIN;
+ renewed = (bool)POPi;
+ PUTBACK;
#if 0
- fprintf(stderr, "renewed == %d\n", renewed);
+ fprintf(stderr, "renewed == %d\n", renewed);
#endif
}
FREETMPS; LEAVE;
/* end PerlIO check */
if (SvUTF8(src)) {
- s = utf8_to_bytes(s,&slen);
- if (s) {
- SvCUR_set(src,slen);
- SvUTF8_off(src);
- e = s+slen;
- }
- else {
- croak("Cannot decode string with wide characters");
- }
+ s = utf8_to_bytes(s,&slen);
+ if (s) {
+ SvCUR_set(src,slen);
+ SvUTF8_off(src);
+ e = s+slen;
+ }
+ else {
+ croak("Cannot decode string with wide characters");
+ }
}
s = process_utf8(aTHX_ dst, s, e, check, 0, strict_utf8(aTHX_ obj), renewed);
/* Clear out translated part of source unless asked not to */
if (check && !(check & ENCODE_LEAVE_SRC)){
- slen = e-s;
- if (slen) {
- sv_setpvn(src, (char*)s, slen);
- }
- SvCUR_set(src, slen);
+ slen = e-s;
+ if (slen) {
+ sv_setpvn(src, (char*)s, slen);
+ }
+ SvCUR_set(src, slen);
}
SvUTF8_on(dst);
ST(0) = sv_2mortal(dst);
U8 *e = (U8 *) SvEND(src);
SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */
if (SvUTF8(src)) {
- /* Already encoded */
- if (strict_utf8(aTHX_ obj)) {
- s = process_utf8(aTHX_ dst, s, e, check, 1, 1, 0);
- }
+ /* Already encoded */
+ if (strict_utf8(aTHX_ obj)) {
+ s = process_utf8(aTHX_ dst, s, e, check, 1, 1, 0);
+ }
else {
/* trust it and just copy the octets */
sv_setpvn(dst,(char *)s,(e-s));
- s = e;
+ s = e;
}
}
else {
/* Native bytes - can always encode */
- U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */
+ U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */
while (s < e) {
UV uv = NATIVE_TO_UNI((UV) *s++);
if (UNI_IS_INVARIANT(uv))
*d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
*d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
}
- }
+ }
SvCUR_set(dst, d- (U8 *)SvPVX(dst));
*SvEND(dst) = '\0';
}
/* Clear out translated part of source unless asked not to */
if (check && !(check & ENCODE_LEAVE_SRC)){
- slen = e-s;
- if (slen) {
- sv_setpvn(src, (char*)s, slen);
- }
- SvCUR_set(src, slen);
+ slen = e-s;
+ if (slen) {
+ sv_setpvn(src, (char*)s, slen);
+ }
+ SvCUR_set(src, slen);
}
SvPOK_only(dst);
SvUTF8_off(dst);
sv_utf8_downgrade(src, FALSE);
}
sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check,
- &offset, term, &code));
+ &offset, term, &code));
SvIV_set(off, (IV)offset);
if (code == ENCODE_FOUND_TERM) {
- ST(0) = &PL_sv_yes;
+ ST(0) = &PL_sv_yes;
}else{
- ST(0) = &PL_sv_no;
+ ST(0) = &PL_sv_no;
}
XSRETURN(1);
}
sv_utf8_downgrade(src, FALSE);
}
if (SvROK(check_sv)){
- if (fallback_cb == (SV*)NULL){
+ if (fallback_cb == (SV*)NULL){
fallback_cb = newSVsv(check_sv); /* First time */
}else{
SvSetSV(fallback_cb, check_sv); /* Been here before */
- }
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
+ }
+ check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
}else{
- fallback_cb = (SV*)NULL;
- check = SvIV(check_sv);
+ fallback_cb = (SV*)NULL;
+ check = SvIV(check_sv);
}
ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check,
- NULL, Nullsv, NULL);
+ NULL, Nullsv, NULL);
SvUTF8_on(ST(0));
XSRETURN(1);
}
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
sv_utf8_upgrade(src);
if (SvROK(check_sv)){
- if (fallback_cb == (SV*)NULL){
+ if (fallback_cb == (SV*)NULL){
fallback_cb = newSVsv(check_sv); /* First time */
}else{
SvSetSV(fallback_cb, check_sv); /* Been here before */
- }
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
+ }
+ check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
}else{
- fallback_cb = (SV*)NULL;
- check = SvIV(check_sv);
+ fallback_cb = (SV*)NULL;
+ check = SvIV(check_sv);
}
ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check,
- NULL, Nullsv, NULL);
+ NULL, Nullsv, NULL);
XSRETURN(1);
}
eval_pv("require PerlIO::encoding", 0);
if (SvTRUE(get_sv("@", 0))) {
- ST(0) = &PL_sv_no;
+ ST(0) = &PL_sv_no;
}else{
- ST(0) = &PL_sv_yes;
+ ST(0) = &PL_sv_yes;
}
XSRETURN(1);
}
if (encoding)
RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
else {
- STRLEN len;
- U8* s = (U8*)SvPV(sv, len);
- U8* converted;
-
- converted = bytes_to_utf8(s, &len); /* This allocs */
- sv_setpvn(sv, (char *)converted, len);
- SvUTF8_on(sv); /* XXX Should we? */
- Safefree(converted); /* ... so free it */
- RETVAL = len;
+ STRLEN len;
+ U8* s = (U8*)SvPV(sv, len);
+ U8* converted;
+
+ converted = bytes_to_utf8(s, &len); /* This allocs */
+ sv_setpvn(sv, (char *)converted, len);
+ SvUTF8_on(sv); /* XXX Should we? */
+ Safefree(converted); /* ... so free it */
+ RETVAL = len;
}
}
OUTPUT:
SV * check = items > 2 ? ST(2) : Nullsv;
if (to) {
- RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
+ RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
} else {
- STRLEN len;
- U8 *s = (U8*)SvPV(sv, len);
+ STRLEN len;
+ U8 *s = (U8*)SvPV(sv, len);
- RETVAL = 0;
- if (SvTRUE(check)) {
- /* Must do things the slow way */
- U8 *dest;
+ RETVAL = 0;
+ if (SvTRUE(check)) {
+ /* Must do things the slow way */
+ U8 *dest;
/* We need a copy to pass to check() */
- U8 *src = s;
- U8 *send = s + len;
- U8 *d0;
+ U8 *src = (U8*)savepv((char *)s);
+ U8 *send = s + len;
- New(83, dest, len, U8); /* I think */
- d0 = dest;
+ New(83, dest, len, U8); /* I think */
- while (s < send) {
+ while (s < send) {
if (*s < 0x80){
- *dest++ = *s++;
+ *dest++ = *s++;
} else {
- STRLEN ulen;
- UV uv = *s++;
-
- /* Have to do it all ourselves because of error routine,
- aargh. */
- if (!(uv & 0x40)){ goto failure; }
- if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
- else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
- else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
- else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
- else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
- else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
- else { ulen = 13; uv = 0; }
-
- /* Note change to utf8.c variable naming, for variety */
- while (ulen--) {
- if ((*s & 0xc0) != 0x80){
- goto failure;
- } else {
- uv = (uv << 6) | (*s++ & 0x3f);
- }
- }
- if (uv > 256) {
- failure:
- call_failure(check, s, dest, src);
- /* Now what happens? */
- }
- *dest++ = (U8)uv;
- }
- }
- RETVAL = dest - d0;
- sv_usepvn(sv, (char *)dest, RETVAL);
- SvUTF8_off(sv);
- } else {
- RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
- }
+ STRLEN ulen;
+ UV uv = *s++;
+
+ /* Have to do it all ourselves because of error routine,
+ aargh. */
+ if (!(uv & 0x40)){ goto failure; }
+ if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
+ else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
+ else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
+ else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
+ else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
+ else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
+ else { ulen = 13; uv = 0; }
+
+ /* Note change to utf8.c variable naming, for variety */
+ while (ulen--) {
+ if ((*s & 0xc0) != 0x80){
+ goto failure;
+ } else {
+ uv = (uv << 6) | (*s++ & 0x3f);
+ }
+ }
+ if (uv > 256) {
+ failure:
+ call_failure(check, s, dest, src);
+ /* Now what happens? */
+ }
+ *dest++ = (U8)uv;
+ }
+ }
+ } else {
+ RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
+ }
}
}
OUTPUT:
CODE:
{
if (SvGMAGICAL(sv)) /* it could be $1, for example */
- sv = newSVsv(sv); /* GMAGIG will be done */
+ sv = newSVsv(sv); /* GMAGIG will be done */
if (SvPOK(sv)) {
- RETVAL = SvUTF8(sv) ? TRUE : FALSE;
- if (RETVAL &&
- check &&
- !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
- RETVAL = FALSE;
+ RETVAL = SvUTF8(sv) ? TRUE : FALSE;
+ if (RETVAL &&
+ check &&
+ !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
+ RETVAL = FALSE;
} else {
- RETVAL = FALSE;
+ RETVAL = FALSE;
}
if (sv != ST(0))
- SvREFCNT_dec(sv); /* it was a temp copy */
+ SvREFCNT_dec(sv); /* it was a temp copy */
}
OUTPUT:
RETVAL
CODE:
{
if (SvPOK(sv)) {
- SV *rsv = newSViv(SvUTF8(sv));
- RETVAL = rsv;
- SvUTF8_on(sv);
+ SV *rsv = newSViv(SvUTF8(sv));
+ RETVAL = rsv;
+ SvUTF8_on(sv);
} else {
- RETVAL = &PL_sv_undef;
+ RETVAL = &PL_sv_undef;
}
}
OUTPUT:
CODE:
{
if (SvPOK(sv)) {
- SV *rsv = newSViv(SvUTF8(sv));
- RETVAL = rsv;
- SvUTF8_off(sv);
+ SV *rsv = newSViv(SvUTF8(sv));
+ RETVAL = rsv;
+ SvUTF8_off(sv);
} else {
- RETVAL = &PL_sv_undef;
+ RETVAL = &PL_sv_undef;
}
}
OUTPUT:
# Please edit the following to the taste!
my $name = '$_Name_';
my %tables = (
- $_Name__t => [ $_TableFiles_ ],
- );
+ $_Name__t => [ $_TableFiles_ ],
+ );
#### DO NOT EDIT BEYOND THIS POINT!
require File::Spec;
my ($enc2xs, $encode_h) = ();
PATHLOOP:
for my $d (@Config{qw/bin sitebin vendorbin/},
- (split /$Config{path_sep}/o, $ENV{PATH})){
+ (split /$Config{path_sep}/o, $ENV{PATH})){
for my $f (qw/enc2xs enc2xs5.7.3/){
my $path = File::Spec->catfile($d, $f);
-r $path and $enc2xs = $path and last PATHLOOP;
WriteMakefile(
INC => "-I$encode_h",
#### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! ####
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- PREREQ_PM => {
- 'Encode' => "1.41",
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ PREREQ_PM => {
+ 'Encode' => "1.41",
},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
# $self->{'H'} = [$self->catfile($self->updir,'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- my $ucopts = '-"Q"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ my $ucopts = '-"Q"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
struct encpage_s
{
- /* fields ordered to pack nicely on 32-bit machines */
- const U8 *const seq; /* Packed output sequences we generate
- if we match */
- const encpage_t *const next; /* Page to go to if we match */
- const U8 min; /* Min value of octet to match this entry */
- const U8 max; /* Max value of octet to match this entry */
- const U8 dlen; /* destination length -
- size of entries in seq */
- const U8 slen; /* source length -
- number of source octets needed */
+ /* fields ordered to pack nicely on 32-bit machines */
+ const U8 *const seq; /* Packed output sequences we generate
+ if we match */
+ const encpage_t *const next; /* Page to go to if we match */
+ const U8 min; /* Min value of octet to match this entry */
+ const U8 max; /* Max value of octet to match this entry */
+ const U8 dlen; /* destination length -
+ size of entries in seq */
+ const U8 slen; /* source length -
+ number of source octets needed */
};
/*
typedef struct encode_s encode_t;
struct encode_s
{
- const encpage_t *const t_utf8; /* Starting table for translation from
- the encoding to UTF-8 form */
- const encpage_t *const f_utf8; /* Starting table for translation
- from UTF-8 to the encoding */
- const U8 *const rep; /* Replacement character in this
- encoding e.g. "?" */
- int replen; /* Number of octets in rep */
- U8 min_el; /* Minimum octets to represent a
- character */
- U8 max_el; /* Maximum octets to represent a
- character */
- const char *const name[2]; /* name(s) of this encoding */
+ const encpage_t *const t_utf8; /* Starting table for translation from
+ the encoding to UTF-8 form */
+ const encpage_t *const f_utf8; /* Starting table for translation
+ from UTF-8 to the encoding */
+ const U8 *const rep; /* Replacement character in this
+ encoding e.g. "?" */
+ int replen; /* Number of octets in rep */
+ U8 min_el; /* Minimum octets to represent a
+ character */
+ U8 max_el; /* Maximum octets to represent a
+ character */
+ const char *const name[2]; /* name(s) of this encoding */
};
#ifdef U8
extern int do_encode(const encpage_t *enc, const U8 *src, STRLEN *slen,
U8 *dst, STRLEN dlen, STRLEN *dout, int approx,
- const U8 *term, STRLEN tlen);
+ const U8 *term, STRLEN tlen);
extern void Encode_DefineEncoding(encode_t *enc);
package Encode::JP;
+
BEGIN {
- if (ord("A") == 193) {
- die "Encode::JP not supported on EBCDIC\n";
+ if ( ord("A") == 193 ) {
+ die "Encode::JP not supported on EBCDIC\n";
}
}
use Encode;
-our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
use Encode::JP::JIS7;
--------------------------------------------------------------------
euc-jp /\beuc.*jp$/i EUC (Extended Unix Character)
/\bjp.*euc/i
- /\bujis$/i
+ /\bujis$/i
shiftjis /\bshift.*jis$/i Shift JIS (aka MS Kanji)
- /\bsjis$/i
+ /\bsjis$/i
7bit-jis /\bjis$/i 7bit JIS
iso-2022-jp ISO-2022-JP [RFC1468]
- = 7bit JIS with all Halfwidth Kana
- converted to Fullwidth
+ = 7bit JIS with all Halfwidth Kana
+ converted to Fullwidth
iso-2022-jp-1 ISO-2022-JP-1 [RFC2237]
= ISO-2022-JP with JIS X 0212-1990
- support. See below
+ support. See below
MacJapanese Shift JIS + Apple vendor mappings
cp932 /\bwindows-31j$/i Code Page 932
= Shift JIS + MS/IBM vendor mappings
use strict;
my %tables = (
- euc_jp_t => ['euc-jp.ucm'],
+ euc_jp_t => ['euc-jp.ucm'],
sjis_t => ['shiftjis.ucm',
- 'macJapanese.ucm',
- 'cp932.ucm'],
- raw_t => [
- qw(jis0201.ucm jis0208.ucm jis0212.ucm)
- ],
+ 'macJapanese.ucm',
+ 'cp932.ucm'],
+ raw_t => [
+ qw(jis0201.ucm jis0208.ucm jis0212.ucm)
+ ],
);
unless ($ENV{AGGREGATE_TABLES}){
my @ucm;
for my $k (keys %tables){
- push @ucm, @{$tables{$k}};
+ push @ucm, @{$tables{$k}};
}
%tables = ();
my $seq = 0;
for my $ucm (sort @ucm){
- # 8.3 compliance !
- my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
- $tables{$t} = [ $ucm ];
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
}
}
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
package Encode::KR;
+
BEGIN {
- if (ord("A") == 193) {
- die "Encode::KR not supported on EBCDIC\n";
+ if ( ord("A") == 193 ) {
+ die "Encode::KR not supported on EBCDIC\n";
}
}
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode;
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
use Encode::KR::2022_KR;
Canonical Alias Description
--------------------------------------------------------------------
euc-kr /\beuc.*kr$/i EUC (Extended Unix Character)
- /\bkr.*euc$/i
+ /\bkr.*euc$/i
ksc5601-raw Korean standard code set (as is)
cp949 /(?:x-)?uhc$/i
/(?:x-)?windows-949$/i
use strict;
my %tables = (euc_kr_t => ['euc-kr.ucm',
- 'macKorean.ucm',
- 'cp949.ucm',
- ],
- '5601_t' => ['ksc5601.ucm'],
- johab_t => ['johab.ucm'],
+ 'macKorean.ucm',
+ 'cp949.ucm',
+ ],
+ '5601_t' => ['ksc5601.ucm'],
+ johab_t => ['johab.ucm'],
);
unless ($ENV{AGGREGATE_TABLES}){
my @ucm;
for my $k (keys %tables){
- push @ucm, @{$tables{$k}};
+ push @ucm, @{$tables{$k}};
}
%tables = ();
my $seq = 0;
for my $ucm (sort @ucm){
- # 8.3 compliance !
- my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
- $tables{$t} = [ $ucm ];
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
}
}
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
my %tables =
(
def_t => ['ascii.ucm',
- '8859-1.ucm',
- 'null.ucm',
- 'ctrl.ucm',
- ]
+ '8859-1.ucm',
+ 'null.ucm',
+ 'ctrl.ucm',
+ ]
);
my @exe_files = qw(bin/enc2xs
- bin/piconv
- );
+ bin/piconv
+ );
my @more_exe_files = qw(
- unidump
- );
+ unidump
+ );
my @pmlibdirs = qw(lib Encode);
$ARGV{MORE_SCRIOPTS} and push @exe_files, @more_exe_files;
$ARGV{INSTALL_UCM} and push @pmlibdirs, "ucm";
WriteMakefile(
- NAME => "Encode",
- EXE_FILES => \@exe_files,
- VERSION_FROM => 'Encode.pm',
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN1PODS => {},
- MAN3PODS => {},
- INC => "-I./Encode",
- PMLIBDIRS => \@pmlibdirs,
- INSTALLDIRS => 'perl',
- );
+ NAME => "Encode",
+ EXE_FILES => \@exe_files,
+ VERSION_FROM => 'Encode.pm',
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN1PODS => {},
+ MAN3PODS => {},
+ INC => "-I./Encode",
+ PMLIBDIRS => \@pmlibdirs,
+ INSTALLDIRS => 'perl',
+ );
package MY;
# Find existing O_FILES
foreach my $f (@{$self->{'O_FILES'}})
{
- $o{$f} = 1;
+ $o{$f} = 1;
}
my $x = $self->{'OBJ_EXT'};
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
# Trick case-blind filesystems.
delete $o{'encode'.$x};
my @files;
foreach my $table (keys %tables)
{
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm))
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm))
{
- push (@files,$table.$ext);
+ push (@files,$table.$ext);
}
$self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'clean'}{'FILES'} .= join(' ',@files);
return '';
$str .= ' Encode.c';
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- my $enc2xs = $self->catfile('bin', 'enc2xs');
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q" -"O"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ my $enc2xs = $self->catfile('bin', 'enc2xs');
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q" -"O"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
my $name = 'Symbol';
my %tables = (
- symbol_t => [qw(
- symbol.ucm
- dingbats.ucm
- adobeSymbol.ucm
- adobeZdingbat.ucm
- macSymbol.ucm
- macDingbats.ucm
- )
- ],
- );
+ symbol_t => [qw(
+ symbol.ucm
+ dingbats.ucm
+ adobeSymbol.ucm
+ adobeZdingbat.ucm
+ macSymbol.ucm
+ macDingbats.ucm
+ )
+ ],
+ );
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q" -"O"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q" -"O"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
package Encode::Symbol;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
use strict;
my %tables = (big5_t => ['big5-eten.ucm',
- 'big5-hkscs.ucm',
- 'macChintrad.ucm',
- 'cp950.ucm'],
+ 'big5-hkscs.ucm',
+ 'macChintrad.ucm',
+ 'cp950.ucm'],
);
unless ($ENV{AGGREGATE_TABLES}){
my @ucm;
for my $k (keys %tables){
- push @ucm, @{$tables{$k}};
+ push @ucm, @{$tables{$k}};
}
%tables = ();
my $seq = 0;
for my $ucm (sort @ucm){
- # 8.3 compliance !
- my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
- $tables{$t} = [ $ucm ];
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
}
}
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::'.$name,
- VERSION_FROM => "$name.pm",
- OBJECT => '$(O_FILES)',
- 'dist' => {
- COMPRESS => 'gzip -9f',
- SUFFIX => 'gz',
- DIST_DEFAULT => 'all tardist',
- },
- MAN3PODS => {},
- # OS 390 winges about line numbers > 64K ???
- XSOPT => '-nolinenumbers',
- );
+ NAME => 'Encode::'.$name,
+ VERSION_FROM => "$name.pm",
+ OBJECT => '$(O_FILES)',
+ 'dist' => {
+ COMPRESS => 'gzip -9f',
+ SUFFIX => 'gz',
+ DIST_DEFAULT => 'all tardist',
+ },
+ MAN3PODS => {},
+ # OS 390 winges about line numbers > 64K ???
+ XSOPT => '-nolinenumbers',
+ );
package MY;
# Add the table O_FILES
foreach my $e (keys %tables)
{
- $o{$e.$x} = 1;
+ $o{$e.$x} = 1;
}
$o{"$name$x"} = 1;
$self->{'O_FILES'} = [sort keys %o];
$self->{'H'} = [$self->catfile($self->updir,'Encode', 'encode.h')];
my %xs;
foreach my $table (keys %tables) {
- push (@{$self->{'C'}},"$table.c");
- # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
- # get built.
- foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
- push (@files,$table.$ext);
- }
- $self->{SOURCE} .= " $table.c"
- if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
+ push (@{$self->{'C'}},"$table.c");
+ # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
+ # get built.
+ foreach my $ext (qw($(OBJ_EXT) .c .h .exh .fnm)) {
+ push (@files,$table.$ext);
+ }
+ $self->{SOURCE} .= " $table.c"
+ if $^O eq 'MacOS' && $self->{SOURCE} !~ /\b$table\.c\b/;
}
$self->{'XS'} = { "$name.xs" => "$name.c" };
$self->{'clean'}{'FILES'} .= join(' ',@files);
#include "encode.h"
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.h"\n];
+ print XS qq[#include "${table}.h"\n];
}
print XS <<"END";
{
END
foreach my $table (keys %tables) {
- print XS qq[#include "${table}.exh"\n];
+ print XS qq[#include "${table}.exh"\n];
}
print XS "}\n";
close(XS);
$str .= "$name.c : $name.xs ";
foreach my $table (keys %tables)
{
- $str .= " $table.c";
+ $str .= " $table.c";
}
$str .= "\n\n";
$str .= "$name\$(OBJ_EXT) : $name.c\n\n";
my $enc2xs = $self->catfile($self->updir,'bin', 'enc2xs');
foreach my $table (keys %tables)
{
- my $numlines = 1;
- my $lengthsofar = length($str);
- my $continuator = '';
- $str .= "$table.c : $enc2xs Makefile.PL";
- foreach my $file (@{$tables{$table}})
- {
- $str .= $continuator.' '.$self->catfile($dir,$file);
- if ( length($str)-$lengthsofar > 128*$numlines )
- {
- $continuator .= " \\\n\t";
- $numlines++;
- } else {
- $continuator = '';
- }
- }
- my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
- $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
- my $ucopts = '-"Q"';
- $str .=
- qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
- open (FILELIST, ">$table.fnm")
- || die "Could not open $table.fnm: $!";
- foreach my $file (@{$tables{$table}})
- {
- print FILELIST $self->catfile($dir,$file) . "\n";
- }
- close(FILELIST);
+ my $numlines = 1;
+ my $lengthsofar = length($str);
+ my $continuator = '';
+ $str .= "$table.c : $enc2xs Makefile.PL";
+ foreach my $file (@{$tables{$table}})
+ {
+ $str .= $continuator.' '.$self->catfile($dir,$file);
+ if ( length($str)-$lengthsofar > 128*$numlines )
+ {
+ $continuator .= " \\\n\t";
+ $numlines++;
+ } else {
+ $continuator = '';
+ }
+ }
+ my $plib = $self->{PERL_CORE} ? '"-I$(PERL_LIB)"' : '';
+ $plib .= " -MCross=$::Cross::platform" if defined $::Cross::platform;
+ my $ucopts = '-"Q"';
+ $str .=
+ qq{\n\t\$(PERL) $plib $enc2xs $ucopts -o \$\@ -f $table.fnm\n\n};
+ open (FILELIST, ">$table.fnm")
+ || die "Could not open $table.fnm: $!";
+ foreach my $file (@{$tables{$table}})
+ {
+ print FILELIST $self->catfile($dir,$file) . "\n";
+ }
+ close(FILELIST);
}
return $str;
}
package Encode::TW;
+
BEGIN {
- if (ord("A") == 193) {
- die "Encode::TW not supported on EBCDIC\n";
+ if ( ord("A") == 193 ) {
+ die "Encode::TW not supported on EBCDIC\n";
}
}
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode;
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
1;
__END__
Canonical Alias Description
--------------------------------------------------------------------
big5-eten /\bbig-?5$/i Big5 encoding (with ETen extensions)
- /\bbig5-?et(en)?$/i
- /\btca-?big5$/i
+ /\bbig5-?et(en)?$/i
+ /\btca-?big5$/i
big5-hkscs /\bbig5-?hk(scs)?$/i
/\bhk(scs)?-?big5$/i
Big5 + Cantonese characters in Hong Kong
WriteMakefile(
INC => "-I../Encode",
- NAME => 'Encode::Unicode',
- VERSION_FROM => "Unicode.pm",
- MAN3PODS => {},
- );
+ NAME => 'Encode::Unicode',
+ VERSION_FROM => "Unicode.pm",
+ MAN3PODS => {},
+ );
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
-XSLoader::load(__PACKAGE__,$VERSION);
+XSLoader::load( __PACKAGE__, $VERSION );
#
# Object Generator 8 transcoders all at once!
require Encode;
-our %BOM_Unknown = map {$_ => 1} qw(UTF-16 UTF-32);
+our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32);
-for my $name (qw(UTF-16 UTF-16BE UTF-16LE
- UTF-32 UTF-32BE UTF-32LE
- UCS-2BE UCS-2LE))
+for my $name (
+ qw(UTF-16 UTF-16BE UTF-16LE
+ UTF-32 UTF-32BE UTF-32LE
+ UCS-2BE UCS-2LE)
+ )
{
- my ($size, $endian, $ucs2, $mask);
+ my ( $size, $endian, $ucs2, $mask );
$name =~ /^(\w+)-(\d+)(\w*)$/o;
- if ($ucs2 = ($1 eq 'UCS')){
- $size = 2;
- }else{
- $size = $2/8;
+ if ( $ucs2 = ( $1 eq 'UCS' ) ) {
+ $size = 2;
}
- $endian = ($3 eq 'BE') ? 'n' : ($3 eq 'LE') ? 'v' : '' ;
+ else {
+ $size = $2 / 8;
+ }
+ $endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : '';
$size == 4 and $endian = uc($endian);
- $Encode::Encoding{$name} =
- bless {
- Name => $name,
- size => $size,
- endian => $endian,
- ucs2 => $ucs2,
- } => __PACKAGE__;
+ $Encode::Encoding{$name} = bless {
+ Name => $name,
+ size => $size,
+ endian => $endian,
+ ucs2 => $ucs2,
+ } => __PACKAGE__;
}
use base qw(Encode::Encoding);
-sub renew {
+sub renew {
my $self = shift;
- $BOM_Unknown{$self->name} or return $self;
- my $clone = bless { %$self } => ref($self);
- $clone->{renewed}++; # so the caller knows it is renewed.
+ $BOM_Unknown{ $self->name } or return $self;
+ my $clone = bless {%$self} => ref($self);
+ $clone->{renewed}++; # so the caller knows it is renewed.
return $clone;
}
/*
- $Id: Unicode.xs,v 2.2 2006/04/06 15:44:11 dankogai Exp dankogai $
+ $Id: Unicode.xs,v 2.3 2006/05/03 18:24:10 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
U8 *s = *sp;
UV v = 0;
if (s+size > e) {
- croak("Partial character %c",(char) endian);
+ croak("Partial character %c",(char) endian);
}
switch(endian) {
case 'N':
- v = *s++;
- v = (v << 8) | *s++;
+ v = *s++;
+ v = (v << 8) | *s++;
case 'n':
- v = (v << 8) | *s++;
- v = (v << 8) | *s++;
- break;
+ v = (v << 8) | *s++;
+ v = (v << 8) | *s++;
+ break;
case 'V':
case 'v':
- v |= *s++;
- v |= (*s++ << 8);
- if (endian == 'v')
- break;
- v |= (*s++ << 16);
- v |= (*s++ << 24);
- break;
+ v |= *s++;
+ v |= (*s++ << 8);
+ if (endian == 'v')
+ break;
+ v |= (*s++ << 16);
+ v |= (*s++ << 24);
+ break;
default:
- croak("Unknown endian %c",(char) endian);
- break;
+ croak("Unknown endian %c",(char) endian);
+ break;
}
*sp = s;
return v;
switch(endian) {
case 'v':
case 'V':
- d += SvCUR(result);
- SvCUR_set(result,SvCUR(result)+size);
- while (size--) {
- *d++ = (U8)(value & 0xFF);
- value >>= 8;
- }
- break;
+ d += SvCUR(result);
+ SvCUR_set(result,SvCUR(result)+size);
+ while (size--) {
+ *d++ = (U8)(value & 0xFF);
+ value >>= 8;
+ }
+ break;
case 'n':
case 'N':
- SvCUR_set(result,SvCUR(result)+size);
- d += SvCUR(result);
- while (size--) {
- *--d = (U8)(value & 0xFF);
- value >>= 8;
- }
- break;
+ SvCUR_set(result,SvCUR(result)+size);
+ d += SvCUR(result);
+ while (size--) {
+ *--d = (U8)(value & 0xFF);
+ value >>= 8;
+ }
+ break;
default:
- croak("Unknown endian %c",(char) endian);
- break;
+ croak("Unknown endian %c",(char) endian);
+ break;
}
}
SvUTF8_on(result);
if (!endian && s+size <= e) {
- UV bom;
- endian = (size == 4) ? 'N' : 'n';
- bom = enc_unpack(aTHX_ &s,e,size,endian);
+ UV bom;
+ endian = (size == 4) ? 'N' : 'n';
+ bom = enc_unpack(aTHX_ &s,e,size,endian);
if (bom != BOM_BE) {
- if (bom == BOM16LE) {
- endian = 'v';
- }
- else if (bom == BOM32LE) {
- endian = 'V';
- }
- else {
- croak("%"SVf":Unrecognised BOM %"UVxf,
+ if (bom == BOM16LE) {
+ endian = 'v';
+ }
+ else if (bom == BOM32LE) {
+ endian = 'V';
+ }
+ else {
+ croak("%"SVf":Unrecognised BOM %"UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
- bom);
- }
- }
+ bom);
+ }
+ }
#if 1
- /* Update endian for next sequence */
- if (renewed) {
- hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
- }
+ /* Update endian for next sequence */
+ if (renewed) {
+ hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+ }
#endif
}
while (s < e && s+size <= e) {
- UV ord = enc_unpack(aTHX_ &s,e,size,endian);
- U8 *d;
- if (issurrogate(ord)) {
- if (ucs2 || size == 4) {
- if (check) {
- croak("%"SVf":no surrogates allowed %"UVxf,
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),
- ord);
- }
- if (s+size <= e) {
+ UV ord = enc_unpack(aTHX_ &s,e,size,endian);
+ U8 *d;
+ if (issurrogate(ord)) {
+ if (ucs2 || size == 4) {
+ if (check) {
+ croak("%"SVf":no surrogates allowed %"UVxf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ }
+ if (s+size <= e) {
/* skip the next one as well */
- enc_unpack(aTHX_ &s,e,size,endian);
- }
- ord = FBCHAR;
- }
- else {
- UV lo;
- if (!isHiSurrogate(ord)) {
- if (check) {
- croak("%"SVf":Malformed HI surrogate %"UVxf,
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),
- ord);
- }
- else {
- ord = FBCHAR;
- }
- }
- else {
- if (s+size > e) {
- /* Partial character */
- s -= size; /* back up to 1st half */
- break; /* And exit loop */
- }
- lo = enc_unpack(aTHX_ &s,e,size,endian);
- if (!isLoSurrogate(lo)){
- if (check) {
- croak("%"SVf":Malformed LO surrogate %"UVxf,
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),
- ord);
- }
- else {
- ord = FBCHAR;
- }
- }
- else {
- ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
- }
- }
- }
- }
+ enc_unpack(aTHX_ &s,e,size,endian);
+ }
+ ord = FBCHAR;
+ }
+ else {
+ UV lo;
+ if (!isHiSurrogate(ord)) {
+ if (check) {
+ croak("%"SVf":Malformed HI surrogate %"UVxf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ }
+ else {
+ ord = FBCHAR;
+ }
+ }
+ else {
+ if (s+size > e) {
+ /* Partial character */
+ s -= size; /* back up to 1st half */
+ break; /* And exit loop */
+ }
+ lo = enc_unpack(aTHX_ &s,e,size,endian);
+ if (!isLoSurrogate(lo)){
+ if (check) {
+ croak("%"SVf":Malformed LO surrogate %"UVxf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ }
+ else {
+ ord = FBCHAR;
+ }
+ }
+ else {
+ ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
+ }
+ }
+ }
+ }
- if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
- if (check) {
- croak("%"SVf":Unicode character %"UVxf" is illegal",
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),
- ord);
- } else {
- ord = FBCHAR;
- }
- }
+ if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
+ if (check) {
+ croak("%"SVf":Unicode character %"UVxf" is illegal",
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),
+ ord);
+ } else {
+ ord = FBCHAR;
+ }
+ }
- d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
- d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
- SvCUR_set(result,d - (U8 *)SvPVX(result));
+ d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
+ d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
+ SvCUR_set(result,d - (U8 *)SvPVX(result));
}
if (s < e) {
- /* unlikely to happen because it's fixed-length -- dankogai */
- if (check & ENCODE_WARN_ON_ERR){
- Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
- *hv_fetch((HV *)SvRV(obj),"Name",4,0));
- }
+ /* unlikely to happen because it's fixed-length -- dankogai */
+ if (check & ENCODE_WARN_ON_ERR){
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0));
+ }
}
if (check && !(check & ENCODE_LEAVE_SRC)){
- if (s < e) {
- Move(s,SvPVX(str),e-s,U8);
- SvCUR_set(str,(e-s));
- }
- else {
- SvCUR_set(str,0);
- }
- *SvEND(str) = '\0';
+ if (s < e) {
+ Move(s,SvPVX(str),e-s,U8);
+ SvCUR_set(str,(e-s));
+ }
+ else {
+ SvCUR_set(str,0);
+ }
+ *SvEND(str) = '\0';
}
XSRETURN(1);
}
U8 *e = (U8 *)SvEND(utf8);
ST(0) = sv_2mortal(result);
if (!endian) {
- endian = (size == 4) ? 'N' : 'n';
- enc_pack(aTHX_ result,size,endian,BOM_BE);
+ endian = (size == 4) ? 'N' : 'n';
+ enc_pack(aTHX_ result,size,endian,BOM_BE);
#if 1
- /* Update endian for next sequence */
- if (renewed){
- hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
- }
+ /* Update endian for next sequence */
+ if (renewed){
+ hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+ }
#endif
}
while (s < e && s+UTF8SKIP(s) <= e) {
- STRLEN len;
- UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
+ STRLEN len;
+ UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
s += len;
- if (size != 4 && invalid_ucs2(ord)) {
- if (!issurrogate(ord)){
- if (ucs2) {
- if (check) {
- croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
- *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
- }
- enc_pack(aTHX_ result,size,endian,FBCHAR);
- }else{
- UV hi = ((ord - 0x10000) >> 10) + 0xD800;
- UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
- enc_pack(aTHX_ result,size,endian,hi);
- enc_pack(aTHX_ result,size,endian,lo);
- }
- }
- else {
- /* not supposed to happen */
- enc_pack(aTHX_ result,size,endian,FBCHAR);
- }
- }
- else {
- enc_pack(aTHX_ result,size,endian,ord);
- }
+ if (size != 4 && invalid_ucs2(ord)) {
+ if (!issurrogate(ord)){
+ if (ucs2) {
+ if (check) {
+ croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
+ }
+ enc_pack(aTHX_ result,size,endian,FBCHAR);
+ }else{
+ UV hi = ((ord - 0x10000) >> 10) + 0xD800;
+ UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
+ enc_pack(aTHX_ result,size,endian,hi);
+ enc_pack(aTHX_ result,size,endian,lo);
+ }
+ }
+ else {
+ /* not supposed to happen */
+ enc_pack(aTHX_ result,size,endian,FBCHAR);
+ }
+ }
+ else {
+ enc_pack(aTHX_ result,size,endian,ord);
+ }
}
if (s < e) {
- /* UTF-8 partial char happens often on PerlIO.
- Since this is okay and normal, we do not warn.
- But this is critical when you choose to LEAVE_SRC
- in which case we die */
- if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){
- Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
- "when CHECK = 0x%" UVuf,
- *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
- }
-
+ /* UTF-8 partial char happens often on PerlIO.
+ Since this is okay and normal, we do not warn.
+ But this is critical when you choose to LEAVE_SRC
+ in which case we die */
+ if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){
+ Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
+ "when CHECK = 0x%" UVuf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
+ }
+
}
if (check && !(check & ENCODE_LEAVE_SRC)){
- if (s < e) {
- Move(s,SvPVX(utf8),e-s,U8);
- SvCUR_set(utf8,(e-s));
- }
- else {
- SvCUR_set(utf8,0);
- }
- *SvEND(utf8) = '\0';
+ if (s < e) {
+ Move(s,SvPVX(utf8),e-s,U8);
+ SvCUR_set(utf8,(e-s));
+ }
+ else {
+ SvCUR_set(utf8,0);
+ }
+ *SvEND(utf8) = '\0';
}
XSRETURN(1);
}
use warnings;
use Getopt::Std;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
my $sym = "${enc}_encoding";
$sym =~ s/\W+/_/g;
my @info = ($e2u->{Cname},$u2e->{Cname},"${sym}_rep_character",$replen,
- $min_el,$max_el);
+ $min_el,$max_el);
print C "static const U8 ${sym}_rep_character[] = \"$rep\";\n";
print C "static const char ${sym}_enc_name[] = \"$enc\";\n\n";
print C "const encode_t $sym = \n";
# This is to make null encoding work -- dankogai
for (my $i = (scalar @info) - 1; $i >= 0; --$i){
- $info[$i] ||= 1;
+ $info[$i] ||= 1;
}
# end of null tweak -- dankogai
print C " {",join(',',@info,"{${sym}_enc_name,(const char *)0}"),"};\n\n";
$strings_in_acc{$s} = $index;
} else {
OPTIMISER: {
- if ($opt{'O'}) {
- my $sublength = length $s;
- while (--$sublength > 0) {
- # progressively lop characters off the end, to see if the start of
- # the new string overlaps the end of the accumulator.
- if (substr ($string_acc, -$sublength)
- eq substr ($s, 0, $sublength)) {
- $subsave += $sublength;
- $strings_in_acc{$s} = length ($string_acc) - $sublength;
- # append the last bit on the end.
- $string_acc .= substr ($s, $sublength);
- last OPTIMISER;
- }
- # or if the end of the new string overlaps the start of the
- # accumulator
- next unless substr ($string_acc, 0, $sublength)
- eq substr ($s, -$sublength);
- # well, the last $sublength characters of the accumulator match.
- # so as we're prepending to the accumulator, need to shift all our
- # existing offsets forwards
- $_ += $sublength foreach values %strings_in_acc;
- $subsave += $sublength;
- $strings_in_acc{$s} = 0;
- # append the first bit on the start.
- $string_acc = substr ($s, 0, -$sublength) . $string_acc;
- last OPTIMISER;
- }
- }
- # Optimiser (if it ran) found nothing, so just going have to tack the
- # whole thing on the end.
- $strings_in_acc{$s} = length $string_acc;
- $string_acc .= $s;
+ if ($opt{'O'}) {
+ my $sublength = length $s;
+ while (--$sublength > 0) {
+ # progressively lop characters off the end, to see if the start of
+ # the new string overlaps the end of the accumulator.
+ if (substr ($string_acc, -$sublength)
+ eq substr ($s, 0, $sublength)) {
+ $subsave += $sublength;
+ $strings_in_acc{$s} = length ($string_acc) - $sublength;
+ # append the last bit on the end.
+ $string_acc .= substr ($s, $sublength);
+ last OPTIMISER;
+ }
+ # or if the end of the new string overlaps the start of the
+ # accumulator
+ next unless substr ($string_acc, 0, $sublength)
+ eq substr ($s, -$sublength);
+ # well, the last $sublength characters of the accumulator match.
+ # so as we're prepending to the accumulator, need to shift all our
+ # existing offsets forwards
+ $_ += $sublength foreach values %strings_in_acc;
+ $subsave += $sublength;
+ $strings_in_acc{$s} = 0;
+ # append the first bit on the start.
+ $string_acc = substr ($s, 0, -$sublength) . $string_acc;
+ last OPTIMISER;
+ }
+ }
+ # Optimiser (if it ran) found nothing, so just going have to tack the
+ # whole thing on the end.
+ $strings_in_acc{$s} = length $string_acc;
+ $string_acc .= $s;
};
}
}
eval { require File::Find; };
my (@inc, %e2x_dir);
for my $inc (@INC){
- push @inc, $inc unless $inc eq '.'; #skip current dir
+ push @inc, $inc unless $inc eq '.'; #skip current dir
}
File::Find::find(
- sub {
- my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,$blksize,$blocks)
- = lstat($_) or return;
- -f _ or return;
- if (/^.*\.e2x$/o){
- no warnings 'once';
- $e2x_dir{$File::Find::dir} ||= $mtime;
- }
- return;
- }, @inc);
+ sub {
+ my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
+ $atime,$mtime,$ctime,$blksize,$blocks)
+ = lstat($_) or return;
+ -f _ or return;
+ if (/^.*\.e2x$/o){
+ no warnings 'once';
+ $e2x_dir{$File::Find::dir} ||= $mtime;
+ }
+ return;
+ }, @inc);
warn join("\n", keys %e2x_dir), "\n";
for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){
- $_E2X = $d;
- # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
- return $_E2X;
+ $_E2X = $d;
+ # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
+ return $_E2X;
}
}
}
use vars qw(
- $_ModLines
- $_LocalVer
- );
+ $_ModLines
+ $_LocalVer
+ );
sub make_configlocal_pm
{
my %in_core = map {$_=>1}('ascii','iso-8859-1','utf8');
my %LocalMod = ();
for my $d (@INC){
- my $inc = File::Spec->catfile($d, "Encode");
- -d $inc or next;
- opendir my $dh, $inc or die "$inc:$!";
- warn "Checking $inc...\n";
- for my $f (grep /\.pm$/o, readdir($dh)){
- -f File::Spec->catfile($inc, "$f") or next;
- $INC{"Encode/$f"} and next;
- warn "require Encode/$f;\n";
- eval { require "Encode/$f"; };
- $@ and die "Can't require Encode/$f: $@\n";
- for my $enc (Encode->encodings()){
- no warnings 'once';
- $in_core{$enc} and next;
- $Encode::Config::ExtModule{$enc} and next;
- my $mod = "Encode/$f";
- $mod =~ s/\.pm$//o; $mod =~ s,/,::,og;
- $LocalMod{$enc} ||= $mod;
- }
- }
+ my $inc = File::Spec->catfile($d, "Encode");
+ -d $inc or next;
+ opendir my $dh, $inc or die "$inc:$!";
+ warn "Checking $inc...\n";
+ for my $f (grep /\.pm$/o, readdir($dh)){
+ -f File::Spec->catfile($inc, "$f") or next;
+ $INC{"Encode/$f"} and next;
+ warn "require Encode/$f;\n";
+ eval { require "Encode/$f"; };
+ $@ and die "Can't require Encode/$f: $@\n";
+ for my $enc (Encode->encodings()){
+ no warnings 'once';
+ $in_core{$enc} and next;
+ $Encode::Config::ExtModule{$enc} and next;
+ my $mod = "Encode/$f";
+ $mod =~ s/\.pm$//o; $mod =~ s,/,::,og;
+ $LocalMod{$enc} ||= $mod;
+ }
+ }
}
$_ModLines = "";
for my $enc (sort keys %LocalMod){
- $_ModLines .=
- qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n);
+ $_ModLines .=
+ qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n);
}
warn $_ModLines;
$_LocalVer = _mkversion();
$_E2X = find_e2x();
$_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o;
_print_expand(File::Spec->catfile($_E2X,"ConfigLocal_PM.e2x"),
- File::Spec->catfile($_Inc,"ConfigLocal.pm"),
- 1);
+ File::Spec->catfile($_Inc,"ConfigLocal.pm"),
+ 1);
exit;
}
File::Basename->import();
my ($src, $dst, $clobber) = @_;
if (!$clobber and -e $dst){
- warn "$dst exists. skipping\n";
- return;
+ warn "$dst exists. skipping\n";
+ return;
}
warn "Generating $dst...\n";
open my $in, $src or die "$src : $!";
if ((my $d = dirname($dst)) ne '.'){
- -d $d or mkdir $d, 0755 or die "mkdir $d : $!";
+ -d $d or mkdir $d, 0755 or die "mkdir $d : $!";
}
open my $out, ">$dst" or die "$!";
my $asis = 0;
while (<$in>){
- if (/^#### END_OF_HEADER/){
- $asis = 1; next;
- }
- s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
- print $out $_;
+ if (/^#### END_OF_HEADER/){
+ $asis = 1; next;
+ }
+ s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
+ print $out $_;
}
}
__END__
#!./perl
-# $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
+# $Id: piconv,v 2.2 2006/05/03 18:24:10 dankogai Exp $
#
use 5.8.0;
use strict;
help()
unless
GetOptions(\%Opt,
- 'from|f=s',
- 'to|t=s',
- 'list|l',
- 'string|s=s',
- 'check|C=i',
- 'c',
- 'perlqq|p',
- 'debug|D',
- 'scheme|S=s',
- 'resolve|r=s',
- 'help',
- );
+ 'from|f=s',
+ 'to|t=s',
+ 'list|l',
+ 'string|s=s',
+ 'check|C=i',
+ 'c',
+ 'perlqq|p',
+ 'htmlcref',
+ 'xmlcref',
+ 'debug|D',
+ 'scheme|S=s',
+ 'resolve|r=s',
+ 'help',
+ );
$Opt{help} and help();
$Opt{list} and list_encodings();
$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
$Opt{check} ||= $Opt{c};
-$Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
+$Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
+$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
+$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
if ($Opt{debug}){
my $cfrom = Encode->getEncoding($from)->name;
}
# we do not use <> (or ARGV) for the sake of binmode()
-@ARGV or push @ARGV, \*STDIN;
+@ARGV or push @ARGV, \*STDIN;
-unless ($scheme eq 'perlio'){
+unless ( $scheme eq 'perlio' ) {
binmode STDOUT;
- for my $argv (@ARGV){
- my $ifh = ref $argv ? $argv : undef;
- $ifh or open $ifh, "<", $argv or next;
- binmode $ifh;
- if ($scheme eq 'from_to'){ # default
- while(<$ifh>){
- Encode::from_to($_, $from, $to, $Opt{check});
- print;
- }
- }elsif ($scheme eq 'decode_encode'){ # step-by-step
- while(<$ifh>){
- my $decoded = decode($from, $_, $Opt{check});
- my $encoded = encode($to, $decoded);
- print $encoded;
- }
- } else { # won't reach
- die "$name: unknown scheme: $scheme";
- }
+ for my $argv (@ARGV) {
+ my $ifh = ref $argv ? $argv : undef;
+ $ifh or open $ifh, "<", $argv or next;
+ binmode $ifh;
+ if ( $scheme eq 'from_to' ) { # default
+ while (<$ifh>) {
+ Encode::from_to( $_, $from, $to, $Opt{check} );
+ print;
+ }
+ }
+ elsif ( $scheme eq 'decode_encode' ) { # step-by-step
+ while (<$ifh>) {
+ my $decoded = decode( $from, $_, $Opt{check} );
+ my $encoded = encode( $to, $decoded );
+ print $encoded;
+ }
+ }
+ else { # won't reach
+ die "$name: unknown scheme: $scheme";
+ }
}
-}else{
+}
+else {
+
# NI-S favorite
binmode STDOUT => "raw:encoding($to)";
- for my $argv (@ARGV){
- my $ifh = ref $argv ? $argv : undef;
- $ifh or open $ifh, "<", $argv or next;
- binmode $ifh => "raw:encoding($from)";
- print while(<$ifh>);
+ for my $argv (@ARGV) {
+ my $ifh = ref $argv ? $argv : undef;
+ $ifh or open $ifh, "<", $argv or next;
+ binmode $ifh => "raw:encoding($from)";
+ print while (<$ifh>);
}
}
-sub list_encodings{
- print join("\n", Encode->encodings(":all")), "\n";
+sub list_encodings {
+ print join( "\n", Encode->encodings(":all") ), "\n";
exit 0;
}
sub resolve_encoding {
- if (my $alias = Encode::resolve_alias($_[0])) {
- print $alias, "\n";
- exit 0;
- } else {
- warn "$name: $_[0] is not known to Encode\n";
- exit 1;
+ if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
+ print $alias, "\n";
+ exit 0;
+ }
+ else {
+ warn "$name: $_[0] is not known to Encode\n";
+ exit 1;
}
}
-sub help{
+sub help {
my $message = shift;
$message and print STDERR "$name error: $message\n";
print STDERR <<"EOT";
"string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
-D,--debug show debug information
- -C N | -c | -p check the validity of the input
+ -C N | -c check the validity of the input
-S,--scheme scheme use the scheme for conversion
+Those are handy when you can only see ascii characters:
+ -p,--perlqq
+ --htmlcref
+ --xmlcref
EOT
- exit;
+ exit;
}
__END__
=item -p,--perlqq
-Same as C<-C -1>.
+=item --htmlcref
+
+=item --xmlcref
+
+Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try
+
+ piconv -f utf8 -t ascii --perlqq
+
+To see what it does.
=item -h,--help
#!/usr/bin/perl
-# $Id: ucm2table,v 2.0 2004/05/16 20:55:16 dankogai Exp $
+# $Id: ucm2table,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
use 5.006;
my $Hex = '[0-9A-Fa-f]';
while(<>){
- chomp;
- my ($uni, $enc, $fb) =
- /^<U($Hex+)>\s+(\S+)\s+\|(\d)/o or next;
- $fb eq '0' or next;
- my @byte = ();
- my $ord = 0;
- while($enc =~ /\G\\x($Hex+)/iog){
- my $byte = hex($1);
- push @byte, $byte;
- $ord <<= 8; $ord += $byte;
- };
- # print join('', @byte), " => $ord \n";
- if ($Opt{u}){
- $Chartab{$ord} = pack("U", hex($uni));
- }else{
- $Chartab{$ord} = pack("C*", @byte);
- }
+ chomp;
+ my ($uni, $enc, $fb) =
+ /^<U($Hex+)>\s+(\S+)\s+\|(\d)/o or next;
+ $fb eq '0' or next;
+ my @byte = ();
+ my $ord = 0;
+ while($enc =~ /\G\\x($Hex+)/iog){
+ my $byte = hex($1);
+ push @byte, $byte;
+ $ord <<= 8; $ord += $byte;
+ };
+ # print join('', @byte), " => $ord \n";
+ if ($Opt{u}){
+ $Chartab{$ord} = pack("U", hex($uni));
+ }else{
+ $Chartab{$ord} = pack("C*", @byte);
+ }
}
my $start = $Opt{a} ? 0x20 : 0xa0;
for (my $x = $start; $x <= 0xffff; $x += 32) {
my $line = '';
for my $i (0..31){
- my $num = $x+$i; $num eq 0x7f and next; # skip delete
- my $char = $Chartab{$num};
- $line .= !$char ? " " :
- ($num < 0x7f ) ? " $char" : $char ;
+ my $num = $x+$i; $num eq 0x7f and next; # skip delete
+ my $char = $Chartab{$num};
+ $line .= !$char ? " " :
+ ($num < 0x7f ) ? " $char" : $char ;
}
$line =~ /^\s+$/o and next;
printf "0x%04x: $line\n", $x;
#!/usr/local/bin/perl
#
-# $Id: ucmlint,v 2.0 2004/05/16 20:55:16 dankogai Exp $
+# $Id: ucmlint,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Getopt::Std;
our %Opt;
my ($msg, $level) = @_;
my $lstr;
if ($level == 2){
- $lstr = 'notice';
+ $lstr = 'notice';
}elsif ($level == 1){
- $lstr = 'warning'; $nwarning++;
+ $lstr = 'warning'; $nwarning++;
}else{
- $lstr = 'error'; $nerror++;
+ $lstr = 'error'; $nerror++;
}
print "$ARGV:$lstr in line $.: $msg\n";
}
$in_charmap = $nerror = $nwarning = 0;
$. = 0;
while(<UCM>){
- chomp;
- s/\s*#.*$//o; /^$/ and next;
- if ($_ eq "CHARMAP"){
- $in_charmap = 1;
- for my $must (qw/code_set_name mb_cur_min mb_cur_max/){
- exists $Hdr{$must} or nit "<$must> nonexistent";
- }
- $Hdr{mb_cur_min} > $Hdr{mb_cur_max}
- and nit sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
- $Hdr{mb_cur_min},$Hdr{mb_cur_max});
- $in_charmap = 1;
- next;
- }
- unless ($in_charmap){
- my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next;
- $Opt{D} and warn "$hkey => $hvalue";
- if ($hkey eq "code_set_name"){ # name check
- exists $Hdr{code_set_name}
- and nit "Duplicate <code_set_name>: $hkey";
- }
- if ($hkey eq "code_set_alias"){ # alias check
- $hvalue eq $Hdr{code_set_name}
- and nit qq(alias "$hvalue" is already in <code_set_name>);
- }
- $Hdr{$hkey} = $hvalue;
- }else{
- my $name = $Hdr{code_set_name};
- my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s(\S+)/o or next;
- $Opt{v} and nit $_, 2;
- my $uni = uniparse($unistr);
- my $enc = encparse($encstr);
- $fb =~ /^\|([0123])$/ or nit "malformed fallback: $fb";
- $fb = $1;
- $Opt{f} and $fb = 0;
- unless ($fb == 1){ # check uni -> enc
- if (exists $U2E{$uni}){
- nit "dupe encode map: U$uni => $U2E{$uni} and $enc", 1;
- }else{
- $U2E{$uni} = $enc;
- if ($Opt{e} and $fb != 3) {
- my $e = hex2enc($enc);
- my $u = hex2uni($uni);
- my $eu = Encode::encode($name, $u);
- $e eq $eu
- or nit qq(encode('$name', $uni) != $enc);
- }
- }
- }
- unless ($fb == 3){ # check enc -> uni
- if (exists $E2U{$enc}){
- nit "dupe decode map: $enc => U$E2U{$enc} and U$uni", 1;
- }else{
- $E2U{$enc} = $uni;
- if ($Opt{e} and $fb != 1) {
- my $e = hex2enc($enc);
- my $u = hex2uni($uni);
- $Opt{D} and warn "$uni, $enc";
- my $de = Encode::decode($name, $e);
- $de eq $u
- or nit qq(decode('$name', $enc) != $uni);
- }
- }
- }
- # warn "$uni, $enc, $fb";
- }
+ chomp;
+ s/\s*#.*$//o; /^$/ and next;
+ if ($_ eq "CHARMAP"){
+ $in_charmap = 1;
+ for my $must (qw/code_set_name mb_cur_min mb_cur_max/){
+ exists $Hdr{$must} or nit "<$must> nonexistent";
+ }
+ $Hdr{mb_cur_min} > $Hdr{mb_cur_max}
+ and nit sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
+ $Hdr{mb_cur_min},$Hdr{mb_cur_max});
+ $in_charmap = 1;
+ next;
+ }
+ unless ($in_charmap){
+ my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next;
+ $Opt{D} and warn "$hkey => $hvalue";
+ if ($hkey eq "code_set_name"){ # name check
+ exists $Hdr{code_set_name}
+ and nit "Duplicate <code_set_name>: $hkey";
+ }
+ if ($hkey eq "code_set_alias"){ # alias check
+ $hvalue eq $Hdr{code_set_name}
+ and nit qq(alias "$hvalue" is already in <code_set_name>);
+ }
+ $Hdr{$hkey} = $hvalue;
+ }else{
+ my $name = $Hdr{code_set_name};
+ my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s(\S+)/o or next;
+ $Opt{v} and nit $_, 2;
+ my $uni = uniparse($unistr);
+ my $enc = encparse($encstr);
+ $fb =~ /^\|([0123])$/ or nit "malformed fallback: $fb";
+ $fb = $1;
+ $Opt{f} and $fb = 0;
+ unless ($fb == 1){ # check uni -> enc
+ if (exists $U2E{$uni}){
+ nit "dupe encode map: U$uni => $U2E{$uni} and $enc", 1;
+ }else{
+ $U2E{$uni} = $enc;
+ if ($Opt{e} and $fb != 3) {
+ my $e = hex2enc($enc);
+ my $u = hex2uni($uni);
+ my $eu = Encode::encode($name, $u);
+ $e eq $eu
+ or nit qq(encode('$name', $uni) != $enc);
+ }
+ }
+ }
+ unless ($fb == 3){ # check enc -> uni
+ if (exists $E2U{$enc}){
+ nit "dupe decode map: $enc => U$E2U{$enc} and U$uni", 1;
+ }else{
+ $E2U{$enc} = $uni;
+ if ($Opt{e} and $fb != 1) {
+ my $e = hex2enc($enc);
+ my $u = hex2uni($uni);
+ $Opt{D} and warn "$uni, $enc";
+ my $de = Encode::decode($name, $e);
+ $de eq $u
+ or nit qq(decode('$name', $enc) != $uni);
+ }
+ }
+ }
+ # warn "$uni, $enc, $fb";
+ }
}
$in_charmap or nit "Where is CHARMAP?";
checkRT();
printf ("$ARGV: %s error%s found\n",
- ($nerror == 0 ? 'no' : $nerror),
- ($nerror > 1 ? 's' : ''));
+ ($nerror == 0 ? 'no' : $nerror),
+ ($nerror > 1 ? 's' : ''));
}
exit;
sub checkRT{
for my $uni (keys %E2U){
- my $enc = $U2E{$uni} or next; # okay
- $E2U{$U2E{$uni}} eq $uni or
- nit "RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}";
+ my $enc = $U2E{$uni} or next; # okay
+ $E2U{$U2E{$uni}} eq $uni or
+ nit "RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}";
}
for my $enc (keys %E2U){
- my $uni = $E2U{$enc} or next; # okay
- $U2E{$E2U{$enc}} eq $enc or
- nit "RT failure: $enc => U$uni => $U2E{$E2U{$enc}}";
+ my $uni = $E2U{$enc} or next; # okay
+ $U2E{$E2U{$enc}} eq $enc or
+ nit "RT failure: $enc => U$uni => $U2E{$E2U{$enc}}";
}
}
my @u;
push @u, $1 while($str =~ /\G<U(.*?)>/ig);
for my $u (@u){
- $u =~ /^([0-9A-Za-z]+)$/o
- or nit "malformed Unicode character: $u";
+ $u =~ /^([0-9A-Za-z]+)$/o
+ or nit "malformed Unicode character: $u";
}
return join(',', @u);
}
my $str = shift;
my @e;
for my $e (split /\\x/io, $str){
- $e or next; # first \x
- $e =~ /^([0-9A-Za-z]{1,2})$/io
- or nit "Hex $e in $str is bogus";
- push @e, $1;
+ $e or next; # first \x
+ $e =~ /^([0-9A-Za-z]{1,2})$/io
+ or nit "Hex $e in $str is bogus";
+ push @e, $1;
}
return join(',', @e);
}
#!/usr/local/bin/perl
#
-# $Id: ucmsort,v 2.1 2004/08/31 10:55:34 dankogai Exp $
+# $Id: ucmsort,v 2.2 2006/05/03 18:24:10 dankogai Exp $
#
use strict;
my @lines;
while (<>){
unless (m/^<U/o){
unless(@lines){
- $head .= $_;
- }else{
- $tail .= $_;
- }
- next;
+ $head .= $_;
+ }else{
+ $tail .= $_;
+ }
+ next;
}
chomp;
my @words = split;
print $head;
for (sort {
hex($a->[0]) <=> hex($b->[0]) # Unicode descending order
- or $a->[2] cmp $b->[2] # fallback descending order
- or $a->[1] cmp $b->[1] # Encoding descending order
+ or $a->[2] cmp $b->[2] # fallback descending order
+ or $a->[1] cmp $b->[1] # Encoding descending order
}
@lines) {
my $u = shift @$_;
my $string = shift;
$Opt{P} and print "#!$^X -w\nprint\n";
unless ($string){
- while(<>){
- use utf8;
- $linebuf .= Encode::decode($Opt{f}, $_);
- while($linebuf){
- my $chr = render_p(substr($linebuf, 0, 1, ''));
- length($outbuf) + length($chr) > $CPL and print_P();
- $outbuf .= $chr;
- }
- }
- $outbuf and print print_P(";");
+ while(<>){
+ use utf8;
+ $linebuf .= Encode::decode($Opt{f}, $_);
+ while($linebuf){
+ my $chr = render_p(substr($linebuf, 0, 1, ''));
+ length($outbuf) + length($chr) > $CPL and print_P();
+ $outbuf .= $chr;
+ }
+ }
+ $outbuf and print print_P(";");
}else{
- while($string){
- my $chr = render_p(substr($string, 0, 1, ''));
- length($outbuf) + length($chr) > $CPL and print_P();
- $outbuf .= $chr;
- }
+ while($string){
+ my $chr = render_p(substr($string, 0, 1, ''));
+ length($outbuf) + length($chr) > $CPL and print_P();
+ $outbuf .= $chr;
+ }
}
$outbuf and print print_P(";");
exit;
$S2pstr{$chr} and return $S2pstr{$chr}; # \t\n...
$chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable;
my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ?
- q(\x%x) : q(\x{%x});
+ q(\x%x) : q(\x{%x});
return sprintf $fmt, ord($chr);
}
my $string = shift;
!$Opt{p} and exists $Opt{H} and print_H();
unless ($string){
- while(<>){
- use utf8;
- $linebuf .= Encode::decode($Opt{f}, $_);
- while (length($linebuf) > $CPL){
- my $chunk = substr($linebuf, 0, $CPL, '');
- print_C($chunk, $linenum++);
- $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
- }
- }
- $linebuf and print_C($linebuf);
+ while(<>){
+ use utf8;
+ $linebuf .= Encode::decode($Opt{f}, $_);
+ while (length($linebuf) > $CPL){
+ my $chunk = substr($linebuf, 0, $CPL, '');
+ print_C($chunk, $linenum++);
+ $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
+ }
+ }
+ $linebuf and print_C($linebuf);
}else{
- while ($string){
- my $chunk = substr($string, 0, $CPL, '');
- print_C($chunk, $linenum++);
- $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
- }
+ while ($string){
+ my $chunk = substr($string, 0, $CPL, '');
+ print_C($chunk, $linenum++);
+ $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
+ }
}
exit;
}
sub print_S{
print "--------+------------------------------------------------";
if ($Opt{C}){
- print "-+-----------------";
+ print "-+-----------------";
}
print "\n";
}
sub print_H{
print " Offset 0 1 2 3 4 5 6 7";
if ($Opt{C}){
- print " | 0 1 2 3 4 5 6 7";
+ print " | 0 1 2 3 4 5 6 7";
}
print "\n";
print_S;
sub print_C{
my ($chunk, $linenum) = @_;
if (!$Opt{v} and $chunk eq $PrevChunk){
- printf "%08x *\n", $linenum*8; return;
+ printf "%08x *\n", $linenum*8; return;
}
$PrevChunk = $chunk;
my $end = length($chunk) - 1;
my (@ord, @chr);
for my $i (0..$end){
- use utf8;
- my $chr = substr($chunk,$i,1);
- my $ord = ord($chr);
- my $fmt = $ord <= 0xffff ? " %04x" : " %05x";
- push @ord, (sprintf $fmt, $ord);
- $Opt{C} and push @chr, render_c($chr);
+ use utf8;
+ my $chr = substr($chunk,$i,1);
+ my $ord = ord($chr);
+ my $fmt = $ord <= 0xffff ? " %04x" : " %05x";
+ push @ord, (sprintf $fmt, $ord);
+ $Opt{C} and push @chr, render_c($chr);
}
if (++$end < 7){
- for my $i ($end..7){
- push @ord, (" " x 6);
- }
+ for my $i ($end..7){
+ push @ord, (" " x 6);
+ }
}
my $line = sprintf "%08x %s", $linenum*8, join('', @ord);
$Opt{C} and $line .= sprintf " | %s", join('', @chr);
BEGIN{
our %S2pstr= (
- "\\" => '\\\\',
- "\0" => '\0',
- "\t" => '\t',
- "\n" => '\n',
- "\r" => '\r',
- "\v" => '\v',
- "\a" => '\a',
- "\e" => '\e',
- "\"" => qq(\\\"),
- "\'" => qq(\\\'),
- '$' => '\$',
- "@" => '\@',
- "%" => '\%',
- );
+ "\\" => '\\\\',
+ "\0" => '\0',
+ "\t" => '\t',
+ "\n" => '\n',
+ "\r" => '\r',
+ "\v" => '\v',
+ "\a" => '\a',
+ "\e" => '\e',
+ "\"" => qq(\\\"),
+ "\'" => qq(\\\'),
+ '$' => '\$',
+ "@" => '\@',
+ "%" => '\%',
+ );
our %S2str = (
- qq(\x00) => q(\0), # NULL
- qq(\x01) => q(^A), # START OF HEADING
- qq(\x02) => q(^B), # START OF TEXT
- qq(\x03) => q(^C), # END OF TEXT
- qq(\x04) => q(^D), # END OF TRANSMISSION
- qq(\x05) => q(^E), # ENQUIRY
- qq(\x06) => q(^F), # ACKNOWLEDGE
- qq(\x07) => q(\a), # BELL
- qq(\x08) => q(^H), # BACKSPACE
- qq(\x09) => q(\t), # HORIZONTAL TABULATION
- qq(\x0A) => q(\n), # LINE FEED
- qq(\x0B) => q(\v), # VERTICAL TABULATION
- qq(\x0C) => q(^L), # FORM FEED
- qq(\x0D) => q(\r), # CARRIAGE RETURN
- qq(\x0E) => q(^N), # SHIFT OUT
- qq(\x0F) => q(^O), # SHIFT IN
- qq(\x10) => q(^P), # DATA LINK ESCAPE
- qq(\x11) => q(^Q), # DEVICE CONTROL ONE
- qq(\x12) => q(^R), # DEVICE CONTROL TWO
- qq(\x13) => q(^S), # DEVICE CONTROL THREE
- qq(\x14) => q(^T), # DEVICE CONTROL FOUR
- qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE
- qq(\x16) => q(^V), # SYNCHRONOUS IDLE
- qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK
- qq(\x18) => q(^X), # CANCEL
- qq(\x19) => q(^Y), # END OF MEDIUM
- qq(\x1A) => q(^Z), # SUBSTITUTE
- qq(\x1B) => q(\e), # ESCAPE (\c[)
- qq(\x1C) => "^\\", # FILE SEPARATOR
- qq(\x1D) => "^\]", # GROUP SEPARATOR
- qq(\x1E) => q(^^), # RECORD SEPARATOR
- qq(\x1F) => q(^_), # UNIT SEPARATOR
- );
+ qq(\x00) => q(\0), # NULL
+ qq(\x01) => q(^A), # START OF HEADING
+ qq(\x02) => q(^B), # START OF TEXT
+ qq(\x03) => q(^C), # END OF TEXT
+ qq(\x04) => q(^D), # END OF TRANSMISSION
+ qq(\x05) => q(^E), # ENQUIRY
+ qq(\x06) => q(^F), # ACKNOWLEDGE
+ qq(\x07) => q(\a), # BELL
+ qq(\x08) => q(^H), # BACKSPACE
+ qq(\x09) => q(\t), # HORIZONTAL TABULATION
+ qq(\x0A) => q(\n), # LINE FEED
+ qq(\x0B) => q(\v), # VERTICAL TABULATION
+ qq(\x0C) => q(^L), # FORM FEED
+ qq(\x0D) => q(\r), # CARRIAGE RETURN
+ qq(\x0E) => q(^N), # SHIFT OUT
+ qq(\x0F) => q(^O), # SHIFT IN
+ qq(\x10) => q(^P), # DATA LINK ESCAPE
+ qq(\x11) => q(^Q), # DEVICE CONTROL ONE
+ qq(\x12) => q(^R), # DEVICE CONTROL TWO
+ qq(\x13) => q(^S), # DEVICE CONTROL THREE
+ qq(\x14) => q(^T), # DEVICE CONTROL FOUR
+ qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE
+ qq(\x16) => q(^V), # SYNCHRONOUS IDLE
+ qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK
+ qq(\x18) => q(^X), # CANCEL
+ qq(\x19) => q(^Y), # END OF MEDIUM
+ qq(\x1A) => q(^Z), # SUBSTITUTE
+ qq(\x1B) => q(\e), # ESCAPE (\c[)
+ qq(\x1C) => "^\\", # FILE SEPARATOR
+ qq(\x1D) => "^\]", # GROUP SEPARATOR
+ qq(\x1E) => q(^^), # RECORD SEPARATOR
+ qq(\x1F) => q(^_), # UNIT SEPARATOR
+ );
#
# Generated out of lib/unicore/EastAsianWidth.txt
# will it work ?
#
our $IsFullWidth =
- qr/^[
- \x{1100}-\x{1159}
- \x{115F}-\x{115F}
- \x{2329}-\x{232A}
- \x{2E80}-\x{2E99}
- \x{2E9B}-\x{2EF3}
- \x{2F00}-\x{2FD5}
- \x{2FF0}-\x{2FFB}
- \x{3000}-\x{303E}
- \x{3041}-\x{3096}
- \x{3099}-\x{30FF}
- \x{3105}-\x{312C}
- \x{3131}-\x{318E}
- \x{3190}-\x{31B7}
- \x{31F0}-\x{321C}
- \x{3220}-\x{3243}
- \x{3251}-\x{327B}
- \x{327F}-\x{32CB}
- \x{32D0}-\x{32FE}
- \x{3300}-\x{3376}
- \x{337B}-\x{33DD}
- \x{3400}-\x{4DB5}
- \x{4E00}-\x{9FA5}
- \x{33E0}-\x{33FE}
- \x{A000}-\x{A48C}
- \x{AC00}-\x{D7A3}
- \x{A490}-\x{A4C6}
- \x{F900}-\x{FA2D}
- \x{FA30}-\x{FA6A}
- \x{FE30}-\x{FE46}
- \x{FE49}-\x{FE52}
- \x{FE54}-\x{FE66}
- \x{FE68}-\x{FE6B}
- \x{FF01}-\x{FF60}
- \x{FFE0}-\x{FFE6}
- \x{20000}-\x{2A6D6}
- ]$/xo;
+ qr/^[
+ \x{1100}-\x{1159}
+ \x{115F}-\x{115F}
+ \x{2329}-\x{232A}
+ \x{2E80}-\x{2E99}
+ \x{2E9B}-\x{2EF3}
+ \x{2F00}-\x{2FD5}
+ \x{2FF0}-\x{2FFB}
+ \x{3000}-\x{303E}
+ \x{3041}-\x{3096}
+ \x{3099}-\x{30FF}
+ \x{3105}-\x{312C}
+ \x{3131}-\x{318E}
+ \x{3190}-\x{31B7}
+ \x{31F0}-\x{321C}
+ \x{3220}-\x{3243}
+ \x{3251}-\x{327B}
+ \x{327F}-\x{32CB}
+ \x{32D0}-\x{32FE}
+ \x{3300}-\x{3376}
+ \x{337B}-\x{33DD}
+ \x{3400}-\x{4DB5}
+ \x{4E00}-\x{9FA5}
+ \x{33E0}-\x{33FE}
+ \x{A000}-\x{A48C}
+ \x{AC00}-\x{D7A3}
+ \x{A490}-\x{A4C6}
+ \x{F900}-\x{FA2D}
+ \x{FA30}-\x{FA6A}
+ \x{FE30}-\x{FE46}
+ \x{FE49}-\x{FE52}
+ \x{FE54}-\x{FE66}
+ \x{FE68}-\x{FE6B}
+ \x{FF01}-\x{FF60}
+ \x{FFE0}-\x{FFE6}
+ \x{20000}-\x{2A6D6}
+ ]$/xo;
}
__END__
int
do_encode(const encpage_t * enc, const U8 * src, STRLEN * slen, U8 * dst,
- STRLEN dlen, STRLEN * dout, int approx, const U8 *term, STRLEN tlen)
+ STRLEN dlen, STRLEN * dout, int approx, const U8 *term, STRLEN tlen)
{
const U8 *s = src;
const U8 *send = s + *slen;
U8 *dend = d + dlen, *dlast = d;
int code = 0;
while (s < send) {
- const encpage_t *e = enc;
- U8 byte = *s;
- while (byte > e->max)
- e++;
- if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) {
- const U8 *cend = s + (e->slen & 0x7f);
- if (cend <= send) {
- STRLEN n;
- if ((n = e->dlen)) {
- const U8 *out = e->seq + n * (byte - e->min);
- U8 *oend = d + n;
- if (dst) {
- if (oend <= dend) {
- while (d < oend)
- *d++ = *out++;
- }
- else {
- /* Out of space */
- code = ENCODE_NOSPACE;
- break;
- }
- }
- else
- d = oend;
- }
- enc = e->next;
- s++;
- if (s == cend) {
- if (approx && (e->slen & 0x80))
- code = ENCODE_FALLBACK;
- last = s;
- if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) {
- code = ENCODE_FOUND_TERM;
- break;
- }
- dlast = d;
- }
- }
- else {
- /* partial source character */
- code = ENCODE_PARTIAL;
- break;
- }
- }
- else {
- /* Cannot represent */
- code = ENCODE_NOREP;
- break;
- }
+ const encpage_t *e = enc;
+ U8 byte = *s;
+ while (byte > e->max)
+ e++;
+ if (byte >= e->min && e->slen && (approx || !(e->slen & 0x80))) {
+ const U8 *cend = s + (e->slen & 0x7f);
+ if (cend <= send) {
+ STRLEN n;
+ if ((n = e->dlen)) {
+ const U8 *out = e->seq + n * (byte - e->min);
+ U8 *oend = d + n;
+ if (dst) {
+ if (oend <= dend) {
+ while (d < oend)
+ *d++ = *out++;
+ }
+ else {
+ /* Out of space */
+ code = ENCODE_NOSPACE;
+ break;
+ }
+ }
+ else
+ d = oend;
+ }
+ enc = e->next;
+ s++;
+ if (s == cend) {
+ if (approx && (e->slen & 0x80))
+ code = ENCODE_FALLBACK;
+ last = s;
+ if (term && (STRLEN)(d-dlast) == tlen && memEQ(dlast, term, tlen)) {
+ code = ENCODE_FOUND_TERM;
+ break;
+ }
+ dlast = d;
+ }
+ }
+ else {
+ /* partial source character */
+ code = ENCODE_PARTIAL;
+ break;
+ }
+ }
+ else {
+ /* Cannot represent */
+ code = ENCODE_NOREP;
+ break;
+ }
}
*slen = last - src;
*dout = d - dst;
-# $Id: encoding.pm,v 2.2 2005/09/08 14:17:17 dankogai Exp $
+# $Id: encoding.pm,v 2.3 2006/05/03 18:24:10 dankogai Exp $
package encoding;
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode;
use strict;
sub DEBUG () { 0 }
BEGIN {
- if (ord("A") == 193) {
- require Carp;
- Carp::croak("encoding: pragma does not support EBCDIC platforms");
+ if ( ord("A") == 193 ) {
+ require Carp;
+ Carp::croak("encoding: pragma does not support EBCDIC platforms");
}
}
our $HAS_PERLIO = 0;
eval { require PerlIO::encoding };
-unless ($@){
- $HAS_PERLIO = (PerlIO::encoding->VERSION >= 0.02);
+unless ($@) {
+ $HAS_PERLIO = ( PerlIO::encoding->VERSION >= 0.02 );
}
-sub _exception{
+sub _exception {
my $name = shift;
- $] > 5.008 and return 0; # 5.8.1 or higher then no
- my %utfs = map {$_=>1}
- qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
- UTF-32 UTF-32BE UTF-32LE);
- $utfs{$name} or return 0; # UTFs or no
- require Config; Config->import(); our %Config;
- return $Config{perl_patchlevel} ? 0 : 1 # maintperl then no
+ $] > 5.008 and return 0; # 5.8.1 or higher then no
+ my %utfs = map { $_ => 1 }
+ qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
+ UTF-32 UTF-32BE UTF-32LE);
+ $utfs{$name} or return 0; # UTFs or no
+ require Config;
+ Config->import();
+ our %Config;
+ return $Config{perl_patchlevel} ? 0 : 1 # maintperl then no
}
-sub in_locale { $^H & ($locale::hint_bits || 0)}
+sub in_locale { $^H & ( $locale::hint_bits || 0 ) }
sub _get_locale_encoding {
my $locale_encoding;
# I18N::Langinfo isn't available everywhere
eval {
- require I18N::Langinfo;
- I18N::Langinfo->import(qw(langinfo CODESET));
- $locale_encoding = langinfo(CODESET());
+ require I18N::Langinfo;
+ I18N::Langinfo->import(qw(langinfo CODESET));
+ $locale_encoding = langinfo( CODESET() );
};
-
+
my $country_language;
no warnings 'uninitialized';
- if (not $locale_encoding && in_locale()) {
- if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
- ($country_language, $locale_encoding) = ($1, $2);
- } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
- ($country_language, $locale_encoding) = ($1, $2);
- }
- # LANGUAGE affects only LC_MESSAGES only on glibc
- } elsif (not $locale_encoding) {
- if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
- $ENV{LANG} =~ /\butf-?8\b/i) {
- $locale_encoding = 'utf8';
- }
- # Could do more heuristics based on the country and language
- # parts of LC_ALL and LANG (the parts before the dot (if any)),
- # since we have Locale::Country and Locale::Language available.
- # TODO: get a database of Language -> Encoding mappings
- # (the Estonian database at http://www.eki.ee/letter/
- # would be excellent!) --jhi
+ if ( not $locale_encoding && in_locale() ) {
+ if ( $ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/ ) {
+ ( $country_language, $locale_encoding ) = ( $1, $2 );
+ }
+ elsif ( $ENV{LANG} =~ /^([^.]+)\.([^.]+)$/ ) {
+ ( $country_language, $locale_encoding ) = ( $1, $2 );
+ }
+
+ # LANGUAGE affects only LC_MESSAGES only on glibc
+ }
+ elsif ( not $locale_encoding ) {
+ if ( $ENV{LC_ALL} =~ /\butf-?8\b/i
+ || $ENV{LANG} =~ /\butf-?8\b/i )
+ {
+ $locale_encoding = 'utf8';
+ }
+
+ # Could do more heuristics based on the country and language
+ # parts of LC_ALL and LANG (the parts before the dot (if any)),
+ # since we have Locale::Country and Locale::Language available.
+ # TODO: get a database of Language -> Encoding mappings
+ # (the Estonian database at http://www.eki.ee/letter/
+ # would be excellent!) --jhi
}
- if (defined $locale_encoding &&
- lc($locale_encoding) eq 'euc' &&
- defined $country_language) {
- if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
- $locale_encoding = 'euc-jp';
- } elsif ($country_language =~ /^ko_KR|korean?$/i) {
- $locale_encoding = 'euc-kr';
- } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
- $locale_encoding = 'euc-cn';
- } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
- $locale_encoding = 'euc-tw';
- } else {
- require Carp;
- Carp::croak("encoding: Locale encoding '$locale_encoding' too ambiguous");
- }
+ if ( defined $locale_encoding
+ && lc($locale_encoding) eq 'euc'
+ && defined $country_language )
+ {
+ if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) {
+ $locale_encoding = 'euc-jp';
+ }
+ elsif ( $country_language =~ /^ko_KR|korean?$/i ) {
+ $locale_encoding = 'euc-kr';
+ }
+ elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)?$/i ) {
+ $locale_encoding = 'euc-cn';
+ }
+ elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) {
+ $locale_encoding = 'euc-tw';
+ }
+ else {
+ require Carp;
+ Carp::croak(
+ "encoding: Locale encoding '$locale_encoding' too ambiguous"
+ );
+ }
}
return $locale_encoding;
sub import {
my $class = shift;
my $name = shift;
- if ($name eq ':_get_locale_encoding') { # used by lib/open.pm
- my $caller = caller();
+ if ( $name eq ':_get_locale_encoding' ) { # used by lib/open.pm
+ my $caller = caller();
{
- no strict 'refs';
- *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
- }
- return;
+ no strict 'refs';
+ *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
+ }
+ return;
}
$name = _get_locale_encoding() if $name eq ':locale';
my %arg = @_;
$name = $ENV{PERL_ENCODING} unless defined $name;
my $enc = find_encoding($name);
- unless (defined $enc) {
- require Carp;
- Carp::croak("encoding: Unknown encoding '$name'");
+ unless ( defined $enc ) {
+ require Carp;
+ Carp::croak("encoding: Unknown encoding '$name'");
+ }
+ $name = $enc->name; # canonize
+ unless ( $arg{Filter} ) {
+ DEBUG and warn "_exception($name) = ", _exception($name);
+ _exception($name) or ${^ENCODING} = $enc;
+ $HAS_PERLIO or return 1;
}
- $name = $enc->name; # canonize
- unless ($arg{Filter}) {
- DEBUG and warn "_exception($name) = ", _exception($name);
- _exception($name) or ${^ENCODING} = $enc;
- $HAS_PERLIO or return 1;
- }else{
- defined(${^ENCODING}) and undef ${^ENCODING};
- # implicitly 'use utf8'
- require utf8; # to fetch $utf8::hint_bits;
- $^H |= $utf8::hint_bits;
- eval {
- require Filter::Util::Call ;
- Filter::Util::Call->import ;
- filter_add(sub{
- my $status = filter_read();
- if ($status > 0){
- $_ = $enc->decode($_, 1);
- DEBUG and warn $_;
- }
- $status ;
- });
- };
+ else {
+ defined( ${^ENCODING} ) and undef ${^ENCODING};
+
+ # implicitly 'use utf8'
+ require utf8; # to fetch $utf8::hint_bits;
+ $^H |= $utf8::hint_bits;
+ eval {
+ require Filter::Util::Call;
+ Filter::Util::Call->import;
+ filter_add(
+ sub {
+ my $status = filter_read();
+ if ( $status > 0 ) {
+ $_ = $enc->decode( $_, 1 );
+ DEBUG and warn $_;
+ }
+ $status;
+ }
+ );
+ };
$@ eq '' and DEBUG and warn "Filter installed";
}
defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
- for my $h (qw(STDIN STDOUT)){
- if ($arg{$h}){
- unless (defined find_encoding($arg{$h})) {
- require Carp;
- Carp::croak("encoding: Unknown encoding for $h, '$arg{$h}'");
- }
- eval { binmode($h, ":raw :encoding($arg{$h})") };
- }else{
- unless (exists $arg{$h}){
- eval {
- no warnings 'uninitialized';
- binmode($h, ":raw :encoding($name)");
- };
- }
- }
- if ($@){
- require Carp;
- Carp::croak($@);
- }
+ for my $h (qw(STDIN STDOUT)) {
+ if ( $arg{$h} ) {
+ unless ( defined find_encoding( $arg{$h} ) ) {
+ require Carp;
+ Carp::croak(
+ "encoding: Unknown encoding for $h, '$arg{$h}'");
+ }
+ eval { binmode( $h, ":raw :encoding($arg{$h})" ) };
+ }
+ else {
+ unless ( exists $arg{$h} ) {
+ eval {
+ no warnings 'uninitialized';
+ binmode( $h, ":raw :encoding($name)" );
+ };
+ }
+ }
+ if ($@) {
+ require Carp;
+ Carp::croak($@);
+ }
}
- return 1; # I doubt if we need it, though
+ return 1; # I doubt if we need it, though
}
-sub unimport{
+sub unimport {
no warnings;
undef ${^ENCODING};
- if ($HAS_PERLIO){
- binmode(STDIN, ":raw");
- binmode(STDOUT, ":raw");
- }else{
- binmode(STDIN);
- binmode(STDOUT);
+ if ($HAS_PERLIO) {
+ binmode( STDIN, ":raw" );
+ binmode( STDOUT, ":raw" );
+ }
+ else {
+ binmode(STDIN);
+ binmode(STDOUT);
}
- if ($INC{"Filter/Util/Call.pm"}){
- eval { filter_del() };
+ if ( $INC{"Filter/Util/Call.pm"} ) {
+ eval { filter_del() };
}
}
Notice that only literals (string or regular expression) having only
legacy code points are affected: if you mix data like this
- \xDF\x{100}
+ \xDF\x{100}
the data is assumed to be in (Latin 1 and) Unicode, not in your native
encoding. In other words, this will match in "greek":
- "\xDF" =~ /\x{3af}/
+ "\xDF" =~ /\x{3af}/
but this will not
- "\xDF\x{100}" =~ /\x{3af}\x{100}/
+ "\xDF\x{100}" =~ /\x{3af}\x{100}/
since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
use strict;
no warnings 'redefine';
use Encode;
-our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
sub DEBUG () { 0 }
use base qw(Exporter);
# Public, encouraged API is exported by default
-our @EXPORT =
- qw (
- define_alias
- find_alias
- );
+our @EXPORT =
+ qw (
+ define_alias
+ find_alias
+);
-our @Alias; # ordered matching list
-our %Alias; # cached known aliases
+our @Alias; # ordered matching list
+our %Alias; # cached known aliases
-sub find_alias{
+sub find_alias {
my $class = shift;
- my $find = shift;
- unless (exists $Alias{$find}) {
- $Alias{$find} = undef; # Recursion guard
- for (my $i=0; $i < @Alias; $i += 2){
- my $alias = $Alias[$i];
- my $val = $Alias[$i+1];
- my $new;
- if (ref($alias) eq 'Regexp' && $find =~ $alias){
- DEBUG and warn "eval $val";
- $new = eval $val;
- DEBUG and $@ and warn "$val, $@";
- }elsif (ref($alias) eq 'CODE'){
- DEBUG and warn "$alias", "->", "($find)";
- $new = $alias->($find);
- }elsif (lc($find) eq lc($alias)){
- $new = $val;
- }
- if (defined($new)){
- next if $new eq $find; # avoid (direct) recursion on bugs
- DEBUG and warn "$alias, $new";
- my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
- if ($enc){
- $Alias{$find} = $enc;
- last;
- }
- }
- }
- # case insensitive search when canonical is not in all lowercase
- # RT ticket #7835
- unless ($Alias{$find}){
- my $lcfind = lc($find);
- for my $name (keys %Encode::Encoding, keys %Encode::ExtModule){
- $lcfind eq lc($name) or next;
- $Alias{$find} = Encode::find_encoding($name);
- DEBUG and warn "$find => $name";
- }
- }
+ my $find = shift;
+ unless ( exists $Alias{$find} ) {
+ $Alias{$find} = undef; # Recursion guard
+ for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
+ my $alias = $Alias[$i];
+ my $val = $Alias[ $i + 1 ];
+ my $new;
+ if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
+ DEBUG and warn "eval $val";
+ $new = eval $val;
+ DEBUG and $@ and warn "$val, $@";
+ }
+ elsif ( ref($alias) eq 'CODE' ) {
+ DEBUG and warn "$alias", "->", "($find)";
+ $new = $alias->($find);
+ }
+ elsif ( lc($find) eq lc($alias) ) {
+ $new = $val;
+ }
+ if ( defined($new) ) {
+ next if $new eq $find; # avoid (direct) recursion on bugs
+ DEBUG and warn "$alias, $new";
+ my $enc =
+ ( ref($new) ) ? $new : Encode::find_encoding($new);
+ if ($enc) {
+ $Alias{$find} = $enc;
+ last;
+ }
+ }
+ }
+
+ # case insensitive search when canonical is not in all lowercase
+ # RT ticket #7835
+ unless ( $Alias{$find} ) {
+ my $lcfind = lc($find);
+ for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
+ {
+ $lcfind eq lc($name) or next;
+ $Alias{$find} = Encode::find_encoding($name);
+ DEBUG and warn "$find => $name";
+ }
+ }
}
- if (DEBUG){
- my $name;
- if (my $e = $Alias{$find}){
- $name = $e->name;
- }else{
- $name = "";
- }
- warn "find_alias($class, $find)->name = $name";
+ if (DEBUG) {
+ my $name;
+ if ( my $e = $Alias{$find} ) {
+ $name = $e->name;
+ }
+ else {
+ $name = "";
+ }
+ warn "find_alias($class, $find)->name = $name";
}
return $Alias{$find};
}
-sub define_alias{
- while (@_){
- my ($alias,$name) = splice(@_,0,2);
- unshift(@Alias, $alias => $name); # newer one has precedence
- if (ref($alias)){
- # clear %Alias cache to allow overrides
- my @a = keys %Alias;
- for my $k (@a){
- if (ref($alias) eq 'Regexp' && $k =~ $alias){
- DEBUG and warn "delete \$Alias\{$k\}";
- delete $Alias{$k};
- }
- elsif (ref($alias) eq 'CODE'){
- DEBUG and warn "delete \$Alias\{$k\}";
- delete $Alias{$alias->($name)};
- }
- }
- }else{
- DEBUG and warn "delete \$Alias\{$alias\}";
- delete $Alias{$alias};
- }
+sub define_alias {
+ while (@_) {
+ my ( $alias, $name ) = splice( @_, 0, 2 );
+ unshift( @Alias, $alias => $name ); # newer one has precedence
+ if ( ref($alias) ) {
+
+ # clear %Alias cache to allow overrides
+ my @a = keys %Alias;
+ for my $k (@a) {
+ if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
+ DEBUG and warn "delete \$Alias\{$k\}";
+ delete $Alias{$k};
+ }
+ elsif ( ref($alias) eq 'CODE' ) {
+ DEBUG and warn "delete \$Alias\{$k\}";
+ delete $Alias{ $alias->($name) };
+ }
+ }
+ }
+ else {
+ DEBUG and warn "delete \$Alias\{$alias\}";
+ delete $Alias{$alias};
+ }
}
}
# Allow latin-1 style names as well
# 0 1 2 3 4 5 6 7 8 9 10
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
+
# Allow winlatin1 style names as well
-our %Winlatin2cp = (
- 'latin1' => 1252,
- 'latin2' => 1250,
- 'cyrillic' => 1251,
- 'greek' => 1253,
- 'turkish' => 1254,
- 'hebrew' => 1255,
- 'arabic' => 1256,
- 'baltic' => 1257,
- 'vietnamese' => 1258,
- );
+our %Winlatin2cp = (
+ 'latin1' => 1252,
+ 'latin2' => 1250,
+ 'cyrillic' => 1251,
+ 'greek' => 1253,
+ 'turkish' => 1254,
+ 'hebrew' => 1255,
+ 'arabic' => 1256,
+ 'baltic' => 1257,
+ 'vietnamese' => 1258,
+);
init_aliases();
-sub undef_aliases{
+sub undef_aliases {
@Alias = ();
%Alias = ();
}
-sub init_aliases
-{
+sub init_aliases {
undef_aliases();
+
# Try all-lower-case version should all else fails
define_alias( qr/^(.*)$/ => '"\L$1"' );
# UTF/UCS stuff
- define_alias( qr/^UTF-?7$/i => '"UTF-7"');
- define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
- define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
- qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
- qr/^iso-10646-1$/i => '"UCS-2BE"' );
- define_alias( qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
- qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
- qr/^UTF-?(16|32)$/i => '"UTF-$1"',
- );
+ define_alias( qr/^UTF-?7$/i => '"UTF-7"' );
+ define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
+ define_alias(
+ qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
+ qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
+ qr/^iso-10646-1$/i => '"UCS-2BE"'
+ );
+ define_alias(
+ qr/^UTF-?(16|32)-?BE$/i => '"UTF-$1BE"',
+ qr/^UTF-?(16|32)-?LE$/i => '"UTF-$1LE"',
+ qr/^UTF-?(16|32)$/i => '"UTF-$1"',
+ );
+
# ASCII
- define_alias(qr/^(?:US-?)ascii$/i => '"ascii"');
- define_alias('C' => 'ascii');
- define_alias(qr/\bISO[-_]?646[-_]?US$/i => '"ascii"');
+ define_alias( qr/^(?:US-?)ascii$/i => '"ascii"' );
+ define_alias( 'C' => 'ascii' );
+ define_alias( qr/\bISO[-_]?646[-_]?US$/i => '"ascii"' );
+
# Allow variants of iso-8859-1 etc.
define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' );
# More HP stuff.
- define_alias( qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );
+ define_alias(
+ qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
+ '"${1}8"' );
# The Official name of ASCII.
define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
# has been redefined as the euro symbol.)
define_alias( qr/^(.+)\@euro$/i => '"$1"' );
- define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
- => 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' );
+ define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
+'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef'
+ );
- define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
- hebrew|arabic|baltic|vietnamese)$/ix =>
- '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
+ define_alias(
+ qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
+ hebrew|arabic|baltic|vietnamese)$/ix =>
+ '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}'
+ );
# Common names for non-latin preferred MIME names
- define_alias( 'ascii' => 'US-ascii',
- 'cyrillic' => 'iso-8859-5',
- 'arabic' => 'iso-8859-6',
- 'greek' => 'iso-8859-7',
- 'hebrew' => 'iso-8859-8',
- 'thai' => 'iso-8859-11',
- 'tis620' => 'iso-8859-11',
- );
+ define_alias(
+ 'ascii' => 'US-ascii',
+ 'cyrillic' => 'iso-8859-5',
+ 'arabic' => 'iso-8859-6',
+ 'greek' => 'iso-8859-7',
+ 'hebrew' => 'iso-8859-8',
+ 'thai' => 'iso-8859-11',
+ 'tis620' => 'iso-8859-11',
+ );
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
# And Microsoft has their own naming (again, surprisingly).
- # And windows-* is registered in IANA!
- define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"');
+ # And windows-* is registered in IANA!
+ define_alias(
+ qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"' );
# Sometimes seen with a leading zero.
# define_alias( qr/\bcp037\b/i => '"cp37"');
# Mac Mappings
# predefined in *.ucm; unneeded
# define_alias( qr/\bmacIcelandic$/i => '"macIceland"');
- define_alias( qr/^mac_(.*)$/i => '"mac$1"');
+ define_alias( qr/^mac_(.*)$/i => '"mac$1"' );
+
# Ououououou. gone. They are differente!
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
-
+
# Standardize on the dashed versions.
define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
- unless ($Encode::ON_EBCDIC){
+ unless ($Encode::ON_EBCDIC) {
+
# for Encode::CN
- define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
- define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
- # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
- # CP936 doesn't have vendor-addon for GBK, so they're identical.
- define_alias( qr/^gbk$/i => '"cp936"');
- # This fixes gb2312 vs. euc-cn confusion, practically
- define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
- # for Encode::JP
- define_alias( qr/\bjis$/i => '"7bit-jis"' );
- define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
- define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
- define_alias( qr/\bujis$/i => '"euc-jp"' );
- define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
- define_alias( qr/\bsjis$/i => '"shiftjis"' );
- define_alias( qr/\bwindows-31j$/i => '"cp932"' );
+ define_alias( qr/\beuc.*cn$/i => '"euc-cn"' );
+ define_alias( qr/\bcn.*euc$/i => '"euc-cn"' );
+
+ # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' )
+ # CP936 doesn't have vendor-addon for GBK, so they're identical.
+ define_alias( qr/^gbk$/i => '"cp936"' );
+
+ # This fixes gb2312 vs. euc-cn confusion, practically
+ define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
+
+ # for Encode::JP
+ define_alias( qr/\bjis$/i => '"7bit-jis"' );
+ define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
+ define_alias( qr/\bjp.*euc$/i => '"euc-jp"' );
+ define_alias( qr/\bujis$/i => '"euc-jp"' );
+ define_alias( qr/\bshift.*jis$/i => '"shiftjis"' );
+ define_alias( qr/\bsjis$/i => '"shiftjis"' );
+ define_alias( qr/\bwindows-31j$/i => '"cp932"' );
+
# for Encode::KR
- define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
- define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
- # This fixes ksc5601 vs. euc-kr confusion, practically
- define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
- define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
- define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
+ define_alias( qr/\beuc.*kr$/i => '"euc-kr"' );
+ define_alias( qr/\bkr.*euc$/i => '"euc-kr"' );
+
+ # This fixes ksc5601 vs. euc-kr confusion, practically
+ define_alias( qr/(?:x-)?uhc$/i => '"cp949"' );
+ define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' );
+ define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
+
# for Encode::TW
- define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
- define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
- define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
- define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
- define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
+ define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
+ define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
+ define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
+ define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
+ define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
}
+
# utf8 is blessed :)
- define_alias( qr/^UTF-8$/i => '"utf-8-strict"');
+ define_alias( qr/^UTF-8$/i => '"utf-8-strict"' );
+
# At last, Map white space and _ to '-'
define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
}
#
-# $Id: CJKConstants.pm,v 2.0 2004/05/16 20:55:16 dankogai Exp $
+# $Id: CJKConstants.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
package Encode::CJKConstants;
use strict;
-our $RCSID = q$Id: CJKConstants.pm,v 2.0 2004/05/16 20:55:16 dankogai Exp $;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $RCSID = q$Id: CJKConstants.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $;
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Carp;
our %EXPORT_TAGS = ( 'all' => [ @EXPORT_OK, @EXPORT ] );
my %_0208 = (
- 1978 => '\e\$\@',
- 1983 => '\e\$B',
- 1990 => '\e&\@\e\$B',
- );
+ 1978 => '\e\$\@',
+ 1983 => '\e\$B',
+ 1990 => '\e&\@\e\$B',
+);
our %CHARCODE = (
- UNDEF_EUC => "\xa2\xae", # ¢® in EUC
- UNDEF_SJIS => "\x81\xac", # ¢® in SJIS
- UNDEF_JIS => "\xa2\xf7", # ¢÷ -- used in unicode
- UNDEF_UNICODE => "\x20\x20", # ¢÷ -- used in unicode
- );
+ UNDEF_EUC => "\xa2\xae", # ¢® in EUC
+ UNDEF_SJIS => "\x81\xac", # ¢® in SJIS
+ UNDEF_JIS => "\xa2\xf7", # ¢÷ -- used in unicode
+ UNDEF_UNICODE => "\x20\x20", # ¢÷ -- used in unicode
+);
-our %ESC = (
- GB_2312 => "\e\$A",
- JIS_0208 => "\e\$B",
- JIS_0212 => "\e\$(D",
- KSC_5601 => "\e\$(C",
- ASC => "\e\(B",
- KANA => "\e\(I",
- '2022_KR' => "\e\$)C",
- );
+our %ESC = (
+ GB_2312 => "\e\$A",
+ JIS_0208 => "\e\$B",
+ JIS_0212 => "\e\$(D",
+ KSC_5601 => "\e\$(C",
+ ASC => "\e\(B",
+ KANA => "\e\(I",
+ '2022_KR' => "\e\$)C",
+);
-our %RE =
- (
- ASCII => '[\x00-\x7f]',
- BIN => '[\x00-\x06\x7f\xff]',
- EUC_0212 => '\x8f[\xa1-\xfe][\xa1-\xfe]',
- EUC_C => '[\xa1-\xfe][\xa1-\xfe]',
- EUC_KANA => '\x8e[\xa1-\xdf]',
- JIS_0208 => "$_0208{1978}|$_0208{1983}|$_0208{1990}",
- JIS_0212 => "\e" . '\$\(D',
- ISO_ASC => "\e" . '\([BJ]',
- JIS_KANA => "\e" . '\(I',
- '2022_KR' => "\e" . '\$\)C',
- SJIS_C => '[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]',
- SJIS_KANA => '[\xa1-\xdf]',
- UTF8 => '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
- );
+our %RE = (
+ ASCII => '[\x00-\x7f]',
+ BIN => '[\x00-\x06\x7f\xff]',
+ EUC_0212 => '\x8f[\xa1-\xfe][\xa1-\xfe]',
+ EUC_C => '[\xa1-\xfe][\xa1-\xfe]',
+ EUC_KANA => '\x8e[\xa1-\xdf]',
+ JIS_0208 => "$_0208{1978}|$_0208{1983}|$_0208{1990}",
+ JIS_0212 => "\e" . '\$\(D',
+ ISO_ASC => "\e" . '\([BJ]',
+ JIS_KANA => "\e" . '\(I',
+ '2022_KR' => "\e" . '\$\)C',
+ SJIS_C => '[\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]',
+ SJIS_KANA => '[\xa1-\xdf]',
+ UTF8 => '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'
+);
1;
Encode::CJKConstants.pm -- Internally used by Encode::??::ISO_2022_*
=cut
+
use strict;
use vars qw($VERSION);
-$VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
# not ported for EBCDIC. Which should be used, "~" or "\x7E"?
-sub needs_lines { 1 }
+sub needs_lines { 1 }
-sub decode ($$;$)
-{
- my ($obj,$str,$chk) = @_;
+sub decode ($$;$) {
+ my ( $obj, $str, $chk ) = @_;
- my $GB = Encode::find_encoding('gb2312-raw');
+ my $GB = Encode::find_encoding('gb2312-raw');
my $ret = '';
- my $in_ascii = 1; # default mode is ASCII.
-
- while (length $str) {
- if ($in_ascii) { # ASCII mode
- if ($str =~ s/^([\x00-\x7D\x7F]+)//) { # no '~' => ASCII
- $ret .= $1;
- # EBCDIC should need ascii2native, but not ported.
- }
- elsif ($str =~ s/^\x7E\x7E//) { # escaped tilde
- $ret .= '~';
- }
- elsif ($str =~ s/^\x7E\cJ//) { # '\cJ' == LF in ASCII
- 1; # no-op
- }
- elsif ($str =~ s/^\x7E\x7B//) { # '~{'
- $in_ascii = 0; # to GB
- }
- else { # encounters an invalid escape, \x80 or greater
- last;
- }
- }
- else { # GB mode; the byte ranges are as in RFC 1843.
- no warnings 'uninitialized';
- if ($str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)//) {
- $ret .= $GB->decode($1, $chk);
- }
- elsif ($str =~ s/^\x7E\x7D//) { # '~}'
- $in_ascii = 1;
- }
- else { # invalid
- last;
- }
- }
+ my $in_ascii = 1; # default mode is ASCII.
+
+ while ( length $str ) {
+ if ($in_ascii) { # ASCII mode
+ if ( $str =~ s/^([\x00-\x7D\x7F]+)// ) { # no '~' => ASCII
+ $ret .= $1;
+
+ # EBCDIC should need ascii2native, but not ported.
+ }
+ elsif ( $str =~ s/^\x7E\x7E// ) { # escaped tilde
+ $ret .= '~';
+ }
+ elsif ( $str =~ s/^\x7E\cJ// ) { # '\cJ' == LF in ASCII
+ 1; # no-op
+ }
+ elsif ( $str =~ s/^\x7E\x7B// ) { # '~{'
+ $in_ascii = 0; # to GB
+ }
+ else { # encounters an invalid escape, \x80 or greater
+ last;
+ }
+ }
+ else { # GB mode; the byte ranges are as in RFC 1843.
+ no warnings 'uninitialized';
+ if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
+ $ret .= $GB->decode( $1, $chk );
+ }
+ elsif ( $str =~ s/^\x7E\x7D// ) { # '~}'
+ $in_ascii = 1;
+ }
+ else { # invalid
+ last;
+ }
+ }
}
- $_[1] = '' if $chk; # needs_lines guarantees no partial character
+ $_[1] = '' if $chk; # needs_lines guarantees no partial character
return $ret;
}
sub cat_decode {
- my ($obj, undef, $src, $pos, $trm, $chk) = @_;
- my ($rdst, $rsrc, $rpos) = \@_[1..3];
+ my ( $obj, undef, $src, $pos, $trm, $chk ) = @_;
+ my ( $rdst, $rsrc, $rpos ) = \@_[ 1 .. 3 ];
- my $GB = Encode::find_encoding('gb2312-raw');
+ my $GB = Encode::find_encoding('gb2312-raw');
my $ret = '';
- my $in_ascii = 1; # default mode is ASCII.
+ my $in_ascii = 1; # default mode is ASCII.
my $ini_pos = pos($$rsrc);
- substr($src, 0, $pos) = '';
+ substr( $src, 0, $pos ) = '';
my $ini_len = bytes::length($src);
# XXX: Is better C<$src =~ s/^\x7E// or die if ...>?
$src =~ s/^\x7E// if $trm eq "\x7E";
- while (length $src) {
- my $now;
- if ($in_ascii) { # ASCII mode
- if ($src =~ s/^([\x00-\x7D\x7F])//) { # no '~' => ASCII
- $now = $1;
- }
- elsif ($src =~ s/^\x7E\x7E//) { # escaped tilde
- $now = '~';
- }
- elsif ($src =~ s/^\x7E\cJ//) { # '\cJ' == LF in ASCII
- next;
- }
- elsif ($src =~ s/^\x7E\x7B//) { # '~{'
- $in_ascii = 0; # to GB
- next;
- }
- else { # encounters an invalid escape, \x80 or greater
- last;
- }
- }
- else { # GB mode; the byte ranges are as in RFC 1843.
- if ($src =~ s/^((?:[\x21-\x77][\x21-\x7F])+)//) {
- $now = $GB->decode($1, $chk);
- }
- elsif ($src =~ s/^\x7E\x7D//) { # '~}'
- $in_ascii = 1;
- next;
- }
- else { # invalid
- last;
- }
- }
-
- next if ! defined $now;
-
- $ret .= $now;
-
- if ($now eq $trm) {
- $$rdst .= $ret;
- $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
- pos($$rsrc) = $ini_pos;
- return 1;
- }
+ while ( length $src ) {
+ my $now;
+ if ($in_ascii) { # ASCII mode
+ if ( $src =~ s/^([\x00-\x7D\x7F])// ) { # no '~' => ASCII
+ $now = $1;
+ }
+ elsif ( $src =~ s/^\x7E\x7E// ) { # escaped tilde
+ $now = '~';
+ }
+ elsif ( $src =~ s/^\x7E\cJ// ) { # '\cJ' == LF in ASCII
+ next;
+ }
+ elsif ( $src =~ s/^\x7E\x7B// ) { # '~{'
+ $in_ascii = 0; # to GB
+ next;
+ }
+ else { # encounters an invalid escape, \x80 or greater
+ last;
+ }
+ }
+ else { # GB mode; the byte ranges are as in RFC 1843.
+ if ( $src =~ s/^((?:[\x21-\x77][\x21-\x7F])+)// ) {
+ $now = $GB->decode( $1, $chk );
+ }
+ elsif ( $src =~ s/^\x7E\x7D// ) { # '~}'
+ $in_ascii = 1;
+ next;
+ }
+ else { # invalid
+ last;
+ }
+ }
+
+ next if !defined $now;
+
+ $ret .= $now;
+
+ if ( $now eq $trm ) {
+ $$rdst .= $ret;
+ $$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
+ pos($$rsrc) = $ini_pos;
+ return 1;
+ }
}
$$rdst .= $ret;
$$rpos = $ini_pos + $pos + $ini_len - bytes::length($src);
pos($$rsrc) = $ini_pos;
- return ''; # terminator not found
+ return ''; # terminator not found
}
+sub encode($$;$) {
+ my ( $obj, $str, $chk ) = @_;
-sub encode($$;$)
-{
- my ($obj,$str,$chk) = @_;
-
- my $GB = Encode::find_encoding('gb2312-raw');
+ my $GB = Encode::find_encoding('gb2312-raw');
my $ret = '';
- my $in_ascii = 1; # default mode is ASCII.
-
- no warnings 'utf8'; # $str may be malformed UTF8 at the end of a chunk.
-
- while (length $str) {
- if ($str =~ s/^([[:ascii:]]+)//) {
- my $tmp = $1;
- $tmp =~ s/~/~~/g; # escapes tildes
- if (! $in_ascii) {
- $ret .= "\x7E\x7D"; # '~}'
- $in_ascii = 1;
- }
- $ret .= pack 'a*', $tmp; # remove UTF8 flag.
- }
- elsif ($str =~ s/(.)//) {
- my $s = $1;
- my $tmp = $GB->encode($s, $chk);
- last if !defined $tmp;
- if (length $tmp == 2) { # maybe a valid GB char (XXX)
- if ($in_ascii) {
- $ret .= "\x7E\x7B"; # '~{'
- $in_ascii = 0;
- }
- $ret .= $tmp;
- }
- elsif (length $tmp) { # maybe FALLBACK in ASCII (XXX)
- if (!$in_ascii) {
- $ret .= "\x7E\x7D"; # '~}'
- $in_ascii = 1;
- }
- $ret .= $tmp;
- }
- }
- else { # if $str is malformed UTF8 *and* if length $str != 0.
- last;
- }
+ my $in_ascii = 1; # default mode is ASCII.
+
+ no warnings 'utf8'; # $str may be malformed UTF8 at the end of a chunk.
+
+ while ( length $str ) {
+ if ( $str =~ s/^([[:ascii:]]+)// ) {
+ my $tmp = $1;
+ $tmp =~ s/~/~~/g; # escapes tildes
+ if ( !$in_ascii ) {
+ $ret .= "\x7E\x7D"; # '~}'
+ $in_ascii = 1;
+ }
+ $ret .= pack 'a*', $tmp; # remove UTF8 flag.
+ }
+ elsif ( $str =~ s/(.)// ) {
+ my $s = $1;
+ my $tmp = $GB->encode( $s, $chk );
+ last if !defined $tmp;
+ if ( length $tmp == 2 ) { # maybe a valid GB char (XXX)
+ if ($in_ascii) {
+ $ret .= "\x7E\x7B"; # '~{'
+ $in_ascii = 0;
+ }
+ $ret .= $tmp;
+ }
+ elsif ( length $tmp ) { # maybe FALLBACK in ASCII (XXX)
+ if ( !$in_ascii ) {
+ $ret .= "\x7E\x7D"; # '~}'
+ $in_ascii = 1;
+ }
+ $ret .= $tmp;
+ }
+ }
+ else { # if $str is malformed UTF8 *and* if length $str != 0.
+ last;
+ }
}
$_[1] = $str if $chk;
- # The state at the end of the chunk is discarded, even if in GB mode.
- # That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".
- # Parhaps it is harmless, but further investigations may be required...
+ # The state at the end of the chunk is discarded, even if in GB mode.
+ # That results in the combination of GB-OUT and GB-IN, i.e. "~}~{".
+ # Parhaps it is harmless, but further investigations may be required...
- if (! $in_ascii) {
- $ret .= "\x7E\x7D"; # '~}'
- $in_ascii = 1;
+ if ( !$in_ascii ) {
+ $ret .= "\x7E\x7D"; # '~}'
+ $in_ascii = 1;
}
return $ret;
}
# Demand-load module list
#
package Encode::Config;
-our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use strict;
-our %ExtModule =
- (
- # Encode::Byte
- #iso-8859-1 is in Encode.pm itself
- 'iso-8859-2' => 'Encode::Byte',
- 'iso-8859-3' => 'Encode::Byte',
- 'iso-8859-4' => 'Encode::Byte',
- 'iso-8859-5' => 'Encode::Byte',
- 'iso-8859-6' => 'Encode::Byte',
- 'iso-8859-7' => 'Encode::Byte',
- 'iso-8859-8' => 'Encode::Byte',
- 'iso-8859-9' => 'Encode::Byte',
- 'iso-8859-10' => 'Encode::Byte',
- 'iso-8859-11' => 'Encode::Byte',
- 'iso-8859-13' => 'Encode::Byte',
- 'iso-8859-14' => 'Encode::Byte',
- 'iso-8859-15' => 'Encode::Byte',
- 'iso-8859-16' => 'Encode::Byte',
- 'koi8-f' => 'Encode::Byte',
- 'koi8-r' => 'Encode::Byte',
- 'koi8-u' => 'Encode::Byte',
- 'viscii' => 'Encode::Byte',
- 'cp424' => 'Encode::Byte',
- 'cp437' => 'Encode::Byte',
- 'cp737' => 'Encode::Byte',
- 'cp775' => 'Encode::Byte',
- 'cp850' => 'Encode::Byte',
- 'cp852' => 'Encode::Byte',
- 'cp855' => 'Encode::Byte',
- 'cp856' => 'Encode::Byte',
- 'cp857' => 'Encode::Byte',
- 'cp860' => 'Encode::Byte',
- 'cp861' => 'Encode::Byte',
- 'cp862' => 'Encode::Byte',
- 'cp863' => 'Encode::Byte',
- 'cp864' => 'Encode::Byte',
- 'cp865' => 'Encode::Byte',
- 'cp866' => 'Encode::Byte',
- 'cp869' => 'Encode::Byte',
- 'cp874' => 'Encode::Byte',
- 'cp1006' => 'Encode::Byte',
- 'cp1250' => 'Encode::Byte',
- 'cp1251' => 'Encode::Byte',
- 'cp1252' => 'Encode::Byte',
- 'cp1253' => 'Encode::Byte',
- 'cp1254' => 'Encode::Byte',
- 'cp1255' => 'Encode::Byte',
- 'cp1256' => 'Encode::Byte',
- 'cp1257' => 'Encode::Byte',
- 'cp1258' => 'Encode::Byte',
- 'AdobeStandardEncoding' => 'Encode::Byte',
- 'MacArabic' => 'Encode::Byte',
- 'MacCentralEurRoman' => 'Encode::Byte',
- 'MacCroatian' => 'Encode::Byte',
- 'MacCyrillic' => 'Encode::Byte',
- 'MacFarsi' => 'Encode::Byte',
- 'MacGreek' => 'Encode::Byte',
- 'MacHebrew' => 'Encode::Byte',
- 'MacIcelandic' => 'Encode::Byte',
- 'MacRoman' => 'Encode::Byte',
- 'MacRomanian' => 'Encode::Byte',
- 'MacRumanian' => 'Encode::Byte',
- 'MacSami' => 'Encode::Byte',
- 'MacThai' => 'Encode::Byte',
- 'MacTurkish' => 'Encode::Byte',
- 'MacUkrainian' => 'Encode::Byte',
- 'nextstep' => 'Encode::Byte',
- 'hp-roman8' => 'Encode::Byte',
- 'gsm0338' => 'Encode::Byte',
- # Encode::EBCDIC
- 'cp37' => 'Encode::EBCDIC',
- 'cp500' => 'Encode::EBCDIC',
- 'cp875' => 'Encode::EBCDIC',
- 'cp1026' => 'Encode::EBCDIC',
- 'cp1047' => 'Encode::EBCDIC',
- 'posix-bc' => 'Encode::EBCDIC',
- # Encode::Symbol
- 'dingbats' => 'Encode::Symbol',
- 'symbol' => 'Encode::Symbol',
- 'AdobeSymbol' => 'Encode::Symbol',
- 'AdobeZdingbat' => 'Encode::Symbol',
- 'MacDingbats' => 'Encode::Symbol',
- 'MacSymbol' => 'Encode::Symbol',
- # Encode::Unicode
- 'UCS-2BE' => 'Encode::Unicode',
- 'UCS-2LE' => 'Encode::Unicode',
- 'UTF-16' => 'Encode::Unicode',
- 'UTF-16BE' => 'Encode::Unicode',
- 'UTF-16LE' => 'Encode::Unicode',
- 'UTF-32' => 'Encode::Unicode',
- 'UTF-32BE' => 'Encode::Unicode',
- 'UTF-32LE' => 'Encode::Unicode',
- 'UTF-7' => 'Encode::Unicode::UTF7',
- );
+our %ExtModule = (
+
+ # Encode::Byte
+ #iso-8859-1 is in Encode.pm itself
+ 'iso-8859-2' => 'Encode::Byte',
+ 'iso-8859-3' => 'Encode::Byte',
+ 'iso-8859-4' => 'Encode::Byte',
+ 'iso-8859-5' => 'Encode::Byte',
+ 'iso-8859-6' => 'Encode::Byte',
+ 'iso-8859-7' => 'Encode::Byte',
+ 'iso-8859-8' => 'Encode::Byte',
+ 'iso-8859-9' => 'Encode::Byte',
+ 'iso-8859-10' => 'Encode::Byte',
+ 'iso-8859-11' => 'Encode::Byte',
+ 'iso-8859-13' => 'Encode::Byte',
+ 'iso-8859-14' => 'Encode::Byte',
+ 'iso-8859-15' => 'Encode::Byte',
+ 'iso-8859-16' => 'Encode::Byte',
+ 'koi8-f' => 'Encode::Byte',
+ 'koi8-r' => 'Encode::Byte',
+ 'koi8-u' => 'Encode::Byte',
+ 'viscii' => 'Encode::Byte',
+ 'cp424' => 'Encode::Byte',
+ 'cp437' => 'Encode::Byte',
+ 'cp737' => 'Encode::Byte',
+ 'cp775' => 'Encode::Byte',
+ 'cp850' => 'Encode::Byte',
+ 'cp852' => 'Encode::Byte',
+ 'cp855' => 'Encode::Byte',
+ 'cp856' => 'Encode::Byte',
+ 'cp857' => 'Encode::Byte',
+ 'cp860' => 'Encode::Byte',
+ 'cp861' => 'Encode::Byte',
+ 'cp862' => 'Encode::Byte',
+ 'cp863' => 'Encode::Byte',
+ 'cp864' => 'Encode::Byte',
+ 'cp865' => 'Encode::Byte',
+ 'cp866' => 'Encode::Byte',
+ 'cp869' => 'Encode::Byte',
+ 'cp874' => 'Encode::Byte',
+ 'cp1006' => 'Encode::Byte',
+ 'cp1250' => 'Encode::Byte',
+ 'cp1251' => 'Encode::Byte',
+ 'cp1252' => 'Encode::Byte',
+ 'cp1253' => 'Encode::Byte',
+ 'cp1254' => 'Encode::Byte',
+ 'cp1255' => 'Encode::Byte',
+ 'cp1256' => 'Encode::Byte',
+ 'cp1257' => 'Encode::Byte',
+ 'cp1258' => 'Encode::Byte',
+ 'AdobeStandardEncoding' => 'Encode::Byte',
+ 'MacArabic' => 'Encode::Byte',
+ 'MacCentralEurRoman' => 'Encode::Byte',
+ 'MacCroatian' => 'Encode::Byte',
+ 'MacCyrillic' => 'Encode::Byte',
+ 'MacFarsi' => 'Encode::Byte',
+ 'MacGreek' => 'Encode::Byte',
+ 'MacHebrew' => 'Encode::Byte',
+ 'MacIcelandic' => 'Encode::Byte',
+ 'MacRoman' => 'Encode::Byte',
+ 'MacRomanian' => 'Encode::Byte',
+ 'MacRumanian' => 'Encode::Byte',
+ 'MacSami' => 'Encode::Byte',
+ 'MacThai' => 'Encode::Byte',
+ 'MacTurkish' => 'Encode::Byte',
+ 'MacUkrainian' => 'Encode::Byte',
+ 'nextstep' => 'Encode::Byte',
+ 'hp-roman8' => 'Encode::Byte',
+ 'gsm0338' => 'Encode::Byte',
+
+ # Encode::EBCDIC
+ 'cp37' => 'Encode::EBCDIC',
+ 'cp500' => 'Encode::EBCDIC',
+ 'cp875' => 'Encode::EBCDIC',
+ 'cp1026' => 'Encode::EBCDIC',
+ 'cp1047' => 'Encode::EBCDIC',
+ 'posix-bc' => 'Encode::EBCDIC',
+
+ # Encode::Symbol
+ 'dingbats' => 'Encode::Symbol',
+ 'symbol' => 'Encode::Symbol',
+ 'AdobeSymbol' => 'Encode::Symbol',
+ 'AdobeZdingbat' => 'Encode::Symbol',
+ 'MacDingbats' => 'Encode::Symbol',
+ 'MacSymbol' => 'Encode::Symbol',
+
+ # Encode::Unicode
+ 'UCS-2BE' => 'Encode::Unicode',
+ 'UCS-2LE' => 'Encode::Unicode',
+ 'UTF-16' => 'Encode::Unicode',
+ 'UTF-16BE' => 'Encode::Unicode',
+ 'UTF-16LE' => 'Encode::Unicode',
+ 'UTF-32' => 'Encode::Unicode',
+ 'UTF-32BE' => 'Encode::Unicode',
+ 'UTF-32LE' => 'Encode::Unicode',
+ 'UTF-7' => 'Encode::Unicode::UTF7',
+);
+
+unless ( ord("A") == 193 ) {
+ %ExtModule = (
+ %ExtModule,
+ 'euc-cn' => 'Encode::CN',
+ 'gb12345-raw' => 'Encode::CN',
+ 'gb2312-raw' => 'Encode::CN',
+ 'hz' => 'Encode::CN',
+ 'iso-ir-165' => 'Encode::CN',
+ 'cp936' => 'Encode::CN',
+ 'MacChineseSimp' => 'Encode::CN',
-unless (ord("A") == 193){
- %ExtModule =
- (
- %ExtModule,
- 'euc-cn' => 'Encode::CN',
- 'gb12345-raw' => 'Encode::CN',
- 'gb2312-raw' => 'Encode::CN',
- 'hz' => 'Encode::CN',
- 'iso-ir-165' => 'Encode::CN',
- 'cp936' => 'Encode::CN',
- 'MacChineseSimp' => 'Encode::CN',
-
- '7bit-jis' => 'Encode::JP',
- 'euc-jp' => 'Encode::JP',
- 'iso-2022-jp' => 'Encode::JP',
- 'iso-2022-jp-1' => 'Encode::JP',
- 'jis0201-raw' => 'Encode::JP',
- 'jis0208-raw' => 'Encode::JP',
- 'jis0212-raw' => 'Encode::JP',
- 'cp932' => 'Encode::JP',
- 'MacJapanese' => 'Encode::JP',
- 'shiftjis' => 'Encode::JP',
-
-
- 'euc-kr' => 'Encode::KR',
- 'iso-2022-kr' => 'Encode::KR',
- 'johab' => 'Encode::KR',
- 'ksc5601-raw' => 'Encode::KR',
- 'cp949' => 'Encode::KR',
- 'MacKorean' => 'Encode::KR',
-
- 'big5-eten' => 'Encode::TW',
- 'big5-hkscs' => 'Encode::TW',
- 'cp950' => 'Encode::TW',
- 'MacChineseTrad' => 'Encode::TW',
-
- #'big5plus' => 'Encode::HanExtra',
- #'euc-tw' => 'Encode::HanExtra',
- #'gb18030' => 'Encode::HanExtra',
-
- 'MIME-Header' => 'Encode::MIME::Header',
- 'MIME-B' => 'Encode::MIME::Header',
- 'MIME-Q' => 'Encode::MIME::Header',
-
- 'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP',
- );
+ '7bit-jis' => 'Encode::JP',
+ 'euc-jp' => 'Encode::JP',
+ 'iso-2022-jp' => 'Encode::JP',
+ 'iso-2022-jp-1' => 'Encode::JP',
+ 'jis0201-raw' => 'Encode::JP',
+ 'jis0208-raw' => 'Encode::JP',
+ 'jis0212-raw' => 'Encode::JP',
+ 'cp932' => 'Encode::JP',
+ 'MacJapanese' => 'Encode::JP',
+ 'shiftjis' => 'Encode::JP',
+
+ 'euc-kr' => 'Encode::KR',
+ 'iso-2022-kr' => 'Encode::KR',
+ 'johab' => 'Encode::KR',
+ 'ksc5601-raw' => 'Encode::KR',
+ 'cp949' => 'Encode::KR',
+ 'MacKorean' => 'Encode::KR',
+
+ 'big5-eten' => 'Encode::TW',
+ 'big5-hkscs' => 'Encode::TW',
+ 'cp950' => 'Encode::TW',
+ 'MacChineseTrad' => 'Encode::TW',
+
+ #'big5plus' => 'Encode::HanExtra',
+ #'euc-tw' => 'Encode::HanExtra',
+ #'gb18030' => 'Encode::HanExtra',
+
+ 'MIME-Header' => 'Encode::MIME::Header',
+ 'MIME-B' => 'Encode::MIME::Header',
+ 'MIME-Q' => 'Encode::MIME::Header',
+
+ 'MIME-Header-ISO_2022_JP' => 'Encode::MIME::Header::ISO_2022_JP',
+ );
}
#
# Why not export ? to keep ConfigLocal Happy!
#
-while (my ($enc,$mod) = each %ExtModule){
+while ( my ( $enc, $mod ) = each %ExtModule ) {
$Encode::ExtModule{$enc} = $mod;
}
#
-# $Id: Encoder.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
+# $Id: Encoder.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
package Encode::Encoder;
use strict;
use warnings;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
require Exporter;
-our @ISA = qw(Exporter);
+our @ISA = qw(Exporter);
our @EXPORT_OK = qw ( encoder );
our $AUTOLOAD;
use Encode qw(encode decode find_encoding from_to);
use Carp;
-sub new{
- my ($class, $data, $encname) = @_;
- unless($encname){
- $encname = Encode::is_utf8($data) ? 'utf8' : '';
- }else{
- my $obj = find_encoding($encname)
- or croak __PACKAGE__, ": unknown encoding: $encname";
- $encname = $obj->name;
+sub new {
+ my ( $class, $data, $encname ) = @_;
+ unless ($encname) {
+ $encname = Encode::is_utf8($data) ? 'utf8' : '';
+ }
+ else {
+ my $obj = find_encoding($encname)
+ or croak __PACKAGE__, ": unknown encoding: $encname";
+ $encname = $obj->name;
}
my $self = {
- data => $data,
- encoding => $encname,
- };
+ data => $data,
+ encoding => $encname,
+ };
bless $self => $class;
}
-sub encoder{ __PACKAGE__->new(@_) }
+sub encoder { __PACKAGE__->new(@_) }
-sub data{
- my ($self, $data) = @_;
- if (defined $data){
- $self->{data} = $data;
- return $data;
- }else{
- return $self->{data};
+sub data {
+ my ( $self, $data ) = @_;
+ if ( defined $data ) {
+ $self->{data} = $data;
+ return $data;
+ }
+ else {
+ return $self->{data};
}
}
-sub encoding{
- my ($self, $encname) = @_;
- if ($encname){
- my $obj = find_encoding($encname)
- or confess __PACKAGE__, ": unknown encoding: $encname";
- $self->{encoding} = $obj->name;
- return $self;
- }else{
- return $self->{encoding}
+sub encoding {
+ my ( $self, $encname ) = @_;
+ if ($encname) {
+ my $obj = find_encoding($encname)
+ or confess __PACKAGE__, ": unknown encoding: $encname";
+ $self->{encoding} = $obj->name;
+ return $self;
+ }
+ else {
+ return $self->{encoding};
}
}
sub bytes {
- my ($self, $encname) = @_;
+ my ( $self, $encname ) = @_;
$encname ||= $self->{encoding};
- my $obj = find_encoding($encname)
- or confess __PACKAGE__, ": unknown encoding: $encname";
- $self->{data} = $obj->decode($self->{data}, 1);
- $self->{encoding} = '' ;
+ my $obj = find_encoding($encname)
+ or confess __PACKAGE__, ": unknown encoding: $encname";
+ $self->{data} = $obj->decode( $self->{data}, 1 );
+ $self->{encoding} = '';
return $self;
}
-sub DESTROY{ # defined so it won't autoload.
+sub DESTROY { # defined so it won't autoload.
DEBUG and warn shift;
}
sub AUTOLOAD {
my $self = shift;
my $type = ref($self)
- or confess "$self is not an object";
+ or confess "$self is not an object";
my $myname = $AUTOLOAD;
- $myname =~ s/.*://; # strip fully-qualified portion
- my $obj = find_encoding($myname)
- or confess __PACKAGE__, ": unknown encoding: $myname";
+ $myname =~ s/.*://; # strip fully-qualified portion
+ my $obj = find_encoding($myname)
+ or confess __PACKAGE__, ": unknown encoding: $myname";
DEBUG and warn $self->{encoding}, " => ", $obj->name;
- if ($self->{encoding}){
- from_to($self->{data}, $self->{encoding}, $obj->name, 1);
- }else{
- $self->{data} = $obj->encode($self->{data}, 1);
+ if ( $self->{encoding} ) {
+ from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
+ }
+ else {
+ $self->{data} = $obj->encode( $self->{data}, 1 );
}
$self->{encoding} = $obj->name;
return $self;
}
-use overload
- q("") => sub { $_[0]->{data} },
- q(0+) => sub { use bytes (); bytes::length($_[0]->{data}) },
- fallback => 1,
- ;
+use overload
+ q("") => sub { $_[0]->{data} },
+ q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
+ fallback => 1,
+ ;
1;
__END__
package Encode::Encoding;
+
# Base class for classes which implement encodings
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
require Encode;
sub DEBUG { 0 }
-sub Define
-{
- my $obj = shift;
+
+sub Define {
+ my $obj = shift;
my $canonical = shift;
- $obj = bless { Name => $canonical },$obj unless ref $obj;
+ $obj = bless { Name => $canonical }, $obj unless ref $obj;
+
# warn "$canonical => $obj\n";
- Encode::define_encoding($obj, $canonical, @_);
+ Encode::define_encoding( $obj, $canonical, @_ );
}
-sub name { return shift->{'Name'} }
+sub name { return shift->{'Name'} }
# sub renew { return $_[0] }
sub renew {
my $self = shift;
- my $clone = bless { %$self } => ref($self);
- $clone->{renewed}++; # so the caller can see it
+ my $clone = bless {%$self} => ref($self);
+ $clone->{renewed}++; # so the caller can see it
DEBUG and warn $clone->{renewed};
return $clone;
}
-sub renewed{ return $_[0]->{renewed} || 0 }
+sub renewed { return $_[0]->{renewed} || 0 }
*new_sequence = \&renew;
-sub needs_lines { 0 };
+sub needs_lines { 0 }
-sub perlio_ok {
- eval{ require PerlIO::encoding };
+sub perlio_ok {
+ eval { require PerlIO::encoding };
return $@ ? 0 : 1;
}
# (Temporary|legacy) methods
-sub toUnicode { shift->decode(@_) }
-sub fromUnicode { shift->encode(@_) }
+sub toUnicode { shift->decode(@_) }
+sub fromUnicode { shift->encode(@_) }
#
# Needs to be overloaded or just croak
require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
- Carp::croak($class . "->encode() not defined!");
+ Carp::croak( $class . "->encode() not defined!" );
}
-sub decode{
+sub decode {
require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
- Carp::croak($class . "->encode() not defined!");
+ Carp::croak( $class . "->encode() not defined!" );
}
-sub DESTROY {}
+sub DESTROY { }
1;
__END__
use strict;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
my $Canon = 'Guess';
sub DEBUG () { 0 }
our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8);
-$Encode::Encoding{$Canon} =
- bless {
- Name => $Canon,
- Suspects => { %DEF_SUSPECTS },
- } => __PACKAGE__;
+$Encode::Encoding{$Canon} = bless {
+ Name => $Canon,
+ Suspects => {%DEF_SUSPECTS},
+} => __PACKAGE__;
use base qw(Encode::Encoding);
sub needs_lines { 1 }
-sub perlio_ok { 0 }
+sub perlio_ok { 0 }
-our @EXPORT = qw(guess_encoding);
+our @EXPORT = qw(guess_encoding);
our $NoUTFAutoGuess = 0;
-our $UTF8_BOM = pack("C3", 0xef, 0xbb, 0xbf);
+our $UTF8_BOM = pack( "C3", 0xef, 0xbb, 0xbf );
-sub import { # Exporter not used so we do it on our own
+sub import { # Exporter not used so we do it on our own
my $callpkg = caller;
- for my $item (@EXPORT){
- no strict 'refs';
- *{"$callpkg\::$item"} = \&{"$item"};
+ for my $item (@EXPORT) {
+ no strict 'refs';
+ *{"$callpkg\::$item"} = \&{"$item"};
}
set_suspects(@_);
}
-sub set_suspects{
+sub set_suspects {
my $class = shift;
my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
- $self->{Suspects} = { %DEF_SUSPECTS };
+ $self->{Suspects} = {%DEF_SUSPECTS};
$self->add_suspects(@_);
}
-sub add_suspects{
+sub add_suspects {
my $class = shift;
my $self = ref($class) ? $class : $Encode::Encoding{$Canon};
- for my $c (@_){
- my $e = find_encoding($c) or die "Unknown encoding: $c";
- $self->{Suspects}{$e->name} = $e;
- DEBUG and warn "Added: ", $e->name;
+ for my $c (@_) {
+ my $e = find_encoding($c) or die "Unknown encoding: $c";
+ $self->{Suspects}{ $e->name } = $e;
+ DEBUG and warn "Added: ", $e->name;
}
}
-sub decode($$;$){
- my ($obj, $octet, $chk) = @_;
- my $guessed = guess($obj, $octet);
- unless (ref($guessed)){
- require Carp;
- Carp::croak($guessed);
+sub decode($$;$) {
+ my ( $obj, $octet, $chk ) = @_;
+ my $guessed = guess( $obj, $octet );
+ unless ( ref($guessed) ) {
+ require Carp;
+ Carp::croak($guessed);
}
- my $utf8 = $guessed->decode($octet, $chk);
+ my $utf8 = $guessed->decode( $octet, $chk );
$_[1] = $octet if $chk;
return $utf8;
}
-sub guess_encoding{
- guess($Encode::Encoding{$Canon}, @_);
+sub guess_encoding {
+ guess( $Encode::Encoding{$Canon}, @_ );
}
sub guess {
# cheat 0: utf8 flag;
if ( Encode::is_utf8($octet) ) {
- return find_encoding('utf8') unless $NoUTFAutoGuess;
- Encode::_utf8_off($octet);
+ return find_encoding('utf8') unless $NoUTFAutoGuess;
+ Encode::_utf8_off($octet);
}
+
# cheat 1: BOM
use Encode::Unicode;
unless ($NoUTFAutoGuess) {
- my $BOM = pack('C3', unpack("C3", $octet));
- return find_encoding('utf8')
- if (defined $BOM and $BOM eq $UTF8_BOM);
- $BOM = unpack('N', $octet);
- return find_encoding('UTF-32')
- if (defined $BOM and ($BOM == 0xFeFF or $BOM == 0xFFFe0000));
- $BOM = unpack('n', $octet);
- return find_encoding('UTF-16')
- if (defined $BOM and ($BOM == 0xFeFF or $BOM == 0xFFFe));
- if ($octet =~ /\x00/o){ # if \x00 found, we assume UTF-(16|32)(BE|LE)
- my $utf;
- my ($be, $le) = (0, 0);
- if ($octet =~ /\x00\x00/o){ # UTF-32(BE|LE) assumed
- $utf = "UTF-32";
- for my $char (unpack('N*', $octet)){
- $char & 0x0000ffff and $be++;
- $char & 0xffff0000 and $le++;
- }
- }else{ # UTF-16(BE|LE) assumed
- $utf = "UTF-16";
- for my $char (unpack('n*', $octet)){
- $char & 0x00ff and $be++;
- $char & 0xff00 and $le++;
- }
- }
- DEBUG and warn "$utf, be == $be, le == $le";
- $be == $le
- and return
- "Encodings ambiguous between $utf BE and LE ($be, $le)";
- $utf .= ($be > $le) ? 'BE' : 'LE';
- return find_encoding($utf);
- }
+ my $BOM = pack( 'C3', unpack( "C3", $octet ) );
+ return find_encoding('utf8')
+ if ( defined $BOM and $BOM eq $UTF8_BOM );
+ $BOM = unpack( 'N', $octet );
+ return find_encoding('UTF-32')
+ if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe0000 ) );
+ $BOM = unpack( 'n', $octet );
+ return find_encoding('UTF-16')
+ if ( defined $BOM and ( $BOM == 0xFeFF or $BOM == 0xFFFe ) );
+ if ( $octet =~ /\x00/o )
+ { # if \x00 found, we assume UTF-(16|32)(BE|LE)
+ my $utf;
+ my ( $be, $le ) = ( 0, 0 );
+ if ( $octet =~ /\x00\x00/o ) { # UTF-32(BE|LE) assumed
+ $utf = "UTF-32";
+ for my $char ( unpack( 'N*', $octet ) ) {
+ $char & 0x0000ffff and $be++;
+ $char & 0xffff0000 and $le++;
+ }
+ }
+ else { # UTF-16(BE|LE) assumed
+ $utf = "UTF-16";
+ for my $char ( unpack( 'n*', $octet ) ) {
+ $char & 0x00ff and $be++;
+ $char & 0xff00 and $le++;
+ }
+ }
+ DEBUG and warn "$utf, be == $be, le == $le";
+ $be == $le
+ and return
+ "Encodings ambiguous between $utf BE and LE ($be, $le)";
+ $utf .= ( $be > $le ) ? 'BE' : 'LE';
+ return find_encoding($utf);
+ }
}
- my %try = %{$obj->{Suspects}};
- for my $c (@_){
- my $e = find_encoding($c) or die "Unknown encoding: $c";
- $try{$e->name} = $e;
- DEBUG and warn "Added: ", $e->name;
+ my %try = %{ $obj->{Suspects} };
+ for my $c (@_) {
+ my $e = find_encoding($c) or die "Unknown encoding: $c";
+ $try{ $e->name } = $e;
+ DEBUG and warn "Added: ", $e->name;
}
my $nline = 1;
- for my $line (split /\r\n?|\n/, $octet){
- # cheat 2 -- \e in the string
- if ($line =~ /\e/o){
- my @keys = keys %try;
- delete @try{qw/utf8 ascii/};
- for my $k (@keys){
- ref($try{$k}) eq 'Encode::XS' and delete $try{$k};
- }
- }
- my %ok = %try;
- # warn join(",", keys %try);
- for my $k (keys %try){
- my $scratch = $line;
- $try{$k}->decode($scratch, FB_QUIET);
- if ($scratch eq ''){
- DEBUG and warn sprintf("%4d:%-24s ok\n", $nline, $k);
- }else{
- use bytes ();
- DEBUG and
- warn sprintf("%4d:%-24s not ok; %d bytes left\n",
- $nline, $k, bytes::length($scratch));
- delete $ok{$k};
- }
- }
- %ok or return "No appropriate encodings found!";
- if (scalar(keys(%ok)) == 1){
- my ($retval) = values(%ok);
- return $retval;
- }
- %try = %ok; $nline++;
+ for my $line ( split /\r\n?|\n/, $octet ) {
+
+ # cheat 2 -- \e in the string
+ if ( $line =~ /\e/o ) {
+ my @keys = keys %try;
+ delete @try{qw/utf8 ascii/};
+ for my $k (@keys) {
+ ref( $try{$k} ) eq 'Encode::XS' and delete $try{$k};
+ }
+ }
+ my %ok = %try;
+
+ # warn join(",", keys %try);
+ for my $k ( keys %try ) {
+ my $scratch = $line;
+ $try{$k}->decode( $scratch, FB_QUIET );
+ if ( $scratch eq '' ) {
+ DEBUG and warn sprintf( "%4d:%-24s ok\n", $nline, $k );
+ }
+ else {
+ use bytes ();
+ DEBUG
+ and warn sprintf( "%4d:%-24s not ok; %d bytes left\n",
+ $nline, $k, bytes::length($scratch) );
+ delete $ok{$k};
+ }
+ }
+ %ok or return "No appropriate encodings found!";
+ if ( scalar( keys(%ok) ) == 1 ) {
+ my ($retval) = values(%ok);
+ return $retval;
+ }
+ %try = %ok;
+ $nline++;
}
- $try{ascii} or
- return "Encodings too ambiguous: ", join(" or ", keys %try);
+ $try{ascii}
+ or return "Encodings too ambiguous: ", join( " or ", keys %try );
return $try{ascii};
}
-
-
1;
__END__
#
-# $Id: H2Z.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
+# $Id: H2Z.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
package Encode::JP::H2Z;
use strict;
-our $RCSID = q$Id: H2Z.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $RCSID = q$Id: H2Z.pm,v 2.1 2006/05/03 18:24:10 dankogai Exp $;
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode::CJKConstants qw(:all);
use vars qw(%_D2Z $_PAT_D2Z
- %_Z2D $_PAT_Z2D
- %_H2Z $_PAT_H2Z
- %_Z2H $_PAT_Z2H);
+ %_Z2D $_PAT_Z2D
+ %_H2Z $_PAT_H2Z
+ %_Z2H $_PAT_Z2H);
%_H2Z = (
- "\x8e\xa1" => "\xa1\xa3", #¡£
- "\x8e\xa2" => "\xa1\xd6", #¡Ö
- "\x8e\xa3" => "\xa1\xd7", #¡×
- "\x8e\xa4" => "\xa1\xa2", #¡¢
- "\x8e\xa5" => "\xa1\xa6", #¡¦
- "\x8e\xa6" => "\xa5\xf2", #¥ò
- "\x8e\xa7" => "\xa5\xa1", #¥¡
- "\x8e\xa8" => "\xa5\xa3", #¥£
- "\x8e\xa9" => "\xa5\xa5", #¥¥
- "\x8e\xaa" => "\xa5\xa7", #¥§
- "\x8e\xab" => "\xa5\xa9", #¥©
- "\x8e\xac" => "\xa5\xe3", #¥ã
- "\x8e\xad" => "\xa5\xe5", #¥å
- "\x8e\xae" => "\xa5\xe7", #¥ç
- "\x8e\xaf" => "\xa5\xc3", #¥Ã
- "\x8e\xb0" => "\xa1\xbc", #¡¼
- "\x8e\xb1" => "\xa5\xa2", #¥¢
- "\x8e\xb2" => "\xa5\xa4", #¥¤
- "\x8e\xb3" => "\xa5\xa6", #¥¦
- "\x8e\xb4" => "\xa5\xa8", #¥¨
- "\x8e\xb5" => "\xa5\xaa", #¥ª
- "\x8e\xb6" => "\xa5\xab", #¥«
- "\x8e\xb7" => "\xa5\xad", #¥
- "\x8e\xb8" => "\xa5\xaf", #¥¯
- "\x8e\xb9" => "\xa5\xb1", #¥±
- "\x8e\xba" => "\xa5\xb3", #¥³
- "\x8e\xbb" => "\xa5\xb5", #¥µ
- "\x8e\xbc" => "\xa5\xb7", #¥·
- "\x8e\xbd" => "\xa5\xb9", #¥¹
- "\x8e\xbe" => "\xa5\xbb", #¥»
- "\x8e\xbf" => "\xa5\xbd", #¥½
- "\x8e\xc0" => "\xa5\xbf", #¥¿
- "\x8e\xc1" => "\xa5\xc1", #¥Á
- "\x8e\xc2" => "\xa5\xc4", #¥Ä
- "\x8e\xc3" => "\xa5\xc6", #¥Æ
- "\x8e\xc4" => "\xa5\xc8", #¥È
- "\x8e\xc5" => "\xa5\xca", #¥Ê
- "\x8e\xc6" => "\xa5\xcb", #¥Ë
- "\x8e\xc7" => "\xa5\xcc", #¥Ì
- "\x8e\xc8" => "\xa5\xcd", #¥Í
- "\x8e\xc9" => "\xa5\xce", #¥Î
- "\x8e\xca" => "\xa5\xcf", #¥Ï
- "\x8e\xcb" => "\xa5\xd2", #¥Ò
- "\x8e\xcc" => "\xa5\xd5", #¥Õ
- "\x8e\xcd" => "\xa5\xd8", #¥Ø
- "\x8e\xce" => "\xa5\xdb", #¥Û
- "\x8e\xcf" => "\xa5\xde", #¥Þ
- "\x8e\xd0" => "\xa5\xdf", #¥ß
- "\x8e\xd1" => "\xa5\xe0", #¥à
- "\x8e\xd2" => "\xa5\xe1", #¥á
- "\x8e\xd3" => "\xa5\xe2", #¥â
- "\x8e\xd4" => "\xa5\xe4", #¥ä
- "\x8e\xd5" => "\xa5\xe6", #¥æ
- "\x8e\xd6" => "\xa5\xe8", #¥è
- "\x8e\xd7" => "\xa5\xe9", #¥é
- "\x8e\xd8" => "\xa5\xea", #¥ê
- "\x8e\xd9" => "\xa5\xeb", #¥ë
- "\x8e\xda" => "\xa5\xec", #¥ì
- "\x8e\xdb" => "\xa5\xed", #¥í
- "\x8e\xdc" => "\xa5\xef", #¥ï
- "\x8e\xdd" => "\xa5\xf3", #¥ó
- "\x8e\xde" => "\xa1\xab", #¡«
- "\x8e\xdf" => "\xa1\xac", #¡¬
+ "\x8e\xa1" => "\xa1\xa3", #¡£
+ "\x8e\xa2" => "\xa1\xd6", #¡Ö
+ "\x8e\xa3" => "\xa1\xd7", #¡×
+ "\x8e\xa4" => "\xa1\xa2", #¡¢
+ "\x8e\xa5" => "\xa1\xa6", #¡¦
+ "\x8e\xa6" => "\xa5\xf2", #¥ò
+ "\x8e\xa7" => "\xa5\xa1", #¥¡
+ "\x8e\xa8" => "\xa5\xa3", #¥£
+ "\x8e\xa9" => "\xa5\xa5", #¥¥
+ "\x8e\xaa" => "\xa5\xa7", #¥§
+ "\x8e\xab" => "\xa5\xa9", #¥©
+ "\x8e\xac" => "\xa5\xe3", #¥ã
+ "\x8e\xad" => "\xa5\xe5", #¥å
+ "\x8e\xae" => "\xa5\xe7", #¥ç
+ "\x8e\xaf" => "\xa5\xc3", #¥Ã
+ "\x8e\xb0" => "\xa1\xbc", #¡¼
+ "\x8e\xb1" => "\xa5\xa2", #¥¢
+ "\x8e\xb2" => "\xa5\xa4", #¥¤
+ "\x8e\xb3" => "\xa5\xa6", #¥¦
+ "\x8e\xb4" => "\xa5\xa8", #¥¨
+ "\x8e\xb5" => "\xa5\xaa", #¥ª
+ "\x8e\xb6" => "\xa5\xab", #¥«
+ "\x8e\xb7" => "\xa5\xad", #¥
+ "\x8e\xb8" => "\xa5\xaf", #¥¯
+ "\x8e\xb9" => "\xa5\xb1", #¥±
+ "\x8e\xba" => "\xa5\xb3", #¥³
+ "\x8e\xbb" => "\xa5\xb5", #¥µ
+ "\x8e\xbc" => "\xa5\xb7", #¥·
+ "\x8e\xbd" => "\xa5\xb9", #¥¹
+ "\x8e\xbe" => "\xa5\xbb", #¥»
+ "\x8e\xbf" => "\xa5\xbd", #¥½
+ "\x8e\xc0" => "\xa5\xbf", #¥¿
+ "\x8e\xc1" => "\xa5\xc1", #¥Á
+ "\x8e\xc2" => "\xa5\xc4", #¥Ä
+ "\x8e\xc3" => "\xa5\xc6", #¥Æ
+ "\x8e\xc4" => "\xa5\xc8", #¥È
+ "\x8e\xc5" => "\xa5\xca", #¥Ê
+ "\x8e\xc6" => "\xa5\xcb", #¥Ë
+ "\x8e\xc7" => "\xa5\xcc", #¥Ì
+ "\x8e\xc8" => "\xa5\xcd", #¥Í
+ "\x8e\xc9" => "\xa5\xce", #¥Î
+ "\x8e\xca" => "\xa5\xcf", #¥Ï
+ "\x8e\xcb" => "\xa5\xd2", #¥Ò
+ "\x8e\xcc" => "\xa5\xd5", #¥Õ
+ "\x8e\xcd" => "\xa5\xd8", #¥Ø
+ "\x8e\xce" => "\xa5\xdb", #¥Û
+ "\x8e\xcf" => "\xa5\xde", #¥Þ
+ "\x8e\xd0" => "\xa5\xdf", #¥ß
+ "\x8e\xd1" => "\xa5\xe0", #¥à
+ "\x8e\xd2" => "\xa5\xe1", #¥á
+ "\x8e\xd3" => "\xa5\xe2", #¥â
+ "\x8e\xd4" => "\xa5\xe4", #¥ä
+ "\x8e\xd5" => "\xa5\xe6", #¥æ
+ "\x8e\xd6" => "\xa5\xe8", #¥è
+ "\x8e\xd7" => "\xa5\xe9", #¥é
+ "\x8e\xd8" => "\xa5\xea", #¥ê
+ "\x8e\xd9" => "\xa5\xeb", #¥ë
+ "\x8e\xda" => "\xa5\xec", #¥ì
+ "\x8e\xdb" => "\xa5\xed", #¥í
+ "\x8e\xdc" => "\xa5\xef", #¥ï
+ "\x8e\xdd" => "\xa5\xf3", #¥ó
+ "\x8e\xde" => "\xa1\xab", #¡«
+ "\x8e\xdf" => "\xa1\xac", #¡¬
);
%_D2Z = (
- "\x8e\xb6\x8e\xde" => "\xa5\xac", #¥¬
- "\x8e\xb7\x8e\xde" => "\xa5\xae", #¥®
- "\x8e\xb8\x8e\xde" => "\xa5\xb0", #¥°
- "\x8e\xb9\x8e\xde" => "\xa5\xb2", #¥²
- "\x8e\xba\x8e\xde" => "\xa5\xb4", #¥´
- "\x8e\xbb\x8e\xde" => "\xa5\xb6", #¥¶
- "\x8e\xbc\x8e\xde" => "\xa5\xb8", #¥¸
- "\x8e\xbd\x8e\xde" => "\xa5\xba", #¥º
- "\x8e\xbe\x8e\xde" => "\xa5\xbc", #¥¼
- "\x8e\xbf\x8e\xde" => "\xa5\xbe", #¥¾
- "\x8e\xc0\x8e\xde" => "\xa5\xc0", #¥À
- "\x8e\xc1\x8e\xde" => "\xa5\xc2", #¥Â
- "\x8e\xc2\x8e\xde" => "\xa5\xc5", #¥Å
- "\x8e\xc3\x8e\xde" => "\xa5\xc7", #¥Ç
- "\x8e\xc4\x8e\xde" => "\xa5\xc9", #¥É
- "\x8e\xca\x8e\xde" => "\xa5\xd0", #¥Ð
- "\x8e\xcb\x8e\xde" => "\xa5\xd3", #¥Ó
- "\x8e\xcc\x8e\xde" => "\xa5\xd6", #¥Ö
- "\x8e\xcd\x8e\xde" => "\xa5\xd9", #¥Ù
- "\x8e\xce\x8e\xde" => "\xa5\xdc", #¥Ü
- "\x8e\xca\x8e\xdf" => "\xa5\xd1", #¥Ñ
- "\x8e\xcb\x8e\xdf" => "\xa5\xd4", #¥Ô
- "\x8e\xcc\x8e\xdf" => "\xa5\xd7", #¥×
- "\x8e\xcd\x8e\xdf" => "\xa5\xda", #¥Ú
- "\x8e\xce\x8e\xdf" => "\xa5\xdd", #¥Ý
- "\x8e\xb3\x8e\xde" => "\xa5\xf4", #¥ô
+ "\x8e\xb6\x8e\xde" => "\xa5\xac", #¥¬
+ "\x8e\xb7\x8e\xde" => "\xa5\xae", #¥®
+ "\x8e\xb8\x8e\xde" => "\xa5\xb0", #¥°
+ "\x8e\xb9\x8e\xde" => "\xa5\xb2", #¥²
+ "\x8e\xba\x8e\xde" => "\xa5\xb4", #¥´
+ "\x8e\xbb\x8e\xde" => "\xa5\xb6", #¥¶
+ "\x8e\xbc\x8e\xde" => "\xa5\xb8", #¥¸
+ "\x8e\xbd\x8e\xde" => "\xa5\xba", #¥º
+ "\x8e\xbe\x8e\xde" => "\xa5\xbc", #¥¼
+ "\x8e\xbf\x8e\xde" => "\xa5\xbe", #¥¾
+ "\x8e\xc0\x8e\xde" => "\xa5\xc0", #¥À
+ "\x8e\xc1\x8e\xde" => "\xa5\xc2", #¥Â
+ "\x8e\xc2\x8e\xde" => "\xa5\xc5", #¥Å
+ "\x8e\xc3\x8e\xde" => "\xa5\xc7", #¥Ç
+ "\x8e\xc4\x8e\xde" => "\xa5\xc9", #¥É
+ "\x8e\xca\x8e\xde" => "\xa5\xd0", #¥Ð
+ "\x8e\xcb\x8e\xde" => "\xa5\xd3", #¥Ó
+ "\x8e\xcc\x8e\xde" => "\xa5\xd6", #¥Ö
+ "\x8e\xcd\x8e\xde" => "\xa5\xd9", #¥Ù
+ "\x8e\xce\x8e\xde" => "\xa5\xdc", #¥Ü
+ "\x8e\xca\x8e\xdf" => "\xa5\xd1", #¥Ñ
+ "\x8e\xcb\x8e\xdf" => "\xa5\xd4", #¥Ô
+ "\x8e\xcc\x8e\xdf" => "\xa5\xd7", #¥×
+ "\x8e\xcd\x8e\xdf" => "\xa5\xda", #¥Ú
+ "\x8e\xce\x8e\xdf" => "\xa5\xdd", #¥Ý
+ "\x8e\xb3\x8e\xde" => "\xa5\xf4", #¥ô
);
# init only once;
sub h2z {
no warnings qw(uninitialized);
- my $r_str = shift;
+ my $r_str = shift;
my ($keep_dakuten) = @_;
- my $n = 0;
- unless ($keep_dakuten){
- $n = (
- $$r_str =~ s(
- ($RE{EUC_KANA}
- (?:\x8e[\xde\xdf])?)
- ){
- my $str = $1;
- $_D2Z{$str} || $_H2Z{$str} ||
- # in case dakuten and handakuten are side-by-side!
- $_H2Z{substr($str,0,2)} . $_H2Z{substr($str,2,2)};
- }eogx
- );
- }else{
- $n = (
- $$r_str =~ s(
- ($RE{EUC_KANA})
- ){
- $_H2Z{$1};
- }eogx
- );
+ my $n = 0;
+ unless ($keep_dakuten) {
+ $n = (
+ $$r_str =~ s(
+ ($RE{EUC_KANA}
+ (?:\x8e[\xde\xdf])?)
+ ){
+ my $str = $1;
+ $_D2Z{$str} || $_H2Z{$str} ||
+ # in case dakuten and handakuten are side-by-side!
+ $_H2Z{substr($str,0,2)} . $_H2Z{substr($str,2,2)};
+ }eogx
+ );
+ }
+ else {
+ $n = (
+ $$r_str =~ s(
+ ($RE{EUC_KANA})
+ ){
+ $_H2Z{$1};
+ }eogx
+ );
}
$n;
}
sub z2h {
my $r_str = shift;
- my $n = (
- $$r_str =~ s(
- ($RE{EUC_C}|$RE{EUC_0212}|$RE{EUC_KANA})
- ){
- $_Z2D{$1} || $_Z2H{$1} || $1;
- }eogx
- );
+ my $n = (
+ $$r_str =~ s(
+ ($RE{EUC_C}|$RE{EUC_0212}|$RE{EUC_KANA})
+ ){
+ $_Z2D{$1} || $_Z2H{$1} || $1;
+ }eogx
+ );
$n;
}
package Encode::JP::JIS7;
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
-for my $name ('7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1'){
- my $h2z = ($name eq '7bit-jis') ? 0 : 1;
- my $jis0212 = ($name eq 'iso-2022-jp') ? 0 : 1;
+for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) {
+ my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1;
+ my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1;
- $Encode::Encoding{$name} =
- bless {
- Name => $name,
- h2z => $h2z,
- jis0212 => $jis0212,
- } => __PACKAGE__;
+ $Encode::Encoding{$name} = bless {
+ Name => $name,
+ h2z => $h2z,
+ jis0212 => $jis0212,
+ } => __PACKAGE__;
}
use base qw(Encode::Encoding);
# decode is identical for all 2022 variants
#
-sub decode($$;$)
-{
- my ($obj, $str, $chk) = @_;
+sub decode($$;$) {
+ my ( $obj, $str, $chk ) = @_;
my $residue = '';
- if ($chk){
- $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
+ if ($chk) {
+ $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
}
- $residue .= jis_euc(\$str);
+ $residue .= jis_euc( \$str );
$_[1] = $residue if $chk;
- return Encode::decode('euc-jp', $str, FB_PERLQQ);
+ return Encode::decode( 'euc-jp', $str, FB_PERLQQ );
}
#
# encode is different
#
-sub encode($$;$)
-{
+sub encode($$;$) {
require Encode::JP::H2Z;
- my ($obj, $utf8, $chk) = @_;
+ my ( $obj, $utf8, $chk ) = @_;
+
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
- my ($h2z, $jis0212) = @$obj{qw(h2z jis0212)};
- my $octet = Encode::encode('euc-jp', $utf8, FB_PERLQQ) ;
- $h2z and &Encode::JP::H2Z::h2z(\$octet);
- euc_jis(\$octet, $jis0212);
+ my ( $h2z, $jis0212 ) = @$obj{qw(h2z jis0212)};
+ my $octet = Encode::encode( 'euc-jp', $utf8, FB_PERLQQ );
+ $h2z and &Encode::JP::H2Z::h2z( \$octet );
+ euc_jis( \$octet, $jis0212 );
return $octet;
}
($RE{ISO_ASC}) | ($RE{JIS_KANA}) | )
([^\e]*)
}x;
-sub cat_decode { # ($obj, $dst, $src, $pos, $trm, $chk)
- my ($obj, undef, undef, $pos, $trm) = @_; # currently ignores $chk
- my ($rdst, $rsrc, $rpos) = \@_[1,2,3];
+
+sub cat_decode { # ($obj, $dst, $src, $pos, $trm, $chk)
+ my ( $obj, undef, undef, $pos, $trm ) = @_; # currently ignores $chk
+ my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
local ${^ENCODING};
use bytes;
my $opos = pos($$rsrc);
pos($$rsrc) = $pos;
- while ($$rsrc =~ /$re_scan_jis_g/gc) {
- my ($esc, $esc_0212, $esc_asc, $esc_kana, $chunk) =
- ($1, $2, $3, $4, $5);
-
- unless ($chunk) { $esc or last; next; }
-
- if ($esc && !$esc_asc) {
- $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
- if ($esc_kana) {
- $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
- } elsif ($esc_0212) {
- $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
- }
- $chunk = Encode::decode('euc-jp', $chunk, 0);
- }
- elsif ((my $npos = index($chunk, $trm)) >= 0) {
- $$rdst .= substr($chunk, 0, $npos + length($trm));
- $$rpos += length($esc) + $npos + length($trm);
- pos($$rsrc) = $opos;
- return 1;
- }
- $$rdst .= $chunk;
- $$rpos = pos($$rsrc);
+ while ( $$rsrc =~ /$re_scan_jis_g/gc ) {
+ my ( $esc, $esc_0212, $esc_asc, $esc_kana, $chunk ) =
+ ( $1, $2, $3, $4, $5 );
+
+ unless ($chunk) { $esc or last; next; }
+
+ if ( $esc && !$esc_asc ) {
+ $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
+ if ($esc_kana) {
+ $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
+ }
+ elsif ($esc_0212) {
+ $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
+ }
+ $chunk = Encode::decode( 'euc-jp', $chunk, 0 );
+ }
+ elsif ( ( my $npos = index( $chunk, $trm ) ) >= 0 ) {
+ $$rdst .= substr( $chunk, 0, $npos + length($trm) );
+ $$rpos += length($esc) + $npos + length($trm);
+ pos($$rsrc) = $opos;
+ return 1;
+ }
+ $$rdst .= $chunk;
+ $$rpos = pos($$rsrc);
}
$$rpos = pos($$rsrc);
pos($$rsrc) = $opos;
my $r_str = shift;
$$r_str =~ s($re_scan_jis)
{
- my ($esc_0212, $esc_asc, $esc_kana, $chunk) =
- ($1, $2, $3, $4);
- if (!$esc_asc) {
- $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
- if ($esc_kana) {
- $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
- }
- elsif ($esc_0212) {
- $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
- }
- }
- $chunk;
+ my ($esc_0212, $esc_asc, $esc_kana, $chunk) =
+ ($1, $2, $3, $4);
+ if (!$esc_asc) {
+ $chunk =~ tr/\x21-\x7e/\xa1-\xfe/;
+ if ($esc_kana) {
+ $chunk =~ s/([\xa1-\xdf])/\x8e$1/og;
+ }
+ elsif ($esc_0212) {
+ $chunk =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
+ }
+ }
+ $chunk;
}geox;
- my ($residue) = ($$r_str =~ s/(\e.*)$//so);
+ my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
return $residue;
}
-sub euc_jis{
+sub euc_jis {
no warnings qw(uninitialized);
- my $r_str = shift;
+ my $r_str = shift;
my $jis0212 = shift;
$$r_str =~ s{
- ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
- }{
- my $chunk = $1;
- my $esc =
- ( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
- ( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
- $ESC{JIS_0208};
- if ($esc eq $ESC{JIS_0212} && !$jis0212){
- # fallback to '?'
- $chunk =~ tr/\xA1-\xFE/\x3F/;
- }else{
- $chunk =~ tr/\xA1-\xFE/\x21-\x7E/;
- }
- $esc . $chunk . $ESC{ASC};
- }geox;
- $$r_str =~
- s/\Q$ESC{ASC}\E
- (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
+ ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
+ }{
+ my $chunk = $1;
+ my $esc =
+ ( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
+ ( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
+ $ESC{JIS_0208};
+ if ($esc eq $ESC{JIS_0212} && !$jis0212){
+ # fallback to '?'
+ $chunk =~ tr/\xA1-\xFE/\x3F/;
+ }else{
+ $chunk =~ tr/\xA1-\xFE/\x21-\x7E/;
+ }
+ $esc . $chunk . $ESC{ASC};
+ }geox;
+ $$r_str =~ s/\Q$ESC{ASC}\E
+ (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
$$r_str;
}
package Encode::KR::2022_KR;
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
use base qw(Encode::Encoding);
__PACKAGE__->Define('iso-2022-kr');
-sub needs_lines { 1 }
+sub needs_lines { 1 }
-sub perlio_ok {
- return 0; # for the time being
+sub perlio_ok {
+ return 0; # for the time being
}
-sub decode
-{
- my ($obj, $str, $chk) = @_;
- my $res = $str;
- my $residue = iso_euc(\$res);
+sub decode {
+ my ( $obj, $str, $chk ) = @_;
+ my $res = $str;
+ my $residue = iso_euc( \$res );
+
# This is for PerlIO
$_[1] = $residue if $chk;
- return Encode::decode('euc-kr', $res, FB_PERLQQ);
+ return Encode::decode( 'euc-kr', $res, FB_PERLQQ );
}
-sub encode
-{
- my ($obj, $utf8, $chk) = @_;
+sub encode {
+ my ( $obj, $utf8, $chk ) = @_;
+
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
- my $octet = Encode::encode('euc-kr', $utf8, FB_PERLQQ) ;
- euc_iso(\$octet);
+ my $octet = Encode::encode( 'euc-kr', $utf8, FB_PERLQQ );
+ euc_iso( \$octet );
return $octet;
}
# ISO<->EUC
-sub iso_euc{
+sub iso_euc {
my $r_str = shift;
- $$r_str =~ s/$RE{'2022_KR'}//gox; # remove the designator
+ $$r_str =~ s/$RE{'2022_KR'}//gox; # remove the designator
$$r_str =~ s{ # replace characters in GL
\x0e # between SO(\x0e) and SI(\x0f)
([^\x0f]*) # with characters in GR
$out =~ tr/\x21-\x7e/\xa1-\xfe/;
$out;
}geox;
- my ($residue) = ($$r_str =~ s/(\e.*)$//so);
+ my ($residue) = ( $$r_str =~ s/(\e.*)$//so );
return $residue;
}
-sub euc_iso{
+sub euc_iso {
no warnings qw(uninitialized);
my $r_str = shift;
- substr($$r_str,0,0)=$ESC{'2022_KR'}; # put the designator at the beg.
- $$r_str =~ s{ # move KS X 1001 characters in GR to GL
+ substr( $$r_str, 0, 0 ) =
+ $ESC{'2022_KR'}; # put the designator at the beg.
+ $$r_str =~
+ s{ # move KS X 1001 characters in GR to GL
($RE{EUC_C}+) # and enclose them with SO and SI
}{
my $str = $1;
package Encode::MIME::Header;
use strict;
+
# use warnings;
-our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;
-my %seed =
- (
- decode_b => '1', # decodes 'B' encoding ?
- decode_q => '1', # decodes 'Q' encoding ?
- encode => 'B', # encode with 'B' or 'Q' ?
- bpl => 75, # bytes per line
- );
+my %seed = (
+ decode_b => '1', # decodes 'B' encoding ?
+ decode_q => '1', # decodes 'Q' encoding ?
+ encode => 'B', # encode with 'B' or 'Q' ?
+ bpl => 75, # bytes per line
+);
$Encode::Encoding{'MIME-Header'} =
- bless {
- %seed,
- Name => 'MIME-Header',
- } => __PACKAGE__;
-
-$Encode::Encoding{'MIME-B'} =
- bless {
- %seed,
- decode_q => 0,
- Name => 'MIME-B',
- } => __PACKAGE__;
-
-$Encode::Encoding{'MIME-Q'} =
- bless {
- %seed,
- decode_q => 1,
- encode => 'Q',
- Name => 'MIME-Q',
- } => __PACKAGE__;
+ bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
+
+$Encode::Encoding{'MIME-B'} = bless {
+ %seed,
+ decode_q => 0,
+ Name => 'MIME-B',
+} => __PACKAGE__;
+
+$Encode::Encoding{'MIME-Q'} = bless {
+ %seed,
+ decode_q => 1,
+ encode => 'Q',
+ Name => 'MIME-Q',
+} => __PACKAGE__;
use base qw(Encode::Encoding);
sub needs_lines { 1 }
-sub perlio_ok{ 0 };
+sub perlio_ok { 0 }
-sub decode($$;$){
+sub decode($$;$) {
use utf8;
- my ($obj, $str, $chk) = @_;
+ my ( $obj, $str, $chk ) = @_;
+
# zap spaces between encoded words
$str =~ s/\?=\s+=\?/\?==\?/gos;
+
# multi-line header to single line
$str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
- 1 while ($str =~ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/); # Concat consecutive QP encoded mime headers
- # Fixes breaking inside multi-byte characters
+ 1 while ( $str =~
+ s/(\=\?[0-9A-Za-z\-_]+\?[Qq]\?)(.*?)\?\=\1(.*?)\?\=/$1$2$3\?\=/ )
+ ; # Concat consecutive QP encoded mime headers
+ # Fixes breaking inside multi-byte characters
- $str =~
- s{
- =\? # begin encoded word
- ([0-9A-Za-z\-_]+) # charset (encoding)
+ $str =~ s{
+ =\? # begin encoded word
+ ([0-9A-Za-z\-_]+) # charset (encoding)
(?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
- \?([QqBb])\? # delimiter
- (.*?) # Base64-encodede contents
- \?= # end encoded word
- }{
- if (uc($2) eq 'B'){
- $obj->{decode_b} or croak qq(MIME "B" unsupported);
- decode_b($1, $3);
- }elsif(uc($2) eq 'Q'){
- $obj->{decode_q} or croak qq(MIME "Q" unsupported);
- decode_q($1, $3);
- }else{
- croak qq(MIME "$2" encoding is nonexistent!);
- }
- }egox;
+ \?([QqBb])\? # delimiter
+ (.*?) # Base64-encodede contents
+ \?= # end encoded word
+ }{
+ if (uc($2) eq 'B'){
+ $obj->{decode_b} or croak qq(MIME "B" unsupported);
+ decode_b($1, $3);
+ }elsif(uc($2) eq 'Q'){
+ $obj->{decode_q} or croak qq(MIME "Q" unsupported);
+ decode_q($1, $3);
+ }else{
+ croak qq(MIME "$2" encoding is nonexistent!);
+ }
+ }egox;
$_[1] = '' if $chk;
return $str;
}
-sub decode_b{
- my $enc = shift;
- my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
+sub decode_b {
+ my $enc = shift;
+ my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
my $db64 = decode_base64(shift);
- return $d->name eq 'utf8' ?
- Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
+ return $d->name eq 'utf8'
+ ? Encode::decode_utf8($db64)
+ : $d->decode( $db64, Encode::FB_PERLQQ );
}
-sub decode_q{
- my ($enc, $q) = @_;
+sub decode_q {
+ my ( $enc, $q ) = @_;
my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
$q =~ s/_/ /go;
$q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
- return $d->name eq 'utf8' ?
- Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
+ return $d->name eq 'utf8'
+ ? Encode::decode_utf8($q)
+ : $d->decode( $q, Encode::FB_PERLQQ );
}
-my $especials =
- join('|' =>
- map {quotemeta(chr($_))}
- unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
+my $especials =
+ join( '|' => map { quotemeta( chr($_) ) }
+ unpack( "C*", qq{()<>@,;:\"\'/[]?.=} ) );
-my $re_encoded_word =
- qr{
+my $re_encoded_word = qr{
(?:
- =\? # begin encoded word
- (?:[0-9A-Za-z\-_]+) # charset (encoding)
+ =\? # begin encoded word
+ (?:[0-9A-Za-z\-_]+) # charset (encoding)
(?:\*\w+(?:-\w+)*)? # language (RFC 2231)
- \?(?:[QqBb])\? # delimiter
- (?:.*?) # Base64-encodede contents
- \?= # end encoded word
+ \?(?:[QqBb])\? # delimiter
+ (?:.*?) # Base64-encodede contents
+ \?= # end encoded word
)
}xo;
my $re_especials = qr{$re_encoded_word|$especials}xo;
-sub encode($$;$){
- my ($obj, $str, $chk) = @_;
+sub encode($$;$) {
+ my ( $obj, $str, $chk ) = @_;
my @line = ();
- for my $line (split /\r|\n|\r\n/o, $str){
- my (@word, @subline);
- for my $word (split /($re_especials)/o, $line){
- if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
- push @word, $obj->_encode($word);
- }else{
- push @word, $word;
- }
- }
- my $subline = '';
- for my $word (@word){
- use bytes ();
- if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
- push @subline, $subline;
- $subline = '';
- }
- $subline .= $word;
- }
- $subline and push @subline, $subline;
- push @line, join("\n " => @subline);
+ for my $line ( split /\r|\n|\r\n/o, $str ) {
+ my ( @word, @subline );
+ for my $word ( split /($re_especials)/o, $line ) {
+ if ( $word =~ /[^\x00-\x7f]/o
+ or $word =~ /^$re_encoded_word$/o )
+ {
+ push @word, $obj->_encode($word);
+ }
+ else {
+ push @word, $word;
+ }
+ }
+ my $subline = '';
+ for my $word (@word) {
+ use bytes ();
+ if ( bytes::length($subline) + bytes::length($word) >
+ $obj->{bpl} )
+ {
+ push @subline, $subline;
+ $subline = '';
+ }
+ $subline .= $word;
+ }
+ $subline and push @subline, $subline;
+ push @line, join( "\n " => @subline );
}
$_[1] = '' if $chk;
- return join("\n", @line);
+ return join( "\n", @line );
}
-use constant HEAD => '=?UTF-8?';
-use constant TAIL => '?=';
+use constant HEAD => '=?UTF-8?';
+use constant TAIL => '?=';
use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
-sub _encode{
- my ($o, $str) = @_;
- my $enc = $o->{encode};
- my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
+sub _encode {
+ my ( $o, $str ) = @_;
+ my $enc = $o->{encode};
+ my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
+
# to coerce a floating-point arithmetics, the following contains
# .0 in numbers -- dankogai
- $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
+ $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
my @result = ();
- my $chunk = '';
- while(length(my $chr = substr($str, 0, 1, ''))){
- use bytes ();
- if (bytes::length($chunk) + bytes::length($chr) > $llen){
- push @result, SINGLE->{$enc}($chunk);
- $chunk = '';
- }
- $chunk .= $chr;
+ my $chunk = '';
+ while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
+ use bytes ();
+ if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
+ push @result, SINGLE->{$enc}($chunk);
+ $chunk = '';
+ }
+ $chunk .= $chr;
}
$chunk and push @result, SINGLE->{$enc}($chunk);
return @result;
}
-sub _encode_b{
- HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
+sub _encode_b {
+ HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
}
-sub _encode_q{
+sub _encode_q {
my $chunk = shift;
$chunk =~ s{
- ([^0-9A-Za-z])
- }{
- join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
- }egox;
- return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
+ ([^0-9A-Za-z])
+ }{
+ join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
+ }egox;
+ return decode_utf8( HEAD . 'Q?' . $chunk . TAIL );
}
1;
use strict;
use base qw(Encode::MIME::Header);
-$Encode::Encoding{'MIME-Header-ISO_2022_JP'}
- = bless {encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP'}
- => __PACKAGE__;
+$Encode::Encoding{'MIME-Header-ISO_2022_JP'} =
+ bless { encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
+ __PACKAGE__;
-use constant HEAD => '=?ISO-2022-JP?B?';
-use constant TAIL => '?=';
+use constant HEAD => '=?ISO-2022-JP?B?';
+use constant TAIL => '?=';
use Encode::CJKConstants qw(%RE);
-our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
+our $VERSION = do { my @r = ( q$Revision: 1.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
# I owe the below codes totally to
# Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
sub encode {
- my $self = shift;
- my $str = shift;
+ my $self = shift;
+ my $str = shift;
- utf8::encode($str) if( Encode::is_utf8($str) );
- Encode::from_to($str, 'utf8', 'euc-jp');
+ utf8::encode($str) if ( Encode::is_utf8($str) );
+ Encode::from_to( $str, 'utf8', 'euc-jp' );
- my($trailing_crlf) = ($str =~ /(\n|\r|\x0d\x0a)$/o);
+ my ($trailing_crlf) = ( $str =~ /(\n|\r|\x0d\x0a)$/o );
- $str = _mime_unstructured_header($str, $self->{bpl});
+ $str = _mime_unstructured_header( $str, $self->{bpl} );
- not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
+ not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
- return $str;
+ return $str;
}
-
sub _mime_unstructured_header {
- my ($oldheader, $bpl) = @_;
- my $crlf = $oldheader =~ /\n$/;
- my($header, @words, @wordstmp, $i) = ('');
-
- $oldheader =~ s/\s+$//;
-
- @wordstmp = split /\s+/, $oldheader;
-
- for ($i = 0; $i < $#wordstmp; $i++){
- if( $wordstmp[$i] !~ /^[\x21-\x7E]+$/ and $wordstmp[$i + 1] !~ /^[\x21-\x7E]+$/){
- $wordstmp[$i + 1] = "$wordstmp[$i] $wordstmp[$i + 1]";
- }
- else{
- push(@words, $wordstmp[$i]);
- }
- }
-
- push(@words, $wordstmp[-1]);
-
- for my $word (@words){
- if ($word =~ /^[\x21-\x7E]+$/) {
- $header =~ /(?:.*\n)*(.*)/;
- if (length($1) + length($word) > $bpl) {
- $header .= "\n $word";
- }
- else{
- $header .= $word;
- }
- }
- else{
- $header = _add_encoded_word($word, $header, $bpl);
- }
-
- $header =~ /(?:.*\n)*(.*)/;
-
- if(length($1) == $bpl){
- $header .= "\n ";
- }
- else {
- $header .= ' ';
- }
- }
-
- $header =~ s/\n? $//mg;
-
- $crlf ? "$header\n" : $header;
+ my ( $oldheader, $bpl ) = @_;
+ my $crlf = $oldheader =~ /\n$/;
+ my ( $header, @words, @wordstmp, $i ) = ('');
+
+ $oldheader =~ s/\s+$//;
+
+ @wordstmp = split /\s+/, $oldheader;
+
+ for ( $i = 0 ; $i < $#wordstmp ; $i++ ) {
+ if ( $wordstmp[$i] !~ /^[\x21-\x7E]+$/
+ and $wordstmp[ $i + 1 ] !~ /^[\x21-\x7E]+$/ )
+ {
+ $wordstmp[ $i + 1 ] = "$wordstmp[$i] $wordstmp[$i + 1]";
+ }
+ else {
+ push( @words, $wordstmp[$i] );
+ }
+ }
+
+ push( @words, $wordstmp[-1] );
+
+ for my $word (@words) {
+ if ( $word =~ /^[\x21-\x7E]+$/ ) {
+ $header =~ /(?:.*\n)*(.*)/;
+ if ( length($1) + length($word) > $bpl ) {
+ $header .= "\n $word";
+ }
+ else {
+ $header .= $word;
+ }
+ }
+ else {
+ $header = _add_encoded_word( $word, $header, $bpl );
+ }
+
+ $header =~ /(?:.*\n)*(.*)/;
+
+ if ( length($1) == $bpl ) {
+ $header .= "\n ";
+ }
+ else {
+ $header .= ' ';
+ }
+ }
+
+ $header =~ s/\n? $//mg;
+
+ $crlf ? "$header\n" : $header;
}
-
sub _add_encoded_word {
- my($str, $line, $bpl) = @_;
- my $result = '';
-
- while( length($str) ){
- my $target = $str;
- $str = '';
-
- if(length($line) + 22 + ($target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o) * 8 > $bpl){
- $line =~ s/[ \t\n\r]*$/\n/;
- $result .= $line;
- $line = ' ';
- }
-
- while(1){
- my $iso_2022_jp = $target;
- Encode::from_to($iso_2022_jp, 'euc-jp', 'iso-2022-jp');
-
- my $encoded
- = HEAD . MIME::Base64::encode_base64($iso_2022_jp, '') . TAIL;
-
- if(length($encoded) + length($line) > $bpl){
- $target =~ s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
- $str = $1 . $str;
- }
- else{
- $line .= $encoded;
- last;
- }
- }
-
- }
-
- $result . $line;
+ my ( $str, $line, $bpl ) = @_;
+ my $result = '';
+
+ while ( length($str) ) {
+ my $target = $str;
+ $str = '';
+
+ if (
+ length($line) + 22 +
+ ( $target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o ) * 8 > $bpl )
+ {
+ $line =~ s/[ \t\n\r]*$/\n/;
+ $result .= $line;
+ $line = ' ';
+ }
+
+ while (1) {
+ my $iso_2022_jp = $target;
+ Encode::from_to( $iso_2022_jp, 'euc-jp', 'iso-2022-jp' );
+
+ my $encoded =
+ HEAD . MIME::Base64::encode_base64( $iso_2022_jp, '' ) . TAIL;
+
+ if ( length($encoded) + length($line) > $bpl ) {
+ $target =~
+ s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
+ $str = $1 . $str;
+ }
+ else {
+ $line .= $encoded;
+ last;
+ }
+ }
+
+ }
+
+ $result . $line;
}
-
1;
__END__
Now let's see what happens when you try to decode from ISO-2022-JP and
the buffer ends in the middle of a character.
- JIS208-ESC \x{5f3e}
+ JIS208-ESC \x{5f3e}
A B C .... ~ \e $ B |DAN | ....
41 42 43 .... 7E 1b 24 41 43 46 ....
<- buffer --------------------------->
#
-# $Id: UTF7.pm,v 2.1 2004/05/25 16:27:14 dankogai Exp $
+# $Id: UTF7.pm,v 2.3 2006/05/03 18:24:10 dankogai Exp $
#
package Encode::Unicode::UTF7;
use strict;
no warnings 'redefine';
use base qw(Encode::Encoding);
__PACKAGE__->Define('UTF-7');
-our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use MIME::Base64;
use Encode;
#
our $OPTIONAL_DIRECT_CHARS = 1;
-my $specials = quotemeta "\'(),-./:?";
-$OPTIONAL_DIRECT_CHARS and
- $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
+my $specials = quotemeta "\'(),-./:?";
+$OPTIONAL_DIRECT_CHARS
+ and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
+
# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
-# We use qr/[\n\r\t\ ] instead
-my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
+# We use qr/[\n\r\t\ ] instead
+my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
-my $e_utf16 = find_encoding("UTF-16BE");
+my $e_utf16 = find_encoding("UTF-16BE");
-sub needs_lines { 1 };
+sub needs_lines { 1 }
-sub encode($$;$){
- my ($obj, $str, $chk) = @_;
+sub encode($$;$) {
+ my ( $obj, $str, $chk ) = @_;
my $len = length($str);
pos($str) = 0;
my $bytes = '';
- while (pos($str) < $len){
- if ($str =~ /\G($re_asis+)/ogc){
- $bytes .= $1;
- }elsif($str =~ /\G($re_encoded+)/ogsc){
- if ($1 eq "+"){
- $bytes .= "+-";
- }else{
- my $s = $1;
- my $base64 = encode_base64($e_utf16->encode($s), '');
- $base64 =~ s/=+$//;
- $bytes .= "+$base64-";
- }
- }else{
- die "This should not happen! (pos=" . pos($str) . ")";
- }
+ while ( pos($str) < $len ) {
+ if ( $str =~ /\G($re_asis+)/ogc ) {
+ $bytes .= $1;
+ }
+ elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
+ if ( $1 eq "+" ) {
+ $bytes .= "+-";
+ }
+ else {
+ my $s = $1;
+ my $base64 = encode_base64( $e_utf16->encode($s), '' );
+ $base64 =~ s/=+$//;
+ $bytes .= "+$base64-";
+ }
+ }
+ else {
+ die "This should not happen! (pos=" . pos($str) . ")";
+ }
}
$_[1] = '' if $chk;
return $bytes;
}
-
-sub decode($$;$){
- my ($obj, $bytes, $chk) = @_;
+
+sub decode($$;$) {
+ my ( $obj, $bytes, $chk ) = @_;
my $len = length($bytes);
my $str = "";
no warnings 'uninitialized';
- while (pos($bytes) < $len) {
- if ($bytes =~ /\G([^+]+)/ogc) {
- $str .= $1;
- }elsif($bytes =~ /\G\+-/ogc) {
- $str .= "+";
- }elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) {
- my $base64 = $1;
- my $pad = length($base64) % 4;
- $base64 .= "=" x (4 - $pad) if $pad;
- $str .= $e_utf16->decode(decode_base64($base64));
- }elsif($bytes =~ /\G\+/ogc) {
- $^W and warn "Bad UTF7 data escape";
- $str .= "+";
- }else{
- die "This should not happen " . pos($bytes);
- }
+ while ( pos($bytes) < $len ) {
+ if ( $bytes =~ /\G([^+]+)/ogc ) {
+ $str .= $1;
+ }
+ elsif ( $bytes =~ /\G\+-/ogc ) {
+ $str .= "+";
+ }
+ elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
+ my $base64 = $1;
+ my $pad = length($base64) % 4;
+ $base64 .= "=" x ( 4 - $pad ) if $pad;
+ $str .= $e_utf16->decode( decode_base64($base64) );
+ }
+ elsif ( $bytes =~ /\G\+/ogc ) {
+ $^W and warn "Bad UTF7 data escape";
+ $str .= "+";
+ }
+ else {
+ die "This should not happen " . pos($bytes);
+ }
}
$_[1] = '' if $chk;
return $str;
BEGIN {
if ($ENV{'PERL_CORE'}){
- chdir 't';
- unshift @INC, '../lib';
+ chdir 't';
+ unshift @INC, '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
- print "1..0 # Skip: Encode was not built\n";
- exit 0;
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
}
}
sub init_a2c{
%a2c = (
- 'US-ascii' => 'ascii',
- 'ISO-646-US' => 'ascii',
- 'UTF-8' => 'utf-8-strict',
- 'UCS-2' => 'UCS-2BE',
- 'UCS2' => 'UCS-2BE',
- 'iso-10646-1' => 'UCS-2BE',
- 'ucs2-le' => 'UCS-2LE',
- 'ucs2-be' => 'UCS-2BE',
- 'utf16' => 'UTF-16',
- 'utf32' => 'UTF-32',
- 'utf16-be' => 'UTF-16BE',
- 'utf32-be' => 'UTF-32BE',
- 'utf16-le' => 'UTF-16LE',
- 'utf32-le' => 'UTF-32LE',
- 'UCS4-BE' => 'UTF-32BE',
- 'UCS-4-LE' => 'UTF-32LE',
- 'cyrillic' => 'iso-8859-5',
- 'arabic' => 'iso-8859-6',
- 'greek' => 'iso-8859-7',
- 'hebrew' => 'iso-8859-8',
- 'thai' => 'iso-8859-11',
- 'tis620' => 'iso-8859-11',
- 'WinLatin1' => 'cp1252',
- 'WinLatin2' => 'cp1250',
- 'WinCyrillic' => 'cp1251',
- 'WinGreek' => 'cp1253',
- 'WinTurkish' => 'cp1254',
- 'WinHebrew' => 'cp1255',
- 'WinArabic' => 'cp1256',
- 'WinBaltic' => 'cp1257',
- 'WinVietnamese' => 'cp1258',
- 'koi8r' => 'koi8-r',
- 'koi8u' => 'koi8-u',
- 'ja_JP.euc' => $ON_EBCDIC ? '' : 'euc-jp',
- 'x-euc-jp' => $ON_EBCDIC ? '' : 'euc-jp',
- 'zh_CN.euc' => $ON_EBCDIC ? '' : 'euc-cn',
- 'x-euc-cn' => $ON_EBCDIC ? '' : 'euc-cn',
- 'ko_KR.euc' => $ON_EBCDIC ? '' : 'euc-kr',
- 'x-euc-kr' => $ON_EBCDIC ? '' : 'euc-kr',
- 'ujis' => $ON_EBCDIC ? '' : 'euc-jp',
- 'Shift_JIS' => $ON_EBCDIC ? '' : 'shiftjis',
- 'x-sjis' => $ON_EBCDIC ? '' : 'shiftjis',
- 'jis' => $ON_EBCDIC ? '' : '7bit-jis',
- 'big-5' => $ON_EBCDIC ? '' : 'big5-eten',
- 'zh_TW.Big5' => $ON_EBCDIC ? '' : 'big5-eten',
- 'tca-big5' => $ON_EBCDIC ? '' : 'big5-eten',
- 'big5-hk' => $ON_EBCDIC ? '' : 'big5-hkscs',
- 'hkscs-big5' => $ON_EBCDIC ? '' : 'big5-hkscs',
- 'GB_2312-80' => $ON_EBCDIC ? '' : 'euc-cn',
- 'KS_C_5601-1987' => $ON_EBCDIC ? '' : 'cp949',
- #
- 'gb12345-raw' => $ON_EBCDIC ? '' : 'gb12345-raw',
- 'gb2312-raw' => $ON_EBCDIC ? '' : 'gb2312-raw',
- 'jis0201-raw' => $ON_EBCDIC ? '' : 'jis0201-raw',
- 'jis0208-raw' => $ON_EBCDIC ? '' : 'jis0208-raw',
- 'jis0212-raw' => $ON_EBCDIC ? '' : 'jis0212-raw',
- 'ksc5601-raw' => $ON_EBCDIC ? '' : 'ksc5601-raw',
- );
+ 'US-ascii' => 'ascii',
+ 'ISO-646-US' => 'ascii',
+ 'UTF-8' => 'utf-8-strict',
+ 'UCS-2' => 'UCS-2BE',
+ 'UCS2' => 'UCS-2BE',
+ 'iso-10646-1' => 'UCS-2BE',
+ 'ucs2-le' => 'UCS-2LE',
+ 'ucs2-be' => 'UCS-2BE',
+ 'utf16' => 'UTF-16',
+ 'utf32' => 'UTF-32',
+ 'utf16-be' => 'UTF-16BE',
+ 'utf32-be' => 'UTF-32BE',
+ 'utf16-le' => 'UTF-16LE',
+ 'utf32-le' => 'UTF-32LE',
+ 'UCS4-BE' => 'UTF-32BE',
+ 'UCS-4-LE' => 'UTF-32LE',
+ 'cyrillic' => 'iso-8859-5',
+ 'arabic' => 'iso-8859-6',
+ 'greek' => 'iso-8859-7',
+ 'hebrew' => 'iso-8859-8',
+ 'thai' => 'iso-8859-11',
+ 'tis620' => 'iso-8859-11',
+ 'WinLatin1' => 'cp1252',
+ 'WinLatin2' => 'cp1250',
+ 'WinCyrillic' => 'cp1251',
+ 'WinGreek' => 'cp1253',
+ 'WinTurkish' => 'cp1254',
+ 'WinHebrew' => 'cp1255',
+ 'WinArabic' => 'cp1256',
+ 'WinBaltic' => 'cp1257',
+ 'WinVietnamese' => 'cp1258',
+ 'koi8r' => 'koi8-r',
+ 'koi8u' => 'koi8-u',
+ 'ja_JP.euc' => $ON_EBCDIC ? '' : 'euc-jp',
+ 'x-euc-jp' => $ON_EBCDIC ? '' : 'euc-jp',
+ 'zh_CN.euc' => $ON_EBCDIC ? '' : 'euc-cn',
+ 'x-euc-cn' => $ON_EBCDIC ? '' : 'euc-cn',
+ 'ko_KR.euc' => $ON_EBCDIC ? '' : 'euc-kr',
+ 'x-euc-kr' => $ON_EBCDIC ? '' : 'euc-kr',
+ 'ujis' => $ON_EBCDIC ? '' : 'euc-jp',
+ 'Shift_JIS' => $ON_EBCDIC ? '' : 'shiftjis',
+ 'x-sjis' => $ON_EBCDIC ? '' : 'shiftjis',
+ 'jis' => $ON_EBCDIC ? '' : '7bit-jis',
+ 'big-5' => $ON_EBCDIC ? '' : 'big5-eten',
+ 'zh_TW.Big5' => $ON_EBCDIC ? '' : 'big5-eten',
+ 'tca-big5' => $ON_EBCDIC ? '' : 'big5-eten',
+ 'big5-hk' => $ON_EBCDIC ? '' : 'big5-hkscs',
+ 'hkscs-big5' => $ON_EBCDIC ? '' : 'big5-hkscs',
+ 'GB_2312-80' => $ON_EBCDIC ? '' : 'euc-cn',
+ 'KS_C_5601-1987' => $ON_EBCDIC ? '' : 'cp949',
+ #
+ 'gb12345-raw' => $ON_EBCDIC ? '' : 'gb12345-raw',
+ 'gb2312-raw' => $ON_EBCDIC ? '' : 'gb2312-raw',
+ 'jis0201-raw' => $ON_EBCDIC ? '' : 'jis0201-raw',
+ 'jis0208-raw' => $ON_EBCDIC ? '' : 'jis0208-raw',
+ 'jis0212-raw' => $ON_EBCDIC ? '' : 'jis0212-raw',
+ 'ksc5601-raw' => $ON_EBCDIC ? '' : 'ksc5601-raw',
+ );
for my $i (1..11,13..16){
- $a2c{"ISO 8859 $i"} = "iso-8859-$i";
+ $a2c{"ISO 8859 $i"} = "iso-8859-$i";
}
for my $i (1..10){
- $a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]";
+ $a2c{"ISO Latin $i"} = "iso-8859-$Encode::Alias::Latin2iso[$i]";
}
for my $k (keys %Encode::Alias::Winlatin2cp){
- my $v = $Encode::Alias::Winlatin2cp{$k};
- $a2c{"Win" . ucfirst($k)} = "cp" . $v;
- $a2c{"IBM-$v"} = $a2c{"MS-$v"} = "cp" . $v;
- $a2c{"cp-" . $v} = "cp" . $v;
+ my $v = $Encode::Alias::Winlatin2cp{$k};
+ $a2c{"Win" . ucfirst($k)} = "cp" . $v;
+ $a2c{"IBM-$v"} = $a2c{"MS-$v"} = "cp" . $v;
+ $a2c{"cp-" . $v} = "cp" . $v;
}
my @a2c = keys %a2c;
for my $k (@a2c){
- $a2c{uc($k)} = $a2c{$k};
- $a2c{lc($k)} = $a2c{$k};
- $a2c{lcfirst($k)} = $a2c{$k};
- $a2c{ucfirst($k)} = $a2c{$k};
+ $a2c{uc($k)} = $a2c{$k};
+ $a2c{lc($k)} = $a2c{$k};
+ $a2c{lcfirst($k)} = $a2c{$k};
+ $a2c{ucfirst($k)} = $a2c{$k};
}
}
if ($ON_EBCDIC){
delete @Encode::ExtModule{
- qw(euc-cn gb2312 gb12345 gbk cp936 iso-ir-165 MacChineseSimp
- euc-jp iso-2022-jp 7bit-jis shiftjis MacJapanese cp932
- euc-kr ksc5601 cp949 MacKorean
- big5 big5-hkscs cp950 MacChineseTrad
- gb18030 big5plus euc-tw)
- };
+ qw(euc-cn gb2312 gb12345 gbk cp936 iso-ir-165 MacChineseSimp
+ euc-jp iso-2022-jp 7bit-jis shiftjis MacJapanese cp932
+ euc-kr ksc5601 cp949 MacKorean
+ big5 big5-hkscs cp950 MacChineseTrad
+ gb18030 big5plus euc-tw)
+ };
}
use Test::More tests => (scalar keys %a2c) * 4;
foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
is((defined($e) and $e->name), $a2c{$a},$a)
- or warn "alias was $a";;
+ or warn "alias was $a";;
}
# now we override some of the aliases and see if it works fine
define_alias(
- qr/ascii/i => 'WinLatin1',
- qr/cyrillic/i => 'WinCyrillic',
- qr/arabic/i => 'WinArabic',
- qr/greek/i => 'WinGreek',
- qr/hebrew/i => 'WinHebrew'
- );
+ qr/ascii/i => 'WinLatin1',
+ qr/cyrillic/i => 'WinCyrillic',
+ qr/arabic/i => 'WinArabic',
+ qr/greek/i => 'WinGreek',
+ qr/hebrew/i => 'WinHebrew'
+ );
print "# alias test with alias overrides\n";
foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
is((defined($e) and $e->name), $a2c{$a}, "Override $a")
- or warn "alias was $a";
+ or warn "alias was $a";
}
print "# alias undef test\n";
foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a")
- or warn "alias was $a";
+ or warn "alias was $a";
}
print "# alias reinit test\n";
foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
is((defined($e) and $e->name), $a2c{$a}, "Reinit $a")
- or warn "alias was $a";
+ or warn "alias was $a";
}
__END__
for my $k (keys %a2c){
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
# should work w/o PerlIO now!
# unless (PerlIO::Layer->find('perlio')){
open $src, "<$src_enc" or die "$src_enc : $!";
if (PerlIO::Layer->find('perlio')){
- binmode($src, ":bytes"); # needed when :utf8 in default open layer
+ binmode($src, ":bytes"); # needed when :utf8 in default open layer
}
$txt = join('',<$src>);
open $dst, ">$dst_utf" or die "$dst_utf : $!";
if (PerlIO::Layer->find('perlio')){
- binmode($dst, ":utf8");
- print $dst $uni;
+ binmode($dst, ":utf8");
+ print $dst $uni;
}else{ # ugh!
- binmode($dst);
- my $raw = $uni; Encode::_utf8_off($raw);
- print $dst $raw;
+ binmode($dst);
+ my $raw = $uni; Encode::_utf8_off($raw);
+ print $dst $raw;
}
close($dst);
is(compare_text($dst_utf, $src_utf), 0, "$dst_utf eq $src_utf")
- or ($DEBUG and rename $dst_utf, "$dst_utf.$seq");
+ or ($DEBUG and rename $dst_utf, "$dst_utf.$seq");
$seq++;
open $src, "<$src_utf" or die "$src_utf : $!";
if (PerlIO::Layer->find('perlio')){
- binmode($src, ":utf8");
- $uni = join('', <$src>);
+ binmode($src, ":utf8");
+ $uni = join('', <$src>);
}else{ # ugh!
- binmode($src);
- $uni = join('', <$src>);
- Encode::_utf8_on($uni);
+ binmode($src);
+ $uni = join('', <$src>);
+ Encode::_utf8_on($uni);
}
close $src;
print $dst $txt;
close($dst);
is(compare_text($src_enc, $dst_enc), 0 => "$dst_enc eq $src_enc")
- or ($DEBUG and rename $dst_enc, "$dst_enc.$seq");
+ or ($DEBUG and rename $dst_enc, "$dst_enc.$seq");
$seq++;
unlink($dst_utf, $dst_enc);
for my $encoding (@{$Charset{$charset}}){
- my $rt = decode($encoding, encode($encoding, $uni));
- is ($rt, $uni, "RT $encoding");
+ my $rt = decode($encoding, encode($encoding, $uni));
+ is ($rt, $uni, "RT $encoding");
}
}
use strict;
use Test;
use Encode qw(from_to encode decode
- encode_utf8 decode_utf8
- find_encoding is_utf8);
+ encode_utf8 decode_utf8
+ find_encoding is_utf8);
use charnames qw(greek);
my @encodings = grep(/iso-?8859/,Encode::encodings());
my $n = 2;
#
-# $Id: Unicode.t,v 2.0 2004/05/16 20:55:17 dankogai Exp $
+# $Id: Unicode.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ exit 0;
}
$| = 1;
}
pack("C*", map {hex($_)} qw<0f 5c fc 98 00 30 3e 5f fd ff>);
my $n_32be =
pack("C*", map {hex($_)}
- qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e 00 01 ab cd>);
+ qw<00 00 5c 0f 00 00 98 fc 00 00 30 00 00 00 5f 3e 00 01 ab cd>);
my $n_32le =
pack("C*", map {hex($_)}
- qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00 cd ab 01 00>);
+ qw<0f 5c 00 00 fc 98 00 00 00 30 00 00 3e 5f 00 00 cd ab 01 00>);
my $n_16bb = pack('n', 0xFeFF) . $n_16be;
my $n_16lb = pack('v', 0xFeFF) . $n_16le;
SKIP: {
my $utf8 = '';
for my $j (0,0x10){
- for my $i (0..0xffff){
- $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next;
- $utf8 .= ord($j+$i);
- }
- for my $major ('UTF-16', 'UTF-32'){
- for my $minor ('BE', 'LE'){
- my $enc = $major.$minor;
- is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT");
- }
- }
+ for my $i (0..0xffff){
+ $j == 0 and (0xD800 <= $i && $i <= 0xDFFF) and next;
+ $utf8 .= ord($j+$i);
+ }
+ for my $major ('UTF-16', 'UTF-32'){
+ for my $minor ('BE', 'LE'){
+ my $enc = $major.$minor;
+ is(decode($enc, encode($enc, $utf8)), $utf8, "$enc RT");
+ }
+ }
}
};
open my $fh, '<', $path or die "$path:$!";
my $content;
if (PerlIO::Layer->find('perlio')){
- binmode $fh => ':utf8';
- $content = join('' => <$fh>);
+ binmode $fh => ':utf8';
+ $content = join('' => <$fh>);
}else{ # ugh!
- binmode $fh;
- $content = join('' => <$fh>);
- Encode::_utf8_on($content)
+ binmode $fh;
+ $content = join('' => <$fh>);
+ Encode::_utf8_on($content)
}
close $fh;
is(decode("UTF-7", encode("UTF-7", $content)), $content,
BEGIN {
if ($ENV{'PERL_CORE'}){
chdir 't';
- unshift @INC, '../lib';
+ unshift @INC, '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
$| = 1;
}
# $enc = encoding, $str = content
foreach my $enc (sort keys %{$tests}) {
- my $str = $tests->{$enc};
+ my $str = $tests->{$enc};
- is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title");
- is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title");
+ is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title");
+ is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title");
- my $str2 = $str;
- my $utf8 = Encode::encode('utf-8', $utf);
+ my $str2 = $str;
+ my $utf8 = Encode::encode('utf-8', $utf);
- Encode::from_to($str2, $enc, 'utf-8');
- is($str2, $utf8, "[$enc] from_to => utf8 - $title");
+ Encode::from_to($str2, $enc, 'utf-8');
+ is($str2, $utf8, "[$enc] from_to => utf8 - $title");
- Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc
- is($utf8, $str, "[$enc] utf8 => from_to - $title");
+ Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc
+ is($utf8, $str, "[$enc] utf8 => from_to - $title");
}
}
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
$| = 1;
}
# $enc = encoding, $str = content
foreach my $enc (sort keys %{$tests}) {
- my $str = $tests->{$enc};
+ my $str = $tests->{$enc};
- is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title");
- is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title");
+ is(Encode::decode($enc, $str), $utf, "[$enc] decode - $title");
+ is(Encode::encode($enc, $utf), $str, "[$enc] encode - $title");
- my $str2 = $str;
- my $utf8 = Encode::encode('utf-8', $utf);
+ my $str2 = $str;
+ my $utf8 = Encode::encode('utf-8', $utf);
- Encode::from_to($str2, $enc, 'utf-8');
- is($str2, $utf8, "[$enc] from_to => utf8 - $title");
+ Encode::from_to($str2, $enc, 'utf-8');
+ is($str2, $utf8, "[$enc] from_to => utf8 - $title");
- Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc
- is($utf8, $str, "[$enc] utf8 => from_to - $title");
+ Encode::from_to($utf8, 'utf-8', $enc); # convert $utf8 as $enc
+ is($utf8, $str, "[$enc] utf8 => from_to - $title");
}
}
-# $Id: enc_data.t,v 2.0 2004/05/16 20:55:18 dankogai Exp $
+# $Id: enc_data.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $
BEGIN {
require Config; import Config;
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
- exit(0);
+ print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
}
if ($] <= 5.008 and !$Config{perl_patchlevel}){
- print "1..0 # Skip: Perl 5.8.1 or later required\n";
- exit 0;
+ print "1..0 # Skip: Perl 5.8.1 or later required\n";
+ exit 0;
}
}
-# $Id: enc_eucjp.t,v 2.0 2004/05/16 20:55:18 dankogai Exp $
+# $Id: enc_eucjp.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $
# This is the twin of enc_utf8.t .
BEGIN {
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
- exit(0);
+ print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
}
if ($] <= 5.008 and !$Config{perl_patchlevel}){
- print "1..0 # Skip: Perl 5.8.1 or later required\n";
- exit 0;
+ print "1..0 # Skip: Perl 5.8.1 or later required\n";
+ exit 0;
}
}
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
binmode(F, ":encoding(utf-8)");
{
- local $^W = 1;
- local $SIG{__WARN__} = sub { $a = shift };
- eval { <F> }; # This should get caught.
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
}
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
-# $Id: enc_module.t,v 2.0 2004/05/16 20:55:18 dankogai Exp $
+# $Id: enc_module.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $
# This file is in euc-jp
BEGIN {
require Config; import Config;
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (defined ${^UNICODE} and ${^UNICODE} != 0){
- print "1..0 # Skip: \${^UNICODE} == ${^UNICODE}\n";
- exit 0;
+ print "1..0 # Skip: \${^UNICODE} == ${^UNICODE}\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
- exit(0);
+ print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
}
}
use lib qw(t ext/Encode/t ../ext/Encode/t); # latter 2 for perl core
-# $Id: enc_utf8.t,v 2.0 2004/05/16 20:55:18 dankogai Exp $
+# $Id: enc_utf8.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $
# This is the twin of enc_eucjp.t .
BEGIN {
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
- exit(0);
+ print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
}
}
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
binmode(F, ":encoding(utf-8)");
{
- local $^W = 1;
- local $SIG{__WARN__} = sub { $a = shift };
- eval { <F> }; # This should get caught.
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
}
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
- exit(0);
+ print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ exit(0);
}
}
print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
((pack("U*", 0x3AE) cmp $byte) == -1) &&
((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
- ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
+ ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
print "ok 28\n";
}
{
- my %h1;
- my %h2;
- $h1{"\xdf"} = 41;
- $h2{"\x{3af}"} = 42;
- print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
- print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n";
+ my %h1;
+ my %h2;
+ $h1{"\xdf"} = 41;
+ $h2{"\x{3af}"} = 42;
+ print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
+ print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n";
}
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
$| = 1;
}
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
$| = 1;
}
my $english = "The quick brown fox jumps over the black lazy dog.";
for my $utf (qw/UTF-16 UTF-32/){
for my $bl (qw/BE LE/){
- my $test = encode("$utf$bl" => $english);
- is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
+ my $test = encode("$utf$bl" => $english);
+ is(guess_encoding($test)->name, "$utf$bl", "$utf$bl");
}
}
for my $bl (qw/BE LE/){
#
-# $Id: jperl.t,v 2.0 2004/05/16 20:55:18 dankogai Exp $
+# $Id: jperl.t,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
# This script is written in euc-jp
exit 0;
}
unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
+ print "1..0 # Skip: PerlIO was not built\n";
+ exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
$| = 1;
}
#
-# $Id: mime-header.t,v 2.1 2006/01/15 15:06:36 dankogai Exp $
+# $Id: mime-header.t,v 2.2 2006/05/03 18:24:10 dankogai Exp $
# This script is written in utf8
#
BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
$| = 1;
}
use Encode;
BEGIN{
- use_ok('Encode::MIME::Header::ISO_2022_JP');
+ use_ok('Encode::MIME::Header::ISO_2022_JP');
}
require_ok('Encode::MIME::Header::ISO_2022_JP');
# below codes are from mime.t in Jcode
my %mime = (
- "´Á»ú¡¢¥«¥¿¥«¥Ê¡¢¤Ò¤é¤¬¤Ê"
- => "=?ISO-2022-JP?B?GyRCNEE7eiEiJSslPyUrJUohIiRSJGkkLCRKGyhC?=",
- "foo bar"
- => "foo bar",
- "´Á»ú¡¢¥«¥¿¥«¥Ê¡¢¤Ò¤é¤¬¤Ê¤Îº®¤¸¤Ã¤¿Subject Header."
- => "=?ISO-2022-JP?B?GyRCNEE7eiEiJSslPyUrJUohIiRSJGkkLCRKJE46LiQ4JEMkPxsoQlN1?=\n =?ISO-2022-JP?B?YmplY3Q=?= Header.",
+ "´Á»ú¡¢¥«¥¿¥«¥Ê¡¢¤Ò¤é¤¬¤Ê"
+ => "=?ISO-2022-JP?B?GyRCNEE7eiEiJSslPyUrJUohIiRSJGkkLCRKGyhC?=",
+ "foo bar"
+ => "foo bar",
+ "´Á»ú¡¢¥«¥¿¥«¥Ê¡¢¤Ò¤é¤¬¤Ê¤Îº®¤¸¤Ã¤¿Subject Header."
+ => "=?ISO-2022-JP?B?GyRCNEE7eiEiJSslPyUrJUohIiRSJGkkLCRKJE46LiQ4JEMkPxsoQlN1?=\n =?ISO-2022-JP?B?YmplY3Q=?= Header.",
);
for my $decoded (sort keys %mime){
- my $encoded = $mime{$decoded};
+ my $encoded = $mime{$decoded};
- my $header = Encode::encode('MIME-Header-ISO_2022_JP', decode('euc-jp', $decoded));
- my $utf8 = Encode::decode('MIME-Header', $header);
+ my $header = Encode::encode('MIME-Header-ISO_2022_JP', decode('euc-jp', $decoded));
+ my $utf8 = Encode::decode('MIME-Header', $header);
- is(encode('euc-jp', $utf8), $decoded);
- is($header, $encoded);
+ is(encode('euc-jp', $utf8), $decoded);
+ is($header, $encoded);
}
__END__
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
unless (PerlIO::Layer->find('perlio')){
print "1..0 # Skip: PerlIO required\n";
close $fh;
for my $e (@{$e{$src}}){
- my $sfile = File::Spec->catfile($dir,"$$.sio");
- my $pfile = File::Spec->catfile($dir,"$$.pio");
+ my $sfile = File::Spec->catfile($dir,"$$.sio");
+ my $pfile = File::Spec->catfile($dir,"$$.pio");
- # first create a file without perlio
- dump2file($sfile, &encode($e, $utext, 0));
+ # first create a file without perlio
+ dump2file($sfile, &encode($e, $utext, 0));
- # then create a file via perlio without autoflush
+ # then create a file via perlio without autoflush
SKIP:{
- skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG);
- no warnings 'uninitialized';
- open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
- $fh->autoflush(0);
- print $fh $utext;
- close $fh;
- $seq++;
- is(compare_text($sfile, $pfile), 0 => ">:encoding($e)");
- if ($DEBUG){
- copy $sfile, "$sfile.$seq";
- copy $pfile, "$pfile.$seq";
- }
-
- # this time print line by line.
- # works even for ISO-2022 but not ISO-2022-KR
- open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
- $fh->autoflush(1);
- for my $l (@uline) {
- print $fh $l;
- }
- close $fh;
- $seq++;
- is(compare_text($sfile, $pfile), 0 => ">:encoding($e) by lines");
- if ($DEBUG){
- copy $sfile, "$sfile.$seq";
- copy $pfile, "$pfile.$seq";
- }
- my $dtext;
- open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
- $fh->autoflush(0);
- $dtext = join('' => <$fh>);
- close $fh;
- $seq++;
- ok($utext eq $dtext, "<:encoding($e)");
- if ($DEBUG){
- dump2file("$sfile.$seq", $utext);
- dump2file("$pfile.$seq", $dtext);
- }
- if (perlio_ok($e) or $DEBUG){
- $dtext = '';
- open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
- while(defined(my $l = <$fh>)) {
- $dtext .= $l;
- }
- close $fh;
- }
- $seq++;
- ok($utext eq $dtext, "<:encoding($e) by lines");
- if ($DEBUG){
- dump2file("$sfile.$seq", $utext);
- dump2file("$pfile.$seq", $dtext);
- }
- }
+ skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG);
+ no warnings 'uninitialized';
+ open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
+ $fh->autoflush(0);
+ print $fh $utext;
+ close $fh;
+ $seq++;
+ is(compare_text($sfile, $pfile), 0 => ">:encoding($e)");
+ if ($DEBUG){
+ copy $sfile, "$sfile.$seq";
+ copy $pfile, "$pfile.$seq";
+ }
+
+ # this time print line by line.
+ # works even for ISO-2022 but not ISO-2022-KR
+ open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
+ $fh->autoflush(1);
+ for my $l (@uline) {
+ print $fh $l;
+ }
+ close $fh;
+ $seq++;
+ is(compare_text($sfile, $pfile), 0 => ">:encoding($e) by lines");
+ if ($DEBUG){
+ copy $sfile, "$sfile.$seq";
+ copy $pfile, "$pfile.$seq";
+ }
+ my $dtext;
+ open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
+ $fh->autoflush(0);
+ $dtext = join('' => <$fh>);
+ close $fh;
+ $seq++;
+ ok($utext eq $dtext, "<:encoding($e)");
+ if ($DEBUG){
+ dump2file("$sfile.$seq", $utext);
+ dump2file("$pfile.$seq", $dtext);
+ }
+ if (perlio_ok($e) or $DEBUG){
+ $dtext = '';
+ open $fh, "<:encoding($e)", $pfile or die "$pfile : $!";
+ while(defined(my $l = <$fh>)) {
+ $dtext .= $l;
+ }
+ close $fh;
+ }
+ $seq++;
+ ok($utext eq $dtext, "<:encoding($e) by lines");
+ if ($DEBUG){
+ dump2file("$sfile.$seq", $utext);
+ dump2file("$pfile.$seq", $dtext);
+ }
+ }
if ( ! $DEBUG ) {
1 while unlink ($sfile);
1 while unlink ($pfile);
SKIP:{
my $pev = PerlIO::encoding->VERSION;
skip "PerlIO::encoding->VERSION = $pev <= 0.07 ", 6
- unless ($pev >= 0.07 or $DEBUG);
+ unless ($pev >= 0.07 or $DEBUG);
my $file = File::Spec->catfile($dir,"jisx0208.utf");
open my $fh, "<:utf8", $file or die "$file : $!";
my $str = join('' => <$fh>);
close $fh;
my %bom = (
- 'UTF-16BE' => pack('n', 0xFeFF),
- 'UTF-16LE' => pack('v', 0xFeFF),
- 'UTF-32BE' => pack('N', 0xFeFF),
- 'UTF-32LE' => pack('V', 0xFeFF),
- );
+ 'UTF-16BE' => pack('n', 0xFeFF),
+ 'UTF-16LE' => pack('v', 0xFeFF),
+ 'UTF-32BE' => pack('N', 0xFeFF),
+ 'UTF-32LE' => pack('V', 0xFeFF),
+ );
# reading
for my $utf (sort keys %bom){
- my $bomed = $bom{$utf} . encode($utf, $str);
- my $sfile = File::Spec->catfile($dir,".${utf}_${seq}_$$");
- dump2file($sfile, $bomed);
- my $utf_nobom = $utf; $utf_nobom =~ s/(LE|BE)$//o;
- # reading
- open $fh, "<:encoding($utf_nobom)", $sfile or die "$sfile : $!";
- my $cmp = join '' => <$fh>;
- close $fh;
- is($str, $cmp, "<:encoding($utf_nobom) eq $utf");
- unlink $sfile; $seq++;
+ my $bomed = $bom{$utf} . encode($utf, $str);
+ my $sfile = File::Spec->catfile($dir,".${utf}_${seq}_$$");
+ dump2file($sfile, $bomed);
+ my $utf_nobom = $utf; $utf_nobom =~ s/(LE|BE)$//o;
+ # reading
+ open $fh, "<:encoding($utf_nobom)", $sfile or die "$sfile : $!";
+ my $cmp = join '' => <$fh>;
+ close $fh;
+ is($str, $cmp, "<:encoding($utf_nobom) eq $utf");
+ unlink $sfile; $seq++;
}
# writing
for my $utf_nobom (qw/UTF-16 UTF-32/){
- my $utf = $utf_nobom . 'BE';
- my $sfile = File::Spec->catfile($dir,".${utf_nobom}_${seq}_$$");
- my $bomed = $bom{$utf} . encode($utf, $str);
- open $fh, ">:encoding($utf_nobom)", $sfile or die "$sfile : $!";
- print $fh $str;
- close $fh;
- open my $fh, "<:bytes", $sfile or die "$sfile : $!";
- read $fh, my $cmp, -s $sfile;
- close $fh;
- use bytes ();
- ok($bomed eq $cmp, ">:encoding($utf_nobom) eq $utf");
- unlink $sfile; $seq++;
+ my $utf = $utf_nobom . 'BE';
+ my $sfile = File::Spec->catfile($dir,".${utf_nobom}_${seq}_$$");
+ my $bomed = $bom{$utf} . encode($utf, $str);
+ open $fh, ">:encoding($utf_nobom)", $sfile or die "$sfile : $!";
+ print $fh $str;
+ close $fh;
+ open my $fh, "<:bytes", $sfile or die "$sfile : $!";
+ read $fh, my $cmp, -s $sfile;
+ close $fh;
+ use bytes ();
+ ok($bomed eq $cmp, ">:encoding($utf_nobom) eq $utf");
+ unlink $sfile; $seq++;
}
}
sub dump2file{
#!/usr/local/bin/perl
#
-# $Id: rt.pl,v 2.0 2004/05/16 20:55:19 dankogai Exp $
+# $Id: rt.pl,v 2.1 2006/05/03 18:24:10 dankogai Exp $
#
BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # Skip: EBCDIC\n";
- exit 0;
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
}
use strict;
require Test::More;
our $DEBUG;
our @ucm;
unless(@ARGV){
- use File::Spec;
- Test::More->import(tests => 103);
- opendir my $dh, $ucmdir or die "$ucmdir:$!";
- @ucm =
- map {File::Spec->catfile($ucmdir, $_) }
- sort grep {/\.ucm$/o} readdir($dh);
- closedir $dh;
+ use File::Spec;
+ Test::More->import(tests => 103);
+ opendir my $dh, $ucmdir or die "$ucmdir:$!";
+ @ucm =
+ map {File::Spec->catfile($ucmdir, $_) }
+ sort grep {/\.ucm$/o} readdir($dh);
+ closedir $dh;
}else{
- Test::More->import("no_plan");
- $DEBUG = 1;
- @ucm = @ARGV;
+ Test::More->import("no_plan");
+ $DEBUG = 1;
+ @ucm = @ARGV;
}
}
open my $rfh, "<$ucm" or die "$ucm:$!";
# <U0000> \x00 |0 # <control>
while(<$rfh>){
- s/#.*//o; /^$/ and next;
- unless ($name){
- /^<code_set_name>\s+"([^\"]+)"/io or next;
- $name = $1 and next;
- }else{
- /^<U([0-9a-f]+)>\s+(\S+)\s+\|(\d)/io or next;
- $nchar++;
- $3 == 0 or next;
- $nrt++;
- my $uni = chr(hex($1));
- my $enc = eval qq{ "$2" };
- decode($name, $enc) eq $uni or $nok++;
- encode($name, $uni) eq $enc or $nok++;
- }
+ s/#.*//o; /^$/ and next;
+ unless ($name){
+ /^<code_set_name>\s+"([^\"]+)"/io or next;
+ $name = $1 and next;
+ }else{
+ /^<U([0-9a-f]+)>\s+(\S+)\s+\|(\d)/io or next;
+ $nchar++;
+ $3 == 0 or next;
+ $nrt++;
+ my $uni = chr(hex($1));
+ my $enc = eval qq{ "$2" };
+ decode($name, $enc) eq $uni or $nok++;
+ encode($name, $uni) eq $enc or $nok++;
+ }
}
return($name, $nchar, $nrt, $nok);
}
for my $i (@sizes){
my $sz = 256 * $i;
for my $cp (qw(BMP HIGH)){
- $S{utf8}{$sz}{$cp} = $utf8_seed{$cp} x $i;
- $S{utf16}{$sz}{$cp} = encode('UTF-16BE', $S{utf8}{$sz}{$cp});
+ $S{utf8}{$sz}{$cp} = $utf8_seed{$cp} x $i;
+ $S{utf16}{$sz}{$cp} = encode('UTF-16BE', $S{utf8}{$sz}{$cp});
}
}
my $sz = $i * 256;
my $count = $Count * int(256/$i);
for my $cp (qw(BMP HIGH)){
- for my $op (qw(encode decode)){
- my ($meth, $from, $to) = ($op eq 'encode') ?
- (\&encode, 'utf8', 'utf16') : (\&decode, 'utf16', 'utf8');
- my $XS = sub {
- Encode::Unicode::set_transcoder("xs");
- $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
- eq $S{$to}{$sz}{$cp}
- or die "$op,$from,$to,$sz,$cp";
- };
- my $modern = sub {
- Encode::Unicode::set_transcoder("modern");
- $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
- eq $S{$to}{$sz}{$cp}
- or die "$op,$from,$to,$sz,$cp";
- };
- my $classic = sub {
- Encode::Unicode::set_transcoder("classic");
- $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
- eq $S{$to}{$sz}{$cp} or
- die "$op,$from,$to,$sz,$cp";
- };
- print "---- $op length=$sz/range=$cp ----\n";
- my $r = timethese($count,
- {
- "XS" => $XS,
- "Modern" => $modern,
- "Classic" => $classic,
- },
- 'none',
- );
- cmpthese($r);
- }
+ for my $op (qw(encode decode)){
+ my ($meth, $from, $to) = ($op eq 'encode') ?
+ (\&encode, 'utf8', 'utf16') : (\&decode, 'utf16', 'utf8');
+ my $XS = sub {
+ Encode::Unicode::set_transcoder("xs");
+ $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
+ eq $S{$to}{$sz}{$cp}
+ or die "$op,$from,$to,$sz,$cp";
+ };
+ my $modern = sub {
+ Encode::Unicode::set_transcoder("modern");
+ $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
+ eq $S{$to}{$sz}{$cp}
+ or die "$op,$from,$to,$sz,$cp";
+ };
+ my $classic = sub {
+ Encode::Unicode::set_transcoder("classic");
+ $meth->('UTF-16BE', $S{$from}{$sz}{$cp})
+ eq $S{$to}{$sz}{$cp} or
+ die "$op,$from,$to,$sz,$cp";
+ };
+ print "---- $op length=$sz/range=$cp ----\n";
+ my $r = timethese($count,
+ {
+ "XS" => $XS,
+ "Modern" => $modern,
+ "Classic" => $classic,
+ },
+ 'none',
+ );
+ cmpthese($r);
+ }
}
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
- exit 0;
+ exit 0;
}
if ($] <= 5.008 and !$Config{perl_patchlevel}){
- print "1..0 # Skip: Perl 5.8.1 or later required\n";
- exit 0;
+ print "1..0 # Skip: Perl 5.8.1 or later required\n";
+ exit 0;
}
# http://smontagu.damowmow.com/utf8test.html
%ORD = (
- 0x00000080 => 0, # 2.1.2
- 0x00000800 => 0, # 2.1.3
- 0x00010000 => 0, # 2.1.4
- 0x00200000 => 1, # 2.1.5
- 0x00400000 => 1, # 2.1.6
- 0x0000007F => 0, # 2.2.1 -- unmapped okay
- 0x000007FF => 0, # 2.2.2
- 0x0000FFFF => 1, # 2.2.3
- 0x001FFFFF => 1, # 2.2.4
- 0x03FFFFFF => 1, # 2.2.5
- 0x7FFFFFFF => 1, # 2.2.6
- 0x0000D800 => 1, # 5.1.1
- 0x0000DB7F => 1, # 5.1.2
- 0x0000D880 => 1, # 5.1.3
- 0x0000DBFF => 1, # 5.1.4
- 0x0000DC00 => 1, # 5.1.5
- 0x0000DF80 => 1, # 5.1.6
- 0x0000DFFF => 1, # 5.1.7
- # 5.2 "Paird UTF-16 surrogates skipped
- # because utf-8-strict raises exception at the first one
- 0x0000FFFF => 1, # 5.3.1
- );
+ 0x00000080 => 0, # 2.1.2
+ 0x00000800 => 0, # 2.1.3
+ 0x00010000 => 0, # 2.1.4
+ 0x00200000 => 1, # 2.1.5
+ 0x00400000 => 1, # 2.1.6
+ 0x0000007F => 0, # 2.2.1 -- unmapped okay
+ 0x000007FF => 0, # 2.2.2
+ 0x0000FFFF => 1, # 2.2.3
+ 0x001FFFFF => 1, # 2.2.4
+ 0x03FFFFFF => 1, # 2.2.5
+ 0x7FFFFFFF => 1, # 2.2.6
+ 0x0000D800 => 1, # 5.1.1
+ 0x0000DB7F => 1, # 5.1.2
+ 0x0000D880 => 1, # 5.1.3
+ 0x0000DBFF => 1, # 5.1.4
+ 0x0000DC00 => 1, # 5.1.5
+ 0x0000DF80 => 1, # 5.1.6
+ 0x0000DFFF => 1, # 5.1.7
+ # 5.2 "Paird UTF-16 surrogates skipped
+ # because utf-8-strict raises exception at the first one
+ 0x0000FFFF => 1, # 5.3.1
+ );
$NTESTS += scalar keys %ORD;
%SEQ = (
- qq/ed 9f bf/ => 0, # 2.3.1
- qq/ee 80 80/ => 0, # 2.3.2
- qq/f4 8f bf bf/ => 0, # 2.3.3
- qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
- # "3 Malformed sequences" are checked by perl.
- # "4 Overlong sequences" are checked by perl.
- );
+ qq/ed 9f bf/ => 0, # 2.3.1
+ qq/ee 80 80/ => 0, # 2.3.2
+ qq/f4 8f bf bf/ => 0, # 2.3.3
+ qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
+ # "3 Malformed sequences" are checked by perl.
+ # "4 Overlong sequences" are checked by perl.
+ );
$NTESTS += scalar keys %SEQ;
}
use strict;