ext/Encode/lib/Encode/Config.pm Encode configuration module
ext/Encode/lib/Encode/Encoder.pm OO Encoder
ext/Encode/lib/Encode/Encoding.pm Encode extension
+ext/Encode/lib/Encode/Guess.pm Encode Extension
ext/Encode/lib/Encode/JP/H2Z.pm Encode extension
ext/Encode/lib/Encode/JP/JIS7.pm Encode extension
ext/Encode/lib/Encode/KR/2022_KR.pm Encode extension
+ext/Encode/lib/Encode/MIME/Header.pm Encode extension
ext/Encode/lib/Encode/PerlIO.pod Documents for Encode & PerlIO
ext/Encode/lib/Encode/Supported.pod Documents for supported encodings
-ext/Encode/t/unibench.pl benchmark script
ext/Encode/t/Aliases.t test script
ext/Encode/t/CJKT.t test script
ext/Encode/t/Encode.t test script
ext/Encode/t/gb2312.enc test data
ext/Encode/t/gb2312.utf test data
ext/Encode/t/grow.t test script
+ext/Encode/t/guess.t test script
ext/Encode/t/jisx0201.enc test data
ext/Encode/t/jisx0201.utf test data
ext/Encode/t/jisx0208.enc test data
ext/Encode/t/jperl.t test script
ext/Encode/t/ksc5601.enc test data
ext/Encode/t/ksc5601.utf test data
+ext/Encode/t/mime-header.t test script
ext/Encode/t/perlio.t test script
+ext/Encode/t/unibench.pl benchmark script
ext/Encode/ucm/8859-1.ucm Unicode Character Map
ext/Encode/ucm/8859-10.ucm Unicode Character Map
ext/Encode/ucm/8859-11.ucm Unicode Character Map
use 5.7.2;
use strict;
use ExtUtils::MakeMaker;
+use strict;
my %tables = (euc_cn_t => ['euc-cn.ucm',
'cp936.ucm',
ir_165_t => ['ir-165.ucm'],
);
+unless ($ENV{AGGREGATE_TABLES}){
+ my @ucm;
+ for my $k (keys %tables){
+ push @ucm, @{$tables{$k}};
+ }
+ %tables = ();
+ my $seq = 0;
+ for my $ucm (sort @ucm){
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
+ }
+}
+
my $name = 'CN';
WriteMakefile(
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.58 2002/04/22 23:54:22 dankogai Exp $
+# $Id: Changes,v 1.60 2002/04/24 20:06:52 dankogai Exp $
#
-$Revision: 1.58 $ $Date: 2002/04/22 23:54:22 $
+$Revision: 1.60 $ $Date: 2002/04/24 20:06:52 $
+! Encode.xs
+ "Thou shalt not assume %x works." -- jhi
+ Message-Id: <20020424210618.E24347@alpha.hut.fi>
+! CN/Makefile.PL JP/Makefile.PL KR/Makefile.PL TW/Makefile.PL To make
+ low-memory build machines happy, now *.c is created for each *.ucm
+ (no table aggregation). You can still override this by setting
+ $ENV{AGGREGATE_TABLES}.
+ Message-Id: <00B1B3E4-579F-11D6-A441-00039301D480@dan.co.jp>
++ lib/Encode/Guess.pm
++ lib/Encode/JP/JIS7.pm
+ Encoding-autodetect (mainly for Japanese encoding) added. In a
+ course of development, JIS7.pm was improved.
++ lib/Encode/HTML/Header.pm
++ lib/Encode/Config.pm
+ MIME B/Q Header Encoding Added!
+! Encode.pm Encode.xs t/fallback.t
+ new fallbacks; XMLCREF and HTMLCREF upon Bart's request.
+ Message-Id: <20020424130709.GA14211@tanglefoot>
+
+1.59 $ 2002/04/22 23:54:22
+! Encode.pm Encode.xs
+ needs_lines() and perlio_ok() are added to Internal encodings such
+ as utf8 so XML::SAX is happy. FB_* stub xsubs are now prototyped.
+
+1.58 2002/04/22 23:54:22
! TW/TW.pm
s/MacChineseSimp/MacChineseTrad/ # ... oops.
! bin/ucm2text
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/22 23:54:22 $
+1.11 $Date: 2002/04/24 20:06:52 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.58 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.60 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
XSLoader::load 'Encode';
encodings find_encoding
);
-our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC PERLQQ);
-our @FB_CONSTS = qw(FB_DEFAULT FB_QUIET FB_WARN FB_PERLQQ FB_CROAK);
+our @FB_FLAGS = qw(DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
+ PERLQQ HTMLCREF XMLCREF);
+our @FB_CONSTS = qw(FB_DEFAULT FB_CROAK FB_QUIET FB_WARN
+ FB_PERLQQ FB_HTMLCREF FB_XMLCREF);
our @EXPORT_OK =
(
package Encode::UTF_EBCDIC;
*name = sub{ shift->{'Name'} };
*new_sequence = sub{ return $_[0] };
+ *needs_lines = sub{ 0 };
+ *perlio_ok = sub {
+ eval{ require PerlIO::encoding };
+ return $@ ? 0 : 1;
+ };
*decode = sub{
my ($obj,$str,$chk) = @_;
my $res = '';
package Encode::Internal;
*name = sub{ shift->{'Name'} };
*new_sequence = sub{ return $_[0] };
+ *needs_lines = sub{ 0 };
+ *perlio_ok = sub {
+ eval{ require PerlIO::encoding };
+ return $@ ? 0 : 1;
+ };
*decode = sub{
my ($obj,$str,$chk) = @_;
utf8::upgrade($str);
package Encode::utf8;
*name = sub{ shift->{'Name'} };
*new_sequence = sub{ return $_[0] };
+ *needs_lines = sub{ 0 };
+ *perlio_ok = sub {
+ eval{ require PerlIO::encoding };
+ return $@ ? 0 : 1;
+ };
*decode = sub{
my ($obj,$octets,$chk) = @_;
my $str = Encode::decode_utf8($octets);
=item perlqq mode (I<CHECK> = Encode::FB_PERLQQ)
+=item HTML charref mode (I<CHECK> = Encode::FB_HTMLCREF)
+
+=item XML charref mode (I<CHECK> = Encode::FB_XMLCREF)
+
For encodings that are implemented by Encode::XS, CHECK ==
Encode::FB_PERLQQ turns (en|de)code into C<perlqq> fallback mode.
where I<xxxx> is the Unicode ID of the character that cannot be found
in the character repertoire of the encoding.
+HTML/XML character reference modes are about the same, in place of
+\x{I<xxxx>}, HTML uses &#I<1234>; where I<1234> is a decimal digit and
+XML uses &#xI<abcd>; where I<abcd> is the hexadecimal digit.
+
=item The bitmask
These modes are actually set via a bitmask. Here is how the FB_XX
RETURN_ON_ERR 0x0004 X X
LEAVE_SRC 0x0008
PERLQQ 0x0100 X
+ HTMLCREF 0x0200
+ XMLCREF 0x0400
=head2 Unimplemented fallback schemes
/*
- $Id: Encode.xs,v 1.34 2002/04/22 20:27:30 dankogai Exp $
+ $Id: Encode.xs,v 1.38 2002/04/24 20:11:14 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
sdone += slen + clen;
ddone += dlen + SvCUR(perlqq);
sv_catsv(dst, perlqq);
+ }else if (check & ENCODE_HTMLCREF){
+ SV* htmlcref =
+ sv_2mortal(newSVpvf("&#%d;", ch));
+ sdone += slen + clen;
+ ddone += dlen + SvCUR(htmlcref);
+ sv_catsv(dst, htmlcref);
+ }else if (check & ENCODE_XMLCREF){
+ SV* xmlcref =
+ sv_2mortal(newSVpvf("&#x%" UVxf ";", ch));
+ sdone += slen + clen;
+ ddone += dlen + SvCUR(xmlcref);
+ sv_catsv(dst, xmlcref);
} else {
/* fallback char */
sdone += slen + clen;
enc->name[0], (U8) s[slen], code);
}
goto ENCODE_SET_SRC;
- }else if (check & ENCODE_PERLQQ){
+ }else if (check &
+ (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
SV* perlqq =
sv_2mortal(newSVpvf("\\x%02X", s[slen]));
sdone += slen + 1;
OUTPUT:
RETVAL
-PROTOTYPES: DISABLE
-
-
int
DIE_ON_ERR()
CODE:
RETVAL
int
+HTMLCREF()
+CODE:
+ RETVAL = ENCODE_HTMLCREF;
+OUTPUT:
+ RETVAL
+
+int
+XMLCREF()
+CODE:
+ RETVAL = ENCODE_XMLCREF;
+OUTPUT:
+ RETVAL
+
+int
FB_DEFAULT()
CODE:
RETVAL = ENCODE_FB_DEFAULT;
OUTPUT:
RETVAL
+int
+FB_HTMLCREF()
+CODE:
+ RETVAL = ENCODE_FB_HTMLCREF;
+OUTPUT:
+ RETVAL
+
+int
+FB_XMLCREF()
+CODE:
+ RETVAL = ENCODE_FB_XMLCREF;
+OUTPUT:
+ RETVAL
+
BOOT:
{
#include "def_t.h"
#define ENCODE_RETURN_ON_ERR 0x0004 /* immediately returns on NOREP */
#define ENCODE_LEAVE_SRC 0x0008 /* $src updated unless set */
#define ENCODE_PERLQQ 0x0100 /* perlqq fallback string */
+#define ENCODE_HTMLCREF 0x0200 /* HTML character ref. fb mode */
+#define ENCODE_XMLCREF 0x0400 /* XML character ref. fb mode */
#define ENCODE_FB_DEFAULT 0x0000
#define ENCODE_FB_CROAK 0x0001
#define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR
#define ENCODE_FB_WARN (ENCODE_RETURN_ON_ERR|ENCODE_WARN_ON_ERR)
#define ENCODE_FB_PERLQQ ENCODE_PERLQQ
+#define ENCODE_FB_HTMLCREF ENCODE_HTMLCREF
+#define ENCODE_FB_XMLCREF ENCODE_XMLCREF
#endif /* ENCODE_H */
use 5.7.2;
use strict;
use ExtUtils::MakeMaker;
+use strict;
my %tables = (
euc_jp_t => ['euc-jp.ucm'],
],
);
+unless ($ENV{AGGREGATE_TABLES}){
+ my @ucm;
+ for my $k (keys %tables){
+ push @ucm, @{$tables{$k}};
+ }
+ %tables = ();
+ my $seq = 0;
+ for my $ucm (sort @ucm){
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
+ }
+}
+
my $name = 'JP';
WriteMakefile(
use 5.7.2;
use strict;
use ExtUtils::MakeMaker;
+use strict;
my %tables = (euc_kr_t => ['euc-kr.ucm',
'macKorean.ucm',
johab_t => ['johab.ucm'],
);
+unless ($ENV{AGGREGATE_TABLES}){
+ my @ucm;
+ for my $k (keys %tables){
+ push @ucm, @{$tables{$k}};
+ }
+ %tables = ();
+ my $seq = 0;
+ for my $ucm (sort @ucm){
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
+ }
+}
+
my $name = 'KR';
WriteMakefile(
lib/Encode/Config.pm Encode configuration module
lib/Encode/Encoder.pm OO Encoder
lib/Encode/Encoding.pm Encode extension
+lib/Encode/Guess.pm Encode Extension
lib/Encode/JP/H2Z.pm Encode extension
lib/Encode/JP/JIS7.pm Encode extension
lib/Encode/KR/2022_KR.pm Encode extension
+lib/Encode/MIME/Header.pm Encode extension
lib/Encode/PerlIO.pod Documents for Encode & PerlIO
lib/Encode/Supported.pod Documents for supported encodings
-t/unibench.pl benchmark script
t/Aliases.t test script
t/CJKT.t test script
t/Encode.t test script
t/gb2312.enc test data
t/gb2312.utf test data
t/grow.t test script
+t/guess.t test script
t/jisx0201.enc test data
t/jisx0201.utf test data
t/jisx0208.enc test data
t/jperl.t test script
t/ksc5601.enc test data
t/ksc5601.utf test data
+t/mime-header.t test script
t/perlio.t test script
+t/unibench.pl benchmark script
ucm/8859-1.ucm Unicode Character Map
ucm/8859-10.ucm Unicode Character Map
ucm/8859-11.ucm Unicode Character Map
use 5.7.2;
use strict;
use ExtUtils::MakeMaker;
+use strict;
my %tables = (big5_t => ['big5-eten.ucm',
'big5-hkscs.ucm',
'cp950.ucm'],
);
+unless ($ENV{AGGREGATE_TABLES}){
+ my @ucm;
+ for my $k (keys %tables){
+ push @ucm, @{$tables{$k}};
+ }
+ %tables = ();
+ my $seq = 0;
+ for my $ucm (sort @ucm){
+ # 8.3 compliance !
+ my $t = sprintf ("%s_%02d_t", substr($ucm, 0, 2), $seq++);
+ $tables{$t} = [ $ucm ];
+ }
+}
+
my $name = 'TW';
WriteMakefile(
# Demand-load module list
#
package Encode::Config;
-our $VERSION = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.6 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use strict;
#'big5plus' => 'Encode::HanExtra',
#'euc-tw' => 'Encode::HanExtra',
#'gb18030' => 'Encode::HanExtra',
+
+ 'MIME-Header' => 'Encode::MIME::Header',
+ 'MIME-B' => 'Encode::MIME::Header',
+ 'MIME-Q' => 'Encode::MIME::Header',
+
);
}
--- /dev/null
+package Encode::Guess;
+use strict;
+use Carp;
+use Encode qw(:fallbacks find_encoding);
+our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+
+my $Canon = 'Guess';
+$Encode::Encoding{$Canon} = bless { Name => $Canon } => __PACKAGE__;
+our $DEBUG = 0;
+our %DEF_CANDIDATES =
+ map { $_ => find_encoding($_) } qw(ascii utf8);
+our %CANDIDATES;
+
+
+sub import{
+ my $class = shift;
+ %CANDIDATES = %DEF_CANDIDATES;
+ for my $c (@_){
+ my $e = find_encoding($c) or die "Unknown encoding: $c";
+ $CANDIDATES{$e->name} = $e;
+ $DEBUG and warn "Added: ", $e->name;
+ }
+}
+
+sub name { shift->{'Name'} }
+sub new_sequence { $_[0] }
+sub needs_lines { 1 }
+sub perlio_ok { 0 }
+
+sub decode($$;$){
+ my ($obj, $octet, $chk) = @_;
+ my $utf8 = $obj->guess($octet)->decode($octet, $chk);
+ $_[1] = $octet if $chk;
+ return $utf8;
+}
+
+sub encode{
+ croak "Tsk, tsk, tsk. You can't be too lazy here here!";
+}
+
+sub guess {
+ my ($obj, $octet) = @_;
+ # cheat 1: utf8 flag;
+ Encode::is_utf8($octet) and return find_encoding('utf8');
+ my %try = %CANDIDATES;
+ my $nline = 1;
+ for my $line (split /\r|\n|\r\n/, $octet){
+ # cheat 2 -- escape
+ if ($line =~ /\e/o){
+ my @keys = keys %try;
+ delete @try{qw/utf8 ascii/};
+ for my $k (@keys){
+ ref($try{$k}) eq 'Encode::XS' and delete $try{$k};
+ }
+ }
+ my %ok = %try;
+ # warn join(",", keys %try);
+ for my $k (keys %try){
+ my $scratch = $line;
+ $try{$k}->decode($scratch, FB_QUIET);
+ if ($scratch eq ''){
+ $DEBUG and warn sprintf("%4d:%-24s ok\n", $nline, $k);
+ }else{
+ use bytes ();
+ $DEBUG and
+ warn sprintf("%4d:%-24s not ok; %d bytes left\n",
+ $nline, $k, bytes::length($scratch));
+ delete $ok{$k};
+
+ }
+ }
+ %ok or croak "No appropriate encodings found!";
+ if (scalar(keys(%ok)) == 1){
+ my ($retval) = values(%ok);
+ return $retval;
+ }
+ %try = %ok; $nline++;
+ }
+ unless ($try{ascii}){
+ croak "Encodings too ambiguous: ",
+ join(" or ", keys %try);
+ }
+ return $try{ascii};
+}
+
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Guess -- guesscoding!
+
+=cut
+
package Encode::JP::JIS7;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.6 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode qw(:fallbacks);
sub decode($$;$)
{
- my ($obj,$str,$chk) = @_;
- my $residue = jis_euc(\$str);
- # This is for PerlIO
+ my ($obj, $str, $chk) = @_;
+ my $residue = '';
+ if ($chk){
+ $str =~ s/([^\x00-\x7f].*)$//so;
+ $1 and $residue = $1;
+ }
+ $residue .= jis_euc(\$str);
$_[1] = $residue if $chk;
return Encode::decode('euc-jp', $str, FB_PERLQQ);
}
--- /dev/null
+package Encode::MIME::Header;
+use strict;
+# use warnings;
+our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+
+use Encode qw(find_encoding encode_utf8);
+use MIME::Base64;
+use Carp;
+
+my %seed =
+ (
+ decode_b => '1', # decodes 'B' encoding ?
+ decode_q => '1', # decodes 'Q' encoding ?
+ encode => 'B', # encode with 'B' or 'Q' ?
+ bpl => 75, # bytes per line
+ );
+
+$Encode::Encoding{'MIME-Header'} =
+ bless {
+ %seed,
+ Name => 'MIME-Header',
+ } => __PACKAGE__;
+
+$Encode::Encoding{'MIME-B'} =
+ bless {
+ %seed,
+ decode_q => 0,
+ Name => 'MIME-B',
+ } => __PACKAGE__;
+
+$Encode::Encoding{'MIME-Q'} =
+ bless {
+ %seed,
+ decode_q => 1,
+ encode => 'Q',
+ Name => 'MIME-Q',
+ } => __PACKAGE__;
+
+sub name { shift->{'Name'} }
+sub new_sequence { $_[0] }
+sub needs_lines { 1 }
+sub perlio_ok{ 0 };
+
+sub decode($$;$){
+ use utf8;
+ my ($obj, $str, $chk) = @_;
+ # zap spaces between encoded words
+ $str =~ s/\?=\s+=\?/\?==\?/gos;
+ # multi-line header to single line
+ $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
+ $str =~
+ s{
+ =\? # begin encoded word
+ ([0-9A-Za-z\-]+) # charset (encoding)
+ \?([QqBb])\? # delimiter
+ (.*?) # Base64-encodede contents
+ \?= # end encoded word
+ }{
+ if (uc($2) eq 'B'){
+ $obj->{decode_b} or croak qq(MIME "B" unsupported);
+ decode_b($1, $3);
+ }elsif(uc($2) eq 'Q'){
+ $obj->{decode_q} or croak qq(MIME "Q" unsupported);
+ decode_q($1, $3);
+ }else{
+ croak qq(MIME "$2" encoding is nonexistent!);
+ }
+ }egox;
+ $_[1] = '' if $chk;
+ return $str;
+}
+
+sub decode_b{
+ my $enc = shift;
+ my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+ my $db64 = decode_base64(shift);
+ return $d->decode($db64, Encode::FB_PERLQQ);
+}
+
+sub decode_q{
+ my ($enc, $q) = @_;
+ my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+ $q =~ s/_/ /go;
+ $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
+ return $d->decode($q, Encode::FB_PERLQQ);
+}
+
+my $especials =
+ join('|' =>
+ map {quotemeta(chr($_))}
+ unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
+
+my $re_especials = qr/$especials/o;
+
+sub encode($$;$){
+ my ($obj, $str, $chk) = @_;
+ my @line = ();
+ for my $line (split /\r|\n|\r\n/o, $str){
+ my (@word, @subline);
+ for my $word (split /($re_especials)/o, $line){
+ if ($word =~ /[^\x00-\x7f]/o){
+ push @word, $obj->_encode($word);
+ }else{
+ push @word, $word;
+ }
+ }
+ my $subline = '';
+ for my $word (@word){
+ use bytes ();
+ if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
+ push @subline, $subline;
+ $subline = '';
+ }
+ $subline .= $word;
+ }
+ $subline and push @subline, $subline;
+ push @line, join("\n " => @subline);
+ }
+ $_[1] = '' if $chk;
+ return join("\n", @line);
+}
+
+use constant HEAD => '=?UTF-8?';
+use constant TAIL => '?=';
+use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
+
+sub _encode{
+ my ($o, $str) = @_;
+ my $enc = $o->{encode};
+ my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
+ $llen *= $enc eq 'B' ? 3/4 : 1/3;
+ my @result = ();
+ my $chunk = '';
+ while(my $chr = substr($str, 0, 1, '')){
+ use bytes ();
+ if (bytes::length($chunk) + bytes::length($chr) > $llen){
+ push @result, SINGLE->{$enc}($chunk);
+ $chunk = '';
+ }
+ $chunk .= $chr;
+ }
+ $chunk and push @result, SINGLE->{$enc}($chunk);
+ return @result;
+}
+
+sub _encode_b{
+ HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
+}
+
+sub _encode_q{
+ my $chunk = shift;
+ $chunk =~ s{
+ ([^0-9A-Za-z])
+ }{
+ join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
+ }egox;
+ return HEAD . 'Q?' . $chunk . TAIL;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
+
+=head1 SYNOPSIS
+
+ use Encode qw/encode decode/;
+ $utf8 = decode('MIME-Header', $header);
+ $header = encode('MIME-Header', $utf8);
+
+=head1 ABSTRACT
+
+This module implements RFC 2047 Mime Header Encoding. There are 3
+variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
+difference is described below
+
+ decode() encode()
+ ----------------------------------------------
+ MIME-Header Both B and Q =?UTF-8?B?....?=
+ MIME-B B only; Q croaks =?UTF-8?B?....?=
+ MIME-Q Q only; B croaks =?UTF-8?Q?....?=
+
+=head1 DESCRIPTION
+
+When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
+is extracted and decoded for I<X> encoding (B for Base64, Q for
+Quoted-Printable). Then the decoded chunk is fed to
+decode(I<encoding>). So long as I<encoding> is supported by Encode,
+any source encoding is fine.
+
+When you encode, it just encodes UTF-8 string with I<X> encoding then
+quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
+encode are left as is and long lines are folded within 76 bytes per
+line.
+
+=head1 BUGS
+
+It would be nice to support non-UTF8 encoding, such as =?ISO-2022-JP?
+and =?ISO-8859-1?= but that makes the implementation too complicated.
+These days major mail agents all support =?UTF-8? so I think it is
+just good enough.
+
+=head1 SEE ALSO
+
+L<Encode>
+
+RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
+locations.
+
+=cut
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 15;
+use Test::More tests => 19;
use Encode q(:all);
my $original = '';
my $nofallback = '';
-my ($fallenback, $quiet, $perlqq);
+my ($fallenback, $quiet, $perlqq, $htmlcref, $xmlcref);
for my $i (0x20..0x7e){
$original .= chr($i);
}
-$fallenback = $quiet = $perlqq = $nofallback = $original;
+$fallenback = $quiet =
+$perlqq = $htmlcref = $xmlcref = $nofallback = $original;
my $residue = '';
for my $i (0x80..0xff){
$residue .= chr($i);
$fallenback .= '?';
$perlqq .= sprintf("\\x{%04x}", $i);
+ $htmlcref .= sprintf("&#%d;", $i);
+ $xmlcref .= sprintf("&#x%x;", $i);
}
utf8::upgrade($original);
my $meth = find_encoding('ascii');
$dst = $meth->encode($src, FB_PERLQQ);
is($dst, $perlqq, "FB_PERLQQ");
is($src, '', "FB_PERLQQ residue");
+
+$src = $original;
+$dst = $meth->encode($src, FB_HTMLCREF);
+is($dst, $htmlcref, "FB_HTMLCREF");
+is($src, '', "FB_HTMLCREF residue");
+
+$src = $original;
+$dst = $meth->encode($src, FB_XMLCREF);
+is($dst, $xmlcref, "FB_XMLCREF");
+is($src, '', "FB_XMLCREF residue");
--- /dev/null
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use File::Basename;
+use File::Spec;
+use Encode qw(decode encode find_encoding _utf8_off);
+
+#use Test::More qw(no_plan);
+use Test::More tests => 11;
+use_ok("Encode::Guess");
+{
+ no warnings;
+ $Encode::Guess::DEBUG = shift || 0;
+}
+
+my $ascii = join('' => map {chr($_)}(0x21..0x7e));
+my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
+my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe));
+my $utf8off = $utf8on; _utf8_off($utf8off);
+
+is(Encode::Guess->guess($ascii)->name, 'ascii');
+
+eval { Encode::Guess->guess($latin1) } ;
+like($@, qr/No appropriate encoding/io);
+
+Encode::Guess->import(qw(latin1));
+
+is(Encode::Guess->guess($latin1)->name, 'iso-8859-1');
+is(Encode::Guess->guess($utf8on)->name, 'utf8');
+
+eval { Encode::Guess->guess($utf8off) };
+like($@, qr/ambiguous/io);
+
+my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf');
+my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf');
+my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf');
+
+open my $fh, $jisx0208 or die "$jisx0208: $!";
+$utf8off = join('' => <$fh>);
+close $fh;
+$utf8on = decode('utf8', $utf8off);
+my @jp = qw(7bit-jis shiftjis euc-jp);
+
+Encode::Guess->import(@jp);
+
+for my $jp (@jp){
+ my $test = encode($jp, $utf8on);
+ is(Encode::Guess->guess($test)->name, $jp, $jp);
+}
+is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
+eval{ encode('Guess', $utf8on) };
+like($@, qr/lazy/io, "no encode()");
+__END__;
--- /dev/null
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use Test::More qw(no_plan);
+#use Test::More tests => 19;
+use_ok("Encode::MIME::Header");
+
+
+__END__;