package Encode;
+use strict;
-$VERSION = 0.01;
+our $VERSION = 0.02;
require DynaLoader;
require Exporter;
-@ISA = qw(Exporter DynaLoader);
+our @ISA = qw(Exporter DynaLoader);
# Public, encouraged API is exported by default
-@EXPORT = qw (
+our @EXPORT = qw (
encode
decode
encode_utf8
decode_utf8
find_encoding
+ encodings
);
-@EXPORT_OK =
+our @EXPORT_OK =
qw(
- encodings
+ define_encoding
+ define_alias
from_to
is_utf8
is_8bit
use Carp;
-# The global hash is declared in XS code
-$encoding{Unicode} = bless({},'Encode::Unicode');
-$encoding{utf8} = bless({},'Encode::utf8');
-$encoding{'iso10646-1'} = bless({},'Encode::iso10646_1');
+# Make a %encoding package variable to allow a certain amount of cheating
+our %encoding;
+my @alias; # ordered matching list
+my %alias; # cached known aliases
sub encodings
{
my ($class) = @_;
- foreach my $dir (@INC)
+ return keys %encoding;
+}
+
+sub findAlias
+{
+ my $class = shift;
+ local $_ = shift;
+ unless (exists $alias{$_})
{
- if (opendir(my $dh,"$dir/Encode"))
+ for (my $i=0; $i < @alias; $i += 2)
{
- while (defined(my $name = readdir($dh)))
+ my $alias = $alias[$i];
+ my $val = $alias[$i+1];
+ my $new;
+ if (ref($alias) eq 'Regexp' && $_ =~ $alias)
{
- if ($name =~ /^(.*)\.enc$/)
+ $new = eval $val;
+ }
+ elsif (ref($alias) eq 'CODE')
+ {
+ $new = &{$alias}($val)
+ }
+ elsif (lc($_) eq $alias)
+ {
+ $new = $val;
+ }
+ if (defined($new))
+ {
+ next if $new eq $_; # avoid (direct) recursion on bugs
+ my $enc = (ref($new)) ? $new : find_encoding($new);
+ if ($enc)
{
- next if exists $encoding{$1};
- $encoding{$1} = "$dir/$name";
+ $alias{$_} = $enc;
+ last;
}
}
- closedir($dh);
}
}
- return keys %encoding;
+ return $alias{$_};
}
-sub loadEncoding
+sub define_alias
{
- my ($class,$name,$file) = @_;
- if (open(my $fh,$file))
+ while (@_)
{
- my $type;
- while (1)
- {
- my $line = <$fh>;
- $type = substr($line,0,1);
- last unless $type eq '#';
- }
- $class .= ('::'.(($type eq 'E') ? 'Escape' : 'Table'));
- #warn "Loading $file";
- return $class->read($fh,$name,$type);
+ my ($alias,$name) = splice(@_,0,2);
+ push(@alias, $alias => $name);
}
- else
+}
+
+define_alias( qr/^iso(\d+-\d+)$/i => '"iso-$1"' );
+define_alias( qr/^(\S+)\s+(.*)$/i => '"$1-$2"' );
+#define_alias( sub { return /^iso-(\d+-\d+)$/i ? "iso$1" : '' } );
+define_alias( 'ascii' => 'US-ascii');
+define_alias( 'ibm-1047' => 'cp1047');
+
+sub define_encoding
+{
+ my $obj = shift;
+ my $name = shift;
+ $encoding{$name} = $obj;
+ my $lc = lc($name);
+ define_alias($lc => $obj) unless $lc eq $name;
+ while (@_)
{
- return undef;
+ my $alias = shift;
+ define_alias($alias,$obj);
}
+ return $obj;
}
sub getEncoding
{
my ($class,$name) = @_;
my $enc;
- unless (ref($enc = $encoding{$name}))
+ if (exists $encoding{$name})
{
- $enc = $class->loadEncoding($name,$enc) if defined $enc;
- unless (ref($enc))
- {
- foreach my $dir (@INC)
- {
- last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
- }
- }
- $encoding{$name} = $enc;
+ return $encoding{$name};
+ }
+ else
+ {
+ return $class->findAlias($name);
}
- return $enc;
}
sub find_encoding
package Encode::Encoding;
# Base class for classes which implement encodings
+sub Define
+{
+ my $obj = shift;
+ my $canonical = shift;
+ $obj = bless { Name => $canonical },$obj unless ref $obj;
+ # warn "$canonical => $obj\n";
+ Encode::define_encoding($obj, $canonical, @_);
+}
+
+sub name { shift->{'Name'} }
+
# Temporary legacy methods
sub toUnicode { shift->decode(@_) }
sub fromUnicode { shift->encode(@_) }
# Dummy package that provides the encode interface but leaves data
# as UTF-8 encoded. It is here so that from_to() works.
-sub name { 'Unicode' }
+__PACKAGE__->Define('Unicode');
sub decode
{
package Encode::utf8;
use base 'Encode::Encoding';
-
# package to allow long-hand
# $octets = encode( utf8 => $string );
#
-sub name { 'utf8' }
+__PACKAGE__->Define(qw(UTF-8 utf8));
sub decode
{
return $octets;
}
-package Encode::Table;
-use base 'Encode::Encoding';
-
-sub read
-{
- my ($class,$fh,$name,$type) = @_;
- my $rep = $class->can("rep_$type");
- my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
- my @touni;
- my %fmuni;
- my $count = 0;
- $def = hex($def);
- while ($pages--)
- {
- my $line = <$fh>;
- chomp($line);
- my $page = hex($line);
- my @page;
- my $ch = $page * 256;
- for (my $i = 0; $i < 16; $i++)
- {
- my $line = <$fh>;
- for (my $j = 0; $j < 16; $j++)
- {
- my $val = hex(substr($line,0,4,''));
- if ($val || !$ch)
- {
- my $uch = chr($val);
- push(@page,$uch);
- $fmuni{$uch} = $ch;
- $count++;
- }
- else
- {
- push(@page,undef);
- }
- $ch++;
- }
- }
- $touni[$page] = \@page;
- }
-
- return bless {Name => $name,
- Rep => $rep,
- ToUni => \@touni,
- FmUni => \%fmuni,
- Def => $def,
- Num => $count,
- },$class;
-}
-
-sub name { shift->{'Name'} }
-
-sub rep_S { 'C' }
-
-sub rep_D { 'n' }
-
-sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
-
-sub representation
-{
- my ($obj,$ch) = @_;
- $ch = 0 unless @_ > 1;
- $obj-{'Rep'}->($ch);
-}
-
-sub decode
-{
- my ($obj,$str,$chk) = @_;
- my $rep = $obj->{'Rep'};
- my $touni = $obj->{'ToUni'};
- my $uni = '';
- while (length($str))
- {
- my $ch = ord(substr($str,0,1,''));
- my $x;
- if (&$rep($ch) eq 'C')
- {
- $x = $touni->[0][$ch];
- }
- else
- {
- $x = $touni->[$ch][ord(substr($str,0,1,''))];
- }
- unless (defined $x)
- {
- last if $chk;
- # What do we do here ?
- $x = '';
- }
- $uni .= $x;
- }
- $_[1] = $str if $chk;
- return $uni;
-}
-
-sub encode
-{
- my ($obj,$uni,$chk) = @_;
- my $fmuni = $obj->{'FmUni'};
- my $str = '';
- my $def = $obj->{'Def'};
- my $rep = $obj->{'Rep'};
- while (length($uni))
- {
- my $ch = substr($uni,0,1,'');
- my $x = $fmuni->{chr(ord($ch))};
- unless (defined $x)
- {
- last if ($chk);
- $x = $def;
- }
- $str .= pack(&$rep($x),$x);
- }
- $_[1] = $uni if $chk;
- return $str;
-}
-
package Encode::iso10646_1;
use base 'Encode::Encoding';
-
-# Encoding is 16-bit network order Unicode
+# Encoding is 16-bit network order Unicode (no surogates)
# Used for X font encodings
-sub name { 'iso10646-1' }
+__PACKAGE__->Define(qw(UCS-2 iso10646-1));
sub decode
{
return $str;
}
-
-package Encode::Escape;
-use base 'Encode::Encoding';
-
-use Carp;
-
-sub read
-{
- my ($class,$fh,$name) = @_;
- my %self = (Name => $name, Num => 0);
- while (<$fh>)
- {
- my ($key,$val) = /^(\S+)\s+(.*)$/;
- $val =~ s/^\{(.*?)\}/$1/g;
- $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
- $self{$key} = $val;
- }
- return bless \%self,$class;
-}
-
-sub name { shift->{'Name'} }
-
-sub decode
-{
- croak("Not implemented yet");
-}
-
-sub encode
-{
- croak("Not implemented yet");
-}
-
# switch back to Encode package in case we ever add AutoLoader
package Encode;
encodings have state C<Encode> uses the encoding object internally
once an operation is in progress.
-I<Aliasing is not yet implemented.>
-
=head1 PERL ENCODING API
=head2 Generic Encoding Interface
UCS-2 can only represent 0..0xFFFF, while UTF-16 has a "surogate pair"
scheme which allows it to cover the whole Unicode range.
-Encode implements big-endian UCS-2 as the encoding "iso10646-1" as that
+Encode implements big-endian UCS-2 aliased to "iso10646-1" as that
happens to be the name used by that representation when used with X11 fonts.
UTF-32 or UCS-4 is 32-bit or 4-byte chunks. Perl's logical characters
depending on the endian required.
-No UTF-32 encodings are not yet implemented.
+No UTF-32 encodings are implemented yet.
Both UCS-2 and UCS-4 style encodings can have "byte order marks" by representing
the code point 0xFFFE as the very first thing in a file.
+=head2 Listing available encodings
+
+ use Encode qw(encodings);
+ @list = encodings();
+
+Returns a list of the canonical names of the available encodings.
+
+=head2 Defining Aliases
+
+ use Encode qw(define_alias);
+ define_alias( newName => ENCODING);
+
+Allows newName to be used as am alias for ENCODING. ENCODING may be either the
+name of an encoding or and encoding object (as above).
+
+Currently I<newName> can be specified in the following ways:
+
+=over 4
+
+=item As a simple string.
+
+=item As a qr// compiled regular expression, e.g.:
+
+ define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
+
+In this case if I<ENCODING> is not a reference it is C<eval>-ed to allow
+C<$1> etc. to be subsituted.
+The example is one way to names as used in X11 font names to alias the MIME names for the
+iso-8859-* family.
+
+=item As a code reference, e.g.:
+
+ define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
+
+In this case C<$_> will be set to the name that is being looked up and
+I<ENCODING> is passed to the sub as its first argument.
+The example is another way to names as used in X11 font names to alias the MIME names for
+the iso-8859-* family.
+
+=back
+
+=head2 Defining Encodings
+
+ use Encode qw(define_alias);
+ define_encoding( $object, 'canonicalName' [,alias...]);
+
+Causes I<canonicalName> to be associated with I<$object>.
+The object should provide the interface described in L</"IMPLEMENTATION CLASSES"> below.
+If more than two arguments are provided then additional arguments are taken
+as aliases for I<$object> as for C<define_alias>.
+
=head1 Encoding and IO
It is very common to want to do encoding transformations when
C<Encode> provides a "layer" (See L<perliol>) which can transform
data as it is read or written.
- open(my $ilyad,'>:encoding(iso8859-7)','ilyad.greek');
+ open(my $ilyad,'>:encoding(iso-8859-7)','ilyad.greek');
print $ilyad @epic;
In addition the new IO system can also be configured to read/write
As mentioned above encodings are (in the current implementation at least)
defined by objects. The mapping of encoding name to object is via the
-C<%Encode::encodings> hash. (It is a package hash to allow XS code to get
-at it.)
+C<%encodings> hash.
The values of the hash can currently be either strings or objects.
The string form may go away in the future. The string form occurs
It is also highly desirable that encoding classes inherit from C<Encode::Encoding>
as a base class. This allows that class to define additional behaviour for
-all encoding objects.
+all encoding objects. For example built in Unicode, UCS-2 and UTF-8 classes
+use :
+
+ package Encode::MyEncoding;
+ use base qw(Encode::Encoding);
+
+ __PACKAGE__->Define(qw(myCanonical myAlias));
+
+To create an object with bless {Name => ...},$class, and call define_encoding.
+They inherit their C<name> method from C<Encode::Encoding>.
=head2 Compiled Encodings
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVpv("Encode",0)));
XPUSHs(sv_2mortal(newSVpvn(arg,len)));
PUTBACK;
- if (perl_call_method("getEncoding",G_SCALAR) != 1)
+ if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
{
/* should never happen */
- Perl_die(aTHX_ "Encode::getEncoding did not return a value");
+ Perl_die(aTHX_ "Encode::find_encoding did not return a value");
return -1;
}
SPAGAIN;
void
Encode_Define(pTHX_ encode_t *enc)
{
- HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI);
+ dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
int i = 0;
+ PUSHMARK(sp);
+ XPUSHs(sv);
while (enc->name[i])
{
const char *name = enc->name[i++];
- hv_store(hash,name,strlen(name),SvREFCNT_inc(sv),0);
+ XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
}
+ PUTBACK;
+ call_pv("Encode::define_encoding",G_DISCARD);
SvREFCNT_dec(sv);
}
--- /dev/null
+package Encode::Tcl;
+use strict;
+use Encode qw(find_encoding);
+use base 'Encode::Encoding';
+use Carp;
+
+
+sub INC_search
+{
+ foreach my $dir (@INC)
+ {
+ if (opendir(my $dh,"$dir/Encode"))
+ {
+ while (defined(my $name = readdir($dh)))
+ {
+ if ($name =~ /^(.*)\.enc$/)
+ {
+ my $canon = $1;
+ my $obj = find_encoding($canon);
+ if (!defined($obj))
+ {
+ my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
+ $obj->Define( $canon );
+ # warn "$canon => $obj\n";
+ }
+ }
+ }
+ closedir($dh);
+ }
+ }
+}
+
+sub import
+{
+ INC_search();
+}
+
+sub encode
+{
+ my $obj = shift;
+ my $new = $obj->loadEncoding;
+ return undef unless (defined $new);
+ return $new->encode(@_);
+}
+
+sub new_sequence
+{
+ my $obj = shift;
+ my $new = $obj->loadEncoding;
+ return undef unless (defined $new);
+ return $new->new_sequence(@_);
+}
+
+sub decode
+{
+ my $obj = shift;
+ my $new = $obj->loadEncoding;
+ return undef unless (defined $new);
+ return $new->decode(@_);
+}
+
+sub loadEncoding
+{
+ my $obj = shift;
+ my $file = $obj->{'File'};
+ my $name = $obj->name;
+ if (open(my $fh,$file))
+ {
+ my $type;
+ while (1)
+ {
+ my $line = <$fh>;
+ $type = substr($line,0,1);
+ last unless $type eq '#';
+ }
+ my $class = ref($obj).('::'.(($type eq 'E') ? 'Escape' : 'Table'));
+ carp "Loading $file";
+ bless $obj,$class;
+ return $obj if $obj->read($fh,$obj->name,$type);
+ }
+ else
+ {
+ croak("Cannot open $file for ".$obj->name);
+ }
+ $obj->Undefine($name);
+ return undef;
+}
+
+sub INC_find
+{
+ my ($class,$name) = @_;
+ my $enc;
+ foreach my $dir (@INC)
+ {
+ last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
+ }
+ return $enc;
+}
+
+package Encode::Tcl::Table;
+use base 'Encode::Encoding';
+
+use Data::Dumper;
+
+sub read
+{
+ my ($obj,$fh,$name,$type) = @_;
+ my $rep = $obj->can("rep_$type");
+ my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
+ my @touni;
+ my %fmuni;
+ my $count = 0;
+ $def = hex($def);
+ while ($pages--)
+ {
+ my $line = <$fh>;
+ chomp($line);
+ my $page = hex($line);
+ my @page;
+ my $ch = $page * 256;
+ for (my $i = 0; $i < 16; $i++)
+ {
+ my $line = <$fh>;
+ for (my $j = 0; $j < 16; $j++)
+ {
+ my $val = hex(substr($line,0,4,''));
+ if ($val || !$ch)
+ {
+ my $uch = chr($val);
+ push(@page,$uch);
+ $fmuni{$uch} = $ch;
+ $count++;
+ }
+ else
+ {
+ push(@page,undef);
+ }
+ $ch++;
+ }
+ }
+ $touni[$page] = \@page;
+ }
+ $obj->{'Rep'} = $rep;
+ $obj->{'ToUni'} = \@touni;
+ $obj->{'FmUni'} = \%fmuni;
+ $obj->{'Def'} = $def;
+ $obj->{'Num'} = $count;
+ return $obj;
+}
+
+sub rep_S { 'C' }
+
+sub rep_D { 'n' }
+
+sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
+
+sub representation
+{
+ my ($obj,$ch) = @_;
+ $ch = 0 unless @_ > 1;
+ $obj-{'Rep'}->($ch);
+}
+
+sub decode
+{
+ my ($obj,$str,$chk) = @_;
+ my $rep = $obj->{'Rep'};
+ my $touni = $obj->{'ToUni'};
+ my $uni = '';
+ while (length($str))
+ {
+ my $ch = ord(substr($str,0,1,''));
+ my $x;
+ if (&$rep($ch) eq 'C')
+ {
+ $x = $touni->[0][$ch];
+ }
+ else
+ {
+ $x = $touni->[$ch][ord(substr($str,0,1,''))];
+ }
+ unless (defined $x)
+ {
+ last if $chk;
+ # What do we do here ?
+ $x = '';
+ }
+ $uni .= $x;
+ }
+ $_[1] = $str if $chk;
+ return $uni;
+}
+
+
+sub encode
+{
+ my ($obj,$uni,$chk) = @_;
+ my $fmuni = $obj->{'FmUni'};
+ my $str = '';
+ my $def = $obj->{'Def'};
+ my $rep = $obj->{'Rep'};
+ while (length($uni))
+ {
+ my $ch = substr($uni,0,1,'');
+ my $x = $fmuni->{chr(ord($ch))};
+ unless (defined $x)
+ {
+ last if ($chk);
+ $x = $def;
+ }
+ $str .= pack(&$rep($x),$x);
+ }
+ $_[1] = $uni if $chk;
+ return $str;
+}
+
+package Encode::Tcl::Escape;
+use base 'Encode::Encoding';
+
+use Carp;
+
+sub read
+{
+ my ($class,$fh,$name) = @_;
+ my %self = (Name => $name, Num => 0);
+ while (<$fh>)
+ {
+ my ($key,$val) = /^(\S+)\s+(.*)$/;
+ $val =~ s/^\{(.*?)\}/$1/g;
+ $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
+ $self{$key} = $val;
+ }
+ return bless \%self,$class;
+}
+
+sub decode
+{
+ croak("Not implemented yet");
+}
+
+sub encode
+{
+ croak("Not implemented yet");
+}
+
+1;
+__END__
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n ascii -o Encode/ascii.ucm Encode/ascii.enc
-<code_set_name> "ascii"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n US-ascii -o Encode/ascii.ucm Encode/ascii.enc
+<code_set_name> "US-ascii"
+<code_set_alias> "ascii"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc
<code_set_name> "cp1250"
<mb_cur_min> 1
<mb_cur_max> 1
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc
-<code_set_name> "iso8859-1"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc
+<code_set_name> "iso-8859-1"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc
-<code_set_name> "iso8859-10"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc
+<code_set_name> "iso-8859-10"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc
-<code_set_name> "iso8859-13"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc
+<code_set_name> "iso-8859-13"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc
-<code_set_name> "iso8859-14"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc
+<code_set_name> "iso-8859-14"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc
-<code_set_name> "iso8859-15"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc
+<code_set_name> "iso-8859-15"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc
-<code_set_name> "iso8859-16"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc
+<code_set_name> "iso-8859-16"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc
-<code_set_name> "iso8859-2"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc
+<code_set_name> "iso-8859-2"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc
-<code_set_name> "iso8859-3"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc
+<code_set_name> "iso-8859-3"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc
-<code_set_name> "iso8859-4"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc
+<code_set_name> "iso-8859-4"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc
-<code_set_name> "iso8859-5"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc
+<code_set_name> "iso-8859-5"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc
-<code_set_name> "iso8859-6"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc
+<code_set_name> "iso-8859-6"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc
-<code_set_name> "iso8859-7"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc
+<code_set_name> "iso-8859-7"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc
-<code_set_name> "iso8859-8"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc
+<code_set_name> "iso-8859-8"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc
-<code_set_name> "iso8859-9"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc
+<code_set_name> "iso-8859-9"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
foreach my $enc (sort cmp_name @encfiles)
{
my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/;
- $name = delete $opt{'n'} if exists $opt{'n'};
+ $name = $opt{'n'} if exists $opt{'n'};
if (open(E,$enc))
{
if ($sfx eq 'enc')
}
else
{
- # $name = lc($cs);
+ $name = $cs unless exists $opt{'n'};
}
my $erep;
my $urep;
}
}
use Test;
-use Encode qw(from_to encode decode encode_utf8 decode_utf8);
+use Encode qw(from_to encode decode encode_utf8 decode_utf8 find_encoding);
use charnames qw(greek);
-my @encodings = grep(/iso8859/,Encode::encodings());
+my @encodings = grep(/iso-?8859/,Encode::encodings());
my $n = 2;
my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z');
my @source = qw(ascii iso8859-1 cp1250);
my @destiny = qw(cp1047 cp37 posix-bc);
my @ebcdic_sets = qw(cp1047 cp37 posix-bc);
-plan test => 33+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256;
+plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256;
my $str = join('',map(chr($_),0x20..0x7E));
my $cpy = $str;
ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong");
}
}
+my $mime = find_encoding('iso-8859-2');
+ok(defined($mime),1,"Cannot find MIME-ish'iso-8859-2'");
+my $x11 = find_encoding('iso8859-2');
+ok(defined($x11),1,"Cannot find X11-ish 'iso8859-2'");
+ok($mime,$x11,"iso8598-2 and iso-8859-2 not same");
+my $spc = find_encoding('iso 8859-2');
+ok(defined($spc),1,"Cannot find 'iso 8859-2'");
+ok($spc,$mime,"iso 8859-2 and iso-8859-2 not same");
+
for my $i (256,128,129,256)
{
my $c = chr($i);