From: Nick Ing-Simmons Date: Mon, 5 Mar 2001 14:51:50 +0000 (+0000) Subject: Encode implementation "completion" X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=51ef4e1196e74554150c2d1993b5a0e37f6709c9;p=p5sagit%2Fp5-mst-13.2.git Encode implementation "completion" Implement and document define_encoding() Implement and document encoding aliases including define_alias() Make Encode::XS use define_encoding() rather than back-door. Move run-time *.enc to separate Encode::Tcl module. Make 'compile' honour Change canonical names of to iso-8859-* and US-ascii. p4raw-id: //depot/perlio@9032 --- diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 72d6cc0..38c30ad 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,24 +1,27 @@ package Encode; +use strict; -$VERSION = 0.01; +our $VERSION = 0.02; require DynaLoader; require Exporter; -@ISA = qw(Exporter DynaLoader); +our @ISA = qw(Exporter DynaLoader); # Public, encouraged API is exported by default -@EXPORT = qw ( +our @EXPORT = qw ( encode decode encode_utf8 decode_utf8 find_encoding + encodings ); -@EXPORT_OK = +our @EXPORT_OK = qw( - encodings + define_encoding + define_alias from_to is_utf8 is_8bit @@ -35,71 +38,97 @@ bootstrap Encode (); use Carp; -# The global hash is declared in XS code -$encoding{Unicode} = bless({},'Encode::Unicode'); -$encoding{utf8} = bless({},'Encode::utf8'); -$encoding{'iso10646-1'} = bless({},'Encode::iso10646_1'); +# Make a %encoding package variable to allow a certain amount of cheating +our %encoding; +my @alias; # ordered matching list +my %alias; # cached known aliases sub encodings { my ($class) = @_; - foreach my $dir (@INC) + return keys %encoding; +} + +sub findAlias +{ + my $class = shift; + local $_ = shift; + unless (exists $alias{$_}) { - if (opendir(my $dh,"$dir/Encode")) + for (my $i=0; $i < @alias; $i += 2) { - while (defined(my $name = readdir($dh))) + my $alias = $alias[$i]; + my $val = $alias[$i+1]; + my $new; + if (ref($alias) eq 'Regexp' && $_ =~ $alias) { - if ($name =~ /^(.*)\.enc$/) + $new = eval $val; + } + elsif (ref($alias) eq 'CODE') + { + $new = &{$alias}($val) + } + elsif (lc($_) eq $alias) + { + $new = $val; + } + if (defined($new)) + { + next if $new eq $_; # avoid (direct) recursion on bugs + my $enc = (ref($new)) ? $new : find_encoding($new); + if ($enc) { - next if exists $encoding{$1}; - $encoding{$1} = "$dir/$name"; + $alias{$_} = $enc; + last; } } - closedir($dh); } } - return keys %encoding; + return $alias{$_}; } -sub loadEncoding +sub define_alias { - my ($class,$name,$file) = @_; - if (open(my $fh,$file)) + while (@_) { - my $type; - while (1) - { - my $line = <$fh>; - $type = substr($line,0,1); - last unless $type eq '#'; - } - $class .= ('::'.(($type eq 'E') ? 'Escape' : 'Table')); - #warn "Loading $file"; - return $class->read($fh,$name,$type); + my ($alias,$name) = splice(@_,0,2); + push(@alias, $alias => $name); } - else +} + +define_alias( qr/^iso(\d+-\d+)$/i => '"iso-$1"' ); +define_alias( qr/^(\S+)\s+(.*)$/i => '"$1-$2"' ); +#define_alias( sub { return /^iso-(\d+-\d+)$/i ? "iso$1" : '' } ); +define_alias( 'ascii' => 'US-ascii'); +define_alias( 'ibm-1047' => 'cp1047'); + +sub define_encoding +{ + my $obj = shift; + my $name = shift; + $encoding{$name} = $obj; + my $lc = lc($name); + define_alias($lc => $obj) unless $lc eq $name; + while (@_) { - return undef; + my $alias = shift; + define_alias($alias,$obj); } + return $obj; } sub getEncoding { my ($class,$name) = @_; my $enc; - unless (ref($enc = $encoding{$name})) + if (exists $encoding{$name}) { - $enc = $class->loadEncoding($name,$enc) if defined $enc; - unless (ref($enc)) - { - foreach my $dir (@INC) - { - last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); - } - } - $encoding{$name} = $enc; + return $encoding{$name}; + } + else + { + return $class->findAlias($name); } - return $enc; } sub find_encoding @@ -159,6 +188,17 @@ sub decode_utf8 package Encode::Encoding; # Base class for classes which implement encodings +sub Define +{ + my $obj = shift; + my $canonical = shift; + $obj = bless { Name => $canonical },$obj unless ref $obj; + # warn "$canonical => $obj\n"; + Encode::define_encoding($obj, $canonical, @_); +} + +sub name { shift->{'Name'} } + # Temporary legacy methods sub toUnicode { shift->decode(@_) } sub fromUnicode { shift->encode(@_) } @@ -174,7 +214,7 @@ use base 'Encode::Encoding'; # Dummy package that provides the encode interface but leaves data # as UTF-8 encoded. It is here so that from_to() works. -sub name { 'Unicode' } +__PACKAGE__->Define('Unicode'); sub decode { @@ -188,12 +228,11 @@ sub decode package Encode::utf8; use base 'Encode::Encoding'; - # package to allow long-hand # $octets = encode( utf8 => $string ); # -sub name { 'utf8' } +__PACKAGE__->Define(qw(UTF-8 utf8)); sub decode { @@ -215,131 +254,12 @@ sub encode return $octets; } -package Encode::Table; -use base 'Encode::Encoding'; - -sub read -{ - my ($class,$fh,$name,$type) = @_; - my $rep = $class->can("rep_$type"); - my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); - my @touni; - my %fmuni; - my $count = 0; - $def = hex($def); - while ($pages--) - { - my $line = <$fh>; - chomp($line); - my $page = hex($line); - my @page; - my $ch = $page * 256; - for (my $i = 0; $i < 16; $i++) - { - my $line = <$fh>; - for (my $j = 0; $j < 16; $j++) - { - my $val = hex(substr($line,0,4,'')); - if ($val || !$ch) - { - my $uch = chr($val); - push(@page,$uch); - $fmuni{$uch} = $ch; - $count++; - } - else - { - push(@page,undef); - } - $ch++; - } - } - $touni[$page] = \@page; - } - - return bless {Name => $name, - Rep => $rep, - ToUni => \@touni, - FmUni => \%fmuni, - Def => $def, - Num => $count, - },$class; -} - -sub name { shift->{'Name'} } - -sub rep_S { 'C' } - -sub rep_D { 'n' } - -sub rep_M { ($_[0] > 255) ? 'n' : 'C' } - -sub representation -{ - my ($obj,$ch) = @_; - $ch = 0 unless @_ > 1; - $obj-{'Rep'}->($ch); -} - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $rep = $obj->{'Rep'}; - my $touni = $obj->{'ToUni'}; - my $uni = ''; - while (length($str)) - { - my $ch = ord(substr($str,0,1,'')); - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - $x = $touni->[$ch][ord(substr($str,0,1,''))]; - } - unless (defined $x) - { - last if $chk; - # What do we do here ? - $x = ''; - } - $uni .= $x; - } - $_[1] = $str if $chk; - return $uni; -} - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $fmuni = $obj->{'FmUni'}; - my $str = ''; - my $def = $obj->{'Def'}; - my $rep = $obj->{'Rep'}; - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x = $fmuni->{chr(ord($ch))}; - unless (defined $x) - { - last if ($chk); - $x = $def; - } - $str .= pack(&$rep($x),$x); - } - $_[1] = $uni if $chk; - return $str; -} - package Encode::iso10646_1; use base 'Encode::Encoding'; - -# Encoding is 16-bit network order Unicode +# Encoding is 16-bit network order Unicode (no surogates) # Used for X font encodings -sub name { 'iso10646-1' } +__PACKAGE__->Define(qw(UCS-2 iso10646-1)); sub decode { @@ -374,38 +294,6 @@ sub encode return $str; } - -package Encode::Escape; -use base 'Encode::Encoding'; - -use Carp; - -sub read -{ - my ($class,$fh,$name) = @_; - my %self = (Name => $name, Num => 0); - while (<$fh>) - { - my ($key,$val) = /^(\S+)\s+(.*)$/; - $val =~ s/^\{(.*?)\}/$1/g; - $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - $self{$key} = $val; - } - return bless \%self,$class; -} - -sub name { shift->{'Name'} } - -sub decode -{ - croak("Not implemented yet"); -} - -sub encode -{ - croak("Not implemented yet"); -} - # switch back to Encode package in case we ever add AutoLoader package Encode; @@ -564,8 +452,6 @@ Because of all the alias issues, and because in the general case encodings have state C uses the encoding object internally once an operation is in progress. -I - =head1 PERL ENCODING API =head2 Generic Encoding Interface @@ -686,7 +572,7 @@ UTF-16 is similar to UCS-2, 16 bit or 2-byte chunks. UCS-2 can only represent 0..0xFFFF, while UTF-16 has a "surogate pair" scheme which allows it to cover the whole Unicode range. -Encode implements big-endian UCS-2 as the encoding "iso10646-1" as that +Encode implements big-endian UCS-2 aliased to "iso10646-1" as that happens to be the name used by that representation when used with X11 fonts. UTF-32 or UCS-4 is 32-bit or 4-byte chunks. Perl's logical characters @@ -701,11 +587,62 @@ to transfer strings in this form (e.g. to write them to a file) would need to depending on the endian required. -No UTF-32 encodings are not yet implemented. +No UTF-32 encodings are implemented yet. Both UCS-2 and UCS-4 style encodings can have "byte order marks" by representing the code point 0xFFFE as the very first thing in a file. +=head2 Listing available encodings + + use Encode qw(encodings); + @list = encodings(); + +Returns a list of the canonical names of the available encodings. + +=head2 Defining Aliases + + use Encode qw(define_alias); + define_alias( newName => ENCODING); + +Allows newName to be used as am alias for ENCODING. ENCODING may be either the +name of an encoding or and encoding object (as above). + +Currently I can be specified in the following ways: + +=over 4 + +=item As a simple string. + +=item As a qr// compiled regular expression, e.g.: + + define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); + +In this case if I is not a reference it is C-ed to allow +C<$1> etc. to be subsituted. +The example is one way to names as used in X11 font names to alias the MIME names for the +iso-8859-* family. + +=item As a code reference, e.g.: + + define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , ''); + +In this case C<$_> will be set to the name that is being looked up and +I is passed to the sub as its first argument. +The example is another way to names as used in X11 font names to alias the MIME names for +the iso-8859-* family. + +=back + +=head2 Defining Encodings + + use Encode qw(define_alias); + define_encoding( $object, 'canonicalName' [,alias...]); + +Causes I to be associated with I<$object>. +The object should provide the interface described in L below. +If more than two arguments are provided then additional arguments are taken +as aliases for I<$object> as for C. + =head1 Encoding and IO It is very common to want to do encoding transformations when @@ -714,7 +651,7 @@ If perl is configured to use the new 'perlio' IO system then C provides a "layer" (See L) which can transform data as it is read or written. - open(my $ilyad,'>:encoding(iso8859-7)','ilyad.greek'); + open(my $ilyad,'>:encoding(iso-8859-7)','ilyad.greek'); print $ilyad @epic; In addition the new IO system can also be configured to read/write @@ -816,8 +753,7 @@ not a string. As mentioned above encodings are (in the current implementation at least) defined by objects. The mapping of encoding name to object is via the -C<%Encode::encodings> hash. (It is a package hash to allow XS code to get -at it.) +C<%encodings> hash. The values of the hash can currently be either strings or objects. The string form may go away in the future. The string form occurs @@ -883,7 +819,16 @@ and additional parameter. It is also highly desirable that encoding classes inherit from C as a base class. This allows that class to define additional behaviour for -all encoding objects. +all encoding objects. For example built in Unicode, UCS-2 and UTF-8 classes +use : + + package Encode::MyEncoding; + use base qw(Encode::Encoding); + + __PACKAGE__->Define(qw(myCanonical myAlias)); + +To create an object with bless {Name => ...},$class, and call define_encoding. +They inherit their C method from C. =head2 Compiled Encodings diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 40c3dc7..584849a 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -60,13 +60,12 @@ PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len) ENTER; SAVETMPS; PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVpv("Encode",0))); XPUSHs(sv_2mortal(newSVpvn(arg,len))); PUTBACK; - if (perl_call_method("getEncoding",G_SCALAR) != 1) + if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1) { /* should never happen */ - Perl_die(aTHX_ "Encode::getEncoding did not return a value"); + Perl_die(aTHX_ "Encode::find_encoding did not return a value"); return -1; } SPAGAIN; @@ -330,15 +329,19 @@ PerlIO_funcs PerlIO_encode = { void Encode_Define(pTHX_ encode_t *enc) { - HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI); + dSP; HV *stash = gv_stashpv("Encode::XS", TRUE); SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); int i = 0; + PUSHMARK(sp); + XPUSHs(sv); while (enc->name[i]) { const char *name = enc->name[i++]; - hv_store(hash,name,strlen(name),SvREFCNT_inc(sv),0); + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); SvREFCNT_dec(sv); } diff --git a/ext/Encode/Encode/Tcl.pm b/ext/Encode/Encode/Tcl.pm new file mode 100644 index 0000000..8c956ff --- /dev/null +++ b/ext/Encode/Encode/Tcl.pm @@ -0,0 +1,247 @@ +package Encode::Tcl; +use strict; +use Encode qw(find_encoding); +use base 'Encode::Encoding'; +use Carp; + + +sub INC_search +{ + foreach my $dir (@INC) + { + if (opendir(my $dh,"$dir/Encode")) + { + while (defined(my $name = readdir($dh))) + { + if ($name =~ /^(.*)\.enc$/) + { + my $canon = $1; + my $obj = find_encoding($canon); + if (!defined($obj)) + { + my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; + $obj->Define( $canon ); + # warn "$canon => $obj\n"; + } + } + } + closedir($dh); + } + } +} + +sub import +{ + INC_search(); +} + +sub encode +{ + my $obj = shift; + my $new = $obj->loadEncoding; + return undef unless (defined $new); + return $new->encode(@_); +} + +sub new_sequence +{ + my $obj = shift; + my $new = $obj->loadEncoding; + return undef unless (defined $new); + return $new->new_sequence(@_); +} + +sub decode +{ + my $obj = shift; + my $new = $obj->loadEncoding; + return undef unless (defined $new); + return $new->decode(@_); +} + +sub loadEncoding +{ + my $obj = shift; + my $file = $obj->{'File'}; + my $name = $obj->name; + if (open(my $fh,$file)) + { + my $type; + while (1) + { + my $line = <$fh>; + $type = substr($line,0,1); + last unless $type eq '#'; + } + my $class = ref($obj).('::'.(($type eq 'E') ? 'Escape' : 'Table')); + carp "Loading $file"; + bless $obj,$class; + return $obj if $obj->read($fh,$obj->name,$type); + } + else + { + croak("Cannot open $file for ".$obj->name); + } + $obj->Undefine($name); + return undef; +} + +sub INC_find +{ + my ($class,$name) = @_; + my $enc; + foreach my $dir (@INC) + { + last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); + } + return $enc; +} + +package Encode::Tcl::Table; +use base 'Encode::Encoding'; + +use Data::Dumper; + +sub read +{ + my ($obj,$fh,$name,$type) = @_; + my $rep = $obj->can("rep_$type"); + my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); + my @touni; + my %fmuni; + my $count = 0; + $def = hex($def); + while ($pages--) + { + my $line = <$fh>; + chomp($line); + my $page = hex($line); + my @page; + my $ch = $page * 256; + for (my $i = 0; $i < 16; $i++) + { + my $line = <$fh>; + for (my $j = 0; $j < 16; $j++) + { + my $val = hex(substr($line,0,4,'')); + if ($val || !$ch) + { + my $uch = chr($val); + push(@page,$uch); + $fmuni{$uch} = $ch; + $count++; + } + else + { + push(@page,undef); + } + $ch++; + } + } + $touni[$page] = \@page; + } + $obj->{'Rep'} = $rep; + $obj->{'ToUni'} = \@touni; + $obj->{'FmUni'} = \%fmuni; + $obj->{'Def'} = $def; + $obj->{'Num'} = $count; + return $obj; +} + +sub rep_S { 'C' } + +sub rep_D { 'n' } + +sub rep_M { ($_[0] > 255) ? 'n' : 'C' } + +sub representation +{ + my ($obj,$ch) = @_; + $ch = 0 unless @_ > 1; + $obj-{'Rep'}->($ch); +} + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $rep = $obj->{'Rep'}; + my $touni = $obj->{'ToUni'}; + my $uni = ''; + while (length($str)) + { + my $ch = ord(substr($str,0,1,'')); + my $x; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + $x = $touni->[$ch][ord(substr($str,0,1,''))]; + } + unless (defined $x) + { + last if $chk; + # What do we do here ? + $x = ''; + } + $uni .= $x; + } + $_[1] = $str if $chk; + return $uni; +} + + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $fmuni = $obj->{'FmUni'}; + my $str = ''; + my $def = $obj->{'Def'}; + my $rep = $obj->{'Rep'}; + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x = $fmuni->{chr(ord($ch))}; + unless (defined $x) + { + last if ($chk); + $x = $def; + } + $str .= pack(&$rep($x),$x); + } + $_[1] = $uni if $chk; + return $str; +} + +package Encode::Tcl::Escape; +use base 'Encode::Encoding'; + +use Carp; + +sub read +{ + my ($class,$fh,$name) = @_; + my %self = (Name => $name, Num => 0); + while (<$fh>) + { + my ($key,$val) = /^(\S+)\s+(.*)$/; + $val =~ s/^\{(.*?)\}/$1/g; + $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; + $self{$key} = $val; + } + return bless \%self,$class; +} + +sub decode +{ + croak("Not implemented yet"); +} + +sub encode +{ + croak("Not implemented yet"); +} + +1; +__END__ diff --git a/ext/Encode/Encode/ascii.ucm b/ext/Encode/Encode/ascii.ucm index 71e2dd1..344423e 100644 --- a/ext/Encode/Encode/ascii.ucm +++ b/ext/Encode/Encode/ascii.ucm @@ -1,6 +1,7 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n ascii -o Encode/ascii.ucm Encode/ascii.enc - "ascii" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n US-ascii -o Encode/ascii.ucm Encode/ascii.enc + "US-ascii" + "ascii" 1 1 \x3F diff --git a/ext/Encode/Encode/cp1250.ucm b/ext/Encode/Encode/cp1250.ucm index 6acc1d1..bc3cedc 100644 --- a/ext/Encode/Encode/cp1250.ucm +++ b/ext/Encode/Encode/cp1250.ucm @@ -1,5 +1,5 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc "cp1250" 1 1 diff --git a/ext/Encode/Encode/iso8859-1.ucm b/ext/Encode/Encode/iso8859-1.ucm index 1366f60..6f139fb 100644 --- a/ext/Encode/Encode/iso8859-1.ucm +++ b/ext/Encode/Encode/iso8859-1.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc - "iso8859-1" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc + "iso-8859-1" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-10.ucm b/ext/Encode/Encode/iso8859-10.ucm index a326352..2bcc2b0 100644 --- a/ext/Encode/Encode/iso8859-10.ucm +++ b/ext/Encode/Encode/iso8859-10.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc - "iso8859-10" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc + "iso-8859-10" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-13.ucm b/ext/Encode/Encode/iso8859-13.ucm index 435c492..ff3e75c 100644 --- a/ext/Encode/Encode/iso8859-13.ucm +++ b/ext/Encode/Encode/iso8859-13.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc - "iso8859-13" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc + "iso-8859-13" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-14.ucm b/ext/Encode/Encode/iso8859-14.ucm index c069f83..76a2bba 100644 --- a/ext/Encode/Encode/iso8859-14.ucm +++ b/ext/Encode/Encode/iso8859-14.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc - "iso8859-14" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc + "iso-8859-14" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-15.ucm b/ext/Encode/Encode/iso8859-15.ucm index da41e2d..40538ac 100644 --- a/ext/Encode/Encode/iso8859-15.ucm +++ b/ext/Encode/Encode/iso8859-15.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc - "iso8859-15" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc + "iso-8859-15" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-16.ucm b/ext/Encode/Encode/iso8859-16.ucm index b49d975..2ff7cb8 100644 --- a/ext/Encode/Encode/iso8859-16.ucm +++ b/ext/Encode/Encode/iso8859-16.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc - "iso8859-16" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc + "iso-8859-16" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-2.ucm b/ext/Encode/Encode/iso8859-2.ucm index c93deb2..b55c8dc 100644 --- a/ext/Encode/Encode/iso8859-2.ucm +++ b/ext/Encode/Encode/iso8859-2.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc - "iso8859-2" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc + "iso-8859-2" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-3.ucm b/ext/Encode/Encode/iso8859-3.ucm index 31fa1d6..ec68ed1 100644 --- a/ext/Encode/Encode/iso8859-3.ucm +++ b/ext/Encode/Encode/iso8859-3.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc - "iso8859-3" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc + "iso-8859-3" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-4.ucm b/ext/Encode/Encode/iso8859-4.ucm index eb9e6fa..3d43082 100644 --- a/ext/Encode/Encode/iso8859-4.ucm +++ b/ext/Encode/Encode/iso8859-4.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc - "iso8859-4" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc + "iso-8859-4" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-5.ucm b/ext/Encode/Encode/iso8859-5.ucm index 67daf56..86235a8 100644 --- a/ext/Encode/Encode/iso8859-5.ucm +++ b/ext/Encode/Encode/iso8859-5.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc - "iso8859-5" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc + "iso-8859-5" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-6.ucm b/ext/Encode/Encode/iso8859-6.ucm index e0d5c93..fbeb228 100644 --- a/ext/Encode/Encode/iso8859-6.ucm +++ b/ext/Encode/Encode/iso8859-6.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc - "iso8859-6" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc + "iso-8859-6" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-7.ucm b/ext/Encode/Encode/iso8859-7.ucm index 6a4cb63..ba405db 100644 --- a/ext/Encode/Encode/iso8859-7.ucm +++ b/ext/Encode/Encode/iso8859-7.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc - "iso8859-7" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc + "iso-8859-7" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-8.ucm b/ext/Encode/Encode/iso8859-8.ucm index 0f7146f..574abfd 100644 --- a/ext/Encode/Encode/iso8859-8.ucm +++ b/ext/Encode/Encode/iso8859-8.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc - "iso8859-8" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc + "iso-8859-8" 1 1 \x3F diff --git a/ext/Encode/Encode/iso8859-9.ucm b/ext/Encode/Encode/iso8859-9.ucm index f1a308f..24d7d4b 100644 --- a/ext/Encode/Encode/iso8859-9.ucm +++ b/ext/Encode/Encode/iso8859-9.ucm @@ -1,6 +1,6 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n iso8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc - "iso8859-9" +# Written $Id: //depot/perlio/ext/Encode/compile#15 $ +# ./compile -n iso-8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc + "iso-8859-9" 1 1 \x3F diff --git a/ext/Encode/compile b/ext/Encode/compile index 5e3e645..8201043 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -143,7 +143,7 @@ sub cmp_name foreach my $enc (sort cmp_name @encfiles) { my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/; - $name = delete $opt{'n'} if exists $opt{'n'}; + $name = $opt{'n'} if exists $opt{'n'}; if (open(E,$enc)) { if ($sfx eq 'enc') @@ -241,7 +241,7 @@ sub compile_ucm } else { - # $name = lc($cs); + $name = $cs unless exists $opt{'n'}; } my $erep; my $urep; diff --git a/t/lib/encode.t b/t/lib/encode.t index da4d1b8..af1f34b 100644 --- a/t/lib/encode.t +++ b/t/lib/encode.t @@ -8,15 +8,15 @@ BEGIN { } } use Test; -use Encode qw(from_to encode decode encode_utf8 decode_utf8); +use Encode qw(from_to encode decode encode_utf8 decode_utf8 find_encoding); use charnames qw(greek); -my @encodings = grep(/iso8859/,Encode::encodings()); +my @encodings = grep(/iso-?8859/,Encode::encodings()); my $n = 2; my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z'); my @source = qw(ascii iso8859-1 cp1250); my @destiny = qw(cp1047 cp37 posix-bc); my @ebcdic_sets = qw(cp1047 cp37 posix-bc); -plan test => 33+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256; +plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256; my $str = join('',map(chr($_),0x20..0x7E)); my $cpy = $str; ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong"); @@ -91,6 +91,15 @@ foreach my $enc_eb (@ebcdic_sets) } } +my $mime = find_encoding('iso-8859-2'); +ok(defined($mime),1,"Cannot find MIME-ish'iso-8859-2'"); +my $x11 = find_encoding('iso8859-2'); +ok(defined($x11),1,"Cannot find X11-ish 'iso8859-2'"); +ok($mime,$x11,"iso8598-2 and iso-8859-2 not same"); +my $spc = find_encoding('iso 8859-2'); +ok(defined($spc),1,"Cannot find 'iso 8859-2'"); +ok($spc,$mime,"iso 8859-2 and iso-8859-2 not same"); + for my $i (256,128,129,256) { my $c = chr($i);