From: Nick Ing-Simmons Date: Mon, 2 Oct 2000 18:23:14 +0000 (+0000) Subject: A few tweaks to get Tk803 to work with Encode scheme. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=87714904135dbd2ae4657dbe20a531654286994e;p=p5sagit%2Fp5-mst-13.2.git A few tweaks to get Tk803 to work with Encode scheme. p4raw-id: //depot/perl@7107 --- diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 220520a..abcbf36 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -360,7 +360,9 @@ sub encodings return @names; } -my %encoding = ( Unicode => 'Encode::Unicode' ); +my %encoding = ( Unicode => bless({},'Encode::Unicode'), + 'iso10646-1' => bless({},'Encode::iso10646_1'), + ); sub getEncoding { @@ -384,6 +386,10 @@ sub getEncoding $class .= ('::'.(($type eq 'E') ? 'Escape' : 'Table')); $encoding{$name} = $class->read($fh,$name,$type); } + else + { + $encoding{$name} = undef; + } } return $encoding{$name}; } @@ -409,10 +415,11 @@ sub read my %fmuni; my $count = 0; $def = hex($def); - $def = pack(&$rep($def),$def); while ($pages--) { - my $page = hex(<$fh>); + my $line = <$fh>; + chomp($line); + my $page = hex($line); my @page; my $ch = $page * 256; for (my $i = 0; $i < 16; $i++) @@ -425,7 +432,7 @@ sub read { my $uch = chr($val); push(@page,$uch); - $fmuni{$uch} = pack(&$rep($ch),$ch); + $fmuni{$uch} = $ch; $count++; } else @@ -498,6 +505,7 @@ sub fromUnicode my $fmuni = $obj->{'FmUni'}; my $str = ''; my $def = $obj->{'Def'}; + my $rep = $obj->{'Rep'}; while (length($uni)) { my $ch = substr($uni,0,1,''); @@ -507,7 +515,43 @@ sub fromUnicode last if ($chk); $x = $def; } - $str .= $x; + $str .= pack(&$rep($x),$x); + } + $_[1] = $uni if $chk; + return $str; +} + +package Encode::iso10646_1;# + +sub name { 'iso10646-1' } + +sub toUnicode +{ + my ($obj,$str,$chk) = @_; + my $uni = ''; + while (length($str)) + { + my $code = unpack('S',substr($str,0,2,'')); + $uni .= chr($code); + } + $_[1] = $str if $chk; + return $uni; +} + +sub fromUnicode +{ + my ($obj,$uni,$chk) = @_; + my $str = ''; + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x = ord($ch); + unless ($x < 32768) + { + last if ($chk); + $x = 0; + } + $str .= pack('S',$x); } $_[1] = $uni if $chk; return $str; diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index b4d256f..c231bba 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -5,7 +5,7 @@ #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ Perl_croak(aTHX_ "panic_unimplemented"); \ return (y)0; /* fool picky compilers */ \ - } + } UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) @@ -46,7 +46,7 @@ _utf8_to_bytes(sv, ...) { SV * to = items > 1 ? ST(1) : Nullsv; SV * check = items > 2 ? ST(2) : Nullsv; - + if (to) RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); else { @@ -56,7 +56,7 @@ _utf8_to_bytes(sv, ...) if (SvTRUE(check)) { /* Must do things the slow way */ U8 *dest; - U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ + U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ U8 *send = s + len; New(83, dest, len, U8); /* I think */ @@ -67,7 +67,7 @@ _utf8_to_bytes(sv, ...) else { STRLEN ulen; UV uv = *s++; - + /* Have to do it all ourselves because of error routine, aargh. */ if (!(uv & 0x40)) @@ -79,15 +79,15 @@ _utf8_to_bytes(sv, ...) else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } else if (!(uv & 0x01)) { ulen = 7; uv = 0; } else { ulen = 13; uv = 0; } - + /* Note change to utf8.c variable naming, for variety */ while (ulen--) { if ((*s & 0xc0) != 0x80) goto failure; - + else uv = (uv << 6) | (*s++ & 0x3f); - } + } if (uv > 256) { failure: call_failure(check, s, dest, src); @@ -200,8 +200,7 @@ _on_utf8(sv) CODE: { if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - sv_2mortal(rsv); + SV *rsv = newSViv(SvUTF8(sv)); RETVAL = rsv; SvUTF8_on(sv); } else { @@ -217,8 +216,7 @@ _off_utf8(sv) CODE: { if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - sv_2mortal(rsv); + SV *rsv = newSViv(SvUTF8(sv)); RETVAL = rsv; SvUTF8_off(sv); } else { diff --git a/t/lib/encode.t b/t/lib/encode.t index 4610181..08e9bac 100644 --- a/t/lib/encode.t +++ b/t/lib/encode.t @@ -17,7 +17,7 @@ ok(from_to($cpy,'Unicode','iso8859-1'),length($str),"Length wrong"); ok($cpy,$str,"ASCII mangled by translating from Unicode to iso8859-1"); $str = join('',map(chr($_),0xa0..0xff)); -my $cpy = $str; +$cpy = $str; ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong"); my $sym = Encode->getEncoding('symbol');