From: Dan Kogai Date: Sat, 24 May 2003 00:15:49 +0000 (+0900) Subject: Re: Stateful PerlIO implemented [Was: [perl #22261] Was: Unrecognised BOM...] X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=47dd3999631d2fd5030069ca2ee4380b5f022d88;p=p5sagit%2Fp5-mst-13.2.git Re: Stateful PerlIO implemented [Was: [perl #22261] Was: Unrecognised BOM...] Message-Id: <6F7B29DA-8D31-11D7-9F95-000393AE4244@dan.co.jp> p4raw-id: //depot/perl@19595 --- diff --git a/ext/Encode/Changes b/ext/Encode/Changes index ded94e1..f729d18 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -3,6 +3,19 @@ # $Id: Changes,v 1.95 2003/05/21 08:41:11 dankogai Exp $ # $Revision: 1.95 $ $Date: 2003/05/21 08:41:11 $ +! encoding.pm + Addressed [cpan #2629] Wrong assumption in numeric comparison + Message-Id: +! Encode.pm Encode.xs Unicode/Unicode.pm Unicode/Unicode.xs + lib/Encode/Encoding.pm t/perlio.t + ! API Change: ->new_sequence() => ->renew() + + Encode::Unicode makes use of it so it can handle BOM on PerlIO + + Encode::XS and Encode::utf8 now supports ->renew() + + Encode::Encoding now documents this with examples + - Non-XS (en|de)code stripped out of Encode::Unicode + Message-Id: <146957DB-8C39-11D7-9C91-000393AE4244@dan.co.jp> + +1.95 2003/05/21 08:41:11 ! ucm/8859-*.ucm Since bogus entries were found in iso-8859-6, all entries are re-generated once again out of diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs index 8b02402..cb27bb3 100644 --- a/ext/Encode/Unicode/Unicode.xs +++ b/ext/Encode/Unicode/Unicode.xs @@ -171,8 +171,11 @@ CODE: SvCUR_set(result,d - (U8 *)SvPVX(result)); } if (s < e) { + /* unlikely to happen because it's fixed-length -- dankogai */ + if (check & ENCODE_WARN_ON_ERR){ Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character", *hv_fetch((HV *)SvRV(obj),"Name",4,0)); + } } if (check && !(check & ENCODE_LEAVE_SRC)){ if (s < e) { @@ -242,8 +245,16 @@ CODE: } } if (s < e) { - Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character", - *hv_fetch((HV *)SvRV(obj),"Name",4,0)); + /* UTF-8 partial char happens often on PerlIO. + Since this is okay and normal, we do not warn. + But this is critical when you choose to LEAVE_SRC + in which case we die */ + if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){ + Perl_croak(aTHX_ "%"SVf":partial character is not allowed " + "when CHECK = 0x%" UVuf, + *hv_fetch((HV *)SvRV(obj),"Name",4,0), check); + } + } if (check && !(check & ENCODE_LEAVE_SRC)){ if (s < e) { @@ -254,7 +265,7 @@ CODE: SvCUR_set(utf8,0); } *SvEND(utf8) = '\0'; - } + } XSRETURN(1); } diff --git a/ext/Encode/t/perlio.t b/ext/Encode/t/perlio.t index 9966ef8..ed16796 100644 --- a/ext/Encode/t/perlio.t +++ b/ext/Encode/t/perlio.t @@ -27,7 +27,7 @@ use File::Copy; use FileHandle; #use Test::More qw(no_plan); -use Test::More tests => 28; +use Test::More tests => 38; our $DEBUG = 0; @@ -40,23 +40,19 @@ use Encode (":all"); #$Encode::JP::JIS7::DEBUG = $DEBUG; } - - my $seq = 0; my $dir = dirname(__FILE__); my %e = ( jisx0208 => [ qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/], - #ksc5601 => [ qw/euc-kr iso-2022-kr/], ksc5601 => [ qw/euc-kr/], - #gb2312 => [ qw/euc-cn hz/], - gb2312 => [ qw/euc-cn/], + gb2312 => [ qw/euc-cn hz/], ); $/ = "\x0a"; # may fix VMS problem for test #28 and #29 -for my $src(sort keys %e) { +for my $src (sort keys %e) { my $ufile = File::Spec->catfile($dir,"$src.utf"); open my $fh, "<:utf8", $ufile or die "$ufile : $!"; my @uline = <$fh>; @@ -72,9 +68,8 @@ for my $src(sort keys %e) { # then create a file via perlio without autoflush - TODO:{ - #local $TODO = "$e: !perlio_ok" unless (perlio_ok($e) or $DEBUG); - todo_skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG); + SKIP:{ + skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG); no warnings 'uninitialized'; open $fh, ">:encoding($e)", $pfile or die "$sfile : $!"; $fh->autoflush(0); @@ -130,8 +125,53 @@ for my $src(sort keys %e) { $DEBUG or unlink ($sfile, $pfile); } } - +# BOM Test + +SKIP:{ + my $pev = PerlIO::encoding->VERSION; + skip "PerlIO::encoding->VERSION = $pev <= 0.07 ", 6 + unless ($pev >= 0.07 or $DEBUG); + + my $file = File::Spec->catfile($dir,"jisx0208.utf"); + open my $fh, "<:utf8", $file or die "$file : $!"; + my $str = join('' => <$fh>); + close $fh; + my %bom = ( + 'UTF-16BE' => pack('n', 0xFeFF), + 'UTF-16LE' => pack('v', 0xFeFF), + 'UTF-32BE' => pack('N', 0xFeFF), + 'UTF-32LE' => pack('V', 0xFeFF), + ); + # reading + for my $utf (sort keys %bom){ + my $bomed = $bom{$utf} . encode($utf, $str); + my $sfile = File::Spec->catfile($dir,".$utf.$seq.$$"); + dump2file($sfile, $bomed); + my $utf_nobom = $utf; $utf_nobom =~ s/(LE|BE)$//o; + # reading + open $fh, "<:encoding($utf_nobom)", $sfile or die "$sfile : $!"; + my $cmp = join '' => <$fh>; + close $fh; + is($str, $cmp, "<:encoding($utf_nobom) eq $utf"); + unlink $sfile; $seq++; + } + # writing + for my $utf_nobom (qw/UTF-16 UTF-32/){ + my $utf = $utf_nobom . 'BE'; + my $sfile = File::Spec->catfile($dir,".$utf_nobom.$seq.$$"); + my $bomed = $bom{$utf} . encode($utf, $str); + open $fh, ">:encoding($utf_nobom)", $sfile or die "$sfile : $!"; + print $fh $str; + close $fh; + open my $fh, "<:raw", $sfile or die "$sfile : $!"; + read $fh, my $cmp, -s $sfile; + close $fh; + use bytes (); + ok($bomed eq $cmp, ">:encoding($utf_nobom) eq $utf"); + unlink $sfile; $seq++; + } +} sub dump2file{ no warnings; open my $fh, ">", $_[0] or die "$_[0]: $!"; diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm index 53c9d46..61a116f 100644 --- a/ext/PerlIO/encoding/encoding.pm +++ b/ext/PerlIO/encoding/encoding.pm @@ -1,6 +1,6 @@ package PerlIO::encoding; use strict; -our $VERSION = '0.06'; +our $VERSION = '0.07'; our $DEBUG = 0; $DEBUG and warn __PACKAGE__, " called by ", join(", ", caller), "\n";