# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.8 2004/10/24 13:00:29 dankogai Exp dankogai $
+# $Id: Changes,v 2.9 2004/12/03 19:16:53 dankogai Exp dankogai $
#
-$Revision: 2.8 $ $Date: 2004/10/24 13:00:29 $
+$Revision: 2.9 $ $Date: 2004/12/03 19:16:53 $
+! Encode.pm Encode.xs
+ Addressed " :encoding(utf8) broken in perl-5.8.6".
+ Message-Id: <lrllcfeank.fsf_-_@caliper.activestate.com>
+! Encode.pm
+ Addressed "(de|en)code($valid_encoding, undef) does not warn".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8723
+! Encode.pm t/Encode.t
+ Addressed "Can't encode URI". When a reference is fed to (en|de)code,
+ Encode now stringifies instead of returning undef.
+ http://rt.cpan.org/NoAuth/Bug.html?id=8725
+! Encode.xs t/fallback.t
+ Addressed "FB_HTMLCREF and FB_XMLCREF for the UTF-8 decoder".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8694
+! Encode.pm
+ Addressed "s/digit/number/".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8695
+! Encode.pm
+ Addressed "while (defined(read )) { ... } is an infinite loop".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8696
+! Encode.pm
+ Addressed "What the heck is UCM?".
+ Document fixed so that it no longer contains "UCM-Based Encodings".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8697
+
+2.08 2004/10/24 13:00:29
! Encode.xs lib/Encode/Encoding.pm Unicode/Unicode.{pm,xs}
- Resolved the issue that was raised by the Encode::utf8 fallbacks vs.
- PerlIO::encoding issue that was introduced in 2.07. This is done by
- making use of ->renew() method that used to be used only by
- Encode::Unicode. ->renewed() method was also introduced to fetch
- the value thereof.
+ Resolved the issue that was raised by 2.07 -- the Encode::utf8 fallbacks
+ that were introduced there messed up PerlIO::encoding.
+ * To do so, ->renew() is renewed and ->renewed() was introduced to
+ tell whether the caller is PerlIO or not.
Message-Id: <94B2EB12-25B7-11D9-9E6A-000A95DBB50A@dan.co.jp>
2.07 2004/10/22 19:35:52
#
-# $Id: Encode.pm,v 2.8 2004/10/24 12:32:06 dankogai Exp $
+# $Id: Encode.pm,v 2.9 2004/12/03 19:16:40 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.9 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
{
my ($name, $string, $check) = @_;
return undef unless defined $string;
- return undef if ref $string;
+ $string .= '' if ref $string; # stringify;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
{
my ($name,$octets,$check) = @_;
return undef unless defined $octets;
- return undef if ref $octets;
+ $octets .= '' if ref $octets;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
the result is always off, even when it contains completely valid utf8
string. See L</"The UTF-8 flag"> below.
-encode($valid_encoding, undef) is harmless but warns you for
-C<Use of uninitialized value in subroutine entry>.
-encode($valid_encoding, '') is harmless and warnless.
+If the $string is C<undef> then C<undef> is returned. If it is a reference, it is stringified before being encoded.
=item $string = decode(ENCODING, $octets [, CHECK])
ASCII data (or EBCDIC on EBCDIC machines). See L</"The UTF-8 flag">
below.
-decode($valid_encoding, undef) is harmless but warns you for
-C<Use of uninitialized value in subroutine entry>.
-decode($valid_encoding, '') is harmless and warnless.
+If the $octets is C<undef> then C<undef> is returned. If it is a reference, it is stringified before being decoded.
=item [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
If I<CHECK> is 0, (en|de)code will put a I<substitution character> in
-place of a malformed character. When you encode to UCM-based encodings,
-E<lt>subcharE<gt> will be used. When you decode from UCM-based
-encodings, the code point C<0xFFFD> is used. If the data is supposed
-to be UTF-8, an optional lexical warning (category utf8) is given.
+place of a malformed character. When you encode, E<lt>subcharE<gt>
+will be used. When you decode, the code point C<0xFFFD> is used. If
+the data is supposed to be UTF-8, an optional lexical warning
+(category utf8) is given.
=item I<CHECK> = Encode::FB_CROAK ( == 1)
(i.e. you are reading with a fixed-width buffer). Here is a sample
code that does exactly this:
- my $data = ''; my $utf8 = '';
- while(defined(read $fh, $buffer, 256)){
- # buffer may end in a partial character so we append
- $data .= $buffer;
- $utf8 .= decode($encoding, $data, Encode::FB_QUIET);
- # $data now contains the unprocessed partial character
+ my $buffer = ''; my $string = '';
+ while(read $fh, $buffer, 256, length($buffer)){
+ $string .= decode($encoding, $buffer, Encode::FB_QUIET);
+ # $buffer now contains the unprocessed partial character
}
=item I<CHECK> = Encode::FB_WARN
in the character repertoire of the encoding.
HTML/XML character reference modes are about the same, in place of
-C<\x{I<HHHH>}>, HTML uses C<&#I<NNNN>;> where I<NNNN> is a decimal digit and
-XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal digit.
+C<\x{I<HHHH>}>, HTML uses C<&#I<NNN>;> where I<NNN> is a decimal number and
+XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal number.
=item The bitmask
/*
- $Id: Encode.xs,v 2.2 2004/10/24 13:00:29 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.3 2004/12/03 19:16:53 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
#if 0
fprintf(stderr, "renewed == %d\n", renewed);
#endif
- if (renewed){ check |= ENCODE_RETURN_ON_ERR; }
}
FREETMPS; LEAVE;
/* end PerlIO check */
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
/* Partial character - done */
+ if (renewed)
+ break;
goto decode_utf8_fallback;
}
else if (is_utf8_char(s)) {
break;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar = newSVpvf("\\x%02" UVXf, (UV)*s);
+ SV* subchar = newSVpvf(check & ENCODE_PERLQQ ? "\\x%02" UVXf :
+ check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
+ "&#x%" UVxf ";", (UV)*s);
sv_catsv(dst, subchar);
SvREFCNT_dec(subchar);
} else {
# http://module-build.sourceforge.net/META-spec.html
#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
name: Encode
-version: 2.08
+version: 2.09
version_from: Encode.pm
installdirs: perl
requires:
/*
- $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp dankogai $
+ $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
ok( is_utf8($a)); # weird but true: an empty UTF-8 string
# non-string arguments
-ok(decode(latin1 => bless {}, "x"), undef);
-ok(encode(utf8 => bless {}, "x"), undef);
-
+package Encode::Dummy;
+use overload q("") => sub { $_[0]->[0] };
+sub new { my $class = shift; bless [ @_ ] => $class }
+package main;
+ok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar");
+ok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar");
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 36;
+use Test::More tests => 40;
use Encode q(:all);
my $uo = '';
is($dst, $ah, "FB_HTMLCREF ascii");
is($src, '', "FB_HTMLCREF residue ascii");
-#$src = $ao;
-#$dst = $utf8->decode($src, FB_HTMLCREF);
-#is($dst, $uh, "FB_HTMLCREF utf8");
-#is($src, '', "FB_HTMLCREF residue utf8");
+$src = $ao;
+$dst = $utf8->decode($src, FB_HTMLCREF);
+is($dst, $uh, "FB_HTMLCREF utf8");
+is($src, '', "FB_HTMLCREF residue utf8");
$src = $uo;
$dst = $ascii->encode($src, FB_XMLCREF);
is($dst, $ax, "FB_XMLCREF ascii");
is($src, '', "FB_XMLCREF residue ascii");
-#$src = $ao;
-#$dst = $utf8->decode($src, FB_XMLCREF);
-#is($dst, $ax, "FB_XMLCREF utf8");
-#is($src, '', "FB_XMLCREF residue utf8");
+$src = $ao;
+$dst = $utf8->decode($src, FB_XMLCREF);
+is($dst, $ax, "FB_XMLCREF utf8");
+is($src, '', "FB_XMLCREF residue utf8");