tbl = (short*)cPVOP->op_pv;
if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans_simple");
+ Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
s = (U8*)SvPV(sv, len);
send = s + len;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans_count");
+ Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
s = (U8*)SvPV(sv, len);
send = s + len;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
- Perl_croak(aTHX_ "panic: do_trans_complex");
+ Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
s = (U8*)SvPV(sv, len);
isutf8 = SvUTF8(sv);
if ((uv = swash_fetch(rv, s)) < none) {
s += UTF8SKIP(s);
matches++;
- d = uvchr_to_utf8(d, uv);
+ d = uvuni_to_utf8(d, uv);
}
else if (uv == none) {
int i = UTF8SKIP(s);
int i = UTF8SKIP(s);
s += i;
matches++;
- d = uvchr_to_utf8(d, final);
+ d = uvuni_to_utf8(d, final);
}
else
s += UTF8SKIP(s);
STRLEN clen = d - dstart;
STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
if (!grows)
- Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
+ Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
Renew(dstart, nlen+UTF8_MAXLEN, U8);
d = dstart + clen;
dend = dstart + nlen;
STRLEN clen = d - dstart;
STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
if (!grows)
- Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
+ Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
Renew(dstart, nlen+UTF8_MAXLEN, U8);
d = dstart + clen;
dend = dstart + nlen;
matches++;
s += UTF8SKIP(s);
if (uv != puv) {
- d = uvchr_to_utf8(d, uv);
+ d = uvuni_to_utf8(d, uv);
puv = uv;
}
continue;
if (havefinal) {
s += UTF8SKIP(s);
if (puv != final) {
- d = uvchr_to_utf8(d, final);
+ d = uvuni_to_utf8(d, final);
puv = final;
}
}
else {
STRLEN len;
- uv = utf8_to_uvchr(s, &len);
+ uv = utf8_to_uvuni(s, &len);
if (uv != puv) {
Copy(s, d, len, U8);
d += len;
STRLEN clen = d - dstart;
STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
if (!grows)
- Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
+ Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
Renew(dstart, nlen+UTF8_MAXLEN, U8);
d = dstart + clen;
dend = dstart + nlen;
if (uv < none) {
matches++;
s += UTF8SKIP(s);
- d = uvchr_to_utf8(d, uv);
+ d = uvuni_to_utf8(d, uv);
continue;
}
else if (uv == none) { /* "none" is unmapped character */
else if (uv == extra && !del) {
matches++;
s += UTF8SKIP(s);
- d = uvchr_to_utf8(d, final);
+ d = uvuni_to_utf8(d, final);
continue;
}
matches++; /* "none+1" is delete character */
my $alias = $alias[$i];
my $val = $alias[$i+1];
my $new;
+
if (ref($alias) eq 'Regexp' && $_ =~ $alias)
{
$new = eval $val;
{
$new = &{$alias}($val)
}
- elsif (lc($_) eq $alias)
+ elsif (lc($_) eq lc($alias))
{
$new = $val;
}
package Encode::XS;
use base 'Encode::Encoding';
-package Encode::Unicode;
+package Encode::Internal;
use base 'Encode::Encoding';
# Dummy package that provides the encode interface but leaves data
# as UTF-X encoded. It is here so that from_to() works.
-__PACKAGE__->Define('Unicode');
+__PACKAGE__->Define('Internal');
+
+Encode::define_alias( 'Unicode' => 'Internal' ) if ord('A') == 65;
sub decode
{
*encode = \&decode;
+package Encoding::Unicode;
+use base 'Encode::Encoding';
+
+__PACKAGE__->Define('Unicode') unless ord('A') == 65;
+
+sub decode
+{
+ my ($obj,$str,$chk) = @_;
+ my $res = '';
+ for (my $i = 0; $i < length($str); $i++)
+ {
+ $res .= chr(utf8::unicode_to_native(ord(substr($str,$i,1))));
+ }
+ $_[1] = '' if $chk;
+ return $res;
+}
+
+sub encode
+{
+ my ($obj,$str,$chk) = @_;
+ my $res = '';
+ for (my $i = 0; $i < length($str); $i++)
+ {
+ $res .= chr(utf8::native_to_unicode(ord(substr($str,$i,1))));
+ }
+ $_[1] = '' if $chk;
+ return $res;
+}
+
+
package Encode::utf8;
use base 'Encode::Encoding';
# package to allow long-hand
Copyright (c) 1991 Bell Communications Research, Inc. (Bellcore)
- Permission to use, copy, modify, and distribute this material
- for any purpose and without fee is hereby granted, provided
- that the above copyright notice and this permission notice
- appear in all copies, and that the name of Bellcore not be
- used in advertising or publicity pertaining to this
- material without the specific, prior written permission
- of an authorized representative of Bellcore. BELLCORE
- MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
- OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS",
+ Permission to use, copy, modify, and distribute this material
+ for any purpose and without fee is hereby granted, provided
+ that the above copyright notice and this permission notice
+ appear in all copies, and that the name of Bellcore not be
+ used in advertising or publicity pertaining to this
+ material without the specific, prior written permission
+ of an authorized representative of Bellcore. BELLCORE
+ MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
+ OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS",
WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
*/
PREINIT:
STRLEN len;
- register unsigned char *str = (unsigned char*)SvPV(sv, len);
+ register unsigned char *str = (unsigned char*)SvPVbyte(sv, len);
unsigned char const* end = str + len;
char *r;
unsigned char c[4];
while (str < end) {
int i = 0;
do {
- unsigned char uc = index_64[*str++];
+ unsigned char uc = index_64[NATIVE_TO_ASCII(*str++)];
if (uc != INVALID)
c[i++] = uc;
break;
}
} while (i < 4);
-
+
if (c[0] == EQ || c[1] == EQ) {
if (PL_dowarn) warn("Premature padding of base64 data");
break;
*encode = \&encode_qp;
*decode = \&decode_qp;
+# Methods for use as a PerlIO layer object
+
+sub PUSHED
+{
+ my ($class,$mode) = @_;
+ # When writing we buffer the data
+ my $write = '';
+ return bless \$write,$class;
+}
+
+sub FILL
+{
+ my ($obj,$fh) = @_;
+ my $line = <$fh>;
+ return (defined $line) ? decode_qp($line) : undef;
+ return undef;
+}
+
+sub WRITE
+{
+ my ($obj,$buf,$fh) = @_;
+ $$obj .= encode_qp($buf);
+ return length($buf);
+}
+
+sub FLUSH
+{
+ my ($obj,$fh) = @_;
+ print $fh $$obj or return -1;
+ $$obj = '';
+ return 0;
+}
+
+
1;
ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong");
my $sym = Encode->getEncoding('symbol');
-my $uni = $sym->decode('a');
+my $uni = $sym->decode(encode(ascii => 'a'));
ok("\N{alpha}",substr($uni,0,1),"alpha does not map to symbol 'a'");
$str = $sym->encode("\N{Beta}");
-ok("B",substr($str,0,1),"Symbol 'B' does not map to Beta");
+ok("B",decode(ascii => substr($str,0,1)),"Symbol 'B' does not map to Beta");
foreach my $enc (qw(symbol dingbats ascii),@encodings)
{
print "# Testing MIME::Base64-", $MIME::Base64::VERSION, "\n";
+BEGIN {
+ if (ord('A') != 193) {
+ *ASCII = sub { return $_[0] };
+ }
+ else {
+ require Encode;
+ *ASCII = sub { Encode::encode('ascii',$_[0]) };
+ }
+}
+
$testno = 1;
encodeTest();
print "# encode test\n";
my @encode_tests = (
- ['' => ''],
- ['a' => 'YQ=='],
- ['aa' => 'YWE='],
- ['aaa' => 'YWFh'],
-
- ['aaa' => 'YWFh'],
- ['aaa' => 'YWFh'],
- ['aaa' => 'YWFh'],
-
- ["\000\377" => "AP8="],
- ["\377\000" => "/wA="],
- ["\000\000\000" => "AAAA"],
-
- # from HTTP spec
- ['Aladdin:open sesame' => 'QWxhZGRpbjpvcGVuIHNlc2FtZQ=='],
-
- ['a' x 100 => 'YWFh' x 33 . 'YQ=='],
-
- ['Multipurpose Internet Mail Extensions: The Base64 Content-Transfer-Encoding is designed to represent sequences of octets in a form that is not humanly readable. '
- => "TXVsdGlwdXJwb3NlIEludGVybmV0IE1haWwgRXh0ZW5zaW9uczogVGhlIEJhc2U2NCBDb250ZW50LVRyYW5zZmVyLUVuY29kaW5nIGlzIGRlc2lnbmVkIHRvIHJlcHJlc2VudCBzZXF1ZW5jZXMgb2Ygb2N0ZXRzIGluIGEgZm9ybSB0aGF0IGlzIG5vdCBodW1hbmx5IHJlYWRhYmxlLiA="],
-
# All values
["\000" => "AA=="],
["\001" => "AQ=="],
["\375" => "/Q=="],
["\376" => "/g=="],
["\377" => "/w=="],
+
+ ["\000\377" => "AP8="],
+ ["\377\000" => "/wA="],
+ ["\000\000\000" => "AAAA"],
+
+ ['' => ''],
+ [ASCII('a') => 'YQ=='],
+ [ASCII('aa') => 'YWE='],
+ [ASCII('aaa') => 'YWFh'],
+
+ [ASCII('aaa') => 'YWFh'],
+ [ASCII('aaa') => 'YWFh'],
+ [ASCII('aaa') => 'YWFh'],
+
+
+ # from HTTP spec
+ [ASCII('Aladdin:open sesame') => 'QWxhZGRpbjpvcGVuIHNlc2FtZQ=='],
+
+ [ASCII('a') x 100 => 'YWFh' x 33 . 'YQ=='],
+
+ [ASCII('Multipurpose Internet Mail Extensions: The Base64 Content-Transfer-Encoding is designed to represent sequences of octets in a form that is not humanly readable. ')
+ => "TXVsdGlwdXJwb3NlIEludGVybmV0IE1haWwgRXh0ZW5zaW9uczogVGhlIEJhc2U2NCBDb250ZW50LVRyYW5zZmVyLUVuY29kaW5nIGlzIGRlc2lnbmVkIHRvIHJlcHJlc2VudCBzZXF1ZW5jZXMgb2Ygb2N0ZXRzIGluIGEgZm9ybSB0aGF0IGlzIG5vdCBodW1hbmx5IHJlYWRhYmxlLiA="],
+
);
for $test (@encode_tests) {
}
my $decoded = decode_base64($encoded);
if ($decoded ne $plain) {
- print "test $testno ($plain): expected $expected, got $encoded\n";
+ print "test $testno ($encoded): expected $plain, got $decoded\n";
print "not ";
}
- # Try the old C versions too
+ if (ord('A') != 193) { # perl versions broken on EBCDIC
+ # Try the old Perl versions too
if ($encoded ne MIME::Base64::old_encode_base64($plain, '')) {
print "old_encode_base64 give different result.\n";
print "not ";
print "old_decode_base64 give different result.\n";
print "not ";
}
+ }
print "ok $testno\n";
$testno++;
local $SIG{__WARN__} = sub { print $_[0] }; # avoid warnings on stderr
my @decode_tests = (
- ['YWE=' => 'aa'],
- [' YWE=' => 'aa'],
- ['Y WE=' => 'aa'],
- ['YWE= ' => 'aa'],
- ["Y\nW\r\nE=" => 'aa'],
+ ['YWE=' => ASCII('aa')],
+ [' YWE=' => ASCII('aa')],
+ ['Y WE=' => ASCII('aa')],
+ ['YWE= ' => ASCII('aa')],
+ ["Y\nW\r\nE=" => ASCII('aa')],
# These will generate some warnings
- ['YWE=====' => 'aa'], # extra padding
- ['YWE' => 'aa'], # missing padding
- ['YWFh====' => 'aaa'],
- ['YQ' => 'a'],
+ ['YWE=====' => ASCII('aa')], # extra padding
+ ['YWE' => ASCII('aa')], # missing padding
+ ['YWFh====' => ASCII('aaa')],
+ ['YQ' => ASCII('a')],
['Y' => ''],
['' => ''],
[undef() => ''],
Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
{
HV* hv = (HV*)SvRV(sv);
+ /* Given a UTF-X encoded char 0xAA..0xYY,0xZZ
+ then the "swatch" is a vec() for al the chars which start
+ with 0xAA..0xYY
+ So the key in the hash is length of encoded char -1
+ */
U32 klen = UTF8SKIP(ptr) - 1;
- U32 off = ptr[klen] & 127; /* NB: 64 bit always 0 when len > 1 */
+ U32 off = ptr[klen];
STRLEN slen;
- STRLEN needents = (klen ? 64 : 128);
+ STRLEN needents;
U8 *tmps;
U32 bit;
SV *retval;
+ if (klen == 0)
+ {
+ /* If char in invariant then swatch is for all the invariant chars
+ * In both UTF-8 and UTF8-MOD that happens to be UTF_CONTINUATION_MARK
+ */
+ needents = UTF_CONTINUATION_MARK;
+ off = NATIVE_TO_UTF(ptr[klen]);
+ }
+ else
+ {
+ /* If char is encoded then swatch is for the prefix */
+ needents = (1 << UTF_ACCUMULATION_SHIFT);
+ off = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK;
+ }
+
/*
* This single-entry cache saves about 1/3 of the utf8 overhead in test
* suite. (That is, only 7-8% overall over just a hash cache. Still,
svp = hv_store(hv, (char*)ptr, klen, retval, 0);
- if (!svp || !(tmps = (U8*)SvPV(*svp, slen)) || slen < 8)
+ if (!svp || !(tmps = (U8*)SvPV(*svp, slen)) || (slen << 3) < needents)
Perl_croak(aTHX_ "SWASHGET didn't return result of proper length");
}