t/TestInit.pm Preamble library for core tests
t/test.pl Simple testing library
t/uni/case.pl See if Unicode casing works
+t/uni/chomp.t See if Unicode chomp works
t/uni/fold.t See if Unicode folding works
t/uni/lower.t See if Unicode casing works
t/uni/sprintf.t See if Unicode sprintf works
STRLEN n_a;
char *s;
char *temp_buffer = NULL;
+ SV* svrecode = Nullsv;
if (RsSNARF(PL_rs))
return 0;
if (SvREADONLY(sv))
Perl_croak(aTHX_ PL_no_modify);
}
+
+ if (PL_encoding) {
+ if (!SvUTF8(sv)) {
+ /* XXX, here sv is utf8-ized as a side-effect!
+ If encoding.pm is used properly, almost string-generating
+ operations, including literal strings, chr(), input data, etc.
+ should have been utf8-ized already, right?
+ */
+ sv_recode_to_utf8(sv, PL_encoding);
+ }
+ }
+
s = SvPV(sv, len);
if (s && len) {
s += --len;
}
}
else {
- STRLEN rslen;
+ STRLEN rslen, rs_charlen;
char *rsptr = SvPV(PL_rs, rslen);
+
+ rs_charlen = SvUTF8(PL_rs)
+ ? sv_len_utf8(PL_rs)
+ : rslen;
+
if (SvUTF8(PL_rs) != SvUTF8(sv)) {
/* Assumption is that rs is shorter than the scalar. */
if (SvUTF8(PL_rs)) {
goto nope;
}
rsptr = temp_buffer;
- } else {
+ }
+ else if (PL_encoding) {
+ /* RS is 8 bit, encoding.pm is used.
+ * Do not recode PL_rs as a side-effect. */
+ svrecode = newSVpvn(rsptr, rslen);
+ sv_recode_to_utf8(svrecode, PL_encoding);
+ rsptr = SvPV(svrecode, rslen);
+ rs_charlen = sv_len_utf8(svrecode);
+ }
+ else {
/* RS is 8 bit, scalar is utf8. */
temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
rsptr = temp_buffer;
s -= rslen - 1;
if (memNE(s, rsptr, rslen))
goto nope;
- count += rslen;
+ count += rs_charlen;
}
}
s = SvPV_force(sv, n_a);
SvSETMAGIC(sv);
}
nope:
+
+ if (svrecode)
+ SvREFCNT_dec(svrecode);
+
Safefree(temp_buffer);
return count;
}
require './test.pl';
}
-plan tests => 91;
+plan tests => 93;
$_ = 'abc';
$c = do foo();
is ($chomped, $string, "$message (\$/ as bytes)");
}
}
+
+{
+ # returns length in characters, but not in bytes.
+ $/ = "\x{100}";
+ $a = "A$/";
+ $b = chomp $a;
+ is ($b, 1);
+
+ $/ = "\x{100}\x{101}";
+ $a = "A$/";
+ $b = chomp $a;
+ is ($b, 2);
+}
--- /dev/null
+#!./perl -w
+
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ unless (PerlIO::Layer->find('perlio')){
+ print "1..0 # Skip: PerlIO required\n";
+ exit 0;
+ }
+ eval 'use Encode';
+ if ($@ =~ /dynamic loading not available/) {
+ print "1..0 # Skip: no dynamic loading, no Encode\n";
+ exit 0;
+ }
+}
+
+use strict;
+use Test::More tests => (4 * 4 * 4) * (3); # (@char ** 3) * (keys %mbchars)
+
+# %mbchars = (encoding => { bytes => utf8, ... }, ...);
+# * pack('C*') is expected to return bytes even if ${^ENCODING} is true.
+our %mbchars = (
+ 'big-5' => {
+ pack('C*', 0x40) => pack('U*', 0x40), # COMMERCIAL AT
+ pack('C*', 0xA4, 0x40) => "\x{4E00}", # CJK-4E00
+ },
+ 'euc-jp' => {
+ pack('C*', 0xB0, 0xA1) => "\x{4E9C}", # CJK-4E9C
+ pack('C*', 0x8F, 0xB0, 0xA1) => "\x{4E02}", # CJK-4E02
+ },
+ 'shift-jis' => {
+ pack('C*', 0xA9) => "\x{FF69}", # halfwidth katakana small U
+ pack('C*', 0x82, 0xA9) => "\x{304B}", # hiragana KA
+ },
+);
+
+for my $enc (sort keys %mbchars) {
+ local ${^ENCODING} = find_encoding($enc);
+ my @char = (sort(keys %{ $mbchars{$enc} }),
+ sort(values %{ $mbchars{$enc} }));
+
+ for my $rs (@char) {
+ local $/ = $rs;
+ for my $start (@char) {
+ for my $end (@char) {
+ my $string = $start.$end;
+ my $expect = $end eq $rs ? $start : $string;
+ chomp $string;
+ is($string, $expect);
+ }
+ }
+ }
+}