require './test.pl';
}
-plan tests => 139;
+plan tests => 143;
$_ = 'abc';
$c = foo();
map chomp(+()), ('')x68;
ok(1, "extend sp in pp_chomp");
}
+
+{
+ # [perl #73246] chop doesn't support utf8
+ # The problem was that UTF8_IS_START() didn't handle perl's extended UTF8:
+ # the old macro rejected start bytes above 0xFD, which the extended
+ # encoding uses for very large code points.
+ my $utf = "\x{80000001}\x{80000000}";
+ my $result = chop($utf);
+ is($utf, "\x{80000001}", "chopping high 'unicode'- remnant");
+ is($result, "\x{80000000}", "chopping high 'unicode' - result");
+
+ SKIP: {
+ # The string literals below are compiled even when the tests are
+ # skipped, so silence the compile-time integer-overflow warning on
+ # builds whose integers are narrower than 64 bits.
+ no warnings 'overflow';
+ use Config;
+ # Code points this large only fit when perl's integers are at least
+ # 8 bytes wide; otherwise skip both tests in this block.
+ $Config{ivsize} >= 8
+ or skip("this build can't handle very large characters", 2);
+ my $utf = "\x{ffffffffffffffff}\x{fffffffffffffffe}";
+ my $result = chop $utf;
+ is($utf, "\x{ffffffffffffffff}", "chop even higher 'unicode' - remnant");
+ is($result, "\x{fffffffffffffffe}", "chop even higher 'unicode' - result");
+ }
+}
leading bits of the start byte tell how many bytes there are in the
encoded character.
+Perl's extended UTF-8 means we can have start bytes up to FF.
+
*/
#define UNI_IS_INVARIANT(c) (((UV)c) < 0x80) /* true for values below 0x80 */
/* UTF8_IS_START is true for any byte >= 0xC0; it has no upper bound, so every
* byte up to 0xFF is accepted as a start byte.
* Note that C0 and C1 are invalid in legal UTF8, so the lower bound of the
* below should arguably be C2.
* NOTE(review): none of the macros here parenthesize their argument `c`, so
* the cast binds only to the first operand of a compound expression; pass a
* simple expression. */
-#define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
+#define UTF8_IS_START(c) (((U8)c) >= 0xc0)
#define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf)) /* true for bytes 0x80..0xBF */
#define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80) /* nonzero when the high bit (0x80) is set */
#define UTF8_IS_DOWNGRADEABLE_START(c) (((U8)c & 0xfc) == 0xc0) /* true for bytes 0xC0..0xC3 */