}
}
-print "1..181\n";
+print "1..191\n";
my $test = 1;
{
# bug id 20001009.001
- my($a,$b);
- { use bytes; $a = "\xc3\xa4"; }
- { use utf8; $b = "\xe4"; }
- { use bytes; ok_bytes $a, $b; $test++; } # 69
- { use utf8; nok $a, $b; $test++; } # 70
+ my ($a, $b);
+
+ { use bytes; $a = "\xc3\xa4" } # two explicit bytes, c3 a4
+ { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8
+
+ print "not " if $a eq $b; # the two strings must compare unequal
+ print "ok $test\n"; $test++;
+
+ { use utf8; print "not " if $a eq $b; } # ...and likewise with "use utf8" in effect
+ print "ok $test\n"; $test++;
}
{
}
}
+{
+ # tests 182..191: five \x{...} literals, each checked twice --
+ # first its length in characters, then its content and length under "use bytes"
+
+ {
+ my $a = "\x{41}"; # code point 0x41
+
+ print "not " unless length($a) == 1; # expects one character
+ print "ok $test\n";
+ $test++;
+
+ use bytes; # byte semantics from here to the end of this block
+ print "not " unless $a eq "\x41" && length($a) == 1; # expects the single byte 41
+ print "ok $test\n";
+ $test++;
+ }
+
+ {
+ my $a = "\x{80}"; # code point 0x80
+
+ print "not " unless length($a) == 1; # expects one character
+ print "ok $test\n";
+ $test++;
+
+ use bytes; # byte semantics from here to the end of this block
+ print "not " unless $a eq "\xc2\x80" && length($a) == 2; # expects the two bytes c2 80
+ print "ok $test\n";
+ $test++;
+ }
+
+ {
+ my $a = "\x{100}"; # code point 0x100
+
+ print "not " unless length($a) == 1; # expects one character
+ print "ok $test\n";
+ $test++;
+
+ use bytes; # byte semantics from here to the end of this block
+ print "not " unless $a eq "\xc4\x80" && length($a) == 2; # expects the two bytes c4 80
+ print "ok $test\n";
+ $test++;
+ }
+
+ {
+ my $a = "\x{100}\x{80}"; # 0x100 followed by 0x80
+
+ print "not " unless length($a) == 2; # expects two characters
+ print "ok $test\n";
+ $test++;
+
+ use bytes; # byte semantics from here to the end of this block
+ print "not " unless $a eq "\xc4\x80\xc2\x80" && length($a) == 4; # expects c4 80 c2 80
+ print "ok $test\n";
+ $test++;
+ }
+
+ {
+ my $a = "\x{80}\x{100}"; # same two code points, opposite order
+
+ print "not " unless length($a) == 2; # expects two characters
+ print "ok $test\n";
+ $test++;
+
+ use bytes; # byte semantics from here to the end of this block
+ print "not " unless $a eq "\xc2\x80\xc4\x80" && length($a) == 4; # expects c2 80 c4 80
+ print "ok $test\n";
+ $test++;
+ }
+}
+
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
- bool has_utf = FALSE; /* embedded \x{} */
+ bool has_utf8 = FALSE; /* embedded \x{} */
UV uv;
I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
: UTF;
- I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
+ I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
: UTF;
/* (now in tr/// code again) */
- if (*s & 0x80 && thisutf) {
+ if (*s & 0x80 && this_utf8) {
STRLEN len;
UV uv;
while (len--)
*d++ = *s++;
}
- has_utf = TRUE;
+ has_utf8 = TRUE;
continue;
}
yyerror("Missing right brace on \\x{}");
e = s;
}
- {
+ else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
+ has_utf8 = TRUE;
}
s = e + 1;
}
* There will always enough room in sv since such escapes will
* be longer than any utf8 sequence they can end up as
*/
- if (uv > 127) {
- if (!thisutf && !has_utf && uv > 255) {
+ if (uv > 127 || has_utf8) {
+ if (!this_utf8 && !has_utf8 && uv > 255) {
/* might need to recode whatever we have accumulated so far
* if it contains any hibit chars
*/
}
}
- if (thisutf || uv > 255) {
+ if (has_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
- has_utf = TRUE;
+ this_utf8 = TRUE; /* NOTE(review): the removed line set has_utf here; in the lines visible, has_utf8 (tested before SvUTF8_on) is not set when uv > 255 arrives with has_utf8 still FALSE -- confirm intended */
}
else {
*d++ = (char)uv;
res = new_constant( Nullch, 0, "charnames",
res, Nullsv, "\\N{...}" );
str = SvPV(res,len);
- if (!has_utf && SvUTF8(res)) {
+ if (!has_utf8 && SvUTF8(res)) {
char *ostart = SvPVX(sv);
SvCUR_set(sv, d - ostart);
SvPOK_on(sv);
/* this just broke our allocation above... */
SvGROW(sv, send - start);
d = SvPVX(sv) + SvCUR(sv);
- has_utf = TRUE;
+ has_utf8 = TRUE;
}
if (len > e - s + 4) {
char *odest = SvPVX(sv);
*d = '\0';
SvCUR_set(sv, d - SvPVX(sv));
SvPOK_on(sv);
- if (has_utf)
+ if (has_utf8)
SvUTF8_on(sv);
/* shrink the sv if we allocated more than we used */
register char term; /* terminating character */
register char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
- bool has_utf = FALSE; /* is there any utf8 content? */
+ bool has_utf8 = FALSE; /* is there any utf8 content? */
/* skip space before the delimiter */
if (isSPACE(*s))
/* after skipping whitespace, the next character is the terminator */
term = *s;
if ((term & 0x80) && UTF)
- has_utf = TRUE;
+ has_utf8 = TRUE;
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
have found the terminator */
else if (*s == term)
break;
- else if (!has_utf && (*s & 0x80) && UTF)
- has_utf = TRUE;
+ else if (!has_utf8 && (*s & 0x80) && UTF)
+ has_utf8 = TRUE;
*to = *s;
}
}
break;
else if (*s == PL_multi_open)
brackets++;
- else if (!has_utf && (*s & 0x80) && UTF)
- has_utf = TRUE;
+ else if (!has_utf8 && (*s & 0x80) && UTF)
+ has_utf8 = TRUE;
*to = *s;
}
}
if (keep_delims)
sv_catpvn(sv, s, 1);
- if (has_utf)
+ if (has_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
s++;