it revealed a bug in #8248 (the UTF8_EIGHT_BIT_LO() was wrong).
p4raw-id: //depot/perl@8249
PP(pp_ord)
{
djSP; dTARGET;
- UV value;
- SV *tmpsv = POPs;
+ SV *argsv = POPs;
STRLEN len;
- U8 *tmps = (U8*)SvPVx(tmpsv, len);
- STRLEN retlen;
+ U8 *s = (U8*)SvPVx(argsv, len);
- if ((*tmps & 0x80) && DO_UTF8(tmpsv))
- value = utf8_to_uv(tmps, len, &retlen, 0);
- else
- value = (UV)(*tmps & 255);
- XPUSHu(value);
+ XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff));
RETURN;
}
};
{ my $s = join("", chr(1234),chr(255));
- print "not " unless length($s) == 2;
+ print "not " unless length($s) == 2 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 255;
print "ok 11\n";
}
{ my $s = join(chr(2345), chr(1234),chr(255));
- print "not " unless length($s) == 3;
+ print "not " unless length($s) == 3 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 2345 &&
+ ord(substr($s,2,1)) == 255;
print "ok 12\n";
}
{ my $s = join(chr(2345), chr(1234),chr(3456));
- print "not " unless length($s) == 3;
+ print "not " unless length($s) == 3 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 2345 &&
+ ord(substr($s,2,1)) == 3456;
print "ok 13\n";
}
{ my $s = join(chr(255), chr(1234),chr(2345));
- print "not " unless length($s) == 3;
+ print "not " unless length($s) == 3 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 255 &&
+ ord(substr($s,2,1)) == 2345;
print "ok 14\n";
}
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding and no longer than C<curlen>;
-C<retlen> will be set to the length, in bytes, of that character,
-and the pointer C<s> will be advanced to the end of the character.
+C<retlen> will be set to the length, in bytes, of that character.
If C<s> does not point to a well-formed UTF8 character, the behaviour
is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
-length, in bytes, of that character, and the pointer C<s> will be
-advanced to the end of the character.
+length, in bytes, of that character.
If C<s> does not point to a well-formed UTF8 character, zero is
returned and retlen is set, if possible, to -1.
#define UTF8_CONTINUATION_MASK ((U8)0x3f)
#define UTF8_ACCUMULATION_SHIFT 6
-#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
+#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | (((U8)new) & UTF8_CONTINUATION_MASK))
-#define UTF8_EIGHT_BIT_HI(c) ( (((U8)c)>>6) |0xc0)
-#define UTF8_EIGHT_BIT_LO(c) (((((U8)c)>>6)&0x3f)|0x80)
+#define UTF8_EIGHT_BIT_HI(c) ( (((U8)(c))>>6) |0xc0)
+#define UTF8_EIGHT_BIT_LO(c) (((((U8)(c)) )&0x3f)|0x80)
#ifdef HAS_QUAD
#define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \