From: Jarkko Hietaniemi Date: Fri, 29 Dec 2000 07:54:51 +0000 (+0000) Subject: (Retracted by #8264) More join() testing which was good because X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=7df053ec69e901392ae6352566832be0a6917cfe;p=p5sagit%2Fp5-mst-13.2.git (Retracted by #8264) More join() testing which was good because it revealed a bug in #8248 (the UTF8_EIGHT_BIT_LO() was wrong). p4raw-id: //depot/perl@8249 --- diff --git a/pp.c b/pp.c index eac7532..1150697 100644 --- a/pp.c +++ b/pp.c @@ -2942,17 +2942,11 @@ PP(pp_sprintf) PP(pp_ord) { djSP; dTARGET; - UV value; - SV *tmpsv = POPs; + SV *argsv = POPs; STRLEN len; - U8 *tmps = (U8*)SvPVx(tmpsv, len); - STRLEN retlen; + U8 *s = (U8*)SvPVx(argsv, len); - if ((*tmps & 0x80) && DO_UTF8(tmpsv)) - value = utf8_to_uv(tmps, len, &retlen, 0); - else - value = (UV)(*tmps & 255); - XPUSHu(value); + XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff)); RETURN; } diff --git a/t/op/join.t b/t/op/join.t index eea9add..4cbe692 100755 --- a/t/op/join.t +++ b/t/op/join.t @@ -46,21 +46,32 @@ if ($f eq 'baeak') {print "ok 6\n";} else {print "# '$f'\nnot ok 6\n";} }; { my $s = join("", chr(1234),chr(255)); - print "not " unless length($s) == 2; + print "not " unless length($s) == 2 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 255; print "ok 11\n"; } { my $s = join(chr(2345), chr(1234),chr(255)); - print "not " unless length($s) == 3; + print "not " unless length($s) == 3 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 2345 && + ord(substr($s,2,1)) == 255; print "ok 12\n"; } { my $s = join(chr(2345), chr(1234),chr(3456)); - print "not " unless length($s) == 3; + print "not " unless length($s) == 3 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 2345 && + ord(substr($s,2,1)) == 3456; print "ok 13\n"; } { my $s = join(chr(255), chr(1234),chr(2345)); - print "not " unless length($s) == 3; + print "not " unless length($s) == 3 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 255 && + ord(substr($s,2,1)) == 2345; print "ok 14\n"; } diff --git a/utf8.c b/utf8.c index 8d812ab..24dc692 100644 --- a/utf8.c +++ b/utf8.c @@ -183,8 +183,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) Returns the character value of the first character in the string C which is assumed to be in UTF8 encoding and no longer than C; -C will be set to the length, in bytes, of that character, -and the pointer C will be advanced to the end of the character. +C will be set to the length, in bytes, of that character. If C does not point to a well-formed UTF8 character, the behaviour is dependent on the value of C: if it contains UTF8_CHECK_ONLY, @@ -351,8 +350,7 @@ malformed: Returns the character value of the first character in the string C which is assumed to be in UTF8 encoding; C will be set to the -length, in bytes, of that character, and the pointer C will be -advanced to the end of the character. +length, in bytes, of that character. If C does not point to a well-formed UTF8 character, zero is returned and retlen is set, if possible, to -1. diff --git a/utf8.h b/utf8.h index e9598b8..8d46aa9 100644 --- a/utf8.h +++ b/utf8.h @@ -69,10 +69,10 @@ END_EXTERN_C #define UTF8_CONTINUATION_MASK ((U8)0x3f) #define UTF8_ACCUMULATION_SHIFT 6 -#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK)) +#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | (((U8)new) & UTF8_CONTINUATION_MASK)) -#define UTF8_EIGHT_BIT_HI(c) ( (((U8)c)>>6) |0xc0) -#define UTF8_EIGHT_BIT_LO(c) (((((U8)c)>>6)&0x3f)|0x80) +#define UTF8_EIGHT_BIT_HI(c) ( (((U8)(c))>>6) |0xc0) +#define UTF8_EIGHT_BIT_LO(c) (((((U8)(c)) )&0x3f)|0x80) #ifdef HAS_QUAD #define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \