From: Jarkko Hietaniemi Date: Mon, 6 Jun 2005 20:28:35 +0000 (+0300) Subject: Re: [perl #36130] chr(-1) should probably return undef X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=8a064bd6d0d7a44f3e80bed959e1dc566b57850d;p=p5sagit%2Fp5-mst-13.2.git Re: [perl #36130] chr(-1) should probably return undef Message-ID: <42A487C3.8010306@gmail.com> p4raw-id: //depot/perl@24720 --- diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 55b6ba1..894542f 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -742,6 +742,10 @@ chr(0x263a) is a Unicode smiley face. Note that characters from 128 to 255 (inclusive) are by default not encoded in UTF-8 Unicode for backward compatibility reasons (but see L). +Negative values give the Unicode replacement character (chr(0xfffd)), +except under the L pragma, where low eight bits of the value +(truncated to an integer) are used. + If NUMBER is omitted, uses C<$_>. For the reverse, use L. diff --git a/pp.c b/pp.c index f931285..9ed8bc4 100644 --- a/pp.c +++ b/pp.c @@ -3356,7 +3356,20 @@ PP(pp_chr) { dSP; dTARGET; char *tmps; - UV value = POPu; + UV value; + + if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0) + || + (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) { + if (IN_BYTES) { + value = POPu; /* chr(-1) eq chr(0xff), etc. */ + } else { + (void) POPs; /* Ignore the argument value. */ + value = UNICODE_REPLACEMENT; + } + } else { + value = POPu; + } SvUPGRADE(TARG,SVt_PV); diff --git a/t/op/chr.t b/t/op/chr.t index 94450ec..e63c3b5 100644 --- a/t/op/chr.t +++ b/t/op/chr.t @@ -6,7 +6,7 @@ BEGIN { require "test.pl"; } -plan tests => 26; +plan tests => 34; # Note that t/op/ord.t already tests for chr() <-> ord() rountripping. @@ -19,11 +19,24 @@ is(chr(127), "\x7F"); is(chr(128), "\x80"); is(chr(255), "\xFF"); -# is(chr(-1), undef); # Shouldn't it be? +is(chr(-0.1), "\x{FFFD}"); # The U+FFFD Unicode replacement character. +is(chr(-1 ), "\x{FFFD}"); +is(chr(-2 ), "\x{FFFD}"); +is(chr(-3.0), "\x{FFFD}"); +{ + use bytes; # Backward compatibility. + is(chr(-0.1), "\x00"); + is(chr(-1 ), "\xFF"); + is(chr(-2 ), "\xFE"); + is(chr(-3.0), "\xFD"); +} # Check UTF-8. -sub hexes { join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0]))) } +sub hexes { + no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings + join(" ",map{sprintf"%02x",$_}unpack("C*",chr($_[0]))); +} # The following code points are some interesting steps in UTF-8. is(hexes( 0x100), "c4 80");