From: Spider Boardman Date: Fri, 8 Sep 2000 02:21:02 +0000 (-0400) Subject: Re-allow vec() for characters > 255. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=33b454808819084359e76a3f223a41b842c180b7;p=p5sagit%2Fp5-mst-13.2.git Re-allow vec() for characters > 255. Subject: [PATCH] Re: [ID 20000907.005] Not OK: perl v5.7.0 +devel-7030 on alpha-dec_osf-perlio 4.0f (UNINSTALLED) Message-Id: <200009080621.CAA03409@leggy.zk3.dec.com> p4raw-id: //depot/perl@7039 --- diff --git a/doop.c b/doop.c index 46ffc1b..77c7324 100644 --- a/doop.c +++ b/doop.c @@ -537,8 +537,7 @@ Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) SvTAINTED_on(sv); } -/* currently converts input to bytes if needed and croaks if a character - > 255 is encountered */ +/* currently converts input to bytes if possible, but doesn't sweat failure */ UV Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) { @@ -552,12 +551,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) Perl_croak(aTHX_ "Illegal number of bits in vec"); if (SvUTF8(sv)) { - if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &srclen)) { - SvUTF8_off(sv); - SvCUR_set(sv, srclen); - } - else - Perl_croak(aTHX_ "Character > 255 in vec()"); + (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); } offset *= size; /* turn into bit offset */ @@ -681,8 +675,10 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) return retnum; } -/* currently converts input to bytes if needed and croaks if a character - > 255 is encountered */ +/* currently converts input to bytes if possible but doesn't sweat failures, + * although it does ensure that the string it clobbers is not marked as + * utf8-valid any more + */ void Perl_do_vecset(pTHX_ SV *sv) { @@ -699,12 +695,11 @@ Perl_do_vecset(pTHX_ SV *sv) return; s = (unsigned char*)SvPV_force(targ, targlen); if (SvUTF8(targ)) { - if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &targlen)) { - /* SvUTF8_off(targ); SvPOK_only below ensures this */ - SvCUR_set(targ, targlen); - } - else - Perl_croak(aTHX_ 
"Character > 255 in vec()"); + /* This is handled by the SvPOK_only below... + if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) + SvUTF8_off(targ); + */ + (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); } (void)SvPOK_only(targ); diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 13e557e..7c21f5f 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -515,7 +515,9 @@ see pack('U0', ...)). =item * -vec() now refuses to deal with characters >255. +vec() now tries to work with characters <= 255 when possible, but it leaves +higher character values in place. In that case, if vec() was used to modify +the string, it is no longer considered to be utf8-encoded. =item * diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 3e9f633..63d7f99 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1043,11 +1043,6 @@ references can be weakened. with an assignment operator, which implies modifying the value itself. Perhaps you need to copy the value to a temporary, and repeat that. -=item Character > 255 in vec() - -(F) You applied the vec() function to a UTF8 string which contained -a character > 255. vec() currently only operates on characters < 256. - =item chmod() mode argument is missing initial 0 (W chmod) A novice will sometimes say diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index d02b9ba..323b83d 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -5516,8 +5516,13 @@ If an element off the end of the string is written to, Perl will first extend the string with sufficiently many zero bytes. It is an error to try to write off the beginning of the string (i.e. negative OFFSET). -The string must not contain any character with value > 255 (which -can only happen if you're using UTF8 encoding). +The string should not contain any character with the value > 255 (which +can only happen if you're using UTF8 encoding). If it does, it will be +treated as something which is not UTF8 encoded. 
When the C<vec> was +assigned to, other parts of your program will also no longer consider the +string to be UTF8 encoded. In other words, if you do have such characters +in your string, vec() will operate on the actual byte string, and not the +conceptual character string. Strings created with C<vec> can also be manipulated with the logical operators C<|>, C<&>, C<^>, and C<~>. These operators will assume a bit diff --git a/t/op/vec.t b/t/op/vec.t index b75bebf..7fe0974 100755 --- a/t/op/vec.t +++ b/t/op/vec.t @@ -57,13 +57,14 @@ $x = substr $foo, 1; print "not " if vec($x, 0, 8) != 255; print "ok 24\n"; eval { vec($foo, 1, 8) }; -print "not " unless $@ =~ /^Character > 255 in vec\(\) /; +print "not " if $@; print "ok 25\n"; eval { vec($foo, 1, 8) = 13 }; -print "not " unless $@ =~ /^Character > 255 in vec\(\) /; +print "not " if $@; print "ok 26\n"; -print "not " if $foo ne "\x{100}" . "\xff\xfe"; +print "not " if $foo ne "\xc4\x0d\xc3\xbf\xc3\xbe"; print "ok 27\n"; +$foo = "\x{100}" . "\xff\xfe"; $x = substr $foo, 1; vec($x, 2, 4) = 7; print "not " if $x ne "\xff\xf7";