SvTAINTED_on(sv);
}
-/* currently converts input to bytes if needed and croaks if a character
- > 255 is encountered */
+/* currently converts input to bytes if possible, but doesn't sweat failure */
UV
Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
{
Perl_croak(aTHX_ "Illegal number of bits in vec");
if (SvUTF8(sv)) {
- if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &srclen)) {
- SvUTF8_off(sv);
- SvCUR_set(sv, srclen);
- }
- else
- Perl_croak(aTHX_ "Character > 255 in vec()");
+ (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
}
offset *= size; /* turn into bit offset */
return retnum;
}
-/* currently converts input to bytes if needed and croaks if a character
- > 255 is encountered */
+/* currently converts input to bytes if possible but doesn't sweat failures,
+ * although it does ensure that the string it clobbers is not marked as
+ * utf8-valid any more
+ */
void
Perl_do_vecset(pTHX_ SV *sv)
{
return;
s = (unsigned char*)SvPV_force(targ, targlen);
if (SvUTF8(targ)) {
- if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &targlen)) {
- /* SvUTF8_off(targ); SvPOK_only below ensures this */
- SvCUR_set(targ, targlen);
- }
- else
- Perl_croak(aTHX_ "Character > 255 in vec()");
+ /* This is handled by the SvPOK_only below...
+ if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
+ SvUTF8_off(targ);
+ */
+ (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
}
(void)SvPOK_only(targ);
=item *
-vec() now refuses to deal with characters >255.
+vec() now tries to work with characters <= 255 when possible, but it leaves
+higher character values in place. In that case, if vec() is used to modify
+the string, the string is no longer considered to be utf8-encoded.
=item *
with an assignment operator, which implies modifying the value itself.
Perhaps you need to copy the value to a temporary, and repeat that.
-=item Character > 255 in vec()
-
-(F) You applied the vec() function to a UTF8 string which contained
-a character > 255. vec() currently only operates on characters < 256.
-
=item chmod() mode argument is missing initial 0
(W chmod) A novice will sometimes say
extend the string with sufficiently many zero bytes. It is an error
to try to write off the beginning of the string (i.e. negative OFFSET).
-The string must not contain any character with value > 255 (which
-can only happen if you're using UTF8 encoding).
+The string should not contain any character with a value > 255 (which
+can only happen if you're using UTF8 encoding). If it does, it will be
+treated as something which is not UTF8 encoded. If the C<vec> is
+assigned to, other parts of your program will also no longer consider the
+string to be UTF8 encoded. In other words, if you do have such characters
+in your string, vec() will operate on the actual byte string, and not the
+conceptual character string.
Strings created with C<vec> can also be manipulated with the logical
operators C<|>, C<&>, C<^>, and C<~>. These operators will assume a bit
print "not " if vec($x, 0, 8) != 255;
print "ok 24\n";
eval { vec($foo, 1, 8) };
-print "not " unless $@ =~ /^Character > 255 in vec\(\) /;
+print "not " if $@;
print "ok 25\n";
eval { vec($foo, 1, 8) = 13 };
-print "not " unless $@ =~ /^Character > 255 in vec\(\) /;
+print "not " if $@;
print "ok 26\n";
-print "not " if $foo ne "\x{100}" . "\xff\xfe";
+print "not " if $foo ne "\xc4\x0d\xc3\xbf\xc3\xbe";
print "ok 27\n";
+$foo = "\x{100}" . "\xff\xfe";
$x = substr $foo, 1;
vec($x, 2, 4) = 7;
print "not " if $x ne "\xff\xf7";