dSP; dTARGET;
SV *big;
SV *little;
+ SV *temp = Nullsv;
I32 offset;
I32 retval;
char *tmps;
char *tmps2;
STRLEN biglen;
I32 arybase = PL_curcop->cop_arybase;
+ int big_utf8;
+ int little_utf8;
if (MAXARG < 3)
offset = 0;
offset = POPi - arybase;
little = POPs;
big = POPs;
- tmps = SvPV(big, biglen);
- if (offset > 0 && DO_UTF8(big))
+ big_utf8 = DO_UTF8(big);
+ little_utf8 = DO_UTF8(little);
+ if (big_utf8 ^ little_utf8) {
+ /* One needs to be upgraded. */
+ SV *bytes = little_utf8 ? big : little;
+ STRLEN len;
+ char *p = SvPV(bytes, len);
+
+ temp = newSVpvn(p, len);
+
+ if (PL_encoding) {
+ sv_recode_to_utf8(temp, PL_encoding);
+ } else {
+ sv_utf8_upgrade(temp);
+ }
+ if (little_utf8) {
+ big = temp;
+ big_utf8 = TRUE;
+ } else {
+ little = temp;
+ }
+ }
+ if (big_utf8 && offset > 0)
sv_pos_u2b(big, &offset, 0);
+ tmps = SvPV(big, biglen);
if (offset < 0)
offset = 0;
else if (offset > (I32)biglen)
retval = -1;
else
retval = tmps2 - tmps;
- if (retval > 0 && DO_UTF8(big))
+ if (retval > 0 && big_utf8)
sv_pos_b2u(big, &retval);
+ if (temp)
+ SvREFCNT_dec(temp);
PUSHi(retval + arybase);
RETURN;
}
dSP; dTARGET;
SV *big;
SV *little;
+ SV *temp = Nullsv;
STRLEN blen;
STRLEN llen;
I32 offset;
char *tmps;
char *tmps2;
I32 arybase = PL_curcop->cop_arybase;
+ int big_utf8;
+ int little_utf8;
if (MAXARG >= 3)
offset = POPi;
little = POPs;
big = POPs;
+ big_utf8 = DO_UTF8(big);
+ little_utf8 = DO_UTF8(little);
+ if (big_utf8 ^ little_utf8) {
+ /* One needs to be upgraded. */
+ SV *bytes = little_utf8 ? big : little;
+ STRLEN len;
+ char *p = SvPV(bytes, len);
+
+ temp = newSVpvn(p, len);
+
+ if (PL_encoding) {
+ sv_recode_to_utf8(temp, PL_encoding);
+ } else {
+ sv_utf8_upgrade(temp);
+ }
+ if (little_utf8) {
+ big = temp;
+ big_utf8 = TRUE;
+ } else {
+ little = temp;
+ }
+ }
tmps2 = SvPV(little, llen);
tmps = SvPV(big, blen);
+
if (MAXARG < 3)
offset = blen;
else {
- if (offset > 0 && DO_UTF8(big))
+ if (offset > 0 && big_utf8)
sv_pos_u2b(big, &offset, 0);
offset = offset - arybase + llen;
}
retval = -1;
else
retval = tmps2 - tmps;
- if (retval > 0 && DO_UTF8(big))
+ if (retval > 0 && big_utf8)
sv_pos_b2u(big, &retval);
+ if (temp)
+ SvREFCNT_dec(temp);
PUSHi(retval + arybase);
RETURN;
}
{
DIE(aTHX_ "tried to access per-thread data in non-threaded perl");
}
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/
@INC = '../lib';
}
+use strict;
require './test.pl';
-plan( tests => 28 );
+plan( tests => 46 );
-$foo = 'Now is the time for all good men to come to the aid of their country.';
+my $foo = 'Now is the time for all good men to come to the aid of their country.';
-$first = substr($foo,0,index($foo,'the'));
+my $first = substr($foo,0,index($foo,'the'));
is($first, "Now is ");
-$last = substr($foo,rindex($foo,'the'),100);
+my $last = substr($foo,rindex($foo,'the'),100);
is($last, "their country.");
$last = substr($foo,index($foo,'Now'),2);
is($a, $b, q{[perl #22375] 'split'/'index' problem for utf8});
}
}
+
+{
+ my $search = "foo \xc9 bar";
+ my $text = "a\xa3\xa3a $search $search quux";
+
+ my $text_utf8 = $text;
+ utf8::upgrade($text_utf8);
+ my $search_utf8 = $search;
+ utf8::upgrade($search_utf8);
+
+ is (index($text, $search), 5);
+ is (rindex($text, $search), 18);
+ is (index($text, $search_utf8), 5);
+ is (rindex($text, $search_utf8), 18);
+ is (index($text_utf8, $search), 5);
+ is (rindex($text_utf8, $search), 18);
+ is (index($text_utf8, $search_utf8), 5);
+ is (rindex($text_utf8, $search_utf8), 18);
+
+ my $text_octets = $text_utf8;
+ utf8::encode ($text_octets);
+ my $search_octets = $search_utf8;
+ utf8::encode ($search_octets);
+
+ is (index($text_octets, $search_octets), 7, "index octets, octets")
+ or _diag ($text_octets, $search_octets);
+ is (rindex($text_octets, $search_octets), 21, "rindex octets, octets");
+ is (index($text_octets, $search_utf8), -1);
+ is (rindex($text_octets, $search_utf8), -1);
+ is (index($text_utf8, $search_octets), -1);
+ is (rindex($text_utf8, $search_octets), -1);
+
+ is (index($text_octets, $search), -1);
+ is (rindex($text_octets, $search), -1);
+ is (index($text, $search_octets), -1);
+ is (rindex($text, $search_octets), -1);
+}