Apd |SV* |sv_newref |NULLOK SV *const sv
Ap |char* |sv_peek |NULLOK SV* sv
Apd |void |sv_pos_u2b |NULLOK SV *const sv|NN I32 *const offsetp|NULLOK I32 *const lenp
+Apd |void |sv_pos_u2b_proper|NULLOK SV *const sv|NN STRLEN *const offsetp|NULLOK STRLEN *const lenp
Apd |void |sv_pos_b2u |NULLOK SV *const sv|NN I32 *const offsetp
Amdb |char* |sv_pvn_force |NN SV* sv|NULLOK STRLEN* lp
Apd |char* |sv_pvutf8n_force|NN SV *const sv|NULLOK STRLEN *const lp
#define sv_newref Perl_sv_newref
#define sv_peek Perl_sv_peek
#define sv_pos_u2b Perl_sv_pos_u2b
+#define sv_pos_u2b_proper Perl_sv_pos_u2b_proper
#define sv_pos_b2u Perl_sv_pos_b2u
#define sv_pvutf8n_force Perl_sv_pvutf8n_force
#define sv_pvbyten_force Perl_sv_pvbyten_force
#define sv_newref(a) Perl_sv_newref(aTHX_ a)
#define sv_peek(a) Perl_sv_peek(aTHX_ a)
#define sv_pos_u2b(a,b,c) Perl_sv_pos_u2b(aTHX_ a,b,c)
+#define sv_pos_u2b_proper(a,b,c) Perl_sv_pos_u2b_proper(aTHX_ a,b,c)
#define sv_pos_b2u(a,b) Perl_sv_pos_b2u(aTHX_ a,b)
#define sv_pvutf8n_force(a,b) Perl_sv_pvutf8n_force(aTHX_ a,b)
#define sv_pvbyten_force(a,b) Perl_sv_pvbyten_force(aTHX_ a,b)
Perl_sv_newref
Perl_sv_peek
Perl_sv_pos_u2b
+Perl_sv_pos_u2b_proper
Perl_sv_pos_b2u
Perl_sv_pvn_force
Perl_sv_pvutf8n_force
STRLEN len;
SV * const lsv = LvTARG(sv);
const char * const tmps = SvPV_const(lsv,len);
- I32 offs = LvTARGOFF(sv);
- I32 rem = LvTARGLEN(sv);
+ STRLEN offs = LvTARGOFF(sv);
+ STRLEN rem = LvTARGLEN(sv);
PERL_ARGS_ASSERT_MAGIC_GETSUBSTR;
PERL_UNUSED_ARG(mg);
if (SvUTF8(lsv))
- sv_pos_u2b(lsv, &offs, &rem);
- if (offs > (I32)len)
+ sv_pos_u2b_proper(lsv, &offs, &rem);
+ if (offs > len)
offs = len;
- if (rem + offs > (I32)len)
+ if (rem > len - offs)
rem = len - offs;
sv_setpvn(sv, tmps + offs, (STRLEN)rem);
if (SvUTF8(lsv))
STRLEN len;
const char * const tmps = SvPV_const(sv, len);
SV * const lsv = LvTARG(sv);
- I32 lvoff = LvTARGOFF(sv);
- I32 lvlen = LvTARGLEN(sv);
+ STRLEN lvoff = LvTARGOFF(sv);
+ STRLEN lvlen = LvTARGLEN(sv);
PERL_ARGS_ASSERT_MAGIC_SETSUBSTR;
PERL_UNUSED_ARG(mg);
if (DO_UTF8(sv)) {
sv_utf8_upgrade(lsv);
- sv_pos_u2b(lsv, &lvoff, &lvlen);
+ sv_pos_u2b_proper(lsv, &lvoff, &lvlen);
sv_insert(lsv, lvoff, lvlen, tmps, len);
LvTARGLEN(sv) = sv_len_utf8(sv);
SvUTF8_on(lsv);
}
else if (lsv && SvUTF8(lsv)) {
const char *utf8;
- sv_pos_u2b(lsv, &lvoff, &lvlen);
+ sv_pos_u2b_proper(lsv, &lvoff, &lvlen);
LvTARGLEN(sv) = len;
utf8 = (char*)bytes_to_utf8((U8*)tmps, &len);
sv_insert(lsv, lvoff, lvlen, utf8, len);
LvTARGLEN(sv) = len;
}
-
return 0;
}
{
dVAR; dSP; dTARGET;
SV *sv;
- I32 len = 0;
STRLEN curlen;
STRLEN utf8_curlen;
- I32 pos;
- I32 rem;
- I32 fail;
+ SV * pos_sv;
+ IV pos1_iv;
+ int pos1_is_uv;
+ IV pos2_iv;
+ int pos2_is_uv;
+ SV * len_sv;
+ IV len_iv = 0;
+ int len_is_uv = 1;
const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
const char *tmps;
- const I32 arybase = CopARYBASE_get(PL_curcop);
+ const IV arybase = CopARYBASE_get(PL_curcop);
SV *repl_sv = NULL;
const char *repl = NULL;
STRLEN repl_len;
repl = SvPV_const(repl_sv, repl_len);
repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
}
- len = POPi;
+ len_sv = POPs;
+ len_iv = SvIV(len_sv);
+ len_is_uv = SvIOK_UV(len_sv);
}
- pos = POPi;
+ pos_sv = POPs;
+ pos1_iv = SvIV(pos_sv);
+ pos1_is_uv = SvIOK_UV(pos_sv);
sv = POPs;
PUTBACK;
if (repl_sv) {
else
utf8_curlen = 0;
- if (pos >= arybase) {
- pos -= arybase;
- rem = curlen-pos;
- fail = rem;
- if (num_args > 2) {
- if (len < 0) {
- rem += len;
- if (rem < 0)
- rem = 0;
- }
- else if (rem > len)
- rem = len;
+ if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
+ UV pos1_uv = pos1_iv-arybase;
+ /* Overflow can occur when $[ < 0 */
+ if (arybase < 0 && pos1_uv < (UV)pos1_iv)
+ goto BOUND_FAIL;
+ pos1_iv = pos1_uv;
+ pos1_is_uv = 1;
+ }
+ else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
+ goto BOUND_FAIL; /* $[=3; substr($_,2,...) */
+ }
+ else { /* pos < $[ */
+ if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
+ pos1_iv = curlen;
+ pos1_is_uv = 1;
+ } else {
+ if (curlen) {
+ pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+ pos1_iv += curlen;
+ }
}
}
- else {
- pos += curlen;
- if (num_args < 3)
- rem = curlen;
- else if (len >= 0) {
- rem = pos+len;
- if (rem > (I32)curlen)
- rem = curlen;
+ if (pos1_is_uv || pos1_iv > 0) {
+ if ((UV)pos1_iv > curlen)
+ goto BOUND_FAIL;
+ }
+
+ if (num_args > 2) {
+ if (!len_is_uv && len_iv < 0) {
+ pos2_iv = curlen + len_iv;
+ if (curlen)
+ pos2_is_uv = curlen-1 > ~(UV)len_iv;
+ else
+ pos2_is_uv = 0;
+ } else { /* len_iv >= 0 */
+ if (!pos1_is_uv && pos1_iv < 0) {
+ pos2_iv = pos1_iv + len_iv;
+ pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
+ } else {
+ if ((UV)len_iv > curlen-(UV)pos1_iv)
+ pos2_iv = curlen;
+ else
+ pos2_iv = pos1_iv+len_iv;
+ pos2_is_uv = 1;
+ }
}
- else {
- rem = curlen+len;
- if (rem < pos)
- rem = pos;
- }
- if (pos < 0)
- pos = 0;
- fail = rem;
- rem -= pos;
- }
- if (fail < 0) {
- if (lvalue || repl)
- Perl_croak(aTHX_ "substr outside of string");
- Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
- RETPUSHUNDEF;
}
else {
- const I32 upos = pos;
- const I32 urem = rem;
+ pos2_iv = curlen;
+ pos2_is_uv = 1;
+ }
+
+ if (!pos2_is_uv && pos2_iv < 0) {
+ if (!pos1_is_uv && pos1_iv < 0)
+ goto BOUND_FAIL;
+ pos2_iv = 0;
+ }
+ else if (!pos1_is_uv && pos1_iv < 0)
+ pos1_iv = 0;
+
+ if ((UV)pos2_iv < (UV)pos1_iv)
+ pos2_iv = pos1_iv;
+ if ((UV)pos2_iv > curlen)
+ pos2_iv = curlen;
+
+ {
+ /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
+ const STRLEN pos = (STRLEN)( (UV)pos1_iv );
+ const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
+ STRLEN byte_pos = pos;
+ STRLEN byte_len = len;
if (utf8_curlen)
- sv_pos_u2b(sv, (I32 *)&pos, (I32 *)&rem);
- tmps += pos;
+ sv_pos_u2b_proper(sv, &byte_pos, &byte_len);
+ tmps += byte_pos;
/* we either return a PV or an LV. If the TARG hasn't been used
* before, or is of that type, reuse it; otherwise use a mortal
* instead. Note that LVs can have an extended lifetime, so also
}
}
- sv_setpvn(TARG, tmps, rem);
+ sv_setpvn(TARG, tmps, byte_len);
#ifdef USE_LOCALE_COLLATE
sv_unmagic(TARG, PERL_MAGIC_collxfrm);
#endif
}
if (!SvOK(sv))
sv_setpvs(sv, "");
- sv_insert_flags(sv, pos, rem, repl, repl_len, 0);
+ sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
if (repl_is_utf8)
SvUTF8_on(sv);
SvREFCNT_dec(repl_sv_copy);
SvREFCNT_dec(LvTARG(TARG));
LvTARG(TARG) = SvREFCNT_inc_simple(sv);
}
- LvTARGOFF(TARG) = upos;
- LvTARGLEN(TARG) = urem;
+ LvTARGOFF(TARG) = pos;
+ LvTARGLEN(TARG) = len;
}
}
SPAGAIN;
PUSHs(TARG); /* avoid SvSETMAGIC here */
RETURN;
+
+BOUND_FAIL:
+ if (lvalue || repl)
+ Perl_croak(aTHX_ "substr outside of string");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
+ RETPUSHUNDEF;
}
PP(pp_vec)
#define PERL_ARGS_ASSERT_SV_POS_U2B \
assert(offsetp)
+PERL_CALLCONV void Perl_sv_pos_u2b_proper(pTHX_ SV *const sv, STRLEN *const offsetp, STRLEN *const lenp)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_SV_POS_U2B_PROPER \
+ assert(offsetp)
+
PERL_CALLCONV void Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
__attribute__nonnull__(pTHX_2);
#define PERL_ARGS_ASSERT_SV_POS_B2U \
/*
-=for apidoc sv_pos_u2b
+=for apidoc sv_pos_u2b_proper
Converts the value pointed to by offsetp from a count of UTF-8 chars from
the start of the string, to a count of the equivalent number of bytes; if
*/
/*
- * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential
+ * sv_pos_u2b_proper() uses, like sv_pos_b2u(), the mg_ptr of the potential
* PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
* byte offsets. See also the comments of S_utf8_mg_pos_cache_update().
*
*/
void
-Perl_sv_pos_u2b(pTHX_ register SV *const sv, I32 *const offsetp, I32 *const lenp)
+Perl_sv_pos_u2b_proper(pTHX_ register SV *const sv, STRLEN *const offsetp, STRLEN *const lenp)
{
const U8 *start;
STRLEN len;
start = (U8*)SvPV_const(sv, len);
if (len) {
- STRLEN uoffset = (STRLEN) *offsetp;
+ STRLEN uoffset = *offsetp;
const U8 * const send = start + len;
MAGIC *mg = NULL;
const STRLEN boffset = sv_pos_u2b_cached(sv, &mg, start, send,
uoffset, 0, 0);
- *offsetp = (I32) boffset;
+ *offsetp = boffset;
if (lenp) {
/* Convert the relative offset to absolute. */
- const STRLEN uoffset2 = uoffset + (STRLEN) *lenp;
+ const STRLEN uoffset2 = uoffset + *lenp;
const STRLEN boffset2
= sv_pos_u2b_cached(sv, &mg, start, send, uoffset2,
uoffset, boffset) - boffset;
return;
}
+/*
+=for apidoc sv_pos_u2b
+
+Converts the value pointed to by offsetp from a count of UTF-8 chars from
+the start of the string, to a count of the equivalent number of bytes; if
+lenp is non-zero, it does the same to lenp, but this time starting from
+the offset, rather than from the start of the string. Handles magic and
+type coercion.
+
+=cut
+*/
+
+/*
+ * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential
+ * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
+ * byte offsets. See also the comments of S_utf8_mg_pos_cache_update().
+ *
+ */
+
+/* This function is subject to size and sign problems */
+
+void
+Perl_sv_pos_u2b(pTHX_ register SV *const sv, I32 *const offsetp, I32 *const lenp)
+{
+ STRLEN uoffset = (STRLEN)*offsetp;
+ if (lenp) {
+ STRLEN ulen = (STRLEN)*lenp;
+ sv_pos_u2b_proper(sv, &uoffset, &ulen);
+ *lenp = (I32)ulen;
+ } else {
+ sv_pos_u2b_proper(sv, &uoffset, NULL);
+ }
+ *offsetp = (I32)uoffset;
+}
+
/* Create and update the UTF8 magic offset cache, with the proffered utf8/
byte length pairing. The (byte) length of the total SV is passed in too,
as blen, because for some (more esoteric) SVs, the call to SvPV_const()
require './test.pl';
-plan(334);
+plan(360);
run_tests() unless caller;
eval{substr($a,1) = "" ; }; # P=R=S Q
like($@, $FATAL_MSG);
+$b = substr($a,-7,-6) ; # warn # Q R P S
+is($w--, 1);
+eval{substr($a,-7,-6) = "" ; }; # Q R P S
+like($@, $FATAL_MSG);
+
my $a = 'zxcvbnm';
substr($a,2,0) = '';
is($a, 'zxcvbnm');
is(substr($a,1,1), 'b');
}
+# [perl #62646] offsets exceeding 32 bits on 64-bit system
+SKIP: {
+ skip("32-bit system", 24) unless ~0 > 0xffffffff;
+ my $a = "abc";
+ my $s;
+ my $r;
+
+ utf8::downgrade($a);
+ for (1..2) {
+ $w = 0;
+ $r = substr($a, 0xffffffff, 1);
+ is($r, undef);
+ is($w, 1);
+
+ $w = 0;
+ $r = substr($a, 0xffffffff+1, 1);
+ is($r, undef);
+ is($w, 1);
+
+ $w = 0;
+ ok( !eval { $r = substr($s=$a, 0xffffffff, 1, "_"); 1 } );
+ is($r, undef);
+ is($s, $a);
+ is($w, 0);
+
+ $w = 0;
+ ok( !eval { $r = substr($s=$a, 0xffffffff+1, 1, "_"); 1 } );
+ is($r, undef);
+ is($s, $a);
+ is($w, 0);
+
+ utf8::upgrade($a);
+ }
+}
+
}