/* pp.c
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
+ * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
dVAR; dSP; dTARGET;
I32 gimme;
if (PL_op->op_private & OPpLVAL_INTRO)
- SAVECLEARSV(PAD_SVl(PL_op->op_targ));
+ if (!(PL_op->op_private & OPpPAD_STATE))
+ SAVECLEARSV(PAD_SVl(PL_op->op_targ));
EXTEND(SP, 1);
if (PL_op->op_flags & OPf_REF) {
PUSHs(TARG);
XPUSHs(TARG);
if (PL_op->op_private & OPpLVAL_INTRO)
- SAVECLEARSV(PAD_SVl(PL_op->op_targ));
+ if (!(PL_op->op_private & OPpPAD_STATE))
+ SAVECLEARSV(PAD_SVl(PL_op->op_targ));
if (PL_op->op_flags & OPf_REF)
RETURN;
else if (LVRET) {
RETURN;
}
+/* Helper function for pp_rv2sv and pp_rv2av.
+ *
+ * Turns a symbolic (non-reference) value into the glob it names.
+ *
+ *   sv   - the value being dereferenced symbolically
+ *   what - text describing the expected referent (pp_rv2sv passes
+ *          "a SCALAR"); only used as an argument to the die messages
+ *   type - SV type passed straight through to gv_fetchsv()
+ *   spp  - address of the caller's stack pointer (pp_rv2sv passes &sp);
+ *          updated or written through on the NULL-return paths below
+ *
+ * Returns the GV on success.  Returns NULL when the result has already
+ * been placed via spp and the caller should just return:
+ *   - sv is undefined: after the optional uninitialized warning, either
+ *     the stack pointer is moved back one slot (type is not SVt_PV and
+ *     GIMME_V is G_ARRAY) or the current slot is set to &PL_sv_undef;
+ *   - the op has OPf_SPECIAL without OPf_MOD, the non-creating lookup
+ *     found nothing, and either the name is not reported as magical by
+ *     is_gv_magical_sv() or the creating lookup also failed: the current
+ *     slot is set to &PL_sv_undef.
+ *
+ * Calls Perl_die() when HINT_STRICT_REFS is set in PL_op->op_private, and
+ * when sv is undefined and OPf_REF is set in PL_op->op_flags.
+ */
+GV *
+Perl_softref2xv(pTHX_ SV *const sv, const char *const what, const U32 type,
+ SV ***spp)
+{
+ dVAR;
+ GV *gv;
+
+ if (PL_op->op_private & HINT_STRICT_REFS) { /* both branches call Perl_die */
+ if (SvOK(sv))
+ Perl_die(aTHX_ PL_no_symref_sv, sv, what);
+ else
+ Perl_die(aTHX_ PL_no_usym, what);
+ }
+ if (!SvOK(sv)) {
+ if (PL_op->op_flags & OPf_REF)
+ Perl_die(aTHX_ PL_no_usym, what);
+ if (ckWARN(WARN_UNINITIALIZED))
+ report_uninit(sv);
+ if (type != SVt_PV && GIMME_V == G_ARRAY) {
+ (*spp)--; /* move the caller's stack pointer back one slot */
+ return NULL;
+ }
+ **spp = &PL_sv_undef; /* overwrite the slot the caller's pointer is at */
+ return NULL;
+ }
+ if ((PL_op->op_flags & OPf_SPECIAL) &&
+ !(PL_op->op_flags & OPf_MOD))
+ {
+ gv = gv_fetchsv(sv, 0, type); /* first try a lookup without GV_ADD */
+ if (!gv
+ && (!is_gv_magical_sv(sv,0)
+ || !(gv = gv_fetchsv(sv, GV_ADD, type))))
+ {
+ **spp = &PL_sv_undef;
+ return NULL;
+ }
+ }
+ else {
+ gv = gv_fetchsv(sv, GV_ADD, type);
+ }
+ return gv;
+}
+
PP(pp_rv2sv)
{
dVAR; dSP; dTOPss;
case SVt_PVFM:
case SVt_PVIO:
DIE(aTHX_ "Not a SCALAR reference");
+ default: NOOP;
}
}
else {
if (SvROK(sv))
goto wasref;
}
- if (PL_op->op_private & HINT_STRICT_REFS) {
- if (SvOK(sv))
- DIE(aTHX_ PL_no_symref_sv, sv, "a SCALAR");
- else
- DIE(aTHX_ PL_no_usym, "a SCALAR");
- }
- if (!SvOK(sv)) {
- if (PL_op->op_flags & OPf_REF)
- DIE(aTHX_ PL_no_usym, "a SCALAR");
- if (ckWARN(WARN_UNINITIALIZED))
- report_uninit(sv);
- RETSETUNDEF;
- }
- if ((PL_op->op_flags & OPf_SPECIAL) &&
- !(PL_op->op_flags & OPf_MOD))
- {
- gv = (GV*)gv_fetchsv(sv, 0, SVt_PV);
- if (!gv
- && (!is_gv_magical_sv(sv, 0)
- || !(gv = (GV*)gv_fetchsv(sv, GV_ADD, SVt_PV))))
- {
- RETSETUNDEF;
- }
- }
- else {
- gv = (GV*)gv_fetchsv(sv, GV_ADD, SVt_PV);
- }
+ gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
+ if (!gv)
+ RETURN;
}
sv = GvSVn(gv);
}
AV * const av = (AV*)TOPs;
SV ** const sv = Perl_av_arylen_p(aTHX_ (AV*)av);
if (!*sv) {
- *sv = newSV(0);
- sv_upgrade(*sv, SVt_PVMG);
+ *sv = newSV_type(SVt_PVMG);
sv_magic(*sv, (SV*)av, PERL_MAGIC_arylen, NULL, 0);
}
SETs(*sv);
{
dVAR; dSP;
GV *gv;
- HV *stash;
+ HV *stash_unused;
const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
? 0
: ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
/* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
/* (But not in defined().) */
- CV *cv = sv_2cv(TOPs, &stash, &gv, flags);
+ CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
if (cv) {
if (CvCLONE(cv))
cv = (CV*)sv_2mortal((SV*)cv_clone(cv));
SV *ret = &PL_sv_undef;
if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
- const char * const s = SvPVX_const(TOPs);
+ const char * s = SvPVX_const(TOPs);
if (strnEQ(s, "CORE::", 6)) {
- const int code = keyword(s + 6, SvCUR(TOPs) - 6);
+ const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
if (code < 0) { /* Overridable. */
#define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
- int i = 0, n = 0, seen_question = 0;
+ int i = 0, n = 0, seen_question = 0, defgv = 0;
I32 oa;
char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
if (code == -KEY_chop || code == -KEY_chomp
- || code == -KEY_exec || code == -KEY_system)
+ || code == -KEY_exec || code == -KEY_system || code == -KEY_err)
goto set;
+ if (code == -KEY_mkdir) {
+ ret = sv_2mortal(newSVpvs("_;$"));
+ goto set;
+ }
+ if (code == -KEY_readpipe) {
+ s = "CORE::backtick";
+ }
while (i < MAXO) { /* The slow way. */
if (strEQ(s + 6, PL_op_name[i])
|| strEQ(s + 6, PL_op_desc[i]))
}
goto nonesuch; /* Should not happen... */
found:
+ defgv = PL_opargs[i] & OA_DEFGV;
oa = PL_opargs[i] >> OASHIFT;
while (oa) {
- if (oa & OA_OPTIONAL && !seen_question) {
+ if (oa & OA_OPTIONAL && !seen_question && !defgv) {
seen_question = 1;
str[n++] = ';';
}
str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
oa = oa >> 4;
}
+ if (defgv && str[n - 1] == '$')
+ str[n - 1] = '_';
str[n++] = '\0';
ret = sv_2mortal(newSVpvn(str, n - 1));
}
if (len == 0 && ckWARN(WARN_MISC))
Perl_warner(aTHX_ packWARN(WARN_MISC),
"Explicit blessing to '' (assuming package main)");
- stash = gv_stashpvn(ptr, len, TRUE);
+ stash = gv_stashpvn(ptr, len, GV_ADD);
}
(void)sv_bless(TOPs, stash);
}
s = (unsigned char*)(SvPV(sv, len));
pos = len;
- if (pos <= 0 || !SvPOK(sv)) {
+ if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) {
/* No point in studying a zero length string, and not safe to study
anything that doesn't appear to be a simple scalar (and hence might
change between now and when the regexp engine runs without our set
bhigh = blow >> (4 * sizeof (UV));
blow &= botmask;
if (ahigh && bhigh) {
- /*EMPTY*/;
+ NOOP;
/* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
which is overflow. Drop to NVs below. */
} else if (!ahigh && !bhigh) {
if (SvUTF8(TARG)) {
/* Calculate exact length, let's not estimate. */
STRLEN targlen = 0;
- U8 *result;
- U8 *send;
STRLEN l;
UV nchar = 0;
UV nwide = 0;
+ U8 * const send = tmps + len;
+ U8 * const origtmps = tmps;
+ const UV utf8flags = UTF8_ALLOW_ANYUV;
- send = tmps + len;
while (tmps < send) {
- const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
- tmps += UTF8SKIP(tmps);
+ const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
+ tmps += l;
targlen += UNISKIP(~c);
nchar++;
if (c > 0xff)
}
/* Now rewind strings and write them. */
- tmps -= len;
+ tmps = origtmps;
if (nwide) {
- Newxz(result, targlen + 1, U8);
+ U8 *result;
+ U8 *p;
+
+ Newx(result, targlen + 1, U8);
+ p = result;
while (tmps < send) {
- const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
- tmps += UTF8SKIP(tmps);
- result = uvchr_to_utf8_flags(result, ~c, UNICODE_ALLOW_ANY);
+ const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
+ tmps += l;
+ p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
}
- *result = '\0';
- result -= targlen;
- sv_setpvn(TARG, (char*)result, targlen);
+ *p = '\0';
+ sv_usepvn_flags(TARG, (char*)result, targlen,
+ SV_HAS_TRAILING_NUL);
SvUTF8_on(TARG);
}
else {
- Newxz(result, nchar + 1, U8);
+ U8 *result;
+ U8 *p;
+
+ Newx(result, nchar + 1, U8);
+ p = result;
while (tmps < send) {
- const U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY);
- tmps += UTF8SKIP(tmps);
- *result++ = ~c;
+ const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
+ tmps += l;
+ *p++ = ~c;
}
- *result = '\0';
- result -= nchar;
- sv_setpvn(TARG, (char*)result, nchar);
+ *p = '\0';
+ sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
SvUTF8_off(TARG);
}
- Safefree(result);
SETs(TARG);
RETURN;
}
for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
*tmps = ~*tmps;
tmpl = (long*)tmps;
- for ( ; anum >= sizeof(long); anum -= sizeof(long), tmpl++)
+ for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
*tmpl = ~*tmpl;
tmps = (U8*)tmpl;
}
dVAR; dSP; dTARGET;
SV * const sv = TOPs;
- if (DO_UTF8(sv))
+ if (SvAMAGIC(sv)) {
+ /* For an overloaded scalar, we can't know in advance if it's going to
+ be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to
+ cache the length. Maybe that should be a documented feature of it.
+ */
+ STRLEN len;
+ const char *const p = SvPV_const(sv, len);
+
+ if (DO_UTF8(sv)) {
+ SETi(utf8_length((U8*)p, (U8*)p + len));
+ }
+ else
+ SETi(len);
+
+ }
+ else if (DO_UTF8(sv))
SETi(sv_len_utf8(sv));
else
SETi(sv_len(sv));
sv_upgrade(TARG, SVt_PVLV);
sv_magic(TARG, NULL, PERL_MAGIC_substr, NULL, 0);
}
- else
- SvOK_off(TARG);
LvTYPE(TARG) = 'x';
if (LvTARG(TARG) != sv) {
STRLEN llen = 0;
I32 offset;
I32 retval;
- const char *tmps;
- const char *tmps2;
+ const char *big_p;
+ const char *little_p;
const I32 arybase = CopARYBASE_get(PL_curcop);
bool big_utf8;
bool little_utf8;
}
little = POPs;
big = POPs;
+ big_p = SvPV_const(big, biglen);
+ little_p = SvPV_const(little, llen);
+
big_utf8 = DO_UTF8(big);
little_utf8 = DO_UTF8(little);
if (big_utf8 ^ little_utf8) {
if (little_utf8 && !PL_encoding) {
/* Well, maybe instead we might be able to downgrade the small
string? */
- STRLEN little_len;
- const U8 * const little_pv = (U8*) SvPV_const(little, little_len);
- char * const pv = (char*)bytes_from_utf8(little_pv, &little_len,
+ char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
&little_utf8);
if (little_utf8) {
/* If the large string is ISO-8859-1, and it's not possible to
/* At this point, pv is a malloc()ed string. So donate it to temp
to ensure it will get free()d */
little = temp = newSV(0);
- sv_usepvn(temp, pv, little_len);
+ sv_usepvn(temp, pv, llen);
+ little_p = SvPVX(little);
} else {
- SV * const bytes = little_utf8 ? big : little;
- STRLEN len;
- const char * const p = SvPV_const(bytes, len);
-
- temp = newSVpvn(p, len);
+ temp = little_utf8
+ ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
if (PL_encoding) {
sv_recode_to_utf8(temp, PL_encoding);
if (little_utf8) {
big = temp;
big_utf8 = TRUE;
+ big_p = SvPV_const(big, biglen);
} else {
little = temp;
+ little_p = SvPV_const(little, llen);
}
}
}
- /* Don't actually need the NULL initialisation, but it keeps gcc quiet. */
- tmps2 = is_index ? NULL : SvPV_const(little, llen);
- tmps = SvPV_const(big, biglen);
+ if (SvGAMAGIC(big)) {
+ /* Life just becomes a lot easier if I use a temporary here.
+ Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
+ will trigger magic and overloading again, as will fbm_instr()
+ */
+ big = sv_2mortal(newSVpvn(big_p, biglen));
+ if (big_utf8)
+ SvUTF8_on(big);
+ big_p = SvPVX(big);
+ }
+ if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
+ /* is_index && !SvOK() is a hack. fbm_instr() calls SvPV_const, which will
+ warn on undef, and we've already triggered a warning with the
+ SvPV_const some lines above. We can't remove that, as we need to
+ call some SvPV to trigger overloading early and find out if the
+ string is UTF-8.
+ This is all getting too messy. The API isn't quite clean enough,
+ because data access has side effects.
+ */
+ little = sv_2mortal(newSVpvn(little_p, llen));
+ if (little_utf8)
+ SvUTF8_on(little);
+ little_p = SvPVX(little);
+ }
if (MAXARG < 3)
offset = is_index ? 0 : biglen;
else {
if (big_utf8 && offset > 0)
sv_pos_u2b(big, &offset, 0);
- offset += llen;
+ if (!is_index)
+ offset += llen;
}
if (offset < 0)
offset = 0;
else if (offset > (I32)biglen)
offset = biglen;
- if (!(tmps2 = is_index
- ? fbm_instr((unsigned char*)tmps + offset,
- (unsigned char*)tmps + biglen, little, 0)
- : rninstr(tmps, tmps + offset,
- tmps2, tmps2 + llen)))
+ if (!(little_p = is_index
+ ? fbm_instr((unsigned char*)big_p + offset,
+ (unsigned char*)big_p + biglen, little, 0)
+ : rninstr(big_p, big_p + offset,
+ little_p, little_p + llen)))
retval = -1;
else {
- retval = tmps2 - tmps;
+ retval = little_p - big_p;
if (retval > 0 && big_utf8)
sv_pos_b2u(big, &retval);
}
PP(pp_sprintf)
{
dVAR; dSP; dMARK; dORIGMARK; dTARGET;
+ if (SvTAINTED(MARK[1]))
+ TAINT_PROPER("sprintf");
do_sprintf(TARG, SP-MARK, MARK+1);
TAINT_IF(SvTAINTED(TARG));
SP = ORIGMARK;
PP(pp_ord)
{
dVAR; dSP; dTARGET;
+
SV *argsv = POPs;
STRLEN len;
const U8 *s = (U8*)SvPV_const(argsv, len);
- SV *tmpsv;
if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
- tmpsv = sv_2mortal(newSVsv(argsv));
+ SV * const tmpsv = sv_2mortal(newSVsv(argsv));
s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
argsv = tmpsv;
}
*tmps++ = (char)value;
*tmps = '\0';
(void)SvPOK_only(TARG);
+
if (PL_encoding && !IN_BYTES) {
sv_recode_to_utf8(TARG, PL_encoding);
tmps = SvPVX(TARG);
if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
- memEQ(tmps, "\xef\xbf\xbd\0", 4)) {
- SvGROW(TARG, 3);
+ UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
+ SvGROW(TARG, 2);
tmps = SvPVX(TARG);
- SvCUR_set(TARG, 2);
- *tmps++ = (U8)UTF8_EIGHT_BIT_HI(value);
- *tmps++ = (U8)UTF8_EIGHT_BIT_LO(value);
+ SvCUR_set(TARG, 1);
+ *tmps++ = (char)value;
*tmps = '\0';
- SvUTF8_on(TARG);
+ SvUTF8_off(TARG);
}
}
+
XPUSHs(TARG);
RETURN;
}
{
dVAR;
dSP;
- SV *sv = TOPs;
- const U8 *s;
+ SV *source = TOPs;
STRLEN slen;
+ STRLEN need;
+ SV *dest;
+ bool inplace = TRUE;
+ bool doing_utf8;
const int op_type = PL_op->op_type;
+ const U8 *s;
+ U8 *d;
+ U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+ STRLEN ulen;
+ STRLEN tculen;
+
+ SvGETMAGIC(source);
+ if (SvOK(source)) {
+ s = (const U8*)SvPV_nomg_const(source, slen);
+ } else {
+ s = (const U8*)"";
+ slen = 0;
+ }
- SvGETMAGIC(sv);
- if (DO_UTF8(sv) &&
- (s = (const U8*)SvPV_nomg_const(sv, slen)) && slen &&
- UTF8_IS_START(*s)) {
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- STRLEN ulen;
- STRLEN tculen;
-
+ if (slen && DO_UTF8(source) && UTF8_IS_START(*s)) {
+ doing_utf8 = TRUE;
utf8_to_uvchr(s, &ulen);
if (op_type == OP_UCFIRST) {
toTITLE_utf8(s, tmpbuf, &tculen);
} else {
toLOWER_utf8(s, tmpbuf, &tculen);
}
+ /* If the two differ, we definitely cannot do inplace. */
+ inplace = (ulen == tculen);
+ need = slen + 1 - ulen + tculen;
+ } else {
+ doing_utf8 = FALSE;
+ need = slen + 1;
+ }
+
+ if (SvPADTMP(source) && !SvREADONLY(source) && inplace) {
+ /* We can convert in place. */
+
+ dest = source;
+ s = d = (U8*)SvPV_force_nomg(source, slen);
+ } else {
+ dTARGET;
+
+ dest = TARG;
+
+ SvUPGRADE(dest, SVt_PV);
+ d = (U8*)SvGROW(dest, need);
+ (void)SvPOK_only(dest);
+
+ SETs(dest);
- if (!SvPADTMP(sv) || SvREADONLY(sv) || ulen != tculen) {
- dTARGET;
+ inplace = FALSE;
+ }
+
+ if (doing_utf8) {
+ if(!inplace) {
/* slen is the byte length of the whole SV.
* ulen is the byte length of the original Unicode character
* stored as UTF-8 at s.
* lowercased) Unicode character stored as UTF-8 at tmpbuf.
* We first set the result to be the titlecased (/lowercased)
* character, and then append the rest of the SV data. */
- sv_setpvn(TARG, (char*)tmpbuf, tculen);
+ sv_setpvn(dest, (char*)tmpbuf, tculen);
if (slen > ulen)
- sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
- SvUTF8_on(TARG);
- sv = TARG;
- SETs(sv);
+ sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
+ SvUTF8_on(dest);
}
else {
- s = (U8*)SvPV_force_nomg(sv, slen);
- Copy(tmpbuf, s, tculen, U8);
+ Copy(tmpbuf, d, tculen, U8);
+ SvCUR_set(dest, need - 1);
}
}
else {
- U8 *s1;
- if (!SvPADTMP(sv) || SvREADONLY(sv)) {
- dTARGET;
- SvUTF8_off(TARG); /* decontaminate */
- sv_setsv_nomg(TARG, sv);
- sv = TARG;
- SETs(sv);
- }
- s1 = (U8*)SvPV_force_nomg(sv, slen);
- if (*s1) {
+ if (*s) {
if (IN_LOCALE_RUNTIME) {
TAINT;
- SvTAINTED_on(sv);
- *s1 = (op_type == OP_UCFIRST)
- ? toUPPER_LC(*s1) : toLOWER_LC(*s1);
+ SvTAINTED_on(dest);
+ *d = (op_type == OP_UCFIRST)
+ ? toUPPER_LC(*s) : toLOWER_LC(*s);
}
else
- *s1 = (op_type == OP_UCFIRST) ? toUPPER(*s1) : toLOWER(*s1);
+ *d = (op_type == OP_UCFIRST) ? toUPPER(*s) : toLOWER(*s);
+ } else {
+ /* See bug #39028 */
+ *d = *s;
+ }
+
+ if (SvUTF8(source))
+ SvUTF8_on(dest);
+
+ if (!inplace) {
+ /* This will copy the trailing NUL */
+ Copy(s + 1, d + 1, slen, U8);
+ SvCUR_set(dest, need - 1);
}
}
- SvSETMAGIC(sv);
+ SvSETMAGIC(dest);
RETURN;
}
+/* There's so much setup/teardown code common between uc and lc, I wonder if
+ it would be worth merging the two, and just having a switch outside each
+ of the three tight loops. */
PP(pp_uc)
{
dVAR;
dSP;
- SV *sv = TOPs;
+ SV *source = TOPs;
STRLEN len;
+ STRLEN min;
+ SV *dest;
+ const U8 *s;
+ U8 *d;
- SvGETMAGIC(sv);
- if (DO_UTF8(sv)) {
+ SvGETMAGIC(source);
+
+ if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
+ && !DO_UTF8(source)) {
+ /* We can convert in place. */
+
+ dest = source;
+ s = d = (U8*)SvPV_force_nomg(source, len);
+ min = len + 1;
+ } else {
dTARGET;
- STRLEN ulen;
- register U8 *d;
- const U8 *s;
- const U8 *send;
- U8 tmpbuf[UTF8_MAXBYTES+1];
- s = (const U8*)SvPV_nomg_const(sv,len);
- if (!len) {
- SvUTF8_off(TARG); /* decontaminate */
- sv_setpvn(TARG, "", 0);
- sv = TARG;
- SETs(sv);
- }
- else {
- STRLEN min = len + 1;
+ dest = TARG;
- SvUPGRADE(TARG, SVt_PV);
- SvGROW(TARG, min);
- (void)SvPOK_only(TARG);
- d = (U8*)SvPVX(TARG);
- send = s + len;
- while (s < send) {
- STRLEN u = UTF8SKIP(s);
-
- toUPPER_utf8(s, tmpbuf, &ulen);
- if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) {
- /* If the eventually required minimum size outgrows
- * the available space, we need to grow. */
- const UV o = d - (U8*)SvPVX_const(TARG);
-
- /* If someone uppercases one million U+03B0s we
- * SvGROW() one million times. Or we could try
- * guessing how much to allocate without allocating
- * too much. Such is life. */
- SvGROW(TARG, min);
- d = (U8*)SvPVX(TARG) + o;
- }
- Copy(tmpbuf, d, ulen, U8);
- d += ulen;
- s += u;
- }
- *d = '\0';
- SvUTF8_on(TARG);
- SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG));
- sv = TARG;
- SETs(sv);
+ /* The old implementation would copy source into TARG at this point.
+ This had the side effect that if source was undef, TARG was now
+ an undefined SV with PADTMP set, and they don't warn inside
+ sv_2pv_flags(). However, we're now getting the PV direct from
+ source, which doesn't have PADTMP set, so it would warn. Hence the
+ little games. */
+
+ if (SvOK(source)) {
+ s = (const U8*)SvPV_nomg_const(source, len);
+ } else {
+ s = (const U8*)"";
+ len = 0;
}
+ min = len + 1;
+
+ SvUPGRADE(dest, SVt_PV);
+ d = (U8*)SvGROW(dest, min);
+ (void)SvPOK_only(dest);
+
+ SETs(dest);
}
- else {
- U8 *s;
- if (!SvPADTMP(sv) || SvREADONLY(sv)) {
- dTARGET;
- SvUTF8_off(TARG); /* decontaminate */
- sv_setsv_nomg(TARG, sv);
- sv = TARG;
- SETs(sv);
+
+ /* Overloaded values may have toggled the UTF-8 flag on source, so we need
+ to check DO_UTF8 again here. */
+
+ if (DO_UTF8(source)) {
+ const U8 *const send = s + len;
+ U8 tmpbuf[UTF8_MAXBYTES+1];
+
+ while (s < send) {
+ const STRLEN u = UTF8SKIP(s);
+ STRLEN ulen;
+
+ toUPPER_utf8(s, tmpbuf, &ulen);
+ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+ /* If the eventually required minimum size outgrows
+ * the available space, we need to grow. */
+ const UV o = d - (U8*)SvPVX_const(dest);
+
+ /* If someone uppercases one million U+03B0s we SvGROW() one
+ * million times. Or we could try guessing how much to
+ allocate without allocating too much. Such is life. */
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
+ }
+ Copy(tmpbuf, d, ulen, U8);
+ d += ulen;
+ s += u;
}
- s = (U8*)SvPV_force_nomg(sv, len);
+ SvUTF8_on(dest);
+ *d = '\0';
+ SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ } else {
if (len) {
- register const U8 *send = s + len;
-
+ const U8 *const send = s + len;
if (IN_LOCALE_RUNTIME) {
TAINT;
- SvTAINTED_on(sv);
- for (; s < send; s++)
- *s = toUPPER_LC(*s);
+ SvTAINTED_on(dest);
+ for (; s < send; d++, s++)
+ *d = toUPPER_LC(*s);
}
else {
- for (; s < send; s++)
- *s = toUPPER(*s);
+ for (; s < send; d++, s++)
+ *d = toUPPER(*s);
}
}
+ if (source != dest) {
+ *d = '\0';
+ SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ }
}
- SvSETMAGIC(sv);
+ SvSETMAGIC(dest);
RETURN;
}
{
dVAR;
dSP;
- SV *sv = TOPs;
+ SV *source = TOPs;
STRLEN len;
+ STRLEN min;
+ SV *dest;
+ const U8 *s;
+ U8 *d;
- SvGETMAGIC(sv);
- if (DO_UTF8(sv)) {
+ SvGETMAGIC(source);
+
+ if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
+ && !DO_UTF8(source)) {
+ /* We can convert in place. */
+
+ dest = source;
+ s = d = (U8*)SvPV_force_nomg(source, len);
+ min = len + 1;
+ } else {
dTARGET;
- const U8 *s;
- STRLEN ulen;
- register U8 *d;
- const U8 *send;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- s = (const U8*)SvPV_nomg_const(sv,len);
- if (!len) {
- SvUTF8_off(TARG); /* decontaminate */
- sv_setpvn(TARG, "", 0);
- sv = TARG;
- SETs(sv);
+ dest = TARG;
+
+ /* The old implementation would copy source into TARG at this point.
+ This had the side effect that if source was undef, TARG was now
+ an undefined SV with PADTMP set, and they don't warn inside
+ sv_2pv_flags(). However, we're now getting the PV direct from
+ source, which doesn't have PADTMP set, so it would warn. Hence the
+ little games. */
+
+ if (SvOK(source)) {
+ s = (const U8*)SvPV_nomg_const(source, len);
+ } else {
+ s = (const U8*)"";
+ len = 0;
}
- else {
- STRLEN min = len + 1;
+ min = len + 1;
- SvUPGRADE(TARG, SVt_PV);
- SvGROW(TARG, min);
- (void)SvPOK_only(TARG);
- d = (U8*)SvPVX(TARG);
- send = s + len;
- while (s < send) {
- const STRLEN u = UTF8SKIP(s);
- const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
+ SvUPGRADE(dest, SVt_PV);
+ d = (U8*)SvGROW(dest, min);
+ (void)SvPOK_only(dest);
+
+ SETs(dest);
+ }
+
+ /* Overloaded values may have toggled the UTF-8 flag on source, so we need
+ to check DO_UTF8 again here. */
+
+ if (DO_UTF8(source)) {
+ const U8 *const send = s + len;
+ U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+
+ while (s < send) {
+ const STRLEN u = UTF8SKIP(s);
+ STRLEN ulen;
+ const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 /* Unicode U+03A3 */
- if (uv == GREEK_CAPITAL_LETTER_SIGMA) {
- /*EMPTY*/
- /*
- * Now if the sigma is NOT followed by
- * /$ignorable_sequence$cased_letter/;
- * and it IS preceded by
- * /$cased_letter$ignorable_sequence/;
- * where $ignorable_sequence is
- * [\x{2010}\x{AD}\p{Mn}]*
- * and $cased_letter is
- * [\p{Ll}\p{Lo}\p{Lt}]
- * then it should be mapped to 0x03C2,
- * (GREEK SMALL LETTER FINAL SIGMA),
- * instead of staying 0x03A3.
- * "should be": in other words,
- * this is not implemented yet.
- * See lib/unicore/SpecialCasing.txt.
- */
- }
- if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) {
- /* If the eventually required minimum size outgrows
- * the available space, we need to grow. */
- const UV o = d - (U8*)SvPVX_const(TARG);
-
- /* If someone lowercases one million U+0130s we
- * SvGROW() one million times. Or we could try
- * guessing how much to allocate without allocating.
- * too much. Such is life. */
- SvGROW(TARG, min);
- d = (U8*)SvPVX(TARG) + o;
- }
- Copy(tmpbuf, d, ulen, U8);
- d += ulen;
- s += u;
+ if (uv == GREEK_CAPITAL_LETTER_SIGMA) {
+ NOOP;
+ /*
+ * Now if the sigma is NOT followed by
+ * /$ignorable_sequence$cased_letter/;
+ * and it IS preceded by /$cased_letter$ignorable_sequence/;
+ * where $ignorable_sequence is [\x{2010}\x{AD}\p{Mn}]*
+ * and $cased_letter is [\p{Ll}\p{Lo}\p{Lt}]
+ * then it should be mapped to 0x03C2,
+ * (GREEK SMALL LETTER FINAL SIGMA),
+ * instead of staying 0x03A3.
+ * "should be": in other words, this is not implemented yet.
+ * See lib/unicore/SpecialCasing.txt.
+ */
}
- *d = '\0';
- SvUTF8_on(TARG);
- SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG));
- sv = TARG;
- SETs(sv);
- }
- }
- else {
- U8 *s;
- if (!SvPADTMP(sv) || SvREADONLY(sv)) {
- dTARGET;
- SvUTF8_off(TARG); /* decontaminate */
- sv_setsv_nomg(TARG, sv);
- sv = TARG;
- SETs(sv);
+ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+ /* If the eventually required minimum size outgrows
+ * the available space, we need to grow. */
+ const UV o = d - (U8*)SvPVX_const(dest);
+
+ /* If someone lowercases one million U+0130s we SvGROW() one
+ * million times. Or we could try guessing how much to
+ allocate without allocating too much. Such is life. */
+ SvGROW(dest, min);
+ d = (U8*)SvPVX(dest) + o;
+ }
+ Copy(tmpbuf, d, ulen, U8);
+ d += ulen;
+ s += u;
}
-
- s = (U8*)SvPV_force_nomg(sv, len);
+ SvUTF8_on(dest);
+ *d = '\0';
+ SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ } else {
if (len) {
- register const U8 * const send = s + len;
-
+ const U8 *const send = s + len;
if (IN_LOCALE_RUNTIME) {
TAINT;
- SvTAINTED_on(sv);
- for (; s < send; s++)
- *s = toLOWER_LC(*s);
+ SvTAINTED_on(dest);
+ for (; s < send; d++, s++)
+ *d = toLOWER_LC(*s);
}
else {
- for (; s < send; s++)
- *s = toLOWER(*s);
+ for (; s < send; d++, s++)
+ *d = toLOWER(*s);
}
}
+ if (source != dest) {
+ *d = '\0';
+ SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ }
}
- SvSETMAGIC(sv);
+ SvSETMAGIC(dest);
RETURN;
}
{
dVAR;
dSP;
- HV * const hash = (HV*)POPs;
+ HV * hash = (HV*)POPs;
HE *entry;
const I32 gimme = GIMME_V;
if (lval) {
if (!svp || *svp == &PL_sv_undef) {
- DIE(aTHX_ PL_no_helem_sv, keysv);
+ DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
}
if (localizing) {
if (HvNAME_get(hv) && isGV(*svp))
STRLEN keylen;
const char * const key = SvPV_const(keysv, keylen);
SAVEDELETE(hv, savepvn(key,keylen),
- SvUTF8(keysv) ? -keylen : keylen);
+ SvUTF8(keysv) ? -(I32)keylen : (I32)keylen);
}
}
}
SV ** const firstlelem = PL_stack_base + POPMARK + 1;
register SV ** const firstrelem = lastlelem + 1;
const I32 arybase = CopARYBASE_get(PL_curcop);
- I32 is_something_there = PL_op->op_flags & OPf_MOD;
+ I32 is_something_there = FALSE;
register const I32 max = lastrelem - lastlelem;
register SV **lelem;
{
dVAR; dSP; dMARK; dORIGMARK;
const I32 items = SP - MARK;
- SV * const av = sv_2mortal((SV*)av_make(items, MARK+1));
+ SV * const av = (SV *) av_make(items, MARK+1);
SP = ORIGMARK; /* av_make() might realloc stack_sp */
- XPUSHs(av);
+ XPUSHs(sv_2mortal((PL_op->op_flags & OPf_SPECIAL)
+ ? newRV_noinc(av) : av));
RETURN;
}
PP(pp_anonhash)
{
dVAR; dSP; dMARK; dORIGMARK;
- HV* const hv = (HV*)sv_2mortal((SV*)newHV());
+ HV* const hv = newHV();
while (MARK < SP) {
SV * const key = *++MARK;
(void)hv_store_ent(hv,key,val,0);
}
SP = ORIGMARK;
- XPUSHs((SV*)hv);
+ XPUSHs(sv_2mortal((PL_op->op_flags & OPf_SPECIAL)
+ ? newRV_noinc((SV*) hv) : (SV*)hv));
RETURN;
}
}
if (diff < 0) { /* shrinking the area */
- SV **tmparyval;
+ SV **tmparyval = NULL;
if (newlen) {
Newx(tmparyval, newlen, SV*); /* so remember insertion */
Copy(MARK, tmparyval, newlen, SV*);
*dst-- = *src--;
}
dst = AvARRAY(ary);
- SvPV_set(ary, (char*)(AvARRAY(ary) - diff)); /* diff is negative */
+ AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
AvMAX(ary) += diff;
}
else {
dst = src - diff;
Move(src, dst, offset, SV*);
}
- SvPV_set(ary, (char*)(AvARRAY(ary) - diff));/* diff is positive */
+ AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
AvMAX(ary) += diff;
AvFILLp(ary) += diff;
}
PP(pp_push)
{
dVAR; dSP; dMARK; dORIGMARK; dTARGET;
- register AV *ary = (AV*)*++MARK;
+ register AV * const ary = (AV*)*++MARK;
const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied);
if (mg) {
register I32 tmp;
dTARGET;
STRLEN len;
- I32 padoff_du;
+ PADOFFSET padoff_du;
SvUTF8_off(TARG); /* decontaminate */
if (SP - MARK > 1)
register SV *dstr;
register const char *m;
I32 iters = 0;
- const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (strend - s);
+ const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
I32 maxiters = slen + 10;
const char *orig;
const I32 origlimit = limit;
base = SP - PL_stack_base;
orig = s;
if (pm->op_pmflags & PMf_SKIPWHITE) {
- if (pm->op_pmflags & PMf_LOCALE) {
+ if (do_utf8) {
+ while (*s == ' ' || is_utf8_space((U8*)s))
+ s += UTF8SKIP(s);
+ }
+ else if (pm->op_pmflags & PMf_LOCALE) {
while (isSPACE_LC(*s))
s++;
}
if (pm->op_pmflags & PMf_WHITE) {
while (--limit) {
m = s;
- while (m < strend &&
- !((pm->op_pmflags & PMf_LOCALE)
- ? isSPACE_LC(*m) : isSPACE(*m)))
- ++m;
+ /* this one uses 'm' and is a negative test */
+ if (do_utf8) {
+ while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
+ const int t = UTF8SKIP(m);
+ /* is_utf8_space returns FALSE for malformed UTF-8 */
+ if (strend - m < t)
+ m = strend;
+ else
+ m += t;
+ }
+ } else if (pm->op_pmflags & PMf_LOCALE) {
+ while (m < strend && !isSPACE_LC(*m))
+ ++m;
+ } else {
+ while (m < strend && !isSPACE(*m))
+ ++m;
+ }
if (m >= strend)
break;
(void)SvUTF8_on(dstr);
XPUSHs(dstr);
- s = m + 1;
- while (s < strend &&
- ((pm->op_pmflags & PMf_LOCALE)
- ? isSPACE_LC(*s) : isSPACE(*s)))
- ++s;
+ /* skip the whitespace found last */
+ if (do_utf8)
+ s = m + UTF8SKIP(m);
+ else
+ s = m + 1;
+
+ /* this one uses 's' and is a positive test */
+ if (do_utf8) {
+ while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
+ s += UTF8SKIP(s);
+ } else if (pm->op_pmflags & PMf_LOCALE) {
+ while (s < strend && isSPACE_LC(*s))
+ ++s;
+ } else {
+ while (s < strend && isSPACE(*s))
+ ++s;
+ }
}
}
- else if (rx->precomp[0] == '^' && rx->precomp[1] == '\0') {
+ else if (rx->extflags & RXf_START_ONLY) {
while (--limit) {
for (m = s; m < strend && *m != '\n'; m++)
;
s = m;
}
}
- else if (do_utf8 == ((rx->reganch & ROPT_UTF8) != 0) &&
- (rx->reganch & RE_USE_INTUIT) && !rx->nparens
- && (rx->reganch & ROPT_CHECK_ALL)
- && !(rx->reganch & ROPT_ANCH)) {
- const int tail = (rx->reganch & RE_INTUIT_TAIL);
- SV * const csv = CALLREG_INTUIT_STRING(aTHX_ rx);
+ else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) &&
+ (rx->extflags & RXf_USE_INTUIT) && !rx->nparens
+ && (rx->extflags & RXf_CHECK_ALL)
+ && !(rx->extflags & RXf_ANCH)) {
+ const int tail = (rx->extflags & RXf_INTUIT_TAIL);
+ SV * const csv = CALLREG_INTUIT_STRING(rx);
- len = rx->minlen;
- if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) {
+ len = rx->minlenret;
+ if (len == 1 && !(rx->extflags & RXf_UTF8) && !tail) {
const char c = *SvPV_nolen_const(csv);
while (--limit) {
for (m = s; m < strend && *m != c; m++)
{
I32 rex_return;
PUTBACK;
- rex_return = CALLREGEXEC(aTHX_ rx, (char*)s, (char*)strend, (char*)orig, 1 ,
+ rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
sv, NULL, 0);
SPAGAIN;
if (rex_return == 0)