Add possessive quantifiers to regex engine.
[p5sagit/p5-mst-13.2.git] / sv.c
diff --git a/sv.c b/sv.c
index d0cf661..7dd83cc 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -1851,6 +1851,7 @@ STATIC int
 S_sv_2iuv_non_preserve(pTHX_ register SV *sv, I32 numtype)
 {
     dVAR;
+    PERL_UNUSED_ARG(numtype); /* Used only under DEBUGGING? */
     DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
     if (SvNVX(sv) < (NV)IV_MIN) {
        (void)SvIOKp_on(sv);
@@ -2728,12 +2729,15 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
                }
            }
            {
-               SV *tsv;
+               STRLEN len;
+               char *retval;
+               char *buffer;
                MAGIC *mg;
                const SV *const referent = (SV*)SvRV(sv);
 
                if (!referent) {
-                   tsv = sv_2mortal(newSVpvs("NULLREF"));
+                   len = 7;
+                   retval = buffer = savepvn("NULLREF", len);
                } else if (SvTYPE(referent) == SVt_PVMG
                           && ((SvFLAGS(referent) &
                                (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG))
@@ -2742,21 +2746,66 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
                    return stringify_regexp(sv, mg, lp);
                } else {
                    const char *const typestr = sv_reftype(referent, 0);
+                   const STRLEN typelen = strlen(typestr);
+                   UV addr = PTR2UV(referent);
+                   const char *stashname = NULL;
+                   STRLEN stashnamelen = 0; /* hush, gcc */
+                   const char *buffer_end;
 
-                   tsv = sv_newmortal();
                    if (SvOBJECT(referent)) {
-                       const char *const name = HvNAME_get(SvSTASH(referent));
-                       Perl_sv_setpvf(aTHX_ tsv, "%s=%s(0x%"UVxf")",
-                                      name ? name : "__ANON__" , typestr,
-                                      PTR2UV(referent));
+                       const HEK *const name = HvNAME_HEK(SvSTASH(referent));
+
+                       if (name) {
+                           stashname = HEK_KEY(name);
+                           stashnamelen = HEK_LEN(name);
+
+                           if (HEK_UTF8(name)) {
+                               SvUTF8_on(sv);
+                           } else {
+                               SvUTF8_off(sv);
+                           }
+                       } else {
+                           stashname = "__ANON__";
+                           stashnamelen = 8;
+                       }
+                       len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
+                           + 2 * sizeof(UV) + 2 /* )\0 */;
+                   } else {
+                       len = typelen + 3 /* (0x */
+                           + 2 * sizeof(UV) + 2 /* )\0 */;
                    }
-                   else
-                       Perl_sv_setpvf(aTHX_ tsv, "%s(0x%"UVxf")", typestr,
-                                      PTR2UV(referent));
+
+                   Newx(buffer, len, char);
+                   buffer_end = retval = buffer + len;
+
+                   /* Working backwards  */
+                   *--retval = '\0';
+                   *--retval = ')';
+                   do {
+                       *--retval = PL_hexdigit[addr & 15];
+                   } while (addr >>= 4);
+                   *--retval = 'x';
+                   *--retval = '0';
+                   *--retval = '(';
+
+                   retval -= typelen;
+                   memcpy(retval, typestr, typelen);
+
+                   if (stashname) {
+                       *--retval = '=';
+                       retval -= stashnamelen;
+                       memcpy(retval, stashname, stashnamelen);
+                   }
+                   /* retval may not neccesarily have reached the start of the
+                      buffer here.  */
+                   assert (retval >= buffer);
+
+                   len = buffer_end - retval - 1; /* -1 for that \0  */
                }
                if (lp)
-                   *lp = SvCUR(tsv);
-               return SvPVX(tsv);
+                   *lp = len;
+               SAVEFREEPV(buffer);
+               return retval;
            }
        }
        if (SvREADONLY(sv) && !SvOK(sv)) {
@@ -3477,7 +3526,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags)
     case SVt_PVBM:
        if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) {
            mg_get(sstr);
-           if ((int)SvTYPE(sstr) != stype) {
+           if (SvTYPE(sstr) != stype) {
                stype = SvTYPE(sstr);
                if (stype == SVt_PVGV && dtype <= SVt_PVGV) {
                    glob_assign_glob(dstr, sstr, dtype);
@@ -4483,6 +4532,9 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam
     case PERL_MAGIC_regdata:
        vtable = &PL_vtbl_regdata;
        break;
+    case PERL_MAGIC_regdata_names:
+       vtable = &PL_vtbl_regdata_names;
+       break;
     case PERL_MAGIC_regdatum:
        vtable = &PL_vtbl_regdatum;
        break;
@@ -7630,7 +7682,7 @@ Returns a string describing what the SV is a reference to.
 =cut
 */
 
-char *
+const char *
 Perl_sv_reftype(pTHX_ const SV *sv, int ob)
 {
     /* The fact that I don't need to downcast to char * everywhere, only in ?:
@@ -8867,13 +8919,17 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
            else {
                eptr = SvPVx_const(argsv, elen);
                if (DO_UTF8(argsv)) {
+                   I32 old_precis = precis;
                    if (has_precis && precis < elen) {
                        I32 p = precis;
                        sv_pos_u2b(argsv, &p, 0); /* sticks at end */
                        precis = p;
                    }
                    if (width) { /* fudge width (can't fudge elen) */
-                       width += elen - sv_len_utf8(argsv);
+                       if (has_precis && precis < elen)
+                           width += precis - old_precis;
+                       else
+                           width += elen - sv_len_utf8(argsv);
                    }
                    is_utf8 = TRUE;
                }
@@ -9038,8 +9094,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
                switch (base) {
                    unsigned dig;
                case 16:
-                   p = (char*)((c == 'X')
-                               ? "0123456789ABCDEF" : "0123456789abcdef");
+                   p = (char *)((c == 'X') ? PL_hexdigit + 16 : PL_hexdigit);
                    do {
                        dig = uv & 15;
                        *--ptr = p[dig];
@@ -9482,127 +9537,7 @@ ptr_table_* functions.
 REGEXP *
 Perl_re_dup(pTHX_ const REGEXP *r, CLONE_PARAMS *param)
 {
-    dVAR;
-    REGEXP *ret;
-    int i, len, npar;
-    struct reg_substr_datum *s;
-
-    if (!r)
-       return (REGEXP *)NULL;
-
-    if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r)))
-       return ret;
-
-    len = r->offsets[0];
-    npar = r->nparens+1;
-
-    Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp);
-    Copy(r->program, ret->program, len+1, regnode);
-
-    Newx(ret->startp, npar, I32);
-    Copy(r->startp, ret->startp, npar, I32);
-    Newx(ret->endp, npar, I32);
-    Copy(r->startp, ret->startp, npar, I32);
-
-    Newx(ret->substrs, 1, struct reg_substr_data);
-    for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
-       s->min_offset = r->substrs->data[i].min_offset;
-       s->max_offset = r->substrs->data[i].max_offset;
-       s->end_shift  = r->substrs->data[i].end_shift;
-       s->substr     = sv_dup_inc(r->substrs->data[i].substr, param);
-       s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param);
-    }
-
-    ret->regstclass = NULL;
-    if (r->data) {
-       struct reg_data *d;
-        const int count = r->data->count;
-       int i;
-
-       Newxc(d, sizeof(struct reg_data) + count*sizeof(void *),
-               char, struct reg_data);
-       Newx(d->what, count, U8);
-
-       d->count = count;
-       for (i = 0; i < count; i++) {
-           d->what[i] = r->data->what[i];
-           switch (d->what[i]) {
-               /* legal options are one of: sfpont
-                  see also regcomp.h and pregfree() */
-           case 's':
-               d->data[i] = sv_dup_inc((SV *)r->data->data[i], param);
-               break;
-           case 'p':
-               d->data[i] = av_dup_inc((AV *)r->data->data[i], param);
-               break;
-           case 'f':
-               /* This is cheating. */
-               Newx(d->data[i], 1, struct regnode_charclass_class);
-               StructCopy(r->data->data[i], d->data[i],
-                           struct regnode_charclass_class);
-               ret->regstclass = (regnode*)d->data[i];
-               break;
-           case 'o':
-               /* Compiled op trees are readonly, and can thus be
-                  shared without duplication. */
-               OP_REFCNT_LOCK;
-               d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]);
-               OP_REFCNT_UNLOCK;
-               break;
-           case 'n':
-               d->data[i] = r->data->data[i];
-               break;
-           case 't':
-               d->data[i] = r->data->data[i];
-               OP_REFCNT_LOCK;
-               ((reg_trie_data*)d->data[i])->refcount++;
-               OP_REFCNT_UNLOCK;
-               break;
-           case 'T':
-               d->data[i] = r->data->data[i];
-               OP_REFCNT_LOCK;
-               ((reg_ac_data*)d->data[i])->refcount++;
-               OP_REFCNT_UNLOCK;
-               /* Trie stclasses are readonly and can thus be shared
-                * without duplication. We free the stclass in pregfree
-                * when the corresponding reg_ac_data struct is freed.
-                */
-               ret->regstclass= r->regstclass;
-               break;
-            default:
-               Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]);
-           }
-       }
-
-       ret->data = d;
-    }
-    else
-       ret->data = NULL;
-
-    Newx(ret->offsets, 2*len+1, U32);
-    Copy(r->offsets, ret->offsets, 2*len+1, U32);
-
-    ret->precomp        = SAVEPVN(r->precomp, r->prelen);
-    ret->refcnt         = r->refcnt;
-    ret->minlen         = r->minlen;
-    ret->prelen         = r->prelen;
-    ret->nparens        = r->nparens;
-    ret->lastparen      = r->lastparen;
-    ret->lastcloseparen = r->lastcloseparen;
-    ret->reganch        = r->reganch;
-
-    ret->sublen         = r->sublen;
-
-    if (RX_MATCH_COPIED(ret))
-       ret->subbeg  = SAVEPVN(r->subbeg, r->sublen);
-    else
-       ret->subbeg = NULL;
-#ifdef PERL_OLD_COPY_ON_WRITE
-    ret->saved_copy = NULL;
-#endif
-
-    ptr_table_store(PL_ptr_table, r, ret);
-    return ret;
+    return CALLREGDUPE(r,param);
 }
 
 /* duplicate a file handle */
@@ -10309,9 +10244,7 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param)
            case CXt_LOOP:
                ncx->blk_loop.label     = cx->blk_loop.label;
                ncx->blk_loop.resetsp   = cx->blk_loop.resetsp;
-               ncx->blk_loop.redo_op   = cx->blk_loop.redo_op;
-               ncx->blk_loop.next_op   = cx->blk_loop.next_op;
-               ncx->blk_loop.last_op   = cx->blk_loop.last_op;
+               ncx->blk_loop.my_op     = cx->blk_loop.my_op;
                ncx->blk_loop.iterdata  = (CxPADLOOP(cx)
                                           ? cx->blk_loop.iterdata
                                           : gv_dup((GV*)cx->blk_loop.iterdata, param));
@@ -11061,6 +10994,12 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     sv_setpvn(PERL_DEBUG_PAD(1), "", 0);       /* ext/re needs these */
     sv_setpvn(PERL_DEBUG_PAD(2), "", 0);       /* even without DEBUGGING. */
 
+   
+    /* RE engine related */
+    Zero(&PL_reg_state, 1, struct re_save_state);
+    PL_reginterp_cnt   = 0;
+    PL_regmatch_slab   = NULL;
+    
     /* Clone the regex array */
     PL_regex_padav = newAV();
     {
@@ -11559,15 +11498,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_colorset                = 0;            /* reinits PL_colors[] */
     /*PL_colors[6]     = {0,0,0,0,0,0};*/
 
-    /* RE engine - function pointers */
-    PL_regcompp                = proto_perl->Tregcompp;
-    PL_regexecp                = proto_perl->Tregexecp;
-    PL_regint_start    = proto_perl->Tregint_start;
-    PL_regint_string   = proto_perl->Tregint_string;
-    PL_regfree         = proto_perl->Tregfree;
-    Zero(&PL_reg_state, 1, struct re_save_state);
-    PL_reginterp_cnt   = 0;
-    PL_regmatch_slab   = NULL;
+
 
     /* Pluggable optimizer */
     PL_peepp           = proto_perl->Tpeepp;