Re: [PATCH] Lighten up glob
[p5sagit/p5-mst-13.2.git] / op.c
diff --git a/op.c b/op.c
index 5623e14..59cbfa3 100644 (file)
--- a/op.c
+++ b/op.c
@@ -103,30 +103,6 @@ S_no_bareword_allowed(pTHX_ OP *o)
                     SvPV_nolen(cSVOPo_sv)));
 }
 
-STATIC U8*
-S_trlist_upgrade(pTHX_ U8** sp, U8** ep)
-{
-    U8 *s = *sp;
-    U8 *e = *ep;
-    U8 *d;
-
-    Newz(801, d, (e - s) * 2, U8);
-    *sp = d;
-
-    while (s < e) {
-        if (*s < 0x80 || *s == 0xff)
-            *d++ = *s++;
-       else {
-            U8 c = *s++;
-            *d++ = ((c >> 6)         | 0xc0);
-            *d++ = ((c       & 0x3f) | 0x80);
-        }
-    }
-    *ep = d;
-    return *sp;
-}
-
-
 /* "register" allocation */
 
 PADOFFSET
@@ -845,7 +821,7 @@ S_op_clear(pTHX_ OP *o)
 clear_pmop:
        {
            HV *pmstash = PmopSTASH(cPMOPo);
-           if (pmstash) {
+           if (pmstash && SvREFCNT(pmstash)) {
                PMOP *pmop = HvPMROOT(pmstash);
                PMOP *lastpmop = NULL;
                while (pmop) {
@@ -977,8 +953,6 @@ Perl_scalar(pTHX_ OP *o)
 
     switch (o->op_type) {
     case OP_REPEAT:
-       if (o->op_private & OPpREPEAT_DOLIST)
-           null(((LISTOP*)cBINOPo->op_first)->op_first);
        scalar(cBINOPo->op_first);
        break;
     case OP_OR:
@@ -2650,15 +2624,16 @@ Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last)
 }
 
 static int
-utf8compare(const void *a, const void *b)
-{
-    int i;
-    for (i = 0; i < 10; i++) {
-       if ((*(U8**)a)[i] < (*(U8**)b)[i])
-           return -1;
-       if ((*(U8**)a)[i] > (*(U8**)b)[i])
-           return 1;
-    }
+uvcompare(const void *a, const void *b)
+{
+    if (*((UV *)a) < (*(UV *)b))
+       return -1;
+    if (*((UV *)a) > (*(UV *)b))
+       return 1;
+    if (*((UV *)a+1) < (*(UV *)b+1))
+       return -1;
+    if (*((UV *)a+1) > (*(UV *)b+1))
+       return 1;
     return 0;
 }
 
@@ -2709,50 +2684,71 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
        U32 final;
        I32 from_utf    = o->op_private & OPpTRANS_FROM_UTF;
        I32 to_utf      = o->op_private & OPpTRANS_TO_UTF;
-       U8* tsave = from_utf ? NULL : trlist_upgrade(&t, &tend);
-       U8* rsave = to_utf   ? NULL : trlist_upgrade(&r, &rend);
+       U8* tsave = NULL;
+       U8* rsave = NULL;
+
+       if (!from_utf) {
+           STRLEN len = tlen;
+           tsave = t = bytes_to_utf8(t, &len);
+           tend = t + len;
+       }
+       if (!to_utf && rlen) {
+           STRLEN len = rlen;
+           rsave = r = bytes_to_utf8(r, &len);
+           rend = r + len;
+       }
+
+/* There are several snags with this code on EBCDIC:
+   1. 0xFF is a legal UTF-EBCDIC byte (there are no illegal bytes).
+   2. scan_const() in toke.c has encoded chars in native encoding which makes
+      ranges at least in EBCDIC 0..255 range the bottom odd.
+*/
 
        if (complement) {
            U8 tmpbuf[UTF8_MAXLEN+1];
-           U8** cp;
+           UV *cp;
            UV nextmin = 0;
-           New(1109, cp, tlen, U8*);
+           New(1109, cp, 2*tlen, UV);
            i = 0;
            transv = newSVpvn("",0);
            while (t < tend) {
-               cp[i++] = t;
-               t += UTF8SKIP(t);
-               if (t < tend && *t == 0xff) {
+               cp[2*i] = utf8n_to_uvuni(t, tend-t, &ulen, 0);
+               t += ulen;
+               if (t < tend && NATIVE_TO_UTF(*t) == 0xff) {
                    t++;
-                   t += UTF8SKIP(t);
+                   cp[2*i+1] = utf8n_to_uvuni(t, tend-t, &ulen, 0);
+                   t += ulen;
+               }
+               else {
+                cp[2*i+1] = cp[2*i];
                }
+               i++;
            }
-           qsort(cp, i, sizeof(U8*), utf8compare);
+           qsort(cp, i, 2*sizeof(UV), uvcompare);
            for (j = 0; j < i; j++) {
-               U8 *s = cp[j];
-               I32 cur = j < i - 1 ? cp[j+1] - s : tend - s;
-               /* CHECKME: Use unicode code points for ranges - needs more thought ... NI-S */
-               UV  val = utf8n_to_uvuni(s, cur, &ulen, 0);
-               s += ulen;
+               UV  val = cp[2*j];
                diff = val - nextmin;
                if (diff > 0) {
                    t = uvuni_to_utf8(tmpbuf,nextmin);
                    sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
                    if (diff > 1) {
+                       U8  range_mark = UTF_TO_NATIVE(0xff);
                        t = uvuni_to_utf8(tmpbuf, val - 1);
-                       sv_catpvn(transv, "\377", 1);
+                       sv_catpvn(transv, (char *)&range_mark, 1);
                        sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
                    }
                }
-               if (s < tend && *s == 0xff)
-                   val = utf8n_to_uvuni(s+1, cur - 1, &ulen, 0);
+               val = cp[2*j+1];
                if (val >= nextmin)
                    nextmin = val + 1;
            }
            t = uvuni_to_utf8(tmpbuf,nextmin);
            sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
+           {
+               U8 range_mark = UTF_TO_NATIVE(0xff);
+               sv_catpvn(transv, (char *)&range_mark, 1);
+           }
            t = uvuni_to_utf8(tmpbuf, 0x7fffffff);
-           sv_catpvn(transv, "\377", 1);
            sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
            t = (U8*)SvPVX(transv);
            tlen = SvCUR(transv);
@@ -2763,7 +2759,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
            r = t; rlen = tlen; rend = tend;
        }
        if (!squash) {
-               if (t == r ||
+               if ((!rlen && !del) || t == r ||
                    (tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
                {
                    o->op_private |= OPpTRANS_IDENTICAL;
@@ -2775,7 +2771,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
            if (tfirst > tlast) {
                tfirst = (I32)utf8n_to_uvuni(t, tend - t, &ulen, 0);
                t += ulen;
-               if (t < tend && *t == 0xff) {   /* illegal utf8 val indicates range */
+               if (t < tend && NATIVE_TO_UTF(*t) == 0xff) {    /* illegal utf8 val indicates range */
                    t++;
                    tlast = (I32)utf8n_to_uvuni(t, tend - t, &ulen, 0);
                    t += ulen;
@@ -2789,7 +2785,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                if (r < rend) {
                    rfirst = (I32)utf8n_to_uvuni(r, rend - r, &ulen, 0);
                    r += ulen;
-                   if (r < rend && *r == 0xff) {       /* illegal utf8 val indicates range */
+                   if (r < rend && NATIVE_TO_UTF(*r) == 0xff) {        /* illegal utf8 val indicates range */
                        r++;
                        rlast = (I32)utf8n_to_uvuni(r, rend - r, &ulen, 0);
                        r += ulen;
@@ -2832,9 +2828,10 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
 
                if (rfirst + diff > max)
                    max = rfirst + diff;
-               rfirst += diff + 1;
                if (!grows)
-                   grows = (UNISKIP(tfirst) < UNISKIP(rfirst));
+                   grows = (tfirst < rfirst &&
+                            UNISKIP(tfirst) < UNISKIP(rfirst + diff));
+               rfirst += diff + 1;
            }
            tfirst += diff + 1;
        }
@@ -2856,7 +2853,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
        if (transv)
            SvREFCNT_dec(transv);
 
-       if (!del && havefinal)
+       if (!del && havefinal && rlen)
            (void)hv_store((HV*)SvRV((cSVOPo->op_sv)), "FINAL", 5,
                           newSVuv((UV)final), 0);
 
@@ -2896,7 +2893,12 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
            }
        }
        if (!del) {
-           if (j >= rlen)
+           if (!rlen) {
+               j = rlen;
+               if (!squash)
+                   o->op_private |= OPpTRANS_IDENTICAL;
+           }
+           else if (j >= rlen)
                j = rlen - 1;
            else
                cPVOPo->op_pv = (char*)Renew(tbl, 0x101+rlen-j, short);
@@ -3304,6 +3306,20 @@ Perl_utilize(pTHX_ int aver, I32 floor, OP *version, OP *id, OP *arg)
     PL_expect = XSTATE;
 }
 
+/*
+=for apidoc load_module
+
+Loads the module whose name is pointed to by the string part of name.
+Note that the actual module name, not its filename, should be given.
+Eg, "Foo::Bar" instead of "Foo/Bar.pm".  flags can be any of
+PERL_LOADMOD_DENY, PERL_LOADMOD_NOIMPORT, or PERL_LOADMOD_IMPORT_OPS
+(or 0 for no flags). ver, if specified, provides version semantics
+similar to C<use Foo::Bar VERSION>.  The optional trailing SV*
+arguments can be used to specify arguments to the module's import()
+method, similar to C<use Foo::Bar VERSION LIST>.
+
+=cut */
+
 void
 Perl_load_module(pTHX_ U32 flags, SV *name, SV *ver, ...)
 {
@@ -5677,6 +5693,12 @@ Perl_ck_fun(pTHX_ OP *o)
                    list(kid);
                break;
            case OA_AVREF:
+               if ((type == OP_PUSH || type == OP_UNSHIFT)
+                   && !kid->op_sibling && ckWARN(WARN_SYNTAX))
+                   Perl_warner(aTHX_ WARN_SYNTAX,
+                       "Useless use of %s with no values",
+                       PL_op_desc[type]);
+                   
                if (kid->op_type == OP_CONST &&
                    (kid->op_private & OPpCONST_BARE))
                {
@@ -5840,11 +5862,14 @@ Perl_ck_glob(pTHX_ OP *o)
 #if !defined(PERL_EXTERNAL_GLOB)
     /* XXX this can be tightened up and made more failsafe. */
     if (!gv) {
+       GV *glob_gv;
        ENTER;
-       Perl_load_module(aTHX_ 0, newSVpvn("File::Glob", 10), Nullsv,
-                        /* null-terminated import list */
-                        newSVpvn(":globally", 9), Nullsv);
+       Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvn("File::Glob", 10), Nullsv,
+                        Nullsv, Nullsv);
        gv = gv_fetchpv("CORE::GLOBAL::glob", FALSE, SVt_PVCV);
+       glob_gv = gv_fetchpv("File::Glob::csh_glob", FALSE, SVt_PVCV);
+       GvCV(gv) = GvCV(glob_gv);
+       GvIMPORTED_CV_on(gv);
        LEAVE;
     }
 #endif /* PERL_EXTERNAL_GLOB */
@@ -6839,6 +6864,7 @@ Perl_peep(pTHX_ register OP *o)
            break;
 
        case OP_ENTERLOOP:
+       case OP_ENTERITER:
            o->op_seq = PL_op_seqmax++;
            while (cLOOP->op_redoop->op_type == OP_NULL)
                cLOOP->op_redoop = cLOOP->op_redoop->op_next;
@@ -7034,3 +7060,4 @@ const_sv_xsub(pTHXo_ CV* cv)
     ST(0) = (SV*)XSANY.any_ptr;
     XSRETURN(1);
 }
+