Integrate mainline (for regexp stuff).
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index 33915ed..0d4fc1d 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -179,7 +179,7 @@ int yyactlevel = -1;
 
 STATIC void
 S_tokereport(pTHX_ char *thing, char* s, I32 rv)
-{ 
+{
     SV *report;
     DEBUG_T({
         report = newSVpv(thing, 0);
@@ -838,7 +838,7 @@ Perl_str_to_version(pTHX_ SV *sv)
        STRLEN skip;
        UV n;
        if (utf)
-           n = utf8_to_uv((U8*)start, len, &skip, 0);
+           n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
        else {
            n = *(U8*)start;
            skip = 1;
@@ -1240,6 +1240,17 @@ S_scan_const(pTHX_ char *start)
                I32 min;                        /* first character in range */
                I32 max;                        /* last character in range */
 
+               if (utf) {
+                   char *c = (char*)utf8_hop((U8*)d, -1);
+                   char *e = d++;
+                   while (e-- > c)
+                       *(e + 1) = *e;
+                   *c = (char)0xff;
+                   /* mark the range as done, and continue */
+                   dorange = FALSE;
+                   didrange = TRUE;
+                   continue;
+               }
                i = d - SvPVX(sv);              /* remember current offset */
                SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
                d = SvPVX(sv) + i;              /* refresh d after realloc */
@@ -1464,9 +1475,9 @@ S_scan_const(pTHX_ char *start)
                        if (hicount) {
                            char *old_pvx = SvPVX(sv);
                            char *src, *dst;
-                         
+                       
                            d = SvGROW(sv,
-                                      SvCUR(sv) + hicount + 1) +
+                                      SvLEN(sv) + hicount + 1) +
                                         (d - old_pvx);
 
                            src = d - 1;
@@ -1486,7 +1497,7 @@ S_scan_const(pTHX_ char *start)
                     }
 
                     if (has_utf8 || uv > 255) {
-                       d = (char*)uv_to_utf8((U8*)d, uv);
+                       d = (char*)uvchr_to_utf8((U8*)d, uv);
                        has_utf8 = TRUE;
                        if (PL_lex_inwhat == OP_TRANS &&
                            PL_sublex_info.sub_op) {
@@ -1539,7 +1550,7 @@ S_scan_const(pTHX_ char *start)
                    if (len > e - s + 4) {
                        char *odest = SvPVX(sv);
 
-                       SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
+                       SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
                        d = SvPVX(sv) + (d - odest);
                    }
                    Copy(str, d, len, char);
@@ -1611,14 +1622,14 @@ S_scan_const(pTHX_ char *start)
            STRLEN len = (STRLEN) -1;
            UV uv;
            if (this_utf8) {
-               uv = utf8_to_uv((U8*)s, send - s, &len, 0);
+               uv = utf8n_to_uvchr((U8*)s, send - s, &len, 0);
            }
            if (len == (STRLEN)-1) {
                /* Illegal UTF8 (a high-bit byte), make it valid. */
                char *old_pvx = SvPVX(sv);
                /* need space for one extra char (NOTE: SvCUR() not set here) */
                d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
-               d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
+               d = (char*)uvchr_to_utf8((U8*)d, (U8)*s++);
            }
            else {
                while (len--)
@@ -6303,9 +6314,6 @@ S_scan_trans(pTHX_ char *start)
        Perl_croak(aTHX_ "Transliteration replacement not terminated");
     }
 
-    New(803,tbl,256,short);
-    o = newPVOP(OP_TRANS, 0, (char*)tbl);
-
     complement = del = squash = 0;
     while (strchr("cds", *s)) {
        if (*s == 'c')
@@ -6316,6 +6324,9 @@ S_scan_trans(pTHX_ char *start)
            squash = OPpTRANS_SQUASH;
        s++;
     }
+
+    New(803, tbl, complement&&!del?258:256, short);
+    o = newPVOP(OP_TRANS, 0, (char*)tbl);
     o->op_private = del|squash|complement|
       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
@@ -7262,7 +7273,8 @@ vstring:
                                            "Integer overflow in decimal number");
                        }
                    }
-                   tmpend = uv_to_utf8(tmpbuf, rev);
+                   /* Append native character for the rev point */
+                   tmpend = uvchr_to_utf8(tmpbuf, rev);
                    if (rev > revmax)
                        revmax = rev;
                    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
@@ -7278,11 +7290,11 @@ vstring:
 
                SvPOK_on(sv);
                SvREADONLY_on(sv);
-               if (revmax > 127) {
-                   SvUTF8_on(sv);
+               /* if (revmax > 127) { */
+                   SvUTF8_on(sv); /*
                    if (revmax < 256)
                      sv_utf8_downgrade(sv, TRUE);
-               }
+               } */
            }
        }
        break;