Re: [perl #33892] Add Interix support
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index 4e1aa8e..5d5abf4 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
+ *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -275,9 +275,7 @@ S_tokereport(pTHX_ char* s, I32 rv)
        char *name = Nullch;
        enum token_type type = TOKENTYPE_NONE;
        struct debug_tokens *p;
-        SV* report = NEWSV(0, 60);
-
-        Perl_sv_catpvf(aTHX_ report, "<== ");
+        SV* report = newSVpvn("<== ", 4);
 
        for (p = debug_tokens; p->token; p++) {
            if (p->token == (int)rv) {
@@ -287,11 +285,11 @@ S_tokereport(pTHX_ char* s, I32 rv)
            }
        }
        if (name)
-           Perl_sv_catpvf(aTHX_ report, "%s", name);
+           Perl_sv_catpv(aTHX_ report, name);
        else if ((char)rv > ' ' && (char)rv < '~')
            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
        else if (!rv)
-           Perl_sv_catpvf(aTHX_ report, "EOF");
+           Perl_sv_catpv(aTHX_ report, "EOF");
        else
            Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
        switch (type) {
@@ -309,8 +307,11 @@ S_tokereport(pTHX_ char* s, I32 rv)
            Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
            break;
        case TOKENTYPE_OPVAL:
-           Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
+           if (yylval.opval)
+               Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
                                    PL_op_name[yylval.opval->op_type]);
+           else
+               Perl_sv_catpv(aTHX_ report, "(opval=null)");
            break;
        }
         Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
@@ -712,15 +713,17 @@ S_skipspace(pTHX_ register char *s)
                             (prevlen = SvCUR(PL_linestr)))) == Nullch)
        {
            /* end of file.  Add on the -p or -n magic */
-           if (PL_minus_n || PL_minus_p) {
-               sv_setpv(PL_linestr,PL_minus_p ?
-                        ";}continue{print or die qq(-p destination: $!\\n)" :
-                        "");
-               sv_catpv(PL_linestr,";}");
+           if (PL_minus_p) {
+               sv_setpv(PL_linestr,
+                        ";}continue{print or die qq(-p destination: $!\\n);}");
                PL_minus_n = PL_minus_p = 0;
            }
+           else if (PL_minus_n) {
+               sv_setpvn(PL_linestr, ";}", 2);
+               PL_minus_n = 0;
+           }
            else
-               sv_setpv(PL_linestr,";");
+               sv_setpvn(PL_linestr,";", 1);
 
            /* reset variables for next time we lex */
            PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
@@ -867,6 +870,15 @@ S_force_next(pTHX_ I32 type)
     }
 }
 
+STATIC SV *
+S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
+{
+    SV *sv = newSVpvn(start,len);
+    if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
+       SvUTF8_on(sv);
+    return sv;
+}
+
 /*
  * S_force_word
  * When the lexer knows the next thing is a word (for instance, it has
@@ -906,10 +918,10 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
                PL_expect = XOPERATOR;
            }
        }
-       PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
+       PL_nextval[PL_nexttoke].opval
+           = (OP*)newSVOP(OP_CONST,0,
+                          S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
        PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
-       if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
-           SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
        force_next(token);
     }
     return s;
@@ -1692,7 +1704,7 @@ S_scan_const(pTHX_ char *start)
                         UV uv = utf8_to_uvchr((U8*)str, 0);
 
                         if (uv < 0x100) {
-                             U8 tmpbuf[UTF8_MAXLEN+1], *d;
+                             U8 tmpbuf[UTF8_MAXBYTES+1], *d;
 
                              d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
                              sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
@@ -1963,8 +1975,10 @@ S_intuit_more(pTHX_ register char *s)
                    weight -= 5;        /* cope with negative subscript */
                break;
            default:
-               if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
-                       isALPHA(*s) && s[1] && isALPHA(s[1])) {
+               if (!isALNUM(last_un_char)
+                   && !(last_un_char == '$' || last_un_char == '@'
+                        || last_un_char == '&')
+                   && isALPHA(*s) && s[1] && isALPHA(s[1])) {
                    char *d = tmpbuf;
                    while (isALPHA(*s))
                        *d++ = *s++;
@@ -2154,19 +2168,17 @@ Perl_filter_del(pTHX_ filter_t funcp)
 }
 
 
-/* Invoke the n'th filter function for the current rsfp.        */
+/* Invoke the idxth filter function for the current rsfp.       */
+/* maxlen 0 = read one text line */
 I32
 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
-                               /* 0 = read one text line */
 {
     filter_t funcp;
     SV *datasv = NULL;
 
     if (!PL_rsfp_filters)
        return -1;
-    if (idx > AvFILLp(PL_rsfp_filters)){       /* Any more filters?    */
+    if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?   */
        /* Provide a default input filter to make life easy.    */
        /* Note that we append to the line. This is handy.      */
        DEBUG_P(PerlIO_printf(Perl_debug_log,
@@ -2197,7 +2209,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
        return SvCUR(buf_sv);
     }
     /* Skip this filter slot if filter has been deleted        */
-    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
+    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
        DEBUG_P(PerlIO_printf(Perl_debug_log,
                              "filter_read %d: skipped (filter deleted)\n",
                              idx));
@@ -2223,7 +2235,6 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
     }
 #endif
     if (PL_rsfp_filters) {
-
        if (!append)
             SvCUR_set(sv, 0);  /* start with empty line        */
         if (FILTER_READ(0, sv, 0) > 0)
@@ -2300,7 +2311,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
 int
 Perl_yylex(pTHX)
 {
-    register char *s;
+    register char *s = PL_bufptr;
     register char *d;
     register I32 tmp;
     STRLEN len;
@@ -2358,7 +2369,8 @@ Perl_yylex(pTHX)
                oldmod = PL_lex_casestack[--PL_lex_casemods];
                PL_lex_casestack[PL_lex_casemods] = '\0';
 
-               if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
+               if (PL_bufptr != PL_bufend
+                   && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
                    PL_bufptr += 2;
                    PL_lex_state = LEX_INTERPCONCAT;
                }
@@ -2381,7 +2393,7 @@ Perl_yylex(pTHX)
            else {
                if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                    tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
-               if (strchr("LU", *s) &&
+               if ((*s == 'L' || *s == 'U') &&
                    (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
                    PL_lex_casestack[--PL_lex_casemods] = '\0';
                    return REPORT(')');
@@ -2411,7 +2423,11 @@ Perl_yylex(pTHX)
            if (PL_lex_starts) {
                s = PL_bufptr;
                PL_lex_starts = 0;
-               Aop(OP_CONCAT);
+               /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+               if (PL_lex_casemods == 1 && PL_lex_inpat)
+                   OPERATOR(',');
+               else
+                   Aop(OP_CONCAT);
            }
            else
                return yylex();
@@ -2441,7 +2457,11 @@ Perl_yylex(pTHX)
        }
        if (PL_lex_starts++) {
            s = PL_bufptr;
-           Aop(OP_CONCAT);
+           /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+           if (!PL_lex_casemods && PL_lex_inpat)
+               OPERATOR(',');
+           else
+               Aop(OP_CONCAT);
        }
        return yylex();
 
@@ -2495,8 +2515,13 @@ Perl_yylex(pTHX)
            PL_nextval[PL_nexttoke] = yylval;
            PL_expect = XTERM;
            force_next(THING);
-           if (PL_lex_starts++)
-               Aop(OP_CONCAT);
+           if (PL_lex_starts++) {
+               /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+               if (!PL_lex_casemods && PL_lex_inpat)
+                   OPERATOR(',');
+               else
+                   Aop(OP_CONCAT);
+           }
            else {
                PL_bufptr = s;
                return yylex();
@@ -2552,12 +2577,12 @@ Perl_yylex(pTHX)
            PL_preambled = TRUE;
            sv_setpv(PL_linestr,incl_perldb());
            if (SvCUR(PL_linestr))
-               sv_catpv(PL_linestr,";");
+               sv_catpvn(PL_linestr,";", 1);
            if (PL_preambleav){
                while(AvFILLp(PL_preambleav) >= 0) {
                    SV *tmpsv = av_shift(PL_preambleav);
                    sv_catsv(PL_linestr, tmpsv);
-                   sv_catpv(PL_linestr, ";");
+                   sv_catpvn(PL_linestr, ";", 1);
                    sv_free(tmpsv);
                }
                sv_free((SV*)PL_preambleav);
@@ -2569,29 +2594,35 @@ Perl_yylex(pTHX)
                    sv_catpv(PL_linestr,"chomp;");
                if (PL_minus_a) {
                    if (PL_minus_F) {
-                       if (strchr("/'\"", *PL_splitstr)
+                       if ((*PL_splitstr == '/' || *PL_splitstr == '\''
+                            || *PL_splitstr == '"')
                              && strchr(PL_splitstr + 1, *PL_splitstr))
                            Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
                        else {
-                           char delim;
-                           s = "'~#\200\1'"; /* surely one char is unused...*/
-                           while (s[1] && strchr(PL_splitstr, *s))  s++;
-                           delim = *s;
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
-                                     "q" + (delim == '\''), delim);
-                           for (s = PL_splitstr; *s; s++) {
+                           /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
+                              bytes can be used as quoting characters.  :-) */
+                           /* The count here deliberately includes the NUL
+                              that terminates the C string constant.  This
+                              embeds the opening NUL into the string.  */
+                           sv_catpvn(PL_linestr, "our @F=split(q", 15);
+                           s = PL_splitstr;
+                           do {
+                               /* Need to \ \s  */
                                if (*s == '\\')
-                                   sv_catpvn(PL_linestr, "\\", 1);
+                                   sv_catpvn(PL_linestr, s, 1);
                                sv_catpvn(PL_linestr, s, 1);
-                           }
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
+                           } while (*s++);
+                           /* This loop will embed the trailing NUL of
+                              PL_linestr as the last thing it does before
+                              terminating.  */
+                           sv_catpvn(PL_linestr, ");", 2);
                        }
                    }
                    else
                        sv_catpv(PL_linestr,"our @F=split(' ');");
                }
            }
-           sv_catpv(PL_linestr, "\n");
+           sv_catpvn(PL_linestr, "\n", 1);
            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = Nullch;
@@ -2621,8 +2652,8 @@ Perl_yylex(pTHX)
                    PL_doextract = FALSE;
                }
                if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
-                   sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
-                   sv_catpv(PL_linestr,";}");
+                   sv_setpv(PL_linestr,PL_minus_p
+                            ? ";}continue{print;}" : ";}");
                    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                    PL_last_lop = PL_last_uni = Nullch;
@@ -2991,7 +3022,7 @@ Perl_yylex(pTHX)
                 * subroutine call (or a -bareword), then. */
                DEBUG_T( { PerlIO_printf(Perl_debug_log,
                        "### '-%c' looked like a file test but was not\n",
-                       tmp);
+                       (int) tmp);
                } );
                s = --PL_bufptr;
            }
@@ -3701,7 +3732,8 @@ Perl_yylex(pTHX)
                PL_expect = XTERM;              /* e.g. print $fh 3 */
            else if (*s == '.' && isDIGIT(s[1]))
                PL_expect = XTERM;              /* e.g. print $fh .3 */
-           else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
+           else if ((*s == '?' || *s == '-' || *s == '+')
+                    && !isSPACE(s[1]) && s[1] != '=')
                PL_expect = XTERM;              /* e.g. print $fh -1 */
            else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
                PL_expect = XTERM;              /* e.g. print $fh /.../
@@ -3985,10 +4017,10 @@ Perl_yylex(pTHX)
        /* Is this a word before a => operator? */
        if (*d == '=' && d[1] == '>') {
            CLINE;
-           yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
+           yylval.opval
+               = (OP*)newSVOP(OP_CONST, 0,
+                              S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
            yylval.opval->op_private = OPpCONST_BARE;
-           if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
-             SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
            TERM(WORD);
        }
 
@@ -4024,6 +4056,16 @@ Perl_yylex(pTHX)
            {
                tmp = 0;                /* any sub overrides "weak" keyword */
            }
+           else if (gv && !gvp
+                   && tmp == -KEY_err
+                   && GvCVu(gv)
+                   && PL_expect != XOPERATOR
+                   && PL_expect != XTERMORDORDOR)
+           {
+               /* any sub overrides the "err" keyword, except when really an
+                * operator is expected */
+               tmp = 0;
+           }
            else {                      /* no override */
                tmp = -tmp;
                if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
@@ -4100,8 +4142,12 @@ Perl_yylex(pTHX)
                    sv = newSVpvn("CORE::GLOBAL::",14);
                    sv_catpv(sv,PL_tokenbuf);
                }
-               else
-                   sv = newSVpv(PL_tokenbuf,0);
+               else {
+                   /* If len is 0, newSVpv does strlen(), which is correct.
+                      If len is non-zero, then it will be the true length,
+                      and so the scalar will be created correctly.  */
+                   sv = newSVpv(PL_tokenbuf,len);
+               }
 
                /* Presume this is going to be a bareword of some sort. */
 
@@ -4225,7 +4271,7 @@ Perl_yylex(pTHX)
                        char *proto = SvPV((SV*)cv, len);
                        if (!len)
                            TERM(FUNC0SUB);
-                       if (strEQ(proto, "$"))
+                       if (*proto == '$' && proto[1] == '\0')
                            OPERATOR(UNIOPSUB);
                        while (*proto == ';')
                            proto++;
@@ -4258,7 +4304,8 @@ Perl_yylex(pTHX)
                }
 
            safe_bareword:
-               if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
+               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+                   && ckWARN_d(WARN_AMBIGUOUS)) {
                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
                        "Operator or semicolon missing before %c%s",
                        lastchar, PL_tokenbuf);
@@ -4282,7 +4329,7 @@ Perl_yylex(pTHX)
        case KEY___PACKAGE__:
            yylval.opval = (OP*)newSVOP(OP_CONST, 0,
                                        (PL_curstash
-                                        ? newSVsv(PL_curstname)
+                                        ? newSVpv(HvNAME(PL_curstash), 0)
                                         : &PL_sv_undef));
            TERM(THING);
 
@@ -5477,7 +5524,7 @@ S_pending_ident(pTHX)
                 sv_catpv(sym, PL_tokenbuf+1);
                 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
                 yylval.opval->op_private = OPpCONST_ENTERED;
-                gv_fetchpv(SvPVX(sym),
+                gv_fetchsv(sym,
                     (PL_in_eval
                         ? (GV_ADDMULTI | GV_ADDINEVAL)
                         : GV_ADDMULTI
@@ -5759,7 +5806,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
            else if (*d == 'l') {
                if (strEQ(d,"login"))           return -KEY_getlogin;
            }
-           else if (strEQ(d,"c"))              return -KEY_getc;
+           else if (*d == 'c' && d[1] == '\0') return -KEY_getc;
            break;
        }
        switch (len) {
@@ -5906,12 +5953,16 @@ Perl_keyword(pTHX_ register char *d, I32 len)
        }
        break;
     case 'q':
-       if (len <= 2) {
-           if (strEQ(d,"q"))                   return KEY_q;
-           if (strEQ(d,"qr"))                  return KEY_qr;
-           if (strEQ(d,"qq"))                  return KEY_qq;
-           if (strEQ(d,"qw"))                  return KEY_qw;
-           if (strEQ(d,"qx"))                  return KEY_qx;
+       if (len == 1) {
+                                               return KEY_q;
+       }
+       else if (len == 2) {
+           switch (d[1]) {
+           case 'r':                           return KEY_qr;
+           case 'q':                           return KEY_qq;
+           case 'w':                           return KEY_qw;
+           case 'x':                           return KEY_qx;
+           };
        }
        else if (strEQ(d,"quotemeta"))          return -KEY_quotemeta;
        break;
@@ -6290,6 +6341,9 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
     return res;
 }
 
+/* Returns a NUL terminated string, with the length of the string written to
+   *slp
+   */
 STATIC char *
 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
 {
@@ -6383,7 +6437,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
        return s;
     }
     if (*s == '$' && s[1] &&
-       (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+       (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
     {
        return s;
     }
@@ -6636,15 +6690,23 @@ S_scan_trans(pTHX_ char *start)
     }
 
     complement = del = squash = 0;
-    while (strchr("cds", *s)) {
-       if (*s == 'c')
+    while (1) {
+       switch (*s) {
+       case 'c':
            complement = OPpTRANS_COMPLEMENT;
-       else if (*s == 'd')
+           break;
+       case 'd':
            del = OPpTRANS_DELETE;
-       else if (*s == 's')
+           break;
+       case 's':
            squash = OPpTRANS_SQUASH;
+           break;
+       default:
+           goto no_more;
+       }
        s++;
     }
+  no_more:
 
     New(803, tbl, complement&&!del?258:256, short);
     o = newPVOP(OP_TRANS, 0, (char*)tbl);
@@ -6677,7 +6739,7 @@ S_scan_heredoc(pTHX_ register char *s)
     if (!outer)
        *d++ = '\n';
     for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
-    if (*peek && strchr("`'\"",*peek)) {
+    if (*peek == '`' || *peek == '\'' || *peek =='"') {
        s = peek;
        term = *s++;
        s = delimcpy(d, e, s, PL_bufend, term, &len);
@@ -6831,10 +6893,11 @@ S_scan_heredoc(pTHX_ register char *s)
            av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
        }
        if (*s == term && memEQ(s,PL_tokenbuf,len)) {
-           s = PL_bufend - 1;
-           *s = ' ';
+           STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
+           *(SvPVX(PL_linestr) + off ) = ' ';
            sv_catsv(PL_linestr,herewas);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+           s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
        }
        else {
            s = PL_bufend;
@@ -7074,7 +7137,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     I32 brackets = 1;                  /* bracket nesting level */
     bool has_utf8 = FALSE;             /* is there any utf8 content? */
     I32 termcode;                      /* terminating char. code */
-    U8 termstr[UTF8_MAXLEN];           /* terminating string */
+    U8 termstr[UTF8_MAXBYTES];         /* terminating string */
     STRLEN termlen;                    /* length of terminating string */
     char *last = NULL;                 /* last position for nesting bracket */
 
@@ -7635,7 +7698,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
        }
 
        /* read exponent part, if present */
-       if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
+       if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
            floatit = TRUE;
            s++;
 
@@ -7996,10 +8059,9 @@ S_swallow_bom(pTHX_ U8 *s)
 
                filter_add(utf16rev_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend =
-                    (char*)utf16_to_utf8_reversed(s, news,
-                                                  PL_bufend - (char*)s - 1,
-                                                  &newlen);
+               utf16_to_utf8_reversed(s, news,
+                                      PL_bufend - (char*)s - 1,
+                                      &newlen);
                sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
                SvUTF8_on(PL_linestr);
@@ -8023,10 +8085,9 @@ S_swallow_bom(pTHX_ U8 *s)
 
                filter_add(utf16_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend =
-                    (char*)utf16_to_utf8(s, news,
-                                         PL_bufend - (char*)s,
-                                         &newlen);
+               utf16_to_utf8(s, news,
+                             PL_bufend - (char*)s,
+                             &newlen);
                sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
                SvUTF8_on(PL_linestr);
@@ -8093,38 +8154,42 @@ restore_rsfp(pTHX_ void *f)
 static I32
 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
+    STRLEN old = SvCUR(sv);
     I32 count = FILTER_READ(idx+1, sv, maxlen);
+    DEBUG_P(PerlIO_printf(Perl_debug_log,
+                         "utf16_textfilter(%p): %d %d (%d)\n",
+                         utf16_textfilter, idx, maxlen, (int) count));
     if (count) {
        U8* tmps;
-       U8* tend;
        I32 newlen;
        New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
-       if (!*SvPV_nolen(sv))
-       /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
-       return count;
-
-       tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
-       sv_usepvn(sv, (char*)tmps, tend - tmps);
+       Copy(SvPVX(sv), tmps, old, char);
+       utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+                     SvCUR(sv) - old, &newlen);
+       sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
     }
-    return count;
+    DEBUG_P({sv_dump(sv);});
+    return SvCUR(sv);
 }
 
 static I32
 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
+    STRLEN old = SvCUR(sv);
     I32 count = FILTER_READ(idx+1, sv, maxlen);
+    DEBUG_P(PerlIO_printf(Perl_debug_log,
+                         "utf16rev_textfilter(%p): %d %d (%d)\n",
+                         utf16rev_textfilter, idx, maxlen, (int) count));
     if (count) {
        U8* tmps;
-       U8* tend;
        I32 newlen;
-       if (!*SvPV_nolen(sv))
-       /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
-       return count;
-
        New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
-       tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
-       sv_usepvn(sv, (char*)tmps, tend - tmps);
+       Copy(SvPVX(sv), tmps, old, char);
+       utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+                     SvCUR(sv) - old, &newlen);
+       sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
     }
+    DEBUG_P({ sv_dump(sv); });
     return count;
 }
 #endif
@@ -8165,7 +8230,7 @@ Perl_scan_vstring(pTHX_ char *s, SV *sv)
 
     if (!isALPHA(*pos)) {
        UV rev;
-       U8 tmpbuf[UTF8_MAXLEN+1];
+       U8 tmpbuf[UTF8_MAXBYTES+1];
        U8 *tmpend;
 
        if (*s == 'v') s++;  /* get past 'v' */