Re: [perl #33892] Add Interix support
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index d79c123..5d5abf4 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
+ *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -275,9 +275,7 @@ S_tokereport(pTHX_ char* s, I32 rv)
        char *name = Nullch;
        enum token_type type = TOKENTYPE_NONE;
        struct debug_tokens *p;
-        SV* report = NEWSV(0, 60);
-
-        Perl_sv_catpvf(aTHX_ report, "<== ");
+        SV* report = newSVpvn("<== ", 4);
 
        for (p = debug_tokens; p->token; p++) {
            if (p->token == (int)rv) {
@@ -287,11 +285,11 @@ S_tokereport(pTHX_ char* s, I32 rv)
            }
        }
        if (name)
-           Perl_sv_catpvf(aTHX_ report, "%s", name);
+           Perl_sv_catpv(aTHX_ report, name);
        else if ((char)rv > ' ' && (char)rv < '~')
            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
        else if (!rv)
-           Perl_sv_catpvf(aTHX_ report, "EOF");
+           Perl_sv_catpv(aTHX_ report, "EOF");
        else
            Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
        switch (type) {
@@ -715,15 +713,17 @@ S_skipspace(pTHX_ register char *s)
                             (prevlen = SvCUR(PL_linestr)))) == Nullch)
        {
            /* end of file.  Add on the -p or -n magic */
-           if (PL_minus_n || PL_minus_p) {
-               sv_setpv(PL_linestr,PL_minus_p ?
-                        ";}continue{print or die qq(-p destination: $!\\n)" :
-                        "");
-               sv_catpv(PL_linestr,";}");
+           if (PL_minus_p) {
+               sv_setpv(PL_linestr,
+                        ";}continue{print or die qq(-p destination: $!\\n);}");
                PL_minus_n = PL_minus_p = 0;
            }
+           else if (PL_minus_n) {
+               sv_setpvn(PL_linestr, ";}", 2);
+               PL_minus_n = 0;
+           }
            else
-               sv_setpv(PL_linestr,";");
+               sv_setpvn(PL_linestr,";", 1);
 
            /* reset variables for next time we lex */
            PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
@@ -870,6 +870,15 @@ S_force_next(pTHX_ I32 type)
     }
 }
 
+STATIC SV *
+S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
+{
+    SV *sv = newSVpvn(start,len);
+    if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
+       SvUTF8_on(sv);
+    return sv;
+}
+
 /*
  * S_force_word
  * When the lexer knows the next thing is a word (for instance, it has
@@ -909,10 +918,10 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
                PL_expect = XOPERATOR;
            }
        }
-       PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
+       PL_nextval[PL_nexttoke].opval
+           = (OP*)newSVOP(OP_CONST,0,
+                          S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
        PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
-       if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
-           SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
        force_next(token);
     }
     return s;
@@ -1695,7 +1704,7 @@ S_scan_const(pTHX_ char *start)
                         UV uv = utf8_to_uvchr((U8*)str, 0);
 
                         if (uv < 0x100) {
-                             U8 tmpbuf[UTF8_MAXLEN+1], *d;
+                             U8 tmpbuf[UTF8_MAXBYTES+1], *d;
 
                              d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
                              sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
@@ -2568,12 +2577,12 @@ Perl_yylex(pTHX)
            PL_preambled = TRUE;
            sv_setpv(PL_linestr,incl_perldb());
            if (SvCUR(PL_linestr))
-               sv_catpv(PL_linestr,";");
+               sv_catpvn(PL_linestr,";", 1);
            if (PL_preambleav){
                while(AvFILLp(PL_preambleav) >= 0) {
                    SV *tmpsv = av_shift(PL_preambleav);
                    sv_catsv(PL_linestr, tmpsv);
-                   sv_catpv(PL_linestr, ";");
+                   sv_catpvn(PL_linestr, ";", 1);
                    sv_free(tmpsv);
                }
                sv_free((SV*)PL_preambleav);
@@ -2590,25 +2599,30 @@ Perl_yylex(pTHX)
                              && strchr(PL_splitstr + 1, *PL_splitstr))
                            Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
                        else {
-                           char delim;
-                           s = "'~#\200\1'"; /* surely one char is unused...*/
-                           while (s[1] && strchr(PL_splitstr, *s))  s++;
-                           delim = *s;
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
-                                     "q" + (delim == '\''), delim);
-                           for (s = PL_splitstr; *s; s++) {
+                           /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
+                              bytes can be used as quoting characters.  :-) */
+                           /* The count here deliberately includes the NUL
+                              that terminates the C string constant.  This
+                              embeds the opening NUL into the string.  */
+                           sv_catpvn(PL_linestr, "our @F=split(q", 15);
+                           s = PL_splitstr;
+                           do {
+                               /* Need to \ \s  */
                                if (*s == '\\')
-                                   sv_catpvn(PL_linestr, "\\", 1);
+                                   sv_catpvn(PL_linestr, s, 1);
                                sv_catpvn(PL_linestr, s, 1);
-                           }
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
+                           } while (*s++);
+                           /* This loop will embed the trailing NUL of
+                              PL_linestr as the last thing it does before
+                              terminating.  */
+                           sv_catpvn(PL_linestr, ");", 2);
                        }
                    }
                    else
                        sv_catpv(PL_linestr,"our @F=split(' ');");
                }
            }
-           sv_catpv(PL_linestr, "\n");
+           sv_catpvn(PL_linestr, "\n", 1);
            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = Nullch;
@@ -2638,8 +2652,8 @@ Perl_yylex(pTHX)
                    PL_doextract = FALSE;
                }
                if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
-                   sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
-                   sv_catpv(PL_linestr,";}");
+                   sv_setpv(PL_linestr,PL_minus_p
+                            ? ";}continue{print;}" : ";}");
                    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                    PL_last_lop = PL_last_uni = Nullch;
@@ -3008,7 +3022,7 @@ Perl_yylex(pTHX)
                 * subroutine call (or a -bareword), then. */
                DEBUG_T( { PerlIO_printf(Perl_debug_log,
                        "### '-%c' looked like a file test but was not\n",
-                       tmp);
+                       (int) tmp);
                } );
                s = --PL_bufptr;
            }
@@ -4003,10 +4017,10 @@ Perl_yylex(pTHX)
        /* Is this a word before a => operator? */
        if (*d == '=' && d[1] == '>') {
            CLINE;
-           yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
+           yylval.opval
+               = (OP*)newSVOP(OP_CONST, 0,
+                              S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
            yylval.opval->op_private = OPpCONST_BARE;
-           if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
-             SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
            TERM(WORD);
        }
 
@@ -4128,8 +4142,12 @@ Perl_yylex(pTHX)
                    sv = newSVpvn("CORE::GLOBAL::",14);
                    sv_catpv(sv,PL_tokenbuf);
                }
-               else
-                   sv = newSVpv(PL_tokenbuf,0);
+               else {
+                   /* If len is 0, newSVpv does strlen(), which is correct.
+                      If len is non-zero, then it will be the true length,
+                      and so the scalar will be created correctly.  */
+                   sv = newSVpv(PL_tokenbuf,len);
+               }
 
                /* Presume this is going to be a bareword of some sort. */
 
@@ -4253,7 +4271,7 @@ Perl_yylex(pTHX)
                        char *proto = SvPV((SV*)cv, len);
                        if (!len)
                            TERM(FUNC0SUB);
-                       if (strEQ(proto, "$"))
+                       if (*proto == '$' && proto[1] == '\0')
                            OPERATOR(UNIOPSUB);
                        while (*proto == ';')
                            proto++;
@@ -5506,7 +5524,7 @@ S_pending_ident(pTHX)
                 sv_catpv(sym, PL_tokenbuf+1);
                 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
                 yylval.opval->op_private = OPpCONST_ENTERED;
-                gv_fetchpv(SvPVX(sym),
+                gv_fetchsv(sym,
                     (PL_in_eval
                         ? (GV_ADDMULTI | GV_ADDINEVAL)
                         : GV_ADDMULTI
@@ -5788,7 +5806,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
            else if (*d == 'l') {
                if (strEQ(d,"login"))           return -KEY_getlogin;
            }
-           else if (strEQ(d,"c"))              return -KEY_getc;
+           else if (*d == 'c' && d[1] == '\0') return -KEY_getc;
            break;
        }
        switch (len) {
@@ -5935,12 +5953,16 @@ Perl_keyword(pTHX_ register char *d, I32 len)
        }
        break;
     case 'q':
-       if (len <= 2) {
-           if (strEQ(d,"q"))                   return KEY_q;
-           if (strEQ(d,"qr"))                  return KEY_qr;
-           if (strEQ(d,"qq"))                  return KEY_qq;
-           if (strEQ(d,"qw"))                  return KEY_qw;
-           if (strEQ(d,"qx"))                  return KEY_qx;
+       if (len == 1) {
+                                               return KEY_q;
+       }
+       else if (len == 2) {
+           switch (d[1]) {
+           case 'r':                           return KEY_qr;
+           case 'q':                           return KEY_qq;
+           case 'w':                           return KEY_qw;
+           case 'x':                           return KEY_qx;
+           };
        }
        else if (strEQ(d,"quotemeta"))          return -KEY_quotemeta;
        break;
@@ -6319,6 +6341,9 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
     return res;
 }
 
+/* Returns a NUL terminated string, with the length of the string written to
+   *slp
+   */
 STATIC char *
 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
 {
@@ -7112,7 +7137,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     I32 brackets = 1;                  /* bracket nesting level */
     bool has_utf8 = FALSE;             /* is there any utf8 content? */
     I32 termcode;                      /* terminating char. code */
-    U8 termstr[UTF8_MAXLEN];           /* terminating string */
+    U8 termstr[UTF8_MAXBYTES];         /* terminating string */
     STRLEN termlen;                    /* length of terminating string */
     char *last = NULL;                 /* last position for nesting bracket */
 
@@ -8133,7 +8158,7 @@ utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     I32 count = FILTER_READ(idx+1, sv, maxlen);
     DEBUG_P(PerlIO_printf(Perl_debug_log,
                          "utf16_textfilter(%p): %d %d (%d)\n",
-                         utf16_textfilter, idx, maxlen, count));
+                         utf16_textfilter, idx, maxlen, (int) count));
     if (count) {
        U8* tmps;
        I32 newlen;
@@ -8154,7 +8179,7 @@ utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     I32 count = FILTER_READ(idx+1, sv, maxlen);
     DEBUG_P(PerlIO_printf(Perl_debug_log,
                          "utf16rev_textfilter(%p): %d %d (%d)\n",
-                         utf16rev_textfilter, idx, maxlen, count));
+                         utf16rev_textfilter, idx, maxlen, (int) count));
     if (count) {
        U8* tmps;
        I32 newlen;
@@ -8205,7 +8230,7 @@ Perl_scan_vstring(pTHX_ char *s, SV *sv)
 
     if (!isALPHA(*pos)) {
        UV rev;
-       U8 tmpbuf[UTF8_MAXLEN+1];
+       U8 tmpbuf[UTF8_MAXBYTES+1];
        U8 *tmpend;
 
        if (*s == 'v') s++;  /* get past 'v' */