Re: [perl #33892] Add Interix support
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index 7c9d88f..5d5abf4 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
+ *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -275,9 +275,7 @@ S_tokereport(pTHX_ char* s, I32 rv)
        char *name = Nullch;
        enum token_type type = TOKENTYPE_NONE;
        struct debug_tokens *p;
-        SV* report = NEWSV(0, 60);
-
-        Perl_sv_catpvf(aTHX_ report, "<== ");
+        SV* report = newSVpvn("<== ", 4);
 
        for (p = debug_tokens; p->token; p++) {
            if (p->token == (int)rv) {
@@ -287,11 +285,11 @@ S_tokereport(pTHX_ char* s, I32 rv)
            }
        }
        if (name)
-           Perl_sv_catpvf(aTHX_ report, "%s", name);
+           Perl_sv_catpv(aTHX_ report, name);
        else if ((char)rv > ' ' && (char)rv < '~')
            Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
        else if (!rv)
-           Perl_sv_catpvf(aTHX_ report, "EOF");
+           Perl_sv_catpv(aTHX_ report, "EOF");
        else
            Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
        switch (type) {
@@ -715,15 +713,17 @@ S_skipspace(pTHX_ register char *s)
                             (prevlen = SvCUR(PL_linestr)))) == Nullch)
        {
            /* end of file.  Add on the -p or -n magic */
-           if (PL_minus_n || PL_minus_p) {
-               sv_setpv(PL_linestr,PL_minus_p ?
-                        ";}continue{print or die qq(-p destination: $!\\n)" :
-                        "");
-               sv_catpv(PL_linestr,";}");
+           if (PL_minus_p) {
+               sv_setpv(PL_linestr,
+                        ";}continue{print or die qq(-p destination: $!\\n);}");
                PL_minus_n = PL_minus_p = 0;
            }
+           else if (PL_minus_n) {
+               sv_setpvn(PL_linestr, ";}", 2);
+               PL_minus_n = 0;
+           }
            else
-               sv_setpv(PL_linestr,";");
+               sv_setpvn(PL_linestr,";", 1);
 
            /* reset variables for next time we lex */
            PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
@@ -870,6 +870,15 @@ S_force_next(pTHX_ I32 type)
     }
 }
 
+STATIC SV *
+S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
+{
+    SV *sv = newSVpvn(start,len);
+    if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
+       SvUTF8_on(sv);
+    return sv;
+}
+
 /*
  * S_force_word
  * When the lexer knows the next thing is a word (for instance, it has
@@ -909,10 +918,10 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow
                PL_expect = XOPERATOR;
            }
        }
-       PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
+       PL_nextval[PL_nexttoke].opval
+           = (OP*)newSVOP(OP_CONST,0,
+                          S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
        PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
-       if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
-           SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
        force_next(token);
     }
     return s;
@@ -1695,7 +1704,7 @@ S_scan_const(pTHX_ char *start)
                         UV uv = utf8_to_uvchr((U8*)str, 0);
 
                         if (uv < 0x100) {
-                             U8 tmpbuf[UTF8_MAXLEN+1], *d;
+                             U8 tmpbuf[UTF8_MAXBYTES+1], *d;
 
                              d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
                              sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
@@ -1966,8 +1975,10 @@ S_intuit_more(pTHX_ register char *s)
                    weight -= 5;        /* cope with negative subscript */
                break;
            default:
-               if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
-                       isALPHA(*s) && s[1] && isALPHA(s[1])) {
+               if (!isALNUM(last_un_char)
+                   && !(last_un_char == '$' || last_un_char == '@'
+                        || last_un_char == '&')
+                   && isALPHA(*s) && s[1] && isALPHA(s[1])) {
                    char *d = tmpbuf;
                    while (isALPHA(*s))
                        *d++ = *s++;
@@ -2358,7 +2369,8 @@ Perl_yylex(pTHX)
                oldmod = PL_lex_casestack[--PL_lex_casemods];
                PL_lex_casestack[PL_lex_casemods] = '\0';
 
-               if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
+               if (PL_bufptr != PL_bufend
+                   && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
                    PL_bufptr += 2;
                    PL_lex_state = LEX_INTERPCONCAT;
                }
@@ -2381,7 +2393,7 @@ Perl_yylex(pTHX)
            else {
                if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                    tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
-               if (strchr("LU", *s) &&
+               if ((*s == 'L' || *s == 'U') &&
                    (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
                    PL_lex_casestack[--PL_lex_casemods] = '\0';
                    return REPORT(')');
@@ -2565,12 +2577,12 @@ Perl_yylex(pTHX)
            PL_preambled = TRUE;
            sv_setpv(PL_linestr,incl_perldb());
            if (SvCUR(PL_linestr))
-               sv_catpv(PL_linestr,";");
+               sv_catpvn(PL_linestr,";", 1);
            if (PL_preambleav){
                while(AvFILLp(PL_preambleav) >= 0) {
                    SV *tmpsv = av_shift(PL_preambleav);
                    sv_catsv(PL_linestr, tmpsv);
-                   sv_catpv(PL_linestr, ";");
+                   sv_catpvn(PL_linestr, ";", 1);
                    sv_free(tmpsv);
                }
                sv_free((SV*)PL_preambleav);
@@ -2582,29 +2594,35 @@ Perl_yylex(pTHX)
                    sv_catpv(PL_linestr,"chomp;");
                if (PL_minus_a) {
                    if (PL_minus_F) {
-                       if (strchr("/'\"", *PL_splitstr)
+                       if ((*PL_splitstr == '/' || *PL_splitstr == '\''
+                            || *PL_splitstr == '"')
                              && strchr(PL_splitstr + 1, *PL_splitstr))
                            Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
                        else {
-                           char delim;
-                           s = "'~#\200\1'"; /* surely one char is unused...*/
-                           while (s[1] && strchr(PL_splitstr, *s))  s++;
-                           delim = *s;
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
-                                     "q" + (delim == '\''), delim);
-                           for (s = PL_splitstr; *s; s++) {
+                           /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
+                              bytes can be used as quoting characters.  :-) */
+                           /* The count here deliberately includes the NUL
+                              that terminates the C string constant.  This
+                              embeds the opening NUL into the string.  */
+                           sv_catpvn(PL_linestr, "our @F=split(q", 15);
+                           s = PL_splitstr;
+                           do {
+                               /* Need to \ \s  */
                                if (*s == '\\')
-                                   sv_catpvn(PL_linestr, "\\", 1);
+                                   sv_catpvn(PL_linestr, s, 1);
                                sv_catpvn(PL_linestr, s, 1);
-                           }
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
+                           } while (*s++);
+                           /* This loop will embed the trailing NUL of
+                              PL_linestr as the last thing it does before
+                              terminating.  */
+                           sv_catpvn(PL_linestr, ");", 2);
                        }
                    }
                    else
                        sv_catpv(PL_linestr,"our @F=split(' ');");
                }
            }
-           sv_catpv(PL_linestr, "\n");
+           sv_catpvn(PL_linestr, "\n", 1);
            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = Nullch;
@@ -2634,8 +2652,8 @@ Perl_yylex(pTHX)
                    PL_doextract = FALSE;
                }
                if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
-                   sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
-                   sv_catpv(PL_linestr,";}");
+                   sv_setpv(PL_linestr,PL_minus_p
+                            ? ";}continue{print;}" : ";}");
                    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                    PL_last_lop = PL_last_uni = Nullch;
@@ -3004,7 +3022,7 @@ Perl_yylex(pTHX)
                 * subroutine call (or a -bareword), then. */
                DEBUG_T( { PerlIO_printf(Perl_debug_log,
                        "### '-%c' looked like a file test but was not\n",
-                       tmp);
+                       (int) tmp);
                } );
                s = --PL_bufptr;
            }
@@ -3714,7 +3732,8 @@ Perl_yylex(pTHX)
                PL_expect = XTERM;              /* e.g. print $fh 3 */
            else if (*s == '.' && isDIGIT(s[1]))
                PL_expect = XTERM;              /* e.g. print $fh .3 */
-           else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
+           else if ((*s == '?' || *s == '-' || *s == '+')
+                    && !isSPACE(s[1]) && s[1] != '=')
                PL_expect = XTERM;              /* e.g. print $fh -1 */
            else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
                PL_expect = XTERM;              /* e.g. print $fh /.../
@@ -3998,10 +4017,10 @@ Perl_yylex(pTHX)
        /* Is this a word before a => operator? */
        if (*d == '=' && d[1] == '>') {
            CLINE;
-           yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
+           yylval.opval
+               = (OP*)newSVOP(OP_CONST, 0,
+                              S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
            yylval.opval->op_private = OPpCONST_BARE;
-           if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
-             SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
            TERM(WORD);
        }
 
@@ -4037,6 +4056,16 @@ Perl_yylex(pTHX)
            {
                tmp = 0;                /* any sub overrides "weak" keyword */
            }
+           else if (gv && !gvp
+                   && tmp == -KEY_err
+                   && GvCVu(gv)
+                   && PL_expect != XOPERATOR
+                   && PL_expect != XTERMORDORDOR)
+           {
+               /* any sub overrides the "err" keyword, except when really an
+                * operator is expected */
+               tmp = 0;
+           }
            else {                      /* no override */
                tmp = -tmp;
                if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
@@ -4113,8 +4142,12 @@ Perl_yylex(pTHX)
                    sv = newSVpvn("CORE::GLOBAL::",14);
                    sv_catpv(sv,PL_tokenbuf);
                }
-               else
-                   sv = newSVpv(PL_tokenbuf,0);
+               else {
+                   /* If len is 0, newSVpv does strlen(), which is correct.
+                      If len is non-zero, then it will be the true length,
+                      and so the scalar will be created correctly.  */
+                   sv = newSVpv(PL_tokenbuf,len);
+               }
 
                /* Presume this is going to be a bareword of some sort. */
 
@@ -4238,7 +4271,7 @@ Perl_yylex(pTHX)
                        char *proto = SvPV((SV*)cv, len);
                        if (!len)
                            TERM(FUNC0SUB);
-                       if (strEQ(proto, "$"))
+                       if (*proto == '$' && proto[1] == '\0')
                            OPERATOR(UNIOPSUB);
                        while (*proto == ';')
                            proto++;
@@ -4271,7 +4304,8 @@ Perl_yylex(pTHX)
                }
 
            safe_bareword:
-               if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
+               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+                   && ckWARN_d(WARN_AMBIGUOUS)) {
                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
                        "Operator or semicolon missing before %c%s",
                        lastchar, PL_tokenbuf);
@@ -4295,7 +4329,7 @@ Perl_yylex(pTHX)
        case KEY___PACKAGE__:
            yylval.opval = (OP*)newSVOP(OP_CONST, 0,
                                        (PL_curstash
-                                        ? newSVsv(PL_curstname)
+                                        ? newSVpv(HvNAME(PL_curstash), 0)
                                         : &PL_sv_undef));
            TERM(THING);
 
@@ -5490,7 +5524,7 @@ S_pending_ident(pTHX)
                 sv_catpv(sym, PL_tokenbuf+1);
                 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
                 yylval.opval->op_private = OPpCONST_ENTERED;
-                gv_fetchpv(SvPVX(sym),
+                gv_fetchsv(sym,
                     (PL_in_eval
                         ? (GV_ADDMULTI | GV_ADDINEVAL)
                         : GV_ADDMULTI
@@ -5772,7 +5806,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
            else if (*d == 'l') {
                if (strEQ(d,"login"))           return -KEY_getlogin;
            }
-           else if (strEQ(d,"c"))              return -KEY_getc;
+           else if (*d == 'c' && d[1] == '\0') return -KEY_getc;
            break;
        }
        switch (len) {
@@ -5919,12 +5953,16 @@ Perl_keyword(pTHX_ register char *d, I32 len)
        }
        break;
     case 'q':
-       if (len <= 2) {
-           if (strEQ(d,"q"))                   return KEY_q;
-           if (strEQ(d,"qr"))                  return KEY_qr;
-           if (strEQ(d,"qq"))                  return KEY_qq;
-           if (strEQ(d,"qw"))                  return KEY_qw;
-           if (strEQ(d,"qx"))                  return KEY_qx;
+       if (len == 1) {
+                                               return KEY_q;
+       }
+       else if (len == 2) {
+           switch (d[1]) {
+           case 'r':                           return KEY_qr;
+           case 'q':                           return KEY_qq;
+           case 'w':                           return KEY_qw;
+           case 'x':                           return KEY_qx;
+           };
        }
        else if (strEQ(d,"quotemeta"))          return -KEY_quotemeta;
        break;
@@ -6303,6 +6341,9 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
     return res;
 }
 
+/* Returns a NUL terminated string, with the length of the string written to
+   *slp
+   */
 STATIC char *
 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
 {
@@ -6396,7 +6437,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
        return s;
     }
     if (*s == '$' && s[1] &&
-       (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+       (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
     {
        return s;
     }
@@ -6649,15 +6690,23 @@ S_scan_trans(pTHX_ char *start)
     }
 
     complement = del = squash = 0;
-    while (strchr("cds", *s)) {
-       if (*s == 'c')
+    while (1) {
+       switch (*s) {
+       case 'c':
            complement = OPpTRANS_COMPLEMENT;
-       else if (*s == 'd')
+           break;
+       case 'd':
            del = OPpTRANS_DELETE;
-       else if (*s == 's')
+           break;
+       case 's':
            squash = OPpTRANS_SQUASH;
+           break;
+       default:
+           goto no_more;
+       }
        s++;
     }
+  no_more:
 
     New(803, tbl, complement&&!del?258:256, short);
     o = newPVOP(OP_TRANS, 0, (char*)tbl);
@@ -6690,7 +6739,7 @@ S_scan_heredoc(pTHX_ register char *s)
     if (!outer)
        *d++ = '\n';
     for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
-    if (*peek && strchr("`'\"",*peek)) {
+    if (*peek == '`' || *peek == '\'' || *peek =='"') {
        s = peek;
        term = *s++;
        s = delimcpy(d, e, s, PL_bufend, term, &len);
@@ -7088,7 +7137,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     I32 brackets = 1;                  /* bracket nesting level */
     bool has_utf8 = FALSE;             /* is there any utf8 content? */
     I32 termcode;                      /* terminating char. code */
-    U8 termstr[UTF8_MAXLEN];           /* terminating string */
+    U8 termstr[UTF8_MAXBYTES];         /* terminating string */
     STRLEN termlen;                    /* length of terminating string */
     char *last = NULL;                 /* last position for nesting bracket */
 
@@ -7649,7 +7698,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
        }
 
        /* read exponent part, if present */
-       if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
+       if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
            floatit = TRUE;
            s++;
 
@@ -8109,7 +8158,7 @@ utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     I32 count = FILTER_READ(idx+1, sv, maxlen);
     DEBUG_P(PerlIO_printf(Perl_debug_log,
                          "utf16_textfilter(%p): %d %d (%d)\n",
-                         utf16_textfilter, idx, maxlen, count));
+                         utf16_textfilter, idx, maxlen, (int) count));
     if (count) {
        U8* tmps;
        I32 newlen;
@@ -8130,7 +8179,7 @@ utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     I32 count = FILTER_READ(idx+1, sv, maxlen);
     DEBUG_P(PerlIO_printf(Perl_debug_log,
                          "utf16rev_textfilter(%p): %d %d (%d)\n",
-                         utf16rev_textfilter, idx, maxlen, count));
+                         utf16rev_textfilter, idx, maxlen, (int) count));
     if (count) {
        U8* tmps;
        I32 newlen;
@@ -8181,7 +8230,7 @@ Perl_scan_vstring(pTHX_ char *s, SV *sv)
 
     if (!isALPHA(*pos)) {
        UV rev;
-       U8 tmpbuf[UTF8_MAXLEN+1];
+       U8 tmpbuf[UTF8_MAXBYTES+1];
        U8 *tmpend;
 
        if (*s == 'v') s++;  /* get past 'v' */