Improve and restructure t/op/pat.t and split out some unicode related tests into...
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index 72a37c4..d8cd2e5 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
- *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
+ *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -9,7 +9,9 @@
  */
 
 /*
- *   "It all comes from here, the stench and the peril."  --Frodo
+ *  'It all comes from here, the stench and the peril.'    --Frodo
+ *
+ *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  */
 
 /*
@@ -368,6 +370,7 @@ static struct debug_tokens {
     { WHEN,            TOKENTYPE_IVAL,         "WHEN" },
     { WHILE,           TOKENTYPE_IVAL,         "WHILE" },
     { WORD,            TOKENTYPE_OPVAL,        "WORD" },
+    { YADAYADA,                TOKENTYPE_IVAL,         "YADAYADA" },
     { 0,               TOKENTYPE_NONE,         NULL }
 };
 
@@ -543,13 +546,7 @@ S_missingterm(pTHX_ char *s)
        if (nl)
            *nl = '\0';
     }
-    else if (
-#ifdef EBCDIC
-       iscntrl(PL_multi_close)
-#else
-       PL_multi_close < 32 || PL_multi_close == 127
-#endif
-       ) {
+    else if (isCNTRL(PL_multi_close)) {
        *tmpbuf = '^';
        tmpbuf[1] = (char)toCTRL(PL_multi_close);
        tmpbuf[2] = '\0';
@@ -716,7 +713,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
     parser->expect = XSTATE;
     parser->rsfp = rsfp;
     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
-               : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
+               : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 
     Newx(parser->lex_brackstack, 120, char);
     Newx(parser->lex_casestack, 12, char);
@@ -760,8 +757,8 @@ Perl_parser_free(pTHX_  const yy_parser *parser)
 
     if (parser->rsfp == PerlIO_stdin())
        PerlIO_clearerr(parser->rsfp);
-    else if (parser->rsfp && parser->old_parser
-                         && parser->rsfp != parser->old_parser->rsfp)
+    else if (parser->rsfp && (!parser->old_parser ||
+               (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
        PerlIO_close(parser->rsfp);
     SvREFCNT_dec(parser->rsfp_filters);
 
@@ -901,8 +898,8 @@ S_incline(pTHX_ const char *s)
                    gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
                    /* adjust ${"::_<newfilename"} to store the new file name */
                    GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
-                   GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
-                   GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
+                   GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
+                   GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
                }
 
                if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
@@ -1118,17 +1115,17 @@ S_skipspace(pTHX_ register char *s)
            }
            else if (PL_minus_n) {
 #ifdef PERL_MAD
-               sv_catpvn(PL_linestr, ";}", 2);
+               sv_catpvs(PL_linestr, ";}");
 #else
-               sv_setpvn(PL_linestr, ";}", 2);
+               sv_setpvs(PL_linestr, ";}");
 #endif
                PL_minus_n = 0;
            }
            else
 #ifdef PERL_MAD
-               sv_catpvn(PL_linestr,";", 1);
+               sv_catpvs(PL_linestr,";");
 #else
-               sv_setpvn(PL_linestr,";", 1);
+               sv_setpvs(PL_linestr,";");
 #endif
 
            /* reset variables for next time we lex */
@@ -1178,7 +1175,7 @@ S_skipspace(pTHX_ register char *s)
        /* debugger active and we're not compiling the debugger code,
         * so store the line into the debugger's array of lines
         */
-       if (PERLDB_LINE && PL_curstash != PL_debstash)
+       if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
            update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
     }
 
@@ -1318,7 +1315,7 @@ S_curmad(pTHX_ char slot, SV *sv)
        where = &PL_nexttoke[PL_curforce].next_mad;
 
     if (PL_faketokens)
-       sv_setpvn(sv, "", 0);
+       sv_setpvs(sv, "");
     else {
        if (!IN_BYTES) {
            if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
@@ -1332,7 +1329,7 @@ S_curmad(pTHX_ char slot, SV *sv)
     /* keep a slot open for the head of the list? */
     if (slot != '_' && *where && (*where)->mad_key == '^') {
        (*where)->mad_key = slot;
-       sv_free((SV*)((*where)->mad_val));
+       sv_free(MUTABLE_SV(((*where)->mad_val)));
        (*where)->mad_val = (void*)sv;
     }
     else
@@ -1359,7 +1356,7 @@ S_force_next(pTHX_ I32 type)
 #ifdef DEBUGGING
     if (DEBUG_T_TEST) {
         PerlIO_printf(Perl_debug_log, "### forced token:\n");
-       tokereport(THING, &NEXTVAL_NEXTTOKE);
+       tokereport(type, &NEXTVAL_NEXTTOKE);
     }
 #endif
 #ifdef PERL_MAD
@@ -1845,7 +1842,7 @@ S_sublex_done(pTHX)
                PL_thiswhite = 0;
            }
            if (PL_thistoken)
-               sv_setpvn(PL_thistoken,"",0);
+               sv_setpvs(PL_thistoken,"");
            else
                PL_realtokenstart = -1;
        }
@@ -2172,8 +2169,13 @@ S_scan_const(pTHX_ char *start)
        else if (*s == '$') {
            if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
                break;
-           if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
+           if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
+               if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) {
+                   Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+                               "Possible unintended interpolation of $\\ in regex");
+               }
                break;          /* in regexp, $ might be tail anchor */
+            }
        }
 
        /* End of else if chain - OP_TRANS rejoin rest */
@@ -3332,7 +3334,7 @@ Perl_yylex(pTHX)
            PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
            PL_nexttoke[PL_lasttoke].next_mad = 0;
            if (PL_thismad && PL_thismad->mad_key == '_') {
-               PL_thiswhite = (SV*)PL_thismad->mad_val;
+               PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
                PL_thismad->mad_val = 0;
                mad_free(PL_thismad);
                PL_thismad = 0;
@@ -3685,9 +3687,12 @@ Perl_yylex(pTHX)
                    ++svp;
                    sv_catpvs(PL_linestr, ";");
                }
-               sv_free((SV*)PL_preambleav);
+               sv_free(MUTABLE_SV(PL_preambleav));
                PL_preambleav = NULL;
            }
+           if (PL_minus_E)
+               sv_catpvs(PL_linestr,
+                         "use feature ':5." STRINGIFY(PERL_VERSION) "';");
            if (PL_minus_n || PL_minus_p) {
                sv_catpvs(PL_linestr, "LINE: while (<>) {");
                if (PL_minus_l)
@@ -3719,14 +3724,11 @@ Perl_yylex(pTHX)
                        sv_catpvs(PL_linestr,"our @F=split(' ');");
                }
            }
-           if (PL_minus_E)
-               sv_catpvs(PL_linestr,
-                         "use feature ':5." STRINGIFY(PERL_VERSION) "';");
            sv_catpvs(PL_linestr, "\n");
            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = NULL;
-           if (PERLDB_LINE && PL_curstash != PL_debstash)
+           if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
                update_debugger_info(PL_linestr, NULL, 0);
            goto retry;
        }
@@ -3762,7 +3764,7 @@ Perl_yylex(pTHX)
                }
                PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                PL_last_lop = PL_last_uni = NULL;
-               sv_setpvn(PL_linestr,"",0);
+               sv_setpvs(PL_linestr,"");
                TOKEN(';');     /* not infinite loop because rsfp is NULL now */
            }
            /* If it looks like the start of a BOM or raw UTF-16,
@@ -3798,7 +3800,7 @@ Perl_yylex(pTHX)
                    sv_catsv(PL_thiswhite, PL_linestr);
 #endif
                if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
-                   sv_setpvn(PL_linestr, "", 0);
+                   sv_setpvs(PL_linestr, "");
                    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                    PL_last_lop = PL_last_uni = NULL;
@@ -3808,7 +3810,7 @@ Perl_yylex(pTHX)
            incline(s);
        } while (PL_doextract);
        PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
-       if (PERLDB_LINE && PL_curstash != PL_debstash)
+       if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
            update_debugger_info(PL_linestr, NULL, 0);
        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
        PL_last_lop = PL_last_uni = NULL;
@@ -3981,17 +3983,17 @@ Perl_yylex(pTHX)
                            } while (argc && argv[0][0] == '-' && argv[0][1]);
                            init_argv_symbols(argc,argv);
                        }
-                       if ((PERLDB_LINE && !oldpdb) ||
+                       if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
                            ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
                              /* if we have already added "LINE: while (<>) {",
                                 we must not do it again */
                        {
-                           sv_setpvn(PL_linestr, "", 0);
+                           sv_setpvs(PL_linestr, "");
                            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                            PL_last_lop = PL_last_uni = NULL;
                            PL_preambled = FALSE;
-                           if (PERLDB_LINE)
+                           if (PERLDB_LINE || PERLDB_SAVESRC)
                                (void)gv_fetchfile(PL_origfilename);
                            goto retry;
                        }
@@ -4085,7 +4087,7 @@ Perl_yylex(pTHX)
                        if (!PL_thiswhite)
                            PL_thiswhite = newSVpvs("");
                        if (CopLINE(PL_curcop) == 1) {
-                           sv_setpvn(PL_thiswhite, "", 0);
+                           sv_setpvs(PL_thiswhite, "");
                            PL_faketokens = 0;
                        }
                        sv_catpvn(PL_thiswhite, s, d - s);
@@ -4649,7 +4651,7 @@ Perl_yylex(pTHX)
                    if (PL_madskills) {
                        if (!PL_thiswhite)
                            PL_thiswhite = newSVpvs("");
-                       sv_catpvn(PL_thiswhite,"}",1);
+                       sv_catpvs(PL_thiswhite,"}");
                    }
 #endif
                    return yylex();     /* ignore fake brackets */
@@ -4686,7 +4688,7 @@ Perl_yylex(pTHX)
                && isIDFIRST_lazy_if(s,UTF))
            {
                CopLINE_dec(PL_curcop);
-               Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
+               Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
                CopLINE_inc(PL_curcop);
            }
            BAop(OP_BIT_AND);
@@ -4774,6 +4776,10 @@ Perl_yylex(pTHX)
        pl_yylval.ival = 0;
        OPERATOR(ASSIGNOP);
     case '!':
+       if (PL_expect == XSTATE && s[1] == '!' && s[2] == '!') {
+           s += 3;
+           LOP(OP_DIE,XTERM);
+       }
        s++;
        {
            const char tmp = *s++;
@@ -5025,10 +5031,14 @@ Perl_yylex(pTHX)
            AOPERATOR(DORDOR);
        }
      case '?':                 /* may either be conditional or pattern */
-        if(PL_expect == XOPERATOR) {
+       if (PL_expect == XSTATE && s[1] == '?' && s[2] == '?') {
+           s += 3;
+           LOP(OP_WARN,XTERM);
+       }
+       if (PL_expect == XOPERATOR) {
             char tmp = *s++;
             if(tmp == '?') {
-                 OPERATOR('?');
+               OPERATOR('?');
             }
              else {
                 tmp = *s++;
@@ -5067,6 +5077,10 @@ Perl_yylex(pTHX)
            PL_expect = XSTATE;
            goto rightbracket;
        }
+       if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
+           s += 3;
+           OPERATOR(YADAYADA);
+       }
        if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
            char tmp = *s++;
            if (*s == tmp) {
@@ -5342,7 +5356,7 @@ Perl_yylex(pTHX)
                if (PL_expect == XOPERATOR) {
                    if (PL_bufptr == PL_linestart) {
                        CopLINE_dec(PL_curcop);
-                       Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
+                       Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
                        CopLINE_inc(PL_curcop);
                    }
                    else
@@ -5424,7 +5438,7 @@ Perl_yylex(pTHX)
                    /* Real typeglob, so get the real subroutine: */
                           ? GvCVu(gv)
                    /* A proxy for a subroutine in this package? */
-                          : SvOK(gv) ? (CV *) gv : NULL)
+                          : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
                    : NULL;
 
                /* See if it's the indirect object for a list operator. */
@@ -5573,7 +5587,7 @@ Perl_yylex(pTHX)
                        SvPOK(cv))
                    {
                        STRLEN protolen;
-                       const char *proto = SvPV_const((SV*)cv, protolen);
+                       const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
                        if (!protolen)
                            TERM(FUNC0SUB);
                        if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
@@ -5654,10 +5668,10 @@ Perl_yylex(pTHX)
 
                /* Call it a bare word */
 
+               bareword:
                if (PL_hints & HINT_STRICT_SUBS)
                    pl_yylval.opval->op_private |= OPpCONST_STRICT;
                else {
-               bareword:
                    if (lastchar != '-') {
                        if (ckWARN(WARN_RESERVED)) {
                            d = PL_tokenbuf;
@@ -6709,7 +6723,7 @@ Perl_yylex(pTHX)
                        Perl_croak(aTHX_ "Missing name in \"my sub\"");
                    PL_expect = XTERMBLOCK;
                    attrful = XATTRTERM;
-                   sv_setpvn(PL_subname,"?",1);
+                   sv_setpvs(PL_subname,"?");
                    have_name = FALSE;
                }
 
@@ -6731,6 +6745,12 @@ Perl_yylex(pTHX)
                if (*s == '(') {
                    char *p;
                    bool bad_proto = FALSE;
+                   bool in_brackets = FALSE;
+                   char greedy_proto = ' ';
+                   bool proto_after_greedy_proto = FALSE;
+                   bool must_be_last = FALSE;
+                   bool underscore = FALSE;
+                   bool seen_underscore = FALSE;
                    const bool warnsyntax = ckWARN(WARN_SYNTAX);
 
                    s = scan_str(s,!!PL_madskills,FALSE);
@@ -6742,14 +6762,47 @@ Perl_yylex(pTHX)
                    for (p = d; *p; ++p) {
                        if (!isSPACE(*p)) {
                            d[tmp++] = *p;
-                           if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
-                               bad_proto = TRUE;
+
+                           if (warnsyntax) {
+                               if (must_be_last)
+                                   proto_after_greedy_proto = TRUE;
+                               if (!strchr("$@%*;[]&\\_", *p)) {
+                                   bad_proto = TRUE;
+                               }
+                               else {
+                                   if ( underscore ) {
+                                       if ( *p != ';' )
+                                           bad_proto = TRUE;
+                                       underscore = FALSE;
+                                   }
+                                   if ( *p == '[' ) {
+                                       in_brackets = TRUE;
+                                   }
+                                   else if ( *p == ']' ) {
+                                       in_brackets = FALSE;
+                                   }
+                                   else if ( (*p == '@' || *p == '%') &&
+                                        ( tmp < 2 || d[tmp-2] != '\\' ) &&
+                                        !in_brackets ) {
+                                       must_be_last = TRUE;
+                                       greedy_proto = *p;
+                                   }
+                                   else if ( *p == '_' ) {
+                                       underscore = seen_underscore = TRUE;
+                                   }
+                               }
+                           }
                        }
                    }
                    d[tmp] = '\0';
+                   if (proto_after_greedy_proto)
+                       Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                                   "Prototype after '%c' for %"SVf" : %s",
+                                   greedy_proto, SVfARG(PL_subname), d);
                    if (bad_proto)
                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                   "Illegal character in prototype for %"SVf" : %s",
+                                   "Illegal character %sin prototype for %"SVf" : %s",
+                                   seen_underscore ? "after '_' " : "",
                                    SVfARG(PL_subname), d);
                    SvCUR_set(PL_lex_stuff, tmp);
                    have_proto = TRUE;
@@ -10924,10 +10977,10 @@ S_scan_pat(pTHX_ char *start, I32 type)
           matches.  */
        assert(type != OP_TRANS);
        if (PL_curstash) {
-           MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
+           MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
            U32 elements;
            if (!mg) {
-               mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
+               mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
                                 0);
            }
            elements = mg->mg_len / sizeof(PMOP**);
@@ -11074,9 +11127,9 @@ S_scan_trans(pTHX_ char *start)
     register char* s;
     OP *o;
     short *tbl;
-    I32 squash;
-    I32 del;
-    I32 complement;
+    U8 squash;
+    U8 del;
+    U8 complement;
 #ifdef PERL_MAD
     char *modstart;
 #endif
@@ -11353,7 +11406,7 @@ S_scan_heredoc(pTHX_ register char *s)
        PL_last_lop = PL_last_uni = NULL;
     }
     else
-       sv_setpvn(tmpstr,"",0);   /* avoid "uninitialized" warning */
+       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
     while (s >= PL_bufend) {   /* multiple line string? */
 #ifdef PERL_MAD
        if (PL_madskills) {
@@ -11390,7 +11443,7 @@ S_scan_heredoc(pTHX_ register char *s)
        else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
            PL_bufend[-1] = '\n';
 #endif
-       if (PERLDB_LINE && PL_curstash != PL_debstash)
+       if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
            update_debugger_info(PL_linestr, NULL, 0);
        if (*s == term && memEQ(s,PL_tokenbuf,len)) {
            STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
@@ -11889,7 +11942,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
        CopLINE_inc(PL_curcop);
 
        /* update debugger info */
-       if (PERLDB_LINE && PL_curstash != PL_debstash)
+       if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
            update_debugger_info(PL_linestr, NULL, 0);
 
        /* having changed the buffer, we must update PL_bufend */
@@ -12508,12 +12561,12 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
     save_item(PL_subname);
     SAVESPTR(PL_compcv);
 
-    PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
+    PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
     CvFLAGS(PL_compcv) |= flags;
 
     PL_subline = CopLINE(PL_curcop);
     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
-    CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
+    CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
 
     return oldsavestack_ix;
@@ -12850,7 +12903,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
        if (*s == 'v')
            s++;  /* get past 'v' */
 
-       sv_setpvn(sv, "", 0);
+       sv_setpvs(sv, "");
 
        for (;;) {
            /* this is atoi() that tolerates underscores */