Upgrade to CPAN-1.87_62

[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c

index 30a4548..3fec508 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1787,12 +1787,14 @@ S_scan_const(pTHX_ char *start)
     UV uv;
 #ifdef EBCDIC
     UV literal_endpoint = 0;
+    bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
 #endif
 
     const char * const leaveit = /* set of acceptably-backslashed characters */
-       PL_lex_inpat
-           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
-           : "";
+       (const char *)
+       (PL_lex_inpat
+        ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+        : "");
 
     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
        /* If we are doing a trans and we know we want UTF8 set expectation */
@@ -1810,7 +1812,15 @@ S_scan_const(pTHX_ char *start)
                I32 min;                        /* first character in range */
                I32 max;                        /* last character in range */
 
-               if (has_utf8) {
+#ifdef EBCDIC
+               UV uvmax = 0;
+#endif
+
+               if (has_utf8
+#ifdef EBCDIC
+                   && !native_range
+#endif
+                   ) {
                    char * const c = (char*)utf8_hop((U8*)d, -1);
                    char *e = d++;
                    while (e-- > c)
@@ -1823,12 +1833,43 @@ S_scan_const(pTHX_ char *start)
                }
 
                i = d - SvPVX_const(sv);                /* remember current offset */
+#ifdef EBCDIC
+                SvGROW(sv,
+                      SvLEN(sv) + (has_utf8 ?
+                                   (512 - UTF_CONTINUATION_MARK +
+                                    UNISKIP(0x100))
+                                   : 256));
+                /* How many two-byte within 0..255: 128 in UTF-8,
+                * 96 in UTF-8-mod. */
+#else
                SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
+#endif
                d = SvPVX(sv) + i;              /* refresh d after realloc */
-               d -= 2;                         /* eat the first char and the - */
-
-               min = (U8)*d;                   /* first char in range */
-               max = (U8)d[1];                 /* last char in range  */
+#ifdef EBCDIC
+                if (has_utf8) {
+                    int j;
+                    for (j = 0; j <= 1; j++) {
+                        char * const c = (char*)utf8_hop((U8*)d, -1);
+                        const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
+                        if (j)
+                            min = (U8)uv;
+                        else if (uv < 256)
+                            max = (U8)uv;
+                        else {
+                            max = (U8)0xff; /* only to \xff */
+                            uvmax = uv; /* \x{100} to uvmax */
+                        }
+                        d = c; /* eat endpoint chars */
+                     }
+                }
+               else {
+#endif
+                  d -= 2;              /* eat the first char and the - */
+                  min = (U8)*d;        /* first char in range */
+                  max = (U8)d[1];      /* last char in range  */
+#ifdef EBCDIC
+              }
+#endif
 
                 if (min > max) {
                    Perl_croak(aTHX_
@@ -1853,7 +1894,29 @@ S_scan_const(pTHX_ char *start)
                else
 #endif
                    for (i = min; i <= max; i++)
-                       *d++ = (char)i;
+#ifdef EBCDIC
+                        if (has_utf8) {
+                            const U8 ch = (U8)NATIVE_TO_UTF(i);
+                            if (UNI_IS_INVARIANT(ch))
+                                *d++ = (U8)i;
+                            else {
+                                *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
+                                *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
+                            }
+                        }
+                        else
+#endif
+                            *d++ = (char)i;
+ 
+#ifdef EBCDIC
+                if (uvmax) {
+                    d = (char*)uvchr_to_utf8((U8*)d, 0x100);
+                    if (uvmax > 0x101)
+                        *d++ = (char)UTF_TO_NATIVE(0xff);
+                    if (uvmax > 0x100)
+                        d = (char*)uvchr_to_utf8((U8*)d, uvmax);
+                }
+#endif
 
                /* mark the range as done, and continue */
                dorange = FALSE;
@@ -1869,7 +1932,11 @@ S_scan_const(pTHX_ char *start)
                if (didrange) {
                    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
                }
-               if (has_utf8) {
+               if (has_utf8
+#ifdef EBCDIC
+                   && !native_range
+#endif
+                   ) {
                    *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
                    s++;
                    continue;
@@ -1881,6 +1948,7 @@ S_scan_const(pTHX_ char *start)
                didrange = FALSE;
 #ifdef EBCDIC
                literal_endpoint = 0;
+               native_range = TRUE;
 #endif
            }
        }
@@ -1927,9 +1995,14 @@ S_scan_const(pTHX_ char *start)
        /* check for embedded arrays
           (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
           */
-       else if (*s == '@' && s[1]
-                && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
-           break;
+       else if (*s == '@' && s[1]) {
+           if (isALNUM_lazy_if(s+1,UTF))
+               break;
+           if (strchr(":'{$", s[1]))
+               break;
+           if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
+               break; /* in regexp, neither @+ nor @- are interpolated */
+       }
 
        /* check for embedded scalars.  only stop if we're sure it's a
           variable.
@@ -1985,8 +2058,8 @@ S_scan_const(pTHX_ char *start)
                    if ((isALPHA(*s) || isDIGIT(*s)) &&
                        ckWARN(WARN_MISC))
                        Perl_warner(aTHX_ packWARN(WARN_MISC),
-                              "Unrecognized escape \\%c passed through",
-                              *s);
+                                   "Unrecognized escape \\%c passed through",
+                                   *s);
                    /* default action is to copy the quoted character */
                    goto default_action;
                }
@@ -2084,6 +2157,10 @@ S_scan_const(pTHX_ char *start)
                                (PL_lex_repl ? OPpTRANS_FROM_UTF
                                             : OPpTRANS_TO_UTF);
                        }
+#ifdef EBCDIC
+                       if (uv > 255 && !dorange)
+                           native_range = FALSE;
+#endif
                     }
                    else {
                        *d++ = (char)uv;
@@ -2161,6 +2238,10 @@ S_scan_const(pTHX_ char *start)
                        SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
                        d = SvPVX(sv) + (d - odest);
                    }
+#ifdef EBCDIC
+                   if (!dorange)
+                       native_range = FALSE; /* \N{} is guessed to be Unicode */
+#endif
                    Copy(str, d, len, char);
                    d += len;
                    SvREFCNT_dec(res);
@@ -2234,6 +2315,10 @@ S_scan_const(pTHX_ char *start)
            }
            d = (char*)uvchr_to_utf8((U8*)d, nextuv);
            has_utf8 = TRUE;
+#ifdef EBCDIC
+           if (uv > 255 && !dorange)
+               native_range = FALSE;
+#endif
        }
        else {
            *d++ = NATIVE_TO_NEED(has_utf8,*s++);
@@ -2268,13 +2353,15 @@ S_scan_const(pTHX_ char *start)
     /* return the substring (via yylval) only if we parsed anything */
     if (s > PL_bufptr) {
        if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
-           sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
+           sv = new_constant(start, s - start,
+                             (const char *)(PL_lex_inpat ? "qr" : "q"),
                              sv, NULL,
-                             ( PL_lex_inwhat == OP_TRANS
-                               ? "tr"
-                               : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
-                                   ? "s"
-                                   : "qq")));
+                             (const char *)
+                             (( PL_lex_inwhat == OP_TRANS
+                                ? "tr"
+                                : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
+                                    ? "s"
+                                    : "qq"))));
        yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
     } else
        SvREFCNT_dec(sv);
@@ -2389,7 +2476,7 @@ S_intuit_more(pTHX_ register char *s)
                if (s[1]) {
                    if (strchr("wds]",s[1]))
                        weight += 100;
-                   else if (seen['\''] || seen['"'])
+                   else if (seen[(U8)'\''] || seen[(U8)'"'])
                        weight += 1;
                    else if (strchr("rnftbxcav",s[1]))
                        weight += 40;
@@ -2500,7 +2587,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
        len = start - SvPVX(PL_linestr);
 #endif
        s = PEEKSPACE(s);
-#ifdef PERLMAD
+#ifdef PERL_MAD
        start = SvPVX(PL_linestr) + len;
 #endif
        PL_bufptr = start;
@@ -3011,6 +3098,13 @@ Perl_yylex(pTHX)
     STRLEN len;
     bool bof = FALSE;
 
+    /* orig_keyword, gvp, and gv are initialized here because
+     * jump to the label just_a_word_zero can bypass their
+     * initialization later. */
+    I32 orig_keyword = 0;
+    GV *gv = NULL;
+    GV **gvp = NULL;
+
     DEBUG_T( {
        SV* tmp = newSVpvs("");
        PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
@@ -3348,9 +3442,10 @@ Perl_yylex(pTHX)
            PL_last_uni = 0;
            PL_last_lop = 0;
            if (PL_lex_brackets) {
-               yyerror(PL_lex_formbrack
-                   ? "Format not terminated"
-                   : "Missing right curly or square bracket");
+               yyerror((const char *)
+                       (PL_lex_formbrack
+                        ? "Format not terminated"
+                        : "Missing right curly or square bracket"));
            }
             DEBUG_T( { PerlIO_printf(Perl_debug_log,
                         "### Tokener got EOF\n");
@@ -3450,8 +3545,10 @@ Perl_yylex(pTHX)
                    if (PL_madskills)
                        PL_faketokens = 1;
 #endif
-                   sv_setpv(PL_linestr,PL_minus_p
-                            ? ";}continue{print;}" : ";}");
+                   sv_setpv(PL_linestr,
+                            (const char *)
+                            (PL_minus_p
+                             ? ";}continue{print;}" : ";}"));
                    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                    PL_last_lop = PL_last_uni = NULL;
@@ -4122,10 +4219,11 @@ Perl_yylex(pTHX)
                       context messages from yyerror().
                    */
                    PL_bufptr = s;
-                   yyerror( *s
-                            ? Perl_form(aTHX_ "Invalid separator character "
-                                        "%c%c%c in attribute list", q, *s, q)
-                            : "Unterminated attribute list" );
+                   yyerror( (const char *)
+                            (*s
+                             ? Perl_form(aTHX_ "Invalid separator character "
+                                         "%c%c%c in attribute list", q, *s, q)
+                             : "Unterminated attribute list" ) );
                    if (attrs)
                        op_free(attrs);
                    OPERATOR(':');
@@ -4931,9 +5029,10 @@ Perl_yylex(pTHX)
 
       keylookup: {
        I32 tmp;
-       I32 orig_keyword = 0;
-       GV *gv = NULL;
-       GV **gvp = NULL;
+
+       orig_keyword = 0;
+       gv = NULL;
+       gvp = NULL;
 
        PL_bufptr = s;
        s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
@@ -5310,8 +5409,10 @@ Perl_yylex(pTHX)
                        while (*proto == ';')
                            proto++;
                        if (*proto == '&' && *s == '{') {
-                           sv_setpv(PL_subname, PL_curstash ?
-                                       "__ANON__" : "__ANON__::__ANON__");
+                           sv_setpv(PL_subname,
+                                    (const char *)
+                                    (PL_curstash ?
+                                     "__ANON__" : "__ANON__::__ANON__"));
                            PREBLOCK(LSTOPSUB);
                        }
                    }
@@ -6539,7 +6640,8 @@ Perl_yylex(pTHX)
 #endif
                if (!have_name) {
                    sv_setpv(PL_subname,
-                       PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
+                            (const char *)
+                            (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
                    TOKEN(ANONSUB);
                }
 #ifndef PERL_MAD
@@ -10290,9 +10392,10 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
        SV *msg;
        
-       why2 = strEQ(key,"charnames")
-              ? "(possibly a missing \"use charnames ...\")"
-              : "";
+       why2 = (const char *)
+           (strEQ(key,"charnames")
+            ? "(possibly a missing \"use charnames ...\")"
+            : "");
        msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
                            (type ? type: "undef"), why2);
 
@@ -10520,7 +10623,9 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
                s++;
            if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
                if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
-                   const char * const brack = (*s == '[') ? "[...]" : "{...}";
+                   const char * const brack =
+                       (const char *)
+                       ((*s == '[') ? "[...]" : "{...}");
                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
                        "Ambiguous use of %c{%s%s} resolved to %c%s%s",
                        funny, dest, brack, funny, dest, brack);
@@ -10597,7 +10702,8 @@ S_scan_pat(pTHX_ char *start, I32 type)
     dVAR;
     PMOP *pm;
     char *s = scan_str(start,!!PL_madskills,FALSE);
-    const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
+    const char * const valid_flags =
+       (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
 #ifdef PERL_MAD
     char *modstart;
 #endif
@@ -10605,9 +10711,11 @@ S_scan_pat(pTHX_ char *start, I32 type)
 
     if (!s) {
        const char * const delimiter = skipspace(start);
-       Perl_croak(aTHX_ *delimiter == '?'
-                  ? "Search pattern not terminated or ternary operator parsed as search pattern"
-                  : "Search pattern not terminated" );
+       Perl_croak(aTHX_
+                  (const char *)
+                  (*delimiter == '?'
+                   ? "Search pattern not terminated or ternary operator parsed as search pattern"
+                   : "Search pattern not terminated" ));
     }
 
     pm = (PMOP*)newPMOP(type, 0);
@@ -10722,7 +10830,7 @@ S_scan_subst(pTHX_ char *start)
        PL_multi_end = 0;
        pm->op_pmflags |= PMf_EVAL;
        while (es-- > 0)
-           sv_catpv(repl, es ? "eval " : "do ");
+           sv_catpv(repl, (const char *)(es ? "eval " : "do "));
        sv_catpvs(repl, "{");
        sv_catsv(repl, PL_lex_repl);
        if (strchr(SvPVX(PL_lex_repl), '#'))
@@ -10920,7 +11028,7 @@ S_scan_heredoc(pTHX_ register char *s)
 #ifdef PERL_MAD
     found_newline = 0;
 #endif
-    if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
+    if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
         herewas = newSVpvn(s,PL_bufend-s);
     }
     else {
@@ -11306,7 +11414,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 {
     dVAR;
     SV *sv;                            /* scalar value: string */
-    char *tmps;                                /* temp string, used for delimiter matching */
+    const char *tmps;                  /* temp string, used for delimiter matching */
     register char *s = start;          /* current position in the buffer */
     register char term;                        /* terminating character */
     register char *to;                 /* current position in the sv's data */
@@ -12016,7 +12124,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
        if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
                       (PL_hints & HINT_NEW_INTEGER) )
-           sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
+           sv = new_constant(PL_tokenbuf,
+                             d - PL_tokenbuf,
+                             (const char *)
                              (floatit ? "float" : "integer"),
                              sv, NULL, NULL);
        break;
@@ -12413,6 +12523,15 @@ S_swallow_bom(pTHX_ U8 *s)
                  goto utf16be;
             }
        }
+#ifdef EBCDIC
+    case 0xDD:
+        if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
+            if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+            s += 4;                      /* UTF-8 */
+        }
+        break;
+#endif
+
     default:
         if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
                  /* Leading bytes