Upgrade to I18N::LangTags 0.30.
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index bc4194b..b113499 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -23,8 +23,8 @@
 #define PERL_IN_TOKE_C
 #include "perl.h"
 
-#define yychar PL_yychar
-#define yylval PL_yylval
+#define yychar (*PL_yycharp)
+#define yylval (*PL_yylvalp)
 
 static char ident_too_long[] = "Identifier too long";
 static char c_without_g[] = "Use of /c modifier is meaningless without /g";
@@ -79,22 +79,6 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 #undef ff_next
 #endif
 
-#ifdef USE_PURE_BISON
-#  ifndef YYMAXLEVEL
-#    define YYMAXLEVEL 100
-#  endif
-YYSTYPE* yylval_pointer[YYMAXLEVEL];
-int* yychar_pointer[YYMAXLEVEL];
-int yyactlevel = -1;
-#  undef yylval
-#  undef yychar
-#  define yylval (*yylval_pointer[yyactlevel])
-#  define yychar (*yychar_pointer[yyactlevel])
-#  define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
-#  undef yylex
-#  define yylex()      Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
-#endif
-
 #include "keywords.h"
 
 /* CLINE is a macro that ensures PL_copline has a sane value */
@@ -1236,7 +1220,7 @@ S_scan_const(pTHX_ char *start)
 
     const char *leaveit =      /* set of acceptably-backslashed characters */
        PL_lex_inpat
-           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
            : "";
 
     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
@@ -2176,26 +2160,6 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
       if we already built the token before, use it.
 */
 
-#ifdef USE_PURE_BISON
-int
-Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
-{
-    int r;
-
-    yyactlevel++;
-    yylval_pointer[yyactlevel] = lvalp;
-    yychar_pointer[yyactlevel] = lcharp;
-    if (yyactlevel >= YYMAXLEVEL)
-       Perl_croak(aTHX_ "panic: YYMAXLEVEL");
-
-    r = Perl_yylex(aTHX);
-
-    if (yyactlevel > 0)
-       yyactlevel--;
-
-    return r;
-}
-#endif
 
 #ifdef __SC__
 #pragma segment Perl_yylex
@@ -2533,8 +2497,13 @@ Perl_yylex(pTHX)
                sv_setpv(PL_linestr,"");
                TOKEN(';');     /* not infinite loop because rsfp is NULL now */
            }
-           /* if it looks like the start of a BOM, check if it in fact is */
-           else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+           /* If it looks like the start of a BOM or raw UTF-16,
+            * check if it in fact is. */
+           else if (bof &&
+                    (*s == 0 ||
+                     *(U8*)s == 0xEF ||
+                     *(U8*)s >= 0xFE ||
+                     s[1] == 0)) {
 #ifdef PERLIO_IS_STDIO
 #  ifdef __GNU_LIBRARY__
 #    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
@@ -3418,8 +3387,24 @@ Perl_yylex(pTHX)
     case '!':
        s++;
        tmp = *s++;
-       if (tmp == '=')
+       if (tmp == '=') {
+            /* was this !=~ where !~ was meant?
+             * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
+
+            if (*s == '~' && ckWARN(WARN_SYNTAX)) {
+                char *t = s+1;
+
+                while (t < PL_bufend && isSPACE(*t))
+                    ++t;
+
+                if (*t == '/' || *t == '?' ||
+                    ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
+                    (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
+                    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                                "!=~ should be !~");
+            }
            Eop(OP_NE);
+        }
        if (tmp == '~')
            PMop(OP_NOT);
        s--;
@@ -4701,8 +4686,8 @@ Perl_yylex(pTHX)
            if (isIDFIRST_lazy_if(s,UTF)) {
                char *t;
                for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
-               t = skipspace(d);
-               if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+               for (t=d; *t && isSPACE(*t); t++) ;
+               if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
                    /* [perl #16184] */
                    && !(t[0] == '=' && t[1] == '>')
                ) {
@@ -6522,7 +6507,8 @@ S_scan_trans(pTHX_ char *start)
 
     New(803, tbl, complement&&!del?258:256, short);
     o = newPVOP(OP_TRANS, 0, (char*)tbl);
-    o->op_private = del|squash|complement|
+    o->op_private &= ~OPpTRANS_ALL;
+    o->op_private |= del|squash|complement|
       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
 
@@ -7796,12 +7782,7 @@ Perl_yyerror(pTHX_ char *s)
     }
     else if (yychar > 255)
        where = "next token ???";
-#ifdef USE_PURE_BISON
-/*  GNU Bison sets the value -2 */
-    else if (yychar == -2) {
-#else
-    else if ((yychar & 127) == 127) {
-#endif
+    else if (yychar == -2) { /* YYEMPTY */
        if (PL_lex_state == LEX_NORMAL ||
           (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
            where = "at end of line";
@@ -7858,72 +7839,94 @@ S_swallow_bom(pTHX_ U8 *s)
 {
     STRLEN slen;
     slen = SvCUR(PL_linestr);
-    switch (*s) {
+    switch (s[0]) {
     case 0xFF:
        if (s[1] == 0xFE) {
-           /* UTF-16 little-endian */
+           /* UTF-16 little-endian? (or UTF32-LE?) */
            if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
-               Perl_croak(aTHX_ "Unsupported script encoding");
+               Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
 #ifndef PERL_NO_UTF16_FILTER
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
            s += 2;
+       utf16le:
            if (PL_bufend > (char*)s) {
                U8 *news;
                I32 newlen;
 
                filter_add(utf16rev_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
-                                                PL_bufend - (char*)s - 1,
-                                                &newlen);
-               Copy(news, s, newlen, U8);
-               SvCUR_set(PL_linestr, newlen);
-               PL_bufend = SvPVX(PL_linestr) + newlen;
-               news[newlen++] = '\0';
+               PL_bufend =
+                    (char*)utf16_to_utf8_reversed(s, news,
+                                                  PL_bufend - (char*)s - 1,
+                                                  &newlen);
+               sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
+               SvUTF8_on(PL_linestr);
+               s = (U8*)SvPVX(PL_linestr);
+               PL_bufend = SvPVX(PL_linestr) + newlen;
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
 #endif
        }
        break;
     case 0xFE:
-       if (s[1] == 0xFF) {   /* UTF-16 big-endian */
+       if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
 #ifndef PERL_NO_UTF16_FILTER
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
            s += 2;
+       utf16be:
            if (PL_bufend > (char *)s) {
                U8 *news;
                I32 newlen;
 
                filter_add(utf16_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend = (char*)utf16_to_utf8(s, news,
-                                                PL_bufend - (char*)s,
-                                                &newlen);
-               Copy(news, s, newlen, U8);
-               SvCUR_set(PL_linestr, newlen);
-               PL_bufend = SvPVX(PL_linestr) + newlen;
-               news[newlen++] = '\0';
+               PL_bufend =
+                    (char*)utf16_to_utf8(s, news,
+                                         PL_bufend - (char*)s,
+                                         &newlen);
+               sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
+               SvUTF8_on(PL_linestr);
+               s = (U8*)SvPVX(PL_linestr);
+               PL_bufend = SvPVX(PL_linestr) + newlen;
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
 #endif
        }
        break;
     case 0xEF:
        if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
            s += 3;                      /* UTF-8 */
        }
        break;
     case 0:
-       if (slen > 3 && s[1] == 0 &&  /* UTF-32 big-endian */
-           s[2] == 0xFE && s[3] == 0xFF)
-       {
-           Perl_croak(aTHX_ "Unsupported script encoding");
+       if (slen > 3) {
+            if (s[1] == 0) {
+                 if (s[2] == 0xFE && s[3] == 0xFF) {
+                      /* UTF-32 big-endian */
+                      Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+                 }
+            }
+            else if (s[2] == 0 && s[3] != 0) {
+                 /* Leading bytes
+                  * 00 xx 00 xx
+                  * are a good indicator of UTF-16BE. */
+                 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+                 goto utf16be;
+            }
        }
+    default:
+        if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+                 /* Leading bytes
+                  * xx 00 xx 00
+                  * are a good indicator of UTF-16LE. */
+             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+             goto utf16le;
+        }
     }
     return (char*)s;
 }