Argh, cut-and-pasto.
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index e7834c4..23ae908 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,6 +1,7 @@
 /*    toke.c
  *
- *    Copyright (c) 1991-2002, Larry Wall
+ *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ *    2000, 2001, 2002, 2003, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -2141,7 +2142,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
 #ifdef DEBUGGING
     static char* exp_name[] =
        { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
-         "ATTRTERM", "TERMBLOCK"
+         "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
        };
 #endif
 
@@ -3025,6 +3026,8 @@ Perl_yylex(pTHX)
                        CvLOCKED_on(PL_compcv);
                    else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
                        CvMETHOD_on(PL_compcv);
+                   else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
+                       CvASSERTION_on(PL_compcv);
 #ifdef USE_ITHREADS
                    else if (PL_in_my == KEY_our && len == 6 &&
                             strnEQ(s, "unique", len))
@@ -3086,6 +3089,7 @@ Perl_yylex(pTHX)
            PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
        else
            PL_expect = XTERM;
+       s = skipspace(s);
        TOKEN('(');
     case ';':
        CLINE;
@@ -3207,12 +3211,17 @@ Perl_yylex(pTHX)
                            || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
                                && !isALNUM(*t))))
                    {
+                       /* skip q//-like construct */
                        char *tmps;
                        char open, close, term;
                        I32 brackets = 1;
 
                        while (t < PL_bufend && isSPACE(*t))
                            t++;
+                       /* check for q => */
+                       if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
+                           OPERATOR(HASHBRACK);
+                       }
                        term = *t;
                        open = term;
                        if (term && (tmps = strchr("([{< )]}> )]}>",term)))
@@ -3225,7 +3234,7 @@ Perl_yylex(pTHX)
                                else if (*t == open)
                                    break;
                            }
-                       else
+                       else {
                            for (t++; t < PL_bufend; t++) {
                                if (*t == '\\' && t+1 < PL_bufend)
                                    t++;
@@ -3234,8 +3243,13 @@ Perl_yylex(pTHX)
                                else if (*t == open)
                                    brackets++;
                            }
+                       }
+                       t++;
                    }
-                   t++;
+                   else
+                       /* skip plain q word */
+                       while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+                            t += UTF8SKIP(t);
                }
                else if (isALNUM_lazy_if(t,UTF)) {
                    t += UTF8SKIP(t);
@@ -4186,8 +4200,29 @@ Perl_yylex(pTHX)
                }
 #endif
 #ifdef PERLIO_LAYERS
-               if (UTF && !IN_BYTES)
-                   PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
+               if (!IN_BYTES) {
+                   if (UTF)
+                       PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
+                   else if (PL_encoding) {
+                       SV *name;
+                       dSP;
+                       ENTER;
+                       SAVETMPS;
+                       PUSHMARK(sp);
+                       EXTEND(SP, 1);
+                       XPUSHs(PL_encoding);
+                       PUTBACK;
+                       call_method("name", G_SCALAR);
+                       SPAGAIN;
+                       name = POPs;
+                       PUTBACK;
+                       PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, 
+                                           Perl_form(aTHX_ ":encoding(%"SVf")",
+                                                     name));
+                       FREETMPS;
+                       LEAVE;
+                   }
+               }
 #endif
                PL_rsfp = Nullfp;
            }
@@ -5239,7 +5274,7 @@ static int
 S_pending_ident(pTHX)
 {
     register char *d;
-    register I32 tmp;
+    register I32 tmp = 0;
     /* pit holds the identifier we read and pending_ident is reset */
     char pit = PL_pending_ident;
     PL_pending_ident = 0;
@@ -6274,8 +6309,10 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
        }
        if (*s == '}') {
            s++;
-           if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
+           if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
                PL_lex_state = LEX_INTERPEND;
+               PL_expect = XREF;
+           }
            if (funny == '#')
                funny = '@';
            if (PL_lex_state == LEX_NORMAL) {
@@ -6662,8 +6699,12 @@ retval:
        Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
     }
     SvREFCNT_dec(herewas);
-    if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
-       SvUTF8_on(tmpstr);
+    if (!IN_BYTES) {
+       if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+           SvUTF8_on(tmpstr);
+       else if (PL_encoding)
+           sv_recode_to_utf8(tmpstr, PL_encoding);
+    }
     PL_lex_stuff = tmpstr;
     yylval.ival = op_type;
     return s;
@@ -6882,6 +6923,10 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     register char *to;                 /* current position in the sv's data */
     I32 brackets = 1;                  /* bracket nesting level */
     bool has_utf8 = FALSE;             /* is there any utf8 content? */
+    I32 termcode;                      /* terminating char. code */
+    U8 termstr[UTF8_MAXLEN];           /* terminating string */
+    STRLEN termlen;                    /* length of terminating string */
+    char *last = NULL;                 /* last position for nesting bracket */
 
     /* skip space before the delimiter */
     if (isSPACE(*s))
@@ -6892,8 +6937,16 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
     /* after skipping whitespace, the next character is the terminator */
     term = *s;
-    if (!UTF8_IS_INVARIANT((U8)term) && UTF)
-       has_utf8 = TRUE;
+    if (!UTF) {
+       termcode = termstr[0] = term;
+       termlen = 1;
+    }
+    else {
+       termcode = utf8_to_uvchr((U8*)s, &termlen);
+       Copy(s, termstr, termlen, U8);
+       if (!UTF8_IS_INVARIANT(term))
+           has_utf8 = TRUE;
+    }
 
     /* mark where we are */
     PL_multi_start = CopLINE(PL_curcop);
@@ -6901,21 +6954,92 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
     /* find corresponding closing delimiter */
     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
-       term = tmps[5];
+       termcode = termstr[0] = term = tmps[5];
+
     PL_multi_close = term;
 
     /* create a new SV to hold the contents.  87 is leak category, I'm
        assuming.  79 is the SV's initial length.  What a random number. */
     sv = NEWSV(87,79);
     sv_upgrade(sv, SVt_PVIV);
-    SvIVX(sv) = term;
+    SvIVX(sv) = termcode;
     (void)SvPOK_only(sv);              /* validate pointer */
 
     /* move past delimiter and try to read a complete string */
     if (keep_delims)
-       sv_catpvn(sv, s, 1);
-    s++;
+       sv_catpvn(sv, s, termlen);
+    s += termlen;
     for (;;) {
+       if (PL_encoding && !UTF) {
+           bool cont = TRUE;
+
+           while (cont) {
+               int offset = s - SvPVX(PL_linestr);
+               bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+                                          &offset, (char*)termstr, termlen);
+               char *ns = SvPVX(PL_linestr) + offset;
+               char *svlast = SvEND(sv) - 1;
+
+               for (; s < ns; s++) {
+                   if (*s == '\n' && !PL_rsfp)
+                       CopLINE_inc(PL_curcop);
+               }
+               if (!found)
+                   goto read_more_line;
+               else {
+                   /* handle quoted delimiters */
+                   if (*(svlast-1) == '\\') {
+                       char *t;
+                       for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
+                           t--;
+                       if ((svlast-1 - t) % 2) {
+                           if (!keep_quoted) {
+                               *(svlast-1) = term;
+                               *svlast = '\0';
+                               SvCUR_set(sv, SvCUR(sv) - 1);
+                           }
+                           continue;
+                       }
+                   }
+                   if (PL_multi_open == PL_multi_close) {
+                       cont = FALSE;
+                   }
+                   else {
+                       char *t, *w;
+                       if (!last)
+                           last = SvPVX(sv);
+                       for (w = t = last; t < svlast; w++, t++) {
+                           /* At here, all closes are "was quoted" one,
+                              so we don't check PL_multi_close. */
+                           if (*t == '\\') {
+                               if (!keep_quoted && *(t+1) == PL_multi_open)
+                                   t++;
+                               else
+                                   *w++ = *t++;
+                           }
+                           else if (*t == PL_multi_open)
+                               brackets++;
+
+                           *w = *t;
+                       }
+                       if (w < t) {
+                           *w++ = term;
+                           *w = '\0';
+                           SvCUR_set(sv, w - SvPVX(sv));
+                       }
+                       last = w;
+                       if (--brackets <= 0)
+                           cont = FALSE;
+                   }
+               }
+           }
+           if (!keep_delims) {
+               SvCUR_set(sv, SvCUR(sv) - 1);
+               *SvEND(sv) = '\0';
+           }
+           break;
+       }
+
        /* extend sv if need be */
        SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
        /* set 'to' to the next character in the sv's string */
@@ -6937,8 +7061,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
                }
                /* terminate when run out of buffer (the for() condition), or
                   have found the terminator */
-               else if (*s == term)
-                   break;
+               else if (*s == term) {
+                   if (termlen == 1)
+                       break;
+                   if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
+                       break;
+               }
                else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
                    has_utf8 = TRUE;
                *to = *s;
@@ -7000,6 +7128,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
            to[-1] = '\n';
 #endif
        
+     read_more_line:
        /* if we're out of file, or a read fails, bail and reset the current
           line marker so we can report where the unterminated string began
        */
@@ -7030,15 +7159,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
     /* at this point, we have successfully read the delimited string */
 
-    if (keep_delims)
-       sv_catpvn(sv, s, 1);
-    if (has_utf8)
+    if (!PL_encoding || UTF) {
+       if (keep_delims)
+           sv_catpvn(sv, s, termlen);
+       s += termlen;
+    }
+    if (has_utf8 || PL_encoding)
        SvUTF8_on(sv);
-    else if (PL_encoding)
-       sv_recode_to_utf8(sv, PL_encoding);
 
     PL_multi_end = CopLINE(PL_curcop);
-    s++;
 
     /* if we allocated too much space, give some back */
     if (SvCUR(sv) + 5 < SvLEN(sv)) {
@@ -7525,6 +7654,12 @@ S_scan_formline(pTHX_ register char *s)
        }
        else
            PL_lex_state = LEX_FORMLINE;
+       if (!IN_BYTES) {
+           if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
+               SvUTF8_on(stuff);
+           else if (PL_encoding)
+               sv_recode_to_utf8(stuff, PL_encoding);
+       }
        PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
        force_next(THING);
        PL_nextval[PL_nexttoke].ival = OP_FORMLINE;