Add =head2 to the perldelta change in da76b8593e
[p5sagit/p5-mst-13.2.git] / toke.c
diff --git a/toke.c b/toke.c
index fcfdd71..b5236da 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -938,6 +938,7 @@ function is more convenient.
 void
 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 {
+    dVAR;
     char *bufptr;
     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
     if (flags & ~(LEX_STUFF_UTF8))
@@ -955,6 +956,8 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr,
+               SvCUR(PL_parser->linestr) + len+highhalf);
            PL_parser->bufend += len+highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
@@ -993,6 +996,8 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr,
+               SvCUR(PL_parser->linestr) + len-highhalf);
            PL_parser->bufend += len-highhalf;
            for (p = pv; p != e; p++) {
                U8 c = (U8)*p;
@@ -1008,6 +1013,7 @@ Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
            lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
            bufptr = PL_parser->bufptr;
            Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+           SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
            PL_parser->bufend += len;
            Copy(pv, bufptr, len, char);
        }
@@ -1123,7 +1129,7 @@ it is not permitted to discard text that has yet to be lexed.
 Normally it is not necessarily to do this directly, because it suffices to
 use the implicit discarding behaviour of L</lex_next_chunk> and things
 based on it.  However, if a token stretches across multiple lines,
-and the lexing code has kept multiple lines of text in the buffer fof
+and the lexing code has kept multiple lines of text in the buffer for
 that purpose, then after completion of the token it would be wise to
 explicitly discard the now-unneeded earlier lines, to avoid future
 multi-line tokens growing the buffer without bound.
@@ -1302,6 +1308,7 @@ is encountered, an exception is generated.
 I32
 Perl_lex_peek_unichar(pTHX_ U32 flags)
 {
+    dVAR;
     char *s, *bufend;
     if (flags & ~(LEX_KEEP_PREVIOUS))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
@@ -2968,10 +2975,10 @@ S_scan_const(pTHX_ char *start)
                 * errors and upgrading to utf8) is:
                 *  Further disambiguate between the two meanings of \N, and if
                 *      not a charname, go process it elsewhere
-                *  If of form \N{U+...}, pass it through if a pattern; otherwise
-                *      convert to utf8
-                *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a pattern;
-                *      otherwise convert to utf8 */
+                *  If of form \N{U+...}, pass it through if a pattern;
+                *      otherwise convert to utf8
+                *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+                *  pattern; otherwise convert to utf8 */
 
                /* Here, s points to the 'N'; the test below is guaranteed to
                 * succeed if we are being called on a pattern as we already
@@ -2985,27 +2992,14 @@ S_scan_const(pTHX_ char *start)
                }
                s++;
 
-               /* If there is no matching '}', it is an error outside of a
-                * pattern, or ambiguous inside. */
+               /* If there is no matching '}', it is an error. */
                if (! (e = strchr(s, '}'))) {
                    if (! PL_lex_inpat) {
                        yyerror("Missing right brace on \\N{}");
-                       continue;
-                   }
-                   else {
-
-                       /* A missing brace means it can't be a legal character
-                        * name, and it could be a legal "match non-newline".
-                        * But it's kind of weird without an unescaped left
-                        * brace, so warn. */
-                       if (ckWARN(WARN_SYNTAX)) {
-                           Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                   "Missing right brace on \\N{} or unescaped left brace after \\N.  Assuming the latter");
-                       }
-                       s -= 3; /* Backup over cur char, {, N, to the '\' */
-                       *d++ = NATIVE_TO_NEED(has_utf8,'\\');
-                       goto default_action;
+                   } else {
+                       yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
                    }
+                   continue;
                }
 
                /* Here it looks like a named character */
@@ -3053,9 +3047,7 @@ S_scan_const(pTHX_ char *start)
 
                        /* Pass through to the regex compiler unchanged.  The
                         * reason we evaluated the number above is to make sure
-                        * there wasn't a syntax error.  It also makes sure
-                        * that the syntax created below, \N{Uc1.c2...}, is
-                        * internal-only */
+                        * there wasn't a syntax error. */
                        s -= 5;     /* Include the '\N{U+' */
                        Copy(s, d, e - s + 1, char);    /* 1 = include the } */
                        d += e - s + 1;
@@ -3219,7 +3211,70 @@ S_scan_const(pTHX_ char *start)
                        d += len;
                    }
                    SvREFCNT_dec(res);
-               }
+
+                   /* Deprecate non-approved name syntax */
+                   if (ckWARN_d(WARN_DEPRECATED)) {
+                       bool problematic = FALSE;
+                       char* i = s;
+
+                       /* For non-ut8 input, look to see that the first
+                        * character is an alpha, then loop through the rest
+                        * checking that each is a continuation */
+                       if (! this_utf8) {
+                           if (! isALPHAU(*i)) problematic = TRUE;
+                           else for (i = s + 1; i < e; i++) {
+                               if (isCHARNAME_CONT(*i)) continue;
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       else {
+                           /* Similarly for utf8.  For invariants can check
+                            * directly.  We accept anything above the latin1
+                            * range because it is immaterial to Perl if it is
+                            * correct or not, and is expensive to check.  But
+                            * it is fairly easy in the latin1 range to convert
+                            * the variants into a single character and check
+                            * those */
+                           if (UTF8_IS_INVARIANT(*i)) {
+                               if (! isALPHAU(*i)) problematic = TRUE;
+                           } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                               if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+                                                                           *(i+1)))))
+                               {
+                                   problematic = TRUE;
+                               }
+                           }
+                           if (! problematic) for (i = s + UTF8SKIP(s);
+                                                   i < e;
+                                                   i+= UTF8SKIP(i))
+                           {
+                               if (UTF8_IS_INVARIANT(*i)) {
+                                   if (isCHARNAME_CONT(*i)) continue;
+                               } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                                   continue;
+                               } else if (isCHARNAME_CONT(
+                                           UNI_TO_NATIVE(
+                                           UTF8_ACCUMULATE(*i, *(i+1)))))
+                               {
+                                   continue;
+                               }
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       if (problematic) {
+                           char *string;
+                           Newx(string, e - i + 1, char);
+                           Copy(i, string, e - i, char);
+                           string[e - i] = '\0';
+                           Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                               "Deprecated character(s) in \\N{...} starting at '%s'",
+                               string);
+                           Safefree(string);
+                       }
+                   }
+               } /* End \N{NAME} */
 #ifdef EBCDIC
                if (!dorange) 
                    native_range = FALSE; /* \N{} is defined to be Unicode */
@@ -6093,8 +6148,6 @@ Perl_yylex(pTHX)
        /* Is this a label? */
        if (!anydelim && PL_expect == XSTATE
              && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
-           if (tmp)
-               Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
            s = d + 1;
            pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
            CLINE;
@@ -11520,7 +11573,8 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
     }
 
     /* charnames doesn't work well if there have been errors found */
-    if (PL_error_count > 0 && strEQ(key,"charnames")) return res;
+    if (PL_error_count > 0 && strEQ(key,"charnames"))
+       return &PL_sv_undef;
 
     cvp = hv_fetch(table, key, keylen, FALSE);
     if (!cvp || !SvOK(*cvp)) {
@@ -13646,7 +13700,7 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
     SV *const utf8_buffer = filter;
     IV status = IoPAGE(filter);
-    const bool reverse = (bool) IoLINES(filter);
+    const bool reverse = cBOOL(IoLINES(filter));
     I32 retval;
 
     /* As we're automatically added, at the lowest level, and hence only called