X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=52706a324de632b1e0ff0c93a708867bee8cdf35;hb=f86a8bc58a487e28b31c5d91c65920f4859f3f74;hp=e312c4eef1c20716ac27a9b05c1a815c3b9bff05;hpb=7c95f590e1d3e90413f423893f54acc0b5dc228c;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/toke.c b/toke.c
index e312c4e..52706a3 100644
--- a/toke.c
+++ b/toke.c
@@ -36,10 +36,17 @@ static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
 #define XFAKEBRACK 128
 #define XENUMMASK 127
 
-/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
-#define UTF (PL_hints & HINT_UTF8)
+#ifdef USE_UTF8_SCRIPTS
+#   define UTF (!IN_BYTES)
+#else
+#   ifdef EBCDIC /* For now 'use utf8' does not affect tokenizer on EBCDIC */
+#       define UTF (PL_linestr && DO_UTF8(PL_linestr))
+#   else
+#       define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
+#   endif
+#endif
 
-/* In variables name $^X, these are the legal values for X.
+/* In variables named $^X, these are the legal values for X.
  * 1999-02-27 mjd-perl-patch@plover.com */
 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 
@@ -177,13 +184,15 @@ int yyactlevel = -1;
 /* grandfather return to old style */
 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 
+#ifdef DEBUGGING
+
 STATIC void
 S_tokereport(pTHX_ char *thing, char* s, I32 rv)
-{ 
-    SV *report;
+{
     DEBUG_T({
-        report = newSVpv(thing, 0);
-        Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
+        SV* report = newSVpv(thing, 0);
+        Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
+		(IV)rv);
 
         if (s - PL_bufptr > 0)
             sv_catpvn(report, PL_bufptr, s - PL_bufptr);
@@ -192,9 +201,11 @@ S_tokereport(pTHX_ char *thing, char* s, I32 rv)
                 sv_catpv(report, PL_tokenbuf);
         }
         PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
-    })
+    });
 }
 
+#endif
+
 /*
  * S_ao
  *
@@ -533,7 +544,7 @@ S_skipspace(pTHX_ register char *s)
     for (;;) {
 	STRLEN prevlen;
 	SSize_t oldprevlen, oldoldprevlen;
-	SSize_t oldloplen, oldunilen;
+	SSize_t oldloplen = 0, oldunilen = 0;
 	while (s < PL_bufend && isSPACE(*s)) {
 	    if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
 		incline(s);
@@ -838,7 +849,7 @@ Perl_str_to_version(pTHX_ SV *sv)
 	STRLEN skip;
 	UV n;
 	if (utf)
-	    n = utf8_to_uv((U8*)start, len, &skip, 0);
+	    n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
 	else {
 	    n = *(U8*)start;
 	    skip = 1;
@@ -917,8 +928,11 @@ S_tokeq(pTHX_ SV *sv)
     if (s == send)
 	goto finish;
     d = s;
-    if ( PL_hints & HINT_NEW_STRING )
+    if ( PL_hints & HINT_NEW_STRING ) {
 	pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
+	if (SvUTF8(sv))
+	    SvUTF8_on(pv);
+    }
     while (s < send) {
 	if (*s == '\\') {
 	    if (s + 1 < send && (s[1] == '\\'))
@@ -1035,6 +1049,7 @@ S_sublex_push(pTHX)
     SAVEI32(PL_lex_inwhat);
     SAVECOPLINE(PL_curcop);
     SAVEPPTR(PL_bufptr);
+    SAVEPPTR(PL_bufend);
     SAVEPPTR(PL_oldbufptr);
     SAVEPPTR(PL_oldoldbufptr);
     SAVEPPTR(PL_last_lop);
@@ -1215,22 +1230,22 @@ S_scan_const(pTHX_ char *start)
     register char *d = SvPVX(sv);		/* destination for copies */
     bool dorange = FALSE;			/* are we in a translit range? */
     bool didrange = FALSE;		        /* did we just finish a range? */
-    bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
-						/* the constant is UTF8 */
+    I32  has_utf8 = FALSE;			/* Output constant is UTF8 */
+    I32  this_utf8 = UTF;			/* The source string is assumed to be UTF8 */
     UV uv;
 
-    I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
-	? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
-	: UTF;
-    I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
-	? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
-						OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
-	: UTF;
     const char *leaveit =	/* set of acceptably-backslashed characters */
 	PL_lex_inpat
 	    ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
 	    : "";
 
+    if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+	/* If we are doing a trans and we know we want UTF8, set the expectation */
+	has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+	this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+    }
+
+
     while (s < send || dorange) {
         /* get transliterations out of the way (they're most literal) */
 	if (PL_lex_inwhat == OP_TRANS) {
@@ -1240,17 +1255,18 @@ S_scan_const(pTHX_ char *start)
 		I32 min;			/* first character in range */
 		I32 max;			/* last character in range */
 
-		if (utf) {
+		if (has_utf8) {
 		    char *c = (char*)utf8_hop((U8*)d, -1);
 		    char *e = d++;
 		    while (e-- > c)
 			*(e + 1) = *e;
-		    *c = 0xff;
+		    *c = (char)UTF_TO_NATIVE(0xff);
 		    /* mark the range as done, and continue */
 		    dorange = FALSE;
 		    didrange = TRUE;
 		    continue;
 		}
+
 		i = d - SvPVX(sv);		/* remember current offset */
 		SvGROW(sv, SvLEN(sv) + 256);	/* never more than 256 chars in a range */
 		d = SvPVX(sv) + i;		/* refresh d after realloc */
@@ -1265,17 +1281,17 @@ S_scan_const(pTHX_ char *start)
 			       (char)min, (char)max);
                 }
 
-#ifndef ASCIIish
+#ifdef EBCDIC
 		if ((isLOWER(min) && isLOWER(max)) ||
 		    (isUPPER(min) && isUPPER(max))) {
 		    if (isLOWER(min)) {
 			for (i = min; i <= max; i++)
 			    if (isLOWER(i))
-				*d++ = i;
+				*d++ = NATIVE_TO_NEED(has_utf8,i);
 		    } else {
 			for (i = min; i <= max; i++)
 			    if (isUPPER(i))
-				*d++ = i;
+				*d++ = NATIVE_TO_NEED(has_utf8,i);
 		    }
 		}
 		else
@@ -1294,8 +1310,8 @@ S_scan_const(pTHX_ char *start)
 		if (didrange) {
 		    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
 		}
-		if (utf) {
-		    *d++ = (char)0xff;	/* use illegal utf8 byte--see pmtrans */
+		if (has_utf8) {
+		    *d++ = (char)UTF_TO_NATIVE(0xff);	/* use illegal utf8 byte--see pmtrans */
 		    s++;
 		    continue;
 		}
@@ -1314,7 +1330,7 @@ S_scan_const(pTHX_ char *start)
 	else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
 	    if (s[2] == '#') {
 		while (s < send && *s != ')')
-		    *d++ = *s++;
+		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
 	    }
 	    else if (s[2] == '{' /* This should match regcomp.c */
 		     || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
@@ -1337,7 +1353,7 @@ S_scan_const(pTHX_ char *start)
 		    yyerror("Sequence (?{...}) not terminated or not {}-balanced");
 		}
 		while (s < regparse)
-		    *d++ = *s++;
+		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
 	    }
 	}
 
@@ -1345,7 +1361,7 @@ S_scan_const(pTHX_ char *start)
 	else if (*s == '#' && PL_lex_inpat &&
 	  ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
 	    while (s+1 < send && *s != '\n')
-		*d++ = *s++;
+		*d++ = NATIVE_TO_NEED(has_utf8,*s++);
 	}
 
 	/* check for embedded arrays
@@ -1361,18 +1377,20 @@ S_scan_const(pTHX_ char *start)
 	else if (*s == '$') {
 	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
 		break;
-	    if (s + 1 < send && !strchr("()| \n\t", s[1]))
+	    if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
 		break;		/* in regexp, $ might be tail anchor */
 	}
 
+	/* End of else if chain - OP_TRANS rejoin rest */
+
 	/* backslashes */
 	if (*s == '\\' && s+1 < send) {
 	    s++;
 
 	    /* some backslashes we leave behind */
 	    if (*leaveit && *s && strchr(leaveit, *s)) {
-		*d++ = '\\';
-		*d++ = *s++;
+		*d++ = NATIVE_TO_NEED(has_utf8,'\\');
+		*d++ = NATIVE_TO_NEED(has_utf8,*s++);
 		continue;
 	    }
 
@@ -1427,14 +1445,14 @@ S_scan_const(pTHX_ char *start)
 		++s;
 		if (*s == '{') {
 		    char* e = strchr(s, '}');
+		    STRLEN len = 1;		/* allow underscores */
+
 		    if (!e) {
 			yyerror("Missing right brace on \\x{}");
-			e = s;
-		    }
-		    else {
-			STRLEN len = 1;		/* allow underscores */
-			uv = (UV)scan_hex(s + 1, e - s - 1, &len);
+			++s;
+			continue;
 		    }
+		    uv = (UV)scan_hex(s + 1, e - s - 1, &len);
 		    s = e + 1;
 		}
 		else {
@@ -1448,15 +1466,13 @@ S_scan_const(pTHX_ char *start)
 	      NUM_ESCAPE_INSERT:
 		/* Insert oct or hex escaped character.
 		 * There will always enough room in sv since such
-		 * escapes will be longer than any UT-F8 sequence
+		 * escapes will be longer than any UTF-8 sequence
 		 * they can end up as. */
-
-		/* This spot is wrong for EBCDIC.  Characters like
-		 * the lowercase letters and digits are >127 in EBCDIC,
-		 * so here they would need to be mapped to the Unicode
-		 * repertoire.   --jhi */
 		
-		if (uv > 127) {
+		/* We need to map chars to ASCII before doing the tests
+		   to cover EBCDIC
+		*/
+		if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
 		    if (!has_utf8 && uv > 255) {
 		        /* Might need to recode whatever we have
 			 * accumulated so far if it contains any
@@ -1465,46 +1481,42 @@ S_scan_const(pTHX_ char *start)
 			 * (Can't we keep track of that and avoid
 			 *  this rescan? --jhi)
 			 */
-		        int hicount = 0;
-			char *c;
-
-			for (c = SvPVX(sv); c < d; c++) {
-			    if (UTF8_IS_CONTINUED(*c))
+			int hicount = 0;
+			U8 *c;
+			for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
+			    if (!NATIVE_IS_INVARIANT(*c)) {
 			        hicount++;
+			    }
 			}
 			if (hicount) {
-			    char *old_pvx = SvPVX(sv);
-			    char *src, *dst;
-			  
-			    d = SvGROW(sv,
-				       SvLEN(sv) + hicount + 1) +
-				         (d - old_pvx);
-
-			    src = d - 1;
-			    d += hicount;
-			    dst = d - 1;
-
-			    while (src < dst) {
-			        if (UTF8_IS_CONTINUED(*src)) {
- 				    *dst-- = UTF8_EIGHT_BIT_LO(*src);
- 				    *dst-- = UTF8_EIGHT_BIT_HI(*src--);
+			    STRLEN offset = d - SvPVX(sv);
+			    U8 *src, *dst;
+			    d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
+			    src = (U8 *)d - 1;
+			    dst = src+hicount;
+			    d  += hicount;
+			    while (src >= (U8 *)SvPVX(sv)) {
+			        if (!NATIVE_IS_INVARIANT(*src)) {
+				    U8 ch = NATIVE_TO_ASCII(*src);
+				    *dst-- = UTF8_EIGHT_BIT_LO(ch);
+				    *dst-- = UTF8_EIGHT_BIT_HI(ch);
 			        }
 			        else {
-				    *dst-- = *src--;
+				    *dst-- = *src;
 			        }
+				src--;
 			    }
                         }
                     }
 
                     if (has_utf8 || uv > 255) {
-		        d = (char*)uv_to_utf8((U8*)d, uv);
+		        d = (char*)uvchr_to_utf8((U8*)d, uv);
 			has_utf8 = TRUE;
 			if (PL_lex_inwhat == OP_TRANS &&
 			    PL_sublex_info.sub_op) {
 			    PL_sublex_info.sub_op->op_private |=
 				(PL_lex_repl ? OPpTRANS_FROM_UTF
 					     : OPpTRANS_TO_UTF);
-			    utf = TRUE;
 			}
                     }
 		    else {
@@ -1512,13 +1524,13 @@ S_scan_const(pTHX_ char *start)
 		    }
 		}
 		else {
-		    *d++ = (char)uv;
+		    *d++ = (char) uv;
 		}
 		continue;
 
  	    /* \N{latin small letter a} is a named character */
  	    case 'N':
- 		s++;
+ 		++s;
  		if (*s == '{') {
  		    char* e = strchr(s, '}');
  		    SV *res;
@@ -1530,30 +1542,9 @@ S_scan_const(pTHX_ char *start)
 			e = s - 1;
 			goto cont_scan;
 		    }
-		    if (s[1] == 'U' && s[2] == '+') { /* \N{U+HHHH} */
-			STRLEN alen = e - s - 3;
-			STRLEN blen;
-			UV uv = (UV)scan_hex(s + 3, alen, &blen);
-
-			if (blen == alen) {
-			    res = newSVpvn(s, (uv >> 8) + 1); /* filler */
-			    str = (char *)uv_to_utf8((U8*)SvPVX(res), uv);
-			    SvCUR_set(res, str - SvPVX(res));
-			    *str = '\0';
-			    if (uv > 0x7f)
-				SvUTF8_on(res);
-			}
-			else {
-			    yyerror("Illegal hexadecimal code on \\N{U+...}");
-			    e = s - 1;
-			    goto cont_scan;
-			}
-		    }
-		    else {
-			res = newSVpvn(s + 1, e - s - 1);
-			res = new_constant( Nullch, 0, "charnames",
-					    res, Nullsv, "\\N{...}" );
-		    }
+		    res = newSVpvn(s + 1, e - s - 1);
+		    res = new_constant( Nullch, 0, "charnames",
+					res, Nullsv, "\\N{...}" );
 		    if (has_utf8)
 			sv_utf8_upgrade(res);
 		    str = SvPV(res,len);
@@ -1587,51 +1578,38 @@ S_scan_const(pTHX_ char *start)
 	    /* \c is a control character */
 	    case 'c':
 		s++;
-#ifdef EBCDIC
-		*d = *s++;
-		if (isLOWER(*d))
-		   *d = toUPPER(*d);
-		*d = toCTRL(*d);
-		d++;
-#else
 		{
 		    U8 c = *s++;
-		    *d++ = toCTRL(c);
-		}
+#ifdef EBCDIC
+		    if (isLOWER(c))
+			c = toUPPER(c);
 #endif
+		    *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
+		}
 		continue;
 
 	    /* printf-style backslashes, formfeeds, newlines, etc */
 	    case 'b':
-		*d++ = '\b';
+		*d++ = NATIVE_TO_NEED(has_utf8,'\b');
 		break;
 	    case 'n':
-		*d++ = '\n';
+		*d++ = NATIVE_TO_NEED(has_utf8,'\n');
 		break;
 	    case 'r':
-		*d++ = '\r';
+		*d++ = NATIVE_TO_NEED(has_utf8,'\r');
 		break;
 	    case 'f':
-		*d++ = '\f';
+		*d++ = NATIVE_TO_NEED(has_utf8,'\f');
 		break;
 	    case 't':
-		*d++ = '\t';
-		break;
-#ifdef EBCDIC
-	    case 'e':
-		*d++ = '\047';  /* CP 1047 */
-		break;
-	    case 'a':
-		*d++ = '\057';  /* CP 1047 */
+		*d++ = NATIVE_TO_NEED(has_utf8,'\t');
 		break;
-#else
 	    case 'e':
-		*d++ = '\033';
+		*d++ = ASCII_TO_NEED(has_utf8,'\033');
 		break;
 	    case 'a':
-		*d++ = '\007';
+		*d++ = ASCII_TO_NEED(has_utf8,'\007');
 		break;
-#endif
 	    } /* end switch */
 
 	    s++;
@@ -1639,41 +1617,40 @@ S_scan_const(pTHX_ char *start)
 	} /* end if (backslash) */
 
     default_action:
-       if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
-           STRLEN len = (STRLEN) -1;
-           UV uv;
-           if (this_utf8) {
-               uv = utf8_to_uv((U8*)s, send - s, &len, 0);
-           }
-           if (len == (STRLEN)-1) {
-               /* Illegal UTF8 (a high-bit byte), make it valid. */
-               char *old_pvx = SvPVX(sv);
-               /* need space for one extra char (NOTE: SvCUR() not set here) */
-               d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
-               d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
-           }
-           else {
-               while (len--)
-                   *d++ = *s++;
-           }
-           has_utf8 = TRUE;
-	   if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
-	       PL_sublex_info.sub_op->op_private |=
-		   (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
-	       utf = TRUE;
-	   }
-           continue;
-       }
-
-       *d++ = *s++;
+	/* If we started with encoded form, or already know we want it,
+	   then encode the next character */
+	if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
+	    STRLEN len  = 1;
+	    UV uv       = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
+	    STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
+	    s += len;
+	    if (need > len) {
+		/* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
+		STRLEN off = d - SvPVX(sv);
+		d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
+	    }
+	    d = (char*)uvchr_to_utf8((U8*)d, uv);
+	    has_utf8 = TRUE;
+	}
+	else {
+	    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+	}
     } /* while loop to process each character */
 
     /* terminate the string and set up the sv */
     *d = '\0';
     SvCUR_set(sv, d - SvPVX(sv));
+    if (SvCUR(sv) >= SvLEN(sv))
+      Perl_croak(aTHX_ "panic: constant overflowed allocated space");
+
     SvPOK_on(sv);
-    if (has_utf8)
+    if (has_utf8) {
 	SvUTF8_on(sv);
+	if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+		PL_sublex_info.sub_op->op_private |=
+		    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+	}
+    }
 
     /* shrink the sv if we allocated more than we used */
     if (SvCUR(sv) + 5 < SvLEN(sv)) {
@@ -2192,132 +2169,8 @@ Perl_yylex(pTHX)
     bool bof = FALSE;
 
     /* check if there's an identifier for us to look at */
-    if (PL_pending_ident) {
-        /* pit holds the identifier we read and pending_ident is reset */
-	char pit = PL_pending_ident;
-	PL_pending_ident = 0;
-
-	DEBUG_T({ PerlIO_printf(Perl_debug_log,
-              "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
-
-	/* if we're in a my(), we can't allow dynamics here.
-	   $foo'bar has already been turned into $foo::bar, so
-	   just check for colons.
-
-	   if it's a legal name, the OP is a PADANY.
-	*/
-	if (PL_in_my) {
-	    if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
-		if (strchr(PL_tokenbuf,':'))
-		    yyerror(Perl_form(aTHX_ "No package name allowed for "
-				      "variable %s in \"our\"",
-				      PL_tokenbuf));
-		tmp = pad_allocmy(PL_tokenbuf);
-	    }
-	    else {
-		if (strchr(PL_tokenbuf,':'))
-		    yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
-
-		yylval.opval = newOP(OP_PADANY, 0);
-		yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
-		return PRIVATEREF;
-	    }
-	}
-
-	/*
-	   build the ops for accesses to a my() variable.
-
-	   Deny my($a) or my($b) in a sort block, *if* $a or $b is
-	   then used in a comparison.  This catches most, but not
-	   all cases.  For instance, it catches
-	       sort { my($a); $a <=> $b }
-	   but not
-	       sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
-	   (although why you'd do that is anyone's guess).
-	*/
-
-	if (!strchr(PL_tokenbuf,':')) {
-#ifdef USE_THREADS
-	    /* Check for single character per-thread SVs */
-	    if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
-		&& !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
-		&& (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
-	    {
-		yylval.opval = newOP(OP_THREADSV, 0);
-		yylval.opval->op_targ = tmp;
-		return PRIVATEREF;
-	    }
-#endif /* USE_THREADS */
-	    if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
-		SV *namesv = AvARRAY(PL_comppad_name)[tmp];
-		/* might be an "our" variable" */
-		if (SvFLAGS(namesv) & SVpad_OUR) {
-		    /* build ops for a bareword */
-		    SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
-		    sv_catpvn(sym, "::", 2);
-		    sv_catpv(sym, PL_tokenbuf+1);
-		    yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
-		    yylval.opval->op_private = OPpCONST_ENTERED;
-		    gv_fetchpv(SvPVX(sym),
-			(PL_in_eval
-			    ? (GV_ADDMULTI | GV_ADDINEVAL)
-			    : TRUE
-			),
-			((PL_tokenbuf[0] == '$') ? SVt_PV
-			 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
-			 : SVt_PVHV));
-		    return WORD;
-		}
-
-		/* if it's a sort block and they're naming $a or $b */
-		if (PL_last_lop_op == OP_SORT &&
-		    PL_tokenbuf[0] == '$' &&
-		    (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
-		    && !PL_tokenbuf[2])
-		{
-		    for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
-			 d < PL_bufend && *d != '\n';
-			 d++)
-		    {
-			if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
-			    Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
-				  PL_tokenbuf);
-			}
-		    }
-		}
-
-		yylval.opval = newOP(OP_PADANY, 0);
-		yylval.opval->op_targ = tmp;
-		return PRIVATEREF;
-	    }
-	}
-
-	/*
-	   Whine if they've said @foo in a doublequoted string,
-	   and @foo isn't a variable we can find in the symbol
-	   table.
-	*/
-	if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
-	    GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
-	    if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
-		 && ckWARN(WARN_AMBIGUOUS))
-	    {
-                /* Downgraded from fatal to warning 20000522 mjd */
-		Perl_warner(aTHX_ WARN_AMBIGUOUS,
-			    "Possible unintended interpolation of %s in string",
-			     PL_tokenbuf);
-	    }
-	}
-
-	/* build ops for a bareword */
-	yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
-	yylval.opval->op_private = OPpCONST_ENTERED;
-	gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
-		   ((PL_tokenbuf[0] == '$') ? SVt_PV
-		    : (PL_tokenbuf[0] == '@') ? SVt_PVAV
-		    : SVt_PVHV));
-	return WORD;
-    }
+    if (PL_pending_ident) 
+        return S_pending_ident(aTHX);
 
     /* no identifier pending identification */
 
@@ -2339,7 +2192,7 @@ Perl_yylex(pTHX)
 	}
 	DEBUG_T({ PerlIO_printf(Perl_debug_log,
               "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
-              (IV)PL_nexttype[PL_nexttoke]); })
+              (IV)PL_nexttype[PL_nexttoke]); });
 
 	return(PL_nexttype[PL_nexttoke]);
 
@@ -2373,7 +2226,7 @@ Perl_yylex(pTHX)
 	}
 	else {
 	    DEBUG_T({ PerlIO_printf(Perl_debug_log,
-              "### Saw case modifier at '%s'\n", PL_bufptr); })
+              "### Saw case modifier at '%s'\n", PL_bufptr); });
 	    s = PL_bufptr + 1;
 	    if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
 		tmp = *s, *s = s[2], s[2] = tmp;	/* misordered... */
@@ -2425,7 +2278,7 @@ Perl_yylex(pTHX)
 	if (PL_bufptr == PL_bufend)
 	    return sublex_done();
 	DEBUG_T({ PerlIO_printf(Perl_debug_log,
-              "### Interpolated variable at '%s'\n", PL_bufptr); })
+              "### Interpolated variable at '%s'\n", PL_bufptr); });
 	PL_expect = XTERM;
 	PL_lex_dojoin = (*PL_bufptr == '@');
 	PL_lex_state = LEX_INTERPNORMAL;
@@ -2525,7 +2378,7 @@ Perl_yylex(pTHX)
     DEBUG_T( {
 	PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
 		      exp_name[PL_expect], s);
-    } )
+    } );
 
   retry:
     switch (*s) {
@@ -2544,7 +2397,7 @@ Perl_yylex(pTHX)
 		yyerror("Missing right curly or square bracket");
             DEBUG_T( { PerlIO_printf(Perl_debug_log,
                         "### Tokener got EOF\n");
-            } )
+            } );
 	    TOKEN(0);
 	}
 	if (s++ < PL_bufend)
@@ -2571,9 +2424,6 @@ Perl_yylex(pTHX)
 		if (PL_minus_l)
 		    sv_catpv(PL_linestr,"chomp;");
 		if (PL_minus_a) {
-		    GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
-		    if (gv)
-			GvIMPORTED_AV_on(gv);
 		    if (PL_minus_F) {
 			if (strchr("/'\"", *PL_splitstr)
 			      && strchr(PL_splitstr + 1, *PL_splitstr))
@@ -2583,7 +2433,7 @@ Perl_yylex(pTHX)
 			    s = "'~#\200\1'"; /* surely one char is unused...*/
 			    while (s[1] && strchr(PL_splitstr, *s))  s++;
 			    delim = *s;
-			    Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
+			    Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
 				      "q" + (delim == '\''), delim);
 			    for (s = PL_splitstr; *s; s++) {
 				if (*s == '\\')
@@ -2594,7 +2444,7 @@ Perl_yylex(pTHX)
 			}
 		    }
 		    else
-		        sv_catpv(PL_linestr,"@F=split(' ');");
+		        sv_catpv(PL_linestr,"our @F=split(' ');");
 		}
 	    }
 	    sv_catpv(PL_linestr, "\n");
@@ -2877,6 +2727,8 @@ Perl_yylex(pTHX)
 		s++;
 	    if (s < d)
 		s++;
+	    else if (s > d) /* Found by Ilya: feed random input to Perl. */
+	      Perl_croak(aTHX_ "panic: input overflow");
 	    incline(s);
 	    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
 		PL_bufptr = s;
@@ -2904,7 +2756,7 @@ Perl_yylex(pTHX)
 		s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
                             "### Saw unary minus before =>, forcing word '%s'\n", s);
-                } )
+                } );
 		OPERATOR('-');		/* unary minus */
 	    }
 	    PL_last_uni = PL_oldbufptr;
@@ -2949,7 +2801,7 @@ Perl_yylex(pTHX)
 		PL_last_lop_op = ftst;
 		DEBUG_T( { PerlIO_printf(Perl_debug_log,
                         "### Saw file test %c\n", (int)ftst);
-		} )
+		} );
 		FTST(ftst);
 	    }
 	    else {
@@ -2958,7 +2810,7 @@ Perl_yylex(pTHX)
 		DEBUG_T( { PerlIO_printf(Perl_debug_log,
 			"### %c looked like a file test but was not\n",
 			(int)ftst);
-		} )
+		} );
 		s -= 2;
 	    }
 	}
@@ -3115,8 +2967,8 @@ Perl_yylex(pTHX)
 		    else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
 			CvMETHOD_on(PL_compcv);
 #ifdef USE_ITHREADS
-		    else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
-			GvSHARED_on(cGVOPx_gv(yylval.opval));
+      else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len))
+			GvUNIQUE_on(cGVOPx_gv(yylval.opval));
 #endif
 		    /* After we've set the flags, it could be argued that
 		       we don't need to do the attributes.pm-based setting
@@ -3255,8 +3107,16 @@ Perl_yylex(pTHX)
 		else
 		    PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
 		s = skipspace(s);
-		if (*s == '}')
+		if (*s == '}') {
+		    if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
+			PL_expect = XTERM;
+			/* This hack is to get the ${} in the message. */
+			PL_bufptr = s+1;
+			yyerror("syntax error");
+			break;
+		    }
 		    OPERATOR(HASHBRACK);
+		}
 		/* This hack serves to disambiguate a pair of curlies
 		 * as being a block or an anon hash.  Normally, expectation
 		 * determines that, but in cases where we're not in a
@@ -3718,7 +3578,7 @@ Perl_yylex(pTHX)
 	s = scan_num(s, &yylval);
         DEBUG_T( { PerlIO_printf(Perl_debug_log,
                     "### Saw number in '%s'\n", s);
-        } )
+        } );
 	if (PL_expect == XOPERATOR)
 	    no_op("Number",s);
 	TERM(THING);
@@ -3727,7 +3587,7 @@ Perl_yylex(pTHX)
 	s = scan_str(s,FALSE,FALSE);
         DEBUG_T( { PerlIO_printf(Perl_debug_log,
                     "### Saw string before '%s'\n", s);
-        } )
+        } );
 	if (PL_expect == XOPERATOR) {
 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
 		PL_expect = XTERM;
@@ -3746,7 +3606,7 @@ Perl_yylex(pTHX)
 	s = scan_str(s,FALSE,FALSE);
         DEBUG_T( { PerlIO_printf(Perl_debug_log,
                     "### Saw string before '%s'\n", s);
-        } )
+        } );
 	if (PL_expect == XOPERATOR) {
 	    if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
 		PL_expect = XTERM;
@@ -3760,7 +3620,7 @@ Perl_yylex(pTHX)
 	    missingterm((char*)0);
 	yylval.ival = OP_CONST;
 	for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
-	    if (*d == '$' || *d == '@' || *d == '\\' || UTF8_IS_CONTINUED(*d)) {
+	    if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
 		yylval.ival = OP_STRINGIFY;
 		break;
 	    }
@@ -3771,7 +3631,7 @@ Perl_yylex(pTHX)
 	s = scan_str(s,FALSE,FALSE);
         DEBUG_T( { PerlIO_printf(Perl_debug_log,
                     "### Saw backtick string before '%s'\n", s);
-        } )
+        } );
 	if (PL_expect == XOPERATOR)
 	    no_op("Backticks",s);
 	if (!s)
@@ -3801,7 +3661,7 @@ Perl_yylex(pTHX)
 		TERM(THING);
 	    }
 	    /* avoid v123abc() or $h{v1}, allow C<print v10;> */
-	    else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
+	    else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) {
 		char c = *start;
 		GV *gv;
 		*start = '\0';
@@ -3887,7 +3747,7 @@ Perl_yylex(pTHX)
 	    CLINE;
 	    yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
 	    yylval.opval->op_private = OPpCONST_BARE;
-	    if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+	    if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
 	      SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
 	    TERM(WORD);
 	}
@@ -3941,6 +3801,7 @@ Perl_yylex(pTHX)
 	default:			/* not a keyword */
 	  just_a_word: {
 		SV *sv;
+		int pkgname = 0;
 		char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
 
 		/* Get the rest if it looks like a package qualifier */
@@ -3953,6 +3814,7 @@ Perl_yylex(pTHX)
 			Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
 				*s == '\'' ? "'" : "::");
 		    len += morelen;
+		    pkgname = 1;
 		}
 
 		if (PL_expect == XOPERATOR) {
@@ -4040,15 +3902,14 @@ Perl_yylex(pTHX)
 		    }
 		}
 
-
 		PL_expect = XOPERATOR;
 		s = skipspace(s);
 
 		/* Is this a word before a => operator? */
-		if (*s == '=' && s[1] == '>') {
+		if (*s == '=' && s[1] == '>' && !pkgname) {
 		    CLINE;
 		    sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
-		    if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+		    if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
 		      SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
 		    TERM(WORD);
 		}
@@ -4211,14 +4072,24 @@ Perl_yylex(pTHX)
 			loc = PerlIO_tell(PL_rsfp);
 			(void)PerlIO_seek(PL_rsfp, 0L, 0);
 		    }
+#ifdef NETWARE
+			if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
+#else
 		    if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
+#endif	/* NETWARE */
+#ifdef PERLIO_IS_STDIO /* really? */
+#  if defined(__BORLANDC__)
+			/* XXX see note in do_binmode() */
+			((FILE*)PL_rsfp)->flags &= ~_F_BIN;
+#  endif
+#endif
 			if (loc > 0)
 			    PerlIO_seek(PL_rsfp, loc, 0);
 		    }
 		}
 #endif
 #ifdef PERLIO_LAYERS
-		if (UTF && !IN_BYTE)
+		if (UTF && !IN_BYTES)
 		    PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
 #endif
 		PL_rsfp = Nullfp;
@@ -4307,12 +4178,6 @@ Perl_yylex(pTHX)
 	    LOP(OP_CRYPT,XTERM);
 
 	case KEY_chmod:
-	    if (ckWARN(WARN_CHMOD)) {
-		for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
-		if (*d != '0' && isDIGIT(*d))
-		    Perl_warner(aTHX_ WARN_CHMOD,
-		    		"chmod() mode argument is missing initial 0");
-	    }
 	    LOP(OP_CHMOD,XTERM);
 
 	case KEY_chown:
@@ -5007,7 +4872,7 @@ Perl_yylex(pTHX)
 	  really_sub:
 	    {
 		char tmpbuf[sizeof PL_tokenbuf];
-		SSize_t tboffset;
+		SSize_t tboffset = 0;
 		expectation attrful;
 		bool have_name, have_proto;
 		int key = tmp;
@@ -5170,12 +5035,6 @@ Perl_yylex(pTHX)
 	    LOP(OP_UTIME,XTERM);
 
 	case KEY_umask:
-	    if (ckWARN(WARN_UMASK)) {
-		for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
-		if (*d != '0' && isDIGIT(*d))
-		    Perl_warner(aTHX_ WARN_UMASK,
-		    		"umask: argument is missing initial 0");
-	    }
 	    UNI(OP_UMASK);
 
 	case KEY_unshift:
@@ -5256,6 +5115,137 @@ Perl_yylex(pTHX)
 #pragma segment Main
 #endif
 
+static int
+S_pending_ident(pTHX)
+{
+    register char *d;
+    register I32 tmp;
+    /* save the pending marker in pit (tested against '@' below), then clear it */
+    char pit = PL_pending_ident;
+    PL_pending_ident = 0;
+
+    DEBUG_T({ PerlIO_printf(Perl_debug_log,
+          "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
+
+    /* Inside a my()/our() declaration (PL_in_my is set) a qualified name
+       is reported via yyerror().  $foo'bar has already been turned into
+       $foo::bar, so just check for colons.
+
+       "my" returns a PADANY op here; "our" allocates, then falls through.
+    */
+    if (PL_in_my) {
+        if (PL_in_my == KEY_our) {	/* "our" is merely analogous to "my" */
+            if (strchr(PL_tokenbuf,':'))
+                yyerror(Perl_form(aTHX_ "No package name allowed for "
+                                  "variable %s in \"our\"",
+                                  PL_tokenbuf));
+            tmp = pad_allocmy(PL_tokenbuf);
+        }
+        else {
+            if (strchr(PL_tokenbuf,':'))
+                yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
+
+            yylval.opval = newOP(OP_PADANY, 0);
+            yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
+            return PRIVATEREF;
+        }
+    }
+
+    /*
+       Build the ops for accesses to a my() variable; only names
+       without a ':' are looked up in the pad.
+       Deny my($a) or my($b) in a sort block, *if* $a or $b is
+       then used in a comparison (the scan below looks for "<=>" or
+       "cmp").  This catches most, but not all cases.  It catches
+           sort { my($a); $a <=> $b }
+       but not
+           sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
+       (although why you'd do that is anyone's guess).
+    */
+
+    if (!strchr(PL_tokenbuf,':')) {
+#ifdef USE_THREADS
+        /* Check for single character per-thread SVs */
+        if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
+            && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
+            && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
+        {
+            yylval.opval = newOP(OP_THREADSV, 0);
+            yylval.opval->op_targ = tmp;
+            return PRIVATEREF;
+        }
+#endif /* USE_THREADS */
+        if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
+            SV *namesv = AvARRAY(PL_comppad_name)[tmp];
+            /* might be an "our" variable (pad name flagged SVpad_OUR) */
+            if (SvFLAGS(namesv) & SVpad_OUR) {
+                /* build ops for a bareword: stash name, "::", then the name */
+                SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
+                sv_catpvn(sym, "::", 2);
+                sv_catpv(sym, PL_tokenbuf+1);
+                yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
+                yylval.opval->op_private = OPpCONST_ENTERED;
+                gv_fetchpv(SvPVX(sym),
+                    (PL_in_eval
+                        ? (GV_ADDMULTI | GV_ADDINEVAL)
+                        : TRUE
+                    ),
+                    ((PL_tokenbuf[0] == '$') ? SVt_PV
+                     : (PL_tokenbuf[0] == '@') ? SVt_PVAV
+                     : SVt_PVHV));
+                return WORD;
+            }
+
+            /* if the last list operator was sort and they're naming $a or $b */
+            if (PL_last_lop_op == OP_SORT &&
+                PL_tokenbuf[0] == '$' &&
+                (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
+                && !PL_tokenbuf[2])
+            {
+                for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
+                     d < PL_bufend && *d != '\n';
+                     d++)
+                {
+                    if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
+                        Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
+                              PL_tokenbuf);
+                    }
+                }
+            }
+
+            yylval.opval = newOP(OP_PADANY, 0);
+            yylval.opval->op_targ = tmp;
+            return PRIVATEREF;
+        }
+    }
+
+    /*
+       Whine (under WARN_AMBIGUOUS) if they've said @foo in a
+       doublequoted string, and @foo isn't a variable we can find
+       in the symbol table.
+    */
+    if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+        GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
+        if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
+             && ckWARN(WARN_AMBIGUOUS))
+        {
+            /* Downgraded from fatal to warning 20000522 mjd */
+            Perl_warner(aTHX_ WARN_AMBIGUOUS,
+                        "Possible unintended interpolation of %s in string",
+                         PL_tokenbuf);
+        }
+    }
+
+    /* build ops for a bareword (name minus its leading type character) */
+    yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
+    yylval.opval->op_private = OPpCONST_ENTERED;
+    gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
+               ((PL_tokenbuf[0] == '$') ? SVt_PV
+                : (PL_tokenbuf[0] == '@') ? SVt_PVAV
+                : SVt_PVHV));
+    return WORD;
+}
+
 I32
 Perl_keyword(pTHX_ register char *d, I32 len)
 {
@@ -5651,7 +5641,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
 	    if (strEQ(d,"rindex"))		return -KEY_rindex;
 	    break;
 	case 7:
-	    if (strEQ(d,"require"))		return -KEY_require;
+	    if (strEQ(d,"require"))		return KEY_require;
 	    if (strEQ(d,"reverse"))		return -KEY_reverse;
 	    if (strEQ(d,"readdir"))		return -KEY_readdir;
 	    break;
@@ -6546,7 +6536,7 @@ retval:
 	Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
     }
     SvREFCNT_dec(herewas);
-    if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+    if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
 	SvUTF8_on(tmpstr);
     PL_lex_stuff = tmpstr;
     yylval.ival = op_type;
@@ -6732,7 +6722,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
     /* after skipping whitespace, the next character is the terminator */
     term = *s;
-    if (UTF8_IS_CONTINUED(term) && UTF)
+    if (!UTF8_IS_INVARIANT((U8)term) && UTF)
 	has_utf8 = TRUE;
 
     /* mark where we are */
@@ -6779,7 +6769,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 		   have found the terminator */
 		else if (*s == term)
 		    break;
-		else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
+		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
 		    has_utf8 = TRUE;
 		*to = *s;
 	    }
@@ -6808,7 +6798,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 		    break;
 		else if (*s == PL_multi_open)
 		    brackets++;
-		else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
+		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
 		    has_utf8 = TRUE;
 		*to = *s;
 	    }
@@ -6900,11 +6890,11 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
   Read a number in any of the formats that Perl accepts:
 
-  0(x[0-7A-F]+)|([0-7]+)|(b[01])
-  [\d_]+(\.[\d_]*)?[Ee](\d+)
-
-  Underbars (_) are allowed in decimal numbers.  If -w is on,
-  underbars before a decimal point must be at three digit intervals.
+  \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)	12 12.34 12.
+  \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)			.34
+  0b[01](_?[01])*
+  0[0-7](_?[0-7])*
+  0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
 
   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
   thing it reads.
@@ -6974,8 +6964,17 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 	    else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
 		goto decimal;
 	    /* so it must be octal */
-	    else
+	    else {
 		shift = 3;
+		s++;
+	    }
+
+	    if (*s == '_') {
+	       if (ckWARN(WARN_SYNTAX))
+		   Perl_warner(aTHX_ WARN_SYNTAX,
+			       "Misplaced _ in number");
+	       lastub = s++;
+	    }
 
 	    base = bases[shift];
 	    Base = Bases[shift];
@@ -6993,9 +6992,12 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 		default:
 		    goto out;
 
-		/* _ are ignored */
+		/* _ are ignored -- but warned about if consecutive */
 		case '_':
-		    s++;
+		    if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
+		        Perl_warner(aTHX_ WARN_SYNTAX,
+				    "Misplaced _ in number");
+		    lastub = s++;
 		    break;
 
 		/* 8 and 9 are not octal */
@@ -7062,6 +7064,13 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 	     the number.
 	  */
 	  out:
+
+	    /* final misplaced underbar check */
+	    if (s[-1] == '_') {
+	        if (ckWARN(WARN_SYNTAX))
+		    Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
+	    }
+
 	    sv = NEWSV(92,0);
 	    if (overflowed) {
 		if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
@@ -7101,9 +7110,10 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 	       if -w is on
 	    */
 	    if (*s == '_') {
-		if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
-		    Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
-		lastub = ++s;
+		if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
+		    Perl_warner(aTHX_ WARN_SYNTAX,
+				"Misplaced _ in number");
+		lastub = s++;
 	    }
 	    else {
 	        /* check for end of fixed-length buffer */
@@ -7115,7 +7125,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 	}
 
 	/* final misplaced underbar check */
-	if (lastub && s - lastub != 3) {
+	if (lastub && s == lastub + 1) {
 	    if (ckWARN(WARN_SYNTAX))
 		Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
 	}
@@ -7128,16 +7138,34 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 	    floatit = TRUE;
 	    *d++ = *s++;
 
-	    /* copy, ignoring underbars, until we run out of
-	       digits.  Note: no misplaced underbar checks!
+	    if (*s == '_') {
+	        if (ckWARN(WARN_SYNTAX))
+		    Perl_warner(aTHX_ WARN_SYNTAX,
+				"Misplaced _ in number");
+		lastub = s;
+	    }
+
+	    /* copy, ignoring underbars, until we run out of digits.
 	    */
 	    for (; isDIGIT(*s) || *s == '_'; s++) {
 	        /* fixed length buffer check */
 		if (d >= e)
 		    Perl_croak(aTHX_ number_too_long);
-		if (*s != '_')
+		if (*s == '_') {
+		   if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
+		       Perl_warner(aTHX_ WARN_SYNTAX,
+				   "Misplaced _ in number");
+		   lastub = s;
+		}
+		else
 		    *d++ = *s;
 	    }
+	    /* fractional part ending in underbar? */
+	    if (s[-1] == '_') {
+	        if (ckWARN(WARN_SYNTAX))
+		    Perl_warner(aTHX_ WARN_SYNTAX,
+				"Misplaced _ in number");
+	    }
 	    if (*s == '.' && isDIGIT(s[1])) {
 		/* oops, it's really a v-string, but without the "v" */
 		s = start - 1;
@@ -7146,110 +7174,84 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
 	}
 
 	/* read exponent part, if present */
-	if (*s && strchr("eE",*s) && strchr("+-0123456789",s[1])) {
+	if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
 	    floatit = TRUE;
 	    s++;
 
 	    /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
 	    *d++ = 'e';		/* At least some Mach atof()s don't grok 'E' */
 
+	    /* stray preinitial _ */
+	    if (*s == '_') {
+	        if (ckWARN(WARN_SYNTAX))
+		    Perl_warner(aTHX_ WARN_SYNTAX,
+				"Misplaced _ in number");
+	        lastub = s++;
+	    }
+
 	    /* allow positive or negative exponent */
 	    if (*s == '+' || *s == '-')
 		*d++ = *s++;
 
-	    /* read digits of exponent (no underbars :-) */
-	    while (isDIGIT(*s)) {
-		if (d >= e)
-		    Perl_croak(aTHX_ number_too_long);
-		*d++ = *s++;
+	    /* stray initial _ */
+	    if (*s == '_') {
+	        if (ckWARN(WARN_SYNTAX))
+		    Perl_warner(aTHX_ WARN_SYNTAX,
+				"Misplaced _ in number");
+	        lastub = s++;
+	    }
+
+	    /* read digits of exponent */
+	    while (isDIGIT(*s) || *s == '_') {
+	        if (isDIGIT(*s)) {
+		    if (d >= e)
+		        Perl_croak(aTHX_ number_too_long);
+		    *d++ = *s++;
+		}
+		else {
+		   if (ckWARN(WARN_SYNTAX) &&
+		       ((lastub && s == lastub + 1) ||
+			(!isDIGIT(s[1]) && s[1] != '_')))
+		       Perl_warner(aTHX_ WARN_SYNTAX,
+				   "Misplaced _ in number");
+		   lastub = s++;
+		}
 	    }
 	}
 
-	/* terminate the string */
-	*d = '\0';
 
 	/* make an sv from the string */
 	sv = NEWSV(92,0);
 
-#if defined(Strtol) && defined(Strtoul)
-
 	/*
-	   strtol/strtoll sets errno to ERANGE if the number is too big
-	   for an integer. We try to do an integer conversion first
-	   if no characters indicating "float" have been found.
+           We try to do an integer conversion first if no characters
+           indicating "float" have been found.
 	 */
 
 	if (!floatit) {
-    	    IV iv;
     	    UV uv;
-	    errno = 0;
-	    if (*PL_tokenbuf == '-')
-		iv = Strtol(PL_tokenbuf, (char**)NULL, 10);
-	    else
-		uv = Strtoul(PL_tokenbuf, (char**)NULL, 10);
-	    if (errno)
-	    	floatit = TRUE; /* Probably just too large. */
-	    else if (*PL_tokenbuf == '-')
-	    	sv_setiv(sv, iv);
-	    else if (uv <= IV_MAX)
+            int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
+
+            if (flags == IS_NUMBER_IN_UV) {
+              if (uv <= IV_MAX)
 		sv_setiv(sv, uv); /* Prefer IVs over UVs. */
-	    else
+              else
 	    	sv_setuv(sv, uv);
-	}
+            } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
+              if (uv <= (UV) IV_MIN)
+                sv_setiv(sv, -(IV)uv);
+              else
+	    	floatit = TRUE;
+            } else
+              floatit = TRUE;
+        }
 	if (floatit) {
+	    /* terminate the string */
+	    *d = '\0';
 	    nv = Atof(PL_tokenbuf);
 	    sv_setnv(sv, nv);
 	}
-#else
-	/*
-	   No working strtou?ll?.
-
-	   Unfortunately atol() doesn't do range checks (returning
-	   LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows)
-	   everywhere [1], so we cannot use use atol() (or atoll()).
-	   If we could, they would be used, as Atol(), very much like
-	   Strtol() and Strtoul() are used above.
-
-	   [1] XXX Configure test needed to check for atol()
-	           (and atoll()) overflow behaviour XXX
-
-	   --jhi
 
-	   We need to do this the hard way.  */
-
-	nv = Atof(PL_tokenbuf);
-
-	/* See if we can make do with an integer value without loss of
-	   precision.  We use U_V to cast to a UV, because some
-	   compilers have issues.  Then we try casting it back and see
-	   if it was the same [1].  We only do this if we know we
-	   specifically read an integer.  If floatit is true, then we
-	   don't need to do the conversion at all.
-
-	   [1] Note that this is lossy if our NVs cannot preserve our
-	   UVs.  There are metaconfig defines NV_PRESERVES_UV (a boolean)
-	   and NV_PRESERVES_UV_BITS (a number), but in general we really
-	   do hope all such potentially lossy platforms have strtou?ll?
-	   to do a lossless IV/UV conversion.
-
-	   Maybe could do some tricks with DBL_DIG, LDBL_DIG and
-	   DBL_MANT_DIG and LDBL_MANT_DIG (these are already available
-	   as NV_DIG and NV_MANT_DIG)?
-	
-	   --jhi
-	   */
-	{
-	    UV uv = U_V(nv);
-	    if (!floatit && (NV)uv == nv) {
-		if (uv <= IV_MAX)
-		    sv_setiv(sv, uv); /* Prefer IVs over UVs. */
-		else
-		    sv_setuv(sv, uv);
-	    }
-	    else
-		sv_setnv(sv, nv);
-	}
-#endif
 	if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
 	               (PL_hints & HINT_NEW_INTEGER) )
 	    sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
@@ -7266,7 +7268,7 @@ vstring:
 	    while (isDIGIT(*pos) || *pos == '_')
 		pos++;
 	    if (!isALPHA(*pos)) {
-		UV rev, revmax = 0;
+		UV rev;
 		U8 tmpbuf[UTF8_MAXLEN+1];
 		U8 *tmpend;
 		s++;				/* get past 'v' */
@@ -7294,10 +7296,11 @@ vstring:
 					    "Integer overflow in decimal number");
 			}
 		    }
-		    tmpend = uv_to_utf8(tmpbuf, rev);
-		    if (rev > revmax)
-			revmax = rev;
+		    /* Append native character for the rev point */
+		    tmpend = uvchr_to_utf8(tmpbuf, rev);
 		    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
+		    if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+			SvUTF8_on(sv);
 		    if (*pos == '.' && isDIGIT(pos[1]))
 			s = ++pos;
 		    else {
@@ -7307,14 +7310,8 @@ vstring:
 		    while (isDIGIT(*pos) || *pos == '_')
 			pos++;
 		}
-
 		SvPOK_on(sv);
 		SvREADONLY_on(sv);
-		if (revmax > 127) {
-		    SvUTF8_on(sv);
-		    if (revmax < 256)
-		      sv_utf8_downgrade(sv, TRUE);
-		}
 	    }
 	}
 	break;
@@ -7365,15 +7362,19 @@ S_scan_formline(pTHX_ register char *s)
 		if (*t == '@' || *t == '^')
 		    needargs = TRUE;
 	    }
-	    sv_catpvn(stuff, s, eol-s);
+	    if (eol > s) {
+	        sv_catpvn(stuff, s, eol-s);
 #ifndef PERL_STRICT_CR
-	    if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
-		char *end = SvPVX(stuff) + SvCUR(stuff);
-		end[-2] = '\n';
-		end[-1] = '\0';
-		SvCUR(stuff)--;
-	    }
+		if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
+		    char *end = SvPVX(stuff) + SvCUR(stuff);
+		    end[-2] = '\n';
+		    end[-1] = '\0';
+		    SvCUR(stuff)--;
+		}
 #endif
+	    }
+	    else
+	      break;
 	}
 	s = eol;
 	if (PL_rsfp) {