X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=2518e54854c95564d98cd6cd08a3f3d648467871;hb=78ca652eaf12f3ab6d7714883eec614d257f666a;hp=c069978f614bc1c8d8e61a571e70eb52d4dc03fa;hpb=4a9ae47ac2dbde43455079cf404946a27c7b4906;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/toke.c b/toke.c
index c069978..2518e54 100644
--- a/toke.c
+++ b/toke.c
@@ -14,6 +14,9 @@
 #include "EXTERN.h"
 #include "perl.h"
 
+#define yychar	PL_yychar
+#define yylval	PL_yylval
+
 #ifndef PERL_OBJECT
 static void check_uni _((void));
 static void  force_next _((I32 type));
@@ -57,6 +60,8 @@ static void restore_lex_expect _((void *e));
 
 static char ident_too_long[] = "Identifier too long";
 
+#define UTF (PL_hints & HINT_UTF8)
+
 /* The following are arranged oddly so that the guard on the switch statement
  * can get by with a single comparison (if the compiler is smart enough).
  */
@@ -210,8 +215,9 @@ missingterm(char *s)
 void
 deprecate(char *s)
 {
-    if (PL_dowarn)
-	warn("Use of %s is deprecated", s);
+    dTHR;
+    if (ckWARN(WARN_DEPRECATED))
+	warner(WARN_DEPRECATED, "Use of %s is deprecated", s);
 }
 
 STATIC void
@@ -232,6 +238,39 @@ win32_textfilter(int idx, SV *sv, int maxlen)
 }
 #endif
 
+#ifndef PERL_OBJECT
+
+STATIC I32
+utf16_textfilter(int idx, SV *sv, int maxlen)
+{
+    I32 count = FILTER_READ(idx+1, sv, maxlen);
+    if (count) {
+	U8* tmps;
+	U8* tend;
+	New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+	tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
+	sv_usepvn(sv, (char*)tmps, tend - tmps);
+    
+    }
+    return count;
+}
+
+STATIC I32
+utf16rev_textfilter(int idx, SV *sv, int maxlen)
+{
+    I32 count = FILTER_READ(idx+1, sv, maxlen);
+    if (count) {
+	U8* tmps;
+	U8* tend;
+	New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
+	tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
+	sv_usepvn(sv, (char*)tmps, tend - tmps);
+    
+    }
+    return count;
+}
+
+#endif
 
 void
 lex_start(SV *line)
@@ -849,11 +888,17 @@ scan_const(char *start)
     register char *d = SvPVX(sv);		/* destination for copies */
     bool dorange = FALSE;			/* are we in a translit range? */
     I32 len;					/* ? */
+    I32 utf = PL_lex_inwhat == OP_TRANS
+	? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+	: UTF;
+    I32 thisutf = PL_lex_inwhat == OP_TRANS
+	? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
+	: UTF;
 
     /* leaveit is the set of acceptably-backslashed characters */
     char *leaveit =
 	PL_lex_inpat
-	    ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+	    ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
 	    : "";
 
     while (s < send || dorange) {
@@ -862,6 +907,7 @@ scan_const(char *start)
 	    /* expand a range A-Z to the full set of characters.  AIE! */
 	    if (dorange) {
 		I32 i;				/* current expanded character */
+		I32 min;			/* first character in range */
 		I32 max;			/* last character in range */
 
 		i = d - SvPVX(sv);		/* remember current offset */
@@ -869,10 +915,26 @@ scan_const(char *start)
 		d = SvPVX(sv) + i;		/* restore d after the grow potentially has changed the ptr */
 		d -= 2;				/* eat the first char and the - */
 
-		max = (U8)d[1];			/* last char in range */
-
-		for (i = (U8)*d; i <= max; i++)
-		    *d++ = i;
+		min = (U8)*d;			/* first char in range */
+		max = (U8)d[1];			/* last char in range  */
+
+#ifndef ASCIIish
+		if ((isLOWER(min) && isLOWER(max)) ||
+		    (isUPPER(min) && isUPPER(max))) {
+		    if (isLOWER(min)) {
+			for (i = min; i <= max; i++)
+			    if (isLOWER(i))
+				*d++ = i;
+		    } else {
+			for (i = min; i <= max; i++)
+			    if (isUPPER(i))
+				*d++ = i;
+		    }
+		}
+		else
+#endif
+		    for (i = min; i <= max; i++)
+			*d++ = i;
 
 		/* mark the range as done, and continue */
 		dorange = FALSE;
@@ -881,6 +943,11 @@ scan_const(char *start)
 
 	    /* range begins (ignore - as first or last char) */
 	    else if (*s == '-' && s+1 < send  && s != start) {
+		if (utf) {
+		    *d++ = (char)0xff;	/* use illegal utf8 byte--see pmtrans */
+		    s++;
+		    continue;
+		}
 		dorange = TRUE;
 		s++;
 	    }
@@ -888,14 +955,16 @@ scan_const(char *start)
 
 	/* if we get here, we're not doing a transliteration */
 
-	/* skip for regexp comments /(?#comment)/ */
+	/* skip for regexp comments /(?#comment)/ and code /(?{code})/,
+	   except for the last char, which will be done separately. */
 	else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
 	    if (s[2] == '#') {
 		while (s < send && *s != ')')
 		    *d++ = *s++;
-	    } else if (s[2] == '{') {	/* This should march regcomp.c */
+	    } else if (s[2] == '{'
+		       || s[2] == 'p' && s[3] == '{') {	/* This should march regcomp.c */
 		I32 count = 1;
-		char *regparse = s + 3;
+		char *regparse = s + (s[2] == '{' ? 3 : 4);
 		char c;
 
 		while (count && (c = *regparse)) {
@@ -907,11 +976,11 @@ scan_const(char *start)
 			count--;
 		    regparse++;
 		}
-		if (*regparse == ')')
-		    regparse++;
-		else
+		if (*regparse != ')') {
+		    regparse--;		/* Leave one char for continuation. */
 		    yyerror("Sequence (?{...}) not terminated or not {}-balanced");
-		while (s < regparse && *s != ')')
+		}
+		while (s < regparse)
 		    *d++ = *s++;
 	    }
 	}
@@ -937,6 +1006,20 @@ scan_const(char *start)
 		break;		/* in regexp, $ might be tail anchor */
 	}
 
+	/* (now in tr/// code again) */
+
+	if (*s & 0x80 && thisutf) {
+	    dTHR;			/* only for ckWARN */
+	    if (ckWARN(WARN_UTF8)) {
+		(void)utf8_to_uv((U8*)s, &len);	/* could cvt latin-1 to utf8 here... */
+		if (len) {
+		    while (len--)
+			*d++ = *s++;
+		    continue;
+		}
+	    }
+	}
+
 	/* backslashes */
 	if (*s == '\\' && s+1 < send) {
 	    s++;
@@ -952,8 +1035,9 @@ scan_const(char *start)
 	    if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
 		isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
 	    {
-		if (PL_dowarn)
-		    warn("\\%c better written as $%c", *s, *s);
+		dTHR;			/* only for ckWARN */
+		if (ckWARN(WARN_SYNTAX))
+		    warner(WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
 		*--s = '$';
 		break;
 	    }
@@ -988,8 +1072,43 @@ scan_const(char *start)
 
 	    /* \x24 indicates a hex constant */
 	    case 'x':
-		*d++ = scan_hex(++s, 2, &len);
-		s += len;
+		++s;
+		if (*s == '{') {
+		    char* e = strchr(s, '}');
+
+		    if (!e)
+			yyerror("Missing right brace on \\x{}");
+		    if (!utf) {
+			dTHR;
+			if (ckWARN(WARN_UTF8))
+			    warner(WARN_UTF8,
+				   "Use of \\x{} without utf8 declaration");
+		    }
+		    /* note: utf always shorter than hex */
+		    d = (char*)uv_to_utf8((U8*)d,
+					  scan_hex(s + 1, e - s - 1, &len));
+		    s = e + 1;
+			
+		}
+		else {
+		    UV uv = (UV)scan_hex(s, 2, &len);
+		    if (utf && PL_lex_inwhat == OP_TRANS &&
+			utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+		    {
+			d = (char*)uv_to_utf8((U8*)d, uv);	/* doing a CU or UC */
+		    }
+		    else {
+			if (uv >= 127 && UTF) {
+			    dTHR;
+			    if (ckWARN(WARN_UTF8))
+				warner(WARN_UTF8,
+				    "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
+				    len,s,len,s);
+			}
+			*d++ = (char)uv;
+		    }
+		    s += len;
+		}
 		continue;
 
 	    /* \c is a control character */
@@ -1317,7 +1436,7 @@ filter_del(filter_t funcp)
     if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
 	return;
     /* if filter is on top of stack (usual case) just pop it off */
-    if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (void*)funcp){
+    if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){
 	sv_free(av_pop(PL_rsfp_filters));
 
         return;
@@ -1737,7 +1856,17 @@ yylex(void)
   retry:
     switch (*s) {
     default:
-	croak("Unrecognized character \\%03o", *s & 255);
+	/*
+	 * Note: we try to be careful never to call the isXXX_utf8() functions unless we're
+	 * pretty sure we've seen the beginning of a UTF-8 character (that is, the two high
+	 * bits are set).  Otherwise we risk loading in the heavy-duty SWASHINIT and SWASHGET
+	 * routines unnecessarily.  You will see this not just here but throughout this file.
+	 */
+	if (UTF && (*s & 0xc0) == 0x80) {
+	    if (isIDFIRST_utf8((U8*)s))
+		goto keylookup;
+	}
+	croak("Unrecognized character \\x%02X", *s & 255);
     case 4:
     case 26:
 	goto fake_eof;			/* emulate EOF on ^D or ^Z */
@@ -2379,9 +2508,9 @@ yylex(void)
 	    AOPERATOR(ANDAND);
 	s--;
 	if (PL_expect == XOPERATOR) {
-	    if (PL_dowarn && isALPHA(*s) && PL_bufptr == PL_linestart) {
+	    if (ckWARN(WARN_SEMICOLON) && isALPHA(*s) && PL_bufptr == PL_linestart) {
 		PL_curcop->cop_line--;
-		warn(warn_nosemi);
+		warner(WARN_SEMICOLON, warn_nosemi);
 		PL_curcop->cop_line++;
 	    }
 	    BAop(OP_BIT_AND);
@@ -2413,8 +2542,8 @@ yylex(void)
 	    OPERATOR(',');
 	if (tmp == '~')
 	    PMop(OP_MATCH);
-	if (PL_dowarn && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
-	    warn("Reversed %c= operator",(int)tmp);
+	if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
+	    warner(WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
 	s--;
 	if (PL_expect == XSTATE && isALPHA(tmp) &&
 		(s == PL_linestart+1 || s[-2] == '\n') )
@@ -2443,7 +2572,11 @@ yylex(void)
 	}
 	if (PL_lex_brackets < PL_lex_formbrack) {
 	    char *t;
+#ifdef PERL_STRICT_CR
 	    for (t = s; *t == ' ' || *t == '\t'; t++) ;
+#else
+	    for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+#endif
 	    if (*t == '\n' || *t == '#') {
 		s--;
 		PL_expect = XBLOCK;
@@ -2505,7 +2638,7 @@ yylex(void)
 	    }
 	}
 
-	if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:", s[2]))) {
+	if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:+-", s[2]))) {
 	    if (PL_expect == XOPERATOR)
 		no_op("Array length", PL_bufptr);
 	    PL_tokenbuf[0] = '@';
@@ -2544,7 +2677,7 @@ yylex(void)
 	    char *t;
 	    if (*s == '[') {
 		PL_tokenbuf[0] = '@';
-		if (PL_dowarn) {
+		if (ckWARN(WARN_SYNTAX)) {
 		    for(t = s + 1;
 			isSPACE(*t) || isALNUM(*t) || *t == '$';
 			t++) ;
@@ -2552,14 +2685,15 @@ yylex(void)
 			PL_bufptr = skipspace(PL_bufptr);
 			while (t < PL_bufend && *t != ']')
 			    t++;
-			warn("Multidimensional syntax %.*s not supported",
-			     (t - PL_bufptr) + 1, PL_bufptr);
+			warner(WARN_SYNTAX,
+				"Multidimensional syntax %.*s not supported",
+			     	(t - PL_bufptr) + 1, PL_bufptr);
 		    }
 		}
 	    }
 	    else if (*s == '{') {
 		PL_tokenbuf[0] = '%';
-		if (PL_dowarn && strEQ(PL_tokenbuf+1, "SIG") &&
+		if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
 		    (t = strchr(s, '}')) && (t = strchr(t, '=')))
 		{
 		    char tmpbuf[sizeof PL_tokenbuf];
@@ -2568,7 +2702,8 @@ yylex(void)
 		    if (isIDFIRST(*t)) {
 			t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
 			if (*t != '(' && perl_get_cv(tmpbuf, FALSE))
-			    warn("You need to quote \"%s\"", tmpbuf);
+			    warner(WARN_SYNTAX,
+				"You need to quote \"%s\"", tmpbuf);
 		    }
 		}
 	    }
@@ -2638,7 +2773,7 @@ yylex(void)
 		PL_tokenbuf[0] = '%';
 
 	    /* Warn about @ where they meant $. */
-	    if (PL_dowarn) {
+	    if (ckWARN(WARN_SYNTAX)) {
 		if (*s == '[' || *s == '{') {
 		    char *t = s + 1;
 		    while (*t && (isALNUM(*t) || strchr(" \t$#+-'\"", *t)))
@@ -2646,7 +2781,8 @@ yylex(void)
 		    if (*t == '}' || *t == ']') {
 			t++;
 			PL_bufptr = skipspace(PL_bufptr);
-			warn("Scalar value %.*s better written as $%.*s",
+			warner(WARN_SYNTAX,
+			    "Scalar value %.*s better written as $%.*s",
 			    t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
 		    }
 		}
@@ -2672,8 +2808,14 @@ yylex(void)
 	OPERATOR(tmp);
 
     case '.':
-	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack && s[1] == '\n' &&
-		(s == PL_linestart || s[-1] == '\n') ) {
+	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
+#ifdef PERL_STRICT_CR
+	    && s[1] == '\n'
+#else
+	    && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
+#endif
+	    && (s == PL_linestart || s[-1] == '\n') )
+	{
 	    PL_lex_formbrack = 0;
 	    PL_expect = XSTATE;
 	    goto rightbracket;
@@ -2733,7 +2875,7 @@ yylex(void)
 	    missingterm((char*)0);
 	yylval.ival = OP_CONST;
 	for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
-	    if (*d == '$' || *d == '@' || *d == '\\') {
+	    if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
 		yylval.ival = OP_STRINGIFY;
 		break;
 	    }
@@ -2752,8 +2894,9 @@ yylex(void)
 
     case '\\':
 	s++;
-	if (PL_dowarn && PL_lex_inwhat && isDIGIT(*s))
-	    warn("Can't use \\%c to mean $%c in expression", *s, *s);
+	if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
+	    warner(WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
+			*s, *s);
 	if (PL_expect == XOPERATOR)
 	    no_op("Backslash",s);
 	OPERATOR(REFGEN);
@@ -2868,8 +3011,10 @@ yylex(void)
 		tmp = -tmp;
 		gv = Nullgv;
 		gvp = 0;
-		if (PL_dowarn && hgv)
-		    warn("Ambiguous call resolved as CORE::%s(), %s",
+		if (ckWARN(WARN_AMBIGUOUS) && hgv
+		    && tmp != KEY_x && tmp != KEY_CORE)	/* never ambiguous */
+		    warner(WARN_AMBIGUOUS,
+		    	"Ambiguous call resolved as CORE::%s(), %s",
 			 GvENAME(hgv), "qualify as such or use &");
 	    }
 	}
@@ -2897,7 +3042,7 @@ yylex(void)
 		if (PL_expect == XOPERATOR) {
 		    if (PL_bufptr == PL_linestart) {
 			PL_curcop->cop_line--;
-			warn(warn_nosemi);
+			warner(WARN_SEMICOLON, warn_nosemi);
 			PL_curcop->cop_line++;
 		    }
 		    else
@@ -2911,8 +3056,9 @@ yylex(void)
 		if (len > 2 &&
 		    PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
 		{
-		    if (PL_dowarn && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
-			warn("Bareword \"%s\" refers to nonexistent package",
+		    if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
+			warner(WARN_UNSAFE, 
+		  	    "Bareword \"%s\" refers to nonexistent package",
 			     PL_tokenbuf);
 		    len -= 2;
 		    PL_tokenbuf[len] = '\0';
@@ -3070,11 +3216,11 @@ yylex(void)
 		/* Call it a bare word */
 
 	    bareword:
-		if (PL_dowarn) {
+		if (ckWARN(WARN_RESERVED)) {
 		    if (lastchar != '-') {
 			for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
 			if (!*d)
-			    warn(warn_reserved, PL_tokenbuf);
+			    warner(WARN_RESERVED, warn_reserved, PL_tokenbuf);
 		    }
 		}
 
@@ -3215,7 +3361,7 @@ yylex(void)
 	    LOP(OP_CRYPT,XTERM);
 
 	case KEY_chmod:
-	    if (PL_dowarn) {
+	    if (ckWARN(WARN_OCTAL)) {
 		for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
 		if (*d != '0' && isDIGIT(*d))
 		    yywarn("chmod: mode argument is missing initial 0");
@@ -3635,15 +3781,17 @@ yylex(void)
 	    s = scan_str(s);
 	    if (!s)
 		missingterm((char*)0);
-	    if (PL_dowarn && SvLEN(PL_lex_stuff)) {
+	    if (ckWARN(WARN_SYNTAX) && SvLEN(PL_lex_stuff)) {
 		d = SvPV_force(PL_lex_stuff, len);
 		for (; len; --len, ++d) {
 		    if (*d == ',') {
-			warn("Possible attempt to separate words with commas");
+			warner(WARN_SYNTAX,
+			    "Possible attempt to separate words with commas");
 			break;
 		    }
 		    if (*d == '#') {
-			warn("Possible attempt to put comments in qw() list");
+			warner(WARN_SYNTAX,
+			    "Possible attempt to put comments in qw() list");
 			break;
 		    }
 		}
@@ -4018,7 +4166,7 @@ yylex(void)
 	    LOP(OP_UTIME,XTERM);
 
 	case KEY_umask:
-	    if (PL_dowarn) {
+	    if (ckWARN(WARN_OCTAL)) {
 		for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
 		if (*d != '0' && isDIGIT(*d))
 		    yywarn("umask: argument is missing initial 0");
@@ -4725,18 +4873,21 @@ checkcomma(register char *s, char *name, char *what)
 {
     char *w;
 
-    if (PL_dowarn && *s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
-	int level = 1;
-	for (w = s+2; *w && level; w++) {
-	    if (*w == '(')
-		++level;
-	    else if (*w == ')')
-		--level;
-	}
-	if (*w)
-	    for (; *w && isSPACE(*w); w++) ;
-	if (!*w || !strchr(";|})]oaiuw!=", *w))	/* an advisory hack only... */
-	    warn("%s (...) interpreted as function",name);
+    if (*s == ' ' && s[1] == '(') {	/* XXX gotta be a better way */
+	dTHR;				/* only for ckWARN */
+	if (ckWARN(WARN_SYNTAX)) {
+	    int level = 1;
+	    for (w = s+2; *w && level; w++) {
+		if (*w == '(')
+		    ++level;
+		else if (*w == ')')
+		    --level;
+	    }
+	    if (*w)
+		for (; *w && isSPACE(*w); w++) ;
+	    if (!*w || !strchr(";|})]oaiuw!=", *w))	/* an advisory hack only... */
+		warner(WARN_SYNTAX, "%s (...) interpreted as function",name);
+	}
     }
     while (s < PL_bufend && isSPACE(*s))
 	s++;
@@ -4850,6 +5001,16 @@ scan_word(register char *s, char *dest, STRLEN destlen, int allow_package, STRLE
 	    *d++ = *s++;
 	    *d++ = *s++;
 	}
+	else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) {
+	    char *t = s + UTF8SKIP(s);
+	    while (*t & 0x80 && is_utf8_mark((U8*)t))
+		t += UTF8SKIP(t);
+	    if (d + (t - s) > e)
+		croak(ident_too_long);
+	    Copy(s, d, t - s, char);
+	    d += t - s;
+	    s = t;
+	}
 	else {
 	    *d = '\0';
 	    *slp = d - dest;
@@ -4894,6 +5055,16 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3
 		*d++ = *s++;
 		*d++ = *s++;
 	    }
+	    else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) {
+		char *t = s + UTF8SKIP(s);
+		while (*t & 0x80 && is_utf8_mark((U8*)t))
+		    t += UTF8SKIP(t);
+		if (d + (t - s) > e)
+		    croak(ident_too_long);
+		Copy(s, d, t - s, char);
+		d += t - s;
+		s = t;
+	    }
 	    else
 		break;
 	}
@@ -4906,12 +5077,9 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3
 	return s;
     }
     if (*s == '$' && s[1] &&
-      (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+	(isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
     {
-	if (isDIGIT(s[1]) && PL_lex_state == LEX_INTERPNORMAL)
-	    deprecate("\"$$<digit>\" to mean \"${$}<digit>\"");
-	else
-	    return s;
+	return s;
     }
     if (*s == '{') {
 	bracket = s;
@@ -4936,16 +5104,31 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3
 		}
 	    }
 	}
-	if (isIDFIRST(*d)) {
+	if (isIDFIRST(*d) || (UTF && (*d & 0xc0) == 0x80 && isIDFIRST_utf8((U8*)d))) {
 	    d++;
-	    while (isALNUM(*s) || *s == ':')
-		*d++ = *s++;
+	    if (UTF) {
+		e = s;
+		while (e < send && (isALNUM(*e) || ((*e & 0xc0) == 0x80 && isALNUM_utf8((U8*)e)) || *e == ':')) {
+		    e += UTF8SKIP(e);
+		    while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
+			e += UTF8SKIP(e);
+		}
+		Copy(s, d, e - s, char);
+		d += e - s;
+		s = e;
+	    }
+	    else {
+		while (isALNUM(*s) || *s == ':')
+		    *d++ = *s++;
+	    }
 	    *d = '\0';
 	    while (s < send && (*s == ' ' || *s == '\t')) s++;
 	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
-		if (PL_dowarn && keyword(dest, d - dest)) {
+		dTHR;			/* only for ckWARN */
+		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
 		    char *brack = *s == '[' ? "[...]" : "{...}";
-		    warn("Ambiguous use of %c{%s%s} resolved to %c%s%s",
+		    warner(WARN_AMBIGUOUS,
+			"Ambiguous use of %c{%s%s} resolved to %c%s%s",
 			funny, dest, brack, funny, dest, brack);
 		}
 		PL_lex_fakebrack = PL_lex_brackets+1;
@@ -4960,10 +5143,16 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3
 		PL_lex_state = LEX_INTERPEND;
 	    if (funny == '#')
 		funny = '@';
-	    if (PL_dowarn && PL_lex_state == LEX_NORMAL &&
-	      (keyword(dest, d - dest) || perl_get_cv(dest, FALSE)))
-		warn("Ambiguous use of %c{%s} resolved to %c%s",
-		    funny, dest, funny, dest);
+	    if (PL_lex_state == LEX_NORMAL) {
+		dTHR;			/* only for ckWARN */
+		if (ckWARN(WARN_AMBIGUOUS) &&
+		    (keyword(dest, d - dest) || perl_get_cv(dest, FALSE)))
+		{
+		    warner(WARN_AMBIGUOUS,
+			"Ambiguous use of %c{%s} resolved to %c%s",
+			funny, dest, funny, dest);
+		}
+	    }
 	}
 	else {
 	    s = bracket;		/* let the parser handle it */
@@ -5099,8 +5288,10 @@ scan_trans(char *start)
     OP *o;
     short *tbl;
     I32 squash;
-    I32 Delete;
+    I32 del;
     I32 complement;
+    I32 utf8;
+    I32 count = 0;
 
     yylval.ival = OP_NULL;
 
@@ -5125,20 +5316,45 @@ scan_trans(char *start)
 	croak("Transliteration replacement not terminated");
     }
 
-    New(803,tbl,256,short);
-    o = newPVOP(OP_TRANS, 0, (char*)tbl);
+    if (UTF) {
+	o = newSVOP(OP_TRANS, 0, 0);
+	utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF;
+    }
+    else {
+	New(803,tbl,256,short);
+	o = newPVOP(OP_TRANS, 0, (char*)tbl);
+	utf8 = 0;
+    }
 
-    complement = Delete = squash = 0;
-    while (*s == 'c' || *s == 'd' || *s == 's') {
+    complement = del = squash = 0;
+    while (strchr("cdsCU", *s)) {
 	if (*s == 'c')
 	    complement = OPpTRANS_COMPLEMENT;
 	else if (*s == 'd')
-	    Delete = OPpTRANS_DELETE;
-	else
+	    del = OPpTRANS_DELETE;
+	else if (*s == 's')
 	    squash = OPpTRANS_SQUASH;
+	else {
+	    switch (count++) {
+	    case 0:
+		if (*s == 'C')
+		    utf8 &= ~OPpTRANS_FROM_UTF;
+		else
+		    utf8 |= OPpTRANS_FROM_UTF;
+		break;
+	    case 1:
+		if (*s == 'C')
+		    utf8 &= ~OPpTRANS_TO_UTF;
+		else
+		    utf8 |= OPpTRANS_TO_UTF;
+		break;
+	    default: 
+		croak("Too many /C and /U options");
+	    }
+	}
 	s++;
     }
-    o->op_private = Delete|squash|complement;
+    o->op_private = del|squash|complement|utf8;
 
     PL_lex_op = o;
     yylval.ival = OP_TRANS;
@@ -5592,7 +5808,7 @@ scan_str(char *start)
 	}
 	/* we read a line, so increment our line counter */
 	PL_curcop->cop_line++;
-	
+
 	/* update debugger info */
 	if (PERLDB_LINE && PL_curstash != PL_debstash) {
 	    SV *sv = NEWSV(88,0);
@@ -5602,7 +5818,7 @@ scan_str(char *start)
 	    av_store(GvAV(PL_curcop->cop_filegv),
 	      (I32)PL_curcop->cop_line, sv);
 	}
-	
+
 	/* having changed the buffer, we must update PL_bufend */
 	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
     }
@@ -5781,8 +5997,9 @@ scan_num(char *start)
 	       if -w is on
 	    */
 	    if (*s == '_') {
-		if (PL_dowarn && lastub && s - lastub != 3)
-		    warn("Misplaced _ in number");
+		dTHR;			/* only for ckWARN */
+		if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
+		    warner(WARN_SYNTAX, "Misplaced _ in number");
 		lastub = ++s;
 	    }
 	    else {
@@ -5795,8 +6012,11 @@ scan_num(char *start)
 	}
 
 	/* final misplaced underbar check */
-	if (PL_dowarn && lastub && s - lastub != 3)
-	    warn("Misplaced _ in number");
+	if (lastub && s - lastub != 3) {
+	    dTHR;
+	    if (ckWARN(WARN_SYNTAX))
+		warner(WARN_SYNTAX, "Misplaced _ in number");
+	}
 
 	/* read a decimal portion if there is one.  avoid
 	   3..5 being interpreted as the number 3. followed
@@ -5887,7 +6107,11 @@ scan_formline(register char *s)
     while (!needargs) {
 	if (*s == '.' || *s == '}') {
 	    /*SUPPRESS 530*/
-	    for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
+#ifdef PERL_STRICT_CR
+	    for (t = s+1;*t == ' ' || *t == '\t'; t++) ;
+#else
+	    for (t = s+1;*t == ' ' || *t == '\t' || *t == '\r'; t++) ;
+#endif
 	    if (*t == '\n')
 		break;
 	}