X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=2518e54854c95564d98cd6cd08a3f3d648467871;hb=78ca652eaf12f3ab6d7714883eec614d257f666a;hp=c069978f614bc1c8d8e61a571e70eb52d4dc03fa;hpb=4a9ae47ac2dbde43455079cf404946a27c7b4906;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index c069978..2518e54 100644 --- a/toke.c +++ b/toke.c @@ -14,6 +14,9 @@ #include "EXTERN.h" #include "perl.h" +#define yychar PL_yychar +#define yylval PL_yylval + #ifndef PERL_OBJECT static void check_uni _((void)); static void force_next _((I32 type)); @@ -57,6 +60,8 @@ static void restore_lex_expect _((void *e)); static char ident_too_long[] = "Identifier too long"; +#define UTF (PL_hints & HINT_UTF8) + /* The following are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). */ @@ -210,8 +215,9 @@ missingterm(char *s) void deprecate(char *s) { - if (PL_dowarn) - warn("Use of %s is deprecated", s); + dTHR; + if (ckWARN(WARN_DEPRECATED)) + warner(WARN_DEPRECATED, "Use of %s is deprecated", s); } STATIC void @@ -232,6 +238,39 @@ win32_textfilter(int idx, SV *sv, int maxlen) } #endif +#ifndef PERL_OBJECT + +STATIC I32 +utf16_textfilter(int idx, SV *sv, int maxlen) +{ + I32 count = FILTER_READ(idx+1, sv, maxlen); + if (count) { + U8* tmps; + U8* tend; + New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); + tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv)); + sv_usepvn(sv, (char*)tmps, tend - tmps); + + } + return count; +} + +STATIC I32 +utf16rev_textfilter(int idx, SV *sv, int maxlen) +{ + I32 count = FILTER_READ(idx+1, sv, maxlen); + if (count) { + U8* tmps; + U8* tend; + New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); + tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv)); + sv_usepvn(sv, (char*)tmps, tend - tmps); + + } + return count; +} + +#endif void lex_start(SV *line) @@ -849,11 +888,17 @@ scan_const(char *start) register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ I32 len; /* ? */ + I32 utf = PL_lex_inwhat == OP_TRANS + ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) + : UTF; + I32 thisutf = PL_lex_inwhat == OP_TRANS + ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF)) + : UTF; /* leaveit is the set of acceptably-backslashed characters */ char *leaveit = PL_lex_inpat - ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" + ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" : ""; while (s < send || dorange) { @@ -862,6 +907,7 @@ scan_const(char *start) /* expand a range A-Z to the full set of characters. AIE! */ if (dorange) { I32 i; /* current expanded character */ + I32 min; /* first character in range */ I32 max; /* last character in range */ i = d - SvPVX(sv); /* remember current offset */ @@ -869,10 +915,26 @@ scan_const(char *start) d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */ d -= 2; /* eat the first char and the - */ - max = (U8)d[1]; /* last char in range */ - - for (i = (U8)*d; i <= max; i++) - *d++ = i; + min = (U8)*d; /* first char in range */ + max = (U8)d[1]; /* last char in range */ + +#ifndef ASCIIish + if ((isLOWER(min) && isLOWER(max)) || + (isUPPER(min) && isUPPER(max))) { + if (isLOWER(min)) { + for (i = min; i <= max; i++) + if (isLOWER(i)) + *d++ = i; + } else { + for (i = min; i <= max; i++) + if (isUPPER(i)) + *d++ = i; + } + } + else +#endif + for (i = min; i <= max; i++) + *d++ = i; /* mark the range as done, and continue */ dorange = FALSE; @@ -881,6 +943,11 @@ scan_const(char *start) /* range begins (ignore - as first or last char) */ else if (*s == '-' && s+1 < send && s != start) { + if (utf) { + *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */ + s++; + continue; + } dorange = TRUE; s++; } @@ -888,14 +955,16 @@ scan_const(char *start) /* if we get here, we're not doing a transliteration */ - /* skip for regexp comments /(?#comment)/ */ + /* skip for regexp comments /(?#comment)/ and code /(?{code})/, + except for the last char, which will be done separately. */ else if (*s == '(' && PL_lex_inpat && s[1] == '?') { if (s[2] == '#') { while (s < send && *s != ')') *d++ = *s++; - } else if (s[2] == '{') { /* This should march regcomp.c */ + } else if (s[2] == '{' + || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */ I32 count = 1; - char *regparse = s + 3; + char *regparse = s + (s[2] == '{' ? 3 : 4); char c; while (count && (c = *regparse)) { @@ -907,11 +976,11 @@ scan_const(char *start) count--; regparse++; } - if (*regparse == ')') - regparse++; - else + if (*regparse != ')') { + regparse--; /* Leave one char for continuation. */ yyerror("Sequence (?{...}) not terminated or not {}-balanced"); - while (s < regparse && *s != ')') + } + while (s < regparse) *d++ = *s++; } } @@ -937,6 +1006,20 @@ scan_const(char *start) break; /* in regexp, $ might be tail anchor */ } + /* (now in tr/// code again) */ + + if (*s & 0x80 && thisutf) { + dTHR; /* only for ckWARN */ + if (ckWARN(WARN_UTF8)) { + (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */ + if (len) { + while (len--) + *d++ = *s++; + continue; + } + } + } + /* backslashes */ if (*s == '\\' && s+1 < send) { s++; @@ -952,8 +1035,9 @@ scan_const(char *start) if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { - if (PL_dowarn) - warn("\\%c better written as $%c", *s, *s); + dTHR; /* only for ckWARN */ + if (ckWARN(WARN_SYNTAX)) + warner(WARN_SYNTAX, "\\%c better written as $%c", *s, *s); *--s = '$'; break; } @@ -988,8 +1072,43 @@ scan_const(char *start) /* \x24 indicates a hex constant */ case 'x': - *d++ = scan_hex(++s, 2, &len); - s += len; + ++s; + if (*s == '{') { + char* e = strchr(s, '}'); + + if (!e) + yyerror("Missing right brace on \\x{}"); + if (!utf) { + dTHR; + if (ckWARN(WARN_UTF8)) + warner(WARN_UTF8, + "Use of \\x{} without utf8 declaration"); + } + /* note: utf always shorter than hex */ + d = (char*)uv_to_utf8((U8*)d, + scan_hex(s + 1, e - s - 1, &len)); + s = e + 1; + + } + else { + UV uv = (UV)scan_hex(s, 2, &len); + if (utf && PL_lex_inwhat == OP_TRANS && + utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) + { + d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */ + } + else { + if (uv >= 127 && UTF) { + dTHR; + if (ckWARN(WARN_UTF8)) + warner(WARN_UTF8, + "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that", + len,s,len,s); + } + *d++ = (char)uv; + } + s += len; + } continue; /* \c is a control character */ @@ -1317,7 +1436,7 @@ filter_del(filter_t funcp) if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) return; /* if filter is on top of stack (usual case) just pop it off */ - if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (void*)funcp){ + if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){ sv_free(av_pop(PL_rsfp_filters)); return; @@ -1737,7 +1856,17 @@ yylex(void) retry: switch (*s) { default: - croak("Unrecognized character \\%03o", *s & 255); + /* + * Note: we try to be careful never to call the isXXX_utf8() functions unless we're + * pretty sure we've seen the beginning of a UTF-8 character (that is, the two high + * bits are set). Otherwise we risk loading in the heavy-duty SWASHINIT and SWASHGET + * routines unnecessarily. You will see this not just here but throughout this file. + */ + if (UTF && (*s & 0xc0) == 0x80) { + if (isIDFIRST_utf8((U8*)s)) + goto keylookup; + } + croak("Unrecognized character \\x%02X", *s & 255); case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ @@ -2379,9 +2508,9 @@ yylex(void) AOPERATOR(ANDAND); s--; if (PL_expect == XOPERATOR) { - if (PL_dowarn && isALPHA(*s) && PL_bufptr == PL_linestart) { + if (ckWARN(WARN_SEMICOLON) && isALPHA(*s) && PL_bufptr == PL_linestart) { PL_curcop->cop_line--; - warn(warn_nosemi); + warner(WARN_SEMICOLON, warn_nosemi); PL_curcop->cop_line++; } BAop(OP_BIT_AND); @@ -2413,8 +2542,8 @@ yylex(void) OPERATOR(','); if (tmp == '~') PMop(OP_MATCH); - if (PL_dowarn && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp)) - warn("Reversed %c= operator",(int)tmp); + if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp)) + warner(WARN_SYNTAX, "Reversed %c= operator",(int)tmp); s--; if (PL_expect == XSTATE && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) @@ -2443,7 +2572,11 @@ yylex(void) } if (PL_lex_brackets < PL_lex_formbrack) { char *t; +#ifdef PERL_STRICT_CR for (t = s; *t == ' ' || *t == '\t'; t++) ; +#else + for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ; +#endif if (*t == '\n' || *t == '#') { s--; PL_expect = XBLOCK; @@ -2505,7 +2638,7 @@ yylex(void) } } - if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:", s[2]))) { + if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:+-", s[2]))) { if (PL_expect == XOPERATOR) no_op("Array length", PL_bufptr); PL_tokenbuf[0] = '@'; @@ -2544,7 +2677,7 @@ yylex(void) char *t; if (*s == '[') { PL_tokenbuf[0] = '@'; - if (PL_dowarn) { + if (ckWARN(WARN_SYNTAX)) { for(t = s + 1; isSPACE(*t) || isALNUM(*t) || *t == '$'; t++) ; @@ -2552,14 +2685,15 @@ yylex(void) PL_bufptr = skipspace(PL_bufptr); while (t < PL_bufend && *t != ']') t++; - warn("Multidimensional syntax %.*s not supported", - (t - PL_bufptr) + 1, PL_bufptr); + warner(WARN_SYNTAX, + "Multidimensional syntax %.*s not supported", + (t - PL_bufptr) + 1, PL_bufptr); } } } else if (*s == '{') { PL_tokenbuf[0] = '%'; - if (PL_dowarn && strEQ(PL_tokenbuf+1, "SIG") && + if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") && (t = strchr(s, '}')) && (t = strchr(t, '='))) { char tmpbuf[sizeof PL_tokenbuf]; @@ -2568,7 +2702,8 @@ yylex(void) if (isIDFIRST(*t)) { t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); if (*t != '(' && perl_get_cv(tmpbuf, FALSE)) - warn("You need to quote \"%s\"", tmpbuf); + warner(WARN_SYNTAX, + "You need to quote \"%s\"", tmpbuf); } } } @@ -2638,7 +2773,7 @@ yylex(void) PL_tokenbuf[0] = '%'; /* Warn about @ where they meant $. */ - if (PL_dowarn) { + if (ckWARN(WARN_SYNTAX)) { if (*s == '[' || *s == '{') { char *t = s + 1; while (*t && (isALNUM(*t) || strchr(" \t$#+-'\"", *t))) @@ -2646,7 +2781,8 @@ yylex(void) if (*t == '}' || *t == ']') { t++; PL_bufptr = skipspace(PL_bufptr); - warn("Scalar value %.*s better written as $%.*s", + warner(WARN_SYNTAX, + "Scalar value %.*s better written as $%.*s", t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1); } } @@ -2672,8 +2808,14 @@ yylex(void) OPERATOR(tmp); case '.': - if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack && s[1] == '\n' && - (s == PL_linestart || s[-1] == '\n') ) { + if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack +#ifdef PERL_STRICT_CR + && s[1] == '\n' +#else + && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) +#endif + && (s == PL_linestart || s[-1] == '\n') ) + { PL_lex_formbrack = 0; PL_expect = XSTATE; goto rightbracket; @@ -2733,7 +2875,7 @@ yylex(void) missingterm((char*)0); yylval.ival = OP_CONST; for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { - if (*d == '$' || *d == '@' || *d == '\\') { + if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) { yylval.ival = OP_STRINGIFY; break; } @@ -2752,8 +2894,9 @@ yylex(void) case '\\': s++; - if (PL_dowarn && PL_lex_inwhat && isDIGIT(*s)) - warn("Can't use \\%c to mean $%c in expression", *s, *s); + if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s)) + warner(WARN_SYNTAX,"Can't use \\%c to mean $%c in expression", + *s, *s); if (PL_expect == XOPERATOR) no_op("Backslash",s); OPERATOR(REFGEN); @@ -2868,8 +3011,10 @@ yylex(void) tmp = -tmp; gv = Nullgv; gvp = 0; - if (PL_dowarn && hgv) - warn("Ambiguous call resolved as CORE::%s(), %s", + if (ckWARN(WARN_AMBIGUOUS) && hgv + && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ + warner(WARN_AMBIGUOUS, + "Ambiguous call resolved as CORE::%s(), %s", GvENAME(hgv), "qualify as such or use &"); } } @@ -2897,7 +3042,7 @@ yylex(void) if (PL_expect == XOPERATOR) { if (PL_bufptr == PL_linestart) { PL_curcop->cop_line--; - warn(warn_nosemi); + warner(WARN_SEMICOLON, warn_nosemi); PL_curcop->cop_line++; } else @@ -2911,8 +3056,9 @@ yylex(void) if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { - if (PL_dowarn && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV)) - warn("Bareword \"%s\" refers to nonexistent package", + if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV)) + warner(WARN_UNSAFE, + "Bareword \"%s\" refers to nonexistent package", PL_tokenbuf); len -= 2; PL_tokenbuf[len] = '\0'; @@ -3070,11 +3216,11 @@ yylex(void) /* Call it a bare word */ bareword: - if (PL_dowarn) { + if (ckWARN(WARN_RESERVED)) { if (lastchar != '-') { for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ; if (!*d) - warn(warn_reserved, PL_tokenbuf); + warner(WARN_RESERVED, warn_reserved, PL_tokenbuf); } } @@ -3215,7 +3361,7 @@ yylex(void) LOP(OP_CRYPT,XTERM); case KEY_chmod: - if (PL_dowarn) { + if (ckWARN(WARN_OCTAL)) { for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ; if (*d != '0' && isDIGIT(*d)) yywarn("chmod: mode argument is missing initial 0"); @@ -3635,15 +3781,17 @@ yylex(void) s = scan_str(s); if (!s) missingterm((char*)0); - if (PL_dowarn && SvLEN(PL_lex_stuff)) { + if (ckWARN(WARN_SYNTAX) && SvLEN(PL_lex_stuff)) { d = SvPV_force(PL_lex_stuff, len); for (; len; --len, ++d) { if (*d == ',') { - warn("Possible attempt to separate words with commas"); + warner(WARN_SYNTAX, + "Possible attempt to separate words with commas"); break; } if (*d == '#') { - warn("Possible attempt to put comments in qw() list"); + warner(WARN_SYNTAX, + "Possible attempt to put comments in qw() list"); break; } } @@ -4018,7 +4166,7 @@ yylex(void) LOP(OP_UTIME,XTERM); case KEY_umask: - if (PL_dowarn) { + if (ckWARN(WARN_OCTAL)) { for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ; if (*d != '0' && isDIGIT(*d)) yywarn("umask: argument is missing initial 0"); @@ -4725,18 +4873,21 @@ checkcomma(register char *s, char *name, char *what) { char *w; - if (PL_dowarn && *s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ - int level = 1; - for (w = s+2; *w && level; w++) { - if (*w == '(') - ++level; - else if (*w == ')') - --level; - } - if (*w) - for (; *w && isSPACE(*w); w++) ; - if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */ - warn("%s (...) interpreted as function",name); + if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ + dTHR; /* only for ckWARN */ + if (ckWARN(WARN_SYNTAX)) { + int level = 1; + for (w = s+2; *w && level; w++) { + if (*w == '(') + ++level; + else if (*w == ')') + --level; + } + if (*w) + for (; *w && isSPACE(*w); w++) ; + if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */ + warner(WARN_SYNTAX, "%s (...) interpreted as function",name); + } } while (s < PL_bufend && isSPACE(*s)) s++; @@ -4850,6 +5001,16 @@ scan_word(register char *s, char *dest, STRLEN destlen, int allow_package, STRLE *d++ = *s++; *d++ = *s++; } + else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) { + char *t = s + UTF8SKIP(s); + while (*t & 0x80 && is_utf8_mark((U8*)t)) + t += UTF8SKIP(t); + if (d + (t - s) > e) + croak(ident_too_long); + Copy(s, d, t - s, char); + d += t - s; + s = t; + } else { *d = '\0'; *slp = d - dest; @@ -4894,6 +5055,16 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 *d++ = *s++; *d++ = *s++; } + else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) { + char *t = s + UTF8SKIP(s); + while (*t & 0x80 && is_utf8_mark((U8*)t)) + t += UTF8SKIP(t); + if (d + (t - s) > e) + croak(ident_too_long); + Copy(s, d, t - s, char); + d += t - s; + s = t; + } else break; } @@ -4906,12 +5077,9 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 return s; } if (*s == '$' && s[1] && - (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) ) + (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) ) { - if (isDIGIT(s[1]) && PL_lex_state == LEX_INTERPNORMAL) - deprecate("\"$$\" to mean \"${$}\""); - else - return s; + return s; } if (*s == '{') { bracket = s; @@ -4936,16 +5104,31 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 } } } - if (isIDFIRST(*d)) { + if (isIDFIRST(*d) || (UTF && (*d & 0xc0) == 0x80 && isIDFIRST_utf8((U8*)d))) { d++; - while (isALNUM(*s) || *s == ':') - *d++ = *s++; + if (UTF) { + e = s; + while (e < send && (isALNUM(*e) || ((*e & 0xc0) == 0x80 && isALNUM_utf8((U8*)e)) || *e == ':')) { + e += UTF8SKIP(e); + while (e < send && *e & 0x80 && is_utf8_mark((U8*)e)) + e += UTF8SKIP(e); + } + Copy(s, d, e - s, char); + d += e - s; + s = e; + } + else { + while (isALNUM(*s) || *s == ':') + *d++ = *s++; + } *d = '\0'; while (s < send && (*s == ' ' || *s == '\t')) s++; if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { - if (PL_dowarn && keyword(dest, d - dest)) { + dTHR; /* only for ckWARN */ + if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) { char *brack = *s == '[' ? "[...]" : "{...}"; - warn("Ambiguous use of %c{%s%s} resolved to %c%s%s", + warner(WARN_AMBIGUOUS, + "Ambiguous use of %c{%s%s} resolved to %c%s%s", funny, dest, brack, funny, dest, brack); } PL_lex_fakebrack = PL_lex_brackets+1; @@ -4960,10 +5143,16 @@ scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I3 PL_lex_state = LEX_INTERPEND; if (funny == '#') funny = '@'; - if (PL_dowarn && PL_lex_state == LEX_NORMAL && - (keyword(dest, d - dest) || perl_get_cv(dest, FALSE))) - warn("Ambiguous use of %c{%s} resolved to %c%s", - funny, dest, funny, dest); + if (PL_lex_state == LEX_NORMAL) { + dTHR; /* only for ckWARN */ + if (ckWARN(WARN_AMBIGUOUS) && + (keyword(dest, d - dest) || perl_get_cv(dest, FALSE))) + { + warner(WARN_AMBIGUOUS, + "Ambiguous use of %c{%s} resolved to %c%s", + funny, dest, funny, dest); + } + } } else { s = bracket; /* let the parser handle it */ @@ -5099,8 +5288,10 @@ scan_trans(char *start) OP *o; short *tbl; I32 squash; - I32 Delete; + I32 del; I32 complement; + I32 utf8; + I32 count = 0; yylval.ival = OP_NULL; @@ -5125,20 +5316,45 @@ scan_trans(char *start) croak("Transliteration replacement not terminated"); } - New(803,tbl,256,short); - o = newPVOP(OP_TRANS, 0, (char*)tbl); + if (UTF) { + o = newSVOP(OP_TRANS, 0, 0); + utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF; + } + else { + New(803,tbl,256,short); + o = newPVOP(OP_TRANS, 0, (char*)tbl); + utf8 = 0; + } - complement = Delete = squash = 0; - while (*s == 'c' || *s == 'd' || *s == 's') { + complement = del = squash = 0; + while (strchr("cdsCU", *s)) { if (*s == 'c') complement = OPpTRANS_COMPLEMENT; else if (*s == 'd') - Delete = OPpTRANS_DELETE; - else + del = OPpTRANS_DELETE; + else if (*s == 's') squash = OPpTRANS_SQUASH; + else { + switch (count++) { + case 0: + if (*s == 'C') + utf8 &= ~OPpTRANS_FROM_UTF; + else + utf8 |= OPpTRANS_FROM_UTF; + break; + case 1: + if (*s == 'C') + utf8 &= ~OPpTRANS_TO_UTF; + else + utf8 |= OPpTRANS_TO_UTF; + break; + default: + croak("Too many /C and /U options"); + } + } s++; } - o->op_private = Delete|squash|complement; + o->op_private = del|squash|complement|utf8; PL_lex_op = o; yylval.ival = OP_TRANS; @@ -5592,7 +5808,7 @@ scan_str(char *start) } /* we read a line, so increment our line counter */ PL_curcop->cop_line++; - + /* update debugger info */ if (PERLDB_LINE && PL_curstash != PL_debstash) { SV *sv = NEWSV(88,0); @@ -5602,7 +5818,7 @@ scan_str(char *start) av_store(GvAV(PL_curcop->cop_filegv), (I32)PL_curcop->cop_line, sv); } - + /* having changed the buffer, we must update PL_bufend */ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); } @@ -5781,8 +5997,9 @@ scan_num(char *start) if -w is on */ if (*s == '_') { - if (PL_dowarn && lastub && s - lastub != 3) - warn("Misplaced _ in number"); + dTHR; /* only for ckWARN */ + if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3) + warner(WARN_SYNTAX, "Misplaced _ in number"); lastub = ++s; } else { @@ -5795,8 +6012,11 @@ scan_num(char *start) } /* final misplaced underbar check */ - if (PL_dowarn && lastub && s - lastub != 3) - warn("Misplaced _ in number"); + if (lastub && s - lastub != 3) { + dTHR; + if (ckWARN(WARN_SYNTAX)) + warner(WARN_SYNTAX, "Misplaced _ in number"); + } /* read a decimal portion if there is one. avoid 3..5 being interpreted as the number 3. followed @@ -5887,7 +6107,11 @@ scan_formline(register char *s) while (!needargs) { if (*s == '.' || *s == '}') { /*SUPPRESS 530*/ - for (t = s+1; *t == ' ' || *t == '\t'; t++) ; +#ifdef PERL_STRICT_CR + for (t = s+1;*t == ' ' || *t == '\t'; t++) ; +#else + for (t = s+1;*t == ' ' || *t == '\t' || *t == '\r'; t++) ; +#endif if (*t == '\n') break; }