void
Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
{
+ dVAR;
char *bufptr;
PERL_ARGS_ASSERT_LEX_STUFF_PVN;
if (flags & ~(LEX_STUFF_UTF8))
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
bufptr = PL_parser->bufptr;
Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr,
+ SvCUR(PL_parser->linestr) + len+highhalf);
PL_parser->bufend += len+highhalf;
for (p = pv; p != e; p++) {
U8 c = (U8)*p;
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
bufptr = PL_parser->bufptr;
Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr,
+ SvCUR(PL_parser->linestr) + len-highhalf);
PL_parser->bufend += len-highhalf;
for (p = pv; p != e; p++) {
U8 c = (U8)*p;
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
bufptr = PL_parser->bufptr;
Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
+ SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
PL_parser->bufend += len;
Copy(pv, bufptr, len, char);
}
Normally it is not necessarily to do this directly, because it suffices to
use the implicit discarding behaviour of L</lex_next_chunk> and things
based on it. However, if a token stretches across multiple lines,
-and the lexing code has kept multiple lines of text in the buffer fof
+and the lexing code has kept multiple lines of text in the buffer for
that purpose, then after completion of the token it would be wise to
explicitly discard the now-unneeded earlier lines, to avoid future
multi-line tokens growing the buffer without bound.
I32
Perl_lex_peek_unichar(pTHX_ U32 flags)
{
+ dVAR;
char *s, *bufend;
if (flags & ~(LEX_KEEP_PREVIOUS))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
* errors and upgrading to utf8) is:
* Further disambiguate between the two meanings of \N, and if
* not a charname, go process it elsewhere
- * If of form \N{U+...}, pass it through if a pattern; otherwise
- * convert to utf8
- * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a pattern;
- * otherwise convert to utf8 */
+ * If of form \N{U+...}, pass it through if a pattern;
+ * otherwise convert to utf8
+ * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+ * pattern; otherwise convert to utf8 */
/* Here, s points to the 'N'; the test below is guaranteed to
* succeed if we are being called on a pattern as we already
}
s++;
- /* If there is no matching '}', it is an error outside of a
- * pattern, or ambiguous inside. */
+ /* If there is no matching '}', it is an error. */
if (! (e = strchr(s, '}'))) {
if (! PL_lex_inpat) {
yyerror("Missing right brace on \\N{}");
- continue;
- }
- else {
-
- /* A missing brace means it can't be a legal character
- * name, and it could be a legal "match non-newline".
- * But it's kind of weird without an unescaped left
- * brace, so warn. */
- if (ckWARN(WARN_SYNTAX)) {
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Missing right brace on \\N{} or unescaped left brace after \\N. Assuming the latter");
- }
- s -= 3; /* Backup over cur char, {, N, to the '\' */
- *d++ = NATIVE_TO_NEED(has_utf8,'\\');
- goto default_action;
+ } else {
+ yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
}
+ continue;
}
/* Here it looks like a named character */
/* Pass through to the regex compiler unchanged. The
* reason we evaluated the number above is to make sure
- * there wasn't a syntax error. It also makes sure
- * that the syntax created below, \N{Uc1.c2...}, is
- * internal-only */
+ * there wasn't a syntax error. */
s -= 5; /* Include the '\N{U+' */
Copy(s, d, e - s + 1, char); /* 1 = include the } */
d += e - s + 1;
d += len;
}
SvREFCNT_dec(res);
- }
+
+ /* Deprecate non-approved name syntax */
+ if (ckWARN_d(WARN_DEPRECATED)) {
+ bool problematic = FALSE;
+ char* i = s;
+
+ /* For non-ut8 input, look to see that the first
+ * character is an alpha, then loop through the rest
+ * checking that each is a continuation */
+ if (! this_utf8) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ else for (i = s + 1; i < e; i++) {
+ if (isCHARNAME_CONT(*i)) continue;
+ problematic = TRUE;
+ break;
+ }
+ }
+ else {
+ /* Similarly for utf8. For invariants can check
+ * directly. We accept anything above the latin1
+ * range because it is immaterial to Perl if it is
+ * correct or not, and is expensive to check. But
+ * it is fairly easy in the latin1 range to convert
+ * the variants into a single character and check
+ * those */
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+ *(i+1)))))
+ {
+ problematic = TRUE;
+ }
+ }
+ if (! problematic) for (i = s + UTF8SKIP(s);
+ i < e;
+ i+= UTF8SKIP(i))
+ {
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (isCHARNAME_CONT(*i)) continue;
+ } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ continue;
+ } else if (isCHARNAME_CONT(
+ UNI_TO_NATIVE(
+ UTF8_ACCUMULATE(*i, *(i+1)))))
+ {
+ continue;
+ }
+ problematic = TRUE;
+ break;
+ }
+ }
+ if (problematic) {
+ char *string;
+ Newx(string, e - i + 1, char);
+ Copy(i, string, e - i, char);
+ string[e - i] = '\0';
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Deprecated character(s) in \\N{...} starting at '%s'",
+ string);
+ Safefree(string);
+ }
+ }
+ } /* End \N{NAME} */
#ifdef EBCDIC
if (!dorange)
native_range = FALSE; /* \N{} is defined to be Unicode */
/* Is this a label? */
if (!anydelim && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
- if (tmp)
- Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
s = d + 1;
pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
CLINE;
}
/* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && strEQ(key,"charnames")) return res;
+ if (PL_error_count > 0 && strEQ(key,"charnames"))
+ return &PL_sv_undef;
cvp = hv_fetch(table, key, keylen, FALSE);
if (!cvp || !SvOK(*cvp)) {
SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
SV *const utf8_buffer = filter;
IV status = IoPAGE(filter);
- const bool reverse = (bool) IoLINES(filter);
+ const bool reverse = cBOOL(IoLINES(filter));
I32 retval;
/* As we're automatically added, at the lowest level, and hence only called