#define PERL_5_9_PLUS
#endif
+#if !defined(PERL_5_9_PLUS) && defined(PERL_IMPLICIT_CONTEXT)
+/* These two are not exported from the core on Windows. With 5.9+
+ it's not an issue, because they're part of the PL_parser structure,
+ which is exported. On multiplicity/thread builds we can work
+ around the lack of export by this formulation, where we provide
+ a substitute implementation of the unexported accessor functions.
+ On single-interpreter builds we can't, because access is directly
+ via symbols that are not exported. */
+# define Perl_Ilinestart_ptr my_Ilinestart_ptr
+char **my_Ilinestart_ptr(pTHX) { return &(aTHX->Ilinestart); }
+# define Perl_Isublex_info_ptr my_Isublex_info_ptr
+static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); }
+#endif
+
#ifdef PERL_5_9_PLUS
/* 5.9+ moves a bunch of things to a PL_parser struct so we need to
declare the backcompat macros for things to still work (mst) */
#define SvPV_nolen_const SvPV_nolen
#endif
+/* utf8_to_uvchr_buf() not defined in earlier perls, but less-capable
+ * substitute is available */
+
+#ifndef utf8_to_uvchr_buf
+#define utf8_to_uvchr_buf(s, e, lp) ((e), utf8_to_uvchr(s, lp))
+#endif
+
+#ifndef isIDFIRST_lazy_if_safe
+# define isIDFIRST_lazy_if_safe(p,e,UTF) \
+ ((! UTF || p > e) ? isIDFIRST_lazy_if(p,UTF) : 0)
+#endif
+#ifndef isALNUM_lazy_if_safe
+# define isALNUM_lazy_if_safe(p,e,UTF) \
+ ((! UTF || p > e) ? isALNUM_lazy_if(p,UTF) : 0)
+#endif
+#ifndef isALNUM_utf8_safe
+# define isALNUM_utf8_safe(p,e) ((p > e) ? isALNUM_utf8(p) : 0)
+#endif
+
/* and now we're back to the toke.c stuff again (mst) */
static const char ident_too_long[] =
* of the buffer, we're not reading from a source filter, and
* we're in normal lexing mode
*/
- if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
+ if (s < PL_bufend || !PL_rsfp || PL_lex_inwhat ||
PL_lex_state == LEX_FORMLINE)
return s;
for (;;) {
if (d >= e)
Perl_croak(aTHX_ ident_too_long);
- if (isALNUM(*s)) /* UTF handled below */
- *d++ = *s++;
- else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
+ if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
+ /* The UTF-8 case must come first, otherwise things
+ * like c\N{COMBINING TILDE} would start failing, as the
+ * isALNUM case below would gobble the 'c' up.
+ */
+
+ char *t = s + UTF8SKIP(s);
+ while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
+ t += UTF8SKIP(t);
+ }
+ if (d + (t - s) > e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ Copy(s, d, t - s, char);
+ *d += t - s;
+ s = t;
+ }
+ else if (isALNUM(*s))
+ do {
+ *d++ = *s++;
+ } while (isWORDCHAR_A(*s) && d < e);
+ else if ( *s == '\''
+ && allow_package
+ && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+ {
*d++ = ':';
*d++ = ':';
s++;
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
- char *t = s + UTF8SKIP(s);
- while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
- t += UTF8SKIP(t);
- if (d + (t - s) > e)
- Perl_croak(aTHX_ ident_too_long);
- Copy(s, d, t - s, char);
- d += t - s;
- s = t;
- }
else {
*d = '\0';
*slp = d - dest;
termlen = 1;
}
else {
- termcode = utf8_to_uvchr((U8*)s, &termlen);
+ termcode = utf8_to_uvchr_buf((U8*)s, PL_bufend, &termlen);
Copy(s, termstr, termlen, U8);
if (!UTF8_IS_INVARIANT(term))
has_utf8 = TRUE;
for (;;) {
if (d >= e)
Perl_croak(aTHX_ ident_too_long);
- if (isALNUM(*s)) /* UTF handled below */
- *d++ = *s++;
- else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
+ if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
+ /* The UTF-8 case must come first, otherwise things
+ * like c\N{COMBINING TILDE} would start failing, as the
+ * isALNUM case below would gobble the 'c' up.
+ */
+
+ char *t = s + UTF8SKIP(s);
+ while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
+ t += UTF8SKIP(t);
+ }
+ if (d + (t - s) > e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ Copy(s, d, t - s, char);
+ *d += t - s;
+ s = t;
+ }
+ else if (isALNUM(*s))
+ do {
+ *d++ = *s++;
+ } while (isWORDCHAR_A(*s) && d < e);
+ else if (*s == '\'' && isIDFIRST_lazy_if_safe(s+1,send,UTF)) {
*d++ = ':';
*d++ = ':';
s++;
*d++ = *s++;
*d++ = *s++;
}
- else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
- char *t = s + UTF8SKIP(s);
- while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
- t += UTF8SKIP(t);
- if (d + (t - s) > e)
- Perl_croak(aTHX_ ident_too_long);
- Copy(s, d, t - s, char);
- d += t - s;
- s = t;
- }
else
break;
}
return s;
}
if (*s == '$' && s[1] &&
- (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+ ( isALNUM_lazy_if_safe(s+1,send,UTF)
+ || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
{
return s;
}
}
}
}
- if (isIDFIRST_lazy_if(d,UTF)) {
+ if (isIDFIRST_lazy_if_safe(d,d+destlen,UTF)) {
d++;
if (UTF) {
e = s;
- while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
+ while ( ( e < send
+ && isIDFIRST_utf8_safe(e, send)
+ || *e == ':'))
+ {
e += UTF8SKIP(e);
- while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
+ while (e < send && isIDFIRST_utf8_safe(e, send))
e += UTF8SKIP(e);
}
Copy(s, d, e - s, char);