Update to work with Perl 5.31.7

diff --git a/stolen_chunk_of_toke.c b/stolen_chunk_of_toke.c

index b9e5037..038a91c 100644 (file)
--- a/stolen_chunk_of_toke.c
+++ b/stolen_chunk_of_toke.c
@@ -186,14 +186,6 @@ static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); }
 #define SvPV_nolen_const SvPV_nolen
 #endif
 
-/* Name changed in 5.17; use new name in our code.  Apparently we're meant
-   to use something else instead, but no non-underscored way to achieve
-   this is apparent.  */
-
-#ifndef _is_utf8_mark
-#define _is_utf8_mark is_utf8_mark
-#endif
-
 /* utf8_to_uvchr_buf() not defined in earlier perls, but less-capable
  * substitute is available */
 
@@ -201,6 +193,18 @@ static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); }
 #define utf8_to_uvchr_buf(s, e, lp) ((e), utf8_to_uvchr(s, lp))
 #endif
 
+#ifndef isIDFIRST_lazy_if_safe
+# define isIDFIRST_lazy_if_safe(p,e,UTF)                                    \
+                    ((! UTF || p > e) ? isIDFIRST_lazy_if(p,UTF) : 0)
+#endif
+#ifndef isALNUM_lazy_if_safe
+# define isALNUM_lazy_if_safe(p,e,UTF)                                      \
+                        ((! UTF || p > e) ? isALNUM_lazy_if(p,UTF) : 0)
+#endif
+#ifndef isALNUM_utf8_safe
+# define isALNUM_utf8_safe(p,e)     ((p > e) ? isALNUM_utf8(p) : 0)
+#endif
+
 /* and now we're back to the toke.c stuff again (mst) */
 
 static const char ident_too_long[] =
@@ -435,9 +439,30 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
     for (;;) {
        if (d >= e)
            Perl_croak(aTHX_ ident_too_long);
-       if (isALNUM(*s))        /* UTF handled below */
-           *d++ = *s++;
-       else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
+        if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
+             /* The UTF-8 case must come first, otherwise things
+             * like c\N{COMBINING TILDE} would start failing, as the
+             * isALNUM case below would gobble the 'c' up.
+             */
+
+            char *t = s + UTF8SKIP(s);
+            while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
+                t += UTF8SKIP(t);
+            }
+            if (d + (t - s) > e)
+                Perl_croak(aTHX_ "%s", ident_too_long);
+            Copy(s, d, t - s, char);
+            *d += t - s;
+            s = t;
+        }
+        else if (isALNUM(*s))
+            do {
+                *d++ = *s++;
+            } while (isWORDCHAR_A(*s) && d < e);
+       else if (   *s == '\''
+                 && allow_package
+                 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+        {
            *d++ = ':';
            *d++ = ':';
            s++;
@@ -446,16 +471,6 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
            *d++ = *s++;
            *d++ = *s++;
        }
-       else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
-           char *t = s + UTF8SKIP(s);
-           while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
-               t += UTF8SKIP(t);
-           if (d + (t - s) > e)
-               Perl_croak(aTHX_ ident_too_long);
-           Copy(s, d, t - s, char);
-           d += t - s;
-           s = t;
-       }
        else {
            *d = '\0';
            *slp = d - dest;
@@ -921,9 +936,27 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
        for (;;) {
            if (d >= e)
                Perl_croak(aTHX_ ident_too_long);
-           if (isALNUM(*s))    /* UTF handled below */
-               *d++ = *s++;
-           else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
+            if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
+                 /* The UTF-8 case must come first, otherwise things
+                 * like c\N{COMBINING TILDE} would start failing, as the
+                 * isALNUM case below would gobble the 'c' up.
+                 */
+
+                char *t = s + UTF8SKIP(s);
+                while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
+                    t += UTF8SKIP(t);
+                }
+                if (d + (t - s) > e)
+                    Perl_croak(aTHX_ "%s", ident_too_long);
+                Copy(s, d, t - s, char);
+                *d += t - s;
+                s = t;
+            }
+            else if (isALNUM(*s))
+                do {
+                    *d++ = *s++;
+                } while (isWORDCHAR_A(*s) && d < e);
+           else if (*s == '\'' && isIDFIRST_lazy_if_safe(s+1,send,UTF)) {
                *d++ = ':';
                *d++ = ':';
                s++;
@@ -932,16 +965,6 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
                *d++ = *s++;
                *d++ = *s++;
            }
-           else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
-               char *t = s + UTF8SKIP(s);
-               while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
-                   t += UTF8SKIP(t);
-               if (d + (t - s) > e)
-                   Perl_croak(aTHX_ ident_too_long);
-               Copy(s, d, t - s, char);
-               d += t - s;
-               s = t;
-           }
            else
                break;
        }
@@ -954,7 +977,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
        return s;
     }
     if (*s == '$' && s[1] &&
-       (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+       (   isALNUM_lazy_if_safe(s+1,send,UTF)
+         || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
     {
        return s;
     }
@@ -982,13 +1006,16 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
                }
            }
        }
-       if (isIDFIRST_lazy_if(d,UTF)) {
+       if (isIDFIRST_lazy_if_safe(d,d+destlen,UTF)) {
            d++;
            if (UTF) {
                e = s;
-               while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
+               while (  (   e < send
+                          && isIDFIRST_utf8_safe(e, send)
+                       || *e == ':'))
+                {
                    e += UTF8SKIP(e);
-                   while (e < send && UTF8_IS_CONTINUED(*e) && _is_utf8_mark((U8*)e))
+                   while (e < send && isIDFIRST_utf8_safe(e, send))
                        e += UTF8SKIP(e);
                }
                Copy(s, d, e - s, char);