X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=stolen_chunk_of_toke.c;h=038a91c345a2c78626855b6b103d5aea7dd48309;hb=03b20d2d9f560a40f2509ea8af4bc81e106c62d2;hp=cbc18060000bcc6c570349d09e388c330e04fe47;hpb=ec25cea764afbf3b9802c6833fc0b7bca960f94b;p=p5sagit%2FDevel-Declare.git diff --git a/stolen_chunk_of_toke.c b/stolen_chunk_of_toke.c index cbc1806..038a91c 100644 --- a/stolen_chunk_of_toke.c +++ b/stolen_chunk_of_toke.c @@ -108,6 +108,20 @@ STATIC char* S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow #define PERL_5_9_PLUS #endif +#if !defined(PERL_5_9_PLUS) && defined(PERL_IMPLICIT_CONTEXT) +/* These two are not exported from the core on Windows. With 5.9+ + it's not an issue, because they're part of the PL_parser structure, + which is exported. On multiplicity/thread builds we can work + around the lack of export by this formulation, where we provide + a substitute implementation of the unexported accessor functions. + On single-interpreter builds we can't, because access is directly + via symbols that are not exported. */ +# define Perl_Ilinestart_ptr my_Ilinestart_ptr +char **my_Ilinestart_ptr(pTHX) { return &(aTHX->Ilinestart); } +# define Perl_Isublex_info_ptr my_Isublex_info_ptr +static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); } +#endif + #ifdef PERL_5_9_PLUS /* 5.9+ moves a bunch of things to a PL_parser struct so we need to declare the backcompat macros for things to still work (mst) */ @@ -172,6 +186,25 @@ STATIC char* S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow #define SvPV_nolen_const SvPV_nolen #endif +/* utf8_to_uvchr_buf() not defined in earlier perls, but less-capable + * substitute is available */ + +#ifndef utf8_to_uvchr_buf +#define utf8_to_uvchr_buf(s, e, lp) ((e), utf8_to_uvchr(s, lp)) +#endif + +#ifndef isIDFIRST_lazy_if_safe +# define isIDFIRST_lazy_if_safe(p,e,UTF) \ + ((! UTF || p > e) ? isIDFIRST_lazy_if(p,UTF) : 0) +#endif +#ifndef isALNUM_lazy_if_safe +# define isALNUM_lazy_if_safe(p,e,UTF) \ + ((! UTF || p > e) ? isALNUM_lazy_if(p,UTF) : 0) +#endif +#ifndef isALNUM_utf8_safe +# define isALNUM_utf8_safe(p,e) ((p > e) ? isALNUM_utf8(p) : 0) +#endif + /* and now we're back to the toke.c stuff again (mst) */ static const char ident_too_long[] = @@ -313,7 +346,7 @@ S_skipspace(pTHX_ register char *s, int incline) * of the buffer, we're not reading from a source filter, and * we're in normal lexing mode */ - if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat || + if (s < PL_bufend || !PL_rsfp || PL_lex_inwhat || PL_lex_state == LEX_FORMLINE) return s; @@ -406,9 +439,30 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag for (;;) { if (d >= e) Perl_croak(aTHX_ ident_too_long); - if (isALNUM(*s)) /* UTF handled below */ - *d++ = *s++; - else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) { + if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) { + /* The UTF-8 case must come first, otherwise things + * like c\N{COMBINING TILDE} would start failing, as the + * isALNUM case below would gobble the 'c' up. + */ + + char *t = s + UTF8SKIP(s); + while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) { + t += UTF8SKIP(t); + } + if (d + (t - s) > e) + Perl_croak(aTHX_ "%s", ident_too_long); + Copy(s, d, t - s, char); + *d += t - s; + s = t; + } + else if (isALNUM(*s)) + do { + *d++ = *s++; + } while (isWORDCHAR_A(*s) && d < e); + else if ( *s == '\'' + && allow_package + && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF)) + { *d++ = ':'; *d++ = ':'; s++; @@ -417,16 +471,6 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag *d++ = *s++; *d++ = *s++; } - else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { - char *t = s + UTF8SKIP(s); - while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) - t += UTF8SKIP(t); - if (d + (t - s) > e) - Perl_croak(aTHX_ ident_too_long); - Copy(s, d, t - s, char); - d += t - s; - s = t; - } else { *d = '\0'; *slp = d - dest; @@ -615,7 +659,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) termlen = 1; } else { - termcode = utf8_to_uvchr((U8*)s, &termlen); + termcode = utf8_to_uvchr_buf((U8*)s, PL_bufend, &termlen); Copy(s, termstr, termlen, U8); if (!UTF8_IS_INVARIANT(term)) has_utf8 = TRUE; @@ -892,9 +936,27 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL for (;;) { if (d >= e) Perl_croak(aTHX_ ident_too_long); - if (isALNUM(*s)) /* UTF handled below */ - *d++ = *s++; - else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) { + if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) { + /* The UTF-8 case must come first, otherwise things + * like c\N{COMBINING TILDE} would start failing, as the + * isALNUM case below would gobble the 'c' up. + */ + + char *t = s + UTF8SKIP(s); + while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) { + t += UTF8SKIP(t); + } + if (d + (t - s) > e) + Perl_croak(aTHX_ "%s", ident_too_long); + Copy(s, d, t - s, char); + *d += t - s; + s = t; + } + else if (isALNUM(*s)) + do { + *d++ = *s++; + } while (isWORDCHAR_A(*s) && d < e); + else if (*s == '\'' && isIDFIRST_lazy_if_safe(s+1,send,UTF)) { *d++ = ':'; *d++ = ':'; s++; @@ -903,16 +965,6 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL *d++ = *s++; *d++ = *s++; } - else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { - char *t = s + UTF8SKIP(s); - while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) - t += UTF8SKIP(t); - if (d + (t - s) > e) - Perl_croak(aTHX_ ident_too_long); - Copy(s, d, t - s, char); - d += t - s; - s = t; - } else break; } @@ -925,7 +977,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL return s; } if (*s == '$' && s[1] && - (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) ) + ( isALNUM_lazy_if_safe(s+1,send,UTF) + || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) ) { return s; } @@ -953,13 +1006,16 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL } } } - if (isIDFIRST_lazy_if(d,UTF)) { + if (isIDFIRST_lazy_if_safe(d,d+destlen,UTF)) { d++; if (UTF) { e = s; - while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') { + while ( ( e < send + && isIDFIRST_utf8_safe(e, send) + || *e == ':')) + { e += UTF8SKIP(e); - while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e)) + while (e < send && isIDFIRST_utf8_safe(e, send)) e += UTF8SKIP(e); } Copy(s, d, e - s, char);