X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=p5sagit%2FDevel-Declare.git;a=blobdiff_plain;f=stolen_chunk_of_toke.c;h=ab1c187a97b8a68433b1bc6cbcf1e3e9c0233f0f;hp=8f3b040f92210017d7789d2a0c73569985eeeb68;hb=8aa609aed8c1042b5c9ffa49b4363eabde131d53;hpb=6d6f4cad27fa7e1f88d781d842e09e21d4c0cff4

diff --git a/stolen_chunk_of_toke.c b/stolen_chunk_of_toke.c
index 8f3b040..ab1c187 100644
--- a/stolen_chunk_of_toke.c
+++ b/stolen_chunk_of_toke.c
@@ -17,9 +17,13 @@
  *   up but if it does blame me (Matt S Trout), not the poor original authors
  */
 
+#include "ppport.h"
+
 /* the following #defines are stolen from assorted headers, not toke.c (mst) */
 
-#define skipspace(a)            S_skipspace(aTHX_ a)
+#define skipspace(a)            S_skipspace(aTHX_ a, 0)
+#define peekspace(a)            S_skipspace(aTHX_ a, 1)
+#define skipspace_force(a)      S_skipspace(aTHX_ a, 2)
 #define incline(a)              S_incline(aTHX_ a)
 #define filter_gets(a,b,c)      S_filter_gets(aTHX_ a,b,c)
 #define scan_str(a,b,c)         S_scan_str(aTHX_ a,b,c)
@@ -27,7 +31,7 @@
 #define scan_ident(a,b,c,d,e)   S_scan_ident(aTHX_ a,b,c,d,e)
 
 STATIC void     S_incline(pTHX_ char *s);
-STATIC char*    S_skipspace(pTHX_ char *s);
+STATIC char*    S_skipspace(pTHX_ char *s, int incline);
 STATIC char *   S_filter_gets(pTHX_ SV *sv, PerlIO *fp, STRLEN append);
 STATIC char*    S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims);
 STATIC char*    S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp);
@@ -64,6 +68,20 @@ STATIC char*    S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow
 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 #endif
 
+/*
+ * Normally, during compile time, PL_curcop == &PL_compiling is true. However,
+ * Devel::Declare makes the interpreter call back to perl during compile time,
+ * which temporarily enters runtime. Then perl space calls various functions
+ * from this file, which are designed to work during compile time. They all
+ * happen to operate on PL_curcop, not PL_compiling. That doesn't make a
+ * difference in the core, but it does for Devel::Declare, which operates at
+ * runtime, but still wants to mangle the things that are about to be compiled.
+ * That's why we define our own PL_curcop and make it point to PL_compiling
+ * here.
+ */
+#undef PL_curcop
+#define PL_curcop (&PL_compiling)
+
 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 
 #define LEX_NORMAL    10 /* normal code (ie not within "...")     */
@@ -92,6 +110,20 @@ STATIC char*    S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow
 #define PERL_5_9_PLUS
 #endif
 
+#if !defined(PERL_5_9_PLUS) && defined(PERL_IMPLICIT_CONTEXT)
+/* These two are not exported from the core on Windows.  With 5.9+
+   it's not an issue, because they're part of the PL_parser structure,
+   which is exported.  On multiplicity/thread builds we can work
+   around the lack of export by this formulation, where we provide
+   a substitute implementation of the unexported accessor functions.
+   On single-interpreter builds we can't, because access is directly
+   via symbols that are not exported.  */
+# define Perl_Ilinestart_ptr my_Ilinestart_ptr
+char **my_Ilinestart_ptr(pTHX) { return &(aTHX->Ilinestart); }
+# define Perl_Isublex_info_ptr my_Isublex_info_ptr
+static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); }
+#endif
+
 #ifdef PERL_5_9_PLUS
 /* 5.9+ moves a bunch of things to a PL_parser struct so we need to
    declare the backcompat macros for things to still work (mst) */
@@ -156,6 +188,25 @@ STATIC char*    S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow
 #define SvPV_nolen_const SvPV_nolen
 #endif
 
+/* utf8_to_uvchr_buf() not defined in earlier perls, but less-capable
+ * substitute is available */
+
+#ifndef utf8_to_uvchr_buf
+#define utf8_to_uvchr_buf(s, e, lp) ((e), utf8_to_uvchr(s, lp))
+#endif
+
+#ifndef isIDFIRST_lazy_if_safe
+# define isIDFIRST_lazy_if_safe(p,e,UTF)                                    \
+                    ((! UTF || p > e) ? isIDFIRST_lazy_if(p,UTF) : 0)
+#endif
+#ifndef isALNUM_lazy_if_safe
+# define isALNUM_lazy_if_safe(p,e,UTF)                                      \
+                        ((! UTF || p > e) ? isALNUM_lazy_if(p,UTF) : 0)
+#endif
+#ifndef isALNUM_utf8_safe
+# define isALNUM_utf8_safe(p,e)     ((p > e) ? isALNUM_utf8(p) : 0)
+#endif
+
 /* and now we're back to the toke.c stuff again (mst) */
 
 static const char ident_too_long[] =
@@ -256,7 +307,7 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
  */
 
 STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace(pTHX_ register char *s, int incline)
 {
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
 	while (s < PL_bufend && SPACE_OR_TAB(*s))
@@ -268,7 +319,7 @@ S_skipspace(pTHX_ register char *s)
 	SSize_t oldprevlen, oldoldprevlen;
 	SSize_t oldloplen = 0, oldunilen = 0;
 	while (s < PL_bufend && isSPACE(*s)) {
-	    if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
+	    if (*s++ == '\n' && ((incline == 2) || (PL_in_eval && !PL_rsfp && !incline)))
 		incline(s);
 	}
 
@@ -278,18 +329,26 @@ S_skipspace(pTHX_ register char *s)
 		s++;
 	    if (s < PL_bufend) {
 		s++;
-		if (PL_in_eval && !PL_rsfp) {
+		if (PL_in_eval && !PL_rsfp && !incline) {
 		    incline(s);
 		    continue;
 		}
 	    }
 	}
 
+	/* also skip leading whitespace on the beginning of a line before deciding
+	 * whether or not to recharge the linestr. --rafl
+	 */
+	while (s < PL_bufend && isSPACE(*s)) {
+		if (*s++ == '\n' && PL_in_eval && !PL_rsfp && !incline)
+			incline(s);
+	}
+
 	/* only continue to recharge the buffer if we're at the end
 	 * of the buffer, we're not reading from a source filter, and
 	 * we're in normal lexing mode
 	 */
-	if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
+	if (s < PL_bufend || !PL_rsfp || PL_lex_inwhat ||
 		PL_lex_state == LEX_FORMLINE)
 	    return s;
 
@@ -354,19 +413,22 @@ S_skipspace(pTHX_ register char *s)
 	    PL_last_uni = s + oldunilen;
 	if (PL_last_lop)
 	    PL_last_lop = s + oldloplen;
-	incline(s);
+	if (!incline)
+		incline(s);
 
 	/* debugger active and we're not compiling the debugger code,
 	 * so store the line into the debugger's array of lines
 	 */
 	if (PERLDB_LINE && PL_curstash != PL_debstash) {
-	    SV * const sv = NEWSV(85,0);
-
-	    sv_upgrade(sv, SVt_PVMG);
-	    sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
-            (void)SvIOK_on(sv);
-            SvIV_set(sv, 0);
-	    av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
+	    AV *fileav = CopFILEAV(PL_curcop);
+	    if (fileav) {
+		SV * const sv = NEWSV(85,0);
+		sv_upgrade(sv, SVt_PVMG);
+		sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
+		(void)SvIOK_on(sv);
+		SvIV_set(sv, 0);
+		av_store(fileav,(I32)CopLINE(PL_curcop),sv);
+	    }
 	}
     }
 }
@@ -379,9 +441,30 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
     for (;;) {
 	if (d >= e)
 	    Perl_croak(aTHX_ ident_too_long);
-	if (isALNUM(*s))	/* UTF handled below */
-	    *d++ = *s++;
-	else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
+        if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
+             /* The UTF-8 case must come first, otherwise things
+             * like c\N{COMBINING TILDE} would start failing, as the
+             * isALNUM case below would gobble the 'c' up.
+             */
+
+            char *t = s + UTF8SKIP(s);
+            while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
+                t += UTF8SKIP(t);
+            }
+            if (d + (t - s) > e)
+                Perl_croak(aTHX_ "%s", ident_too_long);
+            Copy(s, d, t - s, char);
+            *d += t - s;
+            s = t;
+        }
+        else if (isALNUM(*s))
+            do {
+                *d++ = *s++;
+            } while (isWORDCHAR_A(*s) && d < e);
+	else if (   *s == '\''
+                 && allow_package
+                 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+        {
 	    *d++ = ':';
 	    *d++ = ':';
 	    s++;
@@ -390,16 +473,6 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
 	    *d++ = *s++;
 	    *d++ = *s++;
 	}
-	else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
-	    char *t = s + UTF8SKIP(s);
-	    while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
-		t += UTF8SKIP(t);
-	    if (d + (t - s) > e)
-		Perl_croak(aTHX_ ident_too_long);
-	    Copy(s, d, t - s, char);
-	    d += t - s;
-	    s = t;
-	}
 	else {
 	    *d = '\0';
 	    *slp = d - dest;
@@ -588,7 +661,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 	termlen = 1;
     }
     else {
-	termcode = utf8_to_uvchr((U8*)s, &termlen);
+	termcode = utf8_to_uvchr_buf((U8*)s, PL_bufend, &termlen);
 	Copy(s, termstr, termlen, U8);
 	if (!UTF8_IS_INVARIANT(term))
 	    has_utf8 = TRUE;
@@ -790,13 +863,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
 	/* update debugger info */
 	if (PERLDB_LINE && PL_curstash != PL_debstash) {
-	    SV *sv = NEWSV(88,0);
-
-	    sv_upgrade(sv, SVt_PVMG);
-	    sv_setsv(sv,PL_linestr);
-            (void)SvIOK_on(sv);
-            SvIV_set(sv, 0);
-	    av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
+	    AV *fileav = CopFILEAV(PL_curcop);
+	    if (fileav) {
+		SV *sv = NEWSV(88,0);
+		sv_upgrade(sv, SVt_PVMG);
+		sv_setsv(sv,PL_linestr);
+		(void)SvIOK_on(sv);
+		SvIV_set(sv, 0);
+		av_store(fileav, (I32)CopLINE(PL_curcop), sv);
+	    }
 	}
 
 	/* having changed the buffer, we must update PL_bufend */
@@ -838,39 +913,6 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     return s;
 }
 
-/*
- * S_force_next
- * When the lexer realizes it knows the next token (for instance,
- * it is reordering tokens for the parser) then it can call S_force_next
- * to know what token to return the next time the lexer is called.  Caller
- * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
- * handles the token correctly.
- */
-
-STATIC void
-S_force_next(pTHX_ I32 type)
-{
-#ifdef PERL_MAD
-    dVAR;
-    if (PL_curforce < 0)
-    start_force(PL_lasttoke);
-    PL_nexttoke[PL_curforce].next_type = type;
-    if (PL_lex_state != LEX_KNOWNEXT)
-    PL_lex_defer = PL_lex_state;
-    PL_lex_state = LEX_KNOWNEXT;
-    PL_lex_expect = PL_expect;
-    PL_curforce = -1;
-#else
-    PL_nexttype[PL_nexttoke] = type;
-    PL_nexttoke++;
-    if (PL_lex_state != LEX_KNOWNEXT) {
-  PL_lex_defer = PL_lex_state;
-  PL_lex_expect = PL_expect;
-  PL_lex_state = LEX_KNOWNEXT;
-    }
-#endif
-}
-
 #define XFAKEBRACK 128
 
 STATIC char *
@@ -896,9 +938,27 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	for (;;) {
 	    if (d >= e)
 		Perl_croak(aTHX_ ident_too_long);
-	    if (isALNUM(*s))	/* UTF handled below */
-		*d++ = *s++;
-	    else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
+            if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
+                 /* The UTF-8 case must come first, otherwise things
+                 * like c\N{COMBINING TILDE} would start failing, as the
+                 * isALNUM case below would gobble the 'c' up.
+                 */
+
+                char *t = s + UTF8SKIP(s);
+                while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
+                    t += UTF8SKIP(t);
+                }
+                if (d + (t - s) > e)
+                    Perl_croak(aTHX_ "%s", ident_too_long);
+                Copy(s, d, t - s, char);
+                *d += t - s;
+                s = t;
+            }
+            else if (isALNUM(*s))
+                do {
+                    *d++ = *s++;
+                } while (isWORDCHAR_A(*s) && d < e);
+	    else if (*s == '\'' && isIDFIRST_lazy_if_safe(s+1,send,UTF)) {
 		*d++ = ':';
 		*d++ = ':';
 		s++;
@@ -907,16 +967,6 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 		*d++ = *s++;
 		*d++ = *s++;
 	    }
-	    else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
-		char *t = s + UTF8SKIP(s);
-		while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
-		    t += UTF8SKIP(t);
-		if (d + (t - s) > e)
-		    Perl_croak(aTHX_ ident_too_long);
-		Copy(s, d, t - s, char);
-		d += t - s;
-		s = t;
-	    }
 	    else
 		break;
 	}
@@ -929,19 +979,18 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 	return s;
     }
     if (*s == '$' && s[1] &&
-	(isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+	(   isALNUM_lazy_if_safe(s+1,send,UTF)
+         || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
     {
 	return s;
     }
     if (*s == '{') {
 	bracket = s;
 	s++;
+    } else if (ck_uni) {
+       /* we always call this with ck_uni == 0, so no need for check_uni() */
+       /* check_uni(); */
     }
-    /* we always call this with ck_uni == 0 (rafl) */
-    /*
-    else if (ck_uni)
-	check_uni();
-    */
     if (s < send)
 	*d = *s++;
     d[1] = '\0';
@@ -959,13 +1008,16 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
 		}
 	    }
 	}
-	if (isIDFIRST_lazy_if(d,UTF)) {
+	if (isIDFIRST_lazy_if_safe(d,d+destlen,UTF)) {
 	    d++;
 	    if (UTF) {
 		e = s;
-		while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
+		while ( ((   e < send
+                          && isIDFIRST_utf8_safe(e, send))
+                       || *e == ':'))
+                {
 		    e += UTF8SKIP(e);
-		    while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
+		    while (e < send && isIDFIRST_utf8_safe(e, send))
 			e += UTF8SKIP(e);
 		}
 		Copy(s, d, e - s, char);