X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=stolen_chunk_of_toke.c;h=c667eaa77ba2704b3cc69ae93e6b802b7555a47a;hb=b52072dc659832e3465ecb4801024bc7c05ddad7;hp=5da609536eca054d4d1cb7fc99e1fc86c3c52e33;hpb=e807ee502b7c21edb01594764925aabdc8948f37;p=p5sagit%2FDevel-Declare.git

diff --git a/stolen_chunk_of_toke.c b/stolen_chunk_of_toke.c
index 5da6095..c667eaa 100644
--- a/stolen_chunk_of_toke.c
+++ b/stolen_chunk_of_toke.c
@@ -17,13 +17,70 @@
  *   up but if it does blame me (Matt S Trout), not the poor original authors
  */
 
-/* the following #defines are stolen from assorted headers, not toke.c */
+/* the following #defines are stolen from assorted headers, not toke.c (mst) */
+
+#define skipspace(a)            S_skipspace(aTHX_ a, 0)
+#define peekspace(a)            S_skipspace(aTHX_ a, 1)
+#define skipspace_force(a)      S_skipspace(aTHX_ a, 2)
+#define incline(a)              S_incline(aTHX_ a)
+#define filter_gets(a,b,c)      S_filter_gets(aTHX_ a,b,c)
+#define scan_str(a,b,c)         S_scan_str(aTHX_ a,b,c)
+#define scan_word(a,b,c,d,e)    S_scan_word(aTHX_ a,b,c,d,e)
+#define scan_ident(a,b,c,d,e)   S_scan_ident(aTHX_ a,b,c,d,e)
+
+STATIC void     S_incline(pTHX_ char *s);
+STATIC char*    S_skipspace(pTHX_ char *s, int incline);
+STATIC char *   S_filter_gets(pTHX_ SV *sv, PerlIO *fp, STRLEN append);
+STATIC char*    S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims);
+STATIC char*    S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp);
 
 #define DPTR2FPTR(t,p) ((t)PTR2nat(p))  /* data pointer to function pointer */
 #define FPTR2DPTR(t,p) ((t)PTR2nat(p))  /* function pointer to data pointer */
-#define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
+#define PTR2nat(p)       (PTRV)(p)       /* pointer to integer of PTRSIZE */
+
+/* conditionalise these because newer perls (5.9.5 onwards) already provide
+   them in the headers (mst) */
+#ifndef Newx
 #define Newx(v,n,t) (v = (MEM_WRAP_CHECK_(n,t) (t*)safemalloc((MEM_SIZE)((n)*sizeof(t)))))
+#endif
+#ifndef SvPVX_const
+#define SvPVX_const(sv) ((const char*) (0 + SvPVX(sv)))
+#endif
+#ifndef MEM_WRAP_CHECK_
+#define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
+#endif
+
+#define SvPV_renew(sv,n) \
+  STMT_START { SvLEN_set(sv, n); \
+    SvPV_set((sv), (MEM_WRAP_CHECK_(n,char)     \
+        (char*)saferealloc((Malloc_t)SvPVX(sv), \
+               (MEM_SIZE)((n)))));  \
+     } STMT_END
 
+#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
+
+/* On MacOS, respect nonbreaking spaces */
+#ifdef MACOS_TRADITIONAL
+#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
+#else
+#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
+#endif
+
+/*
+ * Normally, during compile time, PL_curcop == &PL_compiling is true. However,
+ * Devel::Declare makes the interpreter call back to perl during compile time,
+ * which temporarily enters runtime. Then perl space calls various functions
+ * from this file, which are designed to work during compile time. They all
+ * happen to operate on PL_curcop, not PL_compiling. That doesn't make a
+ * difference in the core, but it does for Devel::Declare, which operates at
+ * runtime, but still wants to mangle the things that are about to be compiled.
+ * That's why we define our own PL_curcop and make it point to PL_compiling
+ * here.
+ */
+#undef PL_curcop
+#define PL_curcop (&PL_compiling)
+
+#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 
 #define LEX_NORMAL    10 /* normal code (ie not within "...")     */
 #define LEX_INTERPNORMAL   9 /* code within a string, eg "$foo[$x+1]" */
@@ -41,6 +98,111 @@
 #define LEX_FORMLINE     1 /* expecting a format line               */
 #define LEX_KNOWNEXT     0 /* next token known; just return it      */
 
+/* and these two are my own madness (mst) */
+
+#if PERL_REVISION == 5 && PERL_VERSION == 8 && PERL_SUBVERSION >= 8
+#define PERL_5_8_8_PLUS
+#endif
+
+#if PERL_REVISION == 5 && PERL_VERSION > 8
+#define PERL_5_9_PLUS
+#endif
+
+#if !defined(PERL_5_9_PLUS) && defined(PERL_IMPLICIT_CONTEXT)
+/* These two are not exported from the core on Windows.  With 5.9+
+   it's not an issue, because they're part of the PL_parser structure,
+   which is exported.  On multiplicity/thread builds we can work
+   around the lack of export by this formulation, where we provide
+   a substitute implementation of the unexported accessor functions.
+   On single-interpreter builds we can't, because access is directly
+   via symbols that are not exported.  */
+# define Perl_Ilinestart_ptr my_Ilinestart_ptr
+static char **my_Ilinestart_ptr(pTHX) { return &(aTHX->Ilinestart); } /* file-local stand-in for the unexported accessor; returns the address of the interpreter's Ilinestart */
+# define Perl_Isublex_info_ptr my_Isublex_info_ptr
+static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); } /* file-local stand-in for the unexported accessor; returns the address of the interpreter's Isublex_info */
+#endif
+
+#ifdef PERL_5_9_PLUS
+/* 5.9+ moves a bunch of things to a PL_parser struct so we need to
+   declare the backcompat macros for things to still work (mst) */
+
+/* XXX temporary backwards compatibility */
+#define PL_lex_brackets         (PL_parser->lex_brackets)
+#define PL_lex_brackstack       (PL_parser->lex_brackstack)
+#define PL_lex_casemods         (PL_parser->lex_casemods)
+#define PL_lex_casestack        (PL_parser->lex_casestack)
+#define PL_lex_defer            (PL_parser->lex_defer)
+#define PL_lex_dojoin           (PL_parser->lex_dojoin)
+#define PL_lex_expect           (PL_parser->lex_expect)
+#define PL_lex_formbrack        (PL_parser->lex_formbrack)
+#define PL_lex_inpat            (PL_parser->lex_inpat)
+#define PL_lex_inwhat           (PL_parser->lex_inwhat)
+#define PL_lex_op               (PL_parser->lex_op)
+#define PL_lex_repl             (PL_parser->lex_repl)
+#define PL_lex_starts           (PL_parser->lex_starts)
+#define PL_lex_stuff            (PL_parser->lex_stuff)
+#define PL_multi_start          (PL_parser->multi_start)
+#define PL_multi_open           (PL_parser->multi_open)
+#define PL_multi_close          (PL_parser->multi_close)
+#define PL_pending_ident        (PL_parser->pending_ident)
+#define PL_preambled            (PL_parser->preambled)
+#define PL_sublex_info          (PL_parser->sublex_info)
+#define PL_linestr              (PL_parser->linestr)
+#define PL_sublex_info          (PL_parser->sublex_info)
+#define PL_linestr              (PL_parser->linestr)
+#define PL_expect               (PL_parser->expect)
+#define PL_copline              (PL_parser->copline)
+#define PL_bufptr               (PL_parser->bufptr)
+#define PL_oldbufptr            (PL_parser->oldbufptr)
+#define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
+#define PL_linestart            (PL_parser->linestart)
+#define PL_bufend               (PL_parser->bufend)
+#define PL_last_uni             (PL_parser->last_uni)
+#define PL_last_lop             (PL_parser->last_lop)
+#define PL_last_lop_op          (PL_parser->last_lop_op)
+#define PL_lex_state            (PL_parser->lex_state)
+#define PL_rsfp                 (PL_parser->rsfp)
+#define PL_rsfp_filters         (PL_parser->rsfp_filters)
+#define PL_in_my                (PL_parser->in_my)
+#define PL_in_my_stash          (PL_parser->in_my_stash)
+#define PL_tokenbuf             (PL_parser->tokenbuf)
+#define PL_multi_end            (PL_parser->multi_end)
+#define PL_error_count          (PL_parser->error_count)
+#define PL_nexttoke           (PL_parser->nexttoke)
+/* these are from the non-PERL_MAD path but I don't -think- I need
+   the PERL_MAD stuff since my code isn't really populating things (mst) */
+# ifdef PERL_MAD
+#  define PL_curforce		(PL_parser->curforce)
+#  define PL_lasttoke		(PL_parser->lasttoke)
+# else
+#  define PL_nexttype           (PL_parser->nexttype)
+#  define PL_nextval            (PL_parser->nextval)
+# endif
+/* end of backcompat macros from 5.9 toke.c (mst) */
+#endif
+
+/* when ccflags include -DDEBUGGING we need this for earlier 5.8 perls */
+#ifndef SvPV_nolen_const
+#define SvPV_nolen_const SvPV_nolen
+#endif
+
+/* Name changed in 5.17; use new name in our code.  Apparently we're meant
+   to use something else instead, but no non-underscored way to achieve
+   this is apparent.  */
+
+#ifndef _is_utf8_mark
+#define _is_utf8_mark is_utf8_mark
+#endif
+
+/* utf8_to_uvchr_buf() not defined in earlier perls, but less-capable
+ * substitute is available */
+
+#ifndef utf8_to_uvchr_buf
+#define utf8_to_uvchr_buf(s, e, lp) ((e), utf8_to_uvchr(s, lp))
+#endif
+
+/* and now we're back to the toke.c stuff again (mst) */
+
 static const char ident_too_long[] =
   "Identifier too long";
 static const char c_without_g[] =
@@ -139,7 +301,7 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
  */
 
 STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace(pTHX_ register char *s, int incline)
 {
     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
 	while (s < PL_bufend && SPACE_OR_TAB(*s))
@@ -151,7 +313,7 @@ S_skipspace(pTHX_ register char *s)
 	SSize_t oldprevlen, oldoldprevlen;
 	SSize_t oldloplen = 0, oldunilen = 0;
 	while (s < PL_bufend && isSPACE(*s)) {
-	    if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
+	    if (*s++ == '\n' && ((incline == 2) || (PL_in_eval && !PL_rsfp && !incline)))
 		incline(s);
 	}
 
@@ -161,13 +323,21 @@ S_skipspace(pTHX_ register char *s)
 		s++;
 	    if (s < PL_bufend) {
 		s++;
-		if (PL_in_eval && !PL_rsfp) {
+		if (PL_in_eval && !PL_rsfp && !incline) {
 		    incline(s);
 		    continue;
 		}
 	    }
 	}
 
+	/* also skip leading whitespace on the beginning of a line before deciding
+	 * whether or not to recharge the linestr. --rafl
+	 */
+	while (s < PL_bufend && isSPACE(*s)) {
+		if (*s++ == '\n' && PL_in_eval && !PL_rsfp && !incline)
+			incline(s);
+	}
+
 	/* only continue to recharge the buffer if we're at the end
 	 * of the buffer, we're not reading from a source filter, and
 	 * we're in normal lexing mode
@@ -199,16 +369,20 @@ S_skipspace(pTHX_ register char *s)
 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
 	    PL_last_lop = PL_last_uni = Nullch;
 
-	    /* Close the filehandle.  Could be from -P preprocessor,
+	    /* In perl versions previous to p4-rawid: //depot/perl@32954 -P
+	     * preprocessors were supported here. We don't support -P at all, even
+	     * on perls that support it, and use the following chunk from blead
+	     * perl. (rafl)
+	     */
+
+	    /* Close the filehandle.  Could be from
 	     * STDIN, or a regular file.  If we were reading code from
 	     * STDIN (because the commandline held no -e or filename)
 	     * then we don't close it, we reset it so the code can
 	     * read from STDIN too.
 	     */
 
-	    if (PL_preprocess && !PL_in_eval)
-		(void)PerlProc_pclose(PL_rsfp);
-	    else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
+	    if ((PerlIO*)PL_rsfp == PerlIO_stdin())
 		PerlIO_clearerr(PL_rsfp);
 	    else
 		(void)PerlIO_close(PL_rsfp);
@@ -233,19 +407,22 @@ S_skipspace(pTHX_ register char *s)
 	    PL_last_uni = s + oldunilen;
 	if (PL_last_lop)
 	    PL_last_lop = s + oldloplen;
-	incline(s);
+	if (!incline)
+		incline(s);
 
 	/* debugger active and we're not compiling the debugger code,
 	 * so store the line into the debugger's array of lines
 	 */
 	if (PERLDB_LINE && PL_curstash != PL_debstash) {
-	    SV * const sv = NEWSV(85,0);
-
-	    sv_upgrade(sv, SVt_PVMG);
-	    sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
-            (void)SvIOK_on(sv);
-            SvIV_set(sv, 0);
-	    av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
+	    AV *fileav = CopFILEAV(PL_curcop);
+	    if (fileav) {
+		SV * const sv = NEWSV(85,0);
+		sv_upgrade(sv, SVt_PVMG);
+		sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
+		(void)SvIOK_on(sv);
+		SvIV_set(sv, 0);
+		av_store(fileav,(I32)CopLINE(PL_curcop),sv);
+	    }
 	}
     }
 }
@@ -271,7 +448,7 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag
 	}
 	else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
 	    char *t = s + UTF8SKIP(s);
-	    while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
+	    while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
 		t += UTF8SKIP(t);
 	    if (d + (t - s) > e)
 		Perl_croak(aTHX_ ident_too_long);
@@ -341,6 +518,12 @@ S_incline(pTHX_ char *s)
     ch = *t;
     *t = '\0';
     if (t - s > 0) {
+/* this chunk was added to S_incline during 5.8.8. I don't know why but I don't
+   honestly care since I probably want to be bug-compatible anyway (mst) */
+
+/* ... my kingdom for a perl parser in perl ... (mst) */
+
+#ifdef PERL_5_8_8_PLUS
 #ifndef USE_ITHREADS
 	const char *cf = CopFILE(PL_curcop);
 	if (cf && strlen(cf) > 7 && strnEQ(cf, "(eval ", 6)) {
@@ -378,9 +561,510 @@ S_incline(pTHX_ char *s)
 	    if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
 	}
 #endif
+#endif
+/* second endif closes out the "are we 5.8.(8+)" conditional */
 	CopFILE_free(PL_curcop);
 	CopFILE_set(PL_curcop, s);
     }
     *t = ch;
     CopLINE_set(PL_curcop, atoi(n)-1);
 }
+
+/* scan_str
+   takes: start position in buffer
+	  keep_quoted preserve \ on the embedded delimiter(s)
+	  keep_delims preserve the delimiters around the string
+   returns: position to continue reading from buffer
+   side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
+   	updates the read buffer.
+
+   This subroutine pulls a string out of the input.  It is called for:
+   	q		single quotes		q(literal text)
+	'		single quotes		'literal text'
+	qq		double quotes		qq(interpolate $here please)
+	"		double quotes		"interpolate $here please"
+	qx		backticks		qx(/bin/ls -l)
+	`		backticks		`/bin/ls -l`
+	qw		quote words		@EXPORT_OK = qw( func() $spam )
+	m//		regexp match		m/this/
+	s///		regexp substitute	s/this/that/
+	tr///		string transliterate	tr/this/that/
+	y///		string transliterate	y/this/that/
+	($*@)		sub prototypes		sub foo ($)
+	(stuff)		sub attr parameters	sub foo : attr(stuff)
+	<>		readline or globs	<FOO>, <>, <$fh>, or <*.c>
+	
+   In most of these cases (all but <>, patterns and transliterate)
+   yylex() calls scan_str().  m// makes yylex() call scan_pat() which
+   calls scan_str().  s/// makes yylex() call scan_subst() which calls
+   scan_str().  tr/// and y/// make yylex() call scan_trans() which
+   calls scan_str().
+
+   It skips whitespace before the string starts, and treats the first
+   character as the delimiter.  If the delimiter is one of ([{< then
+   the corresponding "close" character )]}> is used as the closing
+   delimiter.  It allows quoting of delimiters, and if the string has
+   balanced delimiters ([{<>}]) it allows nesting.
+
+   On success, the SV with the resulting string is put into lex_stuff or,
+   if that is already non-NULL, into lex_repl. The second case occurs only
+   when parsing the RHS of the special constructs s/// and tr/// (y///).
+   For convenience, the terminating delimiter character is stuffed into
+   SvIVX of the SV.
+*/
+
+STATIC char *
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+{
+    SV *sv;				/* scalar value: string */
+    char *tmps;				/* temp string, used for delimiter matching */
+    register char *s = start;		/* current position in the buffer */
+    register char term;			/* terminating character */
+    register char *to;			/* current position in the sv's data */
+    I32 brackets = 1;			/* bracket nesting level */
+    bool has_utf8 = FALSE;		/* is there any utf8 content? */
+    I32 termcode;			/* terminating char. code */
+    /* 5.8.7+ uses UTF8_MAXBYTES here, but its utf8.h also defines UTF8_MAXLEN
+       to it, so I'm reasonably hopeful this won't destroy anything (mst) */
+    U8 termstr[UTF8_MAXLEN];		/* terminating string */
+    STRLEN termlen;			/* length of terminating string */
+    char *last = NULL;			/* last position for nesting bracket */
+
+    /* skip space before the delimiter */
+    if (isSPACE(*s))
+	s = skipspace(s);
+
+    /* mark where we are, in case we need to report errors */
+    CLINE;
+
+    /* after skipping whitespace, the next character is the terminator */
+    term = *s;
+    if (!UTF) {
+	termcode = termstr[0] = term;
+	termlen = 1;
+    }
+    else {
+	termcode = utf8_to_uvchr_buf((U8*)s, PL_bufend, &termlen);
+	Copy(s, termstr, termlen, U8);
+	if (!UTF8_IS_INVARIANT(term))
+	    has_utf8 = TRUE;
+    }
+
+    /* mark where we are */
+    PL_multi_start = CopLINE(PL_curcop);
+    PL_multi_open = term;
+
+    /* find corresponding closing delimiter */
+    if (term && (tmps = strchr("([{< )]}> )]}>",term)))
+	termcode = termstr[0] = term = tmps[5];
+
+    PL_multi_close = term;
+
+    /* create a new SV to hold the contents.  87 is leak category, I'm
+       assuming.  79 is the SV's initial length.  What a random number. */
+    sv = NEWSV(87,79);
+    sv_upgrade(sv, SVt_PVIV);
+    SvIV_set(sv, termcode);
+    (void)SvPOK_only(sv);		/* validate pointer */
+
+    /* move past delimiter and try to read a complete string */
+    if (keep_delims)
+	sv_catpvn(sv, s, termlen);
+    s += termlen;
+    for (;;) {
+	if (PL_encoding && !UTF) {
+	    bool cont = TRUE;
+
+	    while (cont) {
+		int offset = s - SvPVX_const(PL_linestr);
+		const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+					   &offset, (char*)termstr, termlen);
+		const char *ns = SvPVX_const(PL_linestr) + offset;
+		char *svlast = SvEND(sv) - 1;
+
+		for (; s < ns; s++) {
+		    if (*s == '\n' && !PL_rsfp)
+			CopLINE_inc(PL_curcop);
+		}
+		if (!found)
+		    goto read_more_line;
+		else {
+		    /* handle quoted delimiters */
+		    if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
+			const char *t;
+			for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
+			    t--;
+			if ((svlast-1 - t) % 2) {
+			    if (!keep_quoted) {
+				*(svlast-1) = term;
+				*svlast = '\0';
+				SvCUR_set(sv, SvCUR(sv) - 1);
+			    }
+			    continue;
+			}
+		    }
+		    if (PL_multi_open == PL_multi_close) {
+			cont = FALSE;
+		    }
+		    else {
+			const char *t;
+			char *w;
+			if (!last)
+			    last = SvPVX(sv);
+			for (t = w = last; t < svlast; w++, t++) {
+			    /* At this point every close delimiter seen was a quoted
+			       one, so we don't check PL_multi_close. */
+			    if (*t == '\\') {
+				if (!keep_quoted && *(t+1) == PL_multi_open)
+				    t++;
+				else
+				    *w++ = *t++;
+			    }
+			    else if (*t == PL_multi_open)
+				brackets++;
+
+			    *w = *t;
+			}
+			if (w < t) {
+			    *w++ = term;
+			    *w = '\0';
+			    SvCUR_set(sv, w - SvPVX_const(sv));
+			}
+			last = w;
+			if (--brackets <= 0)
+			    cont = FALSE;
+		    }
+		}
+	    }
+	    if (!keep_delims) {
+		SvCUR_set(sv, SvCUR(sv) - 1);
+		*SvEND(sv) = '\0';
+	    }
+	    break;
+	}
+
+    	/* extend sv if need be */
+	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
+	/* set 'to' to the next character in the sv's string */
+	to = SvPVX(sv)+SvCUR(sv);
+
+	/* if the open delimiter is also the close delimiter, read without bracket nesting */
+	if (PL_multi_open == PL_multi_close) {
+	    for (; s < PL_bufend; s++,to++) {
+	    	/* embedded newlines increment the current line number */
+		if (*s == '\n' && !PL_rsfp)
+		    CopLINE_inc(PL_curcop);
+		/* handle quoted delimiters */
+		if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
+		    if (!keep_quoted && s[1] == term)
+			s++;
+		/* any other quotes are simply copied straight through */
+		    else
+			*to++ = *s++;
+		}
+		/* stop when we run out of buffer (the for() condition), or
+		   when we have found the terminator */
+		else if (*s == term) {
+		    if (termlen == 1)
+			break;
+		    if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
+			break;
+		}
+		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+		    has_utf8 = TRUE;
+		*to = *s;
+	    }
+	}
+	
+	/* if the terminator isn't the same as the start character (e.g.,
+	   matched brackets), we have to allow more in the quoting, and
+	   be prepared for nested brackets.
+	*/
+	else {
+	    /* read until we run out of string, or we find the terminator */
+	    for (; s < PL_bufend; s++,to++) {
+	    	/* embedded newlines increment the line count */
+		if (*s == '\n' && !PL_rsfp)
+		    CopLINE_inc(PL_curcop);
+		/* backslashes can escape the open or closing characters */
+		if (*s == '\\' && s+1 < PL_bufend) {
+		    if (!keep_quoted &&
+			((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+			s++;
+		    else
+			*to++ = *s++;
+		}
+		/* allow nested opens and closes */
+		else if (*s == PL_multi_close && --brackets <= 0)
+		    break;
+		else if (*s == PL_multi_open)
+		    brackets++;
+		else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+		    has_utf8 = TRUE;
+		*to = *s;
+	    }
+	}
+	/* terminate the copied string and update the sv's end-of-string */
+	*to = '\0';
+	SvCUR_set(sv, to - SvPVX_const(sv));
+
+	/*
+	 * this next chunk reads more into the buffer if we're not done yet
+	 */
+
+  	if (s < PL_bufend)
+	    break;		/* handle case where we are done yet :-) */
+
+#ifndef PERL_STRICT_CR
+	if (to - SvPVX_const(sv) >= 2) {
+	    if ((to[-2] == '\r' && to[-1] == '\n') ||
+		(to[-2] == '\n' && to[-1] == '\r'))
+	    {
+		to[-2] = '\n';
+		to--;
+		SvCUR_set(sv, to - SvPVX_const(sv));
+	    }
+	    else if (to[-1] == '\r')
+		to[-1] = '\n';
+	}
+	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
+	    to[-1] = '\n';
+#endif
+	
+     read_more_line:
+	/* if we're out of file, or a read fails, bail and reset the current
+	   line marker so we can report where the unterminated string began
+	*/
+	if (!PL_rsfp ||
+	 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+	    sv_free(sv);
+	    CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+	    return Nullch;
+	}
+	/* we read a line, so increment our line counter */
+	CopLINE_inc(PL_curcop);
+
+	/* update debugger info */
+	if (PERLDB_LINE && PL_curstash != PL_debstash) {
+	    AV *fileav = CopFILEAV(PL_curcop);
+	    if (fileav) {
+		SV *sv = NEWSV(88,0);
+		sv_upgrade(sv, SVt_PVMG);
+		sv_setsv(sv,PL_linestr);
+		(void)SvIOK_on(sv);
+		SvIV_set(sv, 0);
+		av_store(fileav, (I32)CopLINE(PL_curcop), sv);
+	    }
+	}
+
+	/* having changed the buffer, we must update PL_bufend */
+	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+	PL_last_lop = PL_last_uni = Nullch;
+    }
+
+    /* at this point, we have successfully read the delimited string */
+
+    if (!PL_encoding || UTF) {
+	if (keep_delims)
+	    sv_catpvn(sv, s, termlen);
+	s += termlen;
+    }
+    if (has_utf8 || PL_encoding)
+	SvUTF8_on(sv);
+
+    PL_multi_end = CopLINE(PL_curcop);
+
+    /* if we allocated too much space, give some back */
+    if (SvCUR(sv) + 5 < SvLEN(sv)) {
+	SvLEN_set(sv, SvCUR(sv) + 1);
+/* 5.8.8 uses SvPV_renew, no prior version actually has the damn thing (mst) */
+#ifdef PERL_5_8_8_PLUS
+	SvPV_renew(sv, SvLEN(sv));
+#else
+	Renew(SvPVX(sv), SvLEN(sv), char);
+#endif
+    }
+
+    /* decide whether this is the first or second quoted string we've read
+       for this op
+    */
+
+    if (PL_lex_stuff)
+	PL_lex_repl = sv;
+    else
+	PL_lex_stuff = sv;
+    return s;
+}
+
+#define XFAKEBRACK 128
+
+STATIC char *
+S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
+{
+    register char *d;
+    register char *e;
+    char *bracket = Nullch;
+    char funny = *s++;
+
+    if (isSPACE(*s))
+	s = skipspace(s);
+    d = dest;
+    e = d + destlen - 3;	/* two-character token, ending NUL */
+    if (isDIGIT(*s)) {
+	while (isDIGIT(*s)) {
+	    if (d >= e)
+		Perl_croak(aTHX_ ident_too_long);
+	    *d++ = *s++;
+	}
+    }
+    else {
+	for (;;) {
+	    if (d >= e)
+		Perl_croak(aTHX_ ident_too_long);
+	    if (isALNUM(*s))	/* UTF handled below */
+		*d++ = *s++;
+	    else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
+		*d++ = ':';
+		*d++ = ':';
+		s++;
+	    }
+	    else if (*s == ':' && s[1] == ':') {
+		*d++ = *s++;
+		*d++ = *s++;
+	    }
+	    else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
+		char *t = s + UTF8SKIP(s);
+		while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
+		    t += UTF8SKIP(t);
+		if (d + (t - s) > e)
+		    Perl_croak(aTHX_ ident_too_long);
+		Copy(s, d, t - s, char);
+		d += t - s;
+		s = t;
+	    }
+	    else
+		break;
+	}
+    }
+    *d = '\0';
+    d = dest;
+    if (*d) {
+	if (PL_lex_state != LEX_NORMAL)
+	    PL_lex_state = LEX_INTERPENDMAYBE;
+	return s;
+    }
+    if (*s == '$' && s[1] &&
+	(isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+    {
+	return s;
+    }
+    if (*s == '{') {
+	bracket = s;
+	s++;
+    } else if (ck_uni) {
+       /* we always call this with ck_uni == 0, so no need for check_uni() */
+       /* check_uni(); */
+    }
+    if (s < send)
+	*d = *s++;
+    d[1] = '\0';
+    if (*d == '^' && *s && isCONTROLVAR(*s)) {
+	*d = toCTRL(*s);
+	s++;
+    }
+    if (bracket) {
+	if (isSPACE(s[-1])) {
+	    while (s < send) {
+		const char ch = *s++;
+		if (!SPACE_OR_TAB(ch)) {
+		    *d = ch;
+		    break;
+		}
+	    }
+	}
+	if (isIDFIRST_lazy_if(d,UTF)) {
+	    d++;
+	    if (UTF) {
+		e = s;
+		while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
+		    e += UTF8SKIP(e);
+		    while (e < send && UTF8_IS_CONTINUED(*e) && _is_utf8_mark((U8*)e))
+			e += UTF8SKIP(e);
+		}
+		Copy(s, d, e - s, char);
+		d += e - s;
+		s = e;
+	    }
+	    else {
+		while ((isALNUM(*s) || *s == ':') && d < e)
+		    *d++ = *s++;
+		if (d >= e)
+		    Perl_croak(aTHX_ ident_too_long);
+	    }
+	    *d = '\0';
+	    while (s < send && SPACE_OR_TAB(*s)) s++;
+	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+		/* we don't want perl to guess what is meant. the keyword
+		 * parser decides that later. (rafl)
+		 */
+		/*
+		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
+		    const char *brack = *s == '[' ? "[...]" : "{...}";
+		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+			"Ambiguous use of %c{%s%s} resolved to %c%s%s",
+			funny, dest, brack, funny, dest, brack);
+		}
+		*/
+		bracket++;
+		PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
+		return s;
+	    }
+	}
+	/* Handle extended ${^Foo} variables
+	 * 1999-02-27 mjd-perl-patch@plover.com */
+	else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
+		 && isALNUM(*s))
+	{
+	    d++;
+	    while (isALNUM(*s) && d < e) {
+		*d++ = *s++;
+	    }
+	    if (d >= e)
+		Perl_croak(aTHX_ ident_too_long);
+	    *d = '\0';
+	}
+	if (*s == '}') {
+	    s++;
+	    if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
+		PL_lex_state = LEX_INTERPEND;
+		PL_expect = XREF;
+	    }
+	    if (funny == '#')
+		funny = '@';
+	    /* we don't want perl to guess what is meant. the keyword
+	     * parser decides that later. (rafl)
+	     */
+	    /*
+	    if (PL_lex_state == LEX_NORMAL) {
+		if (ckWARN(WARN_AMBIGUOUS) &&
+		    (keyword(dest, d - dest) || get_cv(dest, FALSE)))
+		{
+		    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+			"Ambiguous use of %c{%s} resolved to %c%s",
+			funny, dest, funny, dest);
+		}
+	    }
+	    */
+	}
+	else {
+	    s = bracket;		/* let the parser handle it */
+	    *dest = '\0';
+	}
+    }
+    /* don't intuit. we really just want the string. (rafl) */
+    /*
+    else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
+	PL_lex_state = LEX_INTERPEND;
+    */
+    return s;
+}