[p5sagit/Function-Parameters.git] / toke_on_crack.c.inc

/*
 * This code was copied from perl/toke.c and subsequently butchered
 * by Lukas Mai (2012).
 */

/* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
#define PL_linestr              (PL_parser->linestr)
#define PL_copline              (PL_parser->copline)
#define PL_bufptr               (PL_parser->bufptr)
#define PL_bufend               (PL_parser->bufend)
#define PL_multi_start          (PL_parser->multi_start)
#define PL_multi_open           (PL_parser->multi_open)
#define PL_multi_close          (PL_parser->multi_close)
#define PL_multi_end            (PL_parser->multi_end)
#define PL_rsfp                 (PL_parser->rsfp)

#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))

#ifdef USE_UTF8_SCRIPTS
#   define UTF (!IN_BYTES)
#else
#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#endif

static STRLEN S_scan_word(const char *start, int allow_package) {
	const char *s = start;
	for (;;) {
		if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) {  /* UTF handled below */
			s++;
		} else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
			s++;
		} else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
			s += 2;
		} else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
			do {
				s += UTF8SKIP(s);
			} while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
		} else {
			return s - start;
		}
	}
}

static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
	dVAR;
	char *start = PL_bufptr;
	const char *tmps;                   /* temp string, used for delimiter matching */
	char *s = start;                    /* current position in the buffer */
	char term;                          /* terminating character */
	char *to;                           /* current position in the sv's data */
	I32 brackets = 1;                   /* bracket nesting level */
	bool has_utf8 = FALSE;              /* is there any utf8 content? */
	I32 termcode;                       /* terminating char. code */
	U8 termstr[UTF8_MAXBYTES];          /* terminating string */
	STRLEN termlen;                     /* length of terminating string */
	int last_off = 0;                   /* last position for nesting bracket */

	/* XXX ATTENTION: we don't skip whitespace! */

	/* mark where we are, in case we need to report errors */
	CLINE;

	/* after skipping whitespace, the next character is the terminator */
	term = *s;
	if (!UTF) {
		termcode = termstr[0] = term;
		termlen = 1;
	}
	else {
		termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
		Copy(s, termstr, termlen, U8);
		if (!UTF8_IS_INVARIANT(term))
			has_utf8 = TRUE;
	}

	/* mark where we are */
	PL_multi_start = CopLINE(PL_curcop);
	PL_multi_open = term;

	/* find corresponding closing delimiter */
	if (term && (tmps = strchr("([{< )]}> )]}>",term)))
		termcode = termstr[0] = term = tmps[5];

	PL_multi_close = term;

	{
		STRLEN dummy;
		SvPV_force(sv, dummy);
		sv_setpvs(sv, "");
		SvGROW(sv, 80);
	}

	/* move past delimiter and try to read a complete string */
	if (keep_delims)
		sv_catpvn(sv, s, termlen);
	s += termlen;
	for (;;) {
		if (PL_encoding && !UTF) {
			bool cont = TRUE;

			while (cont) {
				int offset = s - SvPVX_const(PL_linestr);
				const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
				                                 &offset, (char*)termstr, termlen);
				const char * const ns = SvPVX_const(PL_linestr) + offset;
				char * const svlast = SvEND(sv) - 1;

				for (; s < ns; s++) {
					if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
						CopLINE_inc(PL_curcop);
				}
				if (!found)
					goto read_more_line;
				else {
					/* handle quoted delimiters */
					if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
						const char *t;
						for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
							t--;
						if ((svlast-1 - t) % 2) {
							if (!keep_quoted) {
								*(svlast-1) = term;
								*svlast = '\0';
								SvCUR_set(sv, SvCUR(sv) - 1);
							}
							continue;
						}
					}
					if (PL_multi_open == PL_multi_close) {
						cont = FALSE;
					}
					else {
						const char *t;
						char *w;
						for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
							/* At here, all closes are "was quoted" one,
							   so we don't check PL_multi_close. */
							if (*t == '\\') {
								if (!keep_quoted && *(t+1) == PL_multi_open)
									t++;
								else
									*w++ = *t++;
							}
							else if (*t == PL_multi_open)
								brackets++;

							*w = *t;
						}
						if (w < t) {
							*w++ = term;
							*w = '\0';
							SvCUR_set(sv, w - SvPVX_const(sv));
						}
						last_off = w - SvPVX(sv);
						if (--brackets <= 0)
							cont = FALSE;
					}
				}
			}
			if (!keep_delims) {
				SvCUR_set(sv, SvCUR(sv) - 1);
				*SvEND(sv) = '\0';
			}
			break;
		}

		/* extend sv if need be */
		SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
		/* set 'to' to the next character in the sv's string */
		to = SvPVX(sv)+SvCUR(sv);

		/* if open delimiter is the close delimiter read unbridle */
		if (PL_multi_open == PL_multi_close) {
			for (; s < PL_bufend; s++,to++) {
				/* embedded newlines increment the current line number */
				if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
					CopLINE_inc(PL_curcop);
				/* handle quoted delimiters */
				if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
					if (!keep_quoted && s[1] == term)
						s++;
				/* any other quotes are simply copied straight through */
					else
						*to++ = *s++;
				}
				/* terminate when run out of buffer (the for() condition), or
				   have found the terminator */
				else if (*s == term) {
					if (termlen == 1)
						break;
					if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
						break;
				}
				else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
					has_utf8 = TRUE;
				*to = *s;
			}
		}

		/* if the terminator isn't the same as the start character (e.g.,
		   matched brackets), we have to allow more in the quoting, and
		   be prepared for nested brackets.
		*/
		else {
			/* read until we run out of string, or we find the terminator */
			for (; s < PL_bufend; s++,to++) {
				/* embedded newlines increment the line count */
				if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
					CopLINE_inc(PL_curcop);
				/* backslashes can escape the open or closing characters */
				if (*s == '\\' && s+1 < PL_bufend) {
					if (!keep_quoted &&
						((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
						s++;
					else
						*to++ = *s++;
				}
				/* allow nested opens and closes */
				else if (*s == PL_multi_close && --brackets <= 0)
					break;
				else if (*s == PL_multi_open)
					brackets++;
				else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
					has_utf8 = TRUE;
				*to = *s;
			}
		}
		/* terminate the copied string and update the sv's end-of-string */
		*to = '\0';
		SvCUR_set(sv, to - SvPVX_const(sv));

		/*
		 * this next chunk reads more into the buffer if we're not done yet
		 */

		if (s < PL_bufend)
			break;              /* handle case where we are done yet :-) */

#ifndef PERL_STRICT_CR
		if (to - SvPVX_const(sv) >= 2) {
			if ((to[-2] == '\r' && to[-1] == '\n') ||
			    (to[-2] == '\n' && to[-1] == '\r'))
			{
				to[-2] = '\n';
				to--;
				SvCUR_set(sv, to - SvPVX_const(sv));
			}
			else if (to[-1] == '\r')
				to[-1] = '\n';
		}
		else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
			to[-1] = '\n';
#endif

	 read_more_line:
		/* if we're out of file, or a read fails, bail and reset the current
		   line marker so we can report where the unterminated string began
		*/
		CopLINE_inc(PL_curcop);
		PL_bufptr = PL_bufend;
		if (!lex_next_chunk(0)) {
			CopLINE_set(PL_curcop, (line_t)PL_multi_start);
			return NULL;
		}
		s = PL_bufptr;
	}

	/* at this point, we have successfully read the delimited string */

	if (!PL_encoding || UTF) {
		if (keep_delims)
			sv_catpvn(sv, s, termlen);
		s += termlen;
	}
	if (has_utf8 || PL_encoding)
		SvUTF8_on(sv);

	PL_multi_end = CopLINE(PL_curcop);

	/* if we allocated too much space, give some back */
	if (SvCUR(sv) + 5 < SvLEN(sv)) {
		SvLEN_set(sv, SvCUR(sv) + 1);
		SvPV_renew(sv, SvLEN(sv));
	}

	PL_bufptr = s;
	return s;
}
/* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */
Commit	Line	Data
db81d362	1	/*
	2	* This code was copied from perl/toke.c and subsequently butchered
	3	* by Lukas Mai (2012).
	4	*/
	5
	6	/* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
	7	#define PL_linestr (PL_parser->linestr)
	8	#define PL_copline (PL_parser->copline)
	9	#define PL_bufptr (PL_parser->bufptr)
	10	#define PL_bufend (PL_parser->bufend)
	11	#define PL_multi_start (PL_parser->multi_start)
	12	#define PL_multi_open (PL_parser->multi_open)
	13	#define PL_multi_close (PL_parser->multi_close)
	14	#define PL_multi_end (PL_parser->multi_end)
	15	#define PL_rsfp (PL_parser->rsfp)
	16
	17	#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
	18
	19	#ifdef USE_UTF8_SCRIPTS
	20	# define UTF (!IN_BYTES)
	21	#else
	22	# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) \|\| (PL_hints & HINT_UTF8))
	23	#endif
	24
	25	static STRLEN S_scan_word(const char *start, int allow_package) {
	26	const char *s = start;
	27	for (;;) {
	28	if (isALNUM(s) \|\| (!UTF && isALNUMC_L1(s))) { /* UTF handled below */
	29	s++;
	30	} else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
	31	s++;
	32	} else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
	33	s += 2;
	34	} else if (UTF && UTF8_IS_START(s) && isALNUM_utf8((U8)s)) {
	35	do {
	36	s += UTF8SKIP(s);
	37	} while (UTF8_IS_CONTINUED(s) && is_utf8_mark((U8)s));
	38	} else {
	39	return s - start;
	40	}
	41	}
	42	}
	43
	44	static char S_scan_str(pTHX_ SV sv, int keep_quoted, int keep_delims) {
	45	dVAR;
	46	char *start = PL_bufptr;
	47	const char tmps; / temp string, used for delimiter matching */
	48	char s = start; / current position in the buffer */
	49	char term; /* terminating character */
	50	char to; / current position in the sv's data */
	51	I32 brackets = 1; /* bracket nesting level */
	52	bool has_utf8 = FALSE; /* is there any utf8 content? */
	53	I32 termcode; /* terminating char. code */
	54	U8 termstr[UTF8_MAXBYTES]; /* terminating string */
	55	STRLEN termlen; /* length of terminating string */
	56	int last_off = 0; /* last position for nesting bracket */
	57
	58	/* XXX ATTENTION: we don't skip whitespace! */
	59
	60	/* mark where we are, in case we need to report errors */
	61	CLINE;
	62
	63	/* after skipping whitespace, the next character is the terminator */
	64	term = *s;
65	if (!UTF) {
66	termcode = termstr[0] = term;
67	termlen = 1;
68	}
69	else {
70	termcode = utf8_to_uvchr_buf((U8)s, (U8)PL_bufend, &termlen);
71	Copy(s, termstr, termlen, U8);
72	if (!UTF8_IS_INVARIANT(term))
73	has_utf8 = TRUE;
74	}
75
76	/* mark where we are */
77	PL_multi_start = CopLINE(PL_curcop);
78	PL_multi_open = term;
79
80	/* find corresponding closing delimiter */
81	if (term && (tmps = strchr("([{< )]}> )]}>",term)))
82	termcode = termstr[0] = term = tmps[5];
83
84	PL_multi_close = term;
85
86	{
87	STRLEN dummy;
88	SvPV_force(sv, dummy);
89	sv_setpvs(sv, "");
90	SvGROW(sv, 80);
91	}
92
93	/* move past delimiter and try to read a complete string */
94	if (keep_delims)
95	sv_catpvn(sv, s, termlen);
96	s += termlen;
97	for (;;) {
98	if (PL_encoding && !UTF) {
99	bool cont = TRUE;
100
101	while (cont) {
102	int offset = s - SvPVX_const(PL_linestr);
103	const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
104	&offset, (char*)termstr, termlen);
105	const char * const ns = SvPVX_const(PL_linestr) + offset;
106	char * const svlast = SvEND(sv) - 1;
107
108	for (; s < ns; s++) {
109	if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
110	CopLINE_inc(PL_curcop);
111	}
112	if (!found)
113	goto read_more_line;
114	else {
115	/* handle quoted delimiters */
116	if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
117	const char *t;
118	for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
119	t--;
120	if ((svlast-1 - t) % 2) {
121	if (!keep_quoted) {
122	*(svlast-1) = term;
123	*svlast = '\0';
124	SvCUR_set(sv, SvCUR(sv) - 1);
125	}
126	continue;
127	}
128	}
129	if (PL_multi_open == PL_multi_close) {
130	cont = FALSE;
131	}
132	else {
133	const char *t;
134	char *w;
135	for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
136	/* At here, all closes are "was quoted" one,
137	so we don't check PL_multi_close. */
138	if (*t == '\\') {
139	if (!keep_quoted && *(t+1) == PL_multi_open)
140	t++;
141	else
142	w++ = t++;
143	}
144	else if (*t == PL_multi_open)
145	brackets++;
146
147	w = t;
148	}
149	if (w < t) {
150	*w++ = term;
151	*w = '\0';
152	SvCUR_set(sv, w - SvPVX_const(sv));
153	}
154	last_off = w - SvPVX(sv);
155	if (--brackets <= 0)
156	cont = FALSE;
157	}
158	}
159	}
160	if (!keep_delims) {
161	SvCUR_set(sv, SvCUR(sv) - 1);
162	*SvEND(sv) = '\0';
163	}
164	break;
165	}
166
167	/* extend sv if need be */
168	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
169	/* set 'to' to the next character in the sv's string */
170	to = SvPVX(sv)+SvCUR(sv);
171
172	/* if open delimiter is the close delimiter read unbridle */
173	if (PL_multi_open == PL_multi_close) {
174	for (; s < PL_bufend; s++,to++) {
175	/* embedded newlines increment the current line number */
176	if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
177	CopLINE_inc(PL_curcop);
178	/* handle quoted delimiters */
179	if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
180	if (!keep_quoted && s[1] == term)
181	s++;
182	/* any other quotes are simply copied straight through */
183	else
184	to++ = s++;
185	}
186	/* terminate when run out of buffer (the for() condition), or
187	have found the terminator */
188	else if (*s == term) {
189	if (termlen == 1)
190	break;
191	if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
192	break;
193	}
194	else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
195	has_utf8 = TRUE;
196	to = s;
197	}
198	}
199
200	/* if the terminator isn't the same as the start character (e.g.,
201	matched brackets), we have to allow more in the quoting, and
202	be prepared for nested brackets.
203	*/
204	else {
205	/* read until we run out of string, or we find the terminator */
206	for (; s < PL_bufend; s++,to++) {
207	/* embedded newlines increment the line count */
208	if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
209	CopLINE_inc(PL_curcop);
210	/* backslashes can escape the open or closing characters */
211	if (*s == '\\' && s+1 < PL_bufend) {
212	if (!keep_quoted &&
213	((s[1] == PL_multi_open) \|\| (s[1] == PL_multi_close)))
214	s++;
215	else
216	to++ = s++;
217	}
218	/* allow nested opens and closes */
219	else if (*s == PL_multi_close && --brackets <= 0)
220	break;
221	else if (*s == PL_multi_open)
222	brackets++;
223	else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
224	has_utf8 = TRUE;
225	to = s;
226	}
227	}
228	/* terminate the copied string and update the sv's end-of-string */
229	*to = '\0';
230	SvCUR_set(sv, to - SvPVX_const(sv));
231
232	/*
233	* this next chunk reads more into the buffer if we're not done yet
234	*/
235
236	if (s < PL_bufend)
237	break; /* handle case where we are done yet :-) */
238
239	#ifndef PERL_STRICT_CR
240	if (to - SvPVX_const(sv) >= 2) {
241	if ((to[-2] == '\r' && to[-1] == '\n') \|\|
242	(to[-2] == '\n' && to[-1] == '\r'))
243	{
244	to[-2] = '\n';
245	to--;
246	SvCUR_set(sv, to - SvPVX_const(sv));
247	}
248	else if (to[-1] == '\r')
249	to[-1] = '\n';
250	}
251	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
252	to[-1] = '\n';
253	#endif
254
255	read_more_line:
256	/* if we're out of file, or a read fails, bail and reset the current
257	line marker so we can report where the unterminated string began
258	*/
259	CopLINE_inc(PL_curcop);
260	PL_bufptr = PL_bufend;
261	if (!lex_next_chunk(0)) {
262	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
263	return NULL;
264	}
265	s = PL_bufptr;
266	}
267
268	/* at this point, we have successfully read the delimited string */
269
270	if (!PL_encoding \|\| UTF) {
271	if (keep_delims)
272	sv_catpvn(sv, s, termlen);
273	s += termlen;
274	}
275	if (has_utf8 \|\| PL_encoding)
276	SvUTF8_on(sv);
277
278	PL_multi_end = CopLINE(PL_curcop);
279
280	/* if we allocated too much space, give some back */
281	if (SvCUR(sv) + 5 < SvLEN(sv)) {
282	SvLEN_set(sv, SvCUR(sv) + 1);
283	SvPV_renew(sv, SvLEN(sv));
284	}
285
286	PL_bufptr = s;
287	return s;
288	}
289	/* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */