[p5sagit/Function-Parameters.git] / toke_on_crack.c.inc

/*
 * This code was copied from perl/toke.c and subsequently butchered
 * by Lukas Mai (2012).
 */
/* vi: set ft=c: */

/* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
#define PL_linestr              (PL_parser->linestr)
#define PL_copline              (PL_parser->copline)
#define PL_bufptr               (PL_parser->bufptr)
#define PL_bufend               (PL_parser->bufend)
#define PL_multi_start          (PL_parser->multi_start)
#define PL_multi_open           (PL_parser->multi_open)
#define PL_multi_close          (PL_parser->multi_close)
#define PL_multi_end            (PL_parser->multi_end)
#define PL_rsfp                 (PL_parser->rsfp)

#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))

#ifdef USE_UTF8_SCRIPTS
#   define UTF (!IN_BYTES)
#else
#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#endif

static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) {
	const char *s = start;
	for (;;) {
		if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) {  /* UTF handled below */
			s++;
		} else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
			s++;
		} else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
			s += 2;
		} else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
			do {
				s += UTF8SKIP(s);
			} while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
		} else {
			return s - start;
		}
	}
}

static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
	dVAR;
	char *start = PL_bufptr;
	const char *tmps;                   /* temp string, used for delimiter matching */
	char *s = start;                    /* current position in the buffer */
	char term;                          /* terminating character */
	char *to;                           /* current position in the sv's data */
	I32 brackets = 1;                   /* bracket nesting level */
	bool has_utf8 = FALSE;              /* is there any utf8 content? */
	I32 termcode;                       /* terminating char. code */
	U8 termstr[UTF8_MAXBYTES];          /* terminating string */
	STRLEN termlen;                     /* length of terminating string */
	int last_off = 0;                   /* last position for nesting bracket */

	/* XXX ATTENTION: we don't skip whitespace! */

	/* mark where we are, in case we need to report errors */
	CLINE;

	/* after skipping whitespace, the next character is the terminator */
	term = *s;
	if (!UTF) {
		termcode = termstr[0] = term;
		termlen = 1;
	}
	else {
#if HAVE_PERL_VERSION(5, 16, 0)
		termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
#else
		termcode = utf8_to_uvchr((U8*)s, &termlen);
#endif
		Copy(s, termstr, termlen, U8);
		if (!UTF8_IS_INVARIANT(term))
			has_utf8 = TRUE;
	}

	/* mark where we are */
	PL_multi_start = CopLINE(PL_curcop);
	PL_multi_open = term;

	/* find corresponding closing delimiter */
	if (term && (tmps = strchr("([{< )]}> )]}>",term)))
		termcode = termstr[0] = term = tmps[5];

	PL_multi_close = term;

	{
		STRLEN dummy;
		SvPV_force(sv, dummy);
		sv_setpvs(sv, "");
		SvGROW(sv, 80);
	}

	/* move past delimiter and try to read a complete string */
	if (keep_delims)
		sv_catpvn(sv, s, termlen);
	s += termlen;
	for (;;) {
		if (PL_encoding && !UTF) {
			bool cont = TRUE;

			while (cont) {
				int offset = s - SvPVX_const(PL_linestr);
				const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
				                                 &offset, (char*)termstr, termlen);
				const char * const ns = SvPVX_const(PL_linestr) + offset;
				char * const svlast = SvEND(sv) - 1;

				for (; s < ns; s++) {
					if (*s == '\n' && !PL_rsfp
#if HAVE_PERL_VERSION(5, 16, 0)
						&& !PL_parser->filtered
#endif
					)
						CopLINE_inc(PL_curcop);
				}
				if (!found)
					goto read_more_line;
				else {
					/* handle quoted delimiters */
					if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
						const char *t;
						for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
							t--;
						if ((svlast-1 - t) % 2) {
							if (!keep_quoted) {
								*(svlast-1) = term;
								*svlast = '\0';
								SvCUR_set(sv, SvCUR(sv) - 1);
							}
							continue;
						}
					}
					if (PL_multi_open == PL_multi_close) {
						cont = FALSE;
					}
					else {
						const char *t;
						char *w;
						for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
							/* At here, all closes are "was quoted" one,
							   so we don't check PL_multi_close. */
							if (*t == '\\') {
								if (!keep_quoted && *(t+1) == PL_multi_open)
									t++;
								else
									*w++ = *t++;
							}
							else if (*t == PL_multi_open)
								brackets++;

							*w = *t;
						}
						if (w < t) {
							*w++ = term;
							*w = '\0';
							SvCUR_set(sv, w - SvPVX_const(sv));
						}
						last_off = w - SvPVX(sv);
						if (--brackets <= 0)
							cont = FALSE;
					}
				}
			}
			if (!keep_delims) {
				SvCUR_set(sv, SvCUR(sv) - 1);
				*SvEND(sv) = '\0';
			}
			break;
		}

		/* extend sv if need be */
		SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
		/* set 'to' to the next character in the sv's string */
		to = SvPVX(sv)+SvCUR(sv);

		/* if open delimiter is the close delimiter read unbridle */
		if (PL_multi_open == PL_multi_close) {
			for (; s < PL_bufend; s++,to++) {
				/* embedded newlines increment the current line number */
				if (*s == '\n' && !PL_rsfp
#if HAVE_PERL_VERSION(5, 16, 0)
					&& !PL_parser->filtered
#endif
				)
					CopLINE_inc(PL_curcop);
				/* handle quoted delimiters */
				if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
					if (!keep_quoted && s[1] == term)
						s++;
				/* any other quotes are simply copied straight through */
					else
						*to++ = *s++;
				}
				/* terminate when run out of buffer (the for() condition), or
				   have found the terminator */
				else if (*s == term) {
					if (termlen == 1)
						break;
					if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
						break;
				}
				else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
					has_utf8 = TRUE;
				*to = *s;
			}
		}

		/* if the terminator isn't the same as the start character (e.g.,
		   matched brackets), we have to allow more in the quoting, and
		   be prepared for nested brackets.
		*/
		else {
			/* read until we run out of string, or we find the terminator */
			for (; s < PL_bufend; s++,to++) {
				/* embedded newlines increment the line count */
				if (*s == '\n' && !PL_rsfp
#if HAVE_PERL_VERSION(5, 16, 0)
					&& !PL_parser->filtered
#endif
				)
					CopLINE_inc(PL_curcop);
				/* backslashes can escape the open or closing characters */
				if (*s == '\\' && s+1 < PL_bufend) {
					if (!keep_quoted &&
						((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
						s++;
					else
						*to++ = *s++;
				}
				/* allow nested opens and closes */
				else if (*s == PL_multi_close && --brackets <= 0)
					break;
				else if (*s == PL_multi_open)
					brackets++;
				else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
					has_utf8 = TRUE;
				*to = *s;
			}
		}
		/* terminate the copied string and update the sv's end-of-string */
		*to = '\0';
		SvCUR_set(sv, to - SvPVX_const(sv));

		/*
		 * this next chunk reads more into the buffer if we're not done yet
		 */

		if (s < PL_bufend)
			break;              /* handle case where we are done yet :-) */

#ifndef PERL_STRICT_CR
		if (to - SvPVX_const(sv) >= 2) {
			if ((to[-2] == '\r' && to[-1] == '\n') ||
			    (to[-2] == '\n' && to[-1] == '\r'))
			{
				to[-2] = '\n';
				to--;
				SvCUR_set(sv, to - SvPVX_const(sv));
			}
			else if (to[-1] == '\r')
				to[-1] = '\n';
		}
		else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
			to[-1] = '\n';
#endif

	 read_more_line:
		/* if we're out of file, or a read fails, bail and reset the current
		   line marker so we can report where the unterminated string began
		*/
		CopLINE_inc(PL_curcop);
		PL_bufptr = PL_bufend;
		if (!lex_next_chunk(0)) {
			CopLINE_set(PL_curcop, (line_t)PL_multi_start);
			return NULL;
		}
		s = PL_bufptr;
	}

	/* at this point, we have successfully read the delimited string */

	if (!PL_encoding || UTF) {
		if (keep_delims)
			sv_catpvn(sv, s, termlen);
		s += termlen;
	}
	if (has_utf8 || PL_encoding)
		SvUTF8_on(sv);

	PL_multi_end = CopLINE(PL_curcop);

	/* if we allocated too much space, give some back */
	if (SvCUR(sv) + 5 < SvLEN(sv)) {
		SvLEN_set(sv, SvCUR(sv) + 1);
		SvPV_renew(sv, SvLEN(sv));
	}

	PL_bufptr = s;
	return s;
}
/* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */
Commit	Line	Data
db81d362	1	/*
	2	* This code was copied from perl/toke.c and subsequently butchered
	3	* by Lukas Mai (2012).
	4	*/
7dd35535	5	/* vi: set ft=c: */
db81d362	6
	7	/* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
	8	#define PL_linestr (PL_parser->linestr)
	9	#define PL_copline (PL_parser->copline)
	10	#define PL_bufptr (PL_parser->bufptr)
	11	#define PL_bufend (PL_parser->bufend)
	12	#define PL_multi_start (PL_parser->multi_start)
	13	#define PL_multi_open (PL_parser->multi_open)
	14	#define PL_multi_close (PL_parser->multi_close)
	15	#define PL_multi_end (PL_parser->multi_end)
	16	#define PL_rsfp (PL_parser->rsfp)
	17
	18	#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
	19
	20	#ifdef USE_UTF8_SCRIPTS
	21	# define UTF (!IN_BYTES)
	22	#else
	23	# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) \|\| (PL_hints & HINT_UTF8))
	24	#endif
	25
7dd35535	26	static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) {
db81d362	27	const char *s = start;
	28	for (;;) {
	29	if (isALNUM(s) \|\| (!UTF && isALNUMC_L1(s))) { /* UTF handled below */
	30	s++;
	31	} else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
	32	s++;
	33	} else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
	34	s += 2;
	35	} else if (UTF && UTF8_IS_START(s) && isALNUM_utf8((U8)s)) {
	36	do {
	37	s += UTF8SKIP(s);
	38	} while (UTF8_IS_CONTINUED(s) && is_utf8_mark((U8)s));
	39	} else {
	40	return s - start;
	41	}
	42	}
	43	}
	44
	45	static char S_scan_str(pTHX_ SV sv, int keep_quoted, int keep_delims) {
	46	dVAR;
	47	char *start = PL_bufptr;
	48	const char tmps; / temp string, used for delimiter matching */
	49	char s = start; / current position in the buffer */
	50	char term; /* terminating character */
	51	char to; / current position in the sv's data */
	52	I32 brackets = 1; /* bracket nesting level */
	53	bool has_utf8 = FALSE; /* is there any utf8 content? */
	54	I32 termcode; /* terminating char. code */
	55	U8 termstr[UTF8_MAXBYTES]; /* terminating string */
	56	STRLEN termlen; /* length of terminating string */
	57	int last_off = 0; /* last position for nesting bracket */
	58
	59	/* XXX ATTENTION: we don't skip whitespace! */
	60
	61	/* mark where we are, in case we need to report errors */
	62	CLINE;
	63
	64	/* after skipping whitespace, the next character is the terminator */
	65	term = *s;
	66	if (!UTF) {
	67	termcode = termstr[0] = term;
	68	termlen = 1;
	69	}
	70	else {
7dd35535	71	#if HAVE_PERL_VERSION(5, 16, 0)
db81d362	72	termcode = utf8_to_uvchr_buf((U8)s, (U8)PL_bufend, &termlen);
7dd35535	73	#else
	74	termcode = utf8_to_uvchr((U8*)s, &termlen);
	75	#endif
db81d362	76	Copy(s, termstr, termlen, U8);
	77	if (!UTF8_IS_INVARIANT(term))
	78	has_utf8 = TRUE;
	79	}
	80
	81	/* mark where we are */
	82	PL_multi_start = CopLINE(PL_curcop);
	83	PL_multi_open = term;
	84
	85	/* find corresponding closing delimiter */
	86	if (term && (tmps = strchr("([{< )]}> )]}>",term)))
	87	termcode = termstr[0] = term = tmps[5];
	88
	89	PL_multi_close = term;
	90
	91	{
	92	STRLEN dummy;
	93	SvPV_force(sv, dummy);
	94	sv_setpvs(sv, "");
	95	SvGROW(sv, 80);
	96	}
	97
	98	/* move past delimiter and try to read a complete string */
	99	if (keep_delims)
	100	sv_catpvn(sv, s, termlen);
	101	s += termlen;
	102	for (;;) {
	103	if (PL_encoding && !UTF) {
	104	bool cont = TRUE;
	105
	106	while (cont) {
	107	int offset = s - SvPVX_const(PL_linestr);
	108	const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
	109	&offset, (char*)termstr, termlen);
	110	const char * const ns = SvPVX_const(PL_linestr) + offset;
	111	char * const svlast = SvEND(sv) - 1;
	112
	113	for (; s < ns; s++) {
7dd35535	114	if (*s == '\n' && !PL_rsfp
	115	#if HAVE_PERL_VERSION(5, 16, 0)
	116	&& !PL_parser->filtered
	117	#endif
	118	)
db81d362	119	CopLINE_inc(PL_curcop);
	120	}
	121	if (!found)
	122	goto read_more_line;
	123	else {
	124	/* handle quoted delimiters */
	125	if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
	126	const char *t;
	127	for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
	128	t--;
	129	if ((svlast-1 - t) % 2) {
	130	if (!keep_quoted) {
	131	*(svlast-1) = term;
	132	*svlast = '\0';
	133	SvCUR_set(sv, SvCUR(sv) - 1);
	134	}
	135	continue;
	136	}
	137	}
	138	if (PL_multi_open == PL_multi_close) {
	139	cont = FALSE;
	140	}
	141	else {
	142	const char *t;
	143	char *w;
	144	for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
	145	/* At here, all closes are "was quoted" one,
	146	so we don't check PL_multi_close. */
	147	if (*t == '\\') {
	148	if (!keep_quoted && *(t+1) == PL_multi_open)
	149	t++;
	150	else
	151	w++ = t++;
	152	}
	153	else if (*t == PL_multi_open)
	154	brackets++;
	155
	156	w = t;
	157	}
	158	if (w < t) {
	159	*w++ = term;
	160	*w = '\0';
	161	SvCUR_set(sv, w - SvPVX_const(sv));
	162	}
	163	last_off = w - SvPVX(sv);
	164	if (--brackets <= 0)
	165	cont = FALSE;
	166	}
	167	}
	168	}
	169	if (!keep_delims) {
	170	SvCUR_set(sv, SvCUR(sv) - 1);
	171	*SvEND(sv) = '\0';
	172	}
	173	break;
	174	}
	175
	176	/* extend sv if need be */
	177	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
	178	/* set 'to' to the next character in the sv's string */
	179	to = SvPVX(sv)+SvCUR(sv);
	180
	181	/* if open delimiter is the close delimiter read unbridle */
	182	if (PL_multi_open == PL_multi_close) {
183	for (; s < PL_bufend; s++,to++) {
184	/* embedded newlines increment the current line number */
7dd35535	185	if (*s == '\n' && !PL_rsfp
	186	#if HAVE_PERL_VERSION(5, 16, 0)
	187	&& !PL_parser->filtered
	188	#endif
	189	)
db81d362	190	CopLINE_inc(PL_curcop);
	191	/* handle quoted delimiters */
	192	if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
	193	if (!keep_quoted && s[1] == term)
	194	s++;
	195	/* any other quotes are simply copied straight through */
	196	else
	197	to++ = s++;
	198	}
	199	/* terminate when run out of buffer (the for() condition), or
	200	have found the terminator */
	201	else if (*s == term) {
	202	if (termlen == 1)
	203	break;
	204	if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
	205	break;
	206	}
	207	else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
	208	has_utf8 = TRUE;
	209	to = s;
	210	}
	211	}
	212
	213	/* if the terminator isn't the same as the start character (e.g.,
	214	matched brackets), we have to allow more in the quoting, and
	215	be prepared for nested brackets.
	216	*/
	217	else {
	218	/* read until we run out of string, or we find the terminator */
	219	for (; s < PL_bufend; s++,to++) {
	220	/* embedded newlines increment the line count */
7dd35535	221	if (*s == '\n' && !PL_rsfp
	222	#if HAVE_PERL_VERSION(5, 16, 0)
	223	&& !PL_parser->filtered
	224	#endif
	225	)
db81d362	226	CopLINE_inc(PL_curcop);
	227	/* backslashes can escape the open or closing characters */
	228	if (*s == '\\' && s+1 < PL_bufend) {
	229	if (!keep_quoted &&
	230	((s[1] == PL_multi_open) \|\| (s[1] == PL_multi_close)))
	231	s++;
	232	else
	233	to++ = s++;
	234	}
	235	/* allow nested opens and closes */
	236	else if (*s == PL_multi_close && --brackets <= 0)
	237	break;
	238	else if (*s == PL_multi_open)
	239	brackets++;
	240	else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
	241	has_utf8 = TRUE;
	242	to = s;
	243	}
	244	}
	245	/* terminate the copied string and update the sv's end-of-string */
	246	*to = '\0';
	247	SvCUR_set(sv, to - SvPVX_const(sv));
	248
	249	/*
	250	* this next chunk reads more into the buffer if we're not done yet
	251	*/
	252
	253	if (s < PL_bufend)
	254	break; /* handle case where we are done yet :-) */
	255
	256	#ifndef PERL_STRICT_CR
	257	if (to - SvPVX_const(sv) >= 2) {
	258	if ((to[-2] == '\r' && to[-1] == '\n') \|\|
	259	(to[-2] == '\n' && to[-1] == '\r'))
	260	{
	261	to[-2] = '\n';
	262	to--;
	263	SvCUR_set(sv, to - SvPVX_const(sv));
	264	}
	265	else if (to[-1] == '\r')
	266	to[-1] = '\n';
	267	}
	268	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
	269	to[-1] = '\n';
	270	#endif
	271
	272	read_more_line:
	273	/* if we're out of file, or a read fails, bail and reset the current
	274	line marker so we can report where the unterminated string began
	275	*/
	276	CopLINE_inc(PL_curcop);
	277	PL_bufptr = PL_bufend;
	278	if (!lex_next_chunk(0)) {
	279	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
	280	return NULL;
	281	}
	282	s = PL_bufptr;
	283	}
	284
	285	/* at this point, we have successfully read the delimited string */
	286
	287	if (!PL_encoding \|\| UTF) {
	288	if (keep_delims)
	289	sv_catpvn(sv, s, termlen);
290	s += termlen;
291	}
292	if (has_utf8 \|\| PL_encoding)
293	SvUTF8_on(sv);
294
295	PL_multi_end = CopLINE(PL_curcop);
296
297	/* if we allocated too much space, give some back */
298	if (SvCUR(sv) + 5 < SvLEN(sv)) {
299	SvLEN_set(sv, SvCUR(sv) + 1);
300	SvPV_renew(sv, SvLEN(sv));
301	}
302
303	PL_bufptr = s;
304	return s;
305	}
306	/* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */