2 * This code was copied from perl/toke.c and subsequently butchered
7 /* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
8 #define PL_linestr (PL_parser->linestr)
9 #define PL_copline (PL_parser->copline)
10 #define PL_bufptr (PL_parser->bufptr)
11 #define PL_bufend (PL_parser->bufend)
12 #define PL_multi_start (PL_parser->multi_start)
13 #define PL_multi_open (PL_parser->multi_open)
14 #define PL_multi_close (PL_parser->multi_close)
15 #define PL_multi_end (PL_parser->multi_end)
16 #define PL_rsfp (PL_parser->rsfp)
18 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
20 #ifdef USE_UTF8_SCRIPTS
21 # define UTF (!IN_BYTES)
23 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
26 static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) {
27 const char *s = start;
29 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) { /* UTF handled below */
31 } else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
33 } else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
35 } else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
38 } while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
45 static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
47 char *start = PL_bufptr;
48 const char *tmps; /* temp string, used for delimiter matching */
49 char *s = start; /* current position in the buffer */
50 char term; /* terminating character */
51 char *to; /* current position in the sv's data */
52 I32 brackets = 1; /* bracket nesting level */
53 bool has_utf8 = FALSE; /* is there any utf8 content? */
54 I32 termcode; /* terminating char. code */
55 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
56 STRLEN termlen; /* length of terminating string */
57 int last_off = 0; /* last position for nesting bracket */
59 /* XXX ATTENTION: we don't skip whitespace! */
61 /* mark where we are, in case we need to report errors */
64 /* after skipping whitespace, the next character is the terminator */
67 termcode = termstr[0] = term;
71 #if HAVE_PERL_VERSION(5, 16, 0)
72 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
74 termcode = utf8_to_uvchr((U8*)s, &termlen);
76 Copy(s, termstr, termlen, U8);
77 if (!UTF8_IS_INVARIANT(term))
81 /* mark where we are */
82 PL_multi_start = CopLINE(PL_curcop);
85 /* find corresponding closing delimiter */
86 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
87 termcode = termstr[0] = term = tmps[5];
89 PL_multi_close = term;
93 SvPV_force(sv, dummy);
98 /* move past delimiter and try to read a complete string */
100 sv_catpvn(sv, s, termlen);
103 if (PL_encoding && !UTF) {
107 int offset = s - SvPVX_const(PL_linestr);
108 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
109 &offset, (char*)termstr, termlen);
110 const char * const ns = SvPVX_const(PL_linestr) + offset;
111 char * const svlast = SvEND(sv) - 1;
113 for (; s < ns; s++) {
114 if (*s == '\n' && !PL_rsfp
115 #if HAVE_PERL_VERSION(5, 16, 0)
116 && !PL_parser->filtered
119 CopLINE_inc(PL_curcop);
124 /* handle quoted delimiters */
125 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
127 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
129 if ((svlast-1 - t) % 2) {
133 SvCUR_set(sv, SvCUR(sv) - 1);
138 if (PL_multi_open == PL_multi_close) {
144 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
145 /* At here, all closes are "was quoted" one,
146 so we don't check PL_multi_close. */
148 if (!keep_quoted && *(t+1) == PL_multi_open)
153 else if (*t == PL_multi_open)
161 SvCUR_set(sv, w - SvPVX_const(sv));
163 last_off = w - SvPVX(sv);
170 SvCUR_set(sv, SvCUR(sv) - 1);
176 /* extend sv if need be */
177 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
178 /* set 'to' to the next character in the sv's string */
179 to = SvPVX(sv)+SvCUR(sv);
181 /* if open delimiter is the close delimiter read unbridle */
182 if (PL_multi_open == PL_multi_close) {
183 for (; s < PL_bufend; s++,to++) {
184 /* embedded newlines increment the current line number */
185 if (*s == '\n' && !PL_rsfp
186 #if HAVE_PERL_VERSION(5, 16, 0)
187 && !PL_parser->filtered
190 CopLINE_inc(PL_curcop);
191 /* handle quoted delimiters */
192 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
193 if (!keep_quoted && s[1] == term)
195 /* any other quotes are simply copied straight through */
199 /* terminate when run out of buffer (the for() condition), or
200 have found the terminator */
201 else if (*s == term) {
204 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
207 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
213 /* if the terminator isn't the same as the start character (e.g.,
214 matched brackets), we have to allow more in the quoting, and
215 be prepared for nested brackets.
218 /* read until we run out of string, or we find the terminator */
219 for (; s < PL_bufend; s++,to++) {
220 /* embedded newlines increment the line count */
221 if (*s == '\n' && !PL_rsfp
222 #if HAVE_PERL_VERSION(5, 16, 0)
223 && !PL_parser->filtered
226 CopLINE_inc(PL_curcop);
227 /* backslashes can escape the open or closing characters */
228 if (*s == '\\' && s+1 < PL_bufend) {
230 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
235 /* allow nested opens and closes */
236 else if (*s == PL_multi_close && --brackets <= 0)
238 else if (*s == PL_multi_open)
240 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
245 /* terminate the copied string and update the sv's end-of-string */
247 SvCUR_set(sv, to - SvPVX_const(sv));
250 * this next chunk reads more into the buffer if we're not done yet
254 break; /* handle case where we are done yet :-) */
256 #ifndef PERL_STRICT_CR
257 if (to - SvPVX_const(sv) >= 2) {
258 if ((to[-2] == '\r' && to[-1] == '\n') ||
259 (to[-2] == '\n' && to[-1] == '\r'))
263 SvCUR_set(sv, to - SvPVX_const(sv));
265 else if (to[-1] == '\r')
268 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
273 /* if we're out of file, or a read fails, bail and reset the current
274 line marker so we can report where the unterminated string began
276 CopLINE_inc(PL_curcop);
277 PL_bufptr = PL_bufend;
278 if (!lex_next_chunk(0)) {
279 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
285 /* at this point, we have successfully read the delimited string */
287 if (!PL_encoding || UTF) {
289 sv_catpvn(sv, s, termlen);
292 if (has_utf8 || PL_encoding)
295 PL_multi_end = CopLINE(PL_curcop);
297 /* if we allocated too much space, give some back */
298 if (SvCUR(sv) + 5 < SvLEN(sv)) {
299 SvLEN_set(sv, SvCUR(sv) + 1);
300 SvPV_renew(sv, SvLEN(sv));
306 /* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */