whoops, fix compilation
[p5sagit/Function-Parameters.git] / toke_on_crack.c.inc
CommitLineData
db81d362 1/*
2 * This code was copied from perl/toke.c and subsequently butchered
3 * by Lukas Mai (2012).
4 */
5
6/* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
/*
 * Shorthand names for fields of the current parser state, reached
 * through PL_parser.
 */
#define PL_linestr     (PL_parser->linestr)
#define PL_copline     (PL_parser->copline)
#define PL_bufptr      (PL_parser->bufptr)
#define PL_bufend      (PL_parser->bufend)
#define PL_multi_start (PL_parser->multi_start)
#define PL_multi_open  (PL_parser->multi_open)
#define PL_multi_close (PL_parser->multi_close)
#define PL_multi_end   (PL_parser->multi_end)
#define PL_rsfp        (PL_parser->rsfp)

/*
 * CLINE: set PL_copline to the smaller of its current value and the line
 * number of PL_curcop. The expression evaluates to the new PL_copline.
 */
#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))

/*
 * UTF: true when the text being lexed is to be treated as UTF-8.
 * With USE_UTF8_SCRIPTS this is simply "not IN_BYTES"; otherwise it is true
 * when PL_linestr is non-NULL and DO_UTF8(PL_linestr) holds, or when the
 * HINT_UTF8 bit is set in PL_hints.
 */
#if defined(USE_UTF8_SCRIPTS)
#  define UTF (!IN_BYTES)
#else
#  define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#endif
24
25static STRLEN S_scan_word(const char *start, int allow_package) {
26 const char *s = start;
27 for (;;) {
28 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) { /* UTF handled below */
29 s++;
30 } else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
31 s++;
32 } else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
33 s += 2;
34 } else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
35 do {
36 s += UTF8SKIP(s);
37 } while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
38 } else {
39 return s - start;
40 }
41 }
42}
43
/*
 * S_scan_str: read one delimited string, starting at PL_bufptr, into 'sv'.
 *
 * The byte at PL_bufptr is taken as the opening delimiter; no leading
 * whitespace is skipped. If it is one of ( [ { < the matching ) ] } > closes
 * the string and nested pairs are counted; any other delimiter closes itself.
 * When the closing delimiter is not in the current buffer, more input is
 * pulled in with lex_next_chunk() and scanning continues.
 *
 * sv           - receives the scanned text; its previous contents are dropped.
 * keep_quoted  - if zero, a backslash directly in front of a delimiter is
 *                removed and only the delimiter is stored; if non-zero the
 *                backslash is kept as well.
 * keep_delims  - if non-zero, the opening and closing delimiters are stored
 *                in sv around the text.
 *
 * Returns the buffer position at which scanning stopped (past the closing
 * delimiter in the non-decoding path); the same value is stored in PL_bufptr.
 * Returns NULL when lex_next_chunk() fails before the closing delimiter is
 * seen; in that case the line number of PL_curcop is reset to the line on
 * which the string started.
 *
 * Also updates PL_copline (via CLINE), PL_multi_start, PL_multi_open,
 * PL_multi_close, PL_multi_end and the line number of PL_curcop, and may set
 * the UTF-8 flag on sv.
 */
44static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
45 dVAR;
46 char *start = PL_bufptr;
47 const char *tmps; /* temp string, used for delimiter matching */
48 char *s = start; /* current position in the buffer */
49 char term; /* terminating character */
50 char *to; /* current position in the sv's data */
51 I32 brackets = 1; /* bracket nesting level */
52 bool has_utf8 = FALSE; /* is there any utf8 content? */
53 I32 termcode; /* terminating char. code */
    /* NOTE(review): termcode is only ever assigned in this function, never read. */
54 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
55 STRLEN termlen; /* length of terminating string */
56 int last_off = 0; /* last position for nesting bracket */
57
58 /* XXX ATTENTION: we don't skip whitespace! */
59
60 /* mark where we are, in case we need to report errors */
61 CLINE;
62
63 /* after skipping whitespace, the next character is the terminator */
    /* (nothing is skipped in this function; see the XXX note above) */
64 term = *s;
65 if (!UTF) {
66 termcode = termstr[0] = term;
67 termlen = 1;
68 }
69 else {
    /* UTF mode: decode the possibly multi-byte delimiter and keep its bytes.
       NOTE(review): termlen is used unchecked as the Copy() length -- confirm
       utf8_to_uvchr_buf() cannot hand back a failure length here. */
70 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
71 Copy(s, termstr, termlen, U8);
72 if (!UTF8_IS_INVARIANT(term))
73 has_utf8 = TRUE;
74 }
75
76 /* mark where we are */
77 PL_multi_start = CopLINE(PL_curcop);
78 PL_multi_open = term;
79
80 /* find corresponding closing delimiter */
    /* In the lookup table every opener sits exactly 5 positions before its
       closer, so tmps[5] is the matching closer; a closer or a space found in
       the table maps to the same character again. Only termstr[0] is
       replaced. */
81 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
82 termcode = termstr[0] = term = tmps[5];
83
84 PL_multi_close = term;
85
    /* make sv an empty string buffer and ask for at least 80 bytes of room */
86 {
87 STRLEN dummy;
88 SvPV_force(sv, dummy);
89 sv_setpvs(sv, "");
90 SvGROW(sv, 80);
91 }
92
93 /* move past delimiter and try to read a complete string */
    /* the opening delimiter is copied into sv first when keep_delims is set */
94 if (keep_delims)
95 sv_catpvn(sv, s, termlen);
96 s += termlen;
    /* Each pass consumes what is left of the current buffer. If the closing
       delimiter is not found, control reaches read_more_line, another chunk
       is fetched and the loop runs again. */
97 for (;;) {
    /* Path used when PL_encoding is set and the source is not UTF: decoding
       and the search for the terminator are delegated to sv_cat_decode(). */
98 if (PL_encoding && !UTF) {
99 bool cont = TRUE;
100
101 while (cont) {
    /* offset is passed by address and then used to compute ns, the position
       up to which the buffer has been consumed */
102 int offset = s - SvPVX_const(PL_linestr);
103 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
104 &offset, (char*)termstr, termlen);
105 const char * const ns = SvPVX_const(PL_linestr) + offset;
    /* svlast: last byte currently in sv (presumably the terminator that
       sv_cat_decode() just appended -- confirm) */
106 char * const svlast = SvEND(sv) - 1;
107
    /* advance s to ns, counting newlines only when there is no PL_rsfp and
       the parser is not filtered */
108 for (; s < ns; s++) {
109 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
110 CopLINE_inc(PL_curcop);
111 }
    /* terminator not in this buffer: go and fetch more input */
112 if (!found)
113 goto read_more_line;
114 else {
115 /* handle quoted delimiters */
    /* Walk back over the run of backslashes that precedes the last byte; an
       odd count means the delimiter was escaped, so decoding continues. When
       keep_quoted is zero the escaping backslash is overwritten by the
       delimiter and sv is shortened by one byte. */
116 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
117 const char *t;
118 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
119 t--;
120 if ((svlast-1 - t) % 2) {
121 if (!keep_quoted) {
122 *(svlast-1) = term;
123 *svlast = '\0';
124 SvCUR_set(sv, SvCUR(sv) - 1);
125 }
126 continue;
127 }
128 }
    /* identical open/close delimiter: the first unescaped hit ends the string */
129 if (PL_multi_open == PL_multi_close) {
130 cont = FALSE;
131 }
132 else {
    /* Bracket pair: rescan the text appended since last_off, counting
       openers and compacting backslash escapes in place (t reads, w writes),
       then account for the closer that was just found. */
133 const char *t;
134 char *w;
135 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
136 /* At here, all closes are "was quoted" one,
137 so we don't check PL_multi_close. */
138 if (*t == '\\') {
139 if (!keep_quoted && *(t+1) == PL_multi_open)
140 t++;
141 else
142 *w++ = *t++;
143 }
144 else if (*t == PL_multi_open)
145 brackets++;
146
147 *w = *t;
148 }
    /* w < t means bytes were dropped: re-append the terminator and fix the length */
149 if (w < t) {
150 *w++ = term;
151 *w = '\0';
152 SvCUR_set(sv, w - SvPVX_const(sv));
153 }
154 last_off = w - SvPVX(sv);
155 if (--brackets <= 0)
156 cont = FALSE;
157 }
158 }
159 }
    /* unless delimiters are wanted, cut the last byte (presumably the closing
       delimiter) off sv */
160 if (!keep_delims) {
161 SvCUR_set(sv, SvCUR(sv) - 1);
162 *SvEND(sv) = '\0';
163 }
164 break;
165 }
166
167 /* extend sv if need be */
    /* room for everything left in the buffer plus the trailing NUL */
168 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
169 /* set 'to' to the next character in the sv's string */
170 to = SvPVX(sv)+SvCUR(sv);
171
172 /* if open delimiter is the close delimiter read unbridle */
173 if (PL_multi_open == PL_multi_close) {
174 for (; s < PL_bufend; s++,to++) {
175 /* embedded newlines increment the current line number */
176 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
177 CopLINE_inc(PL_curcop);
178 /* handle quoted delimiters */
    /* (skipped entirely when the delimiter itself is a backslash) */
179 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
180 if (!keep_quoted && s[1] == term)
181 s++;
182 /* any other quotes are simply copied straight through */
183 else
184 *to++ = *s++;
185 }
186 /* terminate when run out of buffer (the for() condition), or
187 have found the terminator */
188 else if (*s == term) {
189 if (termlen == 1)
190 break;
    /* multi-byte delimiter: all of its bytes must match */
191 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
192 break;
193 }
194 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
195 has_utf8 = TRUE;
    /* copy the current byte; after an escape this is the byte that followed
       the backslash */
196 *to = *s;
197 }
198 }
199
200 /* if the terminator isn't the same as the start character (e.g.,
201 matched brackets), we have to allow more in the quoting, and
202 be prepared for nested brackets.
203 */
204 else {
205 /* read until we run out of string, or we find the terminator */
206 for (; s < PL_bufend; s++,to++) {
207 /* embedded newlines increment the line count */
208 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
209 CopLINE_inc(PL_curcop);
210 /* backslashes can escape the open or closing characters */
211 if (*s == '\\' && s+1 < PL_bufend) {
212 if (!keep_quoted &&
213 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
214 s++;
215 else
216 *to++ = *s++;
217 }
218 /* allow nested opens and closes */
    /* a closer always decrements the nesting level; the loop ends once the
       level reaches zero */
219 else if (*s == PL_multi_close && --brackets <= 0)
220 break;
221 else if (*s == PL_multi_open)
222 brackets++;
223 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
224 has_utf8 = TRUE;
225 *to = *s;
226 }
227 }
228 /* terminate the copied string and update the sv's end-of-string */
229 *to = '\0';
230 SvCUR_set(sv, to - SvPVX_const(sv));
231
232 /*
233 * this next chunk reads more into the buffer if we're not done yet
234 */
235
    /* stopping before the end of the buffer means one of the loops above
       broke out at the closing delimiter */
236 if (s < PL_bufend)
237 break; /* handle case where we are done yet :-) */
238
    /* Unless PERL_STRICT_CR is defined, a CR LF or LF CR pair at the end of
       the copied text is collapsed to a single "\n", and a lone trailing CR
       is turned into "\n". */
239#ifndef PERL_STRICT_CR
240 if (to - SvPVX_const(sv) >= 2) {
241 if ((to[-2] == '\r' && to[-1] == '\n') ||
242 (to[-2] == '\n' && to[-1] == '\r'))
243 {
244 to[-2] = '\n';
245 to--;
246 SvCUR_set(sv, to - SvPVX_const(sv));
247 }
248 else if (to[-1] == '\r')
249 to[-1] = '\n';
250 }
251 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
252 to[-1] = '\n';
253#endif
254
    /* also the jump target of the decoding path above when its terminator
       search fails */
255 read_more_line:
256 /* if we're out of file, or a read fails, bail and reset the current
257 line marker so we can report where the unterminated string began
258 */
259 CopLINE_inc(PL_curcop);
260 PL_bufptr = PL_bufend;
261 if (!lex_next_chunk(0)) {
262 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
263 return NULL;
264 }
    /* continue scanning at the position the lexer reports after the refill */
265 s = PL_bufptr;
266 }
267
268 /* at this point, we have successfully read the delimited string */
269
    /* In the non-decoding path s still points at the closing delimiter:
       store it if requested, then step past it. */
270 if (!PL_encoding || UTF) {
271 if (keep_delims)
272 sv_catpvn(sv, s, termlen);
273 s += termlen;
274 }
275 if (has_utf8 || PL_encoding)
276 SvUTF8_on(sv);
277
    /* remember the line on which the string ended */
278 PL_multi_end = CopLINE(PL_curcop);
279
280 /* if we allocated too much space, give some back */
    /* (only when the unused tail exceeds 5 bytes) */
281 if (SvCUR(sv) + 5 < SvLEN(sv)) {
282 SvLEN_set(sv, SvCUR(sv) + 1);
283 SvPV_renew(sv, SvLEN(sv));
284 }
285
    /* publish the final position to the parser and hand it to the caller */
286 PL_bufptr = s;
287 return s;
288}
289/* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */