Commit | Line | Data |
db81d362 |
1 | /* |
2 | * This code was copied from perl/toke.c and subsequently butchered |
3 | * by Lukas Mai (2012). |
4 | */ |
7dd35535 |
5 | /* vi: set ft=c: */ |
db81d362 |
6 | |
7 | /* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */ |
8 | #define PL_linestr (PL_parser->linestr) |
9 | #define PL_copline (PL_parser->copline) |
10 | #define PL_bufptr (PL_parser->bufptr) |
11 | #define PL_bufend (PL_parser->bufend) |
12 | #define PL_multi_start (PL_parser->multi_start) |
13 | #define PL_multi_open (PL_parser->multi_open) |
14 | #define PL_multi_close (PL_parser->multi_close) |
15 | #define PL_multi_end (PL_parser->multi_end) |
16 | #define PL_rsfp (PL_parser->rsfp) |
17 | |
18 | #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) |
19 | |
20 | #ifdef USE_UTF8_SCRIPTS |
c311cef3 |
21 | # define PARSING_UTF (!IN_BYTES) |
db81d362 |
22 | #else |
c311cef3 |
23 | # define PARSING_UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) |
db81d362 |
24 | #endif |
25 | |
7dd35535 |
26 | static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) { |
db81d362 |
27 | const char *s = start; |
28 | for (;;) { |
c311cef3 |
29 | if (isALNUM(*s) || (!PARSING_UTF && isALNUMC_L1(*s))) { /* UTF handled below */ |
db81d362 |
30 | s++; |
c311cef3 |
31 | } else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, PARSING_UTF)) { |
db81d362 |
32 | s++; |
c311cef3 |
33 | } else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, PARSING_UTF)) { |
db81d362 |
34 | s += 2; |
c311cef3 |
35 | } else if (PARSING_UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { |
db81d362 |
36 | do { |
37 | s += UTF8SKIP(s); |
38 | } while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s)); |
39 | } else { |
40 | return s - start; |
41 | } |
42 | } |
43 | } |
44 | |
45 | static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) { |
46 | dVAR; |
47 | char *start = PL_bufptr; |
48 | const char *tmps; /* temp string, used for delimiter matching */ |
49 | char *s = start; /* current position in the buffer */ |
50 | char term; /* terminating character */ |
51 | char *to; /* current position in the sv's data */ |
52 | I32 brackets = 1; /* bracket nesting level */ |
53 | bool has_utf8 = FALSE; /* is there any utf8 content? */ |
54 | I32 termcode; /* terminating char. code */ |
55 | U8 termstr[UTF8_MAXBYTES]; /* terminating string */ |
56 | STRLEN termlen; /* length of terminating string */ |
57 | int last_off = 0; /* last position for nesting bracket */ |
58 | |
59 | /* XXX ATTENTION: we don't skip whitespace! */ |
60 | |
61 | /* mark where we are, in case we need to report errors */ |
62 | CLINE; |
63 | |
64 | /* after skipping whitespace, the next character is the terminator */ |
65 | term = *s; |
c311cef3 |
66 | if (!PARSING_UTF) { |
db81d362 |
67 | termcode = termstr[0] = term; |
68 | termlen = 1; |
69 | } |
70 | else { |
de013990 |
71 | termcode = IF_HAVE_PERL_5_16( |
72 | utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen), |
73 | utf8_to_uvchr((U8*)s, &termlen) |
74 | ); |
db81d362 |
75 | Copy(s, termstr, termlen, U8); |
76 | if (!UTF8_IS_INVARIANT(term)) |
77 | has_utf8 = TRUE; |
78 | } |
79 | |
80 | /* mark where we are */ |
81 | PL_multi_start = CopLINE(PL_curcop); |
82 | PL_multi_open = term; |
83 | |
84 | /* find corresponding closing delimiter */ |
85 | if (term && (tmps = strchr("([{< )]}> )]}>",term))) |
86 | termcode = termstr[0] = term = tmps[5]; |
87 | |
88 | PL_multi_close = term; |
89 | |
90 | { |
91 | STRLEN dummy; |
92 | SvPV_force(sv, dummy); |
93 | sv_setpvs(sv, ""); |
94 | SvGROW(sv, 80); |
95 | } |
96 | |
97 | /* move past delimiter and try to read a complete string */ |
98 | if (keep_delims) |
99 | sv_catpvn(sv, s, termlen); |
100 | s += termlen; |
101 | for (;;) { |
c311cef3 |
102 | if (PL_encoding && !PARSING_UTF) { |
db81d362 |
103 | bool cont = TRUE; |
104 | |
105 | while (cont) { |
106 | int offset = s - SvPVX_const(PL_linestr); |
107 | const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, |
108 | &offset, (char*)termstr, termlen); |
109 | const char * const ns = SvPVX_const(PL_linestr) + offset; |
110 | char * const svlast = SvEND(sv) - 1; |
111 | |
112 | for (; s < ns; s++) { |
de013990 |
113 | if (*s == '\n' && !PL_rsfp && |
114 | IF_HAVE_PERL_5_16( |
115 | !PL_parser->filtered, |
116 | TRUE |
117 | ) |
7dd35535 |
118 | ) |
db81d362 |
119 | CopLINE_inc(PL_curcop); |
120 | } |
121 | if (!found) |
122 | goto read_more_line; |
123 | else { |
124 | /* handle quoted delimiters */ |
125 | if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { |
126 | const char *t; |
127 | for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) |
128 | t--; |
129 | if ((svlast-1 - t) % 2) { |
130 | if (!keep_quoted) { |
131 | *(svlast-1) = term; |
132 | *svlast = '\0'; |
133 | SvCUR_set(sv, SvCUR(sv) - 1); |
134 | } |
135 | continue; |
136 | } |
137 | } |
138 | if (PL_multi_open == PL_multi_close) { |
139 | cont = FALSE; |
140 | } |
141 | else { |
142 | const char *t; |
143 | char *w; |
144 | for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { |
145 | /* At here, all closes are "was quoted" one, |
146 | so we don't check PL_multi_close. */ |
147 | if (*t == '\\') { |
148 | if (!keep_quoted && *(t+1) == PL_multi_open) |
149 | t++; |
150 | else |
151 | *w++ = *t++; |
152 | } |
153 | else if (*t == PL_multi_open) |
154 | brackets++; |
155 | |
156 | *w = *t; |
157 | } |
158 | if (w < t) { |
159 | *w++ = term; |
160 | *w = '\0'; |
161 | SvCUR_set(sv, w - SvPVX_const(sv)); |
162 | } |
163 | last_off = w - SvPVX(sv); |
164 | if (--brackets <= 0) |
165 | cont = FALSE; |
166 | } |
167 | } |
168 | } |
169 | if (!keep_delims) { |
170 | SvCUR_set(sv, SvCUR(sv) - 1); |
171 | *SvEND(sv) = '\0'; |
172 | } |
173 | break; |
174 | } |
175 | |
176 | /* extend sv if need be */ |
177 | SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); |
178 | /* set 'to' to the next character in the sv's string */ |
179 | to = SvPVX(sv)+SvCUR(sv); |
180 | |
181 | /* if open delimiter is the close delimiter read unbridle */ |
182 | if (PL_multi_open == PL_multi_close) { |
183 | for (; s < PL_bufend; s++,to++) { |
184 | /* embedded newlines increment the current line number */ |
de013990 |
185 | if (*s == '\n' && !PL_rsfp && |
186 | IF_HAVE_PERL_5_16( |
187 | !PL_parser->filtered, |
188 | 1 |
189 | ) |
7dd35535 |
190 | ) |
db81d362 |
191 | CopLINE_inc(PL_curcop); |
192 | /* handle quoted delimiters */ |
193 | if (*s == '\\' && s+1 < PL_bufend && term != '\\') { |
194 | if (!keep_quoted && s[1] == term) |
195 | s++; |
196 | /* any other quotes are simply copied straight through */ |
197 | else |
198 | *to++ = *s++; |
199 | } |
200 | /* terminate when run out of buffer (the for() condition), or |
201 | have found the terminator */ |
202 | else if (*s == term) { |
203 | if (termlen == 1) |
204 | break; |
205 | if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) |
206 | break; |
207 | } |
c311cef3 |
208 | else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && PARSING_UTF) |
db81d362 |
209 | has_utf8 = TRUE; |
210 | *to = *s; |
211 | } |
212 | } |
213 | |
214 | /* if the terminator isn't the same as the start character (e.g., |
215 | matched brackets), we have to allow more in the quoting, and |
216 | be prepared for nested brackets. |
217 | */ |
218 | else { |
219 | /* read until we run out of string, or we find the terminator */ |
220 | for (; s < PL_bufend; s++,to++) { |
221 | /* embedded newlines increment the line count */ |
de013990 |
222 | if (*s == '\n' && !PL_rsfp && |
223 | IF_HAVE_PERL_5_16( |
224 | !PL_parser->filtered, |
225 | 1 |
226 | ) |
7dd35535 |
227 | ) |
db81d362 |
228 | CopLINE_inc(PL_curcop); |
229 | /* backslashes can escape the open or closing characters */ |
230 | if (*s == '\\' && s+1 < PL_bufend) { |
231 | if (!keep_quoted && |
232 | ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) |
233 | s++; |
234 | else |
235 | *to++ = *s++; |
236 | } |
237 | /* allow nested opens and closes */ |
238 | else if (*s == PL_multi_close && --brackets <= 0) |
239 | break; |
240 | else if (*s == PL_multi_open) |
241 | brackets++; |
c311cef3 |
242 | else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && PARSING_UTF) |
db81d362 |
243 | has_utf8 = TRUE; |
244 | *to = *s; |
245 | } |
246 | } |
247 | /* terminate the copied string and update the sv's end-of-string */ |
248 | *to = '\0'; |
249 | SvCUR_set(sv, to - SvPVX_const(sv)); |
250 | |
251 | /* |
252 | * this next chunk reads more into the buffer if we're not done yet |
253 | */ |
254 | |
255 | if (s < PL_bufend) |
256 | break; /* handle case where we are done yet :-) */ |
257 | |
258 | #ifndef PERL_STRICT_CR |
259 | if (to - SvPVX_const(sv) >= 2) { |
260 | if ((to[-2] == '\r' && to[-1] == '\n') || |
261 | (to[-2] == '\n' && to[-1] == '\r')) |
262 | { |
263 | to[-2] = '\n'; |
264 | to--; |
265 | SvCUR_set(sv, to - SvPVX_const(sv)); |
266 | } |
267 | else if (to[-1] == '\r') |
268 | to[-1] = '\n'; |
269 | } |
270 | else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') |
271 | to[-1] = '\n'; |
272 | #endif |
273 | |
274 | read_more_line: |
275 | /* if we're out of file, or a read fails, bail and reset the current |
276 | line marker so we can report where the unterminated string began |
277 | */ |
278 | CopLINE_inc(PL_curcop); |
279 | PL_bufptr = PL_bufend; |
280 | if (!lex_next_chunk(0)) { |
281 | CopLINE_set(PL_curcop, (line_t)PL_multi_start); |
282 | return NULL; |
283 | } |
284 | s = PL_bufptr; |
285 | } |
286 | |
287 | /* at this point, we have successfully read the delimited string */ |
288 | |
c311cef3 |
289 | if (!PL_encoding || PARSING_UTF) { |
db81d362 |
290 | if (keep_delims) |
291 | sv_catpvn(sv, s, termlen); |
292 | s += termlen; |
293 | } |
294 | if (has_utf8 || PL_encoding) |
295 | SvUTF8_on(sv); |
296 | |
297 | PL_multi_end = CopLINE(PL_curcop); |
298 | |
299 | /* if we allocated too much space, give some back */ |
300 | if (SvCUR(sv) + 5 < SvLEN(sv)) { |
301 | SvLEN_set(sv, SvCUR(sv) + 1); |
302 | SvPV_renew(sv, SvLEN(sv)); |
303 | } |
304 | |
305 | PL_bufptr = s; |
306 | return s; |
307 | } |
c311cef3 |
308 | |
311ced6f |
309 | static void S_check_prototype(pTHX_ const SV *declarator, SV *proto) { |
c311cef3 |
310 | bool bad_proto = FALSE; |
311 | bool in_brackets = FALSE; |
312 | char greedy_proto = ' '; |
313 | bool proto_after_greedy_proto = FALSE; |
314 | bool must_be_last = FALSE; |
315 | bool underscore = FALSE; |
316 | bool seen_underscore = FALSE; |
317 | const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); |
318 | char *d, *p; |
319 | STRLEN tmp, tmplen; |
320 | |
321 | /* strip spaces and check for bad characters */ |
322 | d = SvPV(proto, tmplen); |
323 | tmp = 0; |
324 | for (p = d; tmplen; tmplen--, ++p) { |
325 | if (!isSPACE(*p)) { |
326 | d[tmp++] = *p; |
327 | |
328 | if (warnillegalproto) { |
329 | if (must_be_last) { |
330 | proto_after_greedy_proto = TRUE; |
331 | } |
332 | if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { |
333 | bad_proto = TRUE; |
334 | } else { |
335 | if (underscore) { |
336 | if (!strchr(";@%", *p)) { |
337 | bad_proto = TRUE; |
338 | } |
339 | underscore = FALSE; |
340 | } |
341 | if (*p == '[') { |
342 | in_brackets = TRUE; |
343 | } else if (*p == ']') { |
344 | in_brackets = FALSE; |
345 | } else if ( |
346 | (*p == '@' || *p == '%') && |
347 | (tmp < 2 || d[tmp - 2] != '\\') && |
348 | !in_brackets |
349 | ) { |
350 | must_be_last = TRUE; |
351 | greedy_proto = *p; |
352 | } else if (*p == '_') { |
353 | underscore = seen_underscore = TRUE; |
354 | } |
355 | } |
356 | } |
357 | } |
358 | } |
359 | d[tmp] = '\0'; |
360 | SvCUR_set(proto, tmp); |
361 | if (proto_after_greedy_proto) { |
362 | Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), |
363 | "In %"SVf": prototype after '%c': %s", |
364 | SVfARG(declarator), greedy_proto, d |
365 | ); |
366 | } |
367 | if (bad_proto) { |
368 | SV *dsv = newSVpvs_flags("", SVs_TEMP); |
369 | Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), |
370 | "In %"SVf": illegal character %sin prototype: %s", |
371 | SVfARG(declarator), |
372 | seen_underscore ? "after '_' " : "", |
373 | SvUTF8(proto) |
374 | ? sv_uni_display(dsv, |
375 | newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), |
376 | tmp, |
377 | UNI_DISPLAY_ISPRINT |
378 | ) |
379 | : pv_pretty(dsv, d, tmp, 60, NULL, NULL, |
380 | PERL_PV_ESCAPE_NONASCII |
381 | ) |
382 | ); |
383 | } |
384 | SvCUR_set(proto, tmp); |
385 | } |
386 | |
387 | #undef CLINE |
db81d362 |
388 | /* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */ |