whoops, fix compilation
[p5sagit/Function-Parameters.git] / toke_on_crack.c.inc
1 /*
2  * This code was copied from perl/toke.c and subsequently butchered
3  * by Lukas Mai (2012).
4  */
5
6 /* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
/* Shorthand names for individual fields of the current parser state (PL_parser). */
7 #define PL_linestr              (PL_parser->linestr)
8 #define PL_copline              (PL_parser->copline)
9 #define PL_bufptr               (PL_parser->bufptr)
10 #define PL_bufend               (PL_parser->bufend)
11 #define PL_multi_start          (PL_parser->multi_start)
12 #define PL_multi_open           (PL_parser->multi_open)
13 #define PL_multi_close          (PL_parser->multi_close)
14 #define PL_multi_end            (PL_parser->multi_end)
15 #define PL_rsfp                 (PL_parser->rsfp)
16
/* CLINE: lower PL_copline to the current cop's line number when that is smaller. */
17 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
18
/*
 * UTF: selects between byte-wise and UTF-8 handling in the scanners below.
 * With USE_UTF8_SCRIPTS it is simply !IN_BYTES; otherwise it is true when
 * PL_linestr is set and DO_UTF8() holds for it, or when HINT_UTF8 is set
 * in PL_hints.
 */
19 #ifdef USE_UTF8_SCRIPTS
20 #   define UTF (!IN_BYTES)
21 #else
22 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
23 #endif
24
25 static STRLEN S_scan_word(const char *start, int allow_package) {
26         const char *s = start;
27         for (;;) {
28                 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) {  /* UTF handled below */
29                         s++;
30                 } else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, UTF)) {
31                         s++;
32                 } else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, UTF)) {
33                         s += 2;
34                 } else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
35                         do {
36                                 s += UTF8SKIP(s);
37                         } while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
38                 } else {
39                         return s - start;
40                 }
41         }
42 }
43
/*
 * S_scan_str - read one delimited string from the lexer buffer into sv.
 *
 * The byte at PL_bufptr is taken as the opening delimiter (leading
 * whitespace is NOT skipped).  If it is one of ( [ { <, the closing
 * delimiter is the matching ) ] } > and nested pairs are counted;
 * otherwise the closing delimiter is the same as the opening one.  More
 * input is pulled in with lex_next_chunk() until the closing delimiter
 * has been found.
 *
 * sv           receives the string; its previous contents are discarded
 * keep_quoted  if false, the backslash in front of an escaped delimiter
 *              is dropped; if true, it is copied along with the delimiter
 * keep_delims  if true, the opening and closing delimiters are stored in
 *              sv as well
 *
 * Returns a pointer just past the closing delimiter and leaves PL_bufptr
 * there.  Returns NULL if lex_next_chunk() fails before the closing
 * delimiter is found; in that case PL_bufptr has been moved to PL_bufend
 * and the line number of PL_curcop is reset to PL_multi_start.
 *
 * Also sets PL_multi_start, PL_multi_open, PL_multi_close and
 * PL_multi_end, and turns on the UTF-8 flag of sv when non-invariant
 * bytes were seen under UTF or when PL_encoding is set.
 */
44 static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
45         dVAR;
46         char *start = PL_bufptr;
47         const char *tmps;                   /* temp string, used for delimiter matching */
48         char *s = start;                    /* current position in the buffer */
49         char term;                          /* terminating character */
50         char *to;                           /* current position in the sv's data */
51         I32 brackets = 1;                   /* bracket nesting level */
52         bool has_utf8 = FALSE;              /* is there any utf8 content? */
53         I32 termcode;                       /* terminating char. code (assigned below, never read here) */
54         U8 termstr[UTF8_MAXBYTES];          /* terminating string */
55         STRLEN termlen;                     /* length of terminating string */
56         int last_off = 0;                   /* last position for nesting bracket */
57
58         /* XXX ATTENTION: we don't skip whitespace! */
59
60         /* mark where we are, in case we need to report errors */
61         CLINE;
62
63         /* the character at the current position is the opening delimiter */
64         term = *s;
65         if (!UTF) {
66                 termcode = termstr[0] = term;
67                 termlen = 1;
68         }
69         else {
70                 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
71                 Copy(s, termstr, termlen, U8);
72                 if (!UTF8_IS_INVARIANT(term))
73                         has_utf8 = TRUE;
74         }
75
76         /* mark where we are */
77         PL_multi_start = CopLINE(PL_curcop);
78         PL_multi_open = term;
79
80         /* find corresponding closing delimiter */
        /* the lookup string is laid out so that each opener's closer sits 5
           positions further on; a space or a closer maps to itself */
81         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
82                 termcode = termstr[0] = term = tmps[5];
83
84         PL_multi_close = term;
85
        /* make sv a writable, empty string with some room to grow */
86         {
87                 STRLEN dummy;
88                 SvPV_force(sv, dummy);
89                 sv_setpvs(sv, "");
90                 SvGROW(sv, 80);
91         }
92
93         /* move past delimiter and try to read a complete string */
94         if (keep_delims)
95                 sv_catpvn(sv, s, termlen);
96         s += termlen;
97         for (;;) {
                /* non-UTF source with PL_encoding set: sv_cat_decode() does the copying.
                   NOTE(review): this relies on sv_cat_decode() appending decoded text
                   through the terminator and updating offset -- confirm against its docs */
98                 if (PL_encoding && !UTF) {
99                         bool cont = TRUE;
100
101                         while (cont) {
102                                 int offset = s - SvPVX_const(PL_linestr);
103                                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
104                                                                  &offset, (char*)termstr, termlen);
105                                 const char * const ns = SvPVX_const(PL_linestr) + offset;
106                                 char * const svlast = SvEND(sv) - 1;
107
                                /* advance s to the new offset, counting newlines on the way */
108                                 for (; s < ns; s++) {
109                                         if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
110                                                 CopLINE_inc(PL_curcop);
111                                 }
112                                 if (!found)
113                                         goto read_more_line;
114                                 else {
115                                         /* handle quoted delimiters */
116                                         if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
117                                                 const char *t;
                                                /* walk back over the run of backslashes; an odd-length
                                                   run means the delimiter itself is escaped */
118                                                 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
119                                                         t--;
120                                                 if ((svlast-1 - t) % 2) {
121                                                         if (!keep_quoted) {
122                                                                 *(svlast-1) = term;
123                                                                 *svlast = '\0';
124                                                                 SvCUR_set(sv, SvCUR(sv) - 1);
125                                                         }
126                                                         continue;
127                                                 }
128                                         }
129                                         if (PL_multi_open == PL_multi_close) {
130                                                 cont = FALSE;
131                                         }
132                                         else {
133                                                 const char *t;
134                                                 char *w;
                                                /* rescan what was appended since last_off: count nested
                                                   openers and compact the text in place (t reads, w writes) */
135                                                 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
136                                                         /* At here, all closes are "was quoted" one,
137                                                            so we don't check PL_multi_close. */
138                                                         if (*t == '\\') {
139                                                                 if (!keep_quoted && *(t+1) == PL_multi_open)
140                                                                         t++;
141                                                                 else
142                                                                         *w++ = *t++;
143                                                         }
144                                                         else if (*t == PL_multi_open)
145                                                                 brackets++;
146
147                                                         *w = *t;
148                                                 }
                                                /* something was dropped: re-append the terminator and fix the length */
149                                                 if (w < t) {
150                                                         *w++ = term;
151                                                         *w = '\0';
152                                                         SvCUR_set(sv, w - SvPVX_const(sv));
153                                                 }
154                                                 last_off = w - SvPVX(sv);
155                                                 if (--brackets <= 0)
156                                                         cont = FALSE;
157                                         }
158                                 }
159                         }
                        /* drop the trailing delimiter again unless the caller wants it */
160                         if (!keep_delims) {
161                                 SvCUR_set(sv, SvCUR(sv) - 1);
162                                 *SvEND(sv) = '\0';
163                         }
164                         break;
165                 }
166
167                 /* extend sv if need be */
168                 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
169                 /* set 'to' to the next character in the sv's string */
170                 to = SvPVX(sv)+SvCUR(sv);
171
172                 /* if open delimiter is the close delimiter read unbridle */
173                 if (PL_multi_open == PL_multi_close) {
174                         for (; s < PL_bufend; s++,to++) {
175                                 /* embedded newlines increment the current line number */
176                                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
177                                         CopLINE_inc(PL_curcop);
178                                 /* handle quoted delimiters */
179                                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
180                                         if (!keep_quoted && s[1] == term)
181                                                 s++;
182                                 /* any other quotes are simply copied straight through */
183                                         else
184                                                 *to++ = *s++;
185                                 }
186                                 /* terminate when run out of buffer (the for() condition), or
187                                    have found the terminator */
188                                 else if (*s == term) {
189                                         if (termlen == 1)
190                                                 break;
191                                         if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
192                                                 break;
193                                 }
194                                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
195                                         has_utf8 = TRUE;
196                                 *to = *s;
197                         }
198                 }
199
200                 /* if the terminator isn't the same as the start character (e.g.,
201                    matched brackets), we have to allow more in the quoting, and
202                    be prepared for nested brackets.
203                 */
204                 else {
205                         /* read until we run out of string, or we find the terminator */
206                         for (; s < PL_bufend; s++,to++) {
207                                 /* embedded newlines increment the line count */
208                                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
209                                         CopLINE_inc(PL_curcop);
210                                 /* backslashes can escape the open or closing characters */
211                                 if (*s == '\\' && s+1 < PL_bufend) {
212                                         if (!keep_quoted &&
213                                                 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
214                                                 s++;
215                                         else
216                                                 *to++ = *s++;
217                                 }
218                                 /* allow nested opens and closes */
219                                 else if (*s == PL_multi_close && --brackets <= 0)
220                                         break;
221                                 else if (*s == PL_multi_open)
222                                         brackets++;
223                                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
224                                         has_utf8 = TRUE;
225                                 *to = *s;
226                         }
227                 }
228                 /* terminate the copied string and update the sv's end-of-string */
229                 *to = '\0';
230                 SvCUR_set(sv, to - SvPVX_const(sv));
231
232                 /*
233                  * this next chunk reads more into the buffer if we're not done yet
234                  */
235
236                 if (s < PL_bufend)
237                         break;              /* stopped before the end of the buffer: delimiter found, done */
238
                /* unless PERL_STRICT_CR is defined, turn a trailing "\r\n", "\n\r"
                   or lone "\r" at the end of what was copied into a single "\n" */
239 #ifndef PERL_STRICT_CR
240                 if (to - SvPVX_const(sv) >= 2) {
241                         if ((to[-2] == '\r' && to[-1] == '\n') ||
242                             (to[-2] == '\n' && to[-1] == '\r'))
243                         {
244                                 to[-2] = '\n';
245                                 to--;
246                                 SvCUR_set(sv, to - SvPVX_const(sv));
247                         }
248                         else if (to[-1] == '\r')
249                                 to[-1] = '\n';
250                 }
251                 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
252                         to[-1] = '\n';
253 #endif
254
255          read_more_line:
256                 /* if we're out of file, or a read fails, bail and reset the current
257                    line marker so we can report where the unterminated string began
258                 */
259                 CopLINE_inc(PL_curcop);
260                 PL_bufptr = PL_bufend;
261                 if (!lex_next_chunk(0)) {
262                         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
263                         return NULL;
264                 }
                /* continue scanning from PL_bufptr as it stands after the refill */
265                 s = PL_bufptr;
266         }
267
268         /* at this point, we have successfully read the delimited string */
269
        /* in the byte-copy path s still points at the closing delimiter: store
           it if requested and step over it (the PL_encoding path already has) */
270         if (!PL_encoding || UTF) {
271                 if (keep_delims)
272                         sv_catpvn(sv, s, termlen);
273                 s += termlen;
274         }
275         if (has_utf8 || PL_encoding)
276                 SvUTF8_on(sv);
277
278         PL_multi_end = CopLINE(PL_curcop);
279
280         /* if we allocated too much space, give some back */
281         if (SvCUR(sv) + 5 < SvLEN(sv)) {
282                 SvLEN_set(sv, SvCUR(sv) + 1);
283                 SvPV_renew(sv, SvLEN(sv));
284         }
285
286         PL_bufptr = s;
287         return s;
288 }
289 /* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */