toke_on_crack.c.inc

   1 /*
   2  * This code was copied from perl/toke.c and subsequently butchered
   3  * by Lukas Mai (2012).
   4  */
   5 /* vi: set ft=c: */
   6
   7 /* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */
   8 #define PL_linestr              (PL_parser->linestr)
   9 #define PL_copline              (PL_parser->copline)
  10 #define PL_bufptr               (PL_parser->bufptr)
  11 #define PL_bufend               (PL_parser->bufend)
  12 #define PL_multi_start          (PL_parser->multi_start)
  13 #define PL_multi_open           (PL_parser->multi_open)
  14 #define PL_multi_close          (PL_parser->multi_close)
  15 #define PL_multi_end            (PL_parser->multi_end)
  16 #define PL_rsfp                 (PL_parser->rsfp)
  17
  18 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
  19
  20 #ifdef USE_UTF8_SCRIPTS
  21 #   define PARSING_UTF (!IN_BYTES)
  22 #else
  23 #   define PARSING_UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
  24 #endif
  25
  26 static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) {
  27         const char *s = start;
  28         for (;;) {
  29                 if (isALNUM(*s) || (!PARSING_UTF && isALNUMC_L1(*s))) {  /* UTF handled below */
  30                         s++;
  31                 } else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, PARSING_UTF)) {
  32                         s++;
  33                 } else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, PARSING_UTF)) {
  34                         s += 2;
  35                 } else if (PARSING_UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
  36                         do {
  37                                 s += UTF8SKIP(s);
  38                         } while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s));
  39                 } else {
  40                         return s - start;
  41                 }
  42         }
  43 }
  44
  45 static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) {
  46         dVAR;
  47         char *start = PL_bufptr;
  48         const char *tmps;                   /* temp string, used for delimiter matching */
  49         char *s = start;                    /* current position in the buffer */
  50         char term;                          /* terminating character */
  51         char *to;                           /* current position in the sv's data */
  52         I32 brackets = 1;                   /* bracket nesting level */
  53         bool has_utf8 = FALSE;              /* is there any utf8 content? */
  54         I32 termcode;                       /* terminating char. code */
  55         U8 termstr[UTF8_MAXBYTES];          /* terminating string */
  56         STRLEN termlen;                     /* length of terminating string */
  57         int last_off = 0;                   /* last position for nesting bracket */
  58
  59         /* XXX ATTENTION: we don't skip whitespace! */
  60
  61         /* mark where we are, in case we need to report errors */
  62         CLINE;
  63
  64         /* after skipping whitespace, the next character is the terminator */
  65         term = *s;
  66         if (!PARSING_UTF) {
  67                 termcode = termstr[0] = term;
  68                 termlen = 1;
  69         }
  70         else {
  71                 termcode = IF_HAVE_PERL_5_16(
  72                         utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen),
  73                         utf8_to_uvchr((U8*)s, &termlen)
  74                 );
  75                 Copy(s, termstr, termlen, U8);
  76                 if (!UTF8_IS_INVARIANT(term))
  77                         has_utf8 = TRUE;
  78         }
  79
  80         /* mark where we are */
  81         PL_multi_start = CopLINE(PL_curcop);
  82         PL_multi_open = term;
  83
  84         /* find corresponding closing delimiter */
  85         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
  86                 termcode = termstr[0] = term = tmps[5];
  87
  88         PL_multi_close = term;
  89
  90         {
  91                 STRLEN dummy;
  92                 SvPV_force(sv, dummy);
  93                 sv_setpvs(sv, "");
  94                 SvGROW(sv, 80);
  95         }
  96
  97         /* move past delimiter and try to read a complete string */
  98         if (keep_delims)
  99                 sv_catpvn(sv, s, termlen);
 100         s += termlen;
 101         for (;;) {
 102                 if (PL_encoding && !PARSING_UTF) {
 103                         bool cont = TRUE;
 104
 105                         while (cont) {
 106                                 int offset = s - SvPVX_const(PL_linestr);
 107                                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
 108                                                                  &offset, (char*)termstr, termlen);
 109                                 const char * const ns = SvPVX_const(PL_linestr) + offset;
 110                                 char * const svlast = SvEND(sv) - 1;
 111
 112                                 for (; s < ns; s++) {
 113                                         if (*s == '\n' && !PL_rsfp &&
 114                                                 IF_HAVE_PERL_5_16(
 115                                                         !PL_parser->filtered,
 116                                                         TRUE
 117                                                 )
 118                                         )
 119                                                 CopLINE_inc(PL_curcop);
 120                                 }
 121                                 if (!found)
 122                                         goto read_more_line;
 123                                 else {
 124                                         /* handle quoted delimiters */
 125                                         if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
 126                                                 const char *t;
 127                                                 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
 128                                                         t--;
 129                                                 if ((svlast-1 - t) % 2) {
 130                                                         if (!keep_quoted) {
 131                                                                 *(svlast-1) = term;
 132                                                                 *svlast = '\0';
 133                                                                 SvCUR_set(sv, SvCUR(sv) - 1);
 134                                                         }
 135                                                         continue;
 136                                                 }
 137                                         }
 138                                         if (PL_multi_open == PL_multi_close) {
 139                                                 cont = FALSE;
 140                                         }
 141                                         else {
 142                                                 const char *t;
 143                                                 char *w;
 144                                                 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
 145                                                         /* At here, all closes are "was quoted" one,
 146                                                            so we don't check PL_multi_close. */
 147                                                         if (*t == '\\') {
 148                                                                 if (!keep_quoted && *(t+1) == PL_multi_open)
 149                                                                         t++;
 150                                                                 else
 151                                                                         *w++ = *t++;
 152                                                         }
 153                                                         else if (*t == PL_multi_open)
 154                                                                 brackets++;
 155
 156                                                         *w = *t;
 157                                                 }
 158                                                 if (w < t) {
 159                                                         *w++ = term;
 160                                                         *w = '\0';
 161                                                         SvCUR_set(sv, w - SvPVX_const(sv));
 162                                                 }
 163                                                 last_off = w - SvPVX(sv);
 164                                                 if (--brackets <= 0)
 165                                                         cont = FALSE;
 166                                         }
 167                                 }
 168                         }
 169                         if (!keep_delims) {
 170                                 SvCUR_set(sv, SvCUR(sv) - 1);
 171                                 *SvEND(sv) = '\0';
 172                         }
 173                         break;
 174                 }
 175
 176                 /* extend sv if need be */
 177                 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
 178                 /* set 'to' to the next character in the sv's string */
 179                 to = SvPVX(sv)+SvCUR(sv);
 180
 181                 /* if open delimiter is the close delimiter read unbridle */
 182                 if (PL_multi_open == PL_multi_close) {
 183                         for (; s < PL_bufend; s++,to++) {
 184                                 /* embedded newlines increment the current line number */
 185                                 if (*s == '\n' && !PL_rsfp &&
 186                                         IF_HAVE_PERL_5_16(
 187                                                 !PL_parser->filtered,
 188                                                 1
 189                                         )
 190                                 )
 191                                         CopLINE_inc(PL_curcop);
 192                                 /* handle quoted delimiters */
 193                                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
 194                                         if (!keep_quoted && s[1] == term)
 195                                                 s++;
 196                                 /* any other quotes are simply copied straight through */
 197                                         else
 198                                                 *to++ = *s++;
 199                                 }
 200                                 /* terminate when run out of buffer (the for() condition), or
 201                                    have found the terminator */
 202                                 else if (*s == term) {
 203                                         if (termlen == 1)
 204                                                 break;
 205                                         if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
 206                                                 break;
 207                                 }
 208                                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && PARSING_UTF)
 209                                         has_utf8 = TRUE;
 210                                 *to = *s;
 211                         }
 212                 }
 213
 214                 /* if the terminator isn't the same as the start character (e.g.,
 215                    matched brackets), we have to allow more in the quoting, and
 216                    be prepared for nested brackets.
 217                 */
 218                 else {
 219                         /* read until we run out of string, or we find the terminator */
 220                         for (; s < PL_bufend; s++,to++) {
 221                                 /* embedded newlines increment the line count */
 222                                 if (*s == '\n' && !PL_rsfp &&
 223                                         IF_HAVE_PERL_5_16(
 224                                                 !PL_parser->filtered,
 225                                                 1
 226                                         )
 227                                 )
 228                                         CopLINE_inc(PL_curcop);
 229                                 /* backslashes can escape the open or closing characters */
 230                                 if (*s == '\\' && s+1 < PL_bufend) {
 231                                         if (!keep_quoted &&
 232                                                 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
 233                                                 s++;
 234                                         else
 235                                                 *to++ = *s++;
 236                                 }
 237                                 /* allow nested opens and closes */
 238                                 else if (*s == PL_multi_close && --brackets <= 0)
 239                                         break;
 240                                 else if (*s == PL_multi_open)
 241                                         brackets++;
 242                                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && PARSING_UTF)
 243                                         has_utf8 = TRUE;
 244                                 *to = *s;
 245                         }
 246                 }
 247                 /* terminate the copied string and update the sv's end-of-string */
 248                 *to = '\0';
 249                 SvCUR_set(sv, to - SvPVX_const(sv));
 250
 251                 /*
 252                  * this next chunk reads more into the buffer if we're not done yet
 253                  */
 254
 255                 if (s < PL_bufend)
 256                         break;              /* handle case where we are done yet :-) */
 257
 258 #ifndef PERL_STRICT_CR
 259                 if (to - SvPVX_const(sv) >= 2) {
 260                         if ((to[-2] == '\r' && to[-1] == '\n') ||
 261                             (to[-2] == '\n' && to[-1] == '\r'))
 262                         {
 263                                 to[-2] = '\n';
 264                                 to--;
 265                                 SvCUR_set(sv, to - SvPVX_const(sv));
 266                         }
 267                         else if (to[-1] == '\r')
 268                                 to[-1] = '\n';
 269                 }
 270                 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
 271                         to[-1] = '\n';
 272 #endif
 273
 274          read_more_line:
 275                 /* if we're out of file, or a read fails, bail and reset the current
 276                    line marker so we can report where the unterminated string began
 277                 */
 278                 CopLINE_inc(PL_curcop);
 279                 PL_bufptr = PL_bufend;
 280                 if (!lex_next_chunk(0)) {
 281                         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
 282                         return NULL;
 283                 }
 284                 s = PL_bufptr;
 285         }
 286
 287         /* at this point, we have successfully read the delimited string */
 288
 289         if (!PL_encoding || PARSING_UTF) {
 290                 if (keep_delims)
 291                         sv_catpvn(sv, s, termlen);
 292                 s += termlen;
 293         }
 294         if (has_utf8 || PL_encoding)
 295                 SvUTF8_on(sv);
 296
 297         PL_multi_end = CopLINE(PL_curcop);
 298
 299         /* if we allocated too much space, give some back */
 300         if (SvCUR(sv) + 5 < SvLEN(sv)) {
 301                 SvLEN_set(sv, SvCUR(sv) + 1);
 302                 SvPV_renew(sv, SvLEN(sv));
 303         }
 304
 305         PL_bufptr = s;
 306         return s;
 307 }
 308
 309 static void S_check_prototype(pTHX_ const SV *declarator, SV *proto) {
 310         bool bad_proto = FALSE;
 311         bool in_brackets = FALSE;
 312         char greedy_proto = ' ';
 313         bool proto_after_greedy_proto = FALSE;
 314         bool must_be_last = FALSE;
 315         bool underscore = FALSE;
 316         bool seen_underscore = FALSE;
 317         const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
 318         char *d, *p;
 319         STRLEN tmp, tmplen;
 320
 321         /* strip spaces and check for bad characters */
 322         d = SvPV(proto, tmplen);
 323         tmp = 0;
 324         for (p = d; tmplen; tmplen--, ++p) {
 325                 if (!isSPACE(*p)) {
 326                         d[tmp++] = *p;
 327
 328                         if (warnillegalproto) {
 329                                 if (must_be_last) {
 330                                         proto_after_greedy_proto = TRUE;
 331                                 }
 332                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
 333                                         bad_proto = TRUE;
 334                                 } else {
 335                                         if (underscore) {
 336                                                 if (!strchr(";@%", *p)) {
 337                                                         bad_proto = TRUE;
 338                                                 }
 339                                                 underscore = FALSE;
 340                                         }
 341                                         if (*p == '[') {
 342                                                 in_brackets = TRUE;
 343                                         } else if (*p == ']') {
 344                                                 in_brackets = FALSE;
 345                                         } else if (
 346                                                 (*p == '@' || *p == '%') &&
 347                                                 (tmp < 2 || d[tmp - 2] != '\\') &&
 348                                                 !in_brackets
 349                                         ) {
 350                                                 must_be_last = TRUE;
 351                                                 greedy_proto = *p;
 352                                         } else if (*p == '_') {
 353                                                 underscore = seen_underscore = TRUE;
 354                                         }
 355                                 }
 356                         }
 357                 }
 358         }
 359         d[tmp] = '\0';
 360         SvCUR_set(proto, tmp);
 361         if (proto_after_greedy_proto) {
 362                 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
 363                         "In %"SVf": prototype after '%c': %s",
 364                          SVfARG(declarator), greedy_proto, d
 365                 );
 366         }
 367         if (bad_proto) {
 368                 SV *dsv = newSVpvs_flags("", SVs_TEMP);
 369                 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
 370                         "In %"SVf": illegal character %sin prototype: %s",
 371                         SVfARG(declarator),
 372                         seen_underscore ? "after '_' " : "",
 373                         SvUTF8(proto)
 374                                 ? sv_uni_display(dsv,
 375                                         newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
 376                                         tmp,
 377                                         UNI_DISPLAY_ISPRINT
 378                                 )
 379                                 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
 380                                         PERL_PV_ESCAPE_NONASCII
 381                                 )
 382                 );
 383         }
 384         SvCUR_set(proto, tmp);
 385 }
 386
 387 #undef CLINE
 388 /* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */