/* * This code was copied from perl/toke.c and subsequently butchered * by Lukas Mai (2012). */ /* vi: set ft=c: */ /* vvvvvvvvvvvvvvvvvvvvv I HAVE NO IDEA WHAT I'M DOING vvvvvvvvvvvvvvvvvvvv */ #define PL_linestr (PL_parser->linestr) #define PL_copline (PL_parser->copline) #define PL_bufptr (PL_parser->bufptr) #define PL_bufend (PL_parser->bufend) #define PL_multi_start (PL_parser->multi_start) #define PL_multi_open (PL_parser->multi_open) #define PL_multi_close (PL_parser->multi_close) #define PL_multi_end (PL_parser->multi_end) #define PL_rsfp (PL_parser->rsfp) #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) #ifdef USE_UTF8_SCRIPTS # define PARSING_UTF (!IN_BYTES) #else # define PARSING_UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif static STRLEN S_scan_word(pTHX_ const char *start, int allow_package) { const char *s = start; for (;;) { if (isALNUM(*s) || (!PARSING_UTF && isALNUMC_L1(*s))) { /* UTF handled below */ s++; } else if (allow_package && s > start && *s == '\'' && isIDFIRST_lazy_if(s+1, PARSING_UTF)) { s++; } else if (allow_package && s[0] == ':' && s[1] == ':' && isIDFIRST_lazy_if(s+2, PARSING_UTF)) { s += 2; } else if (PARSING_UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { do { s += UTF8SKIP(s); } while (UTF8_IS_CONTINUED(*s) && is_utf8_mark((U8*)s)); } else { return s - start; } } } static char *S_scan_str(pTHX_ SV *sv, int keep_quoted, int keep_delims) { dVAR; char *start = PL_bufptr; const char *tmps; /* temp string, used for delimiter matching */ char *s = start; /* current position in the buffer */ char term; /* terminating character */ char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ I32 termcode; /* terminating char. code */ U8 termstr[UTF8_MAXBYTES]; /* terminating string */ STRLEN termlen; /* length of terminating string */ int last_off = 0; /* last position for nesting bracket */ /* XXX ATTENTION: we don't skip whitespace! */ /* mark where we are, in case we need to report errors */ CLINE; /* after skipping whitespace, the next character is the terminator */ term = *s; if (!PARSING_UTF) { termcode = termstr[0] = term; termlen = 1; } else { termcode = IF_HAVE_PERL_5_16( utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen), utf8_to_uvchr((U8*)s, &termlen) ); Copy(s, termstr, termlen, U8); if (!UTF8_IS_INVARIANT(term)) has_utf8 = TRUE; } /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); PL_multi_open = term; /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) termcode = termstr[0] = term = tmps[5]; PL_multi_close = term; { STRLEN dummy; SvPV_force(sv, dummy); sv_setpvs(sv, ""); SvGROW(sv, 80); } /* move past delimiter and try to read a complete string */ if (keep_delims) sv_catpvn(sv, s, termlen); s += termlen; for (;;) { if (PL_encoding && !PARSING_UTF) { bool cont = TRUE; while (cont) { int offset = s - SvPVX_const(PL_linestr); const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, &offset, (char*)termstr, termlen); const char * const ns = SvPVX_const(PL_linestr) + offset; char * const svlast = SvEND(sv) - 1; for (; s < ns; s++) { if (*s == '\n' && !PL_rsfp && IF_HAVE_PERL_5_16( !PL_parser->filtered, TRUE ) ) CopLINE_inc(PL_curcop); } if (!found) goto read_more_line; else { /* handle quoted delimiters */ if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { const char *t; for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';) t--; if ((svlast-1 - t) % 2) { if (!keep_quoted) { *(svlast-1) = term; *svlast = '\0'; SvCUR_set(sv, SvCUR(sv) - 1); } continue; } } if (PL_multi_open == PL_multi_close) { cont = FALSE; } else { const char *t; char *w; for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) { /* At here, all closes are "was quoted" one, so we don't check PL_multi_close. */ if (*t == '\\') { if (!keep_quoted && *(t+1) == PL_multi_open) t++; else *w++ = *t++; } else if (*t == PL_multi_open) brackets++; *w = *t; } if (w < t) { *w++ = term; *w = '\0'; SvCUR_set(sv, w - SvPVX_const(sv)); } last_off = w - SvPVX(sv); if (--brackets <= 0) cont = FALSE; } } } if (!keep_delims) { SvCUR_set(sv, SvCUR(sv) - 1); *SvEND(sv) = '\0'; } break; } /* extend sv if need be */ SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); /* set 'to' to the next character in the sv's string */ to = SvPVX(sv)+SvCUR(sv); /* if open delimiter is the close delimiter read unbridle */ if (PL_multi_open == PL_multi_close) { for (; s < PL_bufend; s++,to++) { /* embedded newlines increment the current line number */ if (*s == '\n' && !PL_rsfp && IF_HAVE_PERL_5_16( !PL_parser->filtered, 1 ) ) CopLINE_inc(PL_curcop); /* handle quoted delimiters */ if (*s == '\\' && s+1 < PL_bufend && term != '\\') { if (!keep_quoted && s[1] == term) s++; /* any other quotes are simply copied straight through */ else *to++ = *s++; } /* terminate when run out of buffer (the for() condition), or have found the terminator */ else if (*s == term) { if (termlen == 1) break; if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) break; } else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && PARSING_UTF) has_utf8 = TRUE; *to = *s; } } /* if the terminator isn't the same as the start character (e.g., matched brackets), we have to allow more in the quoting, and be prepared for nested brackets. */ else { /* read until we run out of string, or we find the terminator */ for (; s < PL_bufend; s++,to++) { /* embedded newlines increment the line count */ if (*s == '\n' && !PL_rsfp && IF_HAVE_PERL_5_16( !PL_parser->filtered, 1 ) ) CopLINE_inc(PL_curcop); /* backslashes can escape the open or closing characters */ if (*s == '\\' && s+1 < PL_bufend) { if (!keep_quoted && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close))) s++; else *to++ = *s++; } /* allow nested opens and closes */ else if (*s == PL_multi_close && --brackets <= 0) break; else if (*s == PL_multi_open) brackets++; else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && PARSING_UTF) has_utf8 = TRUE; *to = *s; } } /* terminate the copied string and update the sv's end-of-string */ *to = '\0'; SvCUR_set(sv, to - SvPVX_const(sv)); /* * this next chunk reads more into the buffer if we're not done yet */ if (s < PL_bufend) break; /* handle case where we are done yet :-) */ #ifndef PERL_STRICT_CR if (to - SvPVX_const(sv) >= 2) { if ((to[-2] == '\r' && to[-1] == '\n') || (to[-2] == '\n' && to[-1] == '\r')) { to[-2] = '\n'; to--; SvCUR_set(sv, to - SvPVX_const(sv)); } else if (to[-1] == '\r') to[-1] = '\n'; } else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') to[-1] = '\n'; #endif read_more_line: /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ CopLINE_inc(PL_curcop); PL_bufptr = PL_bufend; if (!lex_next_chunk(0)) { CopLINE_set(PL_curcop, (line_t)PL_multi_start); return NULL; } s = PL_bufptr; } /* at this point, we have successfully read the delimited string */ if (!PL_encoding || PARSING_UTF) { if (keep_delims) sv_catpvn(sv, s, termlen); s += termlen; } if (has_utf8 || PL_encoding) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { SvLEN_set(sv, SvCUR(sv) + 1); SvPV_renew(sv, SvLEN(sv)); } PL_bufptr = s; return s; } static void S_check_prototype(pTHX_ const SV *declarator, SV *proto) { bool bad_proto = FALSE; bool in_brackets = FALSE; char greedy_proto = ' '; bool proto_after_greedy_proto = FALSE; bool must_be_last = FALSE; bool underscore = FALSE; bool seen_underscore = FALSE; const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); char *d, *p; STRLEN tmp, tmplen; /* strip spaces and check for bad characters */ d = SvPV(proto, tmplen); tmp = 0; for (p = d; tmplen; tmplen--, ++p) { if (!isSPACE(*p)) { d[tmp++] = *p; if (warnillegalproto) { if (must_be_last) { proto_after_greedy_proto = TRUE; } if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { bad_proto = TRUE; } else { if (underscore) { if (!strchr(";@%", *p)) { bad_proto = TRUE; } underscore = FALSE; } if (*p == '[') { in_brackets = TRUE; } else if (*p == ']') { in_brackets = FALSE; } else if ( (*p == '@' || *p == '%') && (tmp < 2 || d[tmp - 2] != '\\') && !in_brackets ) { must_be_last = TRUE; greedy_proto = *p; } else if (*p == '_') { underscore = seen_underscore = TRUE; } } } } } d[tmp] = '\0'; SvCUR_set(proto, tmp); if (proto_after_greedy_proto) { Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), "In %"SVf": prototype after '%c': %s", SVfARG(declarator), greedy_proto, d ); } if (bad_proto) { SV *dsv = newSVpvs_flags("", SVs_TEMP); Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), "In %"SVf": illegal character %sin prototype: %s", SVfARG(declarator), seen_underscore ? "after '_' " : "", SvUTF8(proto) ? sv_uni_display(dsv, newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), tmp, UNI_DISPLAY_ISPRINT ) : pv_pretty(dsv, d, tmp, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII ) ); } SvCUR_set(proto, tmp); } #undef CLINE /* ^^^^^^^^^^^^^^^^^^^^^ I HAVE NO IDEA WHAT I'M DOING ^^^^^^^^^^^^^^^^^^^^ */