X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=709eef238f4b53e23f4bd37713476d63dd30d09d;hb=a365f2ce4defc0d7fecd4e9484f8f958454c9192;hp=374d480be7b3238fb7c424143fccbad0cd5edf82;hpb=32e6a07c84b153f78f946de50870bc0ee030624f;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regexec.c b/regexec.c index 374d480..709eef2 100644 --- a/regexec.c +++ b/regexec.c @@ -285,9 +285,8 @@ S_regcppop(pTHX_ const regexp *rex) * requiring null fields (pat.t#187 and split.t#{13,14} * (as of patchlevel 7877) will fail. Then again, * this code seems to be necessary or otherwise - * building DynaLoader will fail: - * "Error: '*' not in typemap in DynaLoader.xs, line 164" - * --jhi */ + * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/ + * --jhi updated by dapm */ for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) { if (i > PL_regsize) PL_regoffs[i].start = -1; @@ -308,7 +307,7 @@ S_regcppop(pTHX_ const regexp *rex) - pregexec - match a regexp against a string */ I32 -Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend, +Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend, char *strbeg, I32 minend, SV *screamer, U32 nosave) /* strend: pointer to null at end of string */ /* strbeg: real beginning of string */ @@ -372,8 +371,8 @@ Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *stren deleted from the finite automaton. */ char * -Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, - char *strend, U32 flags, re_scream_pos_data *data) +Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos, + char *strend, const U32 flags, re_scream_pos_data *data) { dVAR; register I32 start_shift = 0; @@ -1482,8 +1481,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, U8 **points; /* map of where we were in the input string when reading a given char. For ASCII this is unnecessary overhead as the relationship - is always 1:1, but for unicode, especially - case folded unicode this is not true. */ + is always 1:1, but for Unicode, especially + case folded Unicode this is not true. */ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ]; U8 *bitmap=NULL; @@ -1705,7 +1704,7 @@ S_swap_match_buff (pTHX_ regexp *prog) { - regexec_flags - match a regexp against a string */ I32 -Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend, +Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend, char *strbeg, I32 minend, SV *sv, void *data, U32 flags) /* strend: pointer to null at end of string */ /* strbeg: real beginning of string */ @@ -1845,7 +1844,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * if (regtry(®info, &s)) goto got_it; after_try: - if (s >= end) + if (s > end) goto phooey; if (prog->extflags & RXf_USE_INTUIT) { s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); @@ -2267,13 +2266,12 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code. * Actually, the code in regcppop() (which Ilya may be meaning by * PL_reglastparen), is not needed at all by the test suite - * (op/regexp, op/pat, op/split), but that code is needed, oddly - * enough, for building DynaLoader, or otherwise this - * "Error: '*' not in typemap in DynaLoader.xs, line 164" - * will happen. Meanwhile, this code *is* needed for the + * (op/regexp, op/pat, op/split), but that code is needed otherwise + * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/ + * Meanwhile, this code *is* needed for the * above-mentioned test suite tests to succeed. The common theme * on those tests seems to be returning null fields from matches. - * --jhi */ + * --jhi updated by dapm */ #if 1 if (prog->nparens) { regexp_paren_pair *pp = PL_regoffs; @@ -4221,12 +4219,6 @@ NULL case BRANCH: /* /(...|A|...)/ */ scan = NEXTOPER(scan); /* scan now points to inner node */ - if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ)) - && !has_cutgroup) - { - /* last branch; skip state push and jump direct to node */ - continue; - } ST.lastparen = *PL_reglastparen; ST.next_branch = next; REGCP_SET(ST.cp); @@ -5006,29 +4998,22 @@ NULL #undef ST case FOLDCHAR: n = ARG(scan); - if (nextchr==n) { - locinput += UTF8SKIP(locinput); - - } else { - /* This malarky is to handle LATIN SMALL LETTER SHARP S - properly. Sigh */ - if (0xDF==n && (UTF||do_utf8) && - toLOWER(locinput[0])=='s' && toLOWER(locinput[1])=='s') - { - locinput += 2; - } else if (do_utf8) { - U8 tmpbuf1[UTF8_MAXBYTES_CASE+1]; - STRLEN tmplen1; - U8 tmpbuf2[UTF8_MAXBYTES_CASE+1]; - STRLEN tmplen2; - to_uni_fold(n, tmpbuf1, &tmplen1); - to_utf8_fold(locinput, tmpbuf2, &tmplen2); - if (tmplen1!=tmplen2 || !strnEQ(tmpbuf1,tmpbuf2,tmplen1)) + if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) { + locinput += ln; + } else if ( 0xDF == n && !do_utf8 && !UTF ) { + sayNO; + } else { + U8 folded[UTF8_MAXBYTES_CASE+1]; + STRLEN foldlen; + const char * const l = locinput; + char *e = PL_regeol; + to_uni_fold(n, folded, &foldlen); + + if (ibcmp_utf8((const char*) folded, 0, foldlen, 1, + l, &e, 0, do_utf8)) { sayNO; - else - locinput += UTF8SKIP(locinput); - } else - sayNO; + } + locinput = e; } nextchr = UCHARAT(locinput); break;