From: Jarkko Hietaniemi Date: Sat, 13 Nov 1999 19:43:37 +0000 (+0000) Subject: Integrate with Sarathy. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=374d691e102e2975cdf0ae9698b9f6e2ad73d91c;p=p5sagit%2Fp5-mst-13.2.git Integrate with Sarathy. p4raw-id: //depot/cfgperl@4576 --- 374d691e102e2975cdf0ae9698b9f6e2ad73d91c diff --cc regexec.c index fa891c8,fa891c8..e3f0cb4 --- a/regexec.c +++ b/regexec.c @@@ -254,6 -254,6 +254,9 @@@ S_cache_re(pTHX_ regexp *prog PL_reg_re = prog; } ++static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend, ++ char *startpos, I32 norun); ++ /* * Need to implement the following flags for reg_anch: * @@@ -275,6 -275,6 +278,13 @@@ /* XXXX We assume that strpos is strbeg unless sv. */ ++/* XXXX Some places assume that there is a fixed substring. ++ An update may be needed if optimizer marks as "INTUITable" ++ RExen without fixed substrings. Similarly, it is assumed that ++ lengths of all the strings are no more than minlen, thus they ++ cannot come from lookahead. ++ (Or minlen should take into account lookahead.) */ ++ /* A failure to find a constant substring means that there is no need to make an expensive call to REx engine, thus we celebrate a failure. Similarly, finding a substring too deep into the string means that less calls to @@@ -285,10 -285,10 +295,14 @@@ b) Fixed substring; c) Whether we are anchored (beginning-of-line or \G); d) First node (of those at offset 0) which may distingush positions; -- We use 'a', 'b', multiline-part of 'c', and try to find a position in the ++ We use a)b)d) and multiline-part of c), and try to find a position in the string which does not contradict any of them. */ ++/* Most of decisions we do here should have been done at compile time. ++ The nodes of the REx which we used for the search should have been ++ deleted from the finite automaton. */ ++ char * Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *strend, U32 flags, re_scream_pos_data *data) @@@ -301,7 -301,7 +315,8 @@@ char *t; I32 ml_anch; char *tmp; -- register char *other_last = Nullch; ++ register char *other_last = Nullch; /* other substr checked before this */ ++ char *check_at; /* check substr found at this pos */ #ifdef DEBUGGING char *i_strpos = strpos; #endif @@@ -432,6 -432,6 +447,8 @@@ if (!s) goto fail_finish; ++ check_at = s; ++ /* Finish the diagnostic message */ DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) ); @@@ -447,7 -447,7 +464,7 @@@ /* Take into account the "other" substring. */ /* XXXX May be hopelessly wrong for UTF... */ if (!other_last) -- other_last = strpos - 1; ++ other_last = strpos; if (check == prog->float_substr) { do_other_anchored: { @@@ -465,8 -465,8 +482,8 @@@ else t = strpos; t += prog->anchored_offset; -- if (t <= other_last) -- t = other_last + 1; ++ if (t < other_last) /* These positions already checked */ ++ t = other_last; PL_bostr = tmp; last2 = last1 = strend - prog->minlen; if (last < last1) @@@ -495,7 -495,7 +512,7 @@@ ", trying floating at offset %ld...\n", (long)(s1 + 1 - i_strpos))); PL_regeol = strend; /* Used in HOP() */ -- other_last = last1 + prog->anchored_offset; ++ other_last = last1 + prog->anchored_offset + 1; s = HOPc(last, 1); goto restart; } @@@ -503,7 -503,7 +520,7 @@@ DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", (long)(s - i_strpos))); t = s - prog->anchored_offset; -- other_last = s - 1; ++ other_last = s + 1; s = s1; if (t == strpos) goto try_at_start; @@@ -520,8 -520,8 +537,8 @@@ if (last - t > prog->float_max_offset) last = t + prog->float_max_offset; s = t + prog->float_min_offset; -- if (s <= other_last) -- s = other_last + 1; ++ if (s < other_last) ++ s = other_last; /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ /* fbm_instr() takes into account exact value of end-of-str if the check is SvTAIL(ed). Since false positives are OK, @@@ -546,7 -546,7 +563,7 @@@ DEBUG_r(PerlIO_printf(Perl_debug_log, ", trying anchored starting at offset %ld...\n", (long)(s1 + 1 - i_strpos))); -- other_last = last; ++ other_last = last + 1; PL_regeol = strend; /* Used in HOP() */ s = HOPc(t, 1); goto restart; @@@ -554,7 -554,7 +571,7 @@@ else { DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", (long)(s - i_strpos))); -- other_last = s - 1; ++ other_last = s + 1; s = s1; if (t == strpos) goto try_at_start; @@@ -652,6 -652,6 +669,72 @@@ s = strpos; } ++ /* Last resort... */ ++ /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ ++ if (prog->regstclass) { ++ /* minlen == 0 is possible if regstclass is \b or \B, ++ and the fixed substr is ''$. ++ Since minlen is already taken into account, s+1 is before strend; ++ accidentally, minlen >= 1 guaranties no false positives at s + 1 ++ even for \b or \B. But (minlen? 1 : 0) below assumes that ++ regstclass does not come from lookahead... */ ++ /* If regstclass takes bytelength more than 1: If charlength==1, OK. ++ This leaves EXACTF only, which is dealt with in find_byclass(). */ ++ char *endpos = (prog->anchored_substr || ml_anch) ++ ? s + (prog->minlen? 1 : 0) ++ : (prog->float_substr ? check_at - start_shift + 1 ++ : strend) ; ++ char *startpos = sv ? strend - SvCUR(sv) : s; ++ ++ t = s; ++ s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); ++ if (!s) { ++#ifdef DEBUGGING ++ char *what; ++#endif ++ if (endpos == strend) { ++ DEBUG_r( PerlIO_printf(Perl_debug_log, ++ "Could not match STCLASS...\n") ); ++ goto fail; ++ } ++ /* Contradict one of substrings */ ++ if (prog->anchored_substr) { ++ DEBUG_r( PerlIO_printf(Perl_debug_log, ++ "This position contradicts STCLASS...\n") ); ++ if (prog->anchored_substr == check) { ++ DEBUG_r( what = "anchored" ); ++ hop_and_restart: ++ PL_regeol = strend; /* Used in HOP() */ ++ s = HOPc(t, 1); ++ DEBUG_r( PerlIO_printf(Perl_debug_log, ++ "trying %s substr starting at offset %ld...\n", ++ what, (long)(s + start_shift - i_strpos)) ); ++ goto restart; ++ } ++ /* Have both, check is floating */ ++ if (t + start_shift >= check_at) /* Contradicts floating=check */ ++ goto retry_floating_check; ++ /* Recheck anchored substring, but not floating... */ ++ s = check_at; ++ DEBUG_r( PerlIO_printf(Perl_debug_log, ++ "trying anchored substr starting at offset %ld...\n", ++ (long)(other_last - i_strpos)) ); ++ goto do_other_anchored; ++ } ++ /* Check is floating subtring. */ ++ retry_floating_check: ++ t = check_at - start_shift; ++ DEBUG_r( what = "floating" ); ++ goto hop_and_restart; ++ } ++ DEBUG_r( if (t != s) ++ PerlIO_printf(Perl_debug_log, ++ "By STCLASS: moving %ld --> %ld\n", ++ (long)(t - i_strpos), (long)(s - i_strpos)); ++ else ++ PerlIO_printf(Perl_debug_log, ++ "Does not contradict STCLASS...\n") ); ++ } DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n", PL_colors[4], PL_colors[5], (long)(s - i_strpos)) ); return s; @@@ -983,7 -983,7 +1066,7 @@@ Perl_regexec_flags(pTHX_ register regex && (ln == 1 || (OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) -- && regtry(prog, s) ) ++ && (norun || regtry(prog, s)) ) goto got_it; s++; } @@@ -993,7 -993,7 +1076,7 @@@ && (ln == 1 || (OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) -- && regtry(prog, s) ) ++ && (norun || regtry(prog, s)) ) goto got_it; s++; } @@@ -1003,32 -1003,32 +1086,24 @@@ PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUND: -- if (minlen) { -- dontbother++; -- strend -= 1; -- } -- tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev; ++ tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { tmp = !tmp; -- if (regtry(prog, s)) ++ if ((norun || regtry(prog, s))) goto got_it; } s++; } -- if ((minlen || tmp) && regtry(prog,s)) ++ if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) goto got_it; break; case BOUNDLUTF8: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUNDUTF8: -- if (minlen) { -- dontbother++; -- strend = reghop_c(strend, -1); -- } -- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev; ++ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUND ? @@@ -1036,60 -1036,60 +1111,54 @@@ isALNUM_LC_utf8((U8*)s))) { tmp = !tmp; -- if (regtry(prog, s)) ++ if ((norun || regtry(prog, s))) goto got_it; } s += UTF8SKIP(s); } -- if ((minlen || tmp) && regtry(prog,s)) ++ if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) goto got_it; break; case NBOUNDL: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUND: -- if (minlen) { -- dontbother++; -- strend -= 1; -- } -- tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev; ++ tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) tmp = !tmp; -- else if (regtry(prog, s)) ++ else if ((norun || regtry(prog, s))) goto got_it; s++; } -- if ((minlen || !tmp) && regtry(prog,s)) ++ if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) goto got_it; break; case NBOUNDLUTF8: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUNDUTF8: -- if (minlen) { -- dontbother++; ++ if (prog->minlen) strend = reghop_c(strend, -1); -- } -- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev; ++ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUND ? swash_fetch(PL_utf8_alnum, (U8*)s) : isALNUM_LC_utf8((U8*)s))) tmp = !tmp; -- else if (regtry(prog, s)) ++ else if ((norun || regtry(prog, s))) goto got_it; s += UTF8SKIP(s); } -- if ((minlen || !tmp) && regtry(prog,s)) ++ if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) goto got_it; break; case ALNUM: while (s < strend) { if (isALNUM(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1102,7 -1102,7 +1171,7 @@@ case ALNUMUTF8: while (s < strend) { if (swash_fetch(PL_utf8_alnum, (U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1116,7 -1116,7 +1185,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (isALNUM_LC(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1130,7 -1130,7 +1199,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (isALNUM_LC_utf8((U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1143,7 -1143,7 +1212,7 @@@ case NALNUM: while (s < strend) { if (!isALNUM(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1156,7 -1156,7 +1225,7 @@@ case NALNUMUTF8: while (s < strend) { if (!swash_fetch(PL_utf8_alnum, (U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1170,7 -1170,7 +1239,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (!isALNUM_LC(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1184,7 -1184,7 +1253,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (!isALNUM_LC_utf8((U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1197,7 -1197,7 +1266,7 @@@ case SPACE: while (s < strend) { if (isSPACE(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1210,7 -1210,7 +1279,7 @@@ case SPACEUTF8: while (s < strend) { if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1224,7 -1224,7 +1293,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (isSPACE_LC(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1238,7 -1238,7 +1307,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1251,7 -1251,7 +1320,7 @@@ case NSPACE: while (s < strend) { if (!isSPACE(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1264,7 -1264,7 +1333,7 @@@ case NSPACEUTF8: while (s < strend) { if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1278,7 -1278,7 +1347,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (!isSPACE_LC(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1292,7 -1292,7 +1361,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1305,7 -1305,7 +1374,7 @@@ case DIGIT: while (s < strend) { if (isDIGIT(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1318,7 -1318,7 +1387,7 @@@ case DIGITUTF8: while (s < strend) { if (swash_fetch(PL_utf8_digit,(U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1332,7 -1332,7 +1401,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (isDIGIT_LC(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1346,7 -1346,7 +1415,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (isDIGIT_LC_utf8((U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1359,7 -1359,7 +1428,7 @@@ case NDIGIT: while (s < strend) { if (!isDIGIT(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1372,7 -1372,7 +1441,7 @@@ case NDIGITUTF8: while (s < strend) { if (!swash_fetch(PL_utf8_digit,(U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1386,7 -1386,7 +1455,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (!isDIGIT_LC(*s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1400,7 -1400,7 +1469,7 @@@ PL_reg_flags |= RF_tainted; while (s < strend) { if (!isDIGIT_LC_utf8((U8*)s)) { -- if (tmp && regtry(prog, s)) ++ if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@@ -1414,6 -1414,6 +1483,270 @@@ Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c)); break; } ++ return 0; ++ got_it: ++ return s; ++} ++ ++/* ++ - regexec_flags - match a regexp against a string ++ */ ++I32 ++Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend, ++ char *strbeg, I32 minend, SV *sv, void *data, U32 flags) ++/* strend: pointer to null at end of string */ ++/* strbeg: real beginning of string */ ++/* minend: end of match must be >=minend after stringarg. */ ++/* data: May be used for some additional optimizations. */ ++/* nosave: For optimizations. */ ++{ ++ dTHR; ++ register char *s; ++ register regnode *c; ++ register char *startpos = stringarg; ++ register I32 tmp; ++ I32 minlen; /* must match at least this many chars */ ++ I32 dontbother = 0; /* how many characters not to try at end */ ++ I32 start_shift = 0; /* Offset of the start to find ++ constant substr. */ /* CC */ ++ I32 end_shift = 0; /* Same for the end. */ /* CC */ ++ I32 scream_pos = -1; /* Internal iterator of scream. */ ++ char *scream_olds; ++ SV* oreplsv = GvSV(PL_replgv); ++ ++ PL_regcc = 0; ++ ++ cache_re(prog); ++#ifdef DEBUGGING ++ PL_regnarrate = PL_debug & 512; ++#endif ++ ++ /* Be paranoid... */ ++ if (prog == NULL || startpos == NULL) { ++ Perl_croak(aTHX_ "NULL regexp parameter"); ++ return 0; ++ } ++ ++ minlen = prog->minlen; ++ if (strend - startpos < minlen) goto phooey; ++ ++ if (startpos == strbeg) /* is ^ valid at stringarg? */ ++ PL_regprev = '\n'; ++ else { ++ PL_regprev = (U32)stringarg[-1]; ++ if (!PL_multiline && PL_regprev == '\n') ++ PL_regprev = '\0'; /* force ^ to NOT match */ ++ } ++ ++ /* Check validity of program. */ ++ if (UCHARAT(prog->program) != REG_MAGIC) { ++ Perl_croak(aTHX_ "corrupted regexp program"); ++ } ++ ++ PL_reg_flags = 0; ++ PL_reg_eval_set = 0; ++ PL_reg_maxiter = 0; ++ ++ if (prog->reganch & ROPT_UTF8) ++ PL_reg_flags |= RF_utf8; ++ ++ /* Mark beginning of line for ^ and lookbehind. */ ++ PL_regbol = startpos; ++ PL_bostr = strbeg; ++ PL_reg_sv = sv; ++ ++ /* Mark end of line for $ (and such) */ ++ PL_regeol = strend; ++ ++ /* see how far we have to get to not match where we matched before */ ++ PL_regtill = startpos+minend; ++ ++ /* We start without call_cc context. */ ++ PL_reg_call_cc = 0; ++ ++ /* If there is a "must appear" string, look for it. */ ++ s = startpos; ++ ++ if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ ++ MAGIC *mg; ++ ++ if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ ++ PL_reg_ganch = startpos; ++ else if (sv && SvTYPE(sv) >= SVt_PVMG ++ && SvMAGIC(sv) ++ && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) { ++ PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ ++ if (prog->reganch & ROPT_ANCH_GPOS) { ++ if (s > PL_reg_ganch) ++ goto phooey; ++ s = PL_reg_ganch; ++ } ++ } ++ else /* pos() not defined */ ++ PL_reg_ganch = strbeg; ++ } ++ ++ if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) { ++ re_scream_pos_data d; ++ ++ d.scream_olds = &scream_olds; ++ d.scream_pos = &scream_pos; ++ s = re_intuit_start(prog, sv, s, strend, flags, &d); ++ if (!s) ++ goto phooey; /* not present */ ++ } ++ ++ DEBUG_r( if (!PL_colorset) reginitcolors() ); ++ DEBUG_r(PerlIO_printf(Perl_debug_log, ++ "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", ++ PL_colors[4],PL_colors[5],PL_colors[0], ++ prog->precomp, ++ PL_colors[1], ++ (strlen(prog->precomp) > 60 ? "..." : ""), ++ PL_colors[0], ++ (strend - startpos > 60 ? 60 : strend - startpos), ++ startpos, PL_colors[1], ++ (strend - startpos > 60 ? "..." : "")) ++ ); ++ ++ /* Simplest case: anchored match need be tried only once. */ ++ /* [unless only anchor is BOL and multiline is set] */ ++ if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { ++ if (s == startpos && regtry(prog, startpos)) ++ goto got_it; ++ else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) ++ || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ ++ { ++ char *end; ++ ++ if (minlen) ++ dontbother = minlen - 1; ++ end = HOPc(strend, -dontbother) - 1; ++ /* for multiline we only have to try after newlines */ ++ if (prog->check_substr) { ++ if (s == startpos) ++ goto after_try; ++ while (1) { ++ if (regtry(prog, s)) ++ goto got_it; ++ after_try: ++ if (s >= end) ++ goto phooey; ++ s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); ++ if (!s) ++ goto phooey; ++ } ++ } else { ++ if (s > startpos) ++ s--; ++ while (s < end) { ++ if (*s++ == '\n') { /* don't need PL_utf8skip here */ ++ if (regtry(prog, s)) ++ goto got_it; ++ } ++ } ++ } ++ } ++ goto phooey; ++ } else if (prog->reganch & ROPT_ANCH_GPOS) { ++ if (regtry(prog, PL_reg_ganch)) ++ goto got_it; ++ goto phooey; ++ } ++ ++ /* Messy cases: unanchored match. */ ++ if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { ++ /* we have /x+whatever/ */ ++ /* it must be a one character string (XXXX Except UTF?) */ ++ char ch = SvPVX(prog->anchored_substr)[0]; ++ if (UTF) { ++ while (s < strend) { ++ if (*s == ch) { ++ if (regtry(prog, s)) goto got_it; ++ s += UTF8SKIP(s); ++ while (s < strend && *s == ch) ++ s += UTF8SKIP(s); ++ } ++ s += UTF8SKIP(s); ++ } ++ } ++ else { ++ while (s < strend) { ++ if (*s == ch) { ++ if (regtry(prog, s)) goto got_it; ++ s++; ++ while (s < strend && *s == ch) ++ s++; ++ } ++ s++; ++ } ++ } ++ } ++ /*SUPPRESS 560*/ ++ else if (prog->anchored_substr != Nullsv ++ || (prog->float_substr != Nullsv ++ && prog->float_max_offset < strend - s)) { ++ SV *must = prog->anchored_substr ++ ? prog->anchored_substr : prog->float_substr; ++ I32 back_max = ++ prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; ++ I32 back_min = ++ prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; ++ I32 delta = back_max - back_min; ++ char *last = HOPc(strend, /* Cannot start after this */ ++ -(I32)(CHR_SVLEN(must) ++ - (SvTAIL(must) != 0) + back_min)); ++ char *last1; /* Last position checked before */ ++ ++ if (s > PL_bostr) ++ last1 = HOPc(s, -1); ++ else ++ last1 = s - 1; /* bogus */ ++ ++ /* XXXX check_substr already used to find `s', can optimize if ++ check_substr==must. */ ++ scream_pos = -1; ++ dontbother = end_shift; ++ strend = HOPc(strend, -dontbother); ++ while ( (s <= last) && ++ ((flags & REXEC_SCREAM) ++ ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg, ++ end_shift, &scream_pos, 0)) ++ : (s = fbm_instr((unsigned char*)HOP(s, back_min), ++ (unsigned char*)strend, must, ++ PL_multiline ? FBMrf_MULTILINE : 0))) ) { ++ if (HOPc(s, -back_max) > last1) { ++ last1 = HOPc(s, -back_min); ++ s = HOPc(s, -back_max); ++ } ++ else { ++ char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1; ++ ++ last1 = HOPc(s, -back_min); ++ s = t; ++ } ++ if (UTF) { ++ while (s <= last1) { ++ if (regtry(prog, s)) ++ goto got_it; ++ s += UTF8SKIP(s); ++ } ++ } ++ else { ++ while (s <= last1) { ++ if (regtry(prog, s)) ++ goto got_it; ++ s++; ++ } ++ } ++ } ++ goto phooey; ++ } ++ else if (c = prog->regstclass) { ++ if (minlen) /* don't bother with what can't match */ ++ strend = HOPc(strend, -(minlen - 1)); ++ if (find_byclass(prog, c, s, strend, startpos, 0)) ++ goto got_it; } else { dontbother = 0; diff --cc t/op/re_tests index d72a0f7,d72a0f7..f866385 --- a/t/op/re_tests +++ b/t/op/re_tests @@@ -742,3 -742,3 +742,5 @@@ tt+$ xxxtt y - ([[:digit:]-z]+) =0-z= y $1 0-z ([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z \GX.*X aaaXbX n - - ++(\d+\.\d+) 3.1415926 y $1 3.1415926 ++(\ba.{0,10}br) have a web browser y $1 a web br