From: Ilya Zakharevich Date: Wed, 9 Jun 1999 18:14:27 +0000 (-0400) Subject: applied slightly tweaked version of suggested patch for X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f722798beaa437498e1f879b01996be44f2fdc1a;p=p5sagit%2Fp5-mst-13.2.git applied slightly tweaked version of suggested patch for improved RE API Message-Id: <199906092214.SAA14126@monk.mps.ohio-state.edu> Subject: [PATCH 5.005_57] REx engine rehash p4raw-id: //depot/perl@3606 --- diff --git a/Changes b/Changes index c1b80ca..87d97f4 100644 --- a/Changes +++ b/Changes @@ -79,6 +79,99 @@ Version 5.005_58 Development release working toward 5.006 ---------------- ____________________________________________________________________________ +[ 3604] By: gsar on 1999/07/06 07:08:30 + Log: From: paul.marquess@bt.com + Date: Tue, 8 Jun 1999 22:37:58 +0100 + Message-ID: <5104D4DBC598D211B5FE0000F8FE7EB29C6C3C@mbtlipnt02.btlabs.bt.co.uk> + Subject: [PATCH 5.005_57] DB_File 1.67 + Branch: perl + ! ext/DB_File/Changes ext/DB_File/DB_File.pm + ! ext/DB_File/DB_File.xs ext/DB_File/typemap +____________________________________________________________________________ +[ 3603] By: gsar on 1999/07/06 07:04:50 + Log: From: paul.marquess@bt.com + Date: Tue, 8 Jun 1999 22:34:01 +0100 + Message-ID: <5104D4DBC598D211B5FE0000F8FE7EB29C6C3B@mbtlipnt02.btlabs.bt.co.uk> + Subject: [PATCH 5.005_57] DBM Filters + Branch: perl + ! ext/GDBM_File/GDBM_File.pm ext/GDBM_File/GDBM_File.xs + ! ext/NDBM_File/NDBM_File.pm ext/NDBM_File/NDBM_File.xs + ! ext/ODBM_File/ODBM_File.pm ext/ODBM_File/ODBM_File.xs + ! ext/SDBM_File/SDBM_File.pm ext/SDBM_File/SDBM_File.xs +____________________________________________________________________________ +[ 3602] By: gsar on 1999/07/06 07:00:01 + Log: slightly tweaked version of suggested patch + From: Dan Sugalski + Date: Tue, 08 Jun 1999 14:09:38 -0700 + Message-Id: <3.0.6.32.19990608140938.030f12e0@ous.edu> + Subject: [PATCH 5.005_57]Use NV instead of double in the core + Branch: perl + ! av.h bytecode.pl cv.h doio.c dump.c embed.pl + ! ext/ByteLoader/bytecode.h ext/ByteLoader/byterun.c hv.h + ! intrpvar.h mg.c op.c perl.h pp.c pp.h pp_ctl.c pp_sys.c + ! proto.h sv.c sv.h toke.c universal.c util.c +____________________________________________________________________________ +[ 3601] By: gsar on 1999/07/06 06:52:57 + Log: integrate cfgperl contents into mainline + Branch: perl + +> README.epoc epoc/config.h epoc/epoc.c epoc/epocish.h + +> epoc/perl.mmp epoc/perl.pkg + !> (integrate 30 files) +____________________________________________________________________________ +[ 3598] By: jhi on 1999/07/05 20:02:55 + Log: Integrate with mainperl. + Branch: cfgperl + +> lib/CGI/Pretty.pm + !> Changes ext/B/B/Bblock.pm ext/B/B/C.pm ext/B/B/CC.pm + !> ext/B/B/Stackobj.pm ext/GDBM_File/GDBM_File.xs mg.c op.c + !> opcode.h opcode.pl pp_sys.c t/lib/io_udp.t thread.h toke.c + !> vms/descrip_mms.template vms/subconfigure.com vms/vms.c + !> vms/vmsish.h +____________________________________________________________________________ +[ 3597] By: jhi on 1999/07/05 19:59:48 + Log: Hack SOCKS support some more plus a patch from Andy Dougherty + that addresses the notorious "Additional libraries" question. + Branch: cfgperl + ! Configure Porting/Glossary Porting/config.sh Porting/config_H + ! config_h.SH doio.c ext/Socket/Socket.xs hints/aix.sh perl.c + ! pp_sys.c +____________________________________________________________________________ +[ 3596] By: gsar on 1999/07/05 18:30:51 + Log: From: Ilya Zakharevich + Date: Tue, 8 Jun 1999 04:47:58 -0400 (EDT) + Message-Id: <199906080847.EAA03810@monk.mps.ohio-state.edu> + Subject: [PATCH 5.00557] Long-standing UDP sockets bug on OS/2 + Branch: perl + ! pp_sys.c t/lib/io_udp.t +____________________________________________________________________________ +[ 3595] By: gsar on 1999/07/05 18:29:08 + Log: From: Ilya Zakharevich + Date: Tue, 8 Jun 1999 04:44:58 -0400 (EDT) + Message-Id: <199906080844.EAA03784@monk.mps.ohio-state.edu> + Subject: [PATCH 5.00557] Setting $^E wipes out $! + Branch: perl + ! mg.c +____________________________________________________________________________ +[ 3594] By: gsar on 1999/07/05 18:24:53 + Log: hand-apply whitespace mutiliated patch + From: Dan Sugalski + Date: Mon, 07 Jun 1999 14:46:42 -0700 + Message-Id: <3.0.6.32.19990607144642.03079100@ous.edu> + Subject: [PATCH 5.005_57]Updated VMS patch + Branch: perl + ! thread.h vms/descrip_mms.template vms/subconfigure.com + ! vms/vms.c vms/vmsish.h +____________________________________________________________________________ +[ 3593] By: gsar on 1999/07/05 17:53:04 + Log: applied parts not duplicated by previous patches + From: "Vishal Bhatia" + Date: Sat, 05 Jun 1999 08:42:17 -0700 + Message-ID: + Subject: Fwd: [PATCH 5.005_57] consolidated compiler changes + Branch: perl + ! Changes ext/B/B/Bblock.pm ext/B/B/C.pm ext/B/B/CC.pm + ! ext/B/B/Stackobj.pm +____________________________________________________________________________ [ 3592] By: jhi on 1999/07/05 17:17:22 Log: AIX threaded build, plus few more on the side. Branch: cfgperl diff --git a/dump.c b/dump.c index 9c7d3a9..12d318d 100644 --- a/dump.c +++ b/dump.c @@ -15,6 +15,7 @@ #include "EXTERN.h" #define PERL_IN_DUMP_C #include "perl.h" +#include "regcomp.h" #ifndef DBL_DIG #define DBL_DIG 15 /* A guess that works lots of places */ diff --git a/embed.h b/embed.h index d0ede0b..0871c6f 100644 --- a/embed.h +++ b/embed.h @@ -448,6 +448,8 @@ #define pregexec Perl_pregexec #define pregfree Perl_pregfree #define pregcomp Perl_pregcomp +#define re_intuit_start Perl_re_intuit_start +#define re_intuit_string Perl_re_intuit_string #define regexec_flags Perl_regexec_flags #define regnext Perl_regnext #define regprop Perl_regprop @@ -1762,6 +1764,8 @@ #define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) #define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c) +#define re_intuit_start(a,b,c,d,e,f) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f) +#define re_intuit_string(a) Perl_re_intuit_string(aTHX_ a) #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) #define regnext(a) Perl_regnext(aTHX_ a) #define regprop(a,b) Perl_regprop(aTHX_ a,b) @@ -3486,6 +3490,10 @@ #define pregfree Perl_pregfree #define Perl_pregcomp CPerlObj::Perl_pregcomp #define pregcomp Perl_pregcomp +#define Perl_re_intuit_start CPerlObj::Perl_re_intuit_start +#define re_intuit_start Perl_re_intuit_start +#define Perl_re_intuit_string CPerlObj::Perl_re_intuit_string +#define re_intuit_string Perl_re_intuit_string #define Perl_regexec_flags CPerlObj::Perl_regexec_flags #define regexec_flags Perl_regexec_flags #define Perl_regnext CPerlObj::Perl_regnext diff --git a/embed.pl b/embed.pl index ad91f80..ed7f3e4 100755 --- a/embed.pl +++ b/embed.pl @@ -1210,6 +1210,10 @@ p |I32 |pregexec |regexp* prog|char* stringarg \ |SV* screamer|U32 nosave p |void |pregfree |struct regexp* r p |regexp*|pregcomp |char* exp|char* xend|PMOP* pm +p |char* |re_intuit_start|regexp* prog|SV* sv|char* strpos \ + |char* strend|U32 flags \ + |struct re_scream_pos_data_s *data +p |SV* |re_intuit_string|regexp* prog p |I32 |regexec_flags |regexp* prog|char* stringarg \ |char* strend|char* strbeg|I32 minend \ |SV* screamer|void* data|U32 flags diff --git a/embedvar.h b/embedvar.h index dbd94e9..f759b63 100644 --- a/embedvar.h +++ b/embedvar.h @@ -85,8 +85,11 @@ #define PL_regeol (my_perl->Tregeol) #define PL_regexecp (my_perl->Tregexecp) #define PL_regflags (my_perl->Tregflags) +#define PL_regfree (my_perl->Tregfree) #define PL_regindent (my_perl->Tregindent) #define PL_reginput (my_perl->Treginput) +#define PL_regint_start (my_perl->Tregint_start) +#define PL_regint_string (my_perl->Tregint_string) #define PL_reginterp_cnt (my_perl->Treginterp_cnt) #define PL_reglastparen (my_perl->Treglastparen) #define PL_regnarrate (my_perl->Tregnarrate) @@ -212,8 +215,11 @@ #define PL_regeol (PL_curinterp->Tregeol) #define PL_regexecp (PL_curinterp->Tregexecp) #define PL_regflags (PL_curinterp->Tregflags) +#define PL_regfree (PL_curinterp->Tregfree) #define PL_regindent (PL_curinterp->Tregindent) #define PL_reginput (PL_curinterp->Treginput) +#define PL_regint_start (PL_curinterp->Tregint_start) +#define PL_regint_string (PL_curinterp->Tregint_string) #define PL_reginterp_cnt (PL_curinterp->Treginterp_cnt) #define PL_reglastparen (PL_curinterp->Treglastparen) #define PL_regnarrate (PL_curinterp->Tregnarrate) @@ -854,8 +860,11 @@ #define PL_Tregeol PL_regeol #define PL_Tregexecp PL_regexecp #define PL_Tregflags PL_regflags +#define PL_Tregfree PL_regfree #define PL_Tregindent PL_regindent #define PL_Treginput PL_reginput +#define PL_Tregint_start PL_regint_start +#define PL_Tregint_string PL_regint_string #define PL_Treginterp_cnt PL_reginterp_cnt #define PL_Treglastparen PL_reglastparen #define PL_Tregnarrate PL_regnarrate @@ -992,8 +1001,11 @@ #define PL_regeol (thr->Tregeol) #define PL_regexecp (thr->Tregexecp) #define PL_regflags (thr->Tregflags) +#define PL_regfree (thr->Tregfree) #define PL_regindent (thr->Tregindent) #define PL_reginput (thr->Treginput) +#define PL_regint_start (thr->Tregint_start) +#define PL_regint_string (thr->Tregint_string) #define PL_reginterp_cnt (thr->Treginterp_cnt) #define PL_reglastparen (thr->Treglastparen) #define PL_regnarrate (thr->Tregnarrate) diff --git a/ext/re/Makefile.PL b/ext/re/Makefile.PL index 040b085..bd0f1f7 100644 --- a/ext/re/Makefile.PL +++ b/ext/re/Makefile.PL @@ -5,7 +5,7 @@ WriteMakefile( MAN3PODS => {}, # Pods will be built by installman. XSPROTOARG => '-noprototypes', OBJECT => 're_exec$(OBJ_EXT) re_comp$(OBJ_EXT) re$(OBJ_EXT)', - DEFINE => '-DPERL_EXT_RE_BUILD', + DEFINE => '-DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG', clean => { FILES => '*$(OBJ_EXT) *.c ../../lib/re.pm' }, ); diff --git a/ext/re/re.xs b/ext/re/re.xs index b49a110..10e44f7 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -11,6 +11,11 @@ extern regexp* my_regcomp (pTHX_ char* exp, char* xend, PMOP* pm); extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); +extern void my_regfree (pTHX_ struct regexp* r); +extern char* my_re_intuit_start (pTHX_ regexp *prog, SV *sv, char *strpos, + char *strend, U32 flags, + struct re_scream_pos_data_s *data); +extern SV* my_re_intuit_string (pTHX_ regexp *prog); static int oldfl; @@ -20,8 +25,12 @@ static void deinstall(pTHX) { dTHR; - PL_regexecp = &Perl_regexec_flags; - PL_regcompp = &Perl_pregcomp; + PL_regexecp = FUNC_NAME_TO_PTR(Perl_regexec_flags); + PL_regcompp = FUNC_NAME_TO_PTR(Perl_pregcomp); + PL_regint_start = FUNC_NAME_TO_PTR(Perl_re_intuit_start); + PL_regint_string = FUNC_NAME_TO_PTR(Perl_re_intuit_string); + PL_regfree = FUNC_NAME_TO_PTR(Perl_pregfree); + if (!oldfl) PL_debug &= ~R_DB; } @@ -33,6 +42,9 @@ install(pTHX) PL_colorset = 0; /* Allow reinspection of ENV. */ PL_regexecp = &my_regexec; PL_regcompp = &my_regcomp; + PL_regint_start = &my_re_intuit_start; + PL_regint_string = &my_re_intuit_string; + PL_regfree = &my_regfree; oldfl = PL_debug & R_DB; PL_debug |= R_DB; } diff --git a/global.sym b/global.sym index efbca1d..87ece3c 100644 --- a/global.sym +++ b/global.sym @@ -408,6 +408,8 @@ Perl_regdump Perl_pregexec Perl_pregfree Perl_pregcomp +Perl_re_intuit_start +Perl_re_intuit_string Perl_regexec_flags Perl_regnext Perl_regprop diff --git a/objXSUB.h b/objXSUB.h index d14de86..d91f84d 100644 --- a/objXSUB.h +++ b/objXSUB.h @@ -546,10 +546,16 @@ #define PL_regexecp pPerl->PL_regexecp #undef PL_regflags #define PL_regflags pPerl->PL_regflags +#undef PL_regfree +#define PL_regfree pPerl->PL_regfree #undef PL_regindent #define PL_regindent pPerl->PL_regindent #undef PL_reginput #define PL_reginput pPerl->PL_reginput +#undef PL_regint_start +#define PL_regint_start pPerl->PL_regint_start +#undef PL_regint_string +#define PL_regint_string pPerl->PL_regint_string #undef PL_reginterp_cnt #define PL_reginterp_cnt pPerl->PL_reginterp_cnt #undef PL_reglastparen @@ -2426,6 +2432,14 @@ #define Perl_pregcomp pPerl->Perl_pregcomp #undef pregcomp #define pregcomp Perl_pregcomp +#undef Perl_re_intuit_start +#define Perl_re_intuit_start pPerl->Perl_re_intuit_start +#undef re_intuit_start +#define re_intuit_start Perl_re_intuit_start +#undef Perl_re_intuit_string +#define Perl_re_intuit_string pPerl->Perl_re_intuit_string +#undef re_intuit_string +#define re_intuit_string Perl_re_intuit_string #undef Perl_regexec_flags #define Perl_regexec_flags pPerl->Perl_regexec_flags #undef regexec_flags diff --git a/perl.c b/perl.c index 39eaf30..062b334 100644 --- a/perl.c +++ b/perl.c @@ -2947,6 +2947,9 @@ S_init_main_thread(pTHX) PL_maxscream = -1; PL_regcompp = FUNC_NAME_TO_PTR(Perl_pregcomp); PL_regexecp = FUNC_NAME_TO_PTR(Perl_regexec_flags); + PL_regint_start = FUNC_NAME_TO_PTR(Perl_re_intuit_start); + PL_regint_string = FUNC_NAME_TO_PTR(Perl_re_intuit_string); + PL_regfree = FUNC_NAME_TO_PTR(Perl_pregfree); PL_regindent = 0; PL_reginterp_cnt = 0; diff --git a/perl.h b/perl.h index 5eb7b1d..b09660a 100644 --- a/perl.h +++ b/perl.h @@ -145,6 +145,9 @@ class CPerlObj; #define CALLRUNOPS CALL_FPTR(PL_runops) #define CALLREGCOMP CALL_FPTR(PL_regcompp) #define CALLREGEXEC CALL_FPTR(PL_regexecp) +#define CALLREG_INTUIT_START CALL_FPTR(PL_regint_start) +#define CALLREG_INTUIT_STRING CALL_FPTR(PL_regint_string) +#define CALLREGFREE CALL_FPTR(PL_regfree) #define CALLPROTECT CALL_FPTR(PL_protect) #define NOOP (void)0 @@ -2385,6 +2388,12 @@ typedef regexp*(CPERLscope(*regcomp_t)) (pTHX_ char* exp, char* xend, PMOP* pm); typedef I32 (CPERLscope(*regexec_t)) (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); +typedef char* (CPERLscope(*re_intuit_start_t)) (pTHX_ regexp *prog, SV *sv, + char *strpos, char *strend, + U32 flags, + struct re_scream_pos_data_s *d); +typedef SV* (CPERLscope(*re_intuit_string_t)) (pTHX_ regexp *prog); +typedef void (CPERLscope(*regfree_t)) (pTHX_ struct regexp* r); /* Set up PERLVAR macros for populating structs */ diff --git a/pp.c b/pp.c index e688848..d28a8c2 100644 --- a/pp.c +++ b/pp.c @@ -4998,17 +4998,19 @@ PP(pp_split) s = m; } } - else if (rx->check_substr && !rx->nparens + else if ((rx->reganch & RE_USE_INTUIT) && !rx->nparens && (rx->reganch & ROPT_CHECK_ALL) && !(rx->reganch & ROPT_ANCH)) { - int tail = SvTAIL(rx->check_substr) != 0; + int tail = (rx->reganch & RE_INTUIT_TAIL); + SV *csv = CALLREG_INTUIT_STRING(aTHX_ rx); + char c; - i = SvCUR(rx->check_substr); + i = rx->minlen; if (i == 1 && !tail) { - i = *SvPVX(rx->check_substr); + c = *SvPV(csv,i); while (--limit) { /*SUPPRESS 530*/ - for (m = s; m < strend && *m != i; m++) ; + for (m = s; m < strend && *m != c; m++) ; if (m >= strend) break; dstr = NEWSV(30, m-s); @@ -5022,8 +5024,8 @@ PP(pp_split) else { #ifndef lint while (s < strend && --limit && - (m=fbm_instr((unsigned char*)s, (unsigned char*)strend, - rx->check_substr, PL_multiline ? FBMrf_MULTILINE : 0)) ) + (m = fbm_instr((unsigned char*)s, (unsigned char*)strend, + csv, PL_multiline ? FBMrf_MULTILINE : 0)) ) #endif { dstr = NEWSV(31, m-s); @@ -5031,14 +5033,18 @@ PP(pp_split) if (make_mortal) sv_2mortal(dstr); XPUSHs(dstr); - s = m + i - tail; /* Fake \n at the end */ + s = m + i; /* Fake \n at the end */ } } } else { maxiters += (strend - s) * rx->nparens; - while (s < strend && --limit && - CALLREGEXEC(aTHX_ rx, s, strend, orig, 1, sv, NULL, 0)) + while (s < strend && --limit +/* && (!rx->check_substr + || ((s = CALLREG_INTUIT_START(aTHX_ rx, sv, s, strend, + 0, NULL)))) +*/ && CALLREGEXEC(aTHX_ rx, s, strend, orig, + 1 /* minend */, sv, NULL, 0)) { TAINT_IF(RX_MATCH_TAINTED(rx)); if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) { diff --git a/pp_hot.c b/pp_hot.c index d3a1f5c..697c306 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -846,10 +846,8 @@ PP(pp_match) register char *s; char *strend; I32 global; - I32 r_flags = 0; - char *truebase; /* Start of string, may be - relocated if REx engine - copies the string. */ + I32 r_flags = REXEC_CHECKED; + char *truebase; /* Start of string */ register REGEXP *rx = pm->op_pmregexp; bool rxtainted; I32 gimme = GIMME; @@ -909,9 +907,7 @@ PP(pp_match) if ((gimme != G_ARRAY && !global && rx->nparens) || SvTEMP(TARG) || PL_sawampersand) r_flags |= REXEC_COPY_STR; - if (SvSCREAM(TARG) && rx->check_substr - && SvTYPE(rx->check_substr) == SVt_PVBM - && SvVALID(rx->check_substr)) + if (SvSCREAM(TARG)) r_flags |= REXEC_SCREAM; if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) { @@ -927,76 +923,17 @@ play_it_again: if (update_minmatch++) minmatch = had_zerolen; } - if (rx->check_substr) { - if (!(rx->reganch & ROPT_NOSCAN)) { /* Floating checkstring. */ - SV *c = rx->check_substr; + if (rx->reganch & RE_USE_INTUIT) { + s = CALLREG_INTUIT_START(aTHX_ rx, TARG, s, strend, r_flags, NULL); - if (r_flags & REXEC_SCREAM) { - I32 p = -1; - char *b; - - if (PL_screamfirst[BmRARE(c)] < 0 - && !( BmRARE(c) == '\n' && (BmPREVIOUS(c) == SvCUR(c) - 1) - && SvTAIL(c) )) - goto nope; - - b = (char*)HOP((U8*)s, rx->check_offset_min); - if (!(s = screaminstr(TARG, c, b - s, 0, &p, 0))) - goto nope; - - if ((rx->reganch & ROPT_CHECK_ALL) - && !PL_sawampersand && !SvTAIL(c)) - goto yup; - } - else if (!(s = fbm_instr((unsigned char*)HOP((U8*)s, rx->check_offset_min), - (unsigned char*)strend, c, - PL_multiline ? FBMrf_MULTILINE : 0))) - goto nope; - else if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand) - goto yup; - if (s && rx->check_offset_max < s - t) { - ++BmUSEFUL(c); - s = (char*)HOP((U8*)s, -rx->check_offset_max); - } - else - s = t; - } - /* Now checkstring is fixed, i.e. at fixed offset from the - beginning of match, and the match is anchored at s. */ - else if (!PL_multiline) { /* Anchored near beginning of string. */ - I32 slen; - char *b = (char*)HOP((U8*)s, rx->check_offset_min); - - if (SvTAIL(rx->check_substr)) { - slen = SvCUR(rx->check_substr); /* >= 1 */ - - if ( strend - b > slen || strend - b < slen - 1 ) - goto nope; - if ( strend - b == slen && strend[-1] != '\n') - goto nope; - /* Now should match b[0..slen-2] */ - slen--; - if (slen && (*SvPVX(rx->check_substr) != *b - || (slen > 1 - && memNE(SvPVX(rx->check_substr), b, slen)))) - goto nope; - if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand) - goto yup; - } else { /* Assume len > 0 */ - if (*SvPVX(rx->check_substr) != *b - || ((slen = SvCUR(rx->check_substr)) > 1 - && memNE(SvPVX(rx->check_substr), b, slen))) - goto nope; - if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand) - goto yup; - } - } - if (!(rx->reganch & ROPT_NAUGHTY) && --BmUSEFUL(rx->check_substr) < 0 - && rx->check_substr == rx->float_substr) { - SvREFCNT_dec(rx->check_substr); - rx->check_substr = Nullsv; /* opt is being useless */ - rx->float_substr = Nullsv; - } + if (!s) + goto nope; + if ( (rx->reganch & ROPT_CHECK_ALL) + && !PL_sawampersand + && ((rx->reganch & ROPT_NOSCAN) + || !((rx->reganch & RE_INTUIT_TAIL) + && (r_flags & REXEC_SCREAM)))) + goto yup; } if (CALLREGEXEC(aTHX_ rx, s, strend, truebase, minmatch, TARG, NULL, r_flags)) { @@ -1066,11 +1003,10 @@ play_it_again: RETPUSHYES; } -yup: /* Confirmed by check_substr */ +yup: /* Confirmed by INTUIT */ if (rxtainted) RX_MATCH_TAINTED_on(rx); TAINT_IF(RX_MATCH_TAINTED(rx)); - ++BmUSEFUL(rx->check_substr); PL_curpm = pm; if (pm->op_pmflags & PMf_ONCE) pm->op_pmdynflags |= PMdf_USED; @@ -1081,7 +1017,7 @@ yup: /* Confirmed by check_substr */ if (global) { rx->subbeg = truebase; rx->startp[0] = s - truebase; - rx->endp[0] = s - truebase + SvCUR(rx->check_substr); + rx->endp[0] = s - truebase + rx->minlen; rx->sublen = strend - truebase; goto gotcha; } @@ -1092,19 +1028,16 @@ yup: /* Confirmed by check_substr */ rx->sublen = strend - t; RX_MATCH_COPIED_on(rx); off = rx->startp[0] = s - t; - rx->endp[0] = off + SvCUR(rx->check_substr); + rx->endp[0] = off + rx->minlen; } else { /* startp/endp are used by @- @+. */ rx->startp[0] = s - truebase; - rx->endp[0] = s - truebase + SvCUR(rx->check_substr); + rx->endp[0] = s - truebase + rx->minlen; } LEAVE_SCOPE(oldsave); RETPUSHYES; nope: - if (rx->check_substr) - ++BmUSEFUL(rx->check_substr); - ret_no: if (global && !(pm->op_pmflags & PMf_CONTINUE)) { if (SvTYPE(TARG) >= SVt_PVMG && SvMAGIC(TARG)) { @@ -1717,56 +1650,26 @@ PP(pp_subst) } r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand) ? REXEC_COPY_STR : 0; - if (SvSCREAM(TARG) && rx->check_substr - && SvTYPE(rx->check_substr) == SVt_PVBM - && SvVALID(rx->check_substr)) + if (SvSCREAM(TARG)) r_flags |= REXEC_SCREAM; if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) { SAVEINT(PL_multiline); PL_multiline = pm->op_pmflags & PMf_MULTILINE; } orig = m = s; - if (rx->check_substr) { - if (!(rx->reganch & ROPT_NOSCAN)) { /* It floats. */ - if (r_flags & REXEC_SCREAM) { - I32 p = -1; - char *b; - - if (PL_screamfirst[BmRARE(rx->check_substr)] < 0) - goto nope; - - b = (char*)HOP((U8*)s, rx->check_offset_min); - if (!(s = screaminstr(TARG, rx->check_substr, b - s, 0, &p, 0))) - goto nope; - } - else if (!(s = fbm_instr((unsigned char*)HOP((U8*)s, rx->check_offset_min), - (unsigned char*)strend, - rx->check_substr, - PL_multiline ? FBMrf_MULTILINE : 0))) - goto nope; - if (s && rx->check_offset_max < s - m) { - ++BmUSEFUL(rx->check_substr); - s = (char*)HOP((U8*)s, -rx->check_offset_max); - } - else - s = m; - } - /* Now checkstring is fixed, i.e. at fixed offset from the - beginning of match, and the match is anchored at s. */ - else if (!PL_multiline) { /* Anchored at beginning of string. */ - I32 slen; - char *b = (char*)HOP((U8*)s, rx->check_offset_min); - if (*SvPVX(rx->check_substr) != *b - || ((slen = SvCUR(rx->check_substr)) > 1 - && memNE(SvPVX(rx->check_substr), b, slen))) - goto nope; - } - if (!(rx->reganch & ROPT_NAUGHTY) && --BmUSEFUL(rx->check_substr) < 0 - && rx->check_substr == rx->float_substr) { - SvREFCNT_dec(rx->check_substr); - rx->check_substr = Nullsv; /* opt is being useless */ - rx->float_substr = Nullsv; - } + if (rx->reganch & RE_USE_INTUIT) { + s = CALLREG_INTUIT_START(aTHX_ rx, TARG, s, strend, r_flags, NULL); + + if (!s) + goto nope; + /* How to do it in subst? */ +/* if ( (rx->reganch & ROPT_CHECK_ALL) + && !PL_sawampersand + && ((rx->reganch & ROPT_NOSCAN) + || !((rx->reganch & RE_INTUIT_TAIL) + && (r_flags & REXEC_SCREAM)))) + goto yup; +*/ } /* only replace once? */ @@ -1778,7 +1681,9 @@ PP(pp_subst) /* can do inplace substitution? */ if (c && clen <= rx->minlen && (once || !(r_flags & REXEC_COPY_STR)) && !(rx->reganch & ROPT_LOOKBEHIND_SEEN)) { - if (!CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, r_flags)) { + if (!CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, + r_flags | REXEC_CHECKED)) + { SPAGAIN; PUSHs(&PL_sv_no); LEAVE_SCOPE(oldsave); @@ -1851,7 +1756,9 @@ PP(pp_subst) } s = rx->endp[0] + orig; } while (CALLREGEXEC(aTHX_ rx, s, strend, orig, s == m, - Nullsv, NULL, REXEC_NOT_FIRST)); /* don't match same null twice */ + TARG, NULL, + /* don't match same null twice */ + REXEC_NOT_FIRST|REXEC_IGNOREPOS)); if (s != d) { i = strend - s; SvCUR_set(TARG, d - SvPVX(TARG) + i); @@ -1873,7 +1780,9 @@ PP(pp_subst) RETURN; } - if (CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, r_flags)) { + if (CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, + r_flags | REXEC_CHECKED)) + { if (force_on_match) { force_on_match = 0; s = SvPV_force(TARG, len); @@ -1933,8 +1842,6 @@ PP(pp_subst) goto ret_no; nope: - ++BmUSEFUL(rx->check_substr); - ret_no: SPAGAIN; PUSHs(&PL_sv_no); diff --git a/proto.h b/proto.h index eae128a..7fa6424 100644 --- a/proto.h +++ b/proto.h @@ -452,6 +452,8 @@ VIRTUAL void Perl_regdump(pTHX_ regexp* r); VIRTUAL I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave); VIRTUAL void Perl_pregfree(pTHX_ struct regexp* r); VIRTUAL regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm); +VIRTUAL char* Perl_re_intuit_start(pTHX_ regexp* prog, SV* sv, char* strpos, char* strend, U32 flags, struct re_scream_pos_data_s *data); +VIRTUAL SV* Perl_re_intuit_string(pTHX_ regexp* prog); VIRTUAL I32 Perl_regexec_flags(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); VIRTUAL regnode* Perl_regnext(pTHX_ regnode* p); VIRTUAL void Perl_regprop(pTHX_ SV* sv, regnode* o); diff --git a/regcomp.c b/regcomp.c index 76ae523..59fe5a7 100644 --- a/regcomp.c +++ b/regcomp.c @@ -25,7 +25,7 @@ # define PERL_IN_XSUB_RE # endif /* need access to debugger hooks */ -# ifndef DEBUGGING +# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING) # define DEBUGGING # endif #endif @@ -35,8 +35,9 @@ # define Perl_pregcomp my_regcomp # define Perl_regdump my_regdump # define Perl_regprop my_regprop -/* *These* symbols are masked to allow static link. */ # define Perl_pregfree my_regfree +# define Perl_re_intuit_string my_re_intuit_string +/* *These* symbols are masked to allow static link. */ # define Perl_regnext my_regnext # define Perl_save_re_context my_save_re_context # define Perl_reginitcolors my_reginitcolors @@ -898,7 +899,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) PL_regkind[(U8)OP(first)] == NBOUND) r->regstclass = first; else if (PL_regkind[(U8)OP(first)] == BOL) { - r->reganch |= (OP(first) == MBOL ? ROPT_ANCH_MBOL: ROPT_ANCH_BOL); + r->reganch |= (OP(first) == MBOL + ? ROPT_ANCH_MBOL + : (OP(first) == SBOL + ? ROPT_ANCH_SBOL + : ROPT_ANCH_BOL)); first = NEXTOPER(first); goto again; } @@ -912,12 +917,21 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) !(r->reganch & ROPT_ANCH) ) { /* turn .* into ^.* with an implied $*=1 */ - r->reganch |= ROPT_ANCH_BOL | ROPT_IMPLICIT; + int type = OP(NEXTOPER(first)); + + if (type == REG_ANY || type == ANYUTF8) + type = ROPT_ANCH_MBOL; + else + type = ROPT_ANCH_SBOL; + + r->reganch |= type | ROPT_IMPLICIT; first = NEXTOPER(first); goto again; } - if (sawplus && (!sawopen || !PL_regsawback)) - r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */ + if (sawplus && (!sawopen || !PL_regsawback) + && !(PL_regseen & REG_SEEN_EVAL)) /* May examine pos and $& */ + /* x+ must match at the 1st pos of run of x's */ + r->reganch |= ROPT_SKIP; /* Scan is after the zeroth branch, first is atomic matcher. */ DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n", @@ -1010,6 +1024,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->check_offset_min = data.offset_float_min; r->check_offset_max = data.offset_float_max; } + if (r->check_substr) { + r->reganch |= RE_USE_INTUIT; + if (SvTAIL(r->check_substr)) + r->reganch |= RE_INTUIT_TAIL; + } } else { /* Several toplevels. Best we can is to set minlen. */ @@ -2846,6 +2865,8 @@ Perl_regdump(pTHX_ regexp *r) PerlIO_printf(Perl_debug_log, "(BOL)"); if (r->reganch & ROPT_ANCH_MBOL) PerlIO_printf(Perl_debug_log, "(MBOL)"); + if (r->reganch & ROPT_ANCH_SBOL) + PerlIO_printf(Perl_debug_log, "(SBOL)"); if (r->reganch & ROPT_ANCH_GPOS) PerlIO_printf(Perl_debug_log, "(GPOS)"); PerlIO_putc(Perl_debug_log, ' '); @@ -2896,10 +2917,37 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) #endif /* DEBUGGING */ } +SV * +Perl_re_intuit_string(pTHX_ regexp *prog) +{ /* Assume that RE_INTUIT is set */ + DEBUG_r( + { STRLEN n_a; + char *s = SvPV(prog->check_substr,n_a); + + if (!PL_colorset) reginitcolors(); + PerlIO_printf(Perl_debug_log, + "%sUsing REx substr:%s `%s%.60s%s%s'\n", + PL_colors[4],PL_colors[5],PL_colors[0], + s, + PL_colors[1], + (strlen(s) > 60 ? "..." : "")); + } ); + + return prog->check_substr; +} + void Perl_pregfree(pTHX_ struct regexp *r) { dTHR; + DEBUG_r(PerlIO_printf(Perl_debug_log, + "%sFreeing REx:%s `%s%.60s%s%s'\n", + PL_colors[4],PL_colors[5],PL_colors[0], + r->precomp, + PL_colors[1], + (strlen(r->precomp) > 60 ? "..." : ""))); + + if (!r || (--r->refcnt > 0)) return; if (r->precomp) diff --git a/regcomp.h b/regcomp.h index 7c5c13a..518add0 100644 --- a/regcomp.h +++ b/regcomp.h @@ -237,3 +237,34 @@ EXTCONST char PL_simple[] = { #endif END_EXTERN_C + +typedef struct re_scream_pos_data_s +{ + char **scream_olds; /* match pos */ + I32 *scream_pos; /* Internal iterator of scream. */ +} re_scream_pos_data; + +struct reg_data { + U32 count; + U8 *what; + void* data[1]; +}; + +struct reg_substr_datum { + I32 min_offset; + I32 max_offset; + SV *substr; +}; + +struct reg_substr_data { + struct reg_substr_datum data[3]; /* Actual array */ +}; + +#define anchored_substr substrs->data[0].substr +#define anchored_offset substrs->data[0].min_offset +#define float_substr substrs->data[1].substr +#define float_min_offset substrs->data[1].min_offset +#define float_max_offset substrs->data[1].max_offset +#define check_substr substrs->data[2].substr +#define check_offset_min substrs->data[2].min_offset +#define check_offset_max substrs->data[2].max_offset diff --git a/regexec.c b/regexec.c index 7dbf6dc..c97f89e 100644 --- a/regexec.c +++ b/regexec.c @@ -25,7 +25,7 @@ # define PERL_IN_XSUB_RE # endif /* need access to debugger hooks */ -# ifndef DEBUGGING +# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING) # define DEBUGGING # endif #endif @@ -35,6 +35,7 @@ # define Perl_regexec_flags my_regexec # define Perl_regdump my_regdump # define Perl_regprop my_regprop +# define Perl_re_intuit_start my_re_intuit_start /* *These* symbols are masked to allow static link. */ # define Perl_pregexec my_pregexec # define Perl_reginitcolors my_reginitcolors @@ -258,6 +259,192 @@ S_restore_pos(pTHX_ void *arg) } } +/* + * Need to implement the following flags for reg_anch: + * + * USE_INTUIT_NOML - Useful to call re_intuit_start() first + * USE_INTUIT_ML + * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer + * INTUIT_AUTORITATIVE_ML + * INTUIT_ONCE_NOML - Intuit can match in one location only. + * INTUIT_ONCE_ML + * + * Another flag for this function: SECOND_TIME (so that float substrs + * with giant delta may be not rechecked). + */ + +/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */ + +/* If SCREAM, then sv should be compatible with strpos and strend. + Otherwise, only SvCUR(sv) is used to get strbeg. */ + +/* XXXX We assume that strpos is strbeg unless sv. */ + +char * +Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, + char *strend, U32 flags, re_scream_pos_data *data) +{ + I32 start_shift; + /* Should be nonnegative! */ + I32 end_shift; + char *s; + char *t; + I32 ml_anch; + + DEBUG_r( if (!PL_colorset) reginitcolors() ); + DEBUG_r(PerlIO_printf(Perl_debug_log, + "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", + PL_colors[4],PL_colors[5],PL_colors[0], + prog->precomp, + PL_colors[1], + (strlen(prog->precomp) > 60 ? "..." : ""), + PL_colors[0], + (strend - strpos > 60 ? 60 : strend - strpos), + strpos, PL_colors[1], + (strend - strpos > 60 ? "..." : "")) + ); + + if (prog->minlen > strend - strpos) + goto fail; + + /* XXXX Move further down? */ + start_shift = prog->check_offset_min; /* okay to underestimate on CC */ + /* Should be nonnegative! */ + end_shift = prog->minlen - start_shift - + CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0); + + if (prog->reganch & ROPT_ANCH) { + ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) + || ( (prog->reganch & ROPT_ANCH_BOL) + && !PL_multiline ) ); + + if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) { + /* Anchored... */ + I32 slen; + + if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */ + && (sv && (strpos + SvCUR(sv) != strend)) ) + goto fail; + + s = (char*)HOP((U8*)strpos, prog->check_offset_min); + if (SvTAIL(prog->check_substr)) { + slen = SvCUR(prog->check_substr); /* >= 1 */ + + if ( strend - s > slen || strend - s < slen - 1 ) { + s = Nullch; + goto finish; + } + if ( strend - s == slen && strend[-1] != '\n') { + s = Nullch; + goto finish; + } + /* Now should match s[0..slen-2] */ + slen--; + if (slen && (*SvPVX(prog->check_substr) != *s + || (slen > 1 + && memNE(SvPVX(prog->check_substr), s, slen)))) + s = Nullch; + } + else if (*SvPVX(prog->check_substr) != *s + || ((slen = SvCUR(prog->check_substr)) > 1 + && memNE(SvPVX(prog->check_substr), s, slen))) + s = Nullch; + else + s = strpos; + goto finish; + } + s = strpos; + if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen)) + end_shift += strend - s - prog->minlen - prog->check_offset_max; + } + else { + ml_anch = 0; + s = strpos; + } + + restart: + if (flags & REXEC_SCREAM) { + SV *c = prog->check_substr; + char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */ + I32 p = -1; /* Internal iterator of scream. */ + I32 *pp = data ? data->scream_pos : &p; + + if (PL_screamfirst[BmRARE(c)] >= 0 + || ( BmRARE(c) == '\n' + && (BmPREVIOUS(c) == SvCUR(c) - 1) + && SvTAIL(c) )) + s = screaminstr(sv, prog->check_substr, + start_shift + (strpos - strbeg), end_shift, pp, 0); + else + s = Nullch; + if (data) + *data->scream_olds = s; + } + else + s = fbm_instr((unsigned char*)s + start_shift, + (unsigned char*)strend - end_shift, + prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0); + + /* Update the count-of-usability, remove useless subpatterns, + unshift s. */ + finish: + if (!s) { + ++BmUSEFUL(prog->check_substr); /* hooray */ + goto fail; /* not present */ + } + else if (s - strpos > prog->check_offset_max && + ((prog->reganch & ROPT_UTF8) + ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) + && t >= strpos) + : (t = s - prog->check_offset_max) != 0) ) { + if (ml_anch && t[-1] != '\n') { + find_anchor: + while (t < strend - end_shift - prog->minlen) { + if (*t == '\n') { + if (t < s - prog->check_offset_min) { + s = t + 1; + goto set_useful; + } + s = t + 1; + goto restart; + } + t++; + } + s = Nullch; + goto finish; + } + s = t; + set_useful: + ++BmUSEFUL(prog->check_substr); /* hooray/2 */ + } + else { + if (ml_anch && sv + && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') { + t = strpos; + goto find_anchor; + } + if (!(prog->reganch & ROPT_NAUGHTY) + && --BmUSEFUL(prog->check_substr) < 0 + && prog->check_substr == prog->float_substr) { /* boo */ + /* If flags & SOMETHING - do not do it many times on the same match */ + SvREFCNT_dec(prog->check_substr); + prog->check_substr = Nullsv; /* disable */ + prog->float_substr = Nullsv; /* clear */ + s = strpos; + prog->reganch &= ~RE_USE_INTUIT; + } + else + s = strpos; + } + + DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n", + PL_colors[4],PL_colors[5], (long)(s - strpos)) ); + return s; + fail: + DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n", + PL_colors[4],PL_colors[5])); + return Nullch; +} /* - regexec_flags - match a regexp against a string @@ -339,103 +526,78 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* If there is a "must appear" string, look for it. */ s = startpos; - if (!(flags & REXEC_CHECKED) - && prog->check_substr != Nullsv && - !(prog->reganch & ROPT_ANCH_GPOS) && - (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL)) - || (PL_multiline && prog->check_substr == prog->anchored_substr)) ) - { - char *t; - start_shift = prog->check_offset_min; /* okay to underestimate on CC */ - /* Should be nonnegative! */ - end_shift = minlen - start_shift - - CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0); - if (flags & REXEC_SCREAM) { - SV *c = prog->check_substr; - - if (PL_screamfirst[BmRARE(c)] >= 0 - || ( BmRARE(c) == '\n' - && (BmPREVIOUS(c) == SvCUR(c) - 1) - && SvTAIL(c) )) - s = screaminstr(sv, prog->check_substr, - start_shift + (stringarg - strbeg), - end_shift, &scream_pos, 0); - else - s = Nullch; - scream_olds = s; - } + + if (prog->reganch & ROPT_GPOS_SEEN) { + MAGIC *mg; + + if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG + && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) + PL_reg_ganch = strbeg + mg->mg_len; else - s = fbm_instr((unsigned char*)s + start_shift, - (unsigned char*)strend - end_shift, - prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0); - if (!s) { - ++BmUSEFUL(prog->check_substr); /* hooray */ - goto phooey; /* not present */ - } - else if (s - stringarg > prog->check_offset_max && - (UTF - ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) && t >= stringarg) - : (t = s - prog->check_offset_max) != 0 - ) - ) - { - ++BmUSEFUL(prog->check_substr); /* hooray/2 */ - s = t; - } - else if (!(prog->reganch & ROPT_NAUGHTY) - && --BmUSEFUL(prog->check_substr) < 0 - && prog->check_substr == prog->float_substr) { /* boo */ - SvREFCNT_dec(prog->check_substr); - prog->check_substr = Nullsv; /* disable */ - prog->float_substr = Nullsv; /* clear */ - s = startpos; + PL_reg_ganch = startpos; + if (prog->reganch & ROPT_ANCH_GPOS) { + if (s > PL_reg_ganch) + goto phooey; + s = PL_reg_ganch; } - else - s = startpos; } - DEBUG_r(if (!PL_colorset) reginitcolors()); - DEBUG_r(PerlIO_printf(Perl_debug_log, + if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) { + re_scream_pos_data d; + + d.scream_olds = &scream_olds; + d.scream_pos = &scream_pos; + s = re_intuit_start(prog, sv, s, strend, flags, &d); + if (!s) + goto phooey; /* not present */ + } + + DEBUG_r( if (!PL_colorset) reginitcolors() ); + DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", PL_colors[4],PL_colors[5],PL_colors[0], prog->precomp, PL_colors[1], (strlen(prog->precomp) > 60 ? "..." : ""), - PL_colors[0], + PL_colors[0], (strend - startpos > 60 ? 60 : strend - startpos), startpos, PL_colors[1], (strend - startpos > 60 ? "..." : "")) ); - if (prog->reganch & ROPT_GPOS_SEEN) { - MAGIC *mg; - - if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG - && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) - PL_reg_ganch = strbeg + mg->mg_len; - else - PL_reg_ganch = startpos; - } - /* Simplest case: anchored match need be tried only once. */ /* [unless only anchor is BOL and multiline is set] */ if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { - if (regtry(prog, startpos)) + if (s == startpos && regtry(prog, startpos)) goto got_it; else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ { + char *end; + if (minlen) dontbother = minlen - 1; - strend = HOPc(strend, -dontbother); + end = HOPc(strend, -dontbother) - 1; /* for multiline we only have to try after newlines */ - if (s > startpos) - s--; - while (s < strend) { - if (*s++ == '\n') { /* don't need PL_utf8skip here */ - if (s < strend && regtry(prog, s)) + if (prog->check_substr) { + while (1) { + if (regtry(prog, s)) goto got_it; - } + if (s >= end) + goto phooey; + s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); + if (!s) + goto phooey; + } + } else { + if (s > startpos) + s--; + while (s < end) { + if (*s++ == '\n') { /* don't need PL_utf8skip here */ + if (regtry(prog, s)) + goto got_it; + } + } } } goto phooey; @@ -448,7 +610,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* Messy cases: unanchored match. */ if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { /* we have /x+whatever/ */ - /* it must be a one character string */ + /* it must be a one character string (XXXX Except UTF?) */ char ch = SvPVX(prog->anchored_substr)[0]; if (UTF) { while (s < strend) { diff --git a/regexp.h b/regexp.h index 9da5bd4..5d787e0 100644 --- a/regexp.h +++ b/regexp.h @@ -17,38 +17,13 @@ struct regnode { typedef struct regnode regnode; -struct reg_data { - U32 count; - U8 *what; - void* data[1]; -}; - -struct reg_substr_datum { - I32 min_offset; - I32 max_offset; - SV *substr; -}; - -struct reg_substr_data { - struct reg_substr_datum data[3]; /* Actual array */ -}; +struct reg_substr_data; typedef struct regexp { I32 *startp; I32 *endp; regnode *regstclass; -#if 0 - SV *anchored_substr; /* Substring at fixed position wrt start. */ - I32 anchored_offset; /* Position of it. */ - SV *float_substr; /* Substring at variable position wrt start. */ - I32 float_min_offset; /* Minimal position of it. */ - I32 float_max_offset; /* Maximal position of it. */ - SV *check_substr; /* Substring to check before matching. */ - I32 check_offset_min; /* Offset of the above. */ - I32 check_offset_max; /* Offset of the above. */ -#else struct reg_substr_data *substrs; -#endif char *precomp; /* pre-compilation regular expression */ struct reg_data *data; /* Additional data. */ char *subbeg; /* saved or original string @@ -64,29 +39,20 @@ typedef struct regexp { regnode program[1]; /* Unwarranted chumminess with compiler. */ } regexp; -#define anchored_substr substrs->data[0].substr -#define anchored_offset substrs->data[0].min_offset -#define float_substr substrs->data[1].substr -#define float_min_offset substrs->data[1].min_offset -#define float_max_offset substrs->data[1].max_offset -#define check_substr substrs->data[2].substr -#define check_offset_min substrs->data[2].min_offset -#define check_offset_max substrs->data[2].max_offset - -#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_MBOL|ROPT_ANCH_GPOS) -#define ROPT_ANCH_SINGLE (ROPT_ANCH_BOL|ROPT_ANCH_GPOS) +#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_MBOL|ROPT_ANCH_GPOS|ROPT_ANCH_SBOL) +#define ROPT_ANCH_SINGLE (ROPT_ANCH_SBOL|ROPT_ANCH_GPOS) #define ROPT_ANCH_BOL 0x00001 #define ROPT_ANCH_MBOL 0x00002 -#define ROPT_ANCH_GPOS 0x00004 -#define ROPT_SKIP 0x00008 -#define ROPT_IMPLICIT 0x00010 /* Converted .* to ^.* */ -#define ROPT_NOSCAN 0x00020 /* Check-string always at start. */ -#define ROPT_GPOS_SEEN 0x00040 -#define ROPT_CHECK_ALL 0x00080 -#define ROPT_LOOKBEHIND_SEEN 0x00100 -#define ROPT_EVAL_SEEN 0x00200 -#define ROPT_TAINTED_SEEN 0x00400 -#define ROPT_ANCH_SBOL 0x00800 +#define ROPT_ANCH_SBOL 0x00004 +#define ROPT_ANCH_GPOS 0x00008 +#define ROPT_SKIP 0x00010 +#define ROPT_IMPLICIT 0x00020 /* Converted .* to ^.* */ +#define ROPT_NOSCAN 0x00040 /* Check-string always at start. */ +#define ROPT_GPOS_SEEN 0x00080 +#define ROPT_CHECK_ALL 0x00100 +#define ROPT_LOOKBEHIND_SEEN 0x00200 +#define ROPT_EVAL_SEEN 0x00400 +#define ROPT_TAINTED_SEEN 0x00800 /* 0xf800 of reganch is used by PMf_COMPILETIME */ @@ -94,6 +60,19 @@ typedef struct regexp { #define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */ #define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */ +#define RE_USE_INTUIT_NOML 0x0100000 /* Best to intuit before matching */ +#define RE_USE_INTUIT_ML 0x0200000 +#define REINT_AUTORITATIVE_NOML 0x0400000 /* Can trust a positive answer */ +#define REINT_AUTORITATIVE_ML 0x0800000 +#define REINT_ONCE_NOML 0x1000000 /* Intuit can succed once only. */ +#define REINT_ONCE_ML 0x2000000 +#define RE_INTUIT_ONECHAR 0x4000000 +#define RE_INTUIT_TAIL 0x8000000 + +#define RE_USE_INTUIT (RE_USE_INTUIT_NOML|RE_USE_INTUIT_ML) +#define REINT_AUTORITATIVE (REINT_AUTORITATIVE_NOML|REINT_AUTORITATIVE_ML) +#define REINT_ONCE (REINT_ONCE_NOML|REINT_ONCE_ML) + #define RX_MATCH_TAINTED(prog) ((prog)->reganch & ROPT_TAINTED_SEEN) #define RX_MATCH_TAINTED_on(prog) ((prog)->reganch |= ROPT_TAINTED_SEEN) #define RX_MATCH_TAINTED_off(prog) ((prog)->reganch &= ~ROPT_TAINTED_SEEN) @@ -108,18 +87,22 @@ typedef struct regexp { ? RX_MATCH_COPIED_on(prog) \ : RX_MATCH_COPIED_off(prog)) -#define REXEC_COPY_STR 1 /* Need to copy the string. */ -#define REXEC_CHECKED 2 /* check_substr already checked. */ -#define REXEC_SCREAM 4 /* use scream table. */ -#define REXEC_IGNOREPOS 8 /* \G matches at start. */ +#define REXEC_COPY_STR 0x01 /* Need to copy the string. */ +#define REXEC_CHECKED 0x02 /* check_substr already checked. */ +#define REXEC_SCREAM 0x04 /* use scream table. */ +#define REXEC_IGNOREPOS 0x08 /* \G matches at start. */ #define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g. */ +#define REXEC_ML 0x20 /* $* was set. */ #define ReREFCNT_inc(re) ((re && re->refcnt++), re) -#define ReREFCNT_dec(re) pregfree(re) +#define ReREFCNT_dec(re) CALLREGFREE(aTHX_ re) #define FBMcf_TAIL_DOLLAR 1 -#define FBMcf_TAIL_Z 2 -#define FBMcf_TAIL_z 4 -#define FBMcf_TAIL (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_Z|FBMcf_TAIL_z) +#define FBMcf_TAIL_DOLLARM 2 +#define FBMcf_TAIL_Z 4 +#define FBMcf_TAIL_z 8 +#define FBMcf_TAIL (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_DOLLARM|FBMcf_TAIL_Z|FBMcf_TAIL_z) #define FBMrf_MULTILINE 1 + +struct re_scream_pos_data_s; diff --git a/thrdvar.h b/thrdvar.h index a442367..c823393 100644 --- a/thrdvar.h +++ b/thrdvar.h @@ -170,9 +170,16 @@ PERLVAR(Treg_oldsaved, char*) /* old saved substr during match */ PERLVAR(Treg_oldsavedlen, STRLEN) /* old length of saved substr during match */ PERLVARI(Tregcompp, regcomp_t, FUNC_NAME_TO_PTR(Perl_pregcomp)) - /* Pointer to RE compiler */ + /* Pointer to REx compiler */ PERLVARI(Tregexecp, regexec_t, FUNC_NAME_TO_PTR(Perl_regexec_flags)) - /* Pointer to RE executer */ + /* Pointer to REx executer */ +PERLVARI(Tregint_start, re_intuit_start_t, FUNC_NAME_TO_PTR(Perl_re_intuit_start)) + /* Pointer to optimized REx executer */ +PERLVARI(Tregint_string,re_intuit_string_t, FUNC_NAME_TO_PTR(Perl_re_intuit_string)) + /* Pointer to optimized REx string */ +PERLVARI(Tregfree, regfree_t, FUNC_NAME_TO_PTR(Perl_pregfree)) + /* Pointer to REx free()er */ + PERLVARI(Treginterp_cnt,int, 0) /* Whether `Regexp' was interpolated. */ PERLVARI(Treg_starttry, char *, 0) /* -Dr: where regtry was called. */ diff --git a/util.c b/util.c index 99415f0..242a308 100644 --- a/util.c +++ b/util.c @@ -3235,6 +3235,9 @@ Perl_new_struct_thread(pTHX_ struct perl_thread *t) PL_maxscream = -1; PL_regcompp = FUNC_NAME_TO_PTR(Perl_pregcomp); PL_regexecp = FUNC_NAME_TO_PTR(Perl_regexec_flags); + PL_regint_start = FUNC_NAME_TO_PTR(Perl_re_intuit_start); + PL_regint_string = FUNC_NAME_TO_PTR(Perl_re_intuit_string); + PL_regfree = FUNC_NAME_TO_PTR(Perl_pregfree); PL_regindent = 0; PL_reginterp_cnt = 0; PL_lastscream = Nullsv;