if ( noper_next < tail ) { \
if (!trie->jump) \
Newxz( trie->jump, word_count + 1, U16); \
- trie->jump[curword] = (U16)(tail - noper_next); \
+ trie->jump[curword] = (U16)(noper_next - convert); \
if (!jumper) \
jumper = noper_next; \
if (!nextbranch) \
U32 next_alloc = 0;
regnode *jumper = NULL;
regnode *nextbranch = NULL;
+ regnode *convert = NULL;
/* we just use folder as a flag in utf8 */
const U8 * const folder = ( flags == EXACTF
? PL_fold
REG_NODE_NUM(last), REG_NODE_NUM(tail),
(int)depth);
});
+
+ /* Find the node we are going to overwrite */
+ if ( first == startbranch && OP( last ) != BRANCH ) {
+ /* whole branch chain */
+ convert = first;
+ } else {
+ /* branch sub-chain */
+ convert = NEXTOPER( first );
+ }
+
/* -- First loop and Setup --
We first traverse the branches and scan each word to determine if it
);
{ /* Modify the program and insert the new TRIE node*/
- regnode *convert;
U8 nodetype =(U8)(flags & 0xFF);
char *str=NULL;
the whole branch sequence, including the first.
*/
/* Find the node we are going to overwrite */
- if ( first == startbranch && OP( last ) != BRANCH ) {
- /* whole branch chain */
- convert = first;
- DEBUG_r({
- const regnode *nop = NEXTOPER( convert );
- mjd_offset= Node_Offset((nop));
- mjd_nodelen= Node_Length((nop));
- });
- } else {
+ if ( first != startbranch || OP( last ) == BRANCH ) {
/* branch sub-chain */
- convert = NEXTOPER( first );
NEXT_OFF( first ) = (U16)(last - first);
DEBUG_r({
mjd_offset= Node_Offset((convert));
mjd_nodelen= Node_Length((convert));
});
+ /* whole branch chain */
+ } else {
+ DEBUG_r({
+ const regnode *nop = NEXTOPER( convert );
+ mjd_offset= Node_Offset((nop));
+ mjd_nodelen= Node_Length((nop));
+ });
}
+
DEBUG_OPTIMISE_r(
PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
(int)depth * 2 + 2, "",
jump[0], which is otherwise unused by the jump logic.
We use this when dumping a trie and during optimisation. */
if (trie->jump)
- trie->jump[0] = (U16)(tail - nextbranch);
+ trie->jump[0] = (U16)(nextbranch - convert);
/* XXXX */
if ( !trie->states[trie->startstate].wordnum && trie->bitmap &&
SV * const mysv=sv_newmortal(); \
regnode *Next = regnext(scan); \
regprop(RExC_rx, mysv, scan); \
- PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s [%d]\n", \
+ PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
(int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
Next ? (REG_NODE_NUM(Next)) : 0 ); \
});
/* NOTE - There is similar code to this block above for handling
BRANCH nodes on the initial study. If you change stuff here
check there too. */
+ regnode *trie_node= scan;
regnode *tail= regnext(scan);
reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
I32 max1 = 0, min1 = I32_MAX;
if (trie->jump[word]) {
if (!nextbranch)
- nextbranch = tail - trie->jump[0];
- scan= tail - trie->jump[word];
+ nextbranch = trie_node + trie->jump[0];
+ scan= trie_node + trie->jump[word];
/* We go from the jump point to the branch that follows
it. Note this means we need the vestigal unused branches
even though they arent otherwise used.
r->paren_names = 0;
if (RExC_seen & REG_SEEN_RECURSE) {
- Newx(RExC_parens, RExC_npar,regnode *);
+ Newxz(RExC_parens, RExC_npar,regnode *);
SAVEFREEPV(RExC_parens);
}
RExC_parse++;
case '=': /* (?=...) */
case '!': /* (?!...) */
+ if (*RExC_parse == ')')
+ goto do_op_fail;
RExC_seen_zerolen++;
case ':': /* (?:...) */
case '>': /* (?>...) */
break;
+ case 'F':
+ if (RExC_parse[0] == 'A' &&
+ RExC_parse[1] == 'I' &&
+ RExC_parse[2] == 'L')
+ RExC_parse+=3;
+ if (*RExC_parse != ')')
+ vFAIL("Sequence (?FAIL) or (?F) not terminated");
+ do_op_fail:
+ ret = reg_node(pRExC_state, OPFAIL);
+ nextchar(pRExC_state);
+ return ret;
+ break;
case '$': /* (?$...) */
case '@': /* (?@...) */
vFAIL2("Sequence (?%c...) not implemented", (int)paren);
case 'R' : /* (?R) */
if (*RExC_parse != ')')
FAIL("Sequence (?R) not terminated");
- reg_node(pRExC_state, SRECURSE);
- break; /* (?PARNO) */
+ ret = reg_node(pRExC_state, SRECURSE);
+ nextchar(pRExC_state);
+ return ret;
+ /*notreached*/
{ /* named and numeric backreferences */
I32 num;
char * parse_start;
DUMPUNTIL(NEXTOPER(node), next);
}
else if ( PL_regkind[(U8)op] == TRIE ) {
+ const regnode *this_trie = node;
const char op = OP(node);
const I32 n = ARG(node);
const reg_ac_data * const ac = op>=AHOCORASICK ?
PL_colors[0], PL_colors[1],
(SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
PERL_PV_PRETTY_ELIPSES |
- PERL_PV_PRETTY_LTGT
+ PERL_PV_PRETTY_LTGT
)
: "???"
);
if (trie->jump) {
- U16 dist= trie->jump[word_idx+1];
- PerlIO_printf(Perl_debug_log, "(%u)\n",(next - dist) - start);
+ U16 dist = trie->jump[word_idx+1];
+ PerlIO_printf(Perl_debug_log, "(%u)\n",
+ (dist ? this_trie + dist : next) - start);
if (dist) {
if (!nextbranch)
- nextbranch= next - trie->jump[0];
- DUMPUNTIL(next - dist, nextbranch);
- }
+ nextbranch = this_trie + trie->jump[0];
+ DUMPUNTIL(this_trie + dist, nextbranch);
+ }
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode *)nextbranch);
} else {
NREFFL NREF, no-sv 1 Match already matched string, folded in loc.
-#*Special conditionals
+#*Special conditionals (70..72)
NGROUPP NGROUPP, no-sv 1 Whether the group matched.
RECURSEP RECURSEP, num 1 Whether we are in a specific recurse.
DEFINEP DEFINEP, none 1 Never execute directly.
+#*Backtracking
+OPFAIL OPFAIL, none Same as (?!)
+
# NEW STUFF ABOVE THIS LINE -- Please update counts below.
################################################################################
-#*SPECIAL REGOPS (70, 71)
+#*SPECIAL REGOPS
# This is not really a node, but an optimized away piece of a "long" node.
# To simplify debugging output, we mark it as if it were a node
#define REGCP_OTHER_ELEMS 8
SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
+
for (p = PL_regsize; p > parenfloor; p--) {
/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
SSPUSHINT(PL_regendp[p]);
ST.accepted = 0; /* how many accepting states we have seen */
ST.B = next;
ST.jump = trie->jump;
-
-#ifdef DEBUGGING
ST.me = scan;
-#endif
-
-
/*
traverse the TRIE keeping track of all accepting states
we transition through until we get to a failing node.
locinput = PL_reginput;
nextchr = UCHARAT(locinput);
- if ( !ST.jump )
+ if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
scan = ST.B;
else
- scan = ST.B - ST.jump[ST.accept_buff[0].wordnum];
+ scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
continue; /* execute rest of RE */
}
SV ** const tmp = RX_DEBUG(reginfo->prog)
? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 )
: NULL;
- regnode *nextop=!ST.jump ?
+ regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
ST.B :
- ST.B - ST.jump[ST.accept_buff[best].wordnum];
+ ST.me + ST.jump[ST.accept_buff[best].wordnum];
PerlIO_printf( Perl_debug_log,
"%*s %strying alternation #%d <%s> at node #%d %s\n",
REPORT_CODE_OFF+depth*2, "", PL_colors[4],
best = ST.accepted;
}
PL_reginput = (char *)ST.accept_buff[ best ].endpos;
- if ( !ST.jump ) {
+ if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
PUSH_STATE_GOTO(TRIE_next, ST.B);
/* NOTREACHED */
} else {
- PUSH_STATE_GOTO(TRIE_next, ST.B - ST.jump[ST.accept_buff[best].wordnum]);
+ PUSH_STATE_GOTO(TRIE_next, ST.me + ST.jump[ST.accept_buff[best].wordnum]);
/* NOTREACHED */
}
/* NOTREACHED */
n = ARG(scan); /* which paren pair */
PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
PL_regendp[n] = locinput - PL_bostr;
+ /*if (n > PL_regsize)
+ PL_regsize = n;*/
if (n > (I32)*PL_reglastparen)
*PL_reglastparen = n;
*PL_reglastcloseparen = n;
#undef ST
-
case END:
fake_end:
if (cur_eval) {
if (next == scan)
next = NULL;
break;
+ case OPFAIL:
+ sayNO;
default:
PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
PTR2UV(scan), OP(scan));
struct {
reg_trie_accepted *accept_buff;
U32 accepted; /* how many accepting states we have seen */
- U16 *jump; /* negative offsets from B */
+ U16 *jump; /* positive offsets from me */
regnode *B; /* node following the trie */
- regnode *me; /* only needed for debugging */
+ regnode *me; /* Which node am I - needed for jump tries*/
} trie;
struct {
/* Regops and State definitions */
-#define REGNODE_MAX 74
-#define REGMATCH_STATE_MAX 104
+#define REGNODE_MAX 75
+#define REGMATCH_STATE_MAX 105
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
#define NGROUPP 70 /* 0x46 Whether the group matched. */
#define RECURSEP 71 /* 0x47 Whether we are in a specific recurse. */
#define DEFINEP 72 /* 0x48 Never execute directly. */
-#define OPTIMIZED 73 /* 0x49 Placeholder for dump. */
-#define PSEUDO 74 /* 0x4a Pseudo opcode for internal use. */
+#define OPFAIL 73 /* 0x49 Same as (?!) */
+#define OPTIMIZED 74 /* 0x4a Placeholder for dump. */
+#define PSEUDO 75 /* 0x4b Pseudo opcode for internal use. */
/* ------------ States ------------- */
-#define TRIE_next 75 /* 0x4b Regmatch state for TRIE */
-#define TRIE_next_fail 76 /* 0x4c Regmatch state for TRIE */
-#define EVAL_AB 77 /* 0x4d Regmatch state for EVAL */
-#define EVAL_AB_fail 78 /* 0x4e Regmatch state for EVAL */
-#define CURLYX_end 79 /* 0x4f Regmatch state for CURLYX */
-#define CURLYX_end_fail 80 /* 0x50 Regmatch state for CURLYX */
-#define WHILEM_A_pre 81 /* 0x51 Regmatch state for WHILEM */
-#define WHILEM_A_pre_fail 82 /* 0x52 Regmatch state for WHILEM */
-#define WHILEM_A_min 83 /* 0x53 Regmatch state for WHILEM */
-#define WHILEM_A_min_fail 84 /* 0x54 Regmatch state for WHILEM */
-#define WHILEM_A_max 85 /* 0x55 Regmatch state for WHILEM */
-#define WHILEM_A_max_fail 86 /* 0x56 Regmatch state for WHILEM */
-#define WHILEM_B_min 87 /* 0x57 Regmatch state for WHILEM */
-#define WHILEM_B_min_fail 88 /* 0x58 Regmatch state for WHILEM */
-#define WHILEM_B_max 89 /* 0x59 Regmatch state for WHILEM */
-#define WHILEM_B_max_fail 90 /* 0x5a Regmatch state for WHILEM */
-#define BRANCH_next 91 /* 0x5b Regmatch state for BRANCH */
-#define BRANCH_next_fail 92 /* 0x5c Regmatch state for BRANCH */
-#define CURLYM_A 93 /* 0x5d Regmatch state for CURLYM */
-#define CURLYM_A_fail 94 /* 0x5e Regmatch state for CURLYM */
-#define CURLYM_B 95 /* 0x5f Regmatch state for CURLYM */
-#define CURLYM_B_fail 96 /* 0x60 Regmatch state for CURLYM */
-#define IFMATCH_A 97 /* 0x61 Regmatch state for IFMATCH */
-#define IFMATCH_A_fail 98 /* 0x62 Regmatch state for IFMATCH */
-#define CURLY_B_min_known 99 /* 0x63 Regmatch state for CURLY */
-#define CURLY_B_min_known_fail 100 /* 0x64 Regmatch state for CURLY */
-#define CURLY_B_min 101 /* 0x65 Regmatch state for CURLY */
-#define CURLY_B_min_fail 102 /* 0x66 Regmatch state for CURLY */
-#define CURLY_B_max 103 /* 0x67 Regmatch state for CURLY */
-#define CURLY_B_max_fail 104 /* 0x68 Regmatch state for CURLY */
+#define TRIE_next 76 /* 0x4c Regmatch state for TRIE */
+#define TRIE_next_fail 77 /* 0x4d Regmatch state for TRIE */
+#define EVAL_AB 78 /* 0x4e Regmatch state for EVAL */
+#define EVAL_AB_fail 79 /* 0x4f Regmatch state for EVAL */
+#define CURLYX_end 80 /* 0x50 Regmatch state for CURLYX */
+#define CURLYX_end_fail 81 /* 0x51 Regmatch state for CURLYX */
+#define WHILEM_A_pre 82 /* 0x52 Regmatch state for WHILEM */
+#define WHILEM_A_pre_fail 83 /* 0x53 Regmatch state for WHILEM */
+#define WHILEM_A_min 84 /* 0x54 Regmatch state for WHILEM */
+#define WHILEM_A_min_fail 85 /* 0x55 Regmatch state for WHILEM */
+#define WHILEM_A_max 86 /* 0x56 Regmatch state for WHILEM */
+#define WHILEM_A_max_fail 87 /* 0x57 Regmatch state for WHILEM */
+#define WHILEM_B_min 88 /* 0x58 Regmatch state for WHILEM */
+#define WHILEM_B_min_fail 89 /* 0x59 Regmatch state for WHILEM */
+#define WHILEM_B_max 90 /* 0x5a Regmatch state for WHILEM */
+#define WHILEM_B_max_fail 91 /* 0x5b Regmatch state for WHILEM */
+#define BRANCH_next 92 /* 0x5c Regmatch state for BRANCH */
+#define BRANCH_next_fail 93 /* 0x5d Regmatch state for BRANCH */
+#define CURLYM_A 94 /* 0x5e Regmatch state for CURLYM */
+#define CURLYM_A_fail 95 /* 0x5f Regmatch state for CURLYM */
+#define CURLYM_B 96 /* 0x60 Regmatch state for CURLYM */
+#define CURLYM_B_fail 97 /* 0x61 Regmatch state for CURLYM */
+#define IFMATCH_A 98 /* 0x62 Regmatch state for IFMATCH */
+#define IFMATCH_A_fail 99 /* 0x63 Regmatch state for IFMATCH */
+#define CURLY_B_min_known 100 /* 0x64 Regmatch state for CURLY */
+#define CURLY_B_min_known_fail 101 /* 0x65 Regmatch state for CURLY */
+#define CURLY_B_min 102 /* 0x66 Regmatch state for CURLY */
+#define CURLY_B_min_fail 103 /* 0x67 Regmatch state for CURLY */
+#define CURLY_B_max 104 /* 0x68 Regmatch state for CURLY */
+#define CURLY_B_max_fail 105 /* 0x69 Regmatch state for CURLY */
/* PL_regkind[] What type of regop or state is this. */
NGROUPP, /* NGROUPP */
RECURSEP, /* RECURSEP */
DEFINEP, /* DEFINEP */
+ OPFAIL, /* OPFAIL */
NOTHING, /* OPTIMIZED */
PSEUDO, /* PSEUDO */
/* ------------ States ------------- */
EXTRA_SIZE(struct regnode_1), /* NGROUPP */
EXTRA_SIZE(struct regnode_1), /* RECURSEP */
EXTRA_SIZE(struct regnode_1), /* DEFINEP */
+ 0, /* OPFAIL */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
0, /* NGROUPP */
0, /* RECURSEP */
0, /* DEFINEP */
+ 0, /* OPFAIL */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
"NGROUPP", /* 0x46 */
"RECURSEP", /* 0x47 */
"DEFINEP", /* 0x48 */
- "OPTIMIZED", /* 0x49 */
- "PSEUDO", /* 0x4a */
+ "OPFAIL", /* 0x49 */
+ "OPTIMIZED", /* 0x4a */
+ "PSEUDO", /* 0x4b */
/* ------------ States ------------- */
- "TRIE_next", /* 0x4b */
- "TRIE_next_fail", /* 0x4c */
- "EVAL_AB", /* 0x4d */
- "EVAL_AB_fail", /* 0x4e */
- "CURLYX_end", /* 0x4f */
- "CURLYX_end_fail", /* 0x50 */
- "WHILEM_A_pre", /* 0x51 */
- "WHILEM_A_pre_fail", /* 0x52 */
- "WHILEM_A_min", /* 0x53 */
- "WHILEM_A_min_fail", /* 0x54 */
- "WHILEM_A_max", /* 0x55 */
- "WHILEM_A_max_fail", /* 0x56 */
- "WHILEM_B_min", /* 0x57 */
- "WHILEM_B_min_fail", /* 0x58 */
- "WHILEM_B_max", /* 0x59 */
- "WHILEM_B_max_fail", /* 0x5a */
- "BRANCH_next", /* 0x5b */
- "BRANCH_next_fail", /* 0x5c */
- "CURLYM_A", /* 0x5d */
- "CURLYM_A_fail", /* 0x5e */
- "CURLYM_B", /* 0x5f */
- "CURLYM_B_fail", /* 0x60 */
- "IFMATCH_A", /* 0x61 */
- "IFMATCH_A_fail", /* 0x62 */
- "CURLY_B_min_known", /* 0x63 */
- "CURLY_B_min_known_fail", /* 0x64 */
- "CURLY_B_min", /* 0x65 */
- "CURLY_B_min_fail", /* 0x66 */
- "CURLY_B_max", /* 0x67 */
- "CURLY_B_max_fail", /* 0x68 */
+ "TRIE_next", /* 0x4c */
+ "TRIE_next_fail", /* 0x4d */
+ "EVAL_AB", /* 0x4e */
+ "EVAL_AB_fail", /* 0x4f */
+ "CURLYX_end", /* 0x50 */
+ "CURLYX_end_fail", /* 0x51 */
+ "WHILEM_A_pre", /* 0x52 */
+ "WHILEM_A_pre_fail", /* 0x53 */
+ "WHILEM_A_min", /* 0x54 */
+ "WHILEM_A_min_fail", /* 0x55 */
+ "WHILEM_A_max", /* 0x56 */
+ "WHILEM_A_max_fail", /* 0x57 */
+ "WHILEM_B_min", /* 0x58 */
+ "WHILEM_B_min_fail", /* 0x59 */
+ "WHILEM_B_max", /* 0x5a */
+ "WHILEM_B_max_fail", /* 0x5b */
+ "BRANCH_next", /* 0x5c */
+ "BRANCH_next_fail", /* 0x5d */
+ "CURLYM_A", /* 0x5e */
+ "CURLYM_A_fail", /* 0x5f */
+ "CURLYM_B", /* 0x60 */
+ "CURLYM_B_fail", /* 0x61 */
+ "IFMATCH_A", /* 0x62 */
+ "IFMATCH_A_fail", /* 0x63 */
+ "CURLY_B_min_known", /* 0x64 */
+ "CURLY_B_min_known_fail", /* 0x65 */
+ "CURLY_B_min", /* 0x66 */
+ "CURLY_B_min_fail", /* 0x67 */
+ "CURLY_B_max", /* 0x68 */
+ "CURLY_B_max_fail", /* 0x69 */
};
#endif /* DEBUGGING */
#else
([^()]++|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x
round\(([^()]++)\) _I(round(xs * sz),1) y $1 xs * sz
+(foo[1x]|bar[2x]|baz[3x])+y foo1bar2baz3y y $1 baz3
+(foo[1x]|bar[2x]|baz[3x])+y foo1bar2baz3y y $& foo1bar2baz3y
+(foo[1x]|bar[2x]|baz[3x])*y foo1bar2baz3y y $1 baz3
+(foo[1x]|bar[2x]|baz[3x])*y foo1bar2baz3y y $& foo1bar2baz3y
+
+([yX].|WORDS|[yX].|WORD)S WORDS y $1 WORD
+(WORDS|WORLD|WORD)S WORDS y $1 WORD
+([yX].|WORDS|WORD|[xY].)S WORDS y $1 WORD
+(foo|fool|[zx].|money|parted)$ fool y $1 fool
+([zx].|foo|fool|[zq].|money|parted|[yx].)$ fool y $1 fool
+(foo|fool|[zx].|money|parted)$ fools n - -
+([zx].|foo|fool|[qx].|money|parted|[py].)$ fools n - -
+
+([yX].|WORDS|[yX].|WORD)+S WORDS y $1 WORD
+(WORDS|WORLD|WORD)+S WORDS y $1 WORD
+([yX].|WORDS|WORD|[xY].)+S WORDS y $1 WORD
+(foo|fool|[zx].|money|parted)+$ fool y $1 fool
+([zx].|foo|fool|[zq].|money|parted|[yx].)+$ fool y $1 fool
+(foo|fool|[zx].|money|parted)+$ fools n - -
+([zx].|foo|fool|[qx].|money|parted|[py].)+$ fools n - -
+
+(x|y|z[QW])+(longish|loquatious|excessive|overblown[QW])+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW
+(x|y|z[QW])*(longish|loquatious|excessive|overblown[QW])* xyzQzWlongishoverblownW y $1-$2 zW-overblownW
+(x|y|z[QW]){1,5}(longish|loquatious|excessive|overblown[QW]){1,5} xyzQzWlongishoverblownW y $1-$2 zW-overblownW
+
+(x|y|z[QW])++(longish|loquatious|excessive|overblown[QW])++ xyzQzWlongishoverblownW y $1-$2 zW-overblownW
+(x|y|z[QW])*+(longish|loquatious|excessive|overblown[QW])*+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW
+(x|y|z[QW]){1,5}+(longish|loquatious|excessive|overblown[QW]){1,5}+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW
+
+
+a*(?!) aaaab n - -
+a*(?FAIL) aaaab n - -
+a*(?F) aaaab n - -