ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK const regmatch_info *reginfo
Es |void |to_utf8_substr |NN regexp * prog
Es |void |to_byte_substr |NN regexp * prog
+ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex|NN const regnode *prog
# ifdef DEBUGGING
Es |void |dump_exec_pos |NN const char *locinput|NN const regnode *scan|NN const char *loc_regeol\
|NN const char *loc_bostr|NN const char *loc_reg_starttry|const bool do_utf8
#define find_byclass S_find_byclass
#define to_utf8_substr S_to_utf8_substr
#define to_byte_substr S_to_byte_substr
+#define reg_check_named_buff_matched S_reg_check_named_buff_matched
#endif
# ifdef DEBUGGING
#if defined(PERL_CORE) || defined(PERL_EXT)
#define find_byclass(a,b,c,d,e) S_find_byclass(aTHX_ a,b,c,d,e)
#define to_utf8_substr(a) S_to_utf8_substr(aTHX_ a)
#define to_byte_substr(a) S_to_byte_substr(aTHX_ a)
+#define reg_check_named_buff_matched(a,b) S_reg_check_named_buff_matched(aTHX_ a,b)
#endif
# ifdef DEBUGGING
#if defined(PERL_CORE) || defined(PERL_EXT)
Conditional expression. C<(condition)> should be either an integer in
parentheses (which is valid if the corresponding pair of parentheses
-matched), or look-ahead/look-behind/evaluate zero-width assertion.
+matched), a look-ahead/look-behind/evaluate zero-width assertion, a
+name in angle brackets or single quotes (which is valid if a buffer
+with the given name matched), or the special symbol (R) (true when
+evaluated inside of recursion or eval). Additionally the R may be
+followed by a number (which will be true when evaluated while recursing
+inside of the appropriate group), or by C<&NAME>, in which case it will
+be true only when evaluated during recursion into the named group.
For example:
- m{ ( \( )?
- [^()]+
- (?(1) \) )
+ m{ ( \( )?
+ [^()]+
+ (?(1) \) )
}x
matches a chunk of non-parentheses, possibly included in parentheses
themselves.
+An additional special form of this pattern is the DEFINE pattern, which
+never executes its yes-pattern except by recursion, and does not allow
+a no-pattern.
+
=back
=head2 Backtracking
STATIC void S_to_byte_substr(pTHX_ regexp * prog)
__attribute__nonnull__(pTHX_1);
+STATIC I32 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *prog)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+
# ifdef DEBUGGING
STATIC void S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const char *loc_regeol, const char *loc_bostr, const char *loc_reg_starttry, const bool do_utf8)
__attribute__nonnull__(pTHX_1)
}
flags &= ~SCF_DO_STCLASS;
}
- else if (OP(scan)==RECURSE) {
- ARG2L_SET( scan, RExC_parens[ARG(scan)-1] - scan );
- }
else if (strchr((const char*)PL_varies,OP(scan))) {
I32 mincount, maxcount, minnext, deltanext, fl = 0;
I32 f = flags, pos_before = 0;
if (data)
data->flags |= SF_HAS_EVAL;
}
- else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
+ else if ( (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
+ || OP(scan)==RECURSE) /* recursion */
+ {
+ if (OP(scan)==RECURSE) {
+ ARG2L_SET( scan, RExC_parens[ARG(scan)-1] - scan );
+ }
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state,data,minlenp);
data->longest = &(data->longest_float);
SV* sv_dat=HeVAL(he_str);
I32 *nums=(I32*)SvPVX(sv_dat);
for ( i=0; i<SvIVX(sv_dat); i++ ) {
- if ((I32)(rx->lastcloseparen) >= nums[i] &&
- rx->startp[nums[i]] != -1 &&
+ if ((I32)(rx->lastparen) >= nums[i] &&
rx->endp[nums[i]] != -1)
{
parno = nums[i];
}
}
+
/* Scans the name of a named buffer from the pattern.
- * If flags is true then returns an SV containing the name.
+ * RExC_parse is advanced past the scanned name; callers detect an
+ * empty name by comparing RExC_parse with its value before the call.
+ * If flags is REG_RSN_RETURN_NULL returns NULL.
+ * If flags is REG_RSN_RETURN_NAME returns a mortal SV* containing the name.
+ * If flags is REG_RSN_RETURN_DATA returns the data SV* corresponding
+ * to the parsed name as looked up in the RExC_paren_names hash, and
+ * fails with vFAIL() when the name is not found there.
+ * Any other non-zero flags value is a panic (Perl_croak).
+ * NOTE(review): every character of the name, not just the first, is
+ * tested with isIDFIRST()/isIDFIRST_uni(), so presumably a name that
+ * contains a digit stops being scanned at that digit -- confirm whether
+ * an alphanumeric test was intended for the non-initial characters.
*/
+
+#define REG_RSN_RETURN_NULL 0 /* scan only, return NULL */
+#define REG_RSN_RETURN_NAME 1 /* return the name itself as an SV */
+#define REG_RSN_RETURN_DATA 2 /* return the SV stored under that name */
+
STATIC SV*
S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
char *name_start = RExC_parse;
- if (UTF) {
+ if ( UTF ) { /* UTF-8 pattern: step by decoded character length */
STRLEN numlen;
- while (isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, UTF8_ALLOW_DEFAULT)))
- RExC_parse += numlen;
- }
- else {
- while (isIDFIRST(*RExC_parse))
+ while( isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
+ RExC_end - RExC_parse, &numlen, UTF8_ALLOW_DEFAULT)))
+ {
+ RExC_parse += numlen;
+ }
+ } else { /* byte pattern: step one char at a time */
+ while( isIDFIRST(*RExC_parse) )
RExC_parse++;
}
- if (flags) {
- SV* svname = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
- (int)(RExC_parse - name_start)));
+ if ( flags ) { /* non-zero flags: build an SV from the scanned name */
+ SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
+ (int)(RExC_parse - name_start)));
if (UTF)
- SvUTF8_on(svname);
- return svname;
- }
- else {
- return NULL;
+ SvUTF8_on(sv_name);
+ if ( flags == REG_RSN_RETURN_NAME)
+ return sv_name;
+ else if (flags==REG_RSN_RETURN_DATA) {
+ HE *he_str = NULL;
+ SV *sv_dat = NULL;
+ if ( ! sv_name ) /* should not happen*/
+ Perl_croak(aTHX_ "panic: no svname in reg_scan_name");
+ if (RExC_paren_names) /* if the hash is unset he_str stays NULL and we fail below */
+ he_str = hv_fetch_ent( RExC_paren_names, sv_name, 0, 0 );
+ if ( he_str )
+ sv_dat = HeVAL(he_str);
+ if ( ! sv_dat )
+ vFAIL("Reference to nonexistent named group");
+ return sv_dat;
+ }
+ else {
+ Perl_croak(aTHX_ "panic: bad flag in reg_scan_name");
+ }
+ /* NOT REACHED */
}
+ return NULL; /* flags was REG_RSN_RETURN_NULL (0): nothing to hand back */
}
#define DEBUG_PARSE_MSG(funcname) DEBUG_PARSE_r({ \
else \
num=REG_NODE_NUM(RExC_emit); \
if (RExC_lastnum!=num) \
- PerlIO_printf(Perl_debug_log,"|%4d",num); \
+ PerlIO_printf(Perl_debug_log,"|%4d",num); \
else \
- PerlIO_printf(Perl_debug_log,"|%4s",""); \
+ PerlIO_printf(Perl_debug_log,"|%4s",""); \
PerlIO_printf(Perl_debug_log,"|%*s%-4s", \
(int)((depth*2)), "", \
(funcname) \
case '<': /* (?<...) */
if (*RExC_parse == '!')
paren = ',';
- else if (*RExC_parse != '=') { /* (?<...>) */
+ else if (*RExC_parse != '=')
+ { /* (?<...>) */
char *name_start;
SV *svname;
paren= '>';
case '\'': /* (?'...') */
name_start= RExC_parse;
- svname = reg_scan_name(pRExC_state,SIZE_ONLY);
+ svname = reg_scan_name(pRExC_state,
+ SIZE_ONLY ? /* reverse test from the others */
+ REG_RSN_RETURN_NAME :
+ REG_RSN_RETURN_NULL);
if (RExC_parse == name_start)
goto unknown;
if (*RExC_parse != paren)
case '&': /* (?&NAME) */
parse_start = RExC_parse - 1;
{
- char *name_start = RExC_parse;
- SV *svname = reg_scan_name(pRExC_state, !SIZE_ONLY);
- if (RExC_parse == name_start)
- goto unknown;
- if (*RExC_parse != ')')
- vFAIL("Expecting close bracket");
- if (!SIZE_ONLY) {
- HE *he_str = NULL;
- SV *sv_dat;
- if (!svname) /* shouldn't happen*/
- Perl_croak(aTHX_ "panic: reg_scan_name returned NULL");
- if (RExC_paren_names)
- he_str = hv_fetch_ent( RExC_paren_names, svname, 0, 0 );
- if (he_str)
- sv_dat = HeVAL(he_str);
- else
- vFAIL("Reference to nonexistent group");
- num = *((I32 *)SvPVX(sv_dat));
- } else {
- num = 0;
- }
+ SV *sv_dat = reg_scan_name(pRExC_state,
+ SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+ num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
}
goto gen_recurse_regop;
/* NOT REACHED */
"Recurse #%"UVuf" to %"IVdf"\n", ARG(ret), ARG2L(ret)));
} else {
RExC_size++;
- RExC_seen|=REG_SEEN_RECURSE;
}
+ RExC_seen |= REG_SEEN_RECURSE;
Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
Set_Node_Offset(ret, parse_start); /* MJD */
}
case '(': /* (?(?{...})...) and (?(?=...)...) */
{
+ int is_define= 0;
if (RExC_parse[0] == '?') { /* (?(?...)) */
if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
|| RExC_parse[1] == '<'
goto insert_if;
}
}
+ else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
+ || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
+ {
+ char ch = RExC_parse[0] == '<' ? '>' : '\'';
+ char *name_start= RExC_parse++;
+ I32 num = 0;
+ SV *sv_dat=reg_scan_name(pRExC_state,
+ SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+ if (RExC_parse == name_start || *RExC_parse != ch)
+ vFAIL2("Sequence (?(%c... not terminated",
+ (ch == '>' ? '<' : ch));
+ RExC_parse++;
+ if (!SIZE_ONLY) {
+ num = add_data( pRExC_state, 1, "S" );
+ RExC_rx->data->data[num]=(void*)sv_dat;
+ SvREFCNT_inc(sv_dat);
+ }
+ ret = reganode(pRExC_state,NGROUPP,num);
+ goto insert_if_check_paren;
+ }
+ else if (RExC_parse[0] == 'D' &&
+ RExC_parse[1] == 'E' &&
+ RExC_parse[2] == 'F' &&
+ RExC_parse[3] == 'I' &&
+ RExC_parse[4] == 'N' &&
+ RExC_parse[5] == 'E')
+ {
+ ret = reganode(pRExC_state,DEFINEP,0);
+ RExC_parse +=6 ;
+ is_define = 1;
+ goto insert_if_check_paren;
+ }
+ else if (RExC_parse[0] == 'R') {
+ RExC_parse++;
+ parno = 0;
+ if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+ parno = atoi(RExC_parse++);
+ while (isDIGIT(*RExC_parse))
+ RExC_parse++;
+ } else if (RExC_parse[0] == '&') {
+ SV *sv_dat;
+ RExC_parse++;
+ sv_dat = reg_scan_name(pRExC_state,
+ SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+ parno = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+ }
+ ret = reganode(pRExC_state,RECURSEP,parno);
+ goto insert_if_check_paren;
+ }
else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
/* (?(1)...) */
char c;
RExC_parse++;
ret = reganode(pRExC_state, GROUPP, parno);
+ insert_if_check_paren:
if ((c = *nextchar(pRExC_state)) != ')')
vFAIL("Switch condition not recognized");
insert_if:
if (flags&HASWIDTH)
*flagp |= HASWIDTH;
if (c == '|') {
+ if (is_define)
+ vFAIL("(?(DEFINE)....) does not allow branches");
lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
regbranch(pRExC_state, &flags, 1,depth+1);
REGTAIL(pRExC_state, ret, lastbr);
++RExC_parse;
ret= reg_namedseq(pRExC_state, NULL);
break;
- case 'k':
+ case 'k': /* Handle \k<NAME> and \k'NAME' */
{
char ch= RExC_parse[1];
if (ch != '<' && ch != '\'') {
} else {
char* name_start = (RExC_parse += 2);
I32 num = 0;
- SV *svname = reg_scan_name(pRExC_state,!SIZE_ONLY);
+ SV *sv_dat = reg_scan_name(pRExC_state,
+ SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
ch= (ch == '<') ? '>' : '\'';
if (RExC_parse == name_start || *RExC_parse != ch)
if (!SIZE_ONLY) {
- HE *he_str = NULL;
- SV *sv_dat;
- if (!svname)
- Perl_croak(aTHX_
- "panic: reg_scan_name returned NULL");
- if (RExC_paren_names)
- he_str = hv_fetch_ent( RExC_paren_names, svname, 0, 0 );
- if ( he_str ) {
- sv_dat = HeVAL(he_str);
- } else {
- vFAIL("Reference to nonexistent group");
- }
num = add_data( pRExC_state, 1, "S" );
ARG_SET(ret,num);
RExC_rx->data->data[num]=(void*)sv_dat;
NREFFL NREF, no-sv 1 Match already matched string, folded in loc.
+#*Special conditionals
+NGROUPP NGROUPP, no-sv 1 Whether the group matched.
+RECURSEP RECURSEP, num 1 Whether we are in a specific recurse.
+DEFINEP DEFINEP, none 1 Never execute directly.
+
# NEW STUFF ABOVE THIS LINE -- Please update counts below.
################################################################################
#endif
+/* reg_check_named_buff_matched()
+ * Checks to see if a named buffer has matched. The data array of
+ * buffer numbers corresponding to the buffer is expected to reside
+ * in the regexp->data->data array in the slot stored in the ARG() of
+ * node involved. Note that this routine doesn't actually care about the
+ * name, that information is not preserved from compilation to execution.
+ *
+ * rex  - the regexp whose data->data array holds the SV for this node.
+ * scan - the node whose ARG() selects that slot (this parameter is
+ *        called "prog" in the prototype; only the name differs).
+ *
+ * The SV's PV is read as an array of I32 buffer numbers and its IV as
+ * the number of entries. An entry counts as matched when it is no
+ * greater than *PL_reglastparen and its PL_regendp slot is not -1.
+ *
+ * Returns the index of the leftmost defined buffer with the given name
+ * (the first matching entry in the stored list) or 0 if none of the
+ * buffers matched.
+ */
+STATIC I32
+S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
+ I32 n;
+ SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ];
+ I32 *nums=(I32*)SvPVX(sv_dat); /* buffer numbers sharing this name */
+ for ( n=0; n<SvIVX(sv_dat); n++ ) { /* SvIVX holds the entry count */
+ if ((I32)*PL_reglastparen >= nums[n] &&
+ PL_regendp[nums[n]] != -1)
+ {
+ return nums[n]; /* first entry that has matched wins */
+ }
+ }
+ return 0; /* 0 is never a valid buffer number here, so it means "no match" */
+}
+
STATIC I32 /* 0 failure, 1 success */
S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
{
case NREF:
case NREFF:
type = OP(scan);
- {
- SV *sv_dat=(SV*)rex->data->data[ ARG( scan ) ];
- I32 *nums=(I32*)SvPVX(sv_dat);
- for ( n=0; n<SvIVX(sv_dat); n++ ) {
- if ((I32)*PL_reglastparen >= nums[n] &&
- PL_regstartp[nums[n]] != -1 &&
- PL_regendp[nums[n]] != -1)
- {
- n = nums[n];
- type = REF + ( type - NREF );
- goto do_ref;
- }
- }
+ n = reg_check_named_buff_matched(rex,scan);
+
+ if ( n ) {
+ type = REF + ( type - NREF );
+ goto do_ref;
+ } else {
sayNO;
- /* unreached */
- }
+ }
+ /* unreached */
case REFFL:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
n = ARG(scan); /* which paren pair */
sw = (bool)((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
break;
+ case NGROUPP:
+ /* reg_check_named_buff_matched returns 0 for no match */
+ sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
+ break;
+ case RECURSEP:
+ n = ARG(scan);
+ sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
+ break;
+ case DEFINEP:
+ sw = 0;
+ break;
case IFTHEN:
PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
if (sw)
case WHILEM: /* just matched an A in /A*B/ (for complex A) */
{
/* see the discussion above about CURLYX/WHILEM */
-
I32 n;
assert(cur_curlyx); /* keep Coverity happy */
n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
for (n = *PL_reglastparen; n > ST.lastparen; n--)
PL_regendp[n] = -1;
*PL_reglastparen = n;
+ /*dmq: *PL_reglastcloseparen = n; */
scan = ST.next_branch;
/* no more branches? */
if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ))
);
locinput = PL_reginput;
- if (ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)))
+
+ if (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.me->flags)
+ goto fake_end;
+
+ if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
goto curlym_do_A; /* try to match another A */
goto curlym_do_B; /* try to match B */
case CURLYM_A_fail: /* just failed to match an A */
REGCP_UNWIND(ST.cp);
- if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ )
+
+ if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
+ || (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.me->flags))
sayNO;
curlym_do_B: /* execute the B in /A{m,n}B/ */
PL_regstartp[paren]
= HOPc(PL_reginput, -ST.alen) - PL_bostr;
PL_regendp[paren] = PL_reginput - PL_bostr;
+ /*dmq: *PL_reglastcloseparen = paren; */
}
else
PL_regendp[paren] = -1;
+ if (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.me->flags)
+ {
+ if (ST.count)
+ goto fake_end;
+ else
+ sayNO;
+ }
}
+
PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
/* NOTREACHED */
if (success) { \
PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \
PL_regendp[paren] = locinput - PL_bostr; \
+ *PL_reglastcloseparen = paren; \
} \
else \
PL_regendp[paren] = -1; \
*PL_reglastparen = ST.paren;
ST.min = ARG1(scan); /* min to match */
ST.max = ARG2(scan); /* max to match */
+ if (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.paren) {
+ ST.min=1;
+ ST.max=1;
+ }
scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
goto repeat;
case CURLY: /* /A{m,n}B/ where A is width 1 */
}
PL_reginput = locinput;
CURLY_SETPAREN(ST.paren, ST.count);
+ if (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.paren) {
+ goto fake_end;
+ }
PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
}
/* NOTREACHED */
{
curly_try_B_min:
CURLY_SETPAREN(ST.paren, ST.count);
+ if (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.paren) {
+ goto fake_end;
+ }
PUSH_STATE_GOTO(CURLY_B_min, ST.B);
}
}
/* If it could work, try it. */
if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
CURLY_SETPAREN(ST.paren, ST.count);
+ if (cur_eval && cur_eval->u.eval.close_paren &&
+ cur_eval->u.eval.close_paren == ST.paren) {
+ goto fake_end;
+ }
PUSH_STATE_GOTO(CURLY_B_max, ST.B);
/* NOTREACHED */
}
/* Regops and State definitions */
-#define REGNODE_MAX 71
-#define REGMATCH_STATE_MAX 101
+#define REGNODE_MAX 74
+#define REGMATCH_STATE_MAX 104
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
#define NREF 67 /* 0x43 Match some already matched string */
#define NREFF 68 /* 0x44 Match already matched string, folded */
#define NREFFL 69 /* 0x45 Match already matched string, folded in loc. */
-#define OPTIMIZED 70 /* 0x46 Placeholder for dump. */
-#define PSEUDO 71 /* 0x47 Pseudo opcode for internal use. */
+#define NGROUPP 70 /* 0x46 Whether the group matched. */
+#define RECURSEP 71 /* 0x47 Whether we are in a specific recurse. */
+#define DEFINEP 72 /* 0x48 Never execute directly. */
+#define OPTIMIZED 73 /* 0x49 Placeholder for dump. */
+#define PSEUDO 74 /* 0x4a Pseudo opcode for internal use. */
/* ------------ States ------------- */
-#define TRIE_next 72 /* 0x48 Regmatch state for TRIE */
-#define TRIE_next_fail 73 /* 0x49 Regmatch state for TRIE */
-#define EVAL_AB 74 /* 0x4a Regmatch state for EVAL */
-#define EVAL_AB_fail 75 /* 0x4b Regmatch state for EVAL */
-#define CURLYX_end 76 /* 0x4c Regmatch state for CURLYX */
-#define CURLYX_end_fail 77 /* 0x4d Regmatch state for CURLYX */
-#define WHILEM_A_pre 78 /* 0x4e Regmatch state for WHILEM */
-#define WHILEM_A_pre_fail 79 /* 0x4f Regmatch state for WHILEM */
-#define WHILEM_A_min 80 /* 0x50 Regmatch state for WHILEM */
-#define WHILEM_A_min_fail 81 /* 0x51 Regmatch state for WHILEM */
-#define WHILEM_A_max 82 /* 0x52 Regmatch state for WHILEM */
-#define WHILEM_A_max_fail 83 /* 0x53 Regmatch state for WHILEM */
-#define WHILEM_B_min 84 /* 0x54 Regmatch state for WHILEM */
-#define WHILEM_B_min_fail 85 /* 0x55 Regmatch state for WHILEM */
-#define WHILEM_B_max 86 /* 0x56 Regmatch state for WHILEM */
-#define WHILEM_B_max_fail 87 /* 0x57 Regmatch state for WHILEM */
-#define BRANCH_next 88 /* 0x58 Regmatch state for BRANCH */
-#define BRANCH_next_fail 89 /* 0x59 Regmatch state for BRANCH */
-#define CURLYM_A 90 /* 0x5a Regmatch state for CURLYM */
-#define CURLYM_A_fail 91 /* 0x5b Regmatch state for CURLYM */
-#define CURLYM_B 92 /* 0x5c Regmatch state for CURLYM */
-#define CURLYM_B_fail 93 /* 0x5d Regmatch state for CURLYM */
-#define IFMATCH_A 94 /* 0x5e Regmatch state for IFMATCH */
-#define IFMATCH_A_fail 95 /* 0x5f Regmatch state for IFMATCH */
-#define CURLY_B_min_known 96 /* 0x60 Regmatch state for CURLY */
-#define CURLY_B_min_known_fail 97 /* 0x61 Regmatch state for CURLY */
-#define CURLY_B_min 98 /* 0x62 Regmatch state for CURLY */
-#define CURLY_B_min_fail 99 /* 0x63 Regmatch state for CURLY */
-#define CURLY_B_max 100 /* 0x64 Regmatch state for CURLY */
-#define CURLY_B_max_fail 101 /* 0x65 Regmatch state for CURLY */
+#define TRIE_next 75 /* 0x4b Regmatch state for TRIE */
+#define TRIE_next_fail 76 /* 0x4c Regmatch state for TRIE */
+#define EVAL_AB 77 /* 0x4d Regmatch state for EVAL */
+#define EVAL_AB_fail 78 /* 0x4e Regmatch state for EVAL */
+#define CURLYX_end 79 /* 0x4f Regmatch state for CURLYX */
+#define CURLYX_end_fail 80 /* 0x50 Regmatch state for CURLYX */
+#define WHILEM_A_pre 81 /* 0x51 Regmatch state for WHILEM */
+#define WHILEM_A_pre_fail 82 /* 0x52 Regmatch state for WHILEM */
+#define WHILEM_A_min 83 /* 0x53 Regmatch state for WHILEM */
+#define WHILEM_A_min_fail 84 /* 0x54 Regmatch state for WHILEM */
+#define WHILEM_A_max 85 /* 0x55 Regmatch state for WHILEM */
+#define WHILEM_A_max_fail 86 /* 0x56 Regmatch state for WHILEM */
+#define WHILEM_B_min 87 /* 0x57 Regmatch state for WHILEM */
+#define WHILEM_B_min_fail 88 /* 0x58 Regmatch state for WHILEM */
+#define WHILEM_B_max 89 /* 0x59 Regmatch state for WHILEM */
+#define WHILEM_B_max_fail 90 /* 0x5a Regmatch state for WHILEM */
+#define BRANCH_next 91 /* 0x5b Regmatch state for BRANCH */
+#define BRANCH_next_fail 92 /* 0x5c Regmatch state for BRANCH */
+#define CURLYM_A 93 /* 0x5d Regmatch state for CURLYM */
+#define CURLYM_A_fail 94 /* 0x5e Regmatch state for CURLYM */
+#define CURLYM_B 95 /* 0x5f Regmatch state for CURLYM */
+#define CURLYM_B_fail 96 /* 0x60 Regmatch state for CURLYM */
+#define IFMATCH_A 97 /* 0x61 Regmatch state for IFMATCH */
+#define IFMATCH_A_fail 98 /* 0x62 Regmatch state for IFMATCH */
+#define CURLY_B_min_known 99 /* 0x63 Regmatch state for CURLY */
+#define CURLY_B_min_known_fail 100 /* 0x64 Regmatch state for CURLY */
+#define CURLY_B_min 101 /* 0x65 Regmatch state for CURLY */
+#define CURLY_B_min_fail 102 /* 0x66 Regmatch state for CURLY */
+#define CURLY_B_max 103 /* 0x67 Regmatch state for CURLY */
+#define CURLY_B_max_fail 104 /* 0x68 Regmatch state for CURLY */
/* PL_regkind[] What type of regop or state is this. */
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
- END, /* END */
- END, /* SUCCEED */
- BOL, /* BOL */
- BOL, /* MBOL */
- BOL, /* SBOL */
- EOL, /* EOS */
- EOL, /* EOL */
- EOL, /* MEOL */
- EOL, /* SEOL */
- BOUND, /* BOUND */
- BOUND, /* BOUNDL */
- NBOUND, /* NBOUND */
- NBOUND, /* NBOUNDL */
- GPOS, /* GPOS */
- REG_ANY, /* REG_ANY */
- REG_ANY, /* SANY */
- REG_ANY, /* CANY */
- ANYOF, /* ANYOF */
- ALNUM, /* ALNUM */
- ALNUM, /* ALNUML */
- NALNUM, /* NALNUM */
- NALNUM, /* NALNUML */
- SPACE, /* SPACE */
- SPACE, /* SPACEL */
- NSPACE, /* NSPACE */
- NSPACE, /* NSPACEL */
- DIGIT, /* DIGIT */
- DIGIT, /* DIGITL */
- NDIGIT, /* NDIGIT */
- NDIGIT, /* NDIGITL */
- CLUMP, /* CLUMP */
- BRANCH, /* BRANCH */
- BACK, /* BACK */
- EXACT, /* EXACT */
- EXACT, /* EXACTF */
- EXACT, /* EXACTFL */
- NOTHING, /* NOTHING */
- NOTHING, /* TAIL */
- STAR, /* STAR */
- PLUS, /* PLUS */
- CURLY, /* CURLY */
- CURLY, /* CURLYN */
- CURLY, /* CURLYM */
- CURLY, /* CURLYX */
- WHILEM, /* WHILEM */
- OPEN, /* OPEN */
- CLOSE, /* CLOSE */
- REF, /* REF */
- REF, /* REFF */
- REF, /* REFFL */
- BRANCHJ, /* IFMATCH */
- BRANCHJ, /* UNLESSM */
- BRANCHJ, /* SUSPEND */
- BRANCHJ, /* IFTHEN */
- GROUPP, /* GROUPP */
- LONGJMP, /* LONGJMP */
- BRANCHJ, /* BRANCHJ */
- EVAL, /* EVAL */
- MINMOD, /* MINMOD */
- LOGICAL, /* LOGICAL */
- BRANCHJ, /* RENUM */
- TRIE, /* TRIE */
- TRIE, /* TRIEC */
- TRIE, /* AHOCORASICK */
- TRIE, /* AHOCORASICKC */
- RECURSE, /* RECURSE */
- RECURSE, /* SRECURSE */
- NREF, /* NREF */
- NREF, /* NREFF */
- NREF, /* NREFFL */
- NOTHING, /* OPTIMIZED */
- PSEUDO, /* PSEUDO */
+ END, /* END */
+ END, /* SUCCEED */
+ BOL, /* BOL */
+ BOL, /* MBOL */
+ BOL, /* SBOL */
+ EOL, /* EOS */
+ EOL, /* EOL */
+ EOL, /* MEOL */
+ EOL, /* SEOL */
+ BOUND, /* BOUND */
+ BOUND, /* BOUNDL */
+ NBOUND, /* NBOUND */
+ NBOUND, /* NBOUNDL */
+ GPOS, /* GPOS */
+ REG_ANY, /* REG_ANY */
+ REG_ANY, /* SANY */
+ REG_ANY, /* CANY */
+ ANYOF, /* ANYOF */
+ ALNUM, /* ALNUM */
+ ALNUM, /* ALNUML */
+ NALNUM, /* NALNUM */
+ NALNUM, /* NALNUML */
+ SPACE, /* SPACE */
+ SPACE, /* SPACEL */
+ NSPACE, /* NSPACE */
+ NSPACE, /* NSPACEL */
+ DIGIT, /* DIGIT */
+ DIGIT, /* DIGITL */
+ NDIGIT, /* NDIGIT */
+ NDIGIT, /* NDIGITL */
+ CLUMP, /* CLUMP */
+ BRANCH, /* BRANCH */
+ BACK, /* BACK */
+ EXACT, /* EXACT */
+ EXACT, /* EXACTF */
+ EXACT, /* EXACTFL */
+ NOTHING, /* NOTHING */
+ NOTHING, /* TAIL */
+ STAR, /* STAR */
+ PLUS, /* PLUS */
+ CURLY, /* CURLY */
+ CURLY, /* CURLYN */
+ CURLY, /* CURLYM */
+ CURLY, /* CURLYX */
+ WHILEM, /* WHILEM */
+ OPEN, /* OPEN */
+ CLOSE, /* CLOSE */
+ REF, /* REF */
+ REF, /* REFF */
+ REF, /* REFFL */
+ BRANCHJ, /* IFMATCH */
+ BRANCHJ, /* UNLESSM */
+ BRANCHJ, /* SUSPEND */
+ BRANCHJ, /* IFTHEN */
+ GROUPP, /* GROUPP */
+ LONGJMP, /* LONGJMP */
+ BRANCHJ, /* BRANCHJ */
+ EVAL, /* EVAL */
+ MINMOD, /* MINMOD */
+ LOGICAL, /* LOGICAL */
+ BRANCHJ, /* RENUM */
+ TRIE, /* TRIE */
+ TRIE, /* TRIEC */
+ TRIE, /* AHOCORASICK */
+ TRIE, /* AHOCORASICKC */
+ RECURSE, /* RECURSE */
+ RECURSE, /* SRECURSE */
+ NREF, /* NREF */
+ NREF, /* NREFF */
+ NREF, /* NREFFL */
+ NGROUPP, /* NGROUPP */
+ RECURSEP, /* RECURSEP */
+ DEFINEP, /* DEFINEP */
+ NOTHING, /* OPTIMIZED */
+ PSEUDO, /* PSEUDO */
/* ------------ States ------------- */
- TRIE, /* TRIE_next */
- TRIE, /* TRIE_next_fail */
- EVAL, /* EVAL_AB */
- EVAL, /* EVAL_AB_fail */
- CURLYX, /* CURLYX_end */
- CURLYX, /* CURLYX_end_fail */
- WHILEM, /* WHILEM_A_pre */
- WHILEM, /* WHILEM_A_pre_fail */
- WHILEM, /* WHILEM_A_min */
- WHILEM, /* WHILEM_A_min_fail */
- WHILEM, /* WHILEM_A_max */
- WHILEM, /* WHILEM_A_max_fail */
- WHILEM, /* WHILEM_B_min */
- WHILEM, /* WHILEM_B_min_fail */
- WHILEM, /* WHILEM_B_max */
- WHILEM, /* WHILEM_B_max_fail */
- BRANCH, /* BRANCH_next */
- BRANCH, /* BRANCH_next_fail */
- CURLYM, /* CURLYM_A */
- CURLYM, /* CURLYM_A_fail */
- CURLYM, /* CURLYM_B */
- CURLYM, /* CURLYM_B_fail */
- IFMATCH, /* IFMATCH_A */
- IFMATCH, /* IFMATCH_A_fail */
- CURLY, /* CURLY_B_min_known */
- CURLY, /* CURLY_B_min_known_fail */
- CURLY, /* CURLY_B_min */
- CURLY, /* CURLY_B_min_fail */
- CURLY, /* CURLY_B_max */
- CURLY, /* CURLY_B_max_fail */
+ TRIE, /* TRIE_next */
+ TRIE, /* TRIE_next_fail */
+ EVAL, /* EVAL_AB */
+ EVAL, /* EVAL_AB_fail */
+ CURLYX, /* CURLYX_end */
+ CURLYX, /* CURLYX_end_fail */
+ WHILEM, /* WHILEM_A_pre */
+ WHILEM, /* WHILEM_A_pre_fail */
+ WHILEM, /* WHILEM_A_min */
+ WHILEM, /* WHILEM_A_min_fail */
+ WHILEM, /* WHILEM_A_max */
+ WHILEM, /* WHILEM_A_max_fail */
+ WHILEM, /* WHILEM_B_min */
+ WHILEM, /* WHILEM_B_min_fail */
+ WHILEM, /* WHILEM_B_max */
+ WHILEM, /* WHILEM_B_max_fail */
+ BRANCH, /* BRANCH_next */
+ BRANCH, /* BRANCH_next_fail */
+ CURLYM, /* CURLYM_A */
+ CURLYM, /* CURLYM_A_fail */
+ CURLYM, /* CURLYM_B */
+ CURLYM, /* CURLYM_B_fail */
+ IFMATCH, /* IFMATCH_A */
+ IFMATCH, /* IFMATCH_A_fail */
+ CURLY, /* CURLY_B_min_known */
+ CURLY, /* CURLY_B_min_known_fail */
+ CURLY, /* CURLY_B_min */
+ CURLY, /* CURLY_B_min_fail */
+ CURLY, /* CURLY_B_max */
+ CURLY, /* CURLY_B_max_fail */
};
#endif
EXTRA_SIZE(struct regnode_1), /* NREF */
EXTRA_SIZE(struct regnode_1), /* NREFF */
EXTRA_SIZE(struct regnode_1), /* NREFFL */
+ EXTRA_SIZE(struct regnode_1), /* NGROUPP */
+ EXTRA_SIZE(struct regnode_1), /* RECURSEP */
+ EXTRA_SIZE(struct regnode_1), /* DEFINEP */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
0, /* NREF */
0, /* NREFF */
0, /* NREFFL */
+ 0, /* NGROUPP */
+ 0, /* RECURSEP */
+ 0, /* DEFINEP */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
"NREF", /* 0x43 */
"NREFF", /* 0x44 */
"NREFFL", /* 0x45 */
- "OPTIMIZED", /* 0x46 */
- "PSEUDO", /* 0x47 */
+ "NGROUPP", /* 0x46 */
+ "RECURSEP", /* 0x47 */
+ "DEFINEP", /* 0x48 */
+ "OPTIMIZED", /* 0x49 */
+ "PSEUDO", /* 0x4a */
/* ------------ States ------------- */
- "TRIE_next", /* 0x48 */
- "TRIE_next_fail", /* 0x49 */
- "EVAL_AB", /* 0x4a */
- "EVAL_AB_fail", /* 0x4b */
- "CURLYX_end", /* 0x4c */
- "CURLYX_end_fail", /* 0x4d */
- "WHILEM_A_pre", /* 0x4e */
- "WHILEM_A_pre_fail", /* 0x4f */
- "WHILEM_A_min", /* 0x50 */
- "WHILEM_A_min_fail", /* 0x51 */
- "WHILEM_A_max", /* 0x52 */
- "WHILEM_A_max_fail", /* 0x53 */
- "WHILEM_B_min", /* 0x54 */
- "WHILEM_B_min_fail", /* 0x55 */
- "WHILEM_B_max", /* 0x56 */
- "WHILEM_B_max_fail", /* 0x57 */
- "BRANCH_next", /* 0x58 */
- "BRANCH_next_fail", /* 0x59 */
- "CURLYM_A", /* 0x5a */
- "CURLYM_A_fail", /* 0x5b */
- "CURLYM_B", /* 0x5c */
- "CURLYM_B_fail", /* 0x5d */
- "IFMATCH_A", /* 0x5e */
- "IFMATCH_A_fail", /* 0x5f */
- "CURLY_B_min_known", /* 0x60 */
- "CURLY_B_min_known_fail", /* 0x61 */
- "CURLY_B_min", /* 0x62 */
- "CURLY_B_min_fail", /* 0x63 */
- "CURLY_B_max", /* 0x64 */
- "CURLY_B_max_fail", /* 0x65 */
+ "TRIE_next", /* 0x4b */
+ "TRIE_next_fail", /* 0x4c */
+ "EVAL_AB", /* 0x4d */
+ "EVAL_AB_fail", /* 0x4e */
+ "CURLYX_end", /* 0x4f */
+ "CURLYX_end_fail", /* 0x50 */
+ "WHILEM_A_pre", /* 0x51 */
+ "WHILEM_A_pre_fail", /* 0x52 */
+ "WHILEM_A_min", /* 0x53 */
+ "WHILEM_A_min_fail", /* 0x54 */
+ "WHILEM_A_max", /* 0x55 */
+ "WHILEM_A_max_fail", /* 0x56 */
+ "WHILEM_B_min", /* 0x57 */
+ "WHILEM_B_min_fail", /* 0x58 */
+ "WHILEM_B_max", /* 0x59 */
+ "WHILEM_B_max_fail", /* 0x5a */
+ "BRANCH_next", /* 0x5b */
+ "BRANCH_next_fail", /* 0x5c */
+ "CURLYM_A", /* 0x5d */
+ "CURLYM_A_fail", /* 0x5e */
+ "CURLYM_B", /* 0x5f */
+ "CURLYM_B_fail", /* 0x60 */
+ "IFMATCH_A", /* 0x61 */
+ "IFMATCH_A_fail", /* 0x62 */
+ "CURLY_B_min_known", /* 0x63 */
+ "CURLY_B_min_known_fail", /* 0x64 */
+ "CURLY_B_min", /* 0x65 */
+ "CURLY_B_min_fail", /* 0x66 */
+ "CURLY_B_max", /* 0x67 */
+ "CURLY_B_max_fail", /* 0x68 */
};
#endif /* DEBUGGING */
#else
iseq(0+@k, 3, 'Got 3 keys in %+ via keys');
iseq("@k","A B C", "Got expected keys");
iseq("@v","bar baz foo", "Got expected values");
+ eval'
+ print for $+{this_key_doesnt_exist};
+ ';
+ ok(!$@,'lvalue $+{...} should not throw an exception');
}
-
-
+
+
# stress test CURLYX/WHILEM.
#
# This test includes varying levels of nesting, and according to
or print "# Unexpected outcome: should pass or crash perl\n";
# Don't forget to update this!
-BEGIN{print "1..1274\n"};
+BEGIN{print "1..1275\n"};
/^(?'main'<(?:[^<>]+|(?&crap)|(?&main))*>)(?'empty')(?'crap'!>!>!>)$/ <<!>!>!>><>>!>!>!> y $+{main} <<!>!>!>><>>
/^(?'main'<(?:[^<>]+|(?&main))*>)$/ <<><<<><>>>> y $1 <<><<<><>>>>
/(?'first'(?&second)*)(?'second'[fF]o+)/ fooFoFoo y $+{first}-$+{second} fooFo-Foo
+(?<A>foo)?(?(<A>)bar|nada) foobar y $+{A} foo
+(?<A>foo)?(?(<A>)bar|nada) foo-barnada y $& nada
+(?<A>foo)?(?(1)bar|nada) foo-barnada y $& nada
+(?<A>foo(?(R)bar))?(?1) foofoobar y $1 foo
+(?<A>foo(?(R)bar))?(?1) foofoobar y $& foofoobar
+(x)(?<A>foo(?(R&A)bar))?(?&A) xfoofoobar y $2 foo
+(x)(?<A>foo(?(R&A)bar))?(?&A) xfoofoobar y $& xfoofoobar
+(x)(?<A>foo(?(R2)bar))?(?&A) xfoofoobar y $2 foo
+(x)(?<A>foo(?(R2)bar))?(?&A) xfoofoobar y $& xfoofoobar
+(?1)(?(DEFINE)(blah)) blah y $& blah
+/^(?<PAL>(?<CHAR>.)((?&PAL)|.?)\k<CHAR>)$/ madamimadam y $& madamimadam
+/^(?<PAL>(?<CHAR>.)((?&PAL)|.?)\k<CHAR>)$/ madamiamadam n - -
+/(a)?((?1))(fox)/ aafox y $1-$2-$3 a-a-fox
+/(a)*((?1))(fox)/ aafox y $1-$2-$3 a-a-fox
+/(a)+((?1))(fox)/ aafox y $1-$2-$3 a-a-fox
+/(a){1,100}((?1))(fox)/ aafox y $1-$2-$3 a-a-fox
+/(a){0,100}((?1))(fox)/ aafox y $1-$2-$3 a-a-fox
+/(ab)?((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox
+/(ab)*((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox
+/(ab)+((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox
+/(ab){1,100}((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox
+/(ab){0,100}((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox