* requiring null fields (pat.t#187 and split.t#{13,14}
* (as of patchlevel 7877) will fail. Then again,
* this code seems to be necessary or otherwise
- * building DynaLoader will fail:
- * "Error: '*' not in typemap in DynaLoader.xs, line 164"
- * --jhi */
+ * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
+ * --jhi updated by dapm */
for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
if (i > PL_regsize)
PL_regoffs[i].start = -1;
- pregexec - match a regexp against a string
*/
I32
-Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
char *strbeg, I32 minend, SV *screamer, U32 nosave)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
deleted from the finite automaton. */
char *
-Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
- char *strend, U32 flags, re_scream_pos_data *data)
+Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
+ char *strend, const U32 flags, re_scream_pos_data *data)
{
dVAR;
register I32 start_shift = 0;
if ((!reginfo || regtry(reginfo, &s))) \
goto got_it
+#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
+ if (do_utf8) { \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break
+
#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
if (do_utf8) { \
UtFpReLoAd; \
!isDIGIT_LC_utf8((U8*)s),
!isDIGIT_LC(*s)
);
+ case LNBREAK:
+ REXEC_FBC_CSCAN(
+ is_LNBREAK_utf8(s),
+ is_LNBREAK_latin1(s)
+ );
+ case VERTWS:
+ REXEC_FBC_CSCAN(
+ is_VERTWS_utf8(s),
+ is_VERTWS_latin1(s)
+ );
+ case NVERTWS:
+ REXEC_FBC_CSCAN(
+ !is_VERTWS_utf8(s),
+ !is_VERTWS_latin1(s)
+ );
+ case HORIZWS:
+ REXEC_FBC_CSCAN(
+ is_HORIZWS_utf8(s),
+ is_HORIZWS_latin1(s)
+ );
+ case NHORIZWS:
+ REXEC_FBC_CSCAN(
+ !is_HORIZWS_utf8(s),
+ !is_HORIZWS_latin1(s)
+ );
case AHOCORASICKC:
case AHOCORASICK:
{
U8 **points; /* map of where we were in the input string
when reading a given char. For ASCII this
is unnecessary overhead as the relationship
- is always 1:1, but for unicode, especially
- case folded unicode this is not true. */
+ is always 1:1, but for Unicode, especially
+ case folded Unicode this is not true. */
U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
U8 *bitmap=NULL;
- regexec_flags - match a regexp against a string
*/
I32
-Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
if (regtry(®info, &s))
goto got_it;
after_try:
- if (s >= end)
+ if (s > end)
goto phooey;
if (prog->extflags & RXf_USE_INTUIT) {
s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
/* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
* Actually, the code in regcppop() (which Ilya may be meaning by
* PL_reglastparen), is not needed at all by the test suite
- * (op/regexp, op/pat, op/split), but that code is needed, oddly
- * enough, for building DynaLoader, or otherwise this
- * "Error: '*' not in typemap in DynaLoader.xs, line 164"
- * will happen. Meanwhile, this code *is* needed for the
+ * (op/regexp, op/pat, op/split), but that code is needed otherwise
+ * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
+ * Meanwhile, this code *is* needed for the
* above-mentioned test suite tests to succeed. The common theme
* on those tests seems to be returning null fields from matches.
- * --jhi */
+ * --jhi updated by dapm */
#if 1
if (prog->nparens) {
regexp_paren_pair *pp = PL_regoffs;
if ( got_wordnum ) {
if ( ! ST.accepted ) {
ENTER;
- SAVETMPS;
+ /* SAVETMPS; */ /* XXX is this necessary? dmq */
bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
sv_accept_buff=newSV(bufflen *
sizeof(reg_trie_accepted) - 1);
PL_reginput = (char *)ST.accept_buff[ best ].endpos;
if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
scan = ST.B;
- /* NOTREACHED */
} else {
scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
- /* NOTREACHED */
- }
- if (has_cutgroup) {
- PUSH_YES_STATE_GOTO(TRIE_next, scan);
- /* NOTREACHED */
- } else {
- PUSH_STATE_GOTO(TRIE_next, scan);
- /* NOTREACHED */
}
+ PUSH_YES_STATE_GOTO(TRIE_next, scan);
/* NOTREACHED */
}
/* NOTREACHED */
* pack("U0U*", 0xDF) =~ /ss/i,
* the 0xC3 0x9F are the UTF-8
* byte sequence for the U+00DF. */
+
if (!(do_utf8 &&
- toLOWER(s[0]) == 's' &&
+ toLOWER(s[0]) == 's' &&
ln >= 2 &&
toLOWER(s[1]) == 's' &&
(U8)l[0] == 0xC3 &&
re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
}
else {
- STRLEN len;
- const char * const t = SvPV_const(ret, len);
- PMOP pm;
+ U32 pm_flags = 0;
const I32 osize = PL_regsize;
- Zero(&pm, 1, PMOP);
- if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
- re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
+ if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
+ re = CALLREGCOMP(ret, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
| SVs_GMG)))
case BRANCH: /* /(...|A|...)/ */
scan = NEXTOPER(scan); /* scan now points to inner node */
- if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
- && !has_cutgroup)
- {
- /* last branch; skip state push and jump direct to node */
- continue;
- }
ST.lastparen = *PL_reglastparen;
ST.next_branch = next;
REGCP_SET(ST.cp);
sayNO;
/* NOTREACHED */
#undef ST
+ case FOLDCHAR:
+ n = ARG(scan);
+ if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
+ locinput += ln;
+ } else if ( 0xDF == n && !do_utf8 && !UTF ) {
+ sayNO;
+ } else {
+ U8 folded[UTF8_MAXBYTES_CASE+1];
+ STRLEN foldlen;
+ const char * const l = locinput;
+ char *e = PL_regeol;
+ to_uni_fold(n, folded, &foldlen);
+
+ if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
+ l, &e, 0, do_utf8)) {
+ sayNO;
+ }
+ locinput = e;
+ }
+ nextchr = UCHARAT(locinput);
+ break;
+ case LNBREAK:
+ if ((n=is_LNBREAK(locinput,do_utf8))) {
+ locinput += n;
+ nextchr = UCHARAT(locinput);
+ } else
+ sayNO;
+ break;
+
+#define CASE_CLASS(nAmE) \
+ case nAmE: \
+ if ((n=is_##nAmE(locinput,do_utf8))) { \
+ locinput += n; \
+ nextchr = UCHARAT(locinput); \
+ } else \
+ sayNO; \
+ break; \
+ case N##nAmE: \
+ if ((n=is_##nAmE(locinput,do_utf8))) { \
+ sayNO; \
+ } else { \
+ locinput += UTF8SKIP(locinput); \
+ nextchr = UCHARAT(locinput); \
+ } \
+ break
+
+ CASE_CLASS(VERTWS);
+ CASE_CLASS(HORIZWS);
+#undef CASE_CLASS
default:
PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
} else {
while (scan < loceol && !isSPACE(*scan))
scan++;
- break;
}
+ break;
case NSPACEL:
PL_reg_flags |= RF_tainted;
if (do_utf8) {
while (scan < loceol && !isDIGIT(*scan))
scan++;
}
+ case LNBREAK:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ /*
+ LNBREAK can match two latin chars, which is ok,
+ because we have a null terminated string, but we
+ have to use hardcount in this situation
+ */
+ while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
+ scan+=c;
+ hardcount++;
+ }
+ }
break;
+ case HORIZWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && is_HORIZWS_latin1(scan))
+ scan++;
+ }
+ break;
+ case NHORIZWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && !is_HORIZWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+ case VERTWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
+ scan += c;
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && is_VERTWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+ case NVERTWS:
+ if (do_utf8) {
+ loceol = PL_regeol;
+ while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ } else {
+ while (scan < loceol && !is_VERTWS_latin1(scan))
+ scan++;
+
+ }
+ break;
+
default: /* Called on something of 0 width. */
break; /* So match right here or not at all. */
}
* documentation of these array elements. */
si = *ary;
- a = SvROK(ary[1]) ? &ary[1] : 0;
- b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
+ a = SvROK(ary[1]) ? &ary[1] : NULL;
+ b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
if (a)
sw = *a;