* it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
* This causes the main functions to be compiled under new names and with
* debugging support added, which makes "use re 'debug'" work.
-
*/
/* NOTE: this is derived from Henry Spencer's regexp code, and should not
const I32 multiline = prog->reganch & PMf_MULTILINE;
#ifdef DEBUGGING
const char * const i_strpos = strpos;
- SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
#endif
GET_RE_DEBUG_FLAGS_DECL;
}
DEBUG_EXECUTE_r({
- const char *s = PL_reg_match_utf8 ?
- sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
- strpos;
- const int len = PL_reg_match_utf8 ?
- (int)strlen(s) : strend - strpos;
+ RE_PV_DISPLAY_DECL(s, len, PL_reg_match_utf8,
+ PERL_DEBUG_PAD_ZERO(0), strpos, strend - strpos, 60);
+
if (!PL_colorset)
reginitcolors();
if (PL_reg_match_utf8)
/* annoyingly all the vars in this routine have different names from their counterparts
in regmatch. /grrr */
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
+foldlen, foldbuf, uniflags) STMT_START { \
+ switch (trie_type) { \
+ case trie_utf8_fold: \
+ if ( foldlen>0 ) { \
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ foldlen -= len; \
+ uscan += len; \
+ len=0; \
+ } else { \
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
+ foldlen -= UNISKIP( uvc ); \
+ uscan = foldbuf + UNISKIP( uvc ); \
+ } \
+ break; \
+ case trie_utf8: \
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ break; \
+ case trie_plain: \
+ uvc = (UV)*uc; \
+ len = 1; \
+ } \
+ \
+ if (uvc < 256) { \
+ charid = trie->charmap[ uvc ]; \
+ } \
+ else { \
+ charid = 0; \
+ if (trie->widecharmap) { \
+ SV** const svpp = hv_fetch(trie->widecharmap, \
+ (char*)&uvc, sizeof(UV), 0); \
+ if (svpp) \
+ charid = (U16)SvIV(*svpp); \
+ } \
+ } \
+} STMT_END
+
+#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
+ if ( (CoNd) \
+ && (ln == len || \
+ ibcmp_utf8(s, NULL, 0, do_utf8, \
+ m, NULL, ln, (bool)UTF)) \
+ && (!reginfo || regtry(reginfo, s)) ) \
+ goto got_it; \
+ else { \
+ U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
+ uvchr_to_utf8(tmpbuf, c); \
+ f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
+ if ( f != c \
+ && (f == c1 || f == c2) \
+ && (ln == foldlen || \
+ !ibcmp_utf8((char *) foldbuf, \
+ NULL, foldlen, do_utf8, \
+ m, \
+ NULL, ln, (bool)UTF)) \
+ && (!reginfo || regtry(reginfo, s)) ) \
+ goto got_it; \
+ } \
+ s += len
+
+#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
+STMT_START { \
+ while (s <= e) { \
+ if ( (CoNd) \
+ && (ln == 1 || !(OP(c) == EXACTF \
+ ? ibcmp(s, m, ln) \
+ : ibcmp_locale(s, m, ln))) \
+ && (!reginfo || regtry(reginfo, s)) ) \
+ goto got_it; \
+ s++; \
+ } \
+} STMT_END
+
+#define REXEC_FBC_UTF8_SCAN(CoDe) \
+STMT_START { \
+ while (s + (uskip = UTF8SKIP(s)) <= strend) { \
+ CoDe \
+ s += uskip; \
+ } \
+} STMT_END
+
+#define REXEC_FBC_SCAN(CoDe) \
+STMT_START { \
+ while (s < strend) { \
+ CoDe \
+ s++; \
+ } \
+} STMT_END
+
+#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
+REXEC_FBC_UTF8_SCAN( \
+ if (CoNd) { \
+ if (tmp && (!reginfo || regtry(reginfo, s))) \
+ goto got_it; \
+ else \
+ tmp = doevery; \
+ } \
+ else \
+ tmp = 1; \
+)
+
+#define REXEC_FBC_CLASS_SCAN(CoNd) \
+REXEC_FBC_SCAN( \
+ if (CoNd) { \
+ if (tmp && (!reginfo || regtry(reginfo, s))) \
+ goto got_it; \
+ else \
+ tmp = doevery; \
+ } \
+ else \
+ tmp = 1; \
+)
+
+#define REXEC_FBC_TRYIT \
+if ((!reginfo || regtry(reginfo, s))) \
+ goto got_it
+
+#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
+ if (do_utf8) { \
+ UtFpReLoAd; \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break
+
+#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
+ PL_reg_flags |= RF_tainted; \
+ if (do_utf8) { \
+ REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
+ } \
+ else { \
+ REXEC_FBC_CLASS_SCAN(CoNd); \
+ } \
+ break
+
STATIC char *
S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
const char *strend, const regmatch_info *reginfo)
switch (OP(c)) {
case ANYOF:
if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+ REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(prog, c, (U8*)s, 0, do_utf8) :
- REGINCLASS(prog, c, (U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
+ REGINCLASS(prog, c, (U8*)s));
}
else {
while (s < strend) {
}
break;
case CANY:
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
- s++;
- }
+ );
break;
case EXACTF:
m = STRING(c);
while (s <= e) {
c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
uniflags);
- if ( c == c1
- && (ln == len ||
- ibcmp_utf8(s, NULL, 0, do_utf8,
- m, NULL, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- else {
- U8 foldbuf[UTF8_MAXBYTES_CASE+1];
- uvchr_to_utf8(tmpbuf, c);
- f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- if ( f != c
- && (f == c1 || f == c2)
- && (ln == foldlen ||
- !ibcmp_utf8((char *) foldbuf,
- NULL, foldlen, do_utf8,
- m,
- NULL, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- }
- s += len;
+ REXEC_FBC_EXACTISH_CHECK(c == c1);
}
}
else {
c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
- if ( (c == c1 || c == c2)
- && (ln == len ||
- ibcmp_utf8(s, NULL, 0, do_utf8,
- m, NULL, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- else {
- U8 foldbuf[UTF8_MAXBYTES_CASE+1];
- uvchr_to_utf8(tmpbuf, c);
- f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
- if ( f != c
- && (f == c1 || f == c2)
- && (ln == foldlen ||
- !ibcmp_utf8((char *) foldbuf,
- NULL, foldlen, do_utf8,
- m,
- NULL, ln, (bool)UTF))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- }
- s += len;
+ REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
}
}
}
else {
if (c1 == c2)
- while (s <= e) {
- if ( *(U8*)s == c1
- && (ln == 1 || !(OP(c) == EXACTF
- ? ibcmp(s, m, ln)
- : ibcmp_locale(s, m, ln)))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- s++;
- }
+ REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
else
- while (s <= e) {
- if ( (*(U8*)s == c1 || *(U8*)s == c2)
- && (ln == 1 || !(OP(c) == EXACTF
- ? ibcmp(s, m, ln)
- : ibcmp_locale(s, m, ln)))
- && (!reginfo || regtry(reginfo, s)) )
- goto got_it;
- s++;
- }
+ REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
}
break;
case BOUNDL:
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
+ REXEC_FBC_UTF8_SCAN(
if (tmp == !(OP(c) == BOUND ?
(bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
- if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- }
- s += uskip;
+ REXEC_FBC_TRYIT;
}
+ );
}
else {
tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (tmp ==
!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
- if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- }
- s++;
+ REXEC_FBC_TRYIT;
}
+ );
}
if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
+ REXEC_FBC_UTF8_SCAN(
if (tmp == !(OP(c) == NBOUND ?
(bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
- else if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- s += uskip;
- }
+ else REXEC_FBC_TRYIT;
+ );
}
else {
tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == NBOUND ?
isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (tmp ==
!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
- else if ((!reginfo || regtry(reginfo, s)))
- goto got_it;
- s++;
- }
+ else REXEC_FBC_TRYIT;
+ );
}
if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
break;
case ALNUM:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isALNUM(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_ALNUM(),
+ swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+ isALNUM(*s)
+ );
case ALNUML:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isALNUM_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ isALNUM_LC_utf8((U8*)s),
+ isALNUM_LC(*s)
+ );
case NALNUM:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_ALNUM();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isALNUM(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_ALNUM(),
+ !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+ !isALNUM(*s)
+ );
case NALNUML:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isALNUM_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ !isALNUM_LC_utf8((U8*)s),
+ !isALNUM_LC(*s)
+ );
case SPACE:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_SPACE();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isSPACE(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_SPACE(),
+ *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
+ isSPACE(*s)
+ );
case SPACEL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isSPACE_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ *s == ' ' || isSPACE_LC_utf8((U8*)s),
+ isSPACE_LC(*s)
+ );
case NSPACE:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_SPACE();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isSPACE(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_SPACE(),
+ !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
+ !isSPACE(*s)
+ );
case NSPACEL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isSPACE_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
+ !isSPACE_LC(*s)
+ );
case DIGIT:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_DIGIT();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isDIGIT(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_DIGIT(),
+ swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+ isDIGIT(*s)
+ );
case DIGITL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (isDIGIT_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ isDIGIT_LC_utf8((U8*)s),
+ isDIGIT_LC(*s)
+ );
case NDIGIT:
- if (do_utf8) {
- LOAD_UTF8_CHARCLASS_DIGIT();
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isDIGIT(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_PRELOAD(
+ LOAD_UTF8_CHARCLASS_DIGIT(),
+ !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+ !isDIGIT(*s)
+ );
case NDIGITL:
- PL_reg_flags |= RF_tainted;
- if (do_utf8) {
- while (s + (uskip = UTF8SKIP(s)) <= strend) {
- if (!isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += uskip;
- }
- }
- else {
- while (s < strend) {
- if (!isDIGIT_LC(*s)) {
- if (tmp && (!reginfo || regtry(reginfo, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s++;
- }
- }
- break;
+ REXEC_FBC_CSCAN_TAINT(
+ !isDIGIT_LC_utf8((U8*)s),
+ !isDIGIT_LC(*s)
+ );
case TRIE:
/*Perl_croak(aTHX_ "panic: unknown regstclass TRIE");*/
{
if (base==0) break;
}
points[pointpos++ % maxlen]= uc;
- switch (trie_type) {
- case trie_utf8_fold:
- if ( foldlen>0 ) {
- uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
- foldlen -= len;
- uscan += len;
- len=0;
- } else {
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
- uvc = to_uni_fold( uvc, foldbuf, &foldlen );
- foldlen -= UNISKIP( uvc );
- uscan = foldbuf + UNISKIP( uvc );
- }
- break;
- case trie_utf8:
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
- &len, uniflags );
- break;
- case trie_plain:
- uvc = (UV)*uc;
- len = 1;
- }
-
- if (uvc < 256) {
- charid = trie->charmap[ uvc ];
- }
- else {
- charid = 0;
- if (trie->widecharmap) {
- SV** const svpp = hv_fetch(trie->widecharmap,
- (char*)&uvc, sizeof(UV), 0);
- if (svpp)
- charid = (U16)SvIV(*svpp);
- }
- }
+ REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
+ uvc, charid, foldlen, foldbuf, uniflags);
DEBUG_TRIE_EXECUTE_r(
PerlIO_printf(Perl_debug_log,
"Pos: %d Charid:%3x CV:%4"UVxf" ",
SV* const oreplsv = GvSV(PL_replgv);
const bool do_utf8 = DO_UTF8(sv);
I32 multiline;
-#ifdef DEBUGGING
- SV* dsv0;
- SV* dsv1;
-#endif
+
regmatch_info reginfo; /* create some info to pass to regtry etc */
GET_RE_DEBUG_FLAGS_DECL;
multiline = prog->reganch & PMf_MULTILINE;
reginfo.prog = prog;
-#ifdef DEBUGGING
- dsv0 = PERL_DEBUG_PAD_ZERO(0);
- dsv1 = PERL_DEBUG_PAD_ZERO(1);
-#endif
-
RX_MATCH_UTF8_set(prog, do_utf8);
minlen = prog->minlen;
}
DEBUG_EXECUTE_r({
- const char * const s0 = UTF
- ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
- UNI_DISPLAY_REGEX)
- : prog->precomp;
- const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
- const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
- UNI_DISPLAY_REGEX) : startpos;
- const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
+ RE_PV_DISPLAY_DECL(s0, len0, UTF,
+ PERL_DEBUG_PAD_ZERO(0), prog->precomp, prog->prelen, 60);
+ RE_PV_DISPLAY_DECL(s1, len1, do_utf8,
+ PERL_DEBUG_PAD_ZERO(1), startpos, strend - startpos, 60);
+
if (!PL_colorset)
reginitcolors();
PerlIO_printf(Perl_debug_log,
ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
if (do_utf8) {
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if (regtry(®info, s)) goto got_it;
while (s < strend && *s == ch)
s += UTF8SKIP(s);
}
- s += UTF8SKIP(s);
- }
+ );
}
else {
- while (s < strend) {
+ REXEC_FBC_SCAN(
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if (regtry(®info, s)) goto got_it;
while (s < strend && *s == ch)
s++;
}
- s++;
- }
+ );
}
DEBUG_EXECUTE_r(if (!did_match)
PerlIO_printf(Perl_debug_log,
}
DEBUG_EXECUTE_r({
SV * const prop = sv_newmortal();
- const char *s0;
- const char *s1;
- int len0;
- int len1;
-
regprop(prog, prop, c);
- s0 = UTF ?
- pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
- UNI_DISPLAY_REGEX) :
- SvPVX_const(prop);
- len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
- s1 = UTF ?
- sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
- len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
- PerlIO_printf(Perl_debug_log,
- "Matching stclass \"%*.*s\" against \"%*.*s\" (%d chars)\n",
- len0, len0, s0,
- len1, len1, s1, (int)(strend - s));
+ {
+ RE_PV_DISPLAY_DECL(s0,len0,UTF,
+ PERL_DEBUG_PAD_ZERO(0),SvPVX_const(prop),SvCUR(prop),60);
+ RE_PV_DISPLAY_DECL(s1,len1,UTF,
+ PERL_DEBUG_PAD_ZERO(1),s,strend-s,60);
+ PerlIO_printf(Perl_debug_log,
+ "Matching stclass \"%*.*s\" against \"%*.*s\" (%d chars)\n",
+ len0, len0, s0,
+ len1, len1, s1, (int)(strend - s));
+ }
});
if (find_byclass(prog, c, s, strend, ®info))
goto got_it;
#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
-#ifdef DEBUGGING
-STATIC void
+#ifdef DEBUGGING
+
+STATIC void
S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_utf8)
{
const int docolor = *PL_colors[0];
if (pref0_len > pref_len)
pref0_len = pref_len;
{
- const char * const s0 =
- do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
- pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len;
- const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len;
- const char * const s1 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(1),
- (U8*)(locinput - pref_len + pref0_len),
- pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len + pref0_len;
- const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
- const char * const s2 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
- PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
- locinput;
- const int len2 = do_utf8 ? (int)strlen(s2) : l;
- PerlIO_printf(Perl_debug_log,
+ const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
+
+ RE_PV_DISPLAY_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
+ (locinput - pref_len),pref0_len, 60);
+
+ RE_PV_DISPLAY_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
+ (locinput - pref_len + pref0_len),
+ pref_len - pref0_len, 60);
+
+ RE_PV_DISPLAY_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
+ locinput, PL_regeol - locinput, 60);
+
+ PerlIO_printf(Perl_debug_log,
"%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|",
(IV)(locinput - PL_bostr),
PL_colors[4],
"");
}
}
+
#endif
STATIC I32 /* 0 failure, 1 success */
});
if ( base ) {
- switch (trie_type) {
- case trie_utf8_fold:
- if ( foldlen>0 ) {
- uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
- foldlen -= len;
- uscan += len;
- len=0;
- } else {
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
- uvc = to_uni_fold( uvc, foldbuf, &foldlen );
- foldlen -= UNISKIP( uvc );
- uscan = foldbuf + UNISKIP( uvc );
- }
- break;
- case trie_utf8:
- uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
- &len, uniflags );
- break;
- case trie_plain:
- uvc = (UV)*uc;
- len = 1;
- }
-
- if (uvc < 256) {
- charid = trie->charmap[ uvc ];
- }
- else {
- charid = 0;
- if (trie->widecharmap) {
- SV** const svpp = hv_fetch(trie->widecharmap,
- (char*)&uvc, sizeof(UV), 0);
- if (svpp)
- charid = (U16)SvIV(*svpp);
- }
- }
+ REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
+ uvc, charid, foldlen, foldbuf, uniflags);
if (charid &&
(base + charid > trie->uniquecharcount )
if (ST.c1 != CHRTEST_VOID)
c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
UTF8_MAXBYTES, 0, uniflags)
- : UCHARAT(PL_reginput);
+ : (UV) UCHARAT(PL_reginput);
/* If it could work, try it. */
if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
CURLY_SETPAREN(ST.paren, ST.count);