*/
#ifdef PERL_EXT_RE_BUILD
-/* need to replace pregcomp et al, so enable that */
-# ifndef PERL_IN_XSUB_RE
-# define PERL_IN_XSUB_RE
-# endif
-/* need access to debugger hooks */
-# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
-# define DEBUGGING
-# endif
-#endif
-
-#ifdef PERL_IN_XSUB_RE
-/* We *really* need to overwrite these symbols: */
-# define Perl_pregcomp my_regcomp
-# define Perl_regdump my_regdump
-# define Perl_regprop my_regprop
-# define Perl_pregfree my_regfree
-# define Perl_re_intuit_string my_re_intuit_string
-/* *These* symbols are masked to allow static link. */
-# define Perl_regnext my_regnext
-# define Perl_save_re_context my_save_re_context
-# define Perl_reginitcolors my_reginitcolors
-
-# define PERL_NO_GET_CONTEXT
+#include "re_top.h"
#endif
/*
#endif
#define REG_COMP_C
-#include "regcomp.h"
+#ifdef PERL_IN_XSUB_RE
+# include "re_comp.h"
+#else
+# include "regcomp.h"
+#endif
#ifdef op
#undef op
});
} else {
- /*EMPTY*/; /* It's a dupe. So ignore it. */
+ NOOP; /* It's a dupe. So ignore it. */
}
} /* end second pass */
});
} else {
- /*EMPTY*/; /* Its a dupe. So ignore it. */
+ NOOP; /* Its a dupe. So ignore it. */
}
} /* end second pass */
SvPV_nolen_const(mysv));
}
PerlIO_printf( Perl_debug_log, "0x%p,0x%p,0x%p)\n",
- first, last, cur );
+ (void*)first, (void*)last, (void*)cur );
});
if ( ( first ? OP( noper ) == optype
: PL_regkind[ (U8)OP( noper ) ] == EXACT )
regprop(RExC_rx, mysv, cur);
PerlIO_printf( Perl_debug_log,
"%*s%s\t(0x%p,0x%p,0x%p)\n", (int)depth * 2 + 2,
- " ", SvPV_nolen_const( mysv ), first, last, cur);
+ " ", SvPV_nolen_const( mysv ), (void*)first, (void*)last, (void*)cur);
});
if ( last ) {
return RExC_rx->data->count - n;
}
+#ifndef PERL_IN_XSUB_RE
void
Perl_reginitcolors(pTHX)
{
}
PL_colorset = 1;
}
-
+#endif
/*
- pregcomp - compile a regular expression into internal code
again:
if (PL_regkind[(U8)OP(first)] == EXACT) {
if (OP(first) == EXACT)
- /*EMPTY*/; /* Empty, get anchored substr later. */
+ NOOP; /* Empty, get anchored substr later. */
else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
r->regstclass = first;
}
FAIL("Eval-group not allowed at runtime, use re 'eval'");
if (PL_tainting && PL_tainted)
FAIL("Eval-group in insecure regular expression");
+#if PERL_VERSION > 8
if (IN_PERL_COMPILETIME)
PL_cv_has_eval = 1;
+#endif
}
nextchar(pRExC_state);
}
regtail(pRExC_state, lastbr, ender);
- if (have_branch) {
+ if (have_branch && !SIZE_ONLY) {
/* Hook the tails of the branches to the closing node. */
- for (br = ret; br != NULL; br = regnext(br)) {
- regoptail(pRExC_state, br, ender);
+ for (br = ret; br; br = regnext(br)) {
+ const U8 op = PL_regkind[OP(br)];
+ if (op == BRANCH) {
+ regtail(pRExC_state, NEXTOPER(br), ender);
+ }
+ else if (op == BRANCHJ) {
+ regtail(pRExC_state, NEXTOPER(NEXTOPER(br)), ender);
+ }
}
}
}
* faster to run. Backslashed characters are exceptions, each becoming a
* separate node; the code is simpler that way and it's not worth fixing.
*
- * [Yes, it is worth fixing, some scripts can run twice the speed.] */
+ * [Yes, it is worth fixing, some scripts can run twice the speed.]
+ * [It looks like its ok, as in S_study_chunk we merge adjacent EXACT nodes]
+ */
STATIC regnode *
S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
{
if (len)
p = oldp;
else if (UTF) {
- STRLEN unilen;
-
if (FOLD) {
/* Emit all the Unicode characters. */
STRLEN numlen;
foldlen -= numlen) {
ender = utf8_to_uvchr(foldbuf, &numlen);
if (numlen > 0) {
- reguni(pRExC_state, ender, s, &unilen);
+ const STRLEN unilen = reguni(pRExC_state, ender, s);
s += unilen;
len += unilen;
/* In EBCDIC the numlen
}
}
else {
- reguni(pRExC_state, ender, s, &unilen);
+ const STRLEN unilen = reguni(pRExC_state, ender, s);
if (unilen > 0) {
s += unilen;
len += unilen;
break;
}
if (UTF) {
- STRLEN unilen;
-
if (FOLD) {
/* Emit all the Unicode characters. */
STRLEN numlen;
foldlen -= numlen) {
ender = utf8_to_uvchr(foldbuf, &numlen);
if (numlen > 0) {
- reguni(pRExC_state, ender, s, &unilen);
+ const STRLEN unilen = reguni(pRExC_state, ender, s);
len += unilen;
s += unilen;
/* In EBCDIC the numlen
}
}
else {
- reguni(pRExC_state, ender, s, &unilen);
+ const STRLEN unilen = reguni(pRExC_state, ender, s);
if (unilen > 0) {
s += unilen;
len += unilen;
}
}
+
+/*
+ parse a class specification and produce either an ANYOF node that
+ matches the pattern. If the pattern matches a single char only and
+ that char is < 256 then we produce an EXACT node instead.
+*/
STATIC regnode *
S_regclass(pTHX_ RExC_state_t *pRExC_state)
{
#ifdef EBCDIC
UV literal_endpoint = 0;
#endif
+ UV stored = 0; /* number of chars stored in the class */
+ regnode *orig_emit = RExC_emit; /* Save the original RExC_emit in
+ case we need to change the emitted regop to an EXACT. */
+
+ /* Assume we are going to generate an ANYOF node. */
ret = reganode(pRExC_state, ANYOF, 0);
if (!SIZE_ONLY)
}
else
value = UCHARAT(RExC_parse++);
+
nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
if (value == '[' && POSIXCC(nextvalue))
namedclass = regpposixcc(pRExC_state, value);
}
/* now is the next time */
+ stored += (value - prevvalue + 1);
if (!SIZE_ONLY) {
IV i;
-
if (prevvalue < 256) {
const IV ceilvalue = value < 256 ? value : 255;
RExC_emit += ANYOF_CLASS_ADD_SKIP;
}
+
+ if (SIZE_ONLY)
+ return ret;
+ /****** !SIZE_ONLY AFTER HERE *********/
+
+ if( stored == 1 && value < 256
+ && !( ANYOF_FLAGS(ret) & ( ANYOF_FLAGS_ALL ^ ANYOF_FOLD ) )
+ ) {
+ /* optimize single char class to an EXACT node
+ but *only* when its not a UTF/high char */
+ RExC_emit = orig_emit;
+ ret = reg_node(pRExC_state,
+ (U8)((ANYOF_FLAGS(ret) & ANYOF_FOLD) ? EXACTF : EXACT));
+ *STRING(ret)= (char)value;
+ STR_LEN(ret)= 1;
+ RExC_emit += STR_SZ(1);
+ return ret;
+ }
/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
- if (!SIZE_ONLY &&
- /* If the only flag is folding (plus possibly inversion). */
+ if ( /* If the only flag is folding (plus possibly inversion). */
((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
) {
for (value = 0; value < 256; ++value) {
}
/* optimize inverted simple patterns (e.g. [^a-z]) */
- if (!SIZE_ONLY && optimize_invert &&
+ if (optimize_invert &&
/* If the only flag is inversion. */
(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
}
-
- if (!SIZE_ONLY) {
+ {
AV * const av = newAV();
SV *rv;
-
/* The 0th element stores the character class description
* in its textual form: used later (regexec.c:Perl_regclass_swash())
* to initialize the appropriate swash (which gets stored in
RExC_rx->data->data[n] = (void*)rv;
ARG_SET(ret, n);
}
-
return ret;
}
/*
- reguni - emit (if appropriate) a Unicode character
*/
-STATIC void
-S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
+STATIC STRLEN
+S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
{
dVAR;
- *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
+ return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
}
/*
}
/*
-- regoptail - regtail on operand of first argument; nop if operandless
-*/
-/* TODO: All three parms should be const */
-STATIC void
-S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
-{
- dVAR;
- /* "Operandless" and "op != BRANCH" are synonymous in practice. */
- if (p == NULL || SIZE_ONLY)
- return;
- if (PL_regkind[(U8)OP(p)] == BRANCH) {
- regtail(pRExC_state, NEXTOPER(p), val);
- }
- else if ( PL_regkind[(U8)OP(p)] == BRANCHJ) {
- regtail(pRExC_state, NEXTOPER(NEXTOPER(p)), val);
- }
- else
- return;
-}
-
-/*
- regcurly - a little FSA that accepts {\d+,?\d*}
*/
STATIC I32
len, s,
PL_colors[1]);
} else if (k == TRIE) {
- /*EMPTY*/;
+ NOOP;
/* print the details od the trie in dumpuntil instead, as
* prog->data isn't available here */
} else if (k == CURLY) {
}
if (o->flags & ANYOF_CLASS)
- for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
+ for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
if (ANYOF_CLASS_TEST(o,i))
sv_catpv(sv, anyofs[i]);
dVAR;
#ifdef DEBUGGING
SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
- SV * const re_debug_flags=get_sv(RE_DEBUG_FLAGS,0);
#endif
-
+ GET_RE_DEBUG_FLAGS_DECL;
if (!r || (--r->refcnt > 0))
return;
Safefree(r);
}
+#ifndef PERL_IN_XSUB_RE
/*
- regnext - dig the "next" pointer out of a node
*/
return(p+offset);
}
+#endif
STATIC void
S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
+#ifndef PERL_IN_XSUB_RE
void
Perl_save_re_context(pTHX)
{
U32 i;
for (i = 1; i <= rx->nparens; i++) {
char digits[TYPE_CHARS(long)];
- const STRLEN len = my_sprintf(digits, "%lu", (long)i);
+ const STRLEN len = my_snprintf(digits, sizeof(digits), "%lu", (long)i);
GV *const *const gvp
= (GV**)hv_fetch(PL_defstash, digits, len, 0);
}
}
}
+#endif
static void
clear_re(pTHX_ void *r)