#endif
typedef struct RExC_state_t {
- U16 flags16; /* are we folding, multilining? */
+ U32 flags; /* PMf_* flag bits (seeded from pm->op_pmflags): are we folding, multilining? */
char *precomp; /* uncompiled string. */
regexp *rx;
char *start; /* Start of input for compile */
#endif
} RExC_state_t;
-#define RExC_flags16 (pRExC_state->flags16)
+#define RExC_flags (pRExC_state->flags) /* U32 flags member of *pRExC_state; needs pRExC_state in scope */
#define RExC_precomp (pRExC_state->precomp)
#define RExC_rx (pRExC_state->rx)
#define RExC_start (pRExC_state->start)
#define SCF_WHILEM_VISITED_POS 0x2000
#define UTF (RExC_utf8 != 0)
-#define LOC ((RExC_flags16 & PMf_LOCALE) != 0)
-#define FOLD ((RExC_flags16 & PMf_FOLD) != 0)
+#define LOC ((RExC_flags & PMf_LOCALE) != 0) /* PMf_LOCALE set in the flags currently in effect; RExC_flags changes while (?...) groups are parsed */
+#define FOLD ((RExC_flags & PMf_FOLD) != 0) /* PMf_FOLD set in the flags currently in effect; RExC_flags changes while (?...) groups are parsed */
#define OOB_UNICODE 12345678
#define OOB_NAMEDCLASS -1
PL_colors[4],PL_colors[5],PL_colors[0],
(int)(xend - exp), RExC_precomp, PL_colors[1]);
});
- RExC_flags16 = pm->op_pmflags;
+ RExC_flags = pm->op_pmflags;
RExC_sawback = 0;
RExC_seen = 0;
RExC_rx = r;
/* Second pass: emit code. */
- RExC_flags16 = pm->op_pmflags; /* don't let top level (?i) bleed */
+ RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */
RExC_parse = exp;
RExC_end = xend;
RExC_naughty = 0;
/* Dig out information for optimizations. */
r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
- pm->op_pmflags = RExC_flags16;
+ pm->op_pmflags = RExC_flags;
if (UTF)
r->reganch |= ROPT_UTF8; /* Unicode in it? */
r->regstclass = NULL;
if (longest_float_length
|| (data.flags & SF_FL_BEFORE_EOL
&& (!(data.flags & SF_FL_BEFORE_MEOL)
- || (RExC_flags16 & PMf_MULTILINE)))) {
+ || (RExC_flags & PMf_MULTILINE)))) {
int t;
if (SvCUR(data.longest_fixed) /* ok to leave SvCUR */
r->float_max_offset = data.offset_float_max;
t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
&& (!(data.flags & SF_FL_BEFORE_MEOL)
- || (RExC_flags16 & PMf_MULTILINE)));
+ || (RExC_flags & PMf_MULTILINE)));
fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
}
else {
if (longest_fixed_length
|| (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
- || (RExC_flags16 & PMf_MULTILINE)))) {
+ || (RExC_flags & PMf_MULTILINE)))) {
int t;
if (SvUTF8(data.longest_fixed)) {
r->anchored_offset = data.offset_fixed;
t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
- || (RExC_flags16 & PMf_MULTILINE)));
+ || (RExC_flags & PMf_MULTILINE)));
fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
}
else {
register regnode *lastbr;
register regnode *ender = 0;
register I32 parno = 0;
- I32 flags, oregflags = RExC_flags16, have_branch = 0, open = 0;
+ I32 flags, oregflags = RExC_flags, have_branch = 0, open = 0;
/* for (?g), (?gc), and (?o) warnings; warning
about (?c) will warn about (?g) -- japhy */
/* Make an OPEN node, if parenthesized. */
if (paren) {
if (*RExC_parse == '?') { /* (?...) */
- U16 posflags = 0, negflags = 0;
- U16 *flagsp = &posflags;
+ U32 posflags = 0, negflags = 0;
+ U32 *flagsp = &posflags;
int logical = 0;
char *seqstart = RExC_parse;
++RExC_parse;
goto parse_flags;
}
- RExC_flags16 |= posflags;
- RExC_flags16 &= ~negflags;
+ RExC_flags |= posflags;
+ RExC_flags &= ~negflags;
if (*RExC_parse == ':') {
RExC_parse++;
paren = ':';
/* Check for proper termination. */
if (paren) {
- RExC_flags16 = (U16)oregflags;
+ RExC_flags = oregflags;
if (RExC_parse >= RExC_end || *nextchar(pRExC_state) != ')') {
RExC_parse = oregcomp_parse;
vFAIL("Unmatched (");
case '^':
RExC_seen_zerolen++;
nextchar(pRExC_state);
- if (RExC_flags16 & PMf_MULTILINE)
+ if (RExC_flags & PMf_MULTILINE)
ret = reg_node(pRExC_state, MBOL);
- else if (RExC_flags16 & PMf_SINGLELINE)
+ else if (RExC_flags & PMf_SINGLELINE)
ret = reg_node(pRExC_state, SBOL);
else
ret = reg_node(pRExC_state, BOL);
nextchar(pRExC_state);
if (*RExC_parse)
RExC_seen_zerolen++;
- if (RExC_flags16 & PMf_MULTILINE)
+ if (RExC_flags & PMf_MULTILINE)
ret = reg_node(pRExC_state, MEOL);
- else if (RExC_flags16 & PMf_SINGLELINE)
+ else if (RExC_flags & PMf_SINGLELINE)
ret = reg_node(pRExC_state, SEOL);
else
ret = reg_node(pRExC_state, EOL);
break;
case '.':
nextchar(pRExC_state);
- if (RExC_flags16 & PMf_SINGLELINE)
+ if (RExC_flags & PMf_SINGLELINE)
ret = reg_node(pRExC_state, SANY);
else
ret = reg_node(pRExC_state, REG_ANY);
break;
case '#':
- if (RExC_flags16 & PMf_EXTENDED) {
+ if (RExC_flags & PMf_EXTENDED) {
while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++;
if (RExC_parse < RExC_end)
goto tryagain;
{
oldp = p;
- if (RExC_flags16 & PMf_EXTENDED)
+ if (RExC_flags & PMf_EXTENDED)
p = regwhite(p, RExC_end);
switch (*p) {
case '^':
ender = *p++;
break;
}
- if (RExC_flags16 & PMf_EXTENDED)
+ if (RExC_flags & PMf_EXTENDED)
p = regwhite(p, RExC_end);
if (UTF && FOLD) {
/* Prime the casefolded buffer. */
UV n;
bool optimize_invert = TRUE;
AV* unicode_alternate = 0;
+#ifdef EBCDIC
+ UV literal_endpoint = 0;
+#endif
ret = reganode(pRExC_state, ANYOF, 0);
break;
}
} /* end of \blah */
+#ifdef EBCDIC
+ else
+ literal_endpoint++;
+#endif
if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
IV ceilvalue = value < 256 ? value : 255;
#ifdef EBCDIC
- if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
- (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+ /* In EBCDIC [\x89-\x91] should include
+ * the \x8e but [i-j] should not. */
+ if (literal_endpoint == 2 &&
+ ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+ (isUPPER(prevvalue) && isUPPER(ceilvalue))))
{
if (isLOWER(prevvalue)) {
for (i = prevvalue; i <= ceilvalue; i++)
}
}
}
+#ifdef EBCDIC
+ literal_endpoint = 0;
+#endif
}
range = 0; /* this range (if it was one) is done now */
SV *rv;
/* The 0th element stores the character class description
- * in its textual form: used later (regexec.c:Perl_regclass_swatch())
+ * in its textual form: used later (regexec.c:Perl_regclass_swash())
* to initialize the appropriate swash (which gets stored in
* the 1st element), and also useful for dumping the regnode.
* The 2nd element stores the multicharacter foldings,
- * used later (regexec.c:s_reginclasslen()). */
+ * used later (regexec.c:S_reginclass()). */
av_store(av, 0, listsv);
av_store(av, 1, NULL);
av_store(av, 2, (SV*)unicode_alternate);
RExC_parse++;
continue;
}
- if (RExC_flags16 & PMf_EXTENDED) {
+ if (RExC_flags & PMf_EXTENDED) {
if (isSPACE(*RExC_parse)) {
RExC_parse++;
continue;
if (lv) {
if (sw) {
- UV i;
U8 s[UTF8_MAXLEN+1];
for (i = 0; i <= 256; i++) { /* just the first 256 */
if (!r || (--r->refcnt > 0))
return;
DEBUG_r({
- char *s = pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60,
- UNI_DISPLAY_REGEX);
- int len = SvCUR(dsv);
+ int len;
+ char *s;
+
+ s = (r->reganch & ROPT_UTF8) ? pv_uni_display(dsv, (U8*)r->precomp,
+ r->prelen, 60, UNI_DISPLAY_REGEX)
+ : pv_display(dsv, r->precomp, r->prelen, 0, 60);
+ len = SvCUR(dsv);
if (!PL_colorset)
reginitcolors();
PerlIO_printf(Perl_debug_log,
SAVEPPTR(RExC_precomp); /* uncompiled string. */
SAVEI32(RExC_npar); /* () count. */
SAVEI32(RExC_size); /* Code size. */
- SAVEI16(RExC_flags16); /* are we folding, multilining? */
+ SAVEI32(RExC_flags); /* are we folding, multilining? */
SAVEVPTR(RExC_rx); /* from regcomp.c */
SAVEI32(RExC_seen); /* from regcomp.c */
SAVEI32(RExC_sawback); /* Did we see \1, ...? */