#define SPSTART 0x4 /* Starts with * or +. */
#define TRYAGAIN 0x8 /* Weeded out a declaration. */
+/* Length of a variant. */
+
+typedef struct scan_data_t { /* Scan state handed to study_chunk() and scan_commit(). */
+ I32 len_min;
+ I32 len_delta;
+ I32 pos_min; /* Advanced by the node length when an EXACT node is recorded. */
+ I32 pos_delta; /* Added to pos_min to form last_start_max. */
+ SV *last_found; /* EXACT node text is appended here with sv_catpvn(). */
+ I32 last_end; /* min value, <0 unless valid. */
+ I32 last_start_min;
+ I32 last_start_max; /* I32_MAX when is_inf, else pos_min + pos_delta. */
+ SV **longest; /* Either &l_fixed, or &l_float. */
+ SV *longest_fixed;
+ I32 offset_fixed;
+ SV *longest_float;
+ I32 offset_float_min; /* May be copied into r->check_offset_min. */
+ I32 offset_float_max; /* May be copied into r->check_offset_max. */
+ I32 flags; /* SF_* bits such as SF_HAS_EVAL and SF_BEFORE_EOL. */
+ I32 whilem_c; /* Bumped for a CURLYX left as CURLYX; if < 16, OR-ed into WHILEM flags. */
+} scan_data_t;
+
/*
* Forward declarations for pregcomp()'s friends.
*/
static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0 };
+ 0, 0, 0, 0 };
#define SF_BEFORE_EOL (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
#define SF_BEFORE_SEOL 0x1
#define OOB_CHAR8 1234
#define OOB_UTF8 123456
+#define OOB_NAMEDCLASS -1
#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
+/* Allow for side effects in s */
+#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (s);} STMT_END
+
static void clear_re(pTHXo_ void *r);
STATIC void
regnode *stop = scan;
#endif
- next = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2;
+ next = scan + NODE_SZ_STR(scan);
/* Skip NOTHING, merge EXACT*. */
while (n &&
( PL_regkind[(U8)OP(n)] == NOTHING ||
n = regnext(n);
}
else {
- int oldl = *OPERAND(scan);
+ int oldl = STR_LEN(scan);
regnode *nnext = regnext(n);
- if (oldl + *OPERAND(n) > U8_MAX)
+ if (oldl + STR_LEN(n) > U8_MAX)
break;
NEXT_OFF(scan) += NEXT_OFF(n);
- *OPERAND(scan) += *OPERAND(n);
- next = n + (*OPERAND(n) + 2 - 1)/sizeof(regnode) + 2;
+ STR_LEN(scan) += STR_LEN(n);
+ next = n + NODE_SZ_STR(n);
/* Now we can overwrite *n : */
- Move(OPERAND(n) + 1, OPERAND(scan) + oldl + 1,
- *OPERAND(n) + 1, char);
+ Move(STRING(n), STRING(scan) + oldl,
+ STR_LEN(n), char);
#ifdef DEBUGGING
if (stringok)
stop = next - 1;
}
#ifdef DEBUGGING
/* Allow dumping */
- n = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2;
+ n = scan + NODE_SZ_STR(scan);
while (n <= stop) {
/* Purify reports a benign UMR here sometimes, because we
* don't initialize the OP() slot of a node when that node
num++;
data_fake.flags = 0;
+ if (data)
+ data_fake.whilem_c = data->whilem_c;
next = regnext(scan);
scan = NEXTOPER(scan);
if (code != BRANCH)
pars++;
if (data && (data_fake.flags & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
+ if (data)
+ data->whilem_c = data_fake.whilem_c;
if (code == SUSPEND)
break;
}
continue;
}
else if (OP(scan) == EXACT) {
- I32 l = *OPERAND(scan);
+ I32 l = STR_LEN(scan);
if (UTF) {
- unsigned char *s = (unsigned char *)(OPERAND(scan)+1);
+ unsigned char *s = (unsigned char *)STRING(scan);
unsigned char *e = s + l;
I32 newl = 0;
while (s < e) {
data->last_start_max = is_inf
? I32_MAX : data->pos_min + data->pos_delta;
}
- sv_catpvn(data->last_found, (char *)(OPERAND(scan)+1), *OPERAND(scan));
+ sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
data->last_end = data->pos_min + l;
data->pos_min += l; /* As in the first entry. */
data->flags &= ~SF_BEFORE_EOL;
}
}
else if (PL_regkind[(U8)OP(scan)] == EXACT) {
- I32 l = *OPERAND(scan);
+ I32 l = STR_LEN(scan);
if (flags & SCF_DO_SUBSTR)
scan_commit(data);
if (UTF) {
- unsigned char *s = (unsigned char *)(OPERAND(scan)+1);
+ unsigned char *s = (unsigned char *)STRING(scan);
unsigned char *e = s + l;
I32 newl = 0;
while (s < e) {
nxt = regnext(nxt);
if (!strchr((char*)PL_simple,OP(nxt))
&& !(PL_regkind[(U8)OP(nxt)] == EXACT
- && *OPERAND(nxt) == 1))
+ && STR_LEN(nxt) == 1))
goto nogo;
nxt2 = nxt;
nxt = regnext(nxt);
else
oscan->flags = 0;
}
+ else if (OP(oscan) == CURLYX && data && ++data->whilem_c < 16) {
+ /* This stays as CURLYX, and can put the count/of pair. */
+ /* Find WHILEM (as in regexec.c) */
+ regnode *nxt = oscan + NEXT_OFF(oscan);
+
+ if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
+ nxt += ARG(nxt);
+ PREVOPER(nxt)->flags = data->whilem_c
+ | (PL_reg_whilem_seen << 4); /* On WHILEM */
+ }
if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if (flags & SCF_DO_SUBSTR) {
regnode *nscan;
data_fake.flags = 0;
+ if (data)
+ data_fake.whilem_c = data->whilem_c;
next = regnext(scan);
nscan = NEXTOPER(NEXTOPER(scan));
minnext = study_chunk(&nscan, &deltanext, last, &data_fake, 0);
FAIL("variable length lookbehind not implemented");
}
else if (minnext > U8_MAX) {
- FAIL2("lookbehind longer than %d not implemented", U8_MAX);
+ FAIL2("lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
}
scan->flags = minnext;
}
pars++;
if (data && (data_fake.flags & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
+ if (data)
+ data->whilem_c = data_fake.whilem_c;
}
else if (OP(scan) == OPEN) {
pars++;
I32 minlen = 0;
I32 sawplus = 0;
I32 sawopen = 0;
+ scan_data_t data;
if (exp == NULL)
FAIL("NULL regexp argument");
PL_regprecomp = savepvn(exp, xend - exp);
DEBUG_r(if (!PL_colorset) reginitcolors());
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling%s RE `%s%*s%s'\n",
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
- xend - exp, PL_regprecomp, PL_colors[1]));
+ (int)(xend - exp), PL_regprecomp, PL_colors[1]));
PL_regflags = pm->op_pmflags;
PL_regsawback = 0;
PL_regnpar = 1;
PL_regsize = 0L;
PL_regcode = &PL_regdummy;
- regc((U8)REG_MAGIC, (char*)PL_regcode);
+ PL_reg_whilem_seen = 0;
+ REGC((U8)REG_MAGIC, (char*)PL_regcode);
if (reg(0, &flags) == NULL) {
Safefree(PL_regprecomp);
PL_regprecomp = Nullch;
return(NULL);
}
- DEBUG_r(PerlIO_printf(Perl_debug_log, "size %d ", PL_regsize));
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)PL_regsize));
/* Small enough for pointer-storage convention?
If extralen==0, this means that we will not need long jumps. */
PL_regsize += PL_extralen;
else
PL_extralen = 0;
+ if (PL_reg_whilem_seen > 15)
+ PL_reg_whilem_seen = 15;
/* Allocate space and initialize. */
Newc(1001, r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode),
PL_regcode = r->program;
/* Store the count of eval-groups for security checks: */
PL_regcode->next_off = ((PL_seen_evals > U16_MAX) ? U16_MAX : PL_seen_evals);
- regc((U8)REG_MAGIC, (char*) PL_regcode++);
+ REGC((U8)REG_MAGIC, (char*) PL_regcode++);
r->data = 0;
if (reg(0, &flags) == NULL)
return(NULL);
3-units-long substrs field. */
Newz(1004, r->substrs, 1, struct reg_substr_data);
+ StructCopy(&zero_scan_data, &data, scan_data_t);
if (OP(scan) != BRANCH) { /* Only one top-level choice. */
- scan_data_t data;
I32 fake;
STRLEN longest_float_length, longest_fixed_length;
- StructCopy(&zero_scan_data, &data, scan_data_t);
first = scan;
/* Skip introductions and multiplicators >= 1. */
while ((OP(first) == OPEN && (sawopen = 1)) ||
/* Starting-point info. */
again:
- if (OP(first) == EXACT); /* Empty, get anchored substr later. */
+ if (PL_regkind[(U8)OP(first)] == EXACT) { /* Index by opcode, then compare the kind. */
+ if (OP(first) == EXACT); /* Empty, get anchored substr later. */
+ else if ((OP(first) == EXACTF || OP(first) == EXACTFL)
+ && !UTF) /* Folding literals become the start class only when not UTF. */
+ r->regstclass = first;
+ }
else if (strchr((char*)PL_simple+4,OP(first)))
r->regstclass = first;
else if (PL_regkind[(U8)OP(first)] == BOUND ||
r->reganch |= ROPT_SKIP;
/* Scan is after the zeroth branch, first is atomic matcher. */
- DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n",
- first - scan + 1));
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
+ (IV)(first - scan + 1)));
/*
* If there's something expensive in the r.e., find the
* longest literal string that must appear and make it the
r->check_offset_min = data.offset_float_min;
r->check_offset_max = data.offset_float_max;
}
- if (r->check_substr) {
+ /* XXXX Currently intuiting is not compatible with ANCH_GPOS.
+ This should be changed ASAP! */
+ if (r->check_substr && !(r->reganch & ROPT_ANCH_GPOS)) {
r->reganch |= RE_USE_INTUIT;
if (SvTAIL(r->check_substr))
r->reganch |= RE_INTUIT_TAIL;
DEBUG_r(PerlIO_printf(Perl_debug_log, "\n"));
scan = r->program + 1;
- minlen = study_chunk(&scan, &fake, scan + PL_regsize, NULL, 0);
+ minlen = study_chunk(&scan, &fake, scan + PL_regsize, &data, 0);
r->check_substr = r->anchored_substr = r->float_substr = Nullsv;
}
reginsert(CURLY, ret);
}
else {
- PL_regnaughty += 4 + PL_regnaughty; /* compound interest */
- regtail(ret, reg_node(WHILEM));
+ regnode *w = reg_node(WHILEM);
+
+ w->flags = 0;
+ regtail(ret, w);
if (!SIZE_ONLY && PL_extralen) {
reginsert(LONGJMP,ret);
reginsert(NOTHING,ret);
NEXT_OFF(ret) = 3; /* Go over NOTHING to LONGJMP. */
regtail(ret, reg_node(NOTHING));
if (SIZE_ONLY)
- PL_extralen += 3;
+ PL_reg_whilem_seen++, PL_extralen += 3;
+ PL_regnaughty += 4 + PL_regnaughty; /* compound interest */
}
ret->flags = 0;
ret = reg_node(FOLD
? (LOC ? EXACTFL : EXACTF)
: EXACT);
- s = (char *) OPERAND(ret);
- regc(0, s++); /* save spot for len */
+ s = STRING(ret);
for (len = 0, p = PL_regcomp_parse - 1;
len < 127 && p < PL_regxend;
len++)
if (!e)
FAIL("Missing right brace on \\x{}");
else if (UTF) {
- ender = scan_hex(p + 1, e - p, &numlen);
+ ender = (UV)scan_hex(p + 1, e - p, &numlen);
if (numlen + len >= 127) { /* numlen is generous */
p--;
goto loopdone;
FAIL("Can't use \\x{} without 'use utf8' declaration");
}
else {
- ender = scan_hex(p, 2, &numlen);
+ ender = (UV)scan_hex(p, 2, &numlen);
p += numlen;
}
break;
case '5': case '6': case '7': case '8':case '9':
if (*p == '0' ||
(isDIGIT(p[1]) && atoi(p) >= PL_regnpar) ) {
- ender = scan_oct(p, 3, &numlen);
+ ender = (UV)scan_oct(p, 3, &numlen);
p += numlen;
}
else {
default:
if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(*p))
Perl_warner(aTHX_ WARN_UNSAFE,
- "/%.127s/: Unrecognized escape \\%c passed through",
- PL_regprecomp,
- *p);
+ "/%.127s/: Unrecognized escape \\%c passed through",
+ PL_regprecomp,
+ *p);
goto normal_default;
}
break;
}
else {
len++;
- regc(ender, s++);
+ REGC(ender, s++);
}
break;
}
len += numlen - 1;
}
else
- regc(ender, s++);
+ REGC(ender, s++);
}
loopdone:
PL_regcomp_parse = p - 1;
if (len == 1)
*flagp |= SIMPLE;
if (!SIZE_ONLY)
- *OPERAND(ret) = len;
- regc('\0', s++);
- if (SIZE_ONLY) {
- PL_regsize += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode);
- }
- else {
- PL_regcode += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode);
- }
+ STR_LEN(ret) = len;
+ if (SIZE_ONLY)
+ PL_regsize += STR_SZ(len);
+ else
+ PL_regcode += STR_SZ(len);
}
break;
}
{
dTHR;
char *posixcc = 0;
- I32 namedclass = -1;
+ I32 namedclass = OOB_NAMEDCLASS;
if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
/* I smell either [: or [= or [. -- POSIX has been here, right? */
}
break;
}
- if ((namedclass == -1 ||
+ if ((namedclass == OOB_NAMEDCLASS ||
!(posixcc + skip + 2 < PL_regxend &&
(posixcc[skip] == ':' &&
posixcc[skip + 1] == ']'))))
STATIC void
S_checkposixcc(pTHX)
{
- if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY &&
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) &&
(*PL_regcomp_parse == ':' ||
*PL_regcomp_parse == '=' ||
*PL_regcomp_parse == '.')) {
char *s = PL_regcomp_parse;
- char c = *s++;
+ char c = *s++;
while(*s && isALNUM(*s))
s++;
S_regclass(pTHX)
{
dTHR;
- register char *opnd, *s;
- register I32 value;
+ register UV value;
register I32 lastvalue = OOB_CHAR8;
register I32 range = 0;
register regnode *ret;
register I32 def;
I32 numlen;
I32 namedclass;
+ char *rangebegin;
+ bool need_class = 0;
- s = opnd = (char *) OPERAND(PL_regcode);
ret = reg_node(ANYOF);
- for (value = 0; value < ANYOF_SIZE; value++)
- regc(0, s++);
+ if (SIZE_ONLY)
+ PL_regsize += ANYOF_SKIP;
+ else {
+ ret->flags = 0;
+ ANYOF_BITMAP_ZERO(ret);
+ PL_regcode += ANYOF_SKIP;
+ if (FOLD)
+ ANYOF_FLAGS(ret) |= ANYOF_FOLD;
+ if (LOC)
+ ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
+ }
if (*PL_regcomp_parse == '^') { /* Complement of range. */
PL_regnaughty++;
PL_regcomp_parse++;
if (!SIZE_ONLY)
- ANYOF_FLAGS(opnd) |= ANYOF_INVERT;
- }
- if (!SIZE_ONLY) {
- PL_regcode += ANY_SKIP;
- if (FOLD)
- ANYOF_FLAGS(opnd) |= ANYOF_FOLD;
- if (LOC)
- ANYOF_FLAGS(opnd) |= ANYOF_LOCALE;
- }
- else {
- PL_regsize += ANY_SKIP;
+ ANYOF_FLAGS(ret) |= ANYOF_INVERT;
}
- checkposixcc();
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE))
+ checkposixcc();
if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
goto skipcond; /* allow 1st char to be ] or - */
while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
skipcond:
- namedclass = -1;
+ namedclass = OOB_NAMEDCLASS;
+ if (!range)
+ rangebegin = PL_regcomp_parse;
value = UCHARAT(PL_regcomp_parse++);
if (value == '[')
namedclass = regpposixcc(value);
case 'e': value = '\033'; break;
case 'a': value = '\007'; break;
case 'x':
- value = scan_hex(PL_regcomp_parse, 2, &numlen);
+ value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
PL_regcomp_parse += numlen;
break;
case 'c':
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- value = scan_oct(--PL_regcomp_parse, 3, &numlen);
+ value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
PL_regcomp_parse += numlen;
break;
+ default:
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(value))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: Unrecognized escape \\%c in character class passed through",
+ PL_regprecomp,
+ (int)value); /* %c consumes an int; value is a UV. */
+ break;
}
}
- if (!SIZE_ONLY && namedclass > -1) {
- switch (namedclass) {
- case ANYOF_ALNUM:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALNUM:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_SPACE:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
- else {
- for (value = 0; value < 256; value++)
- if (isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NSPACE:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
- else {
- for (value = 0; value < 256; value++)
- if (!isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_DIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
- else {
- for (value = '0'; value <= '9'; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
- else {
- for (value = 0; value < '0'; value++)
- ANYOF_BITMAP_SET(opnd, value);
- for (value = '9' + 1; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ALPHA:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALPHA:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (!isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ASCII:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
- else {
- for (value = 0; value < 128; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NASCII:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
- else {
- for (value = 128; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_CNTRL:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- lastvalue = OOB_CHAR8;
- break;
- case ANYOF_NCNTRL:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (!isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_GRAPH:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NGRAPH:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (!isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_LOWER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
- else {
- for (value = 0; value < 256; value++)
- if (isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NLOWER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
- else {
- for (value = 0; value < 256; value++)
- if (!isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_PRINT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
- else {
- for (value = 0; value < 256; value++)
- if (isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NPRINT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_PUNCT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (isPUNCT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NPUNCT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPUNCT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_UPPER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
- else {
- for (value = 0; value < 256; value++)
- if (isUPPER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NUPPER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
- else {
- for (value = 0; value < 256; value++)
- if (!isUPPER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_XDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (isXDIGIT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ if (namedclass > OOB_NAMEDCLASS) {
+ if (!need_class && !SIZE_ONLY)
+ ANYOF_CLASS_ZERO(ret);
+ need_class = 1;
+ if (range) { /* a-\d, a-[:digit:] */
+ if (!SIZE_ONLY) {
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ (int)(PL_regcomp_parse - rangebegin), /* '*' width takes an int */
+ (int)(PL_regcomp_parse - rangebegin), /* '*' precision takes an int */
+ rangebegin);
+ ANYOF_BITMAP_SET(ret, lastvalue);
+ ANYOF_BITMAP_SET(ret, '-');
+ }
+ range = 0; /* this is not a true range */
+ }
+ if (!SIZE_ONLY) {
+ switch (namedclass) {
+ case ANYOF_ALNUM:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALNUM(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NALNUM:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NALNUM);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALNUM(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_SPACE:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_SPACE);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isSPACE(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NSPACE:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isSPACE(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_DIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
+ else {
+ for (value = '0'; value <= '9'; value++)
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
+ else {
+ for (value = 0; value < '0'; value++)
+ ANYOF_BITMAP_SET(ret, value);
+ for (value = '9' + 1; value < 256; value++)
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NALNUMC:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NALNUMC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALNUMC(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_ALNUMC:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALNUMC(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_ALPHA:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_ALPHA);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALPHA(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NALPHA:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NALPHA);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALPHA(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_ASCII:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_ASCII);
+ else {
+ for (value = 0; value < 128; value++)
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NASCII:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NASCII);
+ else {
+ for (value = 128; value < 256; value++)
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_CNTRL:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isCNTRL(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ lastvalue = OOB_CHAR8;
+ break;
+ case ANYOF_NCNTRL:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NCNTRL);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isCNTRL(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_GRAPH:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_GRAPH);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isGRAPH(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NGRAPH:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NGRAPH);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isGRAPH(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_LOWER:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_LOWER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isLOWER(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NLOWER:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NLOWER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isLOWER(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_PRINT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_PRINT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPRINT(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NPRINT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NPRINT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPRINT(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_PUNCT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPUNCT(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NPUNCT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NPUNCT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPUNCT(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_UPPER:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_UPPER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isUPPER(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NUPPER:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NUPPER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isUPPER(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_XDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_XDIGIT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isXDIGIT(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NXDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isXDIGIT(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ default:
+ FAIL("invalid [::] class in regexp");
+ break;
}
- break;
- case ANYOF_NXDIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (!isXDIGIT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- default:
- FAIL("invalid [::] class in regexp");
- break;
+ ANYOF_FLAGS(ret) |= ANYOF_CLASS;
+ continue;
}
- if (LOC)
- ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
- lastvalue = OOB_CHAR8;
}
- else
if (range) {
- if (lastvalue > value)
- FAIL("invalid [] range in regexp");
+ if (lastvalue > value) /* b-a */ {
+ Perl_croak(aTHX_
+ "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ (int)(PL_regcomp_parse - rangebegin), /* '*' width takes an int */
+ (int)(PL_regcomp_parse - rangebegin), /* '*' precision takes an int */
+ rangebegin);
+ }
range = 0;
}
else {
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
- PL_regcomp_parse[1] != ']') {
+ PL_regcomp_parse[1] != ']') {
PL_regcomp_parse++;
- range = 1;
+ if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ (int)(PL_regcomp_parse - rangebegin), /* '*' width takes an int */
+ (int)(PL_regcomp_parse - rangebegin), /* '*' precision takes an int */
+ rangebegin);
+ if (!SIZE_ONLY)
+ ANYOF_BITMAP_SET(ret, '-');
+ } else
+ range = 1;
continue; /* do it next time */
}
}
+ /* now is the next time */
if (!SIZE_ONLY) {
-#ifndef ASCIIish
+#ifndef ASCIIish /* EBCDIC, for example. */
if ((isLOWER(lastvalue) && isLOWER(value)) ||
(isUPPER(lastvalue) && isUPPER(value)))
{
if (isLOWER(lastvalue)) {
for (i = lastvalue; i <= value; i++)
if (isLOWER(i))
- ANYOF_BITMAP_SET(opnd, i);
+ ANYOF_BITMAP_SET(ret, i);
} else {
for (i = lastvalue; i <= value; i++)
if (isUPPER(i))
- ANYOF_BITMAP_SET(opnd, i);
+ ANYOF_BITMAP_SET(ret, i);
}
}
else
#endif
for ( ; lastvalue <= value; lastvalue++)
- ANYOF_BITMAP_SET(opnd, lastvalue);
+ ANYOF_BITMAP_SET(ret, lastvalue);
}
- lastvalue = value;
+ range = 0;
+ }
+ if (need_class) {
+ if (SIZE_ONLY)
+ PL_regsize += ANYOF_CLASS_ADD_SKIP;
+ else
+ PL_regcode += ANYOF_CLASS_ADD_SKIP;
}
/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
if (!SIZE_ONLY &&
- (ANYOF_FLAGS(opnd) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ (ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
for (value = 0; value < 256; ++value) {
- if (ANYOF_BITMAP_TEST(opnd, value)) {
+ if (ANYOF_BITMAP_TEST(ret, value)) {
I32 cf = PL_fold[value];
- ANYOF_BITMAP_SET(opnd, cf);
+ ANYOF_BITMAP_SET(ret, cf);
}
}
- ANYOF_FLAGS(opnd) &= ~ANYOF_FOLD;
+ ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
}
/* optimize inverted simple patterns (e.g. [^a-z]) */
- if (!SIZE_ONLY && (ANYOF_FLAGS(opnd) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
+ if (!SIZE_ONLY && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
- opnd[ANYOF_BITMAP_OFFSET + value] ^= ANYOF_FLAGS_ALL;
- ANYOF_FLAGS(opnd) = 0;
+ ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
+ ANYOF_FLAGS(ret) = 0;
}
return ret;
}
S_regclassutf8(pTHX)
{
dTHR;
- register char *opnd, *e;
- register U32 value;
+ register char *e;
+ register UV value;
register U32 lastvalue = OOB_UTF8;
register I32 range = 0;
register regnode *ret;
SV *listsv;
U8 flags = 0;
I32 namedclass;
+ char *rangebegin;
if (*PL_regcomp_parse == '^') { /* Complement of range. */
PL_regnaughty++;
listsv = newSVpvn("# comment\n",10);
}
- checkposixcc();
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE))
+ checkposixcc();
if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
goto skipcond; /* allow 1st char to be ] or - */
while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
skipcond:
- namedclass = -1;
+ namedclass = OOB_NAMEDCLASS;
+ if (!range)
+ rangebegin = PL_regcomp_parse;
value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
PL_regcomp_parse += numlen;
-
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
e = strchr(PL_regcomp_parse++, '}');
if (!e)
FAIL("Missing right brace on \\x{}");
- value = scan_hex(PL_regcomp_parse,
+ value = (UV)scan_hex(PL_regcomp_parse,
e - PL_regcomp_parse,
&numlen);
PL_regcomp_parse = e + 1;
}
else {
- value = scan_hex(PL_regcomp_parse, 2, &numlen);
+ value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
PL_regcomp_parse += numlen;
}
break;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- value = scan_oct(--PL_regcomp_parse, 3, &numlen);
+ value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
PL_regcomp_parse += numlen;
break;
+ default:
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(value))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: Unrecognized escape \\%c in character class passed through",
+ PL_regprecomp,
+ (int)value); /* %c consumes an int; value is a UV. */
+ break;
}
}
- if (!SIZE_ONLY && namedclass > -1) {
- switch (namedclass) {
- case ANYOF_ALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
- case ANYOF_NALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
- case ANYOF_ALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
- case ANYOF_NALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
- case ANYOF_ALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
- case ANYOF_NALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
- case ANYOF_ASCII:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
- case ANYOF_NASCII:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
- case ANYOF_CNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
- case ANYOF_NCNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
- case ANYOF_GRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
- case ANYOF_NGRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
- case ANYOF_DIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
- case ANYOF_NDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
- case ANYOF_LOWER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
- case ANYOF_NLOWER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
- case ANYOF_PRINT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
- case ANYOF_NPRINT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
- case ANYOF_PUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
- case ANYOF_NPUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
- case ANYOF_SPACE:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
- case ANYOF_NSPACE:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
- case ANYOF_UPPER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
- case ANYOF_NUPPER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
- case ANYOF_XDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
- case ANYOF_NXDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
+ if (namedclass > OOB_NAMEDCLASS) {
+ if (range) { /* a-\d, a-[:digit:] */
+ if (!SIZE_ONLY) {
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ (int)(PL_regcomp_parse - rangebegin), /* '*' width takes an int */
+ (int)(PL_regcomp_parse - rangebegin), /* '*' precision takes an int */
+ rangebegin);
+ Perl_sv_catpvf(aTHX_ listsv,
+ /* 0x002D is Unicode for '-' */
+ "%04"UVxf"\n002D\n", (UV)lastvalue);
+ }
+ range = 0;
+ }
+ if (!SIZE_ONLY) {
+ switch (namedclass) {
+ case ANYOF_ALNUM:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
+ case ANYOF_NALNUM:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
+ case ANYOF_ALNUMC:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
+ case ANYOF_NALNUMC:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
+ case ANYOF_ALPHA:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
+ case ANYOF_NALPHA:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
+ case ANYOF_ASCII:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
+ case ANYOF_NASCII:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
+ case ANYOF_CNTRL:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
+ case ANYOF_NCNTRL:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
+ case ANYOF_GRAPH:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
+ case ANYOF_NGRAPH:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
+ case ANYOF_DIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
+ case ANYOF_NDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
+ case ANYOF_LOWER:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
+ case ANYOF_NLOWER:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
+ case ANYOF_PRINT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
+ case ANYOF_NPRINT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
+ case ANYOF_PUNCT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
+ case ANYOF_NPUNCT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
+ case ANYOF_SPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
+ case ANYOF_NSPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
+ case ANYOF_UPPER:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
+ case ANYOF_NUPPER:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
+ case ANYOF_XDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
+ case ANYOF_NXDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
+ }
+ continue;
}
}
- else
if (range) {
- if (lastvalue > value)
- FAIL("invalid [] range in regexp");
- if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04x\t%04x\n", lastvalue, value);
- lastvalue = value;
+ if (lastvalue > value) { /* b-a */
+ Perl_croak(aTHX_
+ "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ }
range = 0;
}
else {
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
- PL_regcomp_parse[1] != ']') {
+ PL_regcomp_parse[1] != ']') {
PL_regcomp_parse++;
- range = 1;
+ if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ if (!SIZE_ONLY)
+ Perl_sv_catpvf(aTHX_ listsv,
+ /* 0x002D is Unicode for '-' */
+ "002D\n");
+ } else
+ range = 1;
continue; /* do it next time */
}
- if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04x\n", value);
}
+ /* now is the "next time": append lastvalue..value to the list (lastvalue == value when no range was pending) */
+ if (!SIZE_ONLY)
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
+ (UV)lastvalue, (UV)value);
+ range = 0;
}
ret = reganode(ANYOFUTF8, 0);
}
/*
-- regc - emit (if appropriate) a Unicode character
+- reguni - emit (if appropriate) a Unicode character
*/
STATIC void
S_reguni(pTHX_ UV uv, char* s, I32* lenp)
}
/*
-- regc - emit (if appropriate) a byte of code
-*/
-STATIC void
-S_regc(pTHX_ U8 b, char* s)
-{
- dTHR;
- if (!SIZE_ONLY)
- *s = b;
-}
-
-/*
- reginsert - insert an operator in front of already-emitted operand
*
* Means relocating the operand.
if (OP(node) == OPTIMIZED)
goto after_print;
regprop(sv, node);
- PerlIO_printf(Perl_debug_log, "%4d:%*s%s", node - start,
- 2*l + 1, "", SvPVX(sv));
+ PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
+ (int)(2*l + 1), "", SvPVX(sv));
if (next == NULL) /* Next ptr. */
PerlIO_printf(Perl_debug_log, "(0)");
else
- PerlIO_printf(Perl_debug_log, "(%d)", next - start);
+ PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
(void)PerlIO_putc(Perl_debug_log, '\n');
after_print:
if (PL_regkind[(U8)op] == BRANCHJ) {
}
else if (op == ANYOF) {
node = NEXTOPER(node);
- node += ANY_SKIP;
+ node += ANYOF_SKIP;
}
else if (PL_regkind[(U8)op] == EXACT) {
/* Literal string, where present. */
- node += ((*OPERAND(node)) + 2 + sizeof(regnode) - 1) / sizeof(regnode);
+ node += NODE_SZ_STR(node) - 1;
node = NEXTOPER(node);
}
else {
/* Header fields of interest. */
if (r->anchored_substr)
- PerlIO_printf(Perl_debug_log, "anchored `%s%s%s'%s at %d ",
+ PerlIO_printf(Perl_debug_log,
+ "anchored `%s%.*s%s'%s at %"IVdf" ",
PL_colors[0],
+ (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
SvPVX(r->anchored_substr),
PL_colors[1],
SvTAIL(r->anchored_substr) ? "$" : "",
- r->anchored_offset);
+ (IV)r->anchored_offset);
if (r->float_substr)
- PerlIO_printf(Perl_debug_log, "floating `%s%s%s'%s at %d..%u ",
+ PerlIO_printf(Perl_debug_log,
+ "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ",
PL_colors[0],
- SvPVX(r->float_substr),
+ (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
+ SvPVX(r->float_substr),
PL_colors[1],
SvTAIL(r->float_substr) ? "$" : "",
- r->float_min_offset, r->float_max_offset);
+ (IV)r->float_min_offset, (UV)r->float_max_offset);
if (r->check_substr)
PerlIO_printf(Perl_debug_log,
r->check_substr == r->float_substr
k = PL_regkind[(U8)OP(o)];
if (k == EXACT)
- Perl_sv_catpvf(aTHX_ sv, " <%s%s%s>", PL_colors[0], OPERAND(o) + 1, PL_colors[1]);
+ Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>", PL_colors[0],
+ STR_LEN(o), STRING(o), PL_colors[1]);
else if (k == CURLY) {
if (OP(o) == CURLYM || OP(o) == CURLYN)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
}
+ else if (k == WHILEM && o->flags) /* Ordinal/of */
+ Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP )
Perl_sv_catpvf(aTHX_ sv, "%d", ARG(o)); /* Parenth number */
else if (k == LOGICAL)
- Perl_sv_catpvf(aTHX_ sv, "[%d]", ARG(o)); /* 2: embedded, otherwise 1 */
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
#endif /* DEBUGGING */
{
dTHR;
DEBUG_r(if (!PL_colorset) reginitcolors());
+
+ if (!r || (--r->refcnt > 0))
+ return;
DEBUG_r(PerlIO_printf(Perl_debug_log,
"%sFreeing REx:%s `%s%.60s%s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
PL_colors[1],
(strlen(r->precomp) > 60 ? "..." : "")));
-
- if (!r || (--r->refcnt > 0))
- return;
if (r->precomp)
Safefree(r->precomp);
if (RX_MATCH_COPIED(r))
#else
va_start(args);
#endif
- msv = mess(buf, &args);
+ msv = vmess(buf, &args);
va_end(args);
message = SvPV(msv,l1);
if (l1 > 512)
}
#ifdef PERL_OBJECT
-#define NO_XSLOCKS
#include "XSUB.h"
#undef this
#define this pPerl