catch nonexistent backrefs in REs
Hugo van der Sanden [Wed, 1 Jul 1998 20:14:05 +0000 (22:14 +0200)]
Message-Id: <l03130304b1c027e1df9e@[194.222.64.89]>
Date: Wed, 1 Jul 1998 20:14:05 +0200
Subject: Re: [PATCH _66] for bad backrefs
--
Message-Id: <l03130300b1c03425261c@[194.222.64.89]>
Date: Wed, 1 Jul 1998 20:47:16 +0200
Subject: Re: [PATCH _66] for bad backrefs

p4raw-id: //depot/perl@1293

regcomp.c
t/op/re_tests
util.c

diff --git a/regcomp.c b/regcomp.c
index ca4da9c..0a36cbb 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -765,6 +765,7 @@ pregcomp(char *exp, char *xend, PMOP *pm)
     r->prelen = xend - exp;
     r->precomp = regprecomp;
     r->subbeg = r->subbase = NULL;
+    r->nparens = regnpar - 1;          /* set early to validate backrefs */
     regcomp_rx = r;
 
     /* Second pass: emit code. */
@@ -936,7 +937,6 @@ pregcomp(char *exp, char *xend, PMOP *pm)
        r->check_substr = r->anchored_substr = r->float_substr = Nullsv;
     }
 
-    r->nparens = regnpar - 1;
     r->minlen = minlen;
     if (regseen & REG_SEEN_GPOS) 
        r->reganch |= ROPT_GPOS_SEEN;
@@ -1609,6 +1609,8 @@ tryagain:
                if (num > 9 && num >= regnpar)
                    goto defchar;
                else {
+                   if (!SIZE_ONLY && num > regcomp_rx->nparens)
+                       FAIL("reference to nonexistent group");
                    regsawback = 1;
                    ret = reganode((regflags & PMf_FOLD)
                                   ? ((regflags & PMf_LOCALE) ? REFFL : REFF)
diff --git a/t/op/re_tests b/t/op/re_tests
index dd54a2a..78d89be 100644 (file)
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -164,6 +164,16 @@ a(bc)d     abcd    y       $1-\$1-\\$1     bc-$1-\bc
 a[-]?c ac      y       $&      ac
 (abc)\1        abcabc  y       $1      abc
 ([a-c]*)\1     abcabc  y       $1      abc
+\1     -       c       -       /\1/: reference to nonexistent group
+\2     -       c       -       /\2/: reference to nonexistent group
+(a)|\1 a       y       -       -
+(a)|\1 x       n       -       -
+(a)|\2 -       c       -       /(a)|\2/: reference to nonexistent group
+(([a-c])b*?\2)*        ababbbcbc       y       $&-$1-$2        ababb-bb-b
+(([a-c])b*?\2){3}      ababbbcbc       y       $&-$1-$2        ababbbcbc-cbc-c
+((\3|b)\2(a)x)+        aaxabxbaxbbx    n       -       -
+((\3|b)\2(a)x)+        aaaxabaxbaaxbbax        y       $&-$1-$2-$3     bbax-bbax-b-a
+((\3|b)\2(a)){2,}      bbaababbabaaaaabbaaaabba        y       $&-$1-$2-$3     bbaaaabba-bba-b-a
 'abc'i ABC     y       $&      ABC
 'abc'i XBC     n       -       -
 'abc'i AXC     n       -       -
diff --git a/util.c b/util.c
index f61b66d..96a9bb8 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1091,7 +1091,7 @@ screaminstr(SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift, I32 *old_
     }
 #ifdef POINTERRIGOR
     do {
-       if (pos >= stop_pos) return Nullch;
+       if (pos >= stop_pos) break;
        if (big[pos-previous] != first)
            continue;
        for (x=big+pos+1-previous,s=little; s < littleend; /**/ ) {
@@ -1110,7 +1110,7 @@ screaminstr(SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift, I32 *old_
 #else /* !POINTERRIGOR */
     big -= previous;
     do {
-       if (pos >= stop_pos) return Nullch;
+       if (pos >= stop_pos) break;
        if (big[pos] != first)
            continue;
        for (x=big+pos+1,s=little; s < littleend; /**/ ) {