Re: [perl #41010] (?(COND)) in pattern matching not working properly
Yves Orton [Thu, 30 Nov 2006 01:12:25 +0000 (02:12 +0100)]
Message-ID: <9b18b3110611291612i137803f4lab2739e3d9af40f7@mail.gmail.com>

p4raw-id: //depot/perl@29441

embed.fnc
embed.h
proto.h
regcomp.c
t/op/pat.t

diff --git a/embed.fnc b/embed.fnc
index adcdb84..1b9b5a4 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1321,7 +1321,7 @@ Es        |SV *   |reg_scan_name  |NN struct RExC_state_t *state|U32 flags
 Es     |U32    |join_exact     |NN struct RExC_state_t *state|NN regnode *scan|NN I32 *min|U32 flags|NULLOK regnode *val|U32 depth
 EsRn   |char*  |regwhite       |NN char *p|NN const char *e
 Es     |char*  |nextchar       |NN struct RExC_state_t *state
-Es     |void   |scan_commit    |NN const struct RExC_state_t* state|NN struct scan_data_t *data|NN I32 *minlenp
+Es     |void   |scan_commit    |NN const struct RExC_state_t* state|NN struct scan_data_t *data|NN I32 *minlenp|int is_inf
 Esn    |void   |cl_anything    |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
 EsRn   |int    |cl_is_anything |NN const struct regnode_charclass_class *cl
 Esn    |void   |cl_init        |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
diff --git a/embed.h b/embed.h
index 8cc8bba..5c6930f 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define join_exact(a,b,c,d,e,f)        S_join_exact(aTHX_ a,b,c,d,e,f)
 #define regwhite               S_regwhite
 #define nextchar(a)            S_nextchar(aTHX_ a)
-#define scan_commit(a,b,c)     S_scan_commit(aTHX_ a,b,c)
+#define scan_commit(a,b,c,d)   S_scan_commit(aTHX_ a,b,c,d)
 #define cl_anything            S_cl_anything
 #define cl_is_anything         S_cl_is_anything
 #define cl_init                        S_cl_init
diff --git a/proto.h b/proto.h
index 06bef08..4f8edf4 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -3601,7 +3601,7 @@ STATIC char*      S_regwhite(char *p, const char *e)
 STATIC char*   S_nextchar(pTHX_ struct RExC_state_t *state)
                        __attribute__nonnull__(pTHX_1);
 
-STATIC void    S_scan_commit(pTHX_ const struct RExC_state_t* state, struct scan_data_t *data, I32 *minlenp)
+STATIC void    S_scan_commit(pTHX_ const struct RExC_state_t* state, struct scan_data_t *data, I32 *minlenp, int is_inf)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
                        __attribute__nonnull__(pTHX_3);
diff --git a/regcomp.c b/regcomp.c
index c5a0b7b..359e4f6 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -556,17 +556,18 @@ static const scan_data_t zero_scan_data =
 #define EXPERIMENTAL_INPLACESCAN
 #endif
 
-#define DEBUG_STUDYDATA(data,depth)                                  \
-DEBUG_OPTIMISE_MORE_r(if(data){                                           \
+#define DEBUG_STUDYDATA(str,data,depth)                              \
+DEBUG_OPTIMISE_MORE_r(if(data){                                      \
     PerlIO_printf(Perl_debug_log,                                    \
-        "%*s"/* Len:%"IVdf"/%"IVdf" */"Pos:%"IVdf"/%"IVdf           \
-        " Flags: %"IVdf" Whilem_c: %"IVdf" Lcp: %"IVdf" ",           \
+        "%*s" str "Pos:%"IVdf"/%"IVdf                                \
+        " Flags: 0x%"UVXf" Whilem_c: %"IVdf" Lcp: %"IVdf" %s",       \
         (int)(depth)*2, "",                                          \
         (IV)((data)->pos_min),                                       \
         (IV)((data)->pos_delta),                                     \
-        (IV)((data)->flags),                                         \
+        (UV)((data)->flags),                                         \
         (IV)((data)->whilem_c),                                      \
-        (IV)((data)->last_closep ? *((data)->last_closep) : -1)      \
+        (IV)((data)->last_closep ? *((data)->last_closep) : -1),     \
+        is_inf ? "INF " : ""                                         \
     );                                                               \
     if ((data)->last_found)                                          \
         PerlIO_printf(Perl_debug_log,                                \
@@ -596,7 +597,7 @@ static void clear_re(pTHX_ void *r);
    floating substrings if needed. */
 
 STATIC void
-S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp, int is_inf)
 {
     const STRLEN l = CHR_SVLEN(data->last_found);
     const STRLEN old_l = CHR_SVLEN(*data->longest);
@@ -614,12 +615,12 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min
            data->minlen_fixed=minlenp; 
            data->lookbehind_fixed=0;
        }
-       else {
+       else { /* *data->longest == data->longest_float */
            data->offset_float_min = l ? data->last_start_min : data->pos_min;
            data->offset_float_max = (l
                                      ? data->last_start_max
                                      : data->pos_min + data->pos_delta);
-           if ((U32)data->offset_float_max > (U32)I32_MAX)
+           if (is_inf || (U32)data->offset_float_max > (U32)I32_MAX)
                data->offset_float_max = I32_MAX;
            if (data->flags & SF_BEFORE_EOL)
                data->flags
@@ -641,7 +642,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min
     }
     data->last_end = -1;
     data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA(data,0);
+    DEBUG_STUDYDATA("commit: ",data,0); /* dump is emitted by scan_commit, so label it as such */
 }
 
 /* Can match anything (initialization) */
@@ -2347,6 +2348,9 @@ typedef struct scan_frame {
     I32 stop; /* what stopparen do we use */
 } scan_frame;
 
+/* SCAN_COMMIT: call scan_commit() passing the is_inf variable in scope at the call site as the 4th argument. */
+#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
+
 STATIC I32
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         I32 *minlenp, I32 *deltap,
@@ -2392,7 +2396,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
   fake_study_recurse:
     while ( scan && OP(scan) != END && scan < last ){
        /* Peephole optimizer: */
-       DEBUG_STUDYDATA(data,depth);
+       DEBUG_STUDYDATA("Peep:", data,depth);
        DEBUG_PEEP("Peep",scan,depth);
         JOIN_EXACT(scan,&min,0);
 
@@ -2438,7 +2442,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                regnode * const startbranch=scan;
                
                if (flags & SCF_DO_SUBSTR)
-                   scan_commit(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
+                   SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
                if (flags & SCF_DO_STCLASS)
                    cl_init_zero(pRExC_state, &accum);
 
@@ -2760,7 +2764,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     Newx(newframe,1,scan_frame);
                 } else {
                     if (flags & SCF_DO_SUBSTR) {
-                        scan_commit(pRExC_state,data,minlenp);
+                        SCAN_COMMIT(pRExC_state,data,minlenp);
                         data->longest = &(data->longest_float);
                     }
                     is_inf = is_inf_internal = 1;
@@ -2862,7 +2866,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            /* Search for fixed substrings supports EXACT only. */
            if (flags & SCF_DO_SUBSTR) {
                assert(data);
-               scan_commit(pRExC_state, data, minlenp);
+               SCAN_COMMIT(pRExC_state, data, minlenp);
            }
            if (UTF) {
                const U8 * const s = (U8 *)STRING(scan);
@@ -2941,7 +2945,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                is_inf = is_inf_internal = 1;
                scan = regnext(scan);
                if (flags & SCF_DO_SUBSTR) {
-                   scan_commit(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */
+                   SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */
                    data->longest = &(data->longest_float);
                }
                goto optimize_curly_tail;
@@ -2964,7 +2968,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                next_is_eval = (OP(scan) == EVAL);
              do_curly:
                if (flags & SCF_DO_SUBSTR) {
-                   if (mincount == 0) scan_commit(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */
+                   if (mincount == 0) SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */
                    pos_before = data->pos_min;
                }
                if (data) {
@@ -3235,7 +3239,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    if (mincount != maxcount) {
                         /* Cannot extend fixed substrings found inside
                            the group.  */
-                       scan_commit(pRExC_state,data,minlenp);
+                       SCAN_COMMIT(pRExC_state,data,minlenp);
                        if (mincount && last_str) {
                            SV * const sv = data->last_found;
                            MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
@@ -3267,7 +3271,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                continue;
            default:                    /* REF and CLUMP only? */
                if (flags & SCF_DO_SUBSTR) {
-                   scan_commit(pRExC_state,data,minlenp);      /* Cannot expect anything... */
+                   SCAN_COMMIT(pRExC_state,data,minlenp);      /* Cannot expect anything... */
                    data->longest = &(data->longest_float);
                }
                is_inf = is_inf_internal = 1;
@@ -3281,7 +3285,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            int value = 0;
 
            if (flags & SCF_DO_SUBSTR) {
-               scan_commit(pRExC_state,data,minlenp);
+               SCAN_COMMIT(pRExC_state,data,minlenp);
                data->pos_min++;
            }
            min++;
@@ -3570,7 +3574,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if ((flags & SCF_DO_SUBSTR) && data->last_found) {
                         f |= SCF_DO_SUBSTR;
                         if (scan->flags) 
-                            scan_commit(pRExC_state, &data_fake,minlenp);
+                            SCAN_COMMIT(pRExC_state, &data_fake,minlenp);
                         data_fake.last_found=newSVsv(data->last_found);
                     }
                 }
@@ -3621,7 +3625,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
                         if (RExC_rx->minlen<*minnextp)
                             RExC_rx->minlen=*minnextp;
-                        scan_commit(pRExC_state, &data_fake, minnextp);
+                        SCAN_COMMIT(pRExC_state, &data_fake, minnextp);
                         SvREFCNT_dec(data_fake.last_found);
                         
                         if ( data_fake.minlen_fixed != minlenp ) 
@@ -3667,7 +3671,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        }
        else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
            if (flags & SCF_DO_SUBSTR) {
-               scan_commit(pRExC_state,data,minlenp);
+               SCAN_COMMIT(pRExC_state,data,minlenp);
                flags &= ~SCF_DO_SUBSTR;
            }
            if (data && OP(scan)==ACCEPT) {
@@ -3679,7 +3683,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
        {
                if (flags & SCF_DO_SUBSTR) {
-                   scan_commit(pRExC_state,data,minlenp);
+                   SCAN_COMMIT(pRExC_state,data,minlenp);
                    data->longest = &(data->longest_float);
                }
                is_inf = is_inf_internal = 1;
@@ -3713,7 +3717,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             struct regnode_charclass_class accum;
 
             if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
-                scan_commit(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
+                SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
             if (flags & SCF_DO_STCLASS)
                 cl_init_zero(pRExC_state, &accum);
                 
@@ -3830,7 +3834,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            delta += (trie->maxlen - trie->minlen);
            flags &= ~SCF_DO_STCLASS; /* xxx */
             if (flags & SCF_DO_SUBSTR) {
-               scan_commit(pRExC_state,data,minlenp);  /* Cannot expect anything... */
+               SCAN_COMMIT(pRExC_state,data,minlenp);  /* Cannot expect anything... */
                data->pos_min += trie->minlen;
                data->pos_delta += (trie->maxlen - trie->minlen);
                if (trie->maxlen != trie->minlen)
@@ -3854,6 +3858,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
   finish:
     assert(!frame);
+    DEBUG_STUDYDATA("pre-fin:",data,depth);
 
     *scanp = scan;
     *deltap = is_inf_internal ? I32_MAX : delta;
@@ -3874,7 +3879,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     if (flags & SCF_TRIE_RESTUDY)
         data->flags |=         SCF_TRIE_RESTUDY;
     
-    DEBUG_STUDYDATA(data,depth);
+    DEBUG_STUDYDATA("post-fin:",data,depth);
     
     return min < stopmin ? min : stopmin;
 }
@@ -4342,7 +4347,7 @@ reStudy:
             && !RExC_seen_zerolen
             && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
            r->extflags |= RXf_CHECK_ALL;
-       scan_commit(pRExC_state, &data,&minlen);
+       scan_commit(pRExC_state, &data,&minlen,0);
        SvREFCNT_dec(data.last_found);
 
         /* Note that code very similar to this but for anchored string 
diff --git a/t/op/pat.t b/t/op/pat.t
index 2bc5da6..245f1b5 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4121,6 +4121,39 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
     ok("foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/);
     ok("foobarbarxyz" =~ qr/(foo(bar)\R1)xyz/);
 } 
+{   # RT#41010: a quantified pattern anchored by $ must capture the whole string, whatever tail follows
+    local $Message = "RT#41010";
+    my @tails=('','(?(1))','(|)','()?');    # appended after the capture group; each can match the empty string
+    my @quants=('*','+');
+    my $doit=sub {   # $doit->(\@pats, @strs): two tests per string/pattern/quantifier/tail combination
+        my $pats= shift;
+        for (@_) {
+            for my $pat (@$pats) {
+                for my $quant (@quants) {
+                    for my $tail (@tails) {
+                        my $re = "($pat$quant\$)$tail";
+                        ok(/$re/ && $1 eq $_,"'$_'=~/$re/");   # without /m
+                        ok(/$re/m && $1 eq $_,"'$_'=~/$re/m");   # same check under /m
+                    }
+                }
+            }
+       }
+    };    
+    
+    my @dpats=( # patterns that each match one decimal digit
+                '\d',
+                '[1234567890]',
+                '(1|[23]|4|[56]|[78]|[90])',
+                '(?:1|[23]|4|[56]|[78]|[90])',
+                '(1|2|3|4|5|6|7|8|9|0)',
+                '(?:1|2|3|4|5|6|7|8|9|0)',
+             );
+    my @spats=('[ ]',' ','( |\t)','(?: |\t)','[ \t]','\s');   # patterns that each can match one space
+    my @sstrs=('  ');
+    my @dstrs=('12345');
+    $doit->(\@spats,@sstrs);   # 6 pats * 2 quants * 4 tails * 2 = 96 tests
+    $doit->(\@dpats,@dstrs);   # another 96; 192 in total
+}
  
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
@@ -4168,7 +4201,7 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/,
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1375; 
+    $::TestCount = 1567; 
     print "1..$::TestCount\n";
 }