X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=709eef238f4b53e23f4bd37713476d63dd30d09d;hb=a365f2ce4defc0d7fecd4e9484f8f958454c9192;hp=374d480be7b3238fb7c424143fccbad0cd5edf82;hpb=32e6a07c84b153f78f946de50870bc0ee030624f;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/regexec.c b/regexec.c
index 374d480..709eef2 100644
--- a/regexec.c
+++ b/regexec.c
@@ -285,9 +285,8 @@ S_regcppop(pTHX_ const regexp *rex)
      * requiring null fields (pat.t#187 and split.t#{13,14}
      * (as of patchlevel 7877)  will fail.  Then again,
      * this code seems to be necessary or otherwise
-     * building DynaLoader will fail:
-     * "Error: '*' not in typemap in DynaLoader.xs, line 164"
-     * --jhi */
+     * this match erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
+     * --jhi updated by dapm */
     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 	if (i > PL_regsize)
 	    PL_regoffs[i].start = -1;
@@ -308,7 +307,7 @@ S_regcppop(pTHX_ const regexp *rex)
  - pregexec - match a regexp against a string
  */
 I32
-Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
 /* strend: pointer to null at end of string */
 /* strbeg: real beginning of string */
@@ -372,8 +371,8 @@ Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *stren
    deleted from the finite automaton. */
 
 char *
-Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
-		     char *strend, U32 flags, re_scream_pos_data *data)
+Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
+		     char *strend, const U32 flags, re_scream_pos_data *data)
 {
     dVAR;
     register I32 start_shift = 0;
@@ -1482,8 +1481,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 		U8 **points; /* map of where we were in the input string
 		                when reading a given char. For ASCII this
 		                is unnecessary overhead as the relationship
-		                is always 1:1, but for unicode, especially
-		                case folded unicode this is not true. */
+		                is always 1:1, but for Unicode, especially
+		                case-folded Unicode, this is not true. */
 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
 		U8 *bitmap=NULL;
 
@@ -1705,7 +1704,7 @@ S_swap_match_buff (pTHX_ regexp *prog) {
  - regexec_flags - match a regexp against a string
  */
 I32
-Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
 /* strend: pointer to null at end of string */
 /* strbeg: real beginning of string */
@@ -1845,7 +1844,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
 		    if (regtry(&reginfo, &s))
 			goto got_it;
 		  after_try:
-		    if (s >= end)
+		    if (s > end)
 			goto phooey;
 		    if (prog->extflags & RXf_USE_INTUIT) {
 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
@@ -2267,13 +2266,12 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
      * Actually, the code in regcppop() (which Ilya may be meaning by
      * PL_reglastparen), is not needed at all by the test suite
-     * (op/regexp, op/pat, op/split), but that code is needed, oddly
-     * enough, for building DynaLoader, or otherwise this
-     * "Error: '*' not in typemap in DynaLoader.xs, line 164"
-     * will happen.  Meanwhile, this code *is* needed for the
+     * (op/regexp, op/pat, op/split), but that code is needed, otherwise
+     * this match erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
+     * Meanwhile, this code *is* needed for the
      * above-mentioned test suite tests to succeed.  The common theme
      * on those tests seems to be returning null fields from matches.
-     * --jhi */
+     * --jhi updated by dapm */
 #if 1
     if (prog->nparens) {
 	regexp_paren_pair *pp = PL_regoffs;
@@ -4221,12 +4219,6 @@ NULL
 
 	case BRANCH:	    /*  /(...|A|...)/ */
 	    scan = NEXTOPER(scan); /* scan now points to inner node */
-	    if ((!next || (OP(next) != BRANCH && OP(next) != BRANCHJ)) 
-	        && !has_cutgroup)
-	    {
-	    	/* last branch; skip state push and jump direct to node */
-		continue;
-            }
 	    ST.lastparen = *PL_reglastparen;
 	    ST.next_branch = next;
 	    REGCP_SET(ST.cp);
@@ -5006,29 +4998,22 @@ NULL
 #undef ST
         case FOLDCHAR:
             n = ARG(scan);
-            if (nextchr==n) {
-                locinput += UTF8SKIP(locinput);
-
-            } else {
-                /* This malarky is to handle LATIN SMALL LETTER SHARP S 
-                   properly. Sigh */
-                if (0xDF==n && (UTF||do_utf8) &&  
-                    toLOWER(locinput[0])=='s' && toLOWER(locinput[1])=='s') 
-                {
-                    locinput += 2;
-                } else if (do_utf8) {
-                    U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
-                    STRLEN tmplen1;
-                    U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
-                    STRLEN tmplen2;
-                    to_uni_fold(n, tmpbuf1, &tmplen1);
-                    to_utf8_fold(locinput, tmpbuf2, &tmplen2);    
-                    if (tmplen1!=tmplen2 || !strnEQ(tmpbuf1,tmpbuf2,tmplen1))
+            if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
+                locinput += ln;
+            } else if ( 0xDF == n && !do_utf8 && !UTF ) {
+                sayNO;
+            } else  {
+                U8 folded[UTF8_MAXBYTES_CASE+1];
+                STRLEN foldlen;
+                const char * const l = locinput;
+                char *e = PL_regeol;
+                to_uni_fold(n, folded, &foldlen);
+
+		if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
+                	       l, &e, 0,  do_utf8)) {
                         sayNO;
-                    else 
-                        locinput += UTF8SKIP(locinput);
-                } else 
-                    sayNO;
+                }
+                locinput = e;
             } 
             nextchr = UCHARAT(locinput);  
             break;