Attempt at portability.
[p5sagit/p5-mst-13.2.git] / regcomp.c
index bf1b42f..4bfef22 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -69,7 +69,7 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-2001, Larry Wall
+ ****    Copyright (c) 1991-2002, Larry Wall
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -737,7 +737,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
            }
 
-           if (UTF && OP(scan) == EXACTF) {
+           if (UTF && OP(scan) == EXACTF && STR_LEN(scan) >= 6) {
 /*
   Two problematic code points in Unicode casefolding of EXACT nodes:
 
@@ -2168,6 +2168,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                /* FALL THROUGH*/
            case '?':           /* (??...) */
                logical = 1;
+               if (*RExC_parse != '{')
+                   goto unknown;
                paren = *RExC_parse++;
                /* FALL THROUGH */
            case '{':           /* (?{...}) */
@@ -3180,22 +3182,34 @@ tryagain:
                    if (len)
                        p = oldp;
                    else if (UTF) {
+                        STRLEN unilen;
+
                         if (FOLD) {
                              /* Emit all the Unicode characters. */
                              for (foldbuf = tmpbuf;
                                   foldlen;
                                   foldlen -= numlen) {
                                   ender = utf8_to_uvchr(foldbuf, &numlen);
-                                  reguni(pRExC_state, ender, s, &numlen);
-                                  s       += numlen;
-                                  len     += numlen;
-                                  foldbuf += numlen;
+                                  if (numlen > 0) {
+                                       reguni(pRExC_state, ender, s, &unilen);
+                                       s       += unilen;
+                                       len     += unilen;
+                                       /* In EBCDIC the numlen
+                                        * and unilen can differ. */
+                                       foldbuf += numlen;
+                                       if (numlen >= foldlen)
+                                            break;
+                                  }
+                                  else
+                                       break; /* "Can't happen." */
                              }
                         }
                         else {
-                             reguni(pRExC_state, ender, s, &numlen);
-                             s   += numlen;
-                             len += numlen;
+                             reguni(pRExC_state, ender, s, &unilen);
+                             if (unilen > 0) {
+                                  s   += unilen;
+                                  len += unilen;
+                             }
                         }
                    }
                    else {
@@ -3205,22 +3219,34 @@ tryagain:
                    break;
                }
                if (UTF) {
+                    STRLEN unilen;
+
                     if (FOLD) {
                          /* Emit all the Unicode characters. */
                          for (foldbuf = tmpbuf;
                               foldlen;
                               foldlen -= numlen) {
                               ender = utf8_to_uvchr(foldbuf, &numlen);
-                              reguni(pRExC_state, ender, s, &numlen);
-                              s       += numlen;
-                              len     += numlen;
-                              foldbuf += numlen;
+                              if (numlen > 0) {
+                                   reguni(pRExC_state, ender, s, &unilen);
+                                   len     += unilen;
+                                   s       += unilen;
+                                   /* In EBCDIC the numlen
+                                    * and unilen can differ. */
+                                   foldbuf += numlen;
+                                   if (numlen >= foldlen)
+                                        break;
+                              }
+                              else
+                                   break;
                          }
                     }
                     else {
-                         reguni(pRExC_state, ender, s, &numlen);
-                         s   += numlen;
-                         len += numlen;
+                         reguni(pRExC_state, ender, s, &unilen);
+                         if (unilen > 0) {
+                              s   += unilen;
+                              len += unilen;
+                         }
                     }
                     len--;
                }
@@ -3432,8 +3458,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 STATIC void
 S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 {
-    if (!SIZE_ONLY && ckWARN(WARN_REGEXP) &&
-       POSIXCC(UCHARAT(RExC_parse))) {
+    if (!SIZE_ONLY && POSIXCC(UCHARAT(RExC_parse))) {
        char *s = RExC_parse;
        char  c = *s++;
 
@@ -3498,7 +3523,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
     nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
 
-    if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && POSIXCC(nextvalue))
+    if (!SIZE_ONLY && POSIXCC(nextvalue))
        checkposixcc(pRExC_state);
 
     /* allow 1st char to be ] (allowing it to be - is dealt with later) */
@@ -4052,22 +4077,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                          ANYOF_BITMAP_SET(ret, i);
          }
          if (value > 255 || UTF) {
+               UV prevnatvalue  = NATIVE_TO_UNI(prevvalue);
+               UV natvalue      = NATIVE_TO_UNI(value);
+
                ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
-               if (prevvalue < value)
+               if (prevnatvalue < natvalue) { /* what about > ? */
                    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
-                                  (UV)prevvalue, (UV)value);
-               else if (prevvalue == value) {
-                   Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
-                                  (UV)value);
+                                  prevnatvalue, natvalue);
+               }
+               else if (prevnatvalue == natvalue) {
+                   Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
                    if (FOLD) {
-                        U8 tmpbuf [UTF8_MAXLEN+1];
                         U8 foldbuf[UTF8_MAXLEN_FOLD+1];
                         STRLEN foldlen;
-                        UV f;
-
-                        uvchr_to_utf8(tmpbuf, value);
-                        to_utf8_fold(tmpbuf, foldbuf, &foldlen);
-                        f = utf8_to_uvchr(foldbuf, 0);
+                        UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
 
                         /* If folding and foldable and a single
                          * character, insert also the folded version