EBCDIC: this seems to calm the last of the Malformed UTF-8 warnings.
Jarkko Hietaniemi [Sun, 24 Feb 2002 05:28:15 +0000 (05:28 +0000)]

p4raw-id: //depot/perl@14850

regcomp.c
utf8.c

index b453116..854dc59 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3182,6 +3182,8 @@ tryagain:
                    if (len)
                        p = oldp;
                    else if (UTF) {
+                        STRLEN unilen;
+
                         if (FOLD) {
                              /* Emit all the Unicode characters. */
                              for (foldbuf = tmpbuf;
@@ -3189,9 +3191,11 @@ tryagain:
                                   foldlen -= numlen) {
                                   ender = utf8_to_uvchr(foldbuf, &numlen);
                                   if (numlen > 0) {
-                                       reguni(pRExC_state, ender, s, &numlen);
-                                       s       += numlen;
-                                       len     += numlen;
+                                       reguni(pRExC_state, ender, s, &unilen);
+                                       s       += unilen;
+                                       len     += unilen;
+                                       /* In EBCDIC the numlen
+                                        * and unilen can differ. */
                                        foldbuf += numlen;
                                        if (numlen >= foldlen)
                                             break;
@@ -3201,10 +3205,10 @@ tryagain:
                              }
                         }
                         else {
-                             reguni(pRExC_state, ender, s, &numlen);
+                             reguni(pRExC_state, ender, s, &unilen);
                              if (numlen > 0) {
-                                  s   += numlen;
-                                  len += numlen;
+                                  s   += unilen;
+                                  len += unilen;
                              }
                         }
                    }
@@ -3215,6 +3219,8 @@ tryagain:
                    break;
                }
                if (UTF) {
+                    STRLEN unilen;
+
                     if (FOLD) {
                          /* Emit all the Unicode characters. */
                          for (foldbuf = tmpbuf;
@@ -3222,9 +3228,11 @@ tryagain:
                               foldlen -= numlen) {
                               ender = utf8_to_uvchr(foldbuf, &numlen);
                               if (numlen > 0) {
-                                   reguni(pRExC_state, ender, s, &numlen);
-                                   len     += numlen;
-                                   s       += numlen;
+                                   reguni(pRExC_state, ender, s, &unilen);
+                                   len     += unilen;
+                                   s       += unilen;
+                                   /* In EBCDIC the numlen
+                                    * and unilen can differ. */
                                    foldbuf += numlen;
                                    if (numlen >= foldlen)
                                         break;
@@ -3234,10 +3242,10 @@ tryagain:
                          }
                     }
                     else {
-                         reguni(pRExC_state, ender, s, &numlen);
+                         reguni(pRExC_state, ender, s, &unilen);
                          if (numlen > 0) {
-                              s   += numlen;
-                              len += numlen;
+                              s   += unilen;
+                              len += unilen;
                          }
                     }
                     len--;
diff --git a/utf8.c b/utf8.c
index 6fc4acd..87b9088 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -952,33 +952,29 @@ Perl_is_uni_xdigit(pTHX_ UV c)
 UV
 Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
-    U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return to_utf8_upper(tmpbuf, p, lenp);
+    uvchr_to_utf8(p, c);
+    return to_utf8_upper(p, p, lenp);
 }
 
 UV
 Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
-    U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return to_utf8_title(tmpbuf, p, lenp);
+    uvchr_to_utf8(p, c);
+    return to_utf8_title(p, p, lenp);
 }
 
 UV
 Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
-    U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return to_utf8_lower(tmpbuf, p, lenp);
+    uvchr_to_utf8(p, c);
+    return to_utf8_lower(p, p, lenp);
 }
 
 UV
 Perl_to_uni_fold(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
-    U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return to_utf8_fold(tmpbuf, p, lenp);
+    uvchr_to_utf8(p, c);
+    return to_utf8_fold(p, p, lenp);
 }
 
 /* for now these all assume no locale info available for Unicode > 255 */