Re: [PATCH] More regex optimisations and debug enhancements (including Andys stuff...
[p5sagit/p5-mst-13.2.git] / pp_pack.c
index 5deede9..fe41b3e 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -702,34 +702,27 @@ next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
     return TRUE;
 }
 
-STATIC void
-bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
-    U8 buffer[UTF8_MAXLEN];
+STATIC char *
+S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
     const U8 * const end = start + len;
-    char *d = *dest;
+
     while (start < end) {
-        const int length =
-           uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
-       switch(length) {
-         case 1:
-           *d++ = buffer[0];
-           break;
-         case 2:
-           *d++ = buffer[0];
-           *d++ = buffer[1];
-           break;
-         default:
-           Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
-                      *start, length);
+       const UV uv = NATIVE_TO_ASCII(*start);
+       if (UNI_IS_INVARIANT(uv))
+           *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
+       else {
+           *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
+           *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
        }
        start++;
     }
-    *dest = d;
+    return dest;
 }
 
 #define PUSH_BYTES(utf8, cur, buf, len)                                \
 STMT_START {                                                   \
-    if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur));     \
+    if (utf8)                                                  \
+       (cur) = bytes_to_uni((U8 *) buf, len, (cur));           \
     else {                                                     \
        Copy(buf, cur, len, char);                              \
        (cur) += (len);                                         \
@@ -764,7 +757,7 @@ STMT_START {                                        \
 STMT_START {                                   \
     if (utf8) {                                        \
        const U8 au8 = (byte);                  \
-       bytes_to_uni(aTHX_ &au8, 1, &(s));      \
+       (s) = bytes_to_uni(&au8, 1, (s));       \
     } else *(U8 *)(s)++ = (byte);              \
 } STMT_END
 
@@ -3052,7 +3045,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                                len+(endb-buffer)*UTF8_EXPAND);
                        end = start+SvLEN(cat);
                    }
-                   bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
+                   cur = bytes_to_uni(buffer, endb-buffer, cur);
                } else {
                    if (cur >= end) {
                        *cur = '\0';