Patch for Perlbug #4253
[p5sagit/p5-mst-13.2.git] / pp_pack.c
index 815c326..edbeb5b 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
 #define PERL_IN_PP_PACK_C
 #include "perl.h"
 
+#if PERL_VERSION >= 9
+#define PERL_PACK_CAN_BYTEORDER
+#define PERL_PACK_CAN_SHRIEKSIGN
+#endif
+
 /*
  * Offset for integer pack/unpack.
  *
 
 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
-#    define OFF16(p)   (char*)(p)
-#    define OFF32(p)   (char*)(p)
+#    define OFF16(p)   ((char*)(p))
+#    define OFF32(p)   ((char*)(p))
 #  else
 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
 #      define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
 #      define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
 #    else
-       }}}} bad cray byte order
+       ++++ bad cray byte order
 #    endif
 #  endif
-#  define COPY16(s,p)  (*(p) = 0, Copy(s, OFF16(p), SIZE16, char))
-#  define COPY32(s,p)  (*(p) = 0, Copy(s, OFF32(p), SIZE32, char))
-#  define COPYNN(s,p,n) (*(p) = 0, Copy(s, (char *)(p), n, char))
-#  define CAT16(sv,p)  sv_catpvn(sv, OFF16(p), SIZE16)
-#  define CAT32(sv,p)  sv_catpvn(sv, OFF32(p), SIZE32)
 #else
-#  define COPY16(s,p)  Copy(s, p, SIZE16, char)
-#  define COPY32(s,p)  Copy(s, p, SIZE32, char)
-#  define COPYNN(s,p,n) Copy(s, (char *)(p), n, char)
-#  define CAT16(sv,p)  sv_catpvn(sv, (char*)(p), SIZE16)
-#  define CAT32(sv,p)  sv_catpvn(sv, (char*)(p), SIZE32)
+#  define OFF16(p)     ((char *) (p))
+#  define OFF32(p)     ((char *) (p))
 #endif
 
+#define COPY16(s,p)  Copy(s, OFF16(p), SIZE16, char)
+#define COPY32(s,p)  Copy(s, OFF32(p), SIZE32, char)
+#define CAT16(sv,p)  sv_catpvn(sv, OFF16(p), SIZE16)
+#define CAT32(sv,p)  sv_catpvn(sv, OFF32(p), SIZE32)
+
+/* Only to be used inside a loop (see the break) */
+#define COPYVAR(s,strend,utf8,var,format)              \
+STMT_START {                                           \
+    if (utf8) {                                                \
+        if (!next_uni_bytes(aTHX_ &s, strend,          \
+            (char *) &var, sizeof(var))) break;                \
+    } else {                                           \
+        Copy(s, (char *) &var, sizeof(var), char);     \
+        s += sizeof(var);                              \
+    }                                                  \
+    DO_BO_UNPACK(var, format);                         \
+} STMT_END
+
 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
 #define MAX_SUB_TEMPLATE_LEVEL 100
 
 /* flags (note that type modifiers can also be used as flags!) */
+#define FLAG_UNPACK_WAS_UTF8    0x40   /* original had FLAG_UNPACK_DO_UTF8 */
+#define FLAG_UNPACK_PARSE_UTF8  0x20   /* Parse as utf8 */
 #define FLAG_UNPACK_ONLY_ONE  0x10
-#define FLAG_UNPACK_DO_UTF8   0x08
+#define FLAG_UNPACK_DO_UTF8     0x08   /* The underlying string is utf8 */
 #define FLAG_SLASH            0x04
 #define FLAG_COMMA            0x02
 #define FLAG_PACK             0x01
@@ -135,14 +152,39 @@ S_mul128(pTHX_ SV *sv, U8 m)
 #define TYPE_IS_BIG_ENDIAN     0x200
 #define TYPE_IS_LITTLE_ENDIAN  0x400
 #define TYPE_ENDIANNESS_MASK   (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
-#define TYPE_ENDIANNESS(t)     ((t) & TYPE_ENDIANNESS_MASK)
-#define TYPE_NO_ENDIANNESS(t)  ((t) & ~TYPE_ENDIANNESS_MASK)
 #define TYPE_MODIFIERS(t)      ((t) & ~0xFF)
 #define TYPE_NO_MODIFIERS(t)   ((t) & 0xFF)
 
-#define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
+#else
+#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
+#endif
+
+#ifndef PERL_PACK_CAN_BYTEORDER
+/* Put "can't" first because it is shorter  */
+# define TYPE_ENDIANNESS(t)    0
+# define TYPE_NO_ENDIANNESS(t) (t)
+
+# define ENDIANNESS_ALLOWED_TYPES   ""
+
+# define DO_BO_UNPACK(var, type)
+# define DO_BO_PACK(var, type)
+# define DO_BO_UNPACK_PTR(var, type, pre_cast)
+# define DO_BO_PACK_PTR(var, type, pre_cast)
+# define DO_BO_UNPACK_N(var, type)
+# define DO_BO_PACK_N(var, type)
+# define DO_BO_UNPACK_P(var)
+# define DO_BO_PACK_P(var)
+
+#else
+
+# define TYPE_ENDIANNESS(t)    ((t) & TYPE_ENDIANNESS_MASK)
+# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
+
+# define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
 
-#define DO_BO_UNPACK(var, type)                                               \
+# define DO_BO_UNPACK(var, type)                                              \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:    var = my_betoh ## type (var); break;  \
@@ -151,7 +193,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-#define DO_BO_PACK(var, type)                                                 \
+# define DO_BO_PACK(var, type)                                                \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:    var = my_htobe ## type (var); break;  \
@@ -160,7 +202,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-#define DO_BO_UNPACK_PTR(var, type, pre_cast)                                 \
+# define DO_BO_UNPACK_PTR(var, type, pre_cast)                                \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:                                          \
@@ -174,7 +216,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-#define DO_BO_PACK_PTR(var, type, pre_cast)                                   \
+# define DO_BO_PACK_PTR(var, type, pre_cast)                                  \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:                                          \
@@ -188,7 +230,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-#define BO_CANT_DOIT(action, type)                                            \
+# define BO_CANT_DOIT(action, type)                                           \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
              case TYPE_IS_BIG_ENDIAN:                                         \
@@ -204,20 +246,20 @@ S_mul128(pTHX_ SV *sv, U8 m)
            }                                                                  \
          } STMT_END
 
-#if PTRSIZE == INTSIZE
-# define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, i, int)
-# define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, i, int)
-#elif PTRSIZE == LONGSIZE
-# define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, l, long)
-# define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, l, long)
-#else
-# define DO_BO_UNPACK_P(var)   BO_CANT_DOIT(unpack, pointer)
-# define DO_BO_PACK_P(var)     BO_CANT_DOIT(pack, pointer)
-#endif
+# if PTRSIZE == INTSIZE
+#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, i, int)
+#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, i, int)
+# elif PTRSIZE == LONGSIZE
+#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, l, long)
+#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, l, long)
+# else
+#  define DO_BO_UNPACK_P(var)  BO_CANT_DOIT(unpack, pointer)
+#  define DO_BO_PACK_P(var)    BO_CANT_DOIT(pack, pointer)
+# endif
 
-#if defined(my_htolen) && defined(my_letohn) && \
+# if defined(my_htolen) && defined(my_letohn) && \
     defined(my_htoben) && defined(my_betohn)
-# define DO_BO_UNPACK_N(var, type)                                            \
+#  define DO_BO_UNPACK_N(var, type)                                           \
          STMT_START {                                                         \
            switch (TYPE_ENDIANNESS(datumtype)) {                              \
              case TYPE_IS_BIG_ENDIAN:    my_betohn(&var, sizeof(type)); break;\
@@ -226,7 +268,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
            }                                                                  \
          } STMT_END
 
-# define DO_BO_PACK_N(var, type)                                              \
+#  define DO_BO_PACK_N(var, type)                                             \
          STMT_START {                                                         \
            switch (TYPE_ENDIANNESS(datumtype)) {                              \
              case TYPE_IS_BIG_ENDIAN:    my_htoben(&var, sizeof(type)); break;\
@@ -234,18 +276,20 @@ S_mul128(pTHX_ SV *sv, U8 m)
              default: break;                                                  \
            }                                                                  \
          } STMT_END
-#else
-# define DO_BO_UNPACK_N(var, type)     BO_CANT_DOIT(unpack, type)
-# define DO_BO_PACK_N(var, type)       BO_CANT_DOIT(pack, type)
+# else
+#  define DO_BO_UNPACK_N(var, type)    BO_CANT_DOIT(unpack, type)
+#  define DO_BO_PACK_N(var, type)      BO_CANT_DOIT(pack, type)
+# endif
+
 #endif
 
 #define PACK_SIZE_CANNOT_CSUM          0x80
-#define PACK_SIZE_CANNOT_ONLY_ONE      0x40
+#define PACK_SIZE_SPARE                        0x40
 #define PACK_SIZE_MASK                 0x3F
 
 
 struct packsize_t {
-    const char *array;
+    const unsigned char *array;
     int first;
     int size;
 };
@@ -284,7 +328,8 @@ unsigned char size_normal[53] = {
   0,
   /* U */ sizeof(char),
   /* V */ SIZE32,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  /* W */ sizeof(unsigned char),
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* c */ sizeof(char),
   /* d */ sizeof(double),
   0,
@@ -297,7 +342,7 @@ unsigned char size_normal[53] = {
   0,
   /* n */ SIZE16,
   0,
-  /* p */ sizeof(char *) | PACK_SIZE_CANNOT_ONLY_ONE | PACK_SIZE_CANNOT_CSUM,
+  /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
 #if defined(HAS_QUAD)
   /* q */ sizeof(Quad_t),
 #else
@@ -307,28 +352,44 @@ unsigned char size_normal[53] = {
   /* s */ SIZE16,
   0, 0,
   /* v */ SIZE16,
-  /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM
+  /* w */ sizeof(char) | PACK_SIZE_CANNOT_CSUM,
 };
 unsigned char size_shrieking[46] = {
   /* I */ sizeof(unsigned int),
   0, 0,
   /* L */ sizeof(unsigned long),
   0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* N */ SIZE32,
+#else
+  0,
+#endif
   0, 0, 0, 0,
   /* S */ sizeof(unsigned short),
   0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* V */ SIZE32,
+#else
+  0,
+#endif
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* i */ sizeof(int),
   0, 0,
   /* l */ sizeof(long),
   0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* n */ SIZE16,
+#else
+  0,
+#endif
   0, 0, 0, 0,
   /* s */ sizeof(short),
   0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* v */ SIZE16
+#else
+  0
+#endif
 };
 struct packsize_t packsize[2] = {
   {size_normal, 67, 53},
@@ -336,7 +397,7 @@ struct packsize_t packsize[2] = {
 };
 #else
 /* EBCDIC (or bust) */
-unsigned char size_normal[99] = {
+unsigned char size_normal[100] = {
   /* c */ sizeof(char),
   /* d */ sizeof(double),
   0,
@@ -350,7 +411,7 @@ unsigned char size_normal[99] = {
   0,
   /* n */ SIZE16,
   0,
-  /* p */ sizeof(char *) | PACK_SIZE_CANNOT_ONLY_ONE | PACK_SIZE_CANNOT_CSUM,
+  /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
 #if defined(HAS_QUAD)
   /* q */ sizeof(Quad_t),
 #else
@@ -389,36 +450,138 @@ unsigned char size_normal[99] = {
   /* S */ SIZE16,
   0,
   /* U */ sizeof(char),
-  /* V */ SIZE32
+  /* V */ SIZE32,
+  /* W */ sizeof(unsigned char),
 };
 unsigned char size_shrieking[93] = {
   /* i */ sizeof(int),
   0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* l */ sizeof(long),
   0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* n */ SIZE16,
+#else
+  0,
+#endif
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* s */ sizeof(short),
   0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* v */ SIZE16,
+#else
+  0,
+#endif
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* I */ sizeof(unsigned int),
   0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* L */ sizeof(unsigned long),
   0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* N */ SIZE32,
+#else
+  0,
+#endif
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   /* S */ sizeof(unsigned short),
   0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
   /* V */ SIZE32
+#else
+  0
+#endif
 };
 struct packsize_t packsize[2] = {
-  {size_normal, 131, 99},
+  {size_normal, 131, 100},
   {size_shrieking, 137, 93}
 };
 #endif
 
+STATIC U8
+next_uni_byte(pTHX_ char **s, const char *end, I32 datumtype)
+{
+    UV val;
+    STRLEN retlen;
+    val =
+       UNI_TO_NATIVE(utf8n_to_uvuni(*s, end-*s, &retlen,
+                                    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY));
+    /* We try to process malformed UTF-8 as much as possible (preferrably with
+       warnings), but these two mean we make no progress in the string and
+       might enter an infinite loop */
+    if (retlen == (STRLEN) -1 || retlen == 0)
+       Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+    if (val >= 0x100) {
+       Perl_warner(aTHX_ packWARN(WARN_UNPACK),
+                   "Character in '%c' format wrapped in unpack",
+                   (int) datumtype);
+       val &= 0xff;
+    }
+    *s += retlen;
+    return val;
+}
+
+#define NEXT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
+       next_uni_byte(aTHX_ &(s), (strend), (datumtype)) : \
+       *(U8 *)(s)++)
+
+STATIC bool
+next_uni_bytes(pTHX_ char **s, char *end, char *buf, int buf_len)
+{
+    UV val;
+    STRLEN retlen;
+    char *from = *s;
+    int bad = 0;
+    U32 flags = ckWARN(WARN_UTF8) ?
+       UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
+    for (;buf_len > 0; buf_len--) {
+       if (from >= end) return FALSE;
+       val = UNI_TO_NATIVE(utf8n_to_uvuni(from, end-from, &retlen, flags));
+       if (retlen == (STRLEN) -1 || retlen == 0) {
+           from += UTF8SKIP(from);
+           bad |= 1;
+       } else from += retlen;
+       if (val >= 0x100) {
+           bad |= 2;
+           val &= 0xff;
+       }
+       *(U8 *)buf++ = val;
+    }
+    /* We have enough characters for the buffer. Did we have problems ? */
+    if (bad) {
+       if (bad & 1) {
+           /* Rewalk the string fragment while warning */
+           char *ptr;
+           flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+           for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
+               if (ptr >= end) break;
+               utf8n_to_uvuni(ptr, end-ptr, &retlen, flags);
+           }
+           if (from > end) from = end;
+       }
+       if ((bad & 2) && ckWARN(WARN_UNPACK))
+           Perl_warner(aTHX_ packWARN(WARN_UNPACK),
+                       "Character(s) wrapped in unpack");
+    }
+    *s = from;
+    return TRUE;
+}
+
+STATIC bool
+next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
+{
+    UV val;
+    STRLEN retlen;
+    char *from = *s;
+    val = UNI_TO_NATIVE(utf8n_to_uvuni(*s, end-*s, &retlen, UTF8_CHECK_ONLY));
+    if (val >= 0x100 || !ISUUCHAR(val) ||
+       retlen == (STRLEN) -1 || retlen == 0) {
+       *out = 0;
+       return FALSE;
+    }
+    *out = PL_uudmap[val] & 077;
+    *s = from;
+    return TRUE;
+}
 
 /* Returns the sizeof() struct described by pat */
 STATIC I32
@@ -509,8 +672,6 @@ S_measure_struct(pTHX_ register tempsym_t* symptr)
            case 'A':
            case 'Z':
            case 'a':
-           case 'c':
-           case 'C':
                size = 1;
                break;
            case 'B':
@@ -641,8 +802,9 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
         switch (*patptr) {
           case '!':
             modifier = TYPE_IS_SHRIEKING;
-            allowed = "sSiIlLxXnNvV";
+            allowed = SHRIEKING_ALLOWED_TYPES;
             break;
+#ifdef PERL_PACK_CAN_BYTEORDER
           case '>':
             modifier = TYPE_IS_BIG_ENDIAN;
             allowed = ENDIANNESS_ALLOWED_TYPES;
@@ -651,6 +813,7 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
             modifier = TYPE_IS_LITTLE_ENDIAN;
             allowed = ENDIANNESS_ALLOWED_TYPES;
             break;
+#endif
           default:
             break;
         }
@@ -756,6 +919,44 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
 }
 
 /*
+   There is no way to cleanly handle the case where we should process the 
+   string per byte in its upgraded form while it's really in downgraded form
+   (e.g. estimates like strend-s as an upper bound for the number of 
+   characters left wouldn't work). So if we foresee the need of this 
+   (pattern starts with U or contains U0), we want to work on the encoded 
+   version of the string. Users are advised to upgrade their pack string 
+   themselves if they need to do a lot of unpacks like this on it
+*/
+STATIC bool 
+need_utf8(const char *pat, const char *patend)
+{
+    bool first = TRUE;
+    while (pat < patend) {
+       if (pat[0] == '#') {
+           pat++;
+           pat = memchr(pat, '\n', patend-pat);
+           if (!pat) return FALSE;
+       } else if (pat[0] == 'U') {
+           if (first || pat[1] == '0') return TRUE;
+       } else first = FALSE;
+       pat++;
+    }
+    return FALSE;
+}
+
+STATIC char
+first_symbol(const char *pat, const char *patend) {
+    while (pat < patend) {
+       if (pat[0] != '#') return pat[0];
+       pat++;
+       pat = memchr(pat, '\n', patend-pat);
+       if (!pat) return 0;
+       pat++;
+    }
+    return 0;
+}
+
+/*
 =for apidoc unpack_str
 
 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
@@ -767,6 +968,21 @@ I32
 Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
 {
     tempsym_t sym = { 0 };
+
+    if (flags & FLAG_UNPACK_DO_UTF8) flags |= FLAG_UNPACK_WAS_UTF8;
+    else if (need_utf8(pat, patend)) {
+       /* We probably should try to avoid this in case a scalar context call
+          wouldn't get to the "U0" */
+       STRLEN len = strend - s;
+       s = bytes_to_utf8(s, &len);
+       SAVEFREEPV(s);
+       strend = s + len;
+       flags |= FLAG_UNPACK_DO_UTF8;
+    }
+
+    if (first_symbol(pat, patend) != 'U' && (flags & FLAG_UNPACK_DO_UTF8))
+       flags |= FLAG_UNPACK_PARSE_UTF8;
+
     sym.patptr = pat;
     sym.patend = patend;
     sym.flags  = flags;
@@ -787,6 +1003,21 @@ I32
 Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char *strend, U32 flags)
 {
     tempsym_t sym = { 0 };
+
+    if (flags & FLAG_UNPACK_DO_UTF8) flags |= FLAG_UNPACK_WAS_UTF8;
+    else if (need_utf8(pat, patend)) {
+       /* We probably should try to avoid this in case a scalar context call
+          wouldn't get to the "U0" */
+       STRLEN len = strend - s;
+       s = bytes_to_utf8(s, &len);
+       SAVEFREEPV(s);
+       strend = s + len;
+       flags |= FLAG_UNPACK_DO_UTF8;
+    }
+
+    if (first_symbol(pat, patend) != 'U' && (flags & FLAG_UNPACK_DO_UTF8))
+       flags |= FLAG_UNPACK_PARSE_UTF8;
+
     sym.patptr = pat;
     sym.patend = patend;
     sym.flags  = flags;
@@ -796,58 +1027,29 @@ Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char
 
 STATIC
 I32
-S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, char *strend, char **new_s )
+S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
 {
     dSP;
-    I32 datumtype;
-    register I32 len = 0;
-    register I32 bits = 0;
-    register char *str;
+    I32 datumtype, ai32;
+    I32 len = 0;
     SV *sv;
     I32 start_sp_offset = SP - PL_stack_base;
     howlen_t howlen;
 
-    /* These must not be in registers: */
-    I16 ai16;
-    U16 au16;
-    I32 ai32;
-    U32 au32;
-#ifdef HAS_QUAD
-    Quad_t aquad;
-    Uquad_t auquad;
-#endif
-#if SHORTSIZE != SIZE16
-    short ashort;
-    unsigned short aushort;
-#endif
-    int aint;
-    unsigned int auint;
-    long along;
-#if LONGSIZE != SIZE32
-    unsigned long aulong;
-#endif
-    char *aptr;
-    float afloat;
-    double adouble;
-#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
-    long double aldouble;
-#endif
-    IV aiv;
-    UV auv;
-    NV anv;
-
     I32 checksum = 0;
     UV cuv = 0;
     NV cdouble = 0.0;
     const int bits_in_uv = 8 * sizeof(cuv);
     char* strrelbeg = s;
     bool beyond = FALSE;
+    bool explicit_length;
     bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
+    bool utf8 = (symptr->flags & FLAG_UNPACK_PARSE_UTF8) ? 1 : 0;
 
     while (next_symbol(symptr)) {
         datumtype = symptr->code;
        /* do first one only unless in list context
-          / is implemented by unpacking the count, then poping it from the
+          / is implemented by unpacking the count, then popping it from the
           stack, so must check that we're not in the middle of a /  */
         if ( unpack_only_one
             && (SP - PL_stack_base == start_sp_offset + 1)
@@ -864,12 +1066,14 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
         }
 
+        explicit_length = TRUE;
       redo_switch:
         beyond = s >= strend;
        {
            int which = (symptr->code & TYPE_IS_SHRIEKING)
                ? PACK_SIZE_SHRIEKING : PACK_SIZE_NORMAL;
-           int offset = TYPE_NO_MODIFIERS(datumtype) - packsize[which].first;
+           const int rawtype = TYPE_NO_MODIFIERS(datumtype);
+           int offset = rawtype - packsize[which].first;
 
            if (offset >= 0 && offset < packsize[which].size) {
                /* Data about this template letter  */
@@ -883,9 +1087,7 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                        len = howmany;
 
                    if (!checksum || (data & PACK_SIZE_CANNOT_CSUM)) {
-                       if (len && unpack_only_one &&
-                           !(data & PACK_SIZE_CANNOT_ONLY_ONE))
-                           len = 1;
+                       if (len && unpack_only_one) len = 1;
                        EXTEND(SP, len);
                        EXTEND_MORTAL(len);
                    }
@@ -906,7 +1108,6 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case '(':
        {
-           char *ss = s;               /* Move from register */
             tempsym_t savsym = *symptr;
            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
            symptr->flags |= group_modifiers;
@@ -915,45 +1116,94 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            PUTBACK;
            while (len--) {
                symptr->patptr = savsym.grpbeg;
-               unpack_rec(symptr, ss, strbeg, strend, &ss );
-                if (ss == strend && savsym.howlen == e_star)
+               if (utf8) symptr->flags |=  FLAG_UNPACK_PARSE_UTF8;
+               else      symptr->flags &= ~FLAG_UNPACK_PARSE_UTF8;
+               unpack_rec(symptr, s, strbeg, strend, &s);
+                if (s == strend && savsym.howlen == e_star)
                    break; /* No way to continue */
            }
            SPAGAIN;
-           s = ss;
            symptr->flags &= ~group_modifiers;
             savsym.flags = symptr->flags;
             *symptr = savsym;
            break;
        }
        case '@':
+           if (utf8) {
+               s = strrelbeg;
+               while (len > 0) {
+                   if (s >= strend)
+                       Perl_croak(aTHX_ "'@' outside of string in unpack");
+                   s += UTF8SKIP(s);
+                   len--;
+               }
+               if (s > strend)
+                   Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
+           } else {
            if (len > strend - strrelbeg)
                Perl_croak(aTHX_ "'@' outside of string in unpack");
            s = strrelbeg + len;
+           }
            break;
        case 'X' | TYPE_IS_SHRIEKING:
            if (!len)                   /* Avoid division by 0 */
                len = 1;
-           len = (s - strbeg) % len;
+           if (utf8) {
+               char *hop, *last;
+               I32 l;
+               for (l=len, hop = strbeg; hop < s; l++, hop += UTF8SKIP(hop))
+                   if (l == len) {
+                       last = hop;
+                       l = 0;
+                   }
+               s = last;
+               break;
+           } else len = (s - strbeg) % len;
            /* FALL THROUGH */
        case 'X':
+           if (utf8) {
+               while (len > 0) {
+                   if (s <= strbeg)
+                       Perl_croak(aTHX_ "'X' outside of string in unpack");
+                   while (UTF8_IS_CONTINUATION(*--s)) {
+                       if (s <= strbeg)
+                           Perl_croak(aTHX_ "'X' outside of string in unpack");
+                   }
+                   len--;
+               }
+           } else {
            if (len > s - strbeg)
                Perl_croak(aTHX_ "'X' outside of string in unpack" );
            s -= len;
+           }
            break;
        case 'x' | TYPE_IS_SHRIEKING:
            if (!len)                   /* Avoid division by 0 */
                len = 1;
-           aint = (s - strbeg) % len;
-           if (aint)                   /* Other portable ways? */
-               len = len - aint;
-           else
-               len = 0;
+           if (utf8) {
+               char *hop = strbeg;
+               I32 l = 0;
+               for (hop = strbeg; hop < s; hop += UTF8SKIP(hop)) l++;
+               if (s != hop)
+                   Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+               ai32 = l % len;
+           } else ai32 = (s - strbeg) % len;
+           if (ai32 == 0) break;
+           len -= ai32;
            /* FALL THROUGH */
        case 'x':
+           if (utf8) {
+               while (len>0) {
+                   if (s >= strend)
+                       Perl_croak(aTHX_ "'x' outside of string in unpack");
+                   s += UTF8SKIP(s);
+                   len--;
+               }
+           } else {
            if (len > strend - s)
                Perl_croak(aTHX_ "'x' outside of string in unpack");
            s += len;
+           };
            break;
        case '/':
            Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
@@ -961,38 +1211,60 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'A':
        case 'Z':
        case 'a':
-           if (len > strend - s)
-               len = strend - s;
-           if (checksum)
-               goto uchar_checksum;
-           sv = newSVpvn(s, len);
-           if (len > 0 && (datumtype == 'A' || datumtype == 'Z')) {
-               aptr = s;       /* borrow register */
-               if (datumtype == 'Z') { /* 'Z' strips stuff after first null */
-                   s = SvPVX(sv);
-                   while (*s)
-                       s++;
-                   if (howlen == e_star) /* exact for 'Z*' */
-                       len = s - SvPVX(sv) + 1;
+           if (checksum) {
+               /* Preliminary length estimate is assumed done in 'W' */
+               if (len > strend - s) len = strend - s;
+               goto W_checksum;
+           }
+           if (utf8) {
+               I32 l;
+               char *hop;
+               for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
+                   if (hop >= strend) {
+                       if (hop > strend)
+                           Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+                       break;
                }
-               else {          /* 'A' strips both nulls and spaces */
-                   s = SvPVX(sv) + len - 1;
-                   while (s >= SvPVX(sv) && (!*s || isSPACE(*s)))
-                       s--;
-                   *++s = '\0';
                }
-               SvCUR_set(sv, s - SvPVX(sv));
-               s = aptr;       /* unborrow register */
+               if (hop > strend)
+                   Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+               len = hop - s;
+           } else if (len > strend - s)
+               len = strend - s;
+
+           if (datumtype == 'Z') {
+               /* 'Z' strips stuff after first null */
+               char *ptr;
+               for (ptr = s; ptr < strend; ptr++) if (*ptr == 0) break;
+               sv = newSVpvn(s, ptr-s);
+               if (howlen == e_star) /* exact for 'Z*' */
+                   len = ptr-s + (ptr != strend ? 1 : 0);
+           } else if (datumtype == 'A') {
+               /* 'A' strips both nulls and spaces */
+               char *ptr;
+               for (ptr = s+len-1; ptr >= s; ptr--)
+                   if (*ptr != 0 && !isSPACE(*ptr)) break;
+               ptr++;
+               sv = newSVpvn(s, ptr-s);
+           } else sv = newSVpvn(s, len);
+
+           if (utf8) {
+               SvUTF8_on(sv);
+               /* Undo any upgrade done due to need_utf8() */
+               if (!(symptr->flags & FLAG_UNPACK_WAS_UTF8))
+                   sv_utf8_downgrade(sv, 0);
            }
-           s += len;
            XPUSHs(sv_2mortal(sv));
+           s += len;
            break;
        case 'B':
-       case 'b':
+       case 'b': {
+           char *str;
            if (howlen == e_star || len > (strend - s) * 8)
                len = (strend - s) * 8;
            if (checksum) {
                if (!PL_bitcount) {
+                   int bits;
                    Newz(601, PL_bitcount, 256, char);
                    for (bits = 1; bits < 256; bits++) {
                        if (bits & 1)   PL_bitcount[bits]++;
@@ -1005,93 +1277,110 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                        if (bits & 128) PL_bitcount[bits]++;
                    }
                }
+               if (utf8) {
+                   while (len >= 8 && s < strend) {
+                       cuv += PL_bitcount[next_uni_byte(aTHX_ &s, strend, datumtype)];
+                       len -= 8;
+                   }
+               } else {
                while (len >= 8) {
-                   cuv += PL_bitcount[*(unsigned char*)s++];
+                       cuv += PL_bitcount[*(U8 *)s++];
                    len -= 8;
                }
-               if (len) {
-                   bits = *s;
+               }
+               if (len && s < strend) {
+                   U8 bits;
+                   bits = NEXT_BYTE(utf8, s, strend, datumtype);
                    if (datumtype == 'b') {
                        while (len-- > 0) {
                            if (bits & 1) cuv++;
                            bits >>= 1;
                        }
-                   }
-                   else {
+                   } else {
                        while (len-- > 0) {
-                           if (bits & 128) cuv++;
+                           if (bits & 0x80) cuv++;
                            bits <<= 1;
                        }
                    }
                }
                break;
            }
-           sv = NEWSV(35, len + 1);
-           SvCUR_set(sv, len);
+
+           sv = sv_2mortal(NEWSV(35, len ? len : 1));
            SvPOK_on(sv);
            str = SvPVX(sv);
            if (datumtype == 'b') {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 7)                /*SUPPRESS 595*/
-                       bits >>= 1;
-                   else
-                       bits = *s++;
-                   *str++ = '0' + (bits & 1);
+               U8 bits;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 7) bits >>= 1;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = *(U8 *) s++;
+                   *str++ = bits & 1 ? '1' : '0';
                }
-           }
-           else {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 7)
-                       bits <<= 1;
-                   else
-                       bits = *s++;
-                   *str++ = '0' + ((bits & 128) != 0);
+           } else {
+               U8 bits;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 7) bits <<= 1;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = *(U8 *) s++;
+                   *str++ = bits & 0x80 ? '1' : '0';
                }
            }
            *str = '\0';
-           XPUSHs(sv_2mortal(sv));
+           SvCUR_set(sv, str - SvPVX(sv));
+           XPUSHs(sv);
            break;
+       }
        case 'H':
-       case 'h':
+       case 'h': {
+           char *str;
+             /* Preliminary length estimate, acceptable for utf8 too */
            if (howlen == e_star || len > (strend - s) * 2)
                len = (strend - s) * 2;
-           sv = NEWSV(35, len + 1);
-           SvCUR_set(sv, len);
+             sv = sv_2mortal(NEWSV(35, len ? len : 1));
            SvPOK_on(sv);
            str = SvPVX(sv);
            if (datumtype == 'h') {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 1)
-                       bits >>= 4;
-                   else
-                       bits = *s++;
+                 U8 bits;
+                 ai32 = len;
+                 for (len = 0; len < ai32; len++) {
+                     if (len & 1) bits >>= 4;
+                     else if (utf8) {
+                         if (s >= strend) break;
+                         bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+                     } else bits = * (U8 *) s++;
                    *str++ = PL_hexdigit[bits & 15];
                }
-           }
-           else {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 1)
-                       bits <<= 4;
-                   else
-                       bits = *s++;
+           } else {
+               U8 bits;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 1) bits <<= 4;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = next_uni_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = *(U8 *) s++;
                    *str++ = PL_hexdigit[(bits >> 4) & 15];
                }
            }
            *str = '\0';
-           XPUSHs(sv_2mortal(sv));
+           SvCUR_set(sv, str - SvPVX(sv));
+           XPUSHs(sv);
            break;
+       }
        case 'c':
            while (len-- > 0) {
-               aint = *s++;
+               int aint = NEXT_BYTE(utf8, s, strend, datumtype);
                if (aint >= 128)        /* fake up signed chars */
                    aint -= 256;
-               if (!checksum) {
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)aint)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aint;
                else
@@ -1099,55 +1388,98 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            }
            break;
        case 'C':
-       unpack_C: /* unpack U will jump here if not UTF-8 */
+       case 'W':
+         W_checksum:
             if (len == 0) {
-                symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
+                if (explicit_length && datumtype == 'C') 
+                   /* Switch to "character" mode */
+                   utf8 = (symptr->flags & FLAG_UNPACK_DO_UTF8) ? 1 : 0;
                break;
            }
-           if (checksum) {
-             uchar_checksum:
-               while (len-- > 0) {
-                   auint = *s++ & 255;
-                   cuv += auint;
-               }
+           if (datumtype == 'C' ? 
+                (symptr->flags & FLAG_UNPACK_DO_UTF8) && 
+               !(symptr->flags & FLAG_UNPACK_WAS_UTF8) : utf8) {
+               while (len-- > 0 && s < strend) {
+                   UV val;
+                   STRLEN retlen;
+                   val =
+                       UNI_TO_NATIVE(utf8n_to_uvuni(s, strend-s, &retlen,
+                                                    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY));
+                   if (retlen == (STRLEN) -1 || retlen == 0)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+                   s += retlen;
+                   if (!checksum)
+                       PUSHs(sv_2mortal(newSVuv((UV) val)));
+                   else if (checksum > bits_in_uv)
+                       cdouble += (NV) val;
+                   else
+                       cuv += val;
            }
-           else {
+           } else if (!checksum)
                while (len-- > 0) {
-                   auint = *s++ & 255;
-                   PUSHs(sv_2mortal(newSViv((IV)auint)));
-               }
+                   U8 ch = *(U8 *) s++;
+                   PUSHs(sv_2mortal(newSVuv((UV) ch)));
            }
+           else if (checksum > bits_in_uv)
+               while (len-- > 0) cdouble += (NV) *(U8 *) s++;
+           else
+               while (len-- > 0) cuv += *(U8 *) s++;
            break;
        case 'U':
            if (len == 0) {
-                symptr->flags |= FLAG_UNPACK_DO_UTF8;
+                if (explicit_length) {
+                   /* Switch to "bytes in UTF-8" mode */
+                   if (symptr->flags & FLAG_UNPACK_DO_UTF8) utf8 = 0;
+                   else
+                       /* Should be impossible due to the need_utf8() test */
+                       Perl_croak(aTHX_ "U0 mode on a byte string");
+               }
                break;
            }
-           if ((symptr->flags & FLAG_UNPACK_DO_UTF8) == 0)
-                goto unpack_C;
-           while (len-- > 0 && s < strend) {
-               STRLEN alen;
-               auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV));
-               along = alen;
-               s += along;
+           if (len > strend - s) len = strend - s;
                if (!checksum) {
-                   PUSHs(sv_2mortal(newSVuv((UV)auint)));
+               if (len && unpack_only_one) len = 1;
+               EXTEND(SP, len);
+               EXTEND_MORTAL(len);
                }
+           while (len-- > 0 && s < strend) {
+               STRLEN retlen;
+               UV auv;
+               if (utf8) {
+                   U8 result[UTF8_MAXLEN];
+                   char *ptr;
+                   STRLEN len;
+                   ptr = s;
+                   /* Bug: warns about bad utf8 even if we are short on bytes
+                      and will break out of the loop */
+                   if (!next_uni_bytes(aTHX_ &ptr, strend, result, 1))
+                       break;
+                   len = UTF8SKIP(result);
+                   if (!next_uni_bytes(aTHX_ &ptr, strend, &result[1], len-1))
+                       break;
+                   auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
+                   s = ptr;
+               } else {
+                   auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
+                   if (retlen == (STRLEN) -1 || retlen == 0)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+                   s += retlen;
+               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV) auv)));
                else if (checksum > bits_in_uv)
-                   cdouble += (NV)auint;
+                   cdouble += (NV) auv;
                else
-                   cuv += auint;
+                   cuv += auv;
            }
            break;
        case 's' | TYPE_IS_SHRIEKING:
 #if SHORTSIZE != SIZE16
            while (len-- > 0) {
-               COPYNN(s, &ashort, sizeof(short));
-               DO_BO_UNPACK(ashort, s);
-               s += sizeof(short);
-               if (!checksum) {
+               short ashort;
+               COPYVAR(s, strend, utf8, ashort, s);
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)ashort)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ashort;
                else
@@ -1159,16 +1491,25 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
 #endif
        case 's':
            while (len-- > 0) {
+               I16 ai16;
+
+#if U16SIZE > SIZE16
+               ai16 = 0;
+#endif
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend, 
+                                       OFF16(&ai16), SIZE16)) break;
+               } else {
                COPY16(s, &ai16);
+                   s += SIZE16;
+               }
                DO_BO_UNPACK(ai16, 16);
 #if U16SIZE > SIZE16
                if (ai16 > 32767)
                    ai16 -= 65536;
 #endif
-               s += SIZE16;
-               if (!checksum) {
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)ai16)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ai16;
                else
@@ -1178,12 +1519,10 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'S' | TYPE_IS_SHRIEKING:
 #if SHORTSIZE != SIZE16
            while (len-- > 0) {
-               COPYNN(s, &aushort, sizeof(unsigned short));
-               DO_BO_UNPACK(aushort, s);
-               s += sizeof(unsigned short);
-               if (!checksum) {
-                   PUSHs(sv_2mortal(newSViv((UV)aushort)));
-               }
+               unsigned short aushort;
+               COPYVAR(s, strend, utf8, aushort, s);
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV) aushort)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aushort;
                else
@@ -1197,9 +1536,18 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'n':
        case 'S':
            while (len-- > 0) {
+               U16 au16;
+#if U16SIZE > SIZE16
+               au16 = 0;
+#endif
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend, 
+                                       OFF16(&au16), SIZE16)) break;
+               } else {
                COPY16(s, &au16);
-               DO_BO_UNPACK(au16, 16);
                s += SIZE16;
+               }
+               DO_BO_UNPACK(au16, 16);
 #ifdef HAS_NTOHS
                if (datumtype == 'n')
                    au16 = PerlSock_ntohs(au16);
@@ -1208,46 +1556,53 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                if (datumtype == 'v')
                    au16 = vtohs(au16);
 #endif
-               if (!checksum) {
-                   PUSHs(sv_2mortal(newSViv((UV)au16)));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV)au16)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)au16;
                else
                    cuv += au16;
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'v' | TYPE_IS_SHRIEKING:
        case 'n' | TYPE_IS_SHRIEKING:
            while (len-- > 0) {
+               I16 ai16;
+# if U16SIZE > SIZE16
+               ai16 = 0;
+# endif
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend,
+                                       (char *) &ai16, sizeof(ai16))) break;
+               } else {
                COPY16(s, &ai16);
                s += SIZE16;
-#ifdef HAS_NTOHS
+               }
+# ifdef HAS_NTOHS
                if (datumtype == ('n' | TYPE_IS_SHRIEKING))
-                   ai16 = (I16)PerlSock_ntohs((U16)ai16);
-#endif
-#ifdef HAS_VTOHS
+                   ai16 = (I16) PerlSock_ntohs((U16) ai16);
+# endif /* HAS_NTOHS */
+# ifdef HAS_VTOHS
                if (datumtype == ('v' | TYPE_IS_SHRIEKING))
-                   ai16 = (I16)vtohs((U16)ai16);
-#endif
-               if (!checksum) {
+                   ai16 = (I16) vtohs((U16) ai16);
+# endif /* HAS_VTOHS */
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)ai16)));
-               }
                else if (checksum > bits_in_uv)
-                   cdouble += (NV)ai16;
+                   cdouble += (NV) ai16;
                else
                    cuv += ai16;
            }
            break;
+#endif /* PERL_PACK_CAN_SHRIEKSIGN */
        case 'i':
        case 'i' | TYPE_IS_SHRIEKING:
            while (len-- > 0) {
-               Copy(s, &aint, 1, int);
-               DO_BO_UNPACK(aint, i);
-               s += sizeof(int);
-               if (!checksum) {
+               int aint;
+               COPYVAR(s, strend, utf8, aint, i);
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)aint)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aint;
                else
@@ -1257,12 +1612,10 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'I':
        case 'I' | TYPE_IS_SHRIEKING:
            while (len-- > 0) {
-               Copy(s, &auint, 1, unsigned int);
-               DO_BO_UNPACK(auint, i);
-               s += sizeof(unsigned int);
-               if (!checksum) {
+               unsigned int auint;
+               COPYVAR(s, strend, utf8, auint, i);
+               if (!checksum)
                    PUSHs(sv_2mortal(newSVuv((UV)auint)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auint;
                else
@@ -1271,18 +1624,18 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'j':
            while (len-- > 0) {
-               Copy(s, &aiv, 1, IV);
+               IV aiv;
 #if IVSIZE == INTSIZE
-               DO_BO_UNPACK(aiv, i);
+               COPYVAR(s, strend, utf8, aiv, i);
 #elif IVSIZE == LONGSIZE
-               DO_BO_UNPACK(aiv, l);
+               COPYVAR(s, strend, utf8, aiv, l);
 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
-               DO_BO_UNPACK(aiv, 64);
+               COPYVAR(s, strend, utf8, aiv, 64);
+#else
+               Perl_croak(aTHX_ "'j' not supported on this platform");
 #endif
-               s += IVSIZE;
-               if (!checksum) {
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv(aiv)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aiv;
                else
@@ -1291,18 +1644,18 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'J':
            while (len-- > 0) {
-               Copy(s, &auv, 1, UV);
-#if UVSIZE == INTSIZE
-               DO_BO_UNPACK(auv, i);
-#elif UVSIZE == LONGSIZE
-               DO_BO_UNPACK(auv, l);
-#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
-               DO_BO_UNPACK(auv, 64);
+               UV auv;
+#if IVSIZE == INTSIZE
+               COPYVAR(s, strend, utf8, auv, i);
+#elif IVSIZE == LONGSIZE
+               COPYVAR(s, strend, utf8, auv, l);
+#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
+               COPYVAR(s, strend, utf8, auv, 64);
+#else
+               Perl_croak(aTHX_ "'J' not supported on this platform");
 #endif
-               s += UVSIZE;
-               if (!checksum) {
+               if (!checksum)
                    PUSHs(sv_2mortal(newSVuv(auv)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auv;
                else
@@ -1312,12 +1665,10 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'l' | TYPE_IS_SHRIEKING:
 #if LONGSIZE != SIZE32
            while (len-- > 0) {
-               COPYNN(s, &along, sizeof(long));
-               DO_BO_UNPACK(along, l);
-               s += sizeof(long);
-               if (!checksum) {
+               long along;
+               COPYVAR(s, strend, utf8, along, l);
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)along)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)along;
                else
@@ -1329,16 +1680,23 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
 #endif
        case 'l':
            while (len-- > 0) {
+               I32 ai32;
+#if U32SIZE > SIZE32
+               ai32 = 0;
+#endif
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend,
+                                       OFF32(&ai32), SIZE32)) break;
+               } else {
                COPY32(s, &ai32);
+                   s += SIZE32;
+               }
                DO_BO_UNPACK(ai32, 32);
 #if U32SIZE > SIZE32
-               if (ai32 > 2147483647)
-                   ai32 -= 4294967296;
+               if (ai32 > 2147483647) ai32 -= 4294967296;
 #endif
-               s += SIZE32;
-               if (!checksum) {
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)ai32)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ai32;
                else
@@ -1348,12 +1706,10 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'L' | TYPE_IS_SHRIEKING:
 #if LONGSIZE != SIZE32
            while (len-- > 0) {
-               COPYNN(s, &aulong, sizeof(unsigned long));
-               DO_BO_UNPACK(aulong, l);
-               s += sizeof(unsigned long);
-               if (!checksum) {
+               unsigned long aulong;
+               COPYVAR(s, strend, utf8, aulong, l);
+               if (!checksum)
                    PUSHs(sv_2mortal(newSVuv((UV)aulong)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aulong;
                else
@@ -1367,9 +1723,18 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'N':
        case 'L':
            while (len-- > 0) {
+               U32 au32;
+#if U32SIZE > SIZE32
+               au32 = 0;
+#endif
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend,
+                                       OFF32(&au32), SIZE32)) break;
+               } else {
                COPY32(s, &au32);
-               DO_BO_UNPACK(au32, 32);
                s += SIZE32;
+               }
+               DO_BO_UNPACK(au32, 32);
 #ifdef HAS_NTOHL
                if (datumtype == 'N')
                    au32 = PerlSock_ntohl(au32);
@@ -1378,46 +1743,57 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                if (datumtype == 'V')
                    au32 = vtohl(au32);
 #endif
-                if (!checksum) {
+               if (!checksum)
                     PUSHs(sv_2mortal(newSVuv((UV)au32)));
-                }
                 else if (checksum > bits_in_uv)
                     cdouble += (NV)au32;
                 else
                     cuv += au32;
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'V' | TYPE_IS_SHRIEKING:
        case 'N' | TYPE_IS_SHRIEKING:
            while (len-- > 0) {
+               I32 ai32;
+# if U32SIZE > SIZE32
+               ai32 = 0;
+# endif
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend,
+                                       OFF32(&ai32), SIZE32)) break;
+               } else {
                COPY32(s, &ai32);
                s += SIZE32;
-#ifdef HAS_NTOHL
+               }
+# ifdef HAS_NTOHL
                if (datumtype == ('N' | TYPE_IS_SHRIEKING))
                    ai32 = (I32)PerlSock_ntohl((U32)ai32);
-#endif
-#ifdef HAS_VTOHL
+# endif
+# ifdef HAS_VTOHL
                if (datumtype == ('V' | TYPE_IS_SHRIEKING))
                    ai32 = (I32)vtohl((U32)ai32);
-#endif
-               if (!checksum) {
+# endif
+               if (!checksum)
                    PUSHs(sv_2mortal(newSViv((IV)ai32)));
-               }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ai32;
                else
                    cuv += ai32;
            }
            break;
+#endif /* PERL_PACK_CAN_SHRIEKSIGN */
        case 'p':
            while (len-- > 0) {
-               if (sizeof(char*) > strend - s)
-                   break;
-               else {
-                   Copy(s, &aptr, 1, char*);
-                   DO_BO_UNPACK_P(aptr);
-                   s += sizeof(char*);
+               char *aptr;
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend,
+                                       (char *) &aptr, sizeof(aptr))) break;
+               } else {
+               Copy(s, &aptr, 1, char*);
+                   s += sizeof(aptr);
                }
+               DO_BO_UNPACK_P(aptr);
                /* newSVpv generates undef if aptr is NULL */
                PUSHs(sv_2mortal(newSVpv(aptr, 0)));
            }
@@ -1427,23 +1803,27 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                UV auv = 0;
                U32 bytes = 0;
                
-               while ((len > 0) && (s < strend)) {
-                   auv = (auv << 7) | (*s & 0x7f);
+               while (len > 0 && s < strend) {
+                   U8 ch;
+                   ch = NEXT_BYTE(utf8, s, strend, 'w');
+                   auv = (auv << 7) | (ch & 0x7f);
                    /* UTF8_IS_XXXXX not right here - using constant 0x80 */
-                   if ((U8)(*s++) < 0x80) {
+                   if (ch < 0x80) {
                        bytes = 0;
                        PUSHs(sv_2mortal(newSVuv(auv)));
                        len--;
                        auv = 0;
+                       continue;
                    }
-                   else if (++bytes >= sizeof(UV)) {   /* promote to string */
+                   if (++bytes >= sizeof(UV)) {        /* promote to string */
                        char *t;
                        STRLEN n_a;
 
                        sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
                        while (s < strend) {
-                           sv = mul128(sv, (U8)(*s & 0x7f));
-                           if (!(*s++ & 0x80)) {
+                           ch = NEXT_BYTE(utf8, s, strend, 'w');
+                           sv = mul128(sv, (U8)(ch & 0x7f));
+                           if (!(ch & 0x80)) {
                                bytes = 0;
                                break;
                            }
@@ -1465,27 +1845,28 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            if (symptr->howlen == e_star)
                Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
            EXTEND(SP, 1);
-           if (sizeof(char*) > strend - s)
-               break;
-           else {
+           if (sizeof(char*) <= strend - s) {
+               char *aptr;
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend, (char *) &aptr,
+                                       sizeof(aptr))) break;
+               } else {
                Copy(s, &aptr, 1, char*);
-               DO_BO_UNPACK_P(aptr);
-               s += sizeof(char*);
+                   s += sizeof(aptr);
            }
+               DO_BO_UNPACK_P(aptr);
            /* newSVpvn generates undef if aptr is NULL */
            PUSHs(sv_2mortal(newSVpvn(aptr, len)));
+           }
            break;
 #ifdef HAS_QUAD
        case 'q':
            while (len-- > 0) {
-               assert (s + sizeof(Quad_t) <= strend);
-               Copy(s, &aquad, 1, Quad_t);
-               DO_BO_UNPACK(aquad, 64);
-               s += sizeof(Quad_t);
-               if (!checksum) {
-                    PUSHs(sv_2mortal((aquad >= IV_MIN && aquad <= IV_MAX) ?
+               Quad_t aquad;
+               COPYVAR(s, strend, utf8, aquad, 64);
+               if (!checksum)
+                    PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
                                     newSViv((IV)aquad) : newSVnv((NV)aquad)));
-                }
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aquad;
                else
@@ -1494,72 +1875,86 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'Q':
            while (len-- > 0) {
-               assert (s + sizeof(Uquad_t) <= strend);
-               Copy(s, &auquad, 1, Uquad_t);
-               DO_BO_UNPACK(auquad, 64);
-               s += sizeof(Uquad_t);
-               if (!checksum) {
-                   PUSHs(sv_2mortal((auquad <= UV_MAX) ?
-                                    newSVuv((UV)auquad) : newSVnv((NV)auquad)));
-               }
+               Uquad_t auquad;
+               COPYVAR(s, strend, utf8, auquad, 64);
+               if (!checksum)
+                   PUSHs(sv_2mortal(auquad <= UV_MAX ?
+                                    newSVuv((UV)auquad):newSVnv((NV)auquad)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auquad;
                else
                    cuv += auquad;
            }
            break;
-#endif
+#endif /* HAS_QUAD */
        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
        case 'f':
            while (len-- > 0) {
+               float afloat;
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend, (char *) &afloat,
+                                       sizeof(afloat))) break;
+               } else {
                Copy(s, &afloat, 1, float);
-               DO_BO_UNPACK_N(afloat, float);
                s += sizeof(float);
-               if (!checksum) {
-                   PUSHs(sv_2mortal(newSVnv((NV)afloat)));
                }
-               else {
+               DO_BO_UNPACK_N(afloat, float);
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv((NV)afloat)));
+               else
                    cdouble += afloat;
                }
-           }
            break;
        case 'd':
            while (len-- > 0) {
+               double adouble;
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend, (char *) &adouble,
+                                       sizeof(adouble))) break;
+               } else {
                Copy(s, &adouble, 1, double);
-               DO_BO_UNPACK_N(adouble, double);
                s += sizeof(double);
-               if (!checksum) {
-                   PUSHs(sv_2mortal(newSVnv((NV)adouble)));
                }
-               else {
+               DO_BO_UNPACK_N(adouble, double);
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv((NV)adouble)));
+               else
                    cdouble += adouble;
                }
-           }
            break;
        case 'F':
            while (len-- > 0) {
+               NV anv;
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend,
+                                       (char *) &anv, sizeof(anv))) break;
+               } else {
                Copy(s, &anv, 1, NV);
-               DO_BO_UNPACK_N(anv, NV);
                s += NVSIZE;
-               if (!checksum) {
-                   PUSHs(sv_2mortal(newSVnv(anv)));
                }
-               else {
+               DO_BO_UNPACK_N(anv, NV);
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv(anv)));
+               else
                    cdouble += anv;
                }
-           }
            break;
 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
        case 'D':
            while (len-- > 0) {
+               long double aldouble;
+               if (utf8) {
+                   if (!next_uni_bytes(aTHX_ &s, strend, (char *) &aldouble,
+                                       sizeof(aldouble))) break;
+               } else {
                Copy(s, &aldouble, 1, long double);
-               DO_BO_UNPACK_N(aldouble, long double);
                s += LONG_DOUBLESIZE;
-               if (!checksum) {
-                   PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
-               }
-               else {cdouble += aldouble;
                }
+               DO_BO_UNPACK_N(aldouble, long double);
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
+               else
+                   cdouble += aldouble;
            }
            break;
 #endif
@@ -1580,11 +1975,38 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                  */
                 PL_uudmap[' '] = 0;
             }
-
-           along = (strend - s) * 3 / 4;
-           sv = NEWSV(42, along);
-           if (along)
-               SvPOK_on(sv);
+           {
+               STRLEN l = (STRLEN) (strend - s) * 3 / 4;
+               sv = sv_2mortal(NEWSV(42, l));
+               if (l) SvPOK_on(sv);
+           }
+           if (utf8) {
+               while (next_uni_uu(aTHX_ &s, strend, &len)) {
+                   I32 a, b, c, d;
+                   char hunk[4];
+
+                   hunk[3] = '\0';
+                   while (len > 0) {
+                       next_uni_uu(aTHX_ &s, strend, &a);
+                       next_uni_uu(aTHX_ &s, strend, &b);
+                       next_uni_uu(aTHX_ &s, strend, &c);
+                       next_uni_uu(aTHX_ &s, strend, &d);
+                       hunk[0] = (char)((a << 2) | (b >> 4));
+                       hunk[1] = (char)((b << 4) | (c >> 2));
+                       hunk[2] = (char)((c << 6) | d);
+                       sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
+                       len -= 3;
+                   }
+                   if (s < strend) {
+                       if (*s == '\n') s++;
+                       else {
+                           /* possible checksum byte */
+                           char *skip = s+UTF8SKIP(s);
+                           if (skip < strend && *skip == '\n') s = skip+1;
+                       }
+                   }
+               }
+           } else {
            while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
                I32 a, b, c, d;
                char hunk[4];
@@ -1620,24 +2042,25 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                    if (s + 1 < strend && s[1] == '\n')
                        s += 2;
            }
-           XPUSHs(sv_2mortal(sv));
+           }
+           XPUSHs(sv);
            break;
        }
 
        if (checksum) {
            if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
              (checksum > bits_in_uv &&
-              strchr("csSiIlLnNUvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
-               NV trouble;
+              strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
+               NV trouble, anv;
 
-                adouble = (NV) (1 << (checksum & 15));
+                anv = (NV) (1 << (checksum & 15));
                while (checksum >= 16) {
                    checksum -= 16;
-                   adouble *= 65536.0;
+                   anv *= 65536.0;
                }
                while (cdouble < 0.0)
-                   cdouble += adouble;
-               cdouble = Perl_modf(cdouble / adouble, &trouble) * adouble;
+                   cdouble += anv;
+               cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
                sv = newSVnv(cdouble);
            }
            else {
@@ -1670,6 +2093,7 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                Perl_croak(aTHX_ "Code missing after '/' in unpack" );
             }
             datumtype = symptr->code;
+            explicit_length = FALSE;
            goto redo_switch;
         }
     }
@@ -1687,18 +2111,11 @@ PP(pp_unpack)
     I32 gimme = GIMME_V;
     STRLEN llen;
     STRLEN rlen;
-    register char *pat = SvPV(left, llen);
-#ifdef PACKED_IS_OCTETS
-    /* Packed side is assumed to be octets - so force downgrade if it
-       has been UTF-8 encoded by accident
-     */
-    register char *s = SvPVbyte(right, rlen);
-#else
-    register char *s = SvPV(right, rlen);
-#endif
+    char *pat = SvPV(left,  llen);
+    char *s   = SvPV(right, rlen);
     char *strend = s + rlen;
-    register char *patend = pat + llen;
-    register I32 cnt;
+    char *patend = pat + llen;
+    I32 cnt;
 
     PUTBACK;
     cnt = unpackstring(pat, patend, s, strend,
@@ -2019,7 +2436,7 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
            }
            if ((I32)fromlen >= len) {
                sv_catpvn(cat, aptr, len);
-               if (datumtype == 'Z')
+               if (datumtype == 'Z' && len > 0)
                    *(SvEND(cat)-1) = '\0';
            }
            else {
@@ -2183,7 +2600,7 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
            while (len-- > 0) {
                fromstr = NEXTFROM;
                auint = UNI_TO_NATIVE(SvUV(fromstr));
-               SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
+               SvGROW(cat, SvCUR(cat) + UTF8_MAXBYTES + 1);
                SvCUR_set(cat,
                          (char*)uvchr_to_utf8_flags((U8*)SvEND(cat),
                                                     auint,
@@ -2281,7 +2698,9 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
            }
            break;
 #endif
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'n' | TYPE_IS_SHRIEKING:
+#endif
        case 'n':
            while (len-- > 0) {
                fromstr = NEXTFROM;
@@ -2292,7 +2711,9 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                CAT16(cat, &ai16);
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'v' | TYPE_IS_SHRIEKING:
+#endif
        case 'v':
            while (len-- > 0) {
                fromstr = NEXTFROM;
@@ -2497,7 +2918,9 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                sv_catpvn(cat, (char*)&aint, sizeof(int));
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'N' | TYPE_IS_SHRIEKING:
+#endif
        case 'N':
            while (len-- > 0) {
                fromstr = NEXTFROM;
@@ -2508,7 +2931,9 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                CAT32(cat, &au32);
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'V' | TYPE_IS_SHRIEKING:
+#endif
        case 'V':
            while (len-- > 0) {
                fromstr = NEXTFROM;
@@ -2589,8 +3014,8 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
        case 'p':
            while (len-- > 0) {
                fromstr = NEXTFROM;
-               if (fromstr == &PL_sv_undef)
-                   aptr = NULL;
+               SvGETMAGIC(fromstr);
+               if (!SvOK(fromstr)) aptr = NULL;
                else {
                    STRLEN n_a;
                    /* XXX better yet, could spirit away the string to
@@ -2606,9 +3031,9 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                                "Attempt to pack pointer to temporary value");
                    }
                    if (SvPOK(fromstr) || SvNIOK(fromstr))
-                       aptr = SvPV(fromstr,n_a);
+                       aptr = SvPV_flags(fromstr, n_a, 0);
                    else
-                       aptr = SvPV_force(fromstr,n_a);
+                       aptr = SvPV_force_flags(fromstr, n_a, 0);
                }
                DO_BO_PACK_P(aptr);
                sv_catpvn(cat, (char*)&aptr, sizeof(char*));