extra code in pp_concat, Take 2
[p5sagit/p5-mst-13.2.git] / pp_pack.c
index 1e470b7..5ee841b 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -183,9 +183,9 @@ S_mul128(pTHX_ SV *sv, U8 m)
 #define TYPE_NO_MODIFIERS(t)   ((t) & 0xFF)
 
 #ifdef PERL_PACK_CAN_SHRIEKSIGN
-#define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV"
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
 #else
-#define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
 #endif
 
 #ifndef PERL_PACK_CAN_BYTEORDER
@@ -197,8 +197,8 @@ S_mul128(pTHX_ SV *sv, U8 m)
 
 # define DO_BO_UNPACK(var, type)
 # define DO_BO_PACK(var, type)
-# define DO_BO_UNPACK_PTR(var, type, pre_cast)
-# define DO_BO_PACK_PTR(var, type, pre_cast)
+# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
+# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
 # define DO_BO_UNPACK_N(var, type)
 # define DO_BO_PACK_N(var, type)
 # define DO_BO_UNPACK_P(var)
@@ -229,28 +229,28 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-# define DO_BO_UNPACK_PTR(var, type, pre_cast)                                \
+# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)                     \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:                                          \
-              var = (void *) my_betoh ## type ((pre_cast) var);               \
+              var = (post_cast*) my_betoh ## type ((pre_cast) var);           \
               break;                                                          \
             case TYPE_IS_LITTLE_ENDIAN:                                       \
-              var = (void *) my_letoh ## type ((pre_cast) var);               \
+              var = (post_cast *) my_letoh ## type ((pre_cast) var);          \
               break;                                                          \
             default:                                                          \
               break;                                                          \
           }                                                                   \
         } STMT_END
 
-# define DO_BO_PACK_PTR(var, type, pre_cast)                                  \
+# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)                       \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:                                          \
-              var = (void *) my_htobe ## type ((pre_cast) var);               \
+              var = (post_cast *) my_htobe ## type ((pre_cast) var);          \
               break;                                                          \
             case TYPE_IS_LITTLE_ENDIAN:                                       \
-              var = (void *) my_htole ## type ((pre_cast) var);               \
+              var = (post_cast *) my_htole ## type ((pre_cast) var);          \
               break;                                                          \
             default:                                                          \
               break;                                                          \
@@ -274,11 +274,15 @@ S_mul128(pTHX_ SV *sv, U8 m)
          } STMT_END
 
 # if PTRSIZE == INTSIZE
-#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, i, int)
-#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, i, int)
+#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, i, int, void)
+#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, i, int, void)
+#  define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
+#  define DO_BO_PACK_PC(var)   DO_BO_PACK_PTR(var, i, int, char)
 # elif PTRSIZE == LONGSIZE
-#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, l, long)
-#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, l, long)
+#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, l, long, void)
+#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, l, long, void)
+#  define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
+#  define DO_BO_PACK_PC(var)   DO_BO_PACK_PTR(var, l, long, char)
 # else
 #  define DO_BO_UNPACK_P(var)  BO_CANT_DOIT(unpack, pointer)
 #  define DO_BO_PACK_P(var)    BO_CANT_DOIT(pack, pointer)
@@ -585,7 +589,7 @@ uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
        val &= 0xff;
     }
     *s += retlen;
-    return val;
+    return (U8)val;
 }
 
 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
@@ -612,7 +616,7 @@ uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
            bad |= 2;
            val &= 0xff;
        }
-       *(U8 *)buf++ = val;
+       *(U8 *)buf++ = (U8)val;
     }
     /* We have enough characters for the buffer. Did we have problems ? */
     if (bad) {
@@ -704,7 +708,7 @@ STMT_START {                                        \
     if (utf8) gl *= UTF8_EXPAND;               \
     if ((cur) + gl >= (start) + SvLEN(cat)) {  \
         *cur = '\0';                           \
-        SvCUR(cat) = (cur) - (start);          \
+        SvCUR_set((cat), (cur) - (start));     \
        (start) = sv_exp_grow(aTHX_ cat, gl);   \
        (cur) = (start) + SvCUR(cat);           \
     }                                          \
@@ -761,13 +765,18 @@ S_measure_struct(pTHX_ tempsym_t* symptr)
                Perl_croak(aTHX_ "Invalid type '%c' in %s",
                           (int)TYPE_NO_MODIFIERS(symptr->code),
                           symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           case '.' | TYPE_IS_SHRIEKING:
+           case '@' | TYPE_IS_SHRIEKING:
+#endif
            case '@':
+           case '.':
            case '/':
            case 'U':                   /* XXXX Is it correct? */
            case 'w':
            case 'u':
                Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
-                          (int)symptr->code,
+                          (int) TYPE_NO_MODIFIERS(symptr->code),
                           symptr->flags & FLAG_PACK ? "pack" : "unpack" );
            case '%':
                size = 0;
@@ -1073,7 +1082,7 @@ need_utf8(const char *pat, const char *patend)
     while (pat < patend) {
        if (pat[0] == '#') {
            pat++;
-           pat = memchr(pat, '\n', patend-pat);
+           pat = (char *) memchr(pat, '\n', patend-pat);
            if (!pat) return FALSE;
        } else if (pat[0] == 'U') {
            if (first || pat[1] == '0') return TRUE;
@@ -1088,7 +1097,7 @@ first_symbol(const char *pat, const char *patend) {
     while (pat < patend) {
        if (pat[0] != '#') return pat[0];
        pat++;
-       pat = memchr(pat, '\n', patend-pat);
+       pat = (char *) memchr(pat, '\n', patend-pat);
        if (!pat) return 0;
        pat++;
     }
@@ -1177,11 +1186,11 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
     UV cuv = 0;
     NV cdouble = 0.0;
     const int bits_in_uv = CHAR_BIT * sizeof(cuv);
-    char* strrelbeg = s;
     bool beyond = FALSE;
     bool explicit_length;
     bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
+    symptr->strbeg = s - strbeg;
 
     while (next_symbol(symptr)) {
        packprops_t props;
@@ -1242,6 +1251,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
            symptr->flags |= group_modifiers;
             symptr->patend = savsym.grpend;
+           symptr->previous = &savsym;
             symptr->level++;
            PUTBACK;
            while (len--) {
@@ -1253,14 +1263,46 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
                    break; /* No way to continue */
            }
            SPAGAIN;
-           symptr->flags &= ~group_modifiers;
-            savsym.flags = symptr->flags;
+            savsym.flags = symptr->flags & ~group_modifiers;
             *symptr = savsym;
            break;
        }
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '.' | TYPE_IS_SHRIEKING:
+#endif
+       case '.': {
+           char *from;
+           SV *sv;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+           bool u8 = utf8;
+#endif
+           if (howlen == e_star) from = strbeg;
+           else if (len <= 0) from = s;
+           else {
+               tempsym_t *group = symptr;
+
+               while (--len && group) group = group->previous;
+               from = group ? strbeg + group->strbeg : strbeg;
+           }
+           sv = from <= s ?
+               newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
+               newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
+           XPUSHs(sv_2mortal(sv));
+           break;
+       }
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '@' | TYPE_IS_SHRIEKING:
+#endif
        case '@':
-           if (utf8) {
-               s = strrelbeg;
+           s = strbeg + symptr->strbeg;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+           if (utf8)
+#endif
+           {
                while (len > 0) {
                    if (s >= strend)
                        Perl_croak(aTHX_ "'@' outside of string in unpack");
@@ -1270,9 +1312,9 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
                if (s > strend)
                    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
            } else {
-               if (len > strend - strrelbeg)
+               if (strend-s < len)
                    Perl_croak(aTHX_ "'@' outside of string in unpack");
-               s = strrelbeg + len;
+               s += len;
            }
            break;
        case 'X' | TYPE_IS_SHRIEKING:
@@ -1379,7 +1421,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
                            !is_utf8_space((U8 *) ptr)) break;
                    if (ptr >= s) ptr += UTF8SKIP(ptr);
                    else ptr++;
-                   if (ptr > s+len) 
+                   if (ptr > s+len)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                } else {
                    for (ptr = s+len-1; ptr >= s; ptr--)
@@ -1898,7 +1940,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
            while (len-- > 0) {
                char *aptr;
                SHIFT_VAR(utf8, s, strend, aptr, datumtype);
-               DO_BO_UNPACK_P(aptr);
+               DO_BO_UNPACK_PC(aptr);
                /* newSVpv generates undef if aptr is NULL */
                PUSHs(sv_2mortal(newSVpv(aptr, 0)));
            }
@@ -1953,7 +1995,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char
            if (sizeof(char*) <= strend - s) {
                char *aptr;
                SHIFT_VAR(utf8, s, strend, aptr, datumtype);
-               DO_BO_UNPACK_P(aptr);
+               DO_BO_UNPACK_PC(aptr);
                /* newSVpvn generates undef if aptr is NULL */
                PUSHs(sv_2mortal(newSVpvn(aptr, len)));
            }
@@ -2402,7 +2444,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
 
     if (SvOOK(sv)) {
        if (SvIVX(sv)) {
-           SvLEN(sv)  += SvIVX(sv);
+           SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
            from_start -= SvIVX(sv);
            SvIV_set(sv, 0);
        }
@@ -2410,9 +2452,9 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
     }
     if (SvLEN(sv) != 0)
        Safefree(from_start);
-    SvPVX(sv) = to_start;
-    SvCUR(sv) = to_ptr - to_start;
-    SvLEN(sv) = len;
+    SvPV_set(sv, to_start);
+    SvCUR_set(sv, to_ptr - to_start);
+    SvLEN_set(sv, len);
     SvUTF8_on(sv);
 }
 
@@ -2513,30 +2555,65 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                       (int) TYPE_NO_MODIFIERS(datumtype));
        case '%':
            Perl_croak(aTHX_ "'%%' may not be used in pack");
+       {
+           char *from;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '.' | TYPE_IS_SHRIEKING:
+#endif
+       case '.':
+           if (howlen == e_star) from = start;
+           else if (len == 0) from = cur;
+           else {
+               tempsym_t *group = symptr;
+
+               while (--len && group) group = group->previous;
+               from = group ? start + group->strbeg : start;
+           }
+           fromstr = NEXTFROM;
+           len = SvIV(fromstr);
+           goto resize;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '@' | TYPE_IS_SHRIEKING:
+#endif
        case '@':
-           if (utf8) {
-               char *s = start + symptr->strbeg;
-               while (len > 0 && s < cur) {
-                   s += UTF8SKIP(s);
-                   len--;
+           from = start + symptr->strbeg;
+         resize:
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+           if (utf8)
+#endif
+               if (len >= 0) {
+                   while (len && from < cur) {
+                       from += UTF8SKIP(from);
+                       len--;
+                   }
+                   if (from > cur)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+                   if (len) {
+                       /* Here we know from == cur */
+                     grow:
+                       GROWING(0, cat, start, cur, len);
+                       Zero(cur, len, char);
+                       cur += len;
+                   } else if (from < cur) {
+                       len = cur - from;
+                       goto shrink;
+                   } else goto no_change;
+               } else {
+                   cur = from;
+                   len = -len;
+                   goto utf8_shrink;
                }
-               if (s > cur)
-                   Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
-               if (len > 0) {
-                 grow:
-                   GROWING(0, cat, start, cur, len);
-                   Zero(cur, len, char);
-                   cur += len;
-               } else if (s < cur) cur = s;
-               else goto no_change;
-           } else {
-               len -= cur - (start+symptr->strbeg);
+           else {
+               len -= cur - from;
                if (len > 0) goto grow;
+               if (len == 0) goto no_change;
                len = -len;
-               if (len > 0) goto shrink;
-               else goto no_change;
+               goto shrink;
            }
            break;
+       }
        case '(': {
             tempsym_t savsym = *symptr;
            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
@@ -2585,19 +2662,23 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
        case 'X':
            if (utf8) {
                if (len < 1) goto no_change;
+             utf8_shrink:
                while (len > 0) {
                    if (cur <= start)
-                       Perl_croak(aTHX_ "'X' outside of string in pack");
+                       Perl_croak(aTHX_ "'%c' outside of string in pack",
+                                  (int) TYPE_NO_MODIFIERS(datumtype));
                    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
                        if (cur <= start)
-                           Perl_croak(aTHX_ "'X' outside of string in pack");
+                           Perl_croak(aTHX_ "'%c' outside of string in pack",
+                                      (int) TYPE_NO_MODIFIERS(datumtype));
                    }
                    len--;
                }
            } else {
              shrink:
                if (cur - start < len)
-                   Perl_croak(aTHX_ "'X' outside of string in pack");
+                   Perl_croak(aTHX_ "'%c' outside of string in pack",
+                              (int) TYPE_NO_MODIFIERS(datumtype));
                cur -= len;
            }
            if (cur < start+symptr->strbeg) {
@@ -2899,7 +2980,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                  W_utf8:
                    if (cur > end) {
                        *cur = '\0';
-                       SvCUR(cat) = cur - start;
+                       SvCUR_set(cat, cur - start);
 
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
@@ -2912,7 +2993,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    if (auv >= 0x100) {
                        if (!SvUTF8(cat)) {
                            *cur = '\0';
-                           SvCUR(cat) = cur - start;
+                           SvCUR_set(cat, cur - start);
                            marked_upgrade(aTHX_ cat, symptr);
                            lookahead.flags |= FLAG_DO_UTF8;
                            lookahead.strbeg = symptr->strbeg;
@@ -2929,11 +3010,11 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    }
                    if (cur >= end) {
                        *cur = '\0';
-                       SvCUR(cat) = cur - start;
+                       SvCUR_set(cat, cur - start);
                        GROWING(0, cat, start, cur, len+1);
                        end = start+SvLEN(cat)-1;
                    }
-                   *(U8 *) cur++ = auv;
+                   *(U8 *) cur++ = (U8)auv;
                }
            }
            break;
@@ -2964,7 +3045,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                                               0 : UNICODE_ALLOW_ANY);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
                        *cur = '\0';
-                       SvCUR(cat) = cur - start;
+                       SvCUR_set(cat, cur - start);
                        GROWING(0, cat, start, cur,
                                len+(endb-buffer)*UTF8_EXPAND);
                        end = start+SvLEN(cat);
@@ -2973,7 +3054,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                } else {
                    if (cur >= end) {
                        *cur = '\0';
-                       SvCUR(cat) = cur - start;
+                       SvCUR_set(cat, cur - start);
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
@@ -3206,7 +3287,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 
                if (anv < 0) {
                    *cur = '\0';
-                   SvCUR(cat) = cur - start;
+                   SvCUR_set(cat, cur - start);
                    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
                }
 
@@ -3415,7 +3496,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    else
                        aptr = SvPV_force_flags(fromstr, n_a, 0);
                }
-               DO_BO_PACK_P(aptr);
+               DO_BO_PACK_PC(aptr);
                PUSH_VAR(utf8, cur, aptr);
            }
            break;
@@ -3452,7 +3533,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
                                      'u' | TYPE_IS_PACK)) {
                        *cur = '\0';
-                       SvCUR(cat) = cur - start;
+                       SvCUR_set(cat, cur - start);
                        Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
                    }
                    end = doencodes(hunk, buffer, todo);
@@ -3467,7 +3548,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
        }
        }
        *cur = '\0';
-       SvCUR(cat) = cur - start;
+       SvCUR_set(cat, cur - start);
       no_change:
        *symptr = lookahead;
     }