From: Rafael Garcia-Suarez Date: Wed, 9 Mar 2005 22:17:33 +0000 (+0000) Subject: Further pack optimisations by Ton Hospel X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3473cf637176ce9e9e990cc9b108dfc4974b52c4;p=p5sagit%2Fp5-mst-13.2.git Further pack optimisations by Ton Hospel p4raw-id: //depot/perl@24012 --- diff --git a/pp_pack.c b/pp_pack.c index 6c9b92a..204b17c 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -39,6 +39,8 @@ #ifndef CHAR_BIT # define CHAR_BIT 8 #endif +/* Maximum number of bytes to which a byte can grow due to upgrade */ +#define UTF8_EXPAND 2 /* * Offset for integer pack/unpack. @@ -648,7 +650,7 @@ STMT_START { \ #define GROWING(utf8, cat, start, cur, in_len) \ STMT_START { \ STRLEN glen = (in_len); \ - if (utf8) glen *= 2; \ + if (utf8) glen *= UTF8_EXPAND; \ if ((cur) + glen >= (start) + SvLEN(cat)) { \ (start) = sv_exp_grow(aTHX_ cat, glen); \ (cur) = (start) + SvCUR(cat); \ @@ -659,7 +661,7 @@ STMT_START { \ STMT_START { \ STRLEN glen = (in_len); \ STRLEN gl = glen; \ - if (utf8) gl *= 2; \ + if (utf8) gl *= UTF8_EXPAND; \ if ((cur) + gl >= (start) + SvLEN(cat)) { \ *cur = '\0'; \ SvCUR(cat) = (cur) - (start); \ @@ -2335,8 +2337,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { return; } - /* We assume a char translates to at most 2 UTF-8 bytes */ - len = (from_end-from_ptr)*2+(from_ptr-from_start)+1; + len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; New('U', to_start, len, char); Copy(from_start, to_start, from_ptr-from_start, char); to_ptr = to_start + (from_ptr-from_start); @@ -2643,9 +2644,10 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) fromlen = len; if (datumtype == 'Z' && fromlen > 0) fromlen--; } - /* assumes a byte expands to at most 2 bytes on upgrade: - expected_length <= from_len*2 + (len-from_len) */ - GROWING(0, cat, start, cur, fromlen+len); + /* assumes a byte expands to at most UTF8_EXPAND bytes on + upgrade, so: + expected_length <= from_len*UTF8_EXPAND + (len-from_len) */ + GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); len -= fromlen; while (fromlen > 0) { cur = uvchr_to_utf8(cur, * (U8 *) aptr); @@ -2921,11 +2923,12 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) endb = uvuni_to_utf8_flags(buffer, auv, ckWARN(WARN_UTF8) ? 0 : UNICODE_ALLOW_ANY); - if (cur >= end-(endb-buffer)*2) { + if (cur+(endb-buffer)*UTF8_EXPAND >= end) { *cur = '\0'; SvCUR(cat) = cur - start; - GROWING(0, cat, start, cur, len+(endb-buffer)*2); - end = start+SvLEN(cat)-UTF8_MAXLEN; + GROWING(0, cat, start, cur, + len+(endb-buffer)*UTF8_EXPAND); + end = start+SvLEN(cat); } bytes_to_uni(aTHX_ buffer, endb-buffer, &cur); } else {