Retract #15745 for now; won't work without more Encode fixes.
[p5sagit/p5-mst-13.2.git] / ext / Encode / Encode.xs
index 52fee5a..741b679 100644 (file)
@@ -1,14 +1,18 @@
 #define PERL_NO_GET_CONTEXT
-
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
 #define U8 U8
 #include "encode.h"
-#include "8859.h"
-#include "EBCDIC.h"
-#include "Symbols.h"
+#include "def_t.h"
+
+#define ENCODE_XS_PROFILE 0 /* set 1 to profile.
+                              t/encoding.t dumps core because of
+                              Perl_warner and PerlIO don't work well */
 
+#define ENCODE_XS_USEFP   1 /* set 0 to disable floating point to calculate
+                              buffer size for encode_method().
+                              1 is recommended. 2 restores NI-S original  */
 
 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX;   \
                          Perl_croak(aTHX_ "panic_unimplemented"); \
@@ -91,7 +95,7 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
     if (!SvROK(e->enc)) {
        e->enc = Nullsv;
        errno = EINVAL;
-       Perl_warner(aTHX_ WARN_IO, "Cannot find encoding \"%" SVf "\"",
+       Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
                    arg);
        code = -1;
     }
@@ -118,7 +122,7 @@ PerlIOEncode_popped(pTHX_ PerlIO * f)
     }
     if (e->dataSV) {
        SvREFCNT_dec(e->dataSV);
-       e->bufsv = Nullsv;
+       e->dataSV = Nullsv;
     }
     return 0;
 }
@@ -227,7 +231,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
            SvPVX(e->dataSV) = (char *) ptr;
            SvLEN(e->dataSV) = 0;  /* Hands off sv.c - it isn't yours */
            SvCUR_set(e->dataSV,use);
-           SvPOK_on(e->dataSV);
+           SvPOK_only(e->dataSV);
        }
        SvUTF8_off(e->dataSV);
        PUSHMARK(sp);
@@ -255,7 +259,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
               (The copy is a pain - need a put-it-here option for decode.)
             */
            sv_setpvn(e->bufsv,s,len);
-           e->base.ptr = e->base.buf = (U8*)SvPVX(e->bufsv);
+           e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
            e->base.end = e->base.ptr + SvCUR(e->bufsv);
            PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
            SvUTF8_on(e->bufsv);
@@ -328,6 +332,11 @@ PerlIOEncode_flush(pTHX_ PerlIO * f)
            if (PerlIO_flush(PerlIONext(f)) != 0) {
                code = -1;
            }
+           if (SvCUR(e->bufsv)) {
+               /* Did not all translate */
+               e->base.ptr = e->base.buf+SvCUR(e->bufsv);
+               return code;
+           }
        }
        else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
            /* read case */
@@ -351,6 +360,7 @@ PerlIOEncode_flush(pTHX_ PerlIO * f)
                SvPVX(str) = (char*)e->base.ptr;
                SvLEN(str) = 0;
                SvCUR_set(str, e->base.end - e->base.ptr);
+               SvPOK_only(str);
                SvUTF8_on(str);
                PUSHMARK(sp);
                XPUSHs(e->enc);
@@ -384,6 +394,9 @@ PerlIOEncode_close(pTHX_ PerlIO * f)
     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
     IV code = PerlIOBase_close(aTHX_ f);
     if (e->bufsv) {
+       if (e->base.buf && e->base.ptr > e->base.buf) {
+           Perl_croak(aTHX_ "Close with partial character");
+       }
        SvREFCNT_dec(e->bufsv);
        e->bufsv = Nullsv;
     }
@@ -402,6 +415,9 @@ PerlIOEncode_tell(pTHX_ PerlIO * f)
        the UTF8 we have in bufefr and then ask layer below
      */
     PerlIO_flush(f);
+    if (b->buf && b->ptr > b->buf) {
+       Perl_croak(aTHX_ "Cannot tell at partial character");
+    }
     return PerlIO_tell(PerlIONext(f));
 }
 
@@ -422,7 +438,7 @@ PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
 PerlIO_funcs PerlIO_encode = {
     "encoding",
     sizeof(PerlIOEncode),
-    PERLIO_K_BUFFERED,
+    PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
     PerlIOEncode_pushed,
     PerlIOEncode_popped,
     PerlIOBuf_open,
@@ -470,6 +486,7 @@ Encode_XSEncoding(pTHX_ encode_t * enc)
 void
 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
 {
+ /* Exists for breakpointing */
 }
 
 static SV *
@@ -481,17 +498,20 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
     STRLEN tlen  = slen;
     STRLEN ddone = 0;
     STRLEN sdone = 0;
-    SV *dst = sv_2mortal(newSV(slen+1));
+
+    /* We allocate slen+1.  
+        PerlIO dumps core if this value is smaller than this. */
+    SV *dst = sv_2mortal(newSV(slen+1)); 
     if (slen) {
        U8 *d = (U8 *) SvPVX(dst);
        STRLEN dlen = SvLEN(dst)-1;
        int code;
        while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
            SvCUR_set(dst, dlen+ddone);
-           SvPOK_on(dst);
+           SvPOK_only(dst);
 
-#if 0
-           Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
+#if ENCODE_XS_PROFILE >= 3
+           Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d\n",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
 #endif
        
            if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
@@ -499,18 +519,30 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
 
            switch (code) {
            case ENCODE_NOSPACE:
-               {
-                   STRLEN need ;
+           {   
+                   STRLEN more, sleft;
                    sdone += slen;
                    ddone += dlen;
-                   if (sdone) {
-                       need = (tlen*SvLEN(dst)+sdone-1)/sdone+UTF8_MAXLEN;
-                   }
-                   else {
-                       need = SvLEN(dst) + UTF8_MAXLEN;
+                   sleft = tlen - sdone;
+                   if (sdone) { /* has src ever been processed ? */
+#if   ENCODE_XS_USEFP == 2
+                           more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
+                                   - SvLEN(dst);
+#elif ENCODE_XS_USEFP
+                           more = (1.0*SvLEN(dst)+1)/sdone * sleft;
+#else
+                           /* safe until SvLEN(dst) == MAX_INT/16 */
+                           more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
+#endif
                    }
-               
-                   d = (U8 *) SvGROW(dst, need);
+                   more += UTF8_MAXLEN; /* insurance policy */
+#if ENCODE_XS_PROFILE >= 2
+                 Perl_warn(aTHX_ 
+                 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
+                           more, sdone, sleft, SvLEN(dst));
+#endif
+                   d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
+                   /* dst need to grow need MORE bytes! */
                    if (ddone >= SvLEN(dst)) {
                        Perl_croak(aTHX_ "Destination couldn't be grown.");
                    }
@@ -519,7 +551,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
                    s   += slen;
                    slen = tlen-sdone;
                    continue;
-               }
+           }
 
            case ENCODE_NOREP:
                if (dir == enc->f_utf8) {
@@ -528,7 +560,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
                        UV ch =
                            utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
                                           &clen, 0);
-                       Perl_warner(aTHX_ WARN_UTF8,
+                       Perl_warner(aTHX_ packWARN(WARN_UTF8),
                                    "\"\\N{U+%" UVxf
                                    "}\" does not map to %s", ch,
                                    enc->name[0]);
@@ -542,10 +574,12 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
                    }
                }
                else {
-                   /* UTF-8 is supposed to be "Universal" so should not happen */
-                   Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
-                              enc->name[0], (int) (SvCUR(src) - slen),
-                              s + slen);
+                   /* UTF-8 is supposed to be "Universal" so should not happen
+                      for real characters, but some encodings have non-assigned
+                      codes which may occur.
+                    */
+                   Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
+                              enc->name[0], (U8) s[slen], code);
                }
                break;
 
@@ -557,20 +591,37 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
            }
        }
        SvCUR_set(dst, dlen+ddone);
-       SvPOK_on(dst);
+       SvPOK_only(dst);
        if (check) {
            sdone = SvCUR(src) - (slen+sdone);
            if (sdone) {
+#if 1
+               /* FIXME: A Move() is dangerous - PV could be mmap'ed readonly
+                  SvOOK would be ideal - but sv_backoff does not understand SvLEN == 0
+                  type SVs and sv_clear() calls it ...
+                */
+                sv_setpvn(src, (char*)s+slen, sdone);
+#else
                Move(s + slen, SvPVX(src), sdone , U8);
+#endif
            }
            SvCUR_set(src, sdone);
-           *SvEND(src) = '\0';
        }
     }
     else {
        SvCUR_set(dst, 0);
-       SvPOK_on(dst);
+       SvPOK_only(dst);
     }
+#if ENCODE_XS_PROFILE
+    if (SvCUR(dst) > SvCUR(src)){
+           Perl_warn(aTHX_ 
+                     "SvLEN(dst)=%d, SvCUR(dst)=%d. "
+                     "%d bytes unused(%f %%)\n",
+                     SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), 
+                     (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
+           
+    }
+#endif      
     *SvEND(dst) = '\0';
     return dst;
 }
@@ -580,6 +631,16 @@ MODULE = Encode            PACKAGE = Encode::XS    PREFIX = Method_
 PROTOTYPES: ENABLE
 
 void
+Method_name(obj)
+SV *   obj
+CODE:
+ {
+  encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+  ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
+  XSRETURN(1);
+ }
+
+void
 Method_decode(obj,src,check = FALSE)
 SV *   obj
 SV *   src
@@ -758,7 +819,5 @@ BOOT:
 #if defined(USE_PERLIO) && !defined(USE_SFIO)
  PerlIO_define_layer(aTHX_ &PerlIO_encode);
 #endif
-#include "8859_def.h"
-#include "EBCDIC_def.h"
-#include "Symbols_def.h"
+#include "def_t.exh"
 }