Upgrade to Encode 2.00.
[p5sagit/p5-mst-13.2.git] / ext / Encode / Unicode / Unicode.xs
index 4e21de9..6dadbdc 100644 (file)
@@ -1,11 +1,13 @@
 /*
- $Id: Unicode.xs,v 1.2 2002/04/19 05:36:43 dankogai Exp $
+ $Id: Unicode.xs,v 2.0 2004/05/16 20:55:16 dankogai Exp $
  */
 
 #define PERL_NO_GET_CONTEXT
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
+#define U8 U8
+#include "../Encode/encode.h"
 
 #define FBCHAR                 0xFFFd
 #define BOM_BE                 0xFeFF
@@ -52,14 +54,14 @@ enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
 void
 enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
 {
-    U8 *d = (U8 *)SvGROW(result,SvCUR(result)+size);
+    U8 *d = (U8 *)SvGROW(result,SvCUR(result)+size+1);
     switch(endian) {
     case 'v':
     case 'V':
        d += SvCUR(result);
        SvCUR_set(result,SvCUR(result)+size);
        while (size--) {
-           *d++ = value & 0xFF;
+           *d++ = (U8)(value & 0xFF);
            value >>= 8;
        }
        break;
@@ -68,7 +70,7 @@ enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
        SvCUR_set(result,SvCUR(result)+size);
        d += SvCUR(result);
        while (size--) {
-           *--d = value & 0xFF;
+           *--d = (U8)(value & 0xFF);
            value >>= 8;
        }
        break;
@@ -80,17 +82,23 @@ enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
 
 MODULE = Encode::Unicode PACKAGE = Encode::Unicode
 
+PROTOTYPES: DISABLE
+
+#define attr(k, l)  (hv_exists((HV *)SvRV(obj),k,l) ? \
+    *hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef)
+
 void
-decode_xs(obj, str, chk = &PL_sv_undef)
+decode_xs(obj, str, check = 0)
 SV *   obj
 SV *   str
-SV *   chk
+IV     check
 CODE:
 {
-    int size    = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
-    U8 endian   = *((U8 *)SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"endian",6,0)));
-    int ucs2    = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
-    SV *result = newSVpvn("",0);
+    U8 endian   = *((U8 *)SvPV_nolen(attr("endian", 6)));
+    int size    =   SvIV(attr("size",   4));
+    int ucs2    = SvTRUE(attr("ucs2",   4));
+    int clone   = SvTRUE(attr("clone",  5));
+    SV *result  = newSVpvn("",0);
     STRLEN ulen;
     U8 *s = (U8 *)SvPVbyte(str,ulen);
     U8 *e = (U8 *)SvEND(str);
@@ -109,14 +117,16 @@ CODE:
                endian = 'V';
            }
            else {
-               croak("%s:Unregognised BOM %"UVxf,
-                      SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),
+               croak("%"SVf":Unrecognised BOM %"UVxf,
+                      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
                      bom);
            }
        }
-#if 0
-       /* Update endian for this sequence */
-       hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+#if 1
+       /* Update endian for next sequence */
+       if (clone) {
+           hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+       }
 #endif
     }
     while (s < e && s+size <= e) {
@@ -124,22 +134,22 @@ CODE:
        U8 *d;
        if (size != 4 && invalid_ucs2(ord)) {
            if (ucs2) {
-               if (SvTRUE(chk)) {
-                   croak("%s:no surrogates allowed %"UVxf,
-                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),
+               if (check) {
+                   croak("%"SVf":no surrogates allowed %"UVxf,
+                         *hv_fetch((HV *)SvRV(obj),"Name",4,0),
                          ord);
                }
                if (s+size <= e) {
                     /* skip the next one as well */
-                   enc_unpack(aTHX_ &s,e,size,endian); 
+                   enc_unpack(aTHX_ &s,e,size,endian);
                }
                ord = FBCHAR;
            }
            else {
                UV lo;
                if (!isHiSurrogate(ord)) {
-                   croak("%s:Malformed HI surrogate %"UVxf,
-                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),
+                   croak("%"SVf":Malformed HI surrogate %"UVxf,
+                         *hv_fetch((HV *)SvRV(obj),"Name",4,0),
                          ord);
                }
                if (s+size > e) {
@@ -149,8 +159,8 @@ CODE:
                }
                lo = enc_unpack(aTHX_ &s,e,size,endian);
                if (!isLoSurrogate(lo)){
-                   croak("%s:Malformed LO surrogate %"UVxf,
-                         SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),
+                   croak("%"SVf":Malformed LO surrogate %"UVxf,
+                         *hv_fetch((HV *)SvRV(obj),"Name",4,0),
                          ord);
                }
                ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
@@ -160,10 +170,15 @@ CODE:
        d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
        SvCUR_set(result,d - (U8 *)SvPVX(result));
     }
-    if (SvTRUE(chk)) {
+    if (s < e) {
+       /* unlikely to happen because it's fixed-length -- dankogai */
+       if (check & ENCODE_WARN_ON_ERR){
+           Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
+                       *hv_fetch((HV *)SvRV(obj),"Name",4,0));
+       }
+    }
+    if (check && !(check & ENCODE_LEAVE_SRC)){
        if (s < e) {
-           Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
-                       SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
            Move(s,SvPVX(str),e-s,U8);
            SvCUR_set(str,(e-s));
        }
@@ -176,16 +191,17 @@ CODE:
 }
 
 void
-encode_xs(obj, utf8, chk = &PL_sv_undef)
-    SV *       obj
+encode_xs(obj, utf8, check = 0)
+SV *   obj
 SV *   utf8
-SV *   chk
+IV     check
 CODE:
 {
-    int size   = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
-    U8 endian = *((U8 *)SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"endian",6,0)));
-    int ucs2   = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
-    SV *result = newSVpvn("",0);
+    U8 endian   = *((U8 *)SvPV_nolen(attr("endian", 6)));
+    int size    =   SvIV(attr("size",   4));
+    int ucs2    = SvTRUE(attr("ucs2",   4));
+    int clone   = SvTRUE(attr("clone",  5));
+    SV *result  = newSVpvn("",0);
     STRLEN ulen;
     U8 *s = (U8 *)SvPVutf8(utf8,ulen);
     U8 *e = (U8 *)SvEND(utf8);
@@ -193,9 +209,11 @@ CODE:
     if (!endian) {
        endian = (size == 4) ? 'N' : 'n';
        enc_pack(aTHX_ result,size,endian,BOM_BE);
-#if 0
-       /* Update endian for this sequence */
-       hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+#if 1
+       /* Update endian for next sequence */
+       if (clone){
+           hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+       }
 #endif
     }
     while (s < e && s+UTF8SKIP(s) <= e) {
@@ -205,11 +223,9 @@ CODE:
        if (size != 4 && invalid_ucs2(ord)) {
            if (!issurrogate(ord)){
                if (ucs2) {
-                   if (SvTRUE(chk)) {
-                       croak("%s:code point \"\\x{"UVxf"}\" too high",
-                             SvPV_nolen(
-                                 *hv_fetch((HV *)SvRV(obj),"Name",4,0))
-                             ,ord);
+                   if (check) {
+                       croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
+                                 *hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
                    }
                    enc_pack(aTHX_ result,size,endian,FBCHAR);
                }else{
@@ -228,10 +244,20 @@ CODE:
            enc_pack(aTHX_ result,size,endian,ord);
        }
     }
-    if (SvTRUE(chk)) {
+    if (s < e) {
+       /* UTF-8 partial char happens often on PerlIO.
+          Since this is okay and normal, we do not warn.
+          But this is critical when you choose to LEAVE_SRC
+          in which case we die */
+       if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){
+           Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
+                      "when CHECK = 0x%" UVuf,
+                      *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
+       }
+       
+    }
+    if (check && !(check & ENCODE_LEAVE_SRC)){
        if (s < e) {
-           Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
-                       SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
            Move(s,SvPVX(utf8),e-s,U8);
            SvCUR_set(utf8,(e-s));
        }
@@ -239,7 +265,7 @@ CODE:
            SvCUR_set(utf8,0);
        }
        *SvEND(utf8) = '\0';
-    }
+    } 
     XSRETURN(1);
 }