Integrate //depot/perlio into mainline
Nick Ing-Simmons [Mon, 22 Apr 2002 09:01:43 +0000 (09:01 +0000)]
p4raw-id: //depot/perl@16066

ext/Encode/Encode.pm
ext/Encode/Encode.xs
ext/Encode/t/perlio.t
ext/PerlIO/encoding/encoding.pm
ext/PerlIO/encoding/encoding.xs

index 0bf6a24..fb80200 100644 (file)
@@ -253,14 +253,18 @@ sub predefine_encodings{
            $_[1] = '' if $chk;
            return $octets;
        };
-       $Encode::Encoding{utf8} = 
+       $Encode::Encoding{utf8} =
            bless {Name => "utf8"} => "Encode::utf8";
     }
 }
 
 require Encode::Encoding;
+@Encode::XS::ISA = qw(Encode::Encoding);
 
-eval { 
+# This is very dodgy - PerlIO::encoding does "use Encode" and  _BEFORE_ it gets a
+# chance to set its VERSION we potentially delete it from %INC so it will be re-loaded
+# NI-S
+eval {
     require PerlIO::encoding;
     unless (PerlIO::encoding->VERSION >= 0.02){
        delete $INC{"PerlIO/encoding.pm"};
index c208af0..b898780 100644 (file)
 
 /* set 1 or more to profile.  t/encoding.t dumps core because of
    Perl_warner and PerlIO don't work well */
-#define ENCODE_XS_PROFILE 0 
+#define ENCODE_XS_PROFILE 0
 
 /* set 0 to disable floating point to calculate buffer size for
    encode_method().  1 is recommended. 2 restores NI-S original */
-#define ENCODE_XS_USEFP   1 
+#define ENCODE_XS_USEFP   1
 
 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX;   \
                          Perl_croak(aTHX_ "panic_unimplemented"); \
@@ -119,40 +119,40 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
        }
        case ENCODE_NOREP:
            /* encoding */      
-           if (dir == enc->f_utf8) { 
+           if (dir == enc->f_utf8) {
                STRLEN clen;
                UV ch =
-                   utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), 
+                   utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
                                   &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
                if (check & ENCODE_DIE_ON_ERR) {
                    Perl_croak(
-                       aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d", 
+                       aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d",
                        ch, enc->name[0], __LINE__);
                }else{
                    if (check & ENCODE_RETURN_ON_ERR){
                        if (check & ENCODE_WARN_ON_ERR){
                            Perl_warner(
                                aTHX_ packWARN(WARN_UTF8),
-                               "\"\\N{U+%" UVxf "}\" does not map to %s", 
+                               "\"\\N{U+%" UVxf "}\" does not map to %s",
                                ch,enc->name[0]);
                        }
                                goto ENCODE_SET_SRC;
                    }else if (check & ENCODE_PERLQQ){
-                       SV* perlqq = 
+                       SV* perlqq =
                            sv_2mortal(newSVpvf("\\x{%04x}", ch));
                        sdone += slen + clen;
                        ddone += dlen + SvCUR(perlqq);
                        sv_catsv(dst, perlqq);
-                   } else { 
+                   } else {
                        /* fallback char */
                        sdone += slen + clen;
-                       ddone += dlen + enc->replen; 
-                       sv_catpvn(dst, (char*)enc->rep, enc->replen); 
+                       ddone += dlen + enc->replen;
+                       sv_catpvn(dst, (char*)enc->rep, enc->replen);
                    }                   
-               } 
+               }
            }
            /* decoding */
-           else {           
+           else {
                if (check & ENCODE_DIE_ON_ERR){
                    Perl_croak(
                        aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
@@ -167,22 +167,22 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
                        }
                        goto ENCODE_SET_SRC;
                    }else if (check & ENCODE_PERLQQ){
-                       SV* perlqq = 
+                       SV* perlqq =
                            sv_2mortal(newSVpvf("\\x%02X", s[slen]));
                        sdone += slen + 1;
                        ddone += dlen + SvCUR(perlqq);
                        sv_catsv(dst, perlqq);
                    } else {
                        sdone += slen + 1;
-                       ddone += dlen + strlen(FBCHAR_UTF8); 
-                       sv_catpv(dst, FBCHAR_UTF8); 
+                       ddone += dlen + strlen(FBCHAR_UTF8);
+                       sv_catpv(dst, FBCHAR_UTF8);
                    }
                }
            }
            /* settle variables when fallback */
            d    = (U8 *)SvEND(dst);
-            dlen = SvLEN(dst) - ddone - 1;
-           s    = (U8*)SvPVX(src) + sdone; 
+           dlen = SvLEN(dst) - ddone - 1;
+           s    = (U8*)SvPVX(src) + sdone;
            slen = tlen - sdone;
            break;
 
@@ -205,10 +205,10 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
     if (code && !(check & ENCODE_RETURN_ON_ERR)) {
        return &PL_sv_undef;
     }
-    
+
     SvCUR_set(dst, dlen+ddone);
     SvPOK_only(dst);
-    
+
 #if ENCODE_XS_PROFILE
     if (SvCUR(dst) > SvCUR(src)){
        Perl_warn(aTHX_
@@ -217,7 +217,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
                  (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
     }
 #endif
-    
+
  ENCODE_END:
     *SvEND(dst) = '\0';
     return dst;
@@ -273,7 +273,7 @@ SV *    sv
 CODE:
 {
     SV * encoding = items == 2 ? ST(1) : Nullsv;
-    
+
     if (encoding)
     RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
     else {
@@ -310,7 +310,7 @@ CODE:
            /* Must do things the slow way */
            U8 *dest;
             /* We need a copy to pass to check() */
-           U8 *src  = (U8*)savepv((char *)s); 
+           U8 *src  = (U8*)savepv((char *)s);
            U8 *send = s + len;
 
            New(83, dest, len, U8); /* I think */
@@ -335,8 +335,8 @@ CODE:
                
                    /* Note change to utf8.c variable naming, for variety */
                    while (ulen--) {
-                       if ((*s & 0xc0) != 0x80){ 
-                           goto failure; 
+                       if ((*s & 0xc0) != 0x80){
+                           goto failure;
                        } else {
                            uv = (uv << 6) | (*s++ & 0x3f);
                        }
@@ -422,7 +422,7 @@ CODE:
 OUTPUT:
     RETVAL
 
-int 
+int
 WARN_ON_ERR()
 CODE:
     RETVAL = ENCODE_WARN_ON_ERR;
index 936eeb0..3381a12 100644 (file)
@@ -59,7 +59,7 @@ for my $e (qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/){
 
     # first create a file without perlio
     dump2file($sfile, &encode($e, $utext, 0));
-    
+
     # then create a file via perlio without autoflush
        
  SKIP:{
index 9aa0e9a..1d9c73f 100644 (file)
@@ -1,7 +1,8 @@
 package PerlIO::encoding;
-our $VERSION = '0.02';
+our $VERSION = '0.03';
 use XSLoader ();
-use Encode;
+use Encode (); # Load but do not import anything.
+our $check;
 XSLoader::load 'PerlIO::encoding';
 1;
 __END__
@@ -15,6 +16,9 @@ PerlIO::encoding - encoding layer
   open($f, "<:encoding(foo)", "infoo");
   open($f, ">:encoding(bar)", "outbar");
 
+  use Encode;
+  $PerlIO::encoding::check = Encode::FB_PERLQQ();
+
 =head1 DESCRIPTION
 
 Open a filehandle with a transparent encoding filter.
@@ -24,6 +28,10 @@ character set and encoding to Perl string data (Unicode and
 Perl's internal Unicode encoding, UTF-8).  On output, convert
 Perl string data into the specified character set and encoding.
 
+When the layer is pushed the current value of C<$PerlIO::encoding::check>
+is saved and used as the check argument when calling the Encodings
+encode and decode.
+
 =head1 SEE ALSO
 
 L<open>, L<Encode>, L<perlfunc/binmode>, L<perluniintro>
index 23de989..a864c8a 100644 (file)
@@ -45,11 +45,16 @@ typedef struct {
     SV *dataSV;                        /* data we have read from layer below */
     SV *enc;                   /* the encoding object */
     SV *chk;                    /* CHECK in Encode methods */
+    int flags;                 /* Flags currently just needs lines */
 } PerlIOEncode;
 
+#define NEEDS_LINES    1
 
-#define ENCODE_FB_QUIET "Encode::FB_QUIET"
-
+#if 0
+#define OUR_ENCODE_FB "Encode::FB_PERLQQ"
+#else
+#define OUR_ENCODE_FB "Encode::FB_QUIET"
+#endif
 
 SV *
 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
@@ -78,21 +83,12 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
     dSP;
     IV code;
+    SV *result = Nullsv;
     code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
     ENTER;
     SAVETMPS;
 
     PUSHMARK(sp);
-    PUTBACK;
-    if (call_pv(ENCODE_FB_QUIET, G_SCALAR|G_NOARGS) != 1) {
-       Perl_die(aTHX_ "Call to Encode::FB_QUIET failed!");
-       code = -1;
-    }
-    SPAGAIN;
-    e->chk = newSVsv(POPs);
-    PUTBACK;
-
-    PUSHMARK(sp);
     XPUSHs(arg);
     PUTBACK;
     if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
@@ -101,20 +97,52 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
        return -1;
     }
     SPAGAIN;
-    e->enc = POPs;
+    result = POPs;
     PUTBACK;
 
-    if (!SvROK(e->enc)) {
+    if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
        e->enc = Nullsv;
-       errno = EINVAL;
        Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
-                   arg); 
+                   arg);
+       errno = EINVAL;
        code = -1;
     }
     else {
-       SvREFCNT_inc(e->enc);
+#ifdef USE_NEW_SEQUENCE
+       PUSHMARK(sp);
+       XPUSHs(result);
+       PUTBACK;
+       if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
+           Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
+                       arg);
+       }
+       else {
+           SPAGAIN;
+           result = POPs;
+           PUTBACK;
+       }
+#endif
+       e->enc = newSVsv(result);
+       PUSHMARK(sp);
+       XPUSHs(e->enc);
+       PUTBACK;
+       if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
+           Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
+                       arg);
+       }
+       else {
+           SPAGAIN;
+           result = POPs;
+           PUTBACK;
+           if (SvTRUE(result)) {
+               e->flags |= NEEDS_LINES;
+           }
+       }
        PerlIOBase(f)->flags |= PERLIO_F_UTF8;
     }
+
+    e->chk = newSVsv(get_sv("PerlIO::encoding::check",0));
+
     FREETMPS;
     LEAVE;
     return code;
@@ -136,6 +164,10 @@ PerlIOEncode_popped(pTHX_ PerlIO * f)
        SvREFCNT_dec(e->dataSV);
        e->dataSV = Nullsv;
     }
+    if (e->chk) {
+       SvREFCNT_dec(e->chk);
+       e->dataSV = Nullsv;
+    }
     return 0;
 }
 
@@ -210,9 +242,9 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
                avail = 0;
        }
     }
-    if (avail > 0) {
+    if (avail > 0 || (e->flags & NEEDS_LINES)) {
        STDCHAR *ptr = PerlIO_get_ptr(n);
-       SSize_t use  = avail;
+       SSize_t use  = (avail >= 0) ? avail : 0;
        SV *uni;
        char *s;
        STRLEN len = 0;
@@ -223,12 +255,45 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
        if (SvTYPE(e->dataSV) < SVt_PV) {
            sv_upgrade(e->dataSV,SVt_PV);
        }
+       if (e->flags & NEEDS_LINES) {
+           /* Encoding needs whole lines (e.g. iso-2022-*)
+              search back from end of available data for
+              and line marker
+            */
+           STDCHAR *nl = ptr+use-1;
+           while (nl >= ptr) {
+               if (*nl == '\n') {
+                   break;
+               }
+               nl--;
+           }
+           if (nl >= ptr && *nl == '\n') {
+               /* found a line - take up to and including that */
+               use = (nl+1)-ptr;
+           }
+           else if (avail > 0) {
+               /* No line, but not EOF - append avail to the pending data */
+               sv_catpvn(e->dataSV, ptr, use);
+               PerlIO_set_ptrcnt(n, ptr+use, 0);
+               goto retry;
+           }
+           else if (!SvCUR(e->dataSV)) {
+               goto end_of_file;
+           }
+       }
        if (SvCUR(e->dataSV)) {
            /* something left over from last time - create a normal
               SV with new data appended
             */
            if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
-              use = e->base.bufsiz - SvCUR(e->dataSV);
+               if (e->flags & NEEDS_LINES) {
+                   /* Have to grow buffer */
+                   e->base.bufsiz = use + SvCUR(e->dataSV);
+                   PerlIOEncode_get_base(aTHX_ f);
+               }
+               else {
+                   use = e->base.bufsiz - SvCUR(e->dataSV);
+               }
            }
            sv_catpvn(e->dataSV,(char*)ptr,use);
        }
@@ -238,7 +303,14 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
                Safefree(SvPVX(e->dataSV));
            }
            if (use > (SSize_t)e->base.bufsiz) {
-              use = e->base.bufsiz;
+               if (e->flags & NEEDS_LINES) {
+                   /* Have to grow buffer */
+                   e->base.bufsiz = use;
+                   PerlIOEncode_get_base(aTHX_ f);
+               }
+               else {
+                   use = e->base.bufsiz;
+               }
            }
            SvPVX(e->dataSV) = (char *) ptr;
            SvLEN(e->dataSV) = 0;  /* Hands off sv.c - it isn't yours */
@@ -300,6 +372,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
        return code;
     }
     else {
+    end_of_file:
        if (avail == 0)
            PerlIOBase(f)->flags |= PERLIO_F_EOF;
        else
@@ -449,6 +522,38 @@ PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
     return f;
 }
 
+SSize_t
+PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
+{
+    PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+    if (e->flags & NEEDS_LINES) {
+       SSize_t done = 0;
+       const char *ptr = (const char *) vbuf;
+       const char *end = ptr+count;
+       while (ptr < end) {
+           const char *nl = ptr;
+           while (nl < end && *nl++ != '\n') /* empty body */;
+           done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
+           if (done != nl-ptr) {
+               if (done > 0) {
+                   ptr += done;
+               }
+               break;
+           }
+           ptr += done;
+           if (ptr[-1] == '\n') {
+               if (PerlIOEncode_flush(aTHX_ f) != 0) {
+                   break;
+               }
+           }
+       }
+       return (SSize_t) (ptr - (const char *) vbuf);
+    }
+    else {
+       return PerlIOBuf_write(aTHX_ f, vbuf, count);
+    }
+}
+
 PerlIO_funcs PerlIO_encode = {
     "encoding",
     sizeof(PerlIOEncode),
@@ -461,7 +566,7 @@ PerlIO_funcs PerlIO_encode = {
     PerlIOEncode_dup,
     PerlIOBuf_read,
     PerlIOBuf_unread,
-    PerlIOBuf_write,
+    PerlIOEncode_write,
     PerlIOBuf_seek,
     PerlIOEncode_tell,
     PerlIOEncode_close,
@@ -485,6 +590,19 @@ PROTOTYPES: ENABLE
 
 BOOT:
 {
+    SV *sv = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI);
+    sv_setiv(sv,0);
+    PUSHMARK(sp);
+    PUTBACK;
+    if (call_pv(OUR_ENCODE_FB, G_SCALAR) != 1) {
+       Perl_warner(aTHX_ packWARN(WARN_IO),
+                   "Call to %s failed!",OUR_ENCODE_FB);
+    }
+    else {
+       SPAGAIN;
+       sv_setsv(sv,POPs);
+       PUTBACK;
+    }
 #ifdef PERLIO_LAYERS
  PerlIO_define_layer(aTHX_ &PerlIO_encode);
 #endif