1 #define PERL_NO_GET_CONTEXT
11 #define BOM16LE 0xFFFe
12 #define BOM32LE 0xFFFe0000
14 #define valid_ucs2(x) ((0 <= (x) && (x) < 0xD800) || (0xDFFF < (x) && (x) <= 0xFFFF))
16 #define issurrogate(x) (0xD800 <= (x) && (x) <= 0xDFFF )
17 #define isHiSurrogate(x) (0xD800 <= (x) && (x) < 0xDC00 )
18 #define isLoSurrogate(x) (0xDC00 <= (x) && (x) <= 0xDFFF )
21 enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
26 croak("Partial character %c",(char) endian);
46 croak("Unknown endian %c",(char) endian);
54 enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
56 U8 *d = SvGROW(result,SvCUR(result)+size);
61 SvCUR_set(result,SvCUR(result)+size);
69 SvCUR_set(result,SvCUR(result)+size);
77 croak("Unknown endian %c",(char) endian);
82 #define ENCODE_XS_PROFILE 0 /* set 1 or more to profile.
83 t/encoding.t dumps core because of
84 Perl_warner and PerlIO don't work well */
86 #define ENCODE_XS_USEFP 1 /* set 0 to disable floating point to calculate
87 buffer size for encode_method().
88 1 is recommended. 2 restores NI-S original */
90 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
91 Perl_croak(aTHX_ "panic_unimplemented"); \
92 return (y)0; /* fool picky compilers */ \
94 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
95 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
96 #if defined(USE_PERLIO) && !defined(USE_SFIO)
97 /* Define an encoding "layer" in the perliol.h sense.
98 The layer defined here "inherits" in an object-oriented sense from the
99 "perlio" layer with its PerlIOBuf_* "methods".
100 The implementation is particularly efficient as until Encode settles down
101 there is no point in tryint to tune it.
103 The layer works by overloading the "fill" and "flush" methods.
105 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
106 to convert the encoded data to UTF-8 form, then copies it back to the
107 buffer. The "base class's" read methods then see the UTF-8 data.
109 "flush" transforms the UTF-8 data deposited by the "base class's write
110 method in the buffer back into the encoded form using the encode OO perl API,
111 then copies data back into the buffer and calls "SUPER::flush.
113 Note that "flush" is _also_ called for read mode - we still do the (back)-translate
114 so that the the base class's "flush" sees the correct number of encoded chars
115 for positioning the seek pointer. (This double translation is the worst performance
116 issue - particularly with all-perl encode engine.)
121 PerlIOBuf base; /* PerlIOBuf stuff */
122 SV *bufsv; /* buffer seen by layers above */
123 SV *dataSV; /* data we have read from layer below */
124 SV *enc; /* the encoding object */
128 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
130 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
131 SV *sv = &PL_sv_undef;
139 if (perl_call_method("name", G_SCALAR) == 1) {
149 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
151 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
154 code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
160 if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
161 /* should never happen */
162 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
168 if (!SvROK(e->enc)) {
171 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
176 SvREFCNT_inc(e->enc);
177 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
185 PerlIOEncode_popped(pTHX_ PerlIO * f)
187 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
189 SvREFCNT_dec(e->enc);
193 SvREFCNT_dec(e->bufsv);
197 SvREFCNT_dec(e->dataSV);
204 PerlIOEncode_get_base(pTHX_ PerlIO * f)
206 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
208 e->base.bufsiz = 1024;
210 e->bufsv = newSV(e->base.bufsiz);
211 sv_setpvn(e->bufsv, "", 0);
213 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
215 e->base.ptr = e->base.buf;
217 e->base.end = e->base.buf;
218 if (e->base.ptr < e->base.buf
219 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
220 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
221 e->base.buf + SvLEN(e->bufsv));
224 if (SvLEN(e->bufsv) < e->base.bufsiz) {
225 SSize_t poff = e->base.ptr - e->base.buf;
226 SSize_t eoff = e->base.end - e->base.buf;
227 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
228 e->base.ptr = e->base.buf + poff;
229 e->base.end = e->base.buf + eoff;
231 if (e->base.ptr < e->base.buf
232 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
233 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
234 e->base.buf + SvLEN(e->bufsv));
241 PerlIOEncode_fill(pTHX_ PerlIO * f)
243 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
248 if (PerlIO_flush(f) != 0)
251 if (!PerlIO_fast_gets(n)) {
252 /* Things get too messy if we don't have a buffer layer
253 push a :perlio to do the job */
255 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
257 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
263 avail = PerlIO_get_cnt(n);
265 avail = PerlIO_fill(n);
267 avail = PerlIO_get_cnt(n);
270 if (!PerlIO_error(n) && PerlIO_eof(n))
275 STDCHAR *ptr = PerlIO_get_ptr(n);
280 e->base.ptr = e->base.end = (STDCHAR *) Nullch;
281 (void) PerlIOEncode_get_base(aTHX_ f);
283 e->dataSV = newSV(0);
284 if (SvTYPE(e->dataSV) < SVt_PV) {
285 sv_upgrade(e->dataSV,SVt_PV);
287 if (SvCUR(e->dataSV)) {
288 /* something left over from last time - create a normal
289 SV with new data appended
291 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
292 use = e->base.bufsiz - SvCUR(e->dataSV);
294 sv_catpvn(e->dataSV,(char*)ptr,use);
297 /* Create a "dummy" SV to represent the available data from layer below */
298 if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
299 Safefree(SvPVX(e->dataSV));
301 if (use > e->base.bufsiz) {
302 use = e->base.bufsiz;
304 SvPVX(e->dataSV) = (char *) ptr;
305 SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
306 SvCUR_set(e->dataSV,use);
307 SvPOK_only(e->dataSV);
309 SvUTF8_off(e->dataSV);
315 if (perl_call_method("decode", G_SCALAR) != 1) {
316 Perl_die(aTHX_ "panic: decode did not return a value");
321 /* Now get translated string (forced to UTF-8) and use as buffer */
323 s = SvPVutf8(uni, len);
324 #ifdef PARANOID_ENCODE_CHECKS
325 if (len && !is_utf8_string((U8*)s,len)) {
326 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
332 /* if decode gave us back dataSV then data may vanish when
333 we do ptrcnt adjust - so take our copy now.
334 (The copy is a pain - need a put-it-here option for decode.)
336 sv_setpvn(e->bufsv,s,len);
337 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
338 e->base.end = e->base.ptr + SvCUR(e->bufsv);
339 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
342 /* Adjust ptr/cnt not taking anything which
343 did not translate - not clear this is a win */
344 /* compute amount we took */
345 use -= SvCUR(e->dataSV);
346 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
347 /* and as we did not take it it isn't pending */
348 SvCUR_set(e->dataSV,0);
350 /* Got nothing - assume partial character so we need some more */
351 /* Make sure e->dataSV is a normal SV before re-filling as
352 buffer alias will change under us
354 s = SvPV(e->dataSV,len);
355 sv_setpvn(e->dataSV,s,len);
356 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
365 PerlIOBase(f)->flags |= PERLIO_F_EOF;
367 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
373 PerlIOEncode_flush(pTHX_ PerlIO * f)
375 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
377 if (e->bufsv && (e->base.ptr > e->base.buf)) {
383 if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
384 /* Write case encode the buffer and write() to layer below */
389 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
394 if (perl_call_method("encode", G_SCALAR) != 1) {
395 Perl_die(aTHX_ "panic: encode did not return a value");
401 count = PerlIO_write(PerlIONext(f),s,len);
407 if (PerlIO_flush(PerlIONext(f)) != 0) {
410 if (SvCUR(e->bufsv)) {
411 /* Did not all translate */
412 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
416 else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
418 /* if we have any untranslated stuff then unread that first */
419 if (e->dataSV && SvCUR(e->dataSV)) {
420 s = SvPV(e->dataSV, len);
421 count = PerlIO_unread(PerlIONext(f),s,len);
426 /* See if there is anything left in the buffer */
427 if (e->base.ptr < e->base.end) {
428 /* Bother - have unread data.
429 re-encode and unread() to layer below
433 str = sv_newmortal();
434 sv_upgrade(str, SVt_PV);
435 SvPVX(str) = (char*)e->base.ptr;
437 SvCUR_set(str, e->base.end - e->base.ptr);
445 if (perl_call_method("encode", G_SCALAR) != 1) {
446 Perl_die(aTHX_ "panic: encode did not return a value");
452 count = PerlIO_unread(PerlIONext(f),s,len);
460 e->base.ptr = e->base.end = e->base.buf;
461 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
467 PerlIOEncode_close(pTHX_ PerlIO * f)
469 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
470 IV code = PerlIOBase_close(aTHX_ f);
472 if (e->base.buf && e->base.ptr > e->base.buf) {
473 Perl_croak(aTHX_ "Close with partial character");
475 SvREFCNT_dec(e->bufsv);
481 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
486 PerlIOEncode_tell(pTHX_ PerlIO * f)
488 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
489 /* Unfortunately the only way to get a postion is to (re-)translate,
490 the UTF8 we have in bufefr and then ask layer below
493 if (b->buf && b->ptr > b->buf) {
494 Perl_croak(aTHX_ "Cannot tell at partial character");
496 return PerlIO_tell(PerlIONext(f));
500 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
501 CLONE_PARAMS * params, int flags)
503 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
504 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
505 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
507 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
513 PerlIO_funcs PerlIO_encode = {
515 sizeof(PerlIOEncode),
516 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
534 PerlIOBase_setlinebuf,
535 PerlIOEncode_get_base,
539 PerlIOBuf_set_ptrcnt,
541 #endif /* encode layer */
544 Encode_XSEncoding(pTHX_ encode_t * enc)
547 HV *stash = gv_stashpv("Encode::XS", TRUE);
548 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
552 while (enc->name[i]) {
553 const char *name = enc->name[i++];
554 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
557 call_pv("Encode::define_encoding", G_DISCARD);
562 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
564 /* Exists for breakpointing */
568 encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
572 U8 *s = (U8 *) SvPV(src, slen);
577 /* We allocate slen+1.
578 PerlIO dumps core if this value is smaller than this. */
579 SV *dst = sv_2mortal(newSV(slen+1));
581 U8 *d = (U8 *) SvPVX(dst);
582 STRLEN dlen = SvLEN(dst)-1;
584 while ((code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))) {
585 SvCUR_set(dst, dlen+ddone);
588 #if ENCODE_XS_PROFILE >= 3
589 Perl_warn(aTHX_ "code=%d @ s=%d/%d/%d d=%d/%d/%d\n",code,slen,sdone,tlen,dlen,ddone,SvLEN(dst)-1);
592 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL)
598 STRLEN more = 0; /* make sure you initialize! */
602 sleft = tlen - sdone;
603 #if ENCODE_XS_PROFILE >= 2
605 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
606 more, sdone, sleft, SvLEN(dst));
608 if (sdone != 0) { /* has src ever been processed ? */
609 #if ENCODE_XS_USEFP == 2
610 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
612 #elif ENCODE_XS_USEFP
613 more = (1.0*SvLEN(dst)+1)/sdone * sleft;
615 /* safe until SvLEN(dst) == MAX_INT/16 */
616 more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
619 more += UTF8_MAXLEN; /* insurance policy */
620 #if ENCODE_XS_PROFILE >= 2
622 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
623 more, sdone, sleft, SvLEN(dst));
625 d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
626 /* dst need to grow need MORE bytes! */
627 if (ddone >= SvLEN(dst)) {
628 Perl_croak(aTHX_ "Destination couldn't be grown.");
630 dlen = SvLEN(dst)-ddone-1;
638 if (dir == enc->f_utf8) {
639 if (!check && ckWARN_d(WARN_UTF8)) {
642 utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
644 Perl_warner(aTHX_ packWARN(WARN_UTF8),
646 "}\" does not map to %s", ch,
648 /* FIXME: Skip over the character, copy in replacement and continue
649 * but that is messy so for now just fail.
658 /* UTF-8 is supposed to be "Universal" so should not happen
659 for real characters, but some encodings have non-assigned
660 codes which may occur.
662 Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
663 enc->name[0], (U8) s[slen], code);
668 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
669 code, (dir == enc->f_utf8) ? "to" : "from",
674 SvCUR_set(dst, dlen+ddone);
677 sdone = SvCUR(src) - (slen+sdone);
680 /* FIXME: A Move() is dangerous - PV could be mmap'ed readonly
681 SvOOK would be ideal - but sv_backoff does not understand SvLEN == 0
682 type SVs and sv_clear() calls it ...
684 sv_setpvn(src, (char*)s+slen, sdone);
686 Move(s + slen, SvPVX(src), sdone , U8);
689 SvCUR_set(src, sdone);
696 #if ENCODE_XS_PROFILE
697 if (SvCUR(dst) > SvCUR(src)){
699 "SvLEN(dst)=%d, SvCUR(dst)=%d. "
700 "%d bytes unused(%f %%)\n",
701 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
702 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
710 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
719 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
720 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
725 Method_decode(obj,src,check = FALSE)
731 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
732 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
738 Method_encode(obj,src,check = FALSE)
744 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
745 sv_utf8_upgrade(src);
746 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
750 MODULE = Encode PACKAGE = Encode::Unicode
753 decode_xs(obj, str, chk = &PL_sv_undef)
759 int size = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
760 U8 endian = *((U8 *)SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"endian",6,0)));
761 int ucs2 = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
762 SV *result = newSVpvn("",0);
764 U8 *s = SvPVbyte(str,ulen);
766 ST(0) = sv_2mortal(result);
769 if (!endian && s+size <= e) {
771 endian = (size == 4) ? 'N' : 'n';
772 bom = enc_unpack(aTHX_ &s,e,size,endian);
774 if (bom == BOM16LE) {
777 else if (bom == BOM32LE) {
781 croak("%s:Unregognised BOM %"UVxf,
782 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),bom);
786 /* Update endian for this sequence */
787 hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
790 while (s < e && s+size <= e) {
791 UV ord = enc_unpack(aTHX_ &s,e,size,endian);
793 if (size != 4 && !valid_ucs2(ord)) {
796 croak("%s:no surrogates allowed %"UVxf,
797 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
800 enc_unpack(aTHX_ &s,e,size,endian); /* skip the next one as well */
806 if (!isHiSurrogate(ord)) {
807 croak("%s:Malformed HI surrogate %"UVxf,
808 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
811 /* Partial character */
812 s -= size; /* back up to 1st half */
813 break; /* And exit loop */
815 lo = enc_unpack(aTHX_ &s,e,size,endian);
816 if (!isLoSurrogate(lo)){
817 croak("%s:Malformed LO surrogate %"UVxf,
818 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
820 ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00);
823 d = (U8 *) SvGROW(result,SvCUR(result)+UTF8_MAXLEN+1);
824 d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
825 SvCUR_set(result,d - (U8 *)SvPVX(result));
829 Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
830 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
831 Move(s,SvPVX(str),e-s,U8);
832 SvCUR_set(str,(e-s));
843 encode_xs(obj, utf8, chk = &PL_sv_undef)
849 int size = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
850 U8 endian = *((U8 *)SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"endian",6,0)));
851 int ucs2 = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
852 SV *result = newSVpvn("",0);
854 U8 *s = SvPVutf8(utf8,ulen);
856 ST(0) = sv_2mortal(result);
858 endian = (size == 4) ? 'N' : 'n';
859 enc_pack(aTHX_ result,size,endian,BOM_BE);
861 /* Update endian for this sequence */
862 hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
865 while (s < e && s+UTF8SKIP(s) <= e) {
867 UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
869 if (size != 4 && !valid_ucs2(ord)) {
870 if (!issurrogate(ord)){
873 croak("%s:code point \"\\x{"UVxf"}\" too high",
874 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),ord);
876 enc_pack(aTHX_ result,size,endian,FBCHAR);
878 UV hi = ((ord - 0x10000) >> 10) + 0xD800;
879 UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00;
880 enc_pack(aTHX_ result,size,endian,hi);
881 enc_pack(aTHX_ result,size,endian,lo);
885 /* not supposed to happen */
886 enc_pack(aTHX_ result,size,endian,FBCHAR);
890 enc_pack(aTHX_ result,size,endian,ord);
895 Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
896 SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
897 Move(s,SvPVX(utf8),e-s,U8);
898 SvCUR_set(utf8,(e-s));
908 MODULE = Encode PACKAGE = Encode
913 _bytes_to_utf8(sv, ...)
917 SV * encoding = items == 2 ? ST(1) : Nullsv;
920 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
923 U8* s = (U8*)SvPV(sv, len);
926 converted = bytes_to_utf8(s, &len); /* This allocs */
927 sv_setpvn(sv, (char *)converted, len);
928 SvUTF8_on(sv); /* XXX Should we? */
929 Safefree(converted); /* ... so free it */
937 _utf8_to_bytes(sv, ...)
941 SV * to = items > 1 ? ST(1) : Nullsv;
942 SV * check = items > 2 ? ST(2) : Nullsv;
945 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
948 U8 *s = (U8*)SvPV(sv, len);
952 /* Must do things the slow way */
954 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
957 New(83, dest, len, U8); /* I think */
966 /* Have to do it all ourselves because of error routine,
970 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
971 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
972 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
973 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
974 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
975 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
976 else { ulen = 13; uv = 0; }
978 /* Note change to utf8.c variable naming, for variety */
980 if ((*s & 0xc0) != 0x80)
984 uv = (uv << 6) | (*s++ & 0x3f);
988 call_failure(check, s, dest, src);
989 /* Now what happens? */
995 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
1002 is_utf8(sv, check = FALSE)
1007 if (SvGMAGICAL(sv)) /* it could be $1, for example */
1008 sv = newSVsv(sv); /* GMAGIG will be done */
1010 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
1013 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1019 SvREFCNT_dec(sv); /* it was a temp copy */
1030 SV *rsv = newSViv(SvUTF8(sv));
1034 RETVAL = &PL_sv_undef;
1046 SV *rsv = newSViv(SvUTF8(sv));
1050 RETVAL = &PL_sv_undef;
1058 #if defined(USE_PERLIO) && !defined(USE_SFIO)
1059 PerlIO_define_layer(aTHX_ &PerlIO_encode);
1061 #include "def_t.exh"