From: Gurusamy Sarathy Date: Tue, 11 Jul 2000 18:49:43 +0000 (+0000) Subject: integrate cfgperl changes#6261..6266 into mainline X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b250498faaf6fbd04315d2b632649596e2498c42;p=p5sagit%2Fp5-mst-13.2.git integrate cfgperl changes#6261..6266 into mainline p4raw-link: @6266 on //depot/cfgperl: a009ce76c9b4ddbde44a58eab3fe27d331cf27fe p4raw-link: @6261 on //depot/cfgperl: 27d76ecff97d0a9449f569d789504cc8b69a6d01 p4raw-id: //depot/perl@6363 p4raw-integrated: from //depot/cfgperl@6362 'copy in' README.epoc epoc/createpkg.pl epoc/epocish.c (@5586..) epoc/epocish.h t/comp/require.t (@5639..) cygwin/Makefile.SHs (@6096..) ext/POSIX/POSIX.pm (@6140..) hints/bsdos.sh (@6156..) epoc/config.sh (@6168..) ext/POSIX/POSIX.xs (@6198..) p4raw-integrated: from //depot/cfgperl@6265 'copy in' ext/POSIX/POSIX.pod (@5586..) p4raw-integrated: from //depot/cfgperl@6263 'copy in' doop.c (@6256..) p4raw-integrated: from //depot/cfgperl@6261 'merge in' pod/perldiag.pod (@6206..) toke.c (@6250..) --- diff --git a/README.epoc b/README.epoc index b4bcca6..2163c46 100644 --- a/README.epoc +++ b/README.epoc @@ -4,7 +4,7 @@ Perl 5 README file for the EPOC operating system. Olaf Flebbe http://www.linuxstart.com/~oflebbe/perl/perl5.html -2000-02-20 +2000-05-15 ===================================================================== Introduction @@ -13,9 +13,8 @@ Introduction EPOC is a OS for palmtops and mobile phones. For more informations look at: http://www.symbian.com/ -This is a port of Perl version 5.5.650 to EPOC. It runs on the Perl -Series 5, Series 5mx and the Psion Revo. I have no reports for other -EPOC devices. +This is a port of Perl version 5.6.0 to EPOC. It runs on the Perl +Series 5, Series 5mx and the Psion Revo and on the Ericson M128. Features are left out, because of restrictions of the POSIX support. @@ -157,4 +156,4 @@ Support Status I'm offering this port "as is". You can ask me questions, but I can't guarantee I'll be able to answer them; I don't know much about Perl -internals myself; +internals myself. diff --git a/cygwin/Makefile.SHs b/cygwin/Makefile.SHs index ca083d4..120e8ee 100644 --- a/cygwin/Makefile.SHs +++ b/cygwin/Makefile.SHs @@ -157,10 +157,15 @@ esac # libperl.a is _the_ library both in dll and static cases # $(LIBPERL)$(LIB_EXT) expands to this name dependless of build model # +# NOTE: The "-Wl,-Bstatic $(LLIBPERL) -Wl,-Bdynamic" is required to give +# the import library linking priority over the dynamic library, since both +# the .dll and .a are in the same directory. When the new standard for +# naming import/dynamic/static libraries emerges this should be updated. +# $spitshell >>Makefile <<'!NO!SUBS!' perl: $& perlmain$(OBJ_EXT) $(LIBPERL)$(LIB_EXT) $(DYNALOADER) $(static_ext) ext.libs - $(SHRPENV) $(LDLIBPTH) $(CC) $(LARGE) $(CLDFLAGS) $(CCDLFLAGS) -o perl perlmain$(OBJ_EXT) $(DYNALOADER) $(static_ext) $(LLIBPERL) `cat ext.libs` $(libs) + $(SHRPENV) $(LDLIBPTH) $(CC) $(LARGE) $(CLDFLAGS) $(CCDLFLAGS) -o perl perlmain$(OBJ_EXT) $(DYNALOADER) $(static_ext) -Wl,-Bstatic $(LLIBPERL) -Wl,-Bdynamic `cat ext.libs` $(libs) pureperl: $& perlmain$(OBJ_EXT) $(LIBPERL)$(LIB_EXT) $(DYNALOADER) $(static_ext) ext.libs $(SHRPENV) $(LDLIBPTH) purify $(CC) $(LARGE) $(CLDFLAGS) $(CCDLFLAGS) -o pureperl perlmain$(OBJ_EXT) $(DYNALOADER) $(static_ext) $(LLIBPERL) `cat ext.libs` $(libs) diff --git a/doop.c b/doop.c index 7dc5a2b..4a74309 100644 --- a/doop.c +++ b/doop.c @@ -21,14 +21,27 @@ #endif #endif + +#define HALF_UPGRADE(start,end) { \ + U8* new; \ + STRLEN len; \ + len = end-start; \ + new = bytes_to_utf8(start, &len); \ + Copy(new,start,len,U8*); \ + end = start + len; \ + } + + STATIC I32 -S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */ +S_do_trans_simple(pTHX_ SV *sv) { dTHR; U8 *s; + U8 *d; U8 *send; + U8 *dstart; I32 matches = 0; - I32 hasutf = SvUTF8(sv); + I32 sutf = SvUTF8(sv); STRLEN len; short *tbl; I32 ch; @@ -40,19 +53,46 @@ S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */ s = (U8*)SvPV(sv, len); send = s + len; + /* First, take care of non-UTF8 input strings, because they're easy */ + if (!sutf) { while (s < send) { - if (hasutf && *s & 0x80) - s+=UTF8SKIP(s); /* Given that we're here because tbl is !UTF8...*/ - else { if ((ch = tbl[*s]) >= 0) { matches++; - *s = ch; - } + *s++ = ch; + } else s++; } - } SvSETMAGIC(sv); + return matches; + } + /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ + Newz(0, d, len*2+1, U8); + dstart = d; + while (s < send) { + I32 ulen; + short c; + + ulen = 1; + /* Need to check this, otherwise 128..255 won't match */ + c = utf8_to_uv(s, &ulen); + if (c < 0x100 && (ch = tbl[(short)c]) >= 0) { + matches++; + if (ch < 0x80) + *d++ = ch; + else + d = uv_to_utf8(d,ch); + s += ulen; + } else { /* No match -> copy */ + while (ulen--) + *d++ = *s++; + } + } + *d='\0'; + sv_setpvn(sv, dstart, d - dstart); + SvUTF8_on(sv); + SvLEN_set(sv, 2*len+1); + SvSETMAGIC(sv); return matches; } @@ -78,9 +118,16 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ if (hasutf && *s & 0x80) s+=UTF8SKIP(s); else { - if (tbl[*s] >= 0) + UV c; + I32 ulen; + ulen = 1; + if (hasutf) + c = utf8_to_uv(s,&ulen); + else + c = *s; + if (c < 0x100 && tbl[c] >= 0) matches++; - s++; + s+=ulen; } } @@ -88,7 +135,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ } STATIC I32 -S_do_trans_complex(pTHX_ SV *sv)/* SPC - OK */ +S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ { dTHR; U8 *s; @@ -191,30 +238,15 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ if ((uv = swash_fetch(rv, s)) < none) { s += UTF8SKIP(s); matches++; - if (uv & 0x80 && !isutf) { - /* Sneaky-upgrade dstart...d */ - U8* new; - STRLEN len; - len = dstart - d; - new = bytes_to_utf8(dstart, &len); - Copy(new,dstart,len,U8*); - d = dstart + len; - isutf++; - } + if (uv & 0x80 && !isutf++) + HALF_UPGRADE(dstart,d); d = uv_to_utf8(d, uv); } else if (uv == none) { int i; i = UTF8SKIP(s); - if (i > 1 && !isutf) { - U8* new; - STRLEN len; - len = dstart - d; - new = bytes_to_utf8(dstart, &len); - Copy(new,dstart,len,U8*); - d = dstart + len; - isutf++; - } + if (i > 1 && !isutf++) + HALF_UPGRADE(dstart,d); while(i--) *d++ = *s++; } @@ -223,23 +255,15 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ i = UTF8SKIP(s); s += i; matches++; - if (i > 1 && !isutf) { - U8* new; - STRLEN len; - len = dstart - d; - new = bytes_to_utf8(dstart, &len); - Copy(new,dstart,len,U8*); - d = dstart + len; - isutf++; - } + if (i > 1 && !isutf++) + HALF_UPGRADE(dstart,d); d = uv_to_utf8(d, final); } else s += UTF8SKIP(s); } *d = '\0'; - SvPV_set(sv, dstart); - SvCUR_set(sv, d - dstart); + sv_setpvn(sv, dstart, d - dstart); SvSETMAGIC(sv); if (isutf) SvUTF8_on(sv); @@ -285,8 +309,6 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ U8 *d; I32 matches = 0; I32 squash = PL_op->op_private & OPpTRANS_SQUASH; - I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF; - I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF; I32 del = PL_op->op_private & OPpTRANS_DELETE; SV* rv = (SV*)cSVOP->op_sv; HV* hv = (HV*)SvRV(rv); @@ -297,6 +319,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ UV uv; STRLEN len; U8 *dst; + I32 isutf = SvUTF8(sv); s = (U8*)SvPV(sv, len); send = s + len; @@ -305,27 +328,14 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ if (svp) final = SvUV(*svp); - if (PL_op->op_private & OPpTRANS_GROWS) { - I32 bits = 16; - - svp = hv_fetch(hv, "BITS", 4, FALSE); - if (svp) - bits = (I32)SvIV(*svp); - - Newz(801, d, len * (bits >> 3) + 1, U8); + Newz(0, d, len*2+1, U8); dst = d; - } - else { - d = s; - dst = 0; - } if (squash) { UV puv = 0xfeedface; while (s < send) { - if (from_utf) { + if (SvUTF8(sv)) uv = swash_fetch(rv, s); - } else { U8 tmpbuf[2]; uv = *s++; @@ -337,63 +347,42 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } uv = swash_fetch(rv, tmpbuf); } + if (uv < none) { matches++; if (uv != puv) { - if (uv >= 0x80 && to_utf) + if (uv & 0x80 && !isutf++) + HALF_UPGRADE(dst,d); d = uv_to_utf8(d, uv); - else - *d++ = (U8)uv; puv = uv; } - if (from_utf) s += UTF8SKIP(s); continue; } else if (uv == none) { /* "none" is unmapped character */ - if (from_utf) { - if (*s < 0x80) - *d++ = *s++; - else if (to_utf) { - int i; - for (i = UTF8SKIP(s); i; --i) - *d++ = *s++; - } - else { I32 ulen; *d++ = (U8)utf8_to_uv(s, &ulen); s += ulen; - } - } - else { /* must be to_utf only */ - d = uv_to_utf8(d, s[-1]); - } puv = 0xfeedface; continue; } else if (uv == extra && !del) { matches++; if (uv != puv) { - if (final >= 0x80 && to_utf) d = uv_to_utf8(d, final); - else - *d++ = (U8)final; puv = final; } - if (from_utf) s += UTF8SKIP(s); continue; } matches++; /* "none+1" is delete character */ - if (from_utf) s += UTF8SKIP(s); } } else { while (s < send) { - if (from_utf) { + if (SvUTF8(sv)) uv = swash_fetch(rv, s); - } else { U8 tmpbuf[2]; uv = *s++; @@ -407,46 +396,23 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ } if (uv < none) { matches++; - if (uv >= 0x80 && to_utf) d = uv_to_utf8(d, uv); - else - *d++ = (U8)uv; - if (from_utf) s += UTF8SKIP(s); continue; } else if (uv == none) { /* "none" is unmapped character */ - if (from_utf) { - if (*s < 0x80) - *d++ = *s++; - else if (to_utf) { - int i; - for (i = UTF8SKIP(s); i; --i) - *d++ = *s++; - } - else { I32 ulen; *d++ = (U8)utf8_to_uv(s, &ulen); s += ulen; - } - } - else { /* must be to_utf only */ - d = uv_to_utf8(d, s[-1]); - } continue; } else if (uv == extra && !del) { matches++; - if (final >= 0x80 && to_utf) d = uv_to_utf8(d, final); - else - *d++ = (U8)final; - if (from_utf) s += UTF8SKIP(s); continue; } matches++; /* "none+1" is delete character */ - if (from_utf) s += UTF8SKIP(s); } } diff --git a/epoc/config.sh b/epoc/config.sh index 113260f..5b37e3a 100644 --- a/epoc/config.sh +++ b/epoc/config.sh @@ -79,7 +79,7 @@ cppsymbols='' crosscompile='define' cryptlib='' csh='csh' -d_Gconvert='sprintf((b),"%.*g",(n),(x))' +d_Gconvert='epoc_gcvt((x),(n),(b))' d_PRIEldbl='undef' d_PRIFldbl='undef' d_PRIGldbl='undef' @@ -194,7 +194,7 @@ d_htonl='define' d_iconv='undef' d_index='undef' d_inetaton='define' -d_int64t='undef' +d_int64_t='undef' d_iovec_s='undef' d_isascii='define' d_isnan='define' @@ -385,7 +385,7 @@ emacs='' eunicefix=':' exe_ext='' expr='expr' -extensions='Data/Dumper File/Glob IO Socket' +extensions='Data/Dumper File/Glob IO Socket Fcntl' fflushNULL='undef' fflushall='define' find='' @@ -497,7 +497,7 @@ installstyle='' installusrbinperl='undef' installvendorlib='' intsize='4' -known_extensions='Data/Dumper File/Glob IO Socket' +known_extensions='Data/Dumper File/Glob IO Socket Fcntl' ksh='' large='' ld='echo' @@ -645,7 +645,7 @@ sleep='' smail='' small='' so='' -socksizetype='int' +socksizetype='size_t' sockethdr='' socketlib='' sort='sort' @@ -656,7 +656,7 @@ src='.' ssizetype='long' startperl='' startsh='#!/bin/sh' -static_ext='Data/Dumper File/Glob IO Socket' +static_ext='Data/Dumper File/Glob IO Socket Fcntl' stdchar='char' stdio_base='' stdio_bufsiz='' @@ -794,3 +794,159 @@ use5005threads='undef' useithreads='undef' inc_version_list=' ' inc_version_list_init='0' +d_madvise='undef' +d_mkdtemp='undef' +d_mkstemp='undef' +d_mkstemps='undef' +d_mmap='undef' +d_mprotect='undef' +d_msync='undef' +d_munmap='undef' +d_qgcvt='undef' +d_socklen_t='undef' +d_vendorarch='' +i_iconv='undef' +i_ieeefp='undef' +i_sunmath='undef' +i_syslog='undef' +i_sysmman='undef' +i_sysutsname='undef' +installvendorarch='' +mmaptype='' +revision='5' +sizesize='4' +socksizetype='int' + +double='undef' +usemorebits='undef' +usemultiplicity='undef' +usemymalloc='n' +usenm='' +useopcode='' +useperlio='undef' +useposix='' +usesfio='' +useshrplib='' +usesocks='undef' +usethreads='undef' +usevendorprefix='' +usevfork='' +usrinc='' +uuname='' +vendorlib='' +vendorlib_stem='' +vendorlibexp='' +vendorprefix='' +vendorprefixexp='' +version='5.6.0' +vi='' +voidflags='15' +xlibpth='' +zcat='' +zip='' +# Configure command line arguments. +config_arg0='' +config_args='' +config_argc=11 +config_arg1='' +config_arg2='' +config_arg3='' +config_arg4='' +config_arg5='' +config_arg6='' +config_arg7='' +config_arg8='' +config_arg9='' +config_arg10='' +config_arg11='' +PERL_REVISION=5 +PERL_VERSION=6 +PERL_SUBVERSION=0 +PERL_API_REVISION=5 +PERL_API_VERSION=6 +PERL_API_SUBVERSION=0 +CONFIGDOTSH=true +# Variables propagated from previous config.sh file. +pp_sys_cflags='' +epocish_cflags='ccflags="$cflags -xc++"' +ivtype='int' +uvtype='unsigned int' +i8type='char' +u8type='unsigned char' +i16type='short' +u16type='unsigned short' +i32type='int' +u32type='unsigned int' +i64type='long long' +u64type='unsigned long long' +d_quad='define' +quadtype='long long' +quadtype='unsigned long long' +quadkind='QUAD_IS_LONG_LONG' +nvtype='double' +ivsize='4' +uvsize='4' +i8size='1' +u8size='1' +i16size='2' +u16size='2' +i32size='4' +u32size='4' +i64size='8' +u64size='8' +d_fs_data_s='undef' +d_fseeko='undef' +d_ldbl_dig='undef' +d_sqrtl='undef' +d_getmnt='undef' +d_statfs_f_flags='undef' +d_statfs_s='undef' +d_ustat='undef' +i_sysstatfs='undef' +i_sysvfs='undef' +i_ustat='undef' +uidsize='2' +uidsign='1' +gidsize='2' +gidsign='1' +ivdformat='"ld"' +uvuformat='"lu"' +uvoformat='"lo"' +uvxformat='"lx"' +uidformat='"hu"' +gidformat='"hu"' +d_strtold='undef' +d_strtoll='undef' +d_strtouq='undef' +d_nv_preserves_uv='define' +use5005threads='undef' +useithreads='undef' +inc_version_list=' ' +inc_version_list_init='0' +d_madvise='undef' +d_mkdtemp='undef' +d_mkstemp='undef' +d_mkstemps='undef' +d_mmap='undef' +d_mprotect='undef' +d_msync='undef' +d_munmap='undef' +d_qgcvt='undef' +d_socklen_t='undef' +d_vendorarch='' +i_iconv='undef' +i_ieeefp='undef' +i_sunmath='undef' +i_syslog='undef' +i_sysmman='undef' +i_sysutsname='undef' +installvendorarch='' +mmaptype='' +revision='5' +sizesize='4' +socksizetype='int' +xs_apiversion='5.005' +d_getcwd='define' +i_sysmode='undef' +d_vendorarch='undef' + diff --git a/epoc/createpkg.pl b/epoc/createpkg.pl index 6977bd3..77dafb1 100644 --- a/epoc/createpkg.pl +++ b/epoc/createpkg.pl @@ -3,11 +3,11 @@ use File::Find; use Cwd; -$VERSION="5.5"; -$PATCH="650"; -$EPOC_VERSION=19; +$VERSION="5.6"; +$PATCH="0"; +$EPOC_VERSION=20; $CROSSCOMPILEPATH=cwd; -$CROSSREPLACEPATH="H:\\devel\\perl5.5.650"; +$CROSSREPLACEPATH="H:\\perl"; sub filefound { diff --git a/epoc/epocish.c b/epoc/epocish.c index 134eaef..4963a2e5 100644 --- a/epoc/epocish.c +++ b/epoc/epocish.c @@ -6,7 +6,7 @@ * */ -/* This is indeed C++ Code !! */ +/* This is C++ Code !! */ #include @@ -31,4 +31,25 @@ epoc_spawn( char *cmd, char *cmdline) { return 0; } + + /* Workaround for defect atof(), see java defect list for epoc */ + double epoc_atof( const char* str) { + TReal64 aRes; + + TLex lex( _L( str)); + TInt err = lex.Val( aRes, TChar( '.')); + return aRes; + } + + void epoc_gcvt( double x, int digits, unsigned char *buf) { + TRealFormat trel; + + trel.iPlaces = digits; + trel.iPoint = TChar( '.'); + + TPtr result( buf, 80); + + result.Num( x, trel); + result.Append( TChar( 0)); + } } diff --git a/epoc/epocish.h b/epoc/epocish.h index f4be0ff..75a64fc 100644 --- a/epoc/epocish.h +++ b/epoc/epocish.h @@ -121,9 +121,6 @@ /* getsockname returns the size of struct sockaddr_in *without* padding */ #define BOGUS_GETNAME_RETURN 8 -/* Yes, size_t is size_t */ -#define Sock_size_t size_t - /* read() on a socket blocks until buf is filled completly, recv() returns each massage @@ -133,3 +130,13 @@ /* No /dev/random available*/ #define PERL_NO_DEV_RANDOM + +/* + work around for buggy atof(): + atof() in ER5 stdlib depends on locale. +*/ + +double epoc_atof( const char *ptr); +#define atof(a) epoc_atof(a) + + diff --git a/ext/POSIX/POSIX.pm b/ext/POSIX/POSIX.pm index d4d9c33..252e5bb 100644 --- a/ext/POSIX/POSIX.pm +++ b/ext/POSIX/POSIX.pm @@ -893,7 +893,7 @@ sub load_imports { difftime mktime strftime tzset tzname)], unistd_h => [qw(F_OK NULL R_OK SEEK_CUR SEEK_END SEEK_SET - STRERR_FILENO STDIN_FILENO STDOUT_FILENO W_OK X_OK + STDERR_FILENO STDIN_FILENO STDOUT_FILENO W_OK X_OK _PC_CHOWN_RESTRICTED _PC_LINK_MAX _PC_MAX_CANON _PC_MAX_INPUT _PC_NAME_MAX _PC_NO_TRUNC _PC_PATH_MAX _PC_PIPE_BUF _PC_VDISABLE _POSIX_CHOWN_RESTRICTED diff --git a/ext/POSIX/POSIX.pod b/ext/POSIX/POSIX.pod index 08300e4..186d72e 100644 --- a/ext/POSIX/POSIX.pod +++ b/ext/POSIX/POSIX.pod @@ -1715,7 +1715,7 @@ CLK_TCK CLOCKS_PER_SEC =item Constants -R_OK SEEK_CUR SEEK_END SEEK_SET STDIN_FILENO STDOUT_FILENO STRERR_FILENO W_OK X_OK +R_OK SEEK_CUR SEEK_END SEEK_SET STDIN_FILENO STDOUT_FILENO STDERR_FILENO W_OK X_OK =back diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index c401527..b8b80d4 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -2306,9 +2306,9 @@ constant(char *name, int arg) #else goto not_there; #endif - if (strEQ(name, "STRERR_FILENO")) -#ifdef STRERR_FILENO - return STRERR_FILENO; + if (strEQ(name, "STDERR_FILENO")) +#ifdef STDERR_FILENO + return STDERR_FILENO; #else goto not_there; #endif diff --git a/hints/bsdos.sh b/hints/bsdos.sh index d3b1b70..1d1d823 100644 --- a/hints/bsdos.sh +++ b/hints/bsdos.sh @@ -98,7 +98,8 @@ case "$osvers" in case "$cc" in '') cc='cc' # cc is gcc2 in 4.0 cccdlflags="-fPIC" - ccdlflags=" " ;; + ccdlflags="-rdynamic -Wl,-rpath,$privlib/$archname/CORE" + ;; esac case "$ld" in diff --git a/pod/perldiag.pod b/pod/perldiag.pod index e4d4b45..c034c36 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3407,6 +3407,11 @@ Note that under some systems, like OS/2, there may be different flavors of Perl executables, some of which may support fork, some not. Try changing the name you call Perl by to C, C, and so on. +=item Unsupported script encoding + +(F) Your program file begins with a Unicode Byte Order Mark (BOM) which +declares it to be in a Unicode encoding that Perl cannot yet read. + =item Unsupported socket function "%s" called (F) Your machine doesn't support the Berkeley socket mechanism, or at diff --git a/t/comp/require.t b/t/comp/require.t index 1d92687..48e3e00 100755 --- a/t/comp/require.t +++ b/t/comp/require.t @@ -7,7 +7,7 @@ BEGIN { # don't make this lexical $i = 1; -print "1..20\n"; +print "1..23\n"; sub do_require { %INC = (); @@ -124,6 +124,16 @@ sub dofile { do "bleah.do"; }; print $x; $i++; +# UTF-encoded things +my $utf8 = chr(0xFEFF); +my $utf16 = chr(255).chr(254); +do_require("${utf8}print \"ok $i\n\"; 1;\n"); +$i++; +do_require("$utf8\nprint \"ok $i\n\"; 1;\n"); +$i++; +do_require("$utf16\n1;"); +print "ok $i\n" if $@ =~ /Unsupported script encoding/; + END { 1 while unlink 'bleah.pm'; 1 while unlink 'bleah.do'; } # ***interaction with pod (don't put any thing after here)*** diff --git a/toke.c b/toke.c index 6b5fc49..f601cf1 100644 --- a/toke.c +++ b/toke.c @@ -326,7 +326,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) } #endif -#if 0 +#ifdef PERL_UTF16_FILTER STATIC I32 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { @@ -2490,6 +2490,8 @@ Perl_yylex(pTHX) goto retry; } do { + bool bof; + bof = PL_rsfp && (PerlIO_tell(PL_rsfp)==0); /* *Before* read! */ if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { fake_eof: if (PL_rsfp) { @@ -2525,7 +2527,9 @@ Perl_yylex(pTHX) PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_doextract = FALSE; } - } + } + if (bof) + s = swallow_bom(s); incline(s); } while (PL_doextract); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; @@ -7407,3 +7411,55 @@ restore_rsfp(pTHXo_ void *f) PerlIO_close(PL_rsfp); PL_rsfp = fp; } + +STATIC char* +S_swallow_bom(pTHX_ char *s) { + STRLEN slen; + slen = SvCUR(PL_linestr); + switch (*s) { + case -1: + if ((s[1] & 255) == 254) { + /* UTF-16 little-endian */ +#ifdef PERL_UTF16_FILTER + U8 *news; +#endif + s+=2; + if (*s == 0 && s[1] == 0) /* UTF-32 little-endian */ + Perl_croak(aTHX_ "Unsupported script encoding"); +#ifdef PERL_UTF16_FILTER + filter_add(S_utf16rev_textfilter, NULL); + New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8); + PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s); + s = news; +#else + Perl_croak(aTHX_ "Unsupported script encoding"); +#endif + } + break; + + case -2: + if ((s[1] & 255) == 255) { /* UTF-16 big-endian */ +#ifdef PERL_UTF16_FILTER + U8 *news; + filter_add(S_utf16_textfilter, NULL); + New(898, news, (PL_bufend - s) * 3 / 2 + 1, U8); + PL_bufend = utf16_to_utf8((U16*)s, news, PL_bufend - s); + s = news; +#else + Perl_croak(aTHX_ "Unsupported script encoding"); +#endif + } + break; + + case -17: + if ( slen>2 && (s[1] & 255) == 187 && (s[2] & 255) == 191) { + s+=3; /* UTF-8 */ + } + break; + case 0: + if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */ + s[2] & 255 == 254 && s[3] & 255 == 255) + Perl_croak(aTHX_ "Unsupported script encoding"); +} +return s; +}