}
STATIC I32
-S_do_trans_UC_simple(pTHX_ SV *sv)
-{
- dTHR;
- U8 *s;
- U8 *send;
- U8 *d;
- I32 matches = 0;
- STRLEN len;
-
- SV* rv = (SV*)cSVOP->op_sv;
- HV* hv = (HV*)SvRV(rv);
- SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
- UV none = svp ? SvUV(*svp) : 0x7fffffff;
- UV extra = none + 1;
- UV final;
- UV uv;
-
- s = (U8*)SvPV(sv, len);
- send = s + len;
-
- svp = hv_fetch(hv, "FINAL", 5, FALSE);
- if (svp)
- final = SvUV(*svp);
-
- d = s;
- while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none) {
- s += UTF8SKIP(s);
- matches++;
- *d++ = (U8)uv;
- }
- else if (uv == none) {
- I32 ulen;
- uv = utf8_to_uv(s, &ulen);
- s += ulen;
- *d++ = (U8)uv;
- }
- else if (uv == extra) {
- s += UTF8SKIP(s);
- matches++;
- *d++ = (U8)final;
- }
- else
- s += UTF8SKIP(s);
- }
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
-
- return matches;
-}
-
-STATIC I32
-S_do_trans_CU_simple(pTHX_ SV *sv)
-{
- dTHR;
- U8 *s;
- U8 *send;
- U8 *d;
- U8 *dst;
- I32 matches = 0;
- STRLEN len;
-
- SV* rv = (SV*)cSVOP->op_sv;
- HV* hv = (HV*)SvRV(rv);
- SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
- UV none = svp ? SvUV(*svp) : 0x7fffffff;
- UV extra = none + 1;
- UV final;
- UV uv;
- U8 tmpbuf[UTF8_MAXLEN];
- I32 bits = 16;
-
- s = (U8*)SvPV(sv, len);
- send = s + len;
-
- svp = hv_fetch(hv, "BITS", 4, FALSE);
- if (svp)
- bits = (I32)SvIV(*svp);
-
- svp = hv_fetch(hv, "FINAL", 5, FALSE);
- if (svp)
- final = SvUV(*svp);
-
- Newz(801, d, len * (bits >> 3) + 1, U8);
- dst = d;
-
- while (s < send) {
- uv = *s++;
- if (uv < 0x80)
- tmpbuf[0] = uv;
- else {
- tmpbuf[0] = (( uv >> 6) | 0xc0);
- tmpbuf[1] = (( uv & 0x3f) | 0x80);
- }
-
- if ((uv = swash_fetch(rv, tmpbuf)) < none) {
- matches++;
- d = uv_to_utf8(d, uv);
- }
- else if (uv == none)
- d = uv_to_utf8(d, s[-1]);
- else if (uv == extra) {
- matches++;
- d = uv_to_utf8(d, final);
- }
- }
- *d = '\0';
- sv_usepvn_mg(sv, (char*)dst, d - dst);
-
- return matches;
-}
-
-/* utf-8 to latin-1 */
-
-STATIC I32
-S_do_trans_UC_trivial(pTHX_ SV *sv)
-{
- dTHR;
- U8 *s;
- U8 *send;
- U8 *d;
- STRLEN len;
-
- s = (U8*)SvPV(sv, len);
- send = s + len;
-
- d = s;
- while (s < send) {
- if (*s < 0x80)
- *d++ = *s++;
- else {
- I32 ulen;
- UV uv = utf8_to_uv(s, &ulen);
- s += ulen;
- *d++ = (U8)uv;
- }
- }
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
-
- return SvCUR(sv);
-}
-
-/* latin-1 to utf-8 */
-
-STATIC I32
-S_do_trans_CU_trivial(pTHX_ SV *sv)
-{
- dTHR;
- U8 *s;
- U8 *send;
- U8 *d;
- U8 *dst;
- I32 matches;
- STRLEN len;
-
- s = (U8*)SvPV(sv, len);
- send = s + len;
-
- Newz(801, d, len * 2 + 1, U8);
- dst = d;
-
- matches = send - s;
-
- while (s < send) {
- if (*s < 0x80)
- *d++ = *s++;
- else {
- UV uv = *s++;
- *d++ = (( uv >> 6) | 0xc0);
- *d++ = (( uv & 0x3f) | 0x80);
- }
- }
- *d = '\0';
- sv_usepvn_mg(sv, (char*)dst, d - dst);
-
- return matches;
-}
-
-STATIC I32
S_do_trans_UU_complex(pTHX_ SV *sv)
{
dTHR;
switch (PL_op->op_private & 63) {
case 0:
- return do_trans_CC_simple(sv);
-
- case OPpTRANS_FROM_UTF:
- return do_trans_UC_simple(sv);
-
- case OPpTRANS_TO_UTF:
- return do_trans_CU_simple(sv);
-
- case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF:
- return do_trans_UU_simple(sv);
+ if (SvUTF8(sv))
+ return do_trans_UU_simple(sv);
+ else
+ return do_trans_CC_simple(sv);
case OPpTRANS_IDENTICAL:
- return do_trans_CC_count(sv);
-
- case OPpTRANS_FROM_UTF|OPpTRANS_IDENTICAL:
- return do_trans_UC_trivial(sv);
-
- case OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
- return do_trans_CU_trivial(sv);
-
- case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
- return do_trans_UU_count(sv);
+ if (SvUTF8(sv))
+ return do_trans_UU_count(sv);
+ else
+ return do_trans_CC_count(sv);
default:
- if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+ if (SvUTF8(sv))
return do_trans_UU_complex(sv); /* could be UC or CU too */
else
return do_trans_CC_complex(sv);
Ap |U8* |utf16_to_utf8_reversed|U16* p|U8 *d|I32 bytelen
Ap |I32 |utf8_distance |U8 *a|U8 *b
Ap |U8* |utf8_hop |U8 *s|I32 off
+Ap |U8* |utf8_to_bytes |U8 *s|STRLEN len
+Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN len
Ap |UV |utf8_to_uv |U8 *s|I32* retlen
Ap |U8* |uv_to_utf8 |U8 *d|UV uv
p |void |vivify_defelem |SV* sv
s |I32 |do_trans_UU_simple |SV *sv
s |I32 |do_trans_UU_count |SV *sv
s |I32 |do_trans_UU_complex |SV *sv
-s |I32 |do_trans_UC_simple |SV *sv
-s |I32 |do_trans_CU_simple |SV *sv
s |I32 |do_trans_UC_trivial |SV *sv
s |I32 |do_trans_CU_trivial |SV *sv
#endif
## Check for too many filenames
pod2usage("$0: Too many files given.\n") if (@ARGV > 1);
-Some user's however may feel that the above "economy of expression" is
+Some users however may feel that the above "economy of expression" is
not particularly readable nor consistent and may instead choose to do
something more like the following:
PL_svref_mutex
PL_cred_mutex
PL_eval_mutex
+ PL_fdpid_mutex
+ PL_sv_lock_mutex
PL_eval_cond
PL_eval_owner
PL_threads_mutex
Perl_find_threadsv
Perl_unlock_condpair
Perl_magic_mutexfree
+ Perl_lock
)];
}
# expand tabs to 8-column spacing
1 while s/\t+/' ' x (length($&)*8 - length($`)%8)/e;
-=item tr/SEARCHLIST/REPLACEMENTLIST/cdsUC
+=item tr/SEARCHLIST/REPLACEMENTLIST/cds
-=item y/SEARCHLIST/REPLACEMENTLIST/cdsUC
+=item y/SEARCHLIST/REPLACEMENTLIST/cds
Transliterates all occurrences of the characters found in the search list
with the corresponding character in the replacement list. It returns
c Complement the SEARCHLIST.
d Delete found but unreplaced characters.
s Squash duplicate replaced characters.
- U Translate to/from UTF-8.
- C Translate to/from 8-bit char (octet).
If the C</c> modifier is specified, the SEARCHLIST character set
is complemented. If the C</d> modifier is specified, any characters
This latter is useful for counting characters in a class or for
squashing character sequences in a class.
-The first C</U> or C</C> modifier applies to the left side of the translation.
-The second one applies to the right side. If present, these modifiers override
-the current utf8 state.
-
Examples:
$ARGV[1] =~ tr/A-Z/a-z/; # canonicalize to lower case
tr [\200-\377]
[\000-\177]; # delete 8th bit
- tr/\0-\xFF//CU; # change Latin-1 to Unicode
- tr/\0-\x{FF}//UC; # change Unicode to Latin-1
-
If multiple transliterations are given for a character, only the
first one is used:
Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
- if (UTF) {
- o = newSVOP(OP_TRANS, 0, 0);
- utf8 = OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF;
- }
- else {
New(803,tbl,256,short);
o = newPVOP(OP_TRANS, 0, (char*)tbl);
- utf8 = 0;
- }
complement = del = squash = 0;
- while (strchr("cdsCU", *s)) {
+ while (strchr("cds", *s)) {
if (*s == 'c')
complement = OPpTRANS_COMPLEMENT;
else if (*s == 'd')
del = OPpTRANS_DELETE;
else if (*s == 's')
squash = OPpTRANS_SQUASH;
- else {
- switch (count++) {
- case 0:
- if (*s == 'C')
- utf8 &= ~OPpTRANS_FROM_UTF;
- else
- utf8 |= OPpTRANS_FROM_UTF;
- break;
- case 1:
- if (*s == 'C')
- utf8 &= ~OPpTRANS_TO_UTF;
- else
- utf8 |= OPpTRANS_TO_UTF;
- break;
- default:
- Perl_croak(aTHX_ "Too many /C and /U options");
- }
- }
s++;
}
- o->op_private = del|squash|complement|utf8;
+ o->op_private = del|squash|complement;
PL_lex_op = o;
yylval.ival = OP_TRANS;
return s;
}
+/*
+=for apidoc utf8_to_bytes
+
+Converts a string C<s> of length C<len> from UTF8 into ASCII encoding.
+Unlike C<bytes_to_utf8>, this over-writes the original string.
+
+=cut
+*/
+
+U8 *
+Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len)
+{
+ dTHR;
+ U8 *send;
+ U8 *d;
+ U8 *save;
+
+ send = s + len;
+ d = save = s;
+ while (s < send) {
+ if (*s < 0x80)
+ *d++ = *s++;
+ else {
+ I32 ulen;
+ UV uv = utf8_to_uv(s, &ulen);
+ s += ulen;
+ *d++ = (U8)uv;
+ }
+ }
+ *d = '\0';
+ return save;
+}
+
+/*
+=for apidoc bytes_to_utf8
+
+Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
+Returns a pointer to the newly-created string.
+
+*/
+
+U8*
+Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len)
+{
+ dTHR;
+ U8 *send;
+ U8 *d;
+ U8 *dst;
+ send = s + len;
+
+ Newz(801, d, len * 2 + 1, U8);
+ dst = d;
+
+ while (s < send) {
+ if (*s < 0x80)
+ *d++ = *s++;
+ else {
+ UV uv = *s++;
+ *d++ = (( uv >> 6) | 0xc0);
+ *d++ = (( uv & 0x3f) | 0x80);
+ }
+ }
+ *d = '\0';
+ return dst;
+}
+
/* XXX NOTHING CALLS THE FOLLOWING TWO ROUTINES YET!!! */
/*
* Convert native or reversed UTF-16 to UTF-8.
#define USE_FIXED_OSFHANDLE
#endif
+/* Define PERL_WIN32_SOCK_DLOAD to have Perl dynamically load the winsock
+ DLL when needed. Don't use if your compiler supports delayloading (ie, VC++ 6.0)
+ -- BKS 5-29-2000 */
+#if !(defined(_M_IX86) && _MSC_VER >= 1200)
+#define PERL_WIN32_SOCK_DLOAD
+#endif
#define ENV_IS_CASELESS
#ifndef VER_PLATFORM_WIN32_WINDOWS /* VC-2.0 headers don't have this */
#endif
#define flushall _flushall
#define fcloseall _fcloseall
+#define isnan _isnan /* ...same libraries as MSVC */
#ifdef PERL_OBJECT
# define MEMBER_TO_FPTR(name) &(name)