=item *
Case translation operators use the Unicode case translation tables
-when provided character input. Note that C<uc()> translates to
-uppercase, while C<ucfirst> translates to titlecase (for languages
-that make the distinction). Naturally the corresponding backslash
-sequences have the same semantics.
+when provided character input. Note that C<uc()> (also known as C<\U>
+in double-quoted strings) translates to uppercase, while C<ucfirst()>
+(also known as C<\u> in double-quoted strings) translates to titlecase
+(for languages that make the distinction). Naturally the
+corresponding backslash sequences have the same semantics.
=item *
register U8 *s;
STRLEN slen;
- if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && UTF8_IS_START(*s)) {
- STRLEN ulen;
+ if (DO_UTF8(sv)) {
U8 tmpbuf[UTF8_MAXLEN*2+1];
- U8 *tend;
- UV uv;
+ STRLEN ulen;
+ STRLEN tculen;
- toTITLE_utf8(s, tmpbuf, &ulen); /* XXX --jhi */
- uv = utf8_to_uvchr(tmpbuf, 0);
-
- tend = uvchr_to_utf8(tmpbuf, uv);
+ s = (U8*)SvPV(sv, slen);
+ utf8_to_uvchr(s, &ulen);
- if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
+ toTITLE_utf8(s, tmpbuf, &tculen);
+ utf8_to_uvchr(tmpbuf, 0);
+
+ if (!SvPADTMP(sv) || SvREADONLY(sv)) {
dTARGET;
- sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf);
+ sv_setpvn(TARG, (char*)tmpbuf, tculen);
sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
SvUTF8_on(TARG);
SETs(TARG);
}
else {
s = (U8*)SvPV_force(sv, slen);
- Copy(tmpbuf, s, ulen, U8);
+ Copy(tmpbuf, s, tculen, U8);
}
}
else {
U8 *tend;
UV uv;
- toLOWER_utf8(s, tmpbuf, &ulen); /* XXX --jhi */
+ toLOWER_utf8(s, tmpbuf, &ulen);
uv = utf8_to_uvchr(tmpbuf, 0);
tend = uvchr_to_utf8(tmpbuf, uv);
d = (U8*)SvPVX(TARG);
send = s + len;
while (s < send) {
- toUPPER_utf8(s, tmpbuf, &ulen); /* XXX --jhi */
+ toUPPER_utf8(s, tmpbuf, &ulen);
Copy(tmpbuf, d, ulen, U8);
d += ulen;
s += UTF8SKIP(s);
d = (U8*)SvPVX(TARG);
send = s + len;
while (s < send) {
- toLOWER_utf8(s, tmpbuf, &ulen); /* XXX --jhi */
+ toLOWER_utf8(s, tmpbuf, &ulen);
Copy(tmpbuf, d, ulen, U8);
d += ulen;
s += UTF8SKIP(s);
ok("\L\x{DF}AB\x{149}CD" eq "\x{DF}ab\x{149}cd",
"multicharacter lowercase");
+# Titlecase (not uppercase) is used for \u / ucfirst.
+
+# \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN.  Its titlecase is
+# \x{535}\x{582} (ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN),
+# its lowercase is
+# \x{587} itself,
+# and its uppercase is
+# \x{535}\x{552} (ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN).
+
+$a = "\x{587}";
+
+ok("\L\x{587}" eq "\x{587}", "ligature lowercase");
+ok("\u\x{587}" eq "\x{535}\x{582}", "ligature titlecase");
+ok("\U\x{587}" eq "\x{535}\x{552}", "ligature uppercase");
+