X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=utf8.c;h=d7b078486d44c3d7419865c8bee366ec059276f9;hb=701a277b5182d929c4baa83d419c46c6d08d2101;hp=ac90a38283fcfe8633982ecb38e024c2c10e4a90;hpb=6d47b937228c87804679e481f9585c659c524592;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/utf8.c b/utf8.c
index ac90a38..d7b0784 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1180,6 +1180,30 @@ Perl_is_utf8_mark(pTHX_ U8 *p)
     return swash_fetch(PL_utf8_mark, p, TRUE);
 }
 
+/*
+=for apidoc A|UV|to_utf8_case|U8 *p|U8* ustrp|STRLEN *lenp|SV **swashp|char *normal|char *special
+
+The "p" is a pointer to the UTF-8 string encoding
+the character that is being converted.
+
+The "ustrp" is a pointer to the character buffer that receives
+the conversion result.  The "lenp" is a pointer to the length
+of the result.
+
+The "swashp" is a pointer to the swash to use.
+
+The "normal" is a string like "ToLower" which means the swash
+$utf8::ToLower, which is stored in lib/unicore/To/Lower.pl,
+and loaded by SWASHGET, using lib/utf8_heavy.pl.
+
+The "special" is a string like "utf8::ToSpecLower", which means
+the hash %utf8::ToSpecLower, which is stored in the same file,
+lib/unicore/To/Lower.pl, and is also loaded by SWASHGET.  The hash
+is accessed by Perl_to_utf8_case().
+
+=cut
+ */
+
 UV
 Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal, char *special)
 {
@@ -1196,6 +1220,8 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal
 	 HE *he;
 
 	 uv = utf8_to_uvchr(p, 0);
+	 if (uv <= 0xff)
+	     uv = NATIVE_TO_UTF(uv);
 
 	 if ((hv    = get_hv(special, FALSE)) &&
 	     (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv))) &&
@@ -1206,6 +1232,7 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal
 	      if (*lenp > 1 || UNI_IS_INVARIANT(c))
 		   Copy(s, ustrp, *lenp, U8);
 	      else {
+		   c = UTF_TO_NATIVE(c);
 		   /* something in the 0x80..0xFF range */
 		   ustrp[0] = UTF8_EIGHT_BIT_HI(c);
 		   ustrp[1] = UTF8_EIGHT_BIT_LO(c);
@@ -1223,21 +1250,28 @@ UV
 Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
 {
     return Perl_to_utf8_case(aTHX_ p, ustrp, lenp,
-			     &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper");
+                             &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper");
 }
 
 UV
 Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
 {
     return Perl_to_utf8_case(aTHX_ p, ustrp, lenp,
-			     &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle");
+                             &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); /* swashp, normal, special */
 }
 
 UV
 Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
 {
     return Perl_to_utf8_case(aTHX_ p, ustrp, lenp,
-			     &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower");
+                             &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); /* swashp, normal, special */
+}
+
+UV
+Perl_to_utf8_fold(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
+{   /* Convert the character at p using the "ToFold" / "utf8::ToSpecFold" tables. */
+    SV **foldswash = &PL_utf8_tofold;
+    return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, foldswash, "ToFold", "utf8::ToSpecFold");
 }
 
 /* a "swash" is a swatch hash */
@@ -1313,7 +1347,7 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8)
     U32 off;
     STRLEN slen;
     STRLEN needents;
-    U8 *tmps;
+    U8 *tmps = NULL;
     U32 bit;
     SV *retval;
     U8 tmputf8[2];
@@ -1484,4 +1518,76 @@ Perl_utf8n_to_uvchr(pTHX_ U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
     return UNI_TO_NATIVE(uv);
 }
 
+/* Set dsv to the \x{...} rendering of the UTF-8 string spv (len bytes), cut off with
+ * "..." once dsv reaches pvlim bytes (0: no limit).  flags is unused.  Returns SvPVX(dsv). */
+char *
+Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags)
+{
+    U8 *cur = spv, *end = spv + len;
+    bool cut = FALSE;
+
+    sv_setpvn(dsv, "", 0);
+    while (cur < end) {
+	 if (pvlim && SvCUR(dsv) >= pvlim) {
+	      cut = TRUE;
+	      break;
+	 }
+	 Perl_sv_catpvf(aTHX_ dsv, "\\x{%"UVxf"}", utf8_to_uvchr(cur, 0));
+	 cur += UTF8SKIP(cur);
+    }
+    if (cut)
+	 sv_catpvn(dsv, "...", 3);
+    return SvPVX(dsv);
+}
+
+char *
+Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
+{    /* Hands ssv's buffer, SvPVX(ssv) with SvCUR(ssv) bytes, to Perl_pv_uni_display(). */
+     U8 *bytes = (U8*)SvPVX(ssv);
+     return Perl_pv_uni_display(aTHX_ dsv, bytes, SvCUR(ssv), pvlim, flags);
+}
+
+/* Case-insensitively compare the first len characters of s1 and s2.  u1 and
+ * u2 say whether the respective string is UTF-8 encoded; otherwise each byte
+ * is taken as one character.  Returns 0 if the strings match and 1 if they
+ * differ (there is no strcmp-style ordering).  A len of zero or less compares
+ * nothing and returns 0.
+ *
+ * Characters that differ are both lowercased with to_uni_lower(), whatever
+ * the encoding, so the two buffers are always filled in before memNE() and
+ * hold the same representation.  PL_fold[] is deliberately not used: ca and
+ * cb are whole code points, presumably too wide to index that table safely.
+ *
+ * NOTE(review): len is consumed one per character, because strings in
+ * different encodings have no common byte length -- confirm against callers.
+ */
+I32
+Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, const char *s2, bool u2, register I32 len)
+{
+     register U8 *a = (U8*)s1;
+     register U8 *b = (U8*)s2;
+     STRLEN la, lb;
+     UV ca, cb;
+     STRLEN ulen1, ulen2;
+     U8 tmpbuf1[UTF8_MAXLEN*3+1];
+     U8 tmpbuf2[UTF8_MAXLEN*3+1];
+
+     /* Count len down so the loop ends; "> 0" also stops at once if len < 0. */
+     while (len-- > 0) {
+	  /* Read one character from each string; a plain byte has length 1. */
+	  la = lb = 1;
+	  ca = u1 ? utf8_to_uvchr(a, &la) : (UV)*a;
+	  cb = u2 ? utf8_to_uvchr(b, &lb) : (UV)*b;
+	  if (ca != cb) {
+	       /* Not identical: compare the lowercase forms instead. */
+	       to_uni_lower(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
+	       to_uni_lower(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
+	       if (ulen1 != ulen2 || memNE(tmpbuf1, tmpbuf2, ulen1))
+		    return 1;
+	  }
+	  a += la;
+	  b += lb;
+     }
+     return 0;
+}