Upgrade to Unicode::Normalize 0.30.

diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes

index e63656c..bb1b693 100644 (file)
--- a/ext/Unicode/Normalize/Changes
+++ b/ext/Unicode/Normalize/Changes
@@ -1,5 +1,9 @@
 Revision history for Perl extension Unicode::Normalize.
 
+0.30  Sun May  2 14:35:00 2004
+    - XSUB: (perl 5.8.1 or later) improved utf8 upgrade of non-POK (private POK)
+      values like tied scalars, overloaded objects, etc.
+
 0.28  Sat Nov 22 23:46:24 2003
     - XSUB: even if string contains a malformed, "short" Unicode character,
       decompose() and reorder() will be safe. Garbage will be no longer added.
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm

index ef8ec68..09ef371 100644 (file)
--- a/ext/Unicode/Normalize/Normalize.pm
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -13,7 +13,7 @@ use Carp;
 
 no warnings 'utf8';
 
-our $VERSION = '0.28';
+our $VERSION = '0.30';
 our $PACKAGE = __PACKAGE__;
 
 require Exporter;
@@ -117,6 +117,8 @@ Unicode::Normalize - Unicode Normalization Forms
 
 =head1 SYNOPSIS
 
+(1) using function names exported by default:
+
   use Unicode::Normalize;
 
   $NFD_string  = NFD($string);  # Normalization Form D
@@ -124,7 +126,7 @@ Unicode::Normalize - Unicode Normalization Forms
   $NFKD_string = NFKD($string); # Normalization Form KD
   $NFKC_string = NFKC($string); # Normalization Form KC
 
-   or
+(2) using function names exported on request:
 
   use Unicode::Normalize 'normalize';
 
@@ -382,11 +384,11 @@ C<normalize> and other some functions: on request.
 
 =head1 AUTHOR
 
-SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>
+SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
 
   http://homepage1.nifty.com/nomenclator/perl/
 
-  Copyright(C) 2001-2003, SADAHIRO Tomoyuki. Japan. All rights reserved.
+  Copyright(C) 2001-2004, SADAHIRO Tomoyuki. Japan. All rights reserved.
 
   This module is free software; you can redistribute it
   and/or modify it under the same terms as Perl itself.
diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs

index 2ce7cbc..13544c9 100644 (file)
--- a/ext/Unicode/Normalize/Normalize.xs
+++ b/ext/Unicode/Normalize/Normalize.xs
@@ -182,34 +182,44 @@ static void sv_cat_uvuni (SV* sv, UV uv)
     sv_catpvn(sv, (char *)tmp, t - tmp);
 }
 
+static char * sv_2pvunicode(SV *sv, STRLEN *lp)
+{
+    char *s;
+    STRLEN len;
+    s = (char*)SvPV(sv,len);
+    if (!SvUTF8(sv)) {
+       SV* tmpsv = sv_mortalcopy(sv);
+       if (!SvPOK(tmpsv))
+           (void)sv_pvn_force(tmpsv,&len);
+       sv_utf8_upgrade(tmpsv);
+       s = (char*)SvPV(tmpsv,len);
+    }
+    *lp = len;
+    return s;
+}
+
 MODULE = Unicode::Normalize    PACKAGE = Unicode::Normalize
 
 SV*
-decompose(arg, compat = &PL_sv_no)
-    SV * arg
+decompose(src, compat = &PL_sv_no)
+    SV * src
     SV * compat
   PROTOTYPE: $;$
   PREINIT:
-    UV uv;
-    SV *src, *dst;
+    SV *dst;
     STRLEN srclen, retlen;
     U8 *s, *e, *p, *r;
+    UV uv;
     bool iscompat;
   CODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
     iscompat = SvTRUE(compat);
+    s = (U8*)sv_2pvunicode(src,&srclen);
+    e = s + srclen;
 
     dst = newSV(1);
     (void)SvPOK_only(dst);
     SvUTF8_on(dst);
 
-    s = (U8*)SvPV(src,srclen);
-    e = s + srclen;
     for (p = s; p < e; p += retlen) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
@@ -232,11 +242,11 @@ decompose(arg, compat = &PL_sv_no)
 
 
 SV*
-reorder(arg)
-    SV * arg
+reorder(src)
+    SV * src
   PROTOTYPE: $
   PREINIT:
-    SV *src, *dst;
+    SV *dst;
     STRLEN srclen, dstlen, retlen, stk_cc_max;
     U8 *s, *e, *p, *d, curCC;
     UV uv, uvlast;
@@ -244,15 +254,9 @@ reorder(arg)
     STRLEN i, cc_pos;
     bool valid_uvlast;
   CODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
-
-    s = (U8*)SvPV(src, srclen);
+    s = (U8*)sv_2pvunicode(src,&srclen);
     e = s + srclen;
+
     dstlen = srclen + 1;
     dst = newSV(dstlen);
     (void)SvPOK_only(dst);
@@ -326,27 +330,21 @@ reorder(arg)
 
 
 SV*
-compose(arg)
-    SV * arg
+compose(src)
+    SV * src
   PROTOTYPE: $
   ALIAS:
     composeContiguous = 1
   PREINIT:
-    SV  *src, *dst, *tmp;
+    SV  *dst, *tmp;
     U8  *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
     UV uv, uvS, uvComp;
     STRLEN srclen, dstlen, tmplen, retlen;
     bool beginning = TRUE;
   CODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
-
-    s = (U8*)SvPV(src, srclen);
+    s = (U8*)sv_2pvunicode(src,&srclen);
     e = s + srclen;
+
     dstlen = srclen + 1;
     dst = newSV(dstlen);
     (void)SvPOK_only(dst);
@@ -429,25 +427,17 @@ compose(arg)
 
 
 void
-checkNFD(arg)
-    SV * arg
+checkNFD(src)
+    SV * src
   PROTOTYPE: $
   ALIAS:
     checkNFKD = 1
   PREINIT:
-    UV uv;
-    SV *src;
     STRLEN srclen, retlen;
     U8 *s, *e, *p, curCC, preCC;
+    UV uv;
   CODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
-    
-    s = (U8*)SvPV(src,srclen);
+    s = (U8*)sv_2pvunicode(src,&srclen);
     e = s + srclen;
 
     preCC = 0;
@@ -468,26 +458,18 @@ checkNFD(arg)
 
 
 void
-checkNFC(arg)
-    SV * arg
+checkNFC(src)
+    SV * src
   PROTOTYPE: $
   ALIAS:
     checkNFKC = 1
   PREINIT:
-    UV uv;
-    SV *src;
     STRLEN srclen, retlen;
     U8 *s, *e, *p, curCC, preCC;
+    UV uv;
     bool isMAYBE;
   CODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
-    
-    s = (U8*)SvPV(src,srclen);
+    s = (U8*)sv_2pvunicode(src,&srclen);
     e = s + srclen;
 
     preCC = 0;
@@ -528,27 +510,19 @@ checkNFC(arg)
 
 
 void
-checkFCD(arg)
-    SV * arg
+checkFCD(src)
+    SV * src
   PROTOTYPE: $
   ALIAS:
     checkFCC = 1
   PREINIT:
-    UV uv, uvLead, uvTrail;
-    SV *src;
     STRLEN srclen, retlen, canlen, canret;
     U8 *s, *e, *p, curCC, preCC;
+    UV uv, uvLead, uvTrail;
     U8 *sCan, *pCan, *eCan;
     bool isMAYBE;
   CODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
-    
-    s = (U8*)SvPV(src,srclen);
+    s = (U8*)sv_2pvunicode(src,&srclen);
     e = s + srclen;
 
     preCC = 0;
@@ -709,22 +683,15 @@ getCanon(uv)
 
 
 void
-splitOnLastStarter(arg)
-    SV * arg
+splitOnLastStarter(src)
+    SV * src
   PREINIT:
-    UV uv;
-    SV *src, *svp;
+    SV *svp;
     STRLEN srclen, retlen;
     U8 *s, *e, *p;
+    UV uv;
   PPCODE:
-    if (SvUTF8(arg)) {
-       src = arg;
-    } else {
-       src = sv_mortalcopy(arg);
-       sv_utf8_upgrade(src);
-    }
-
-    s = (U8*)SvPV(src,srclen);
+    s = (U8*)sv_2pvunicode(src,&srclen);
     e = s + srclen;
 
     for (p = e; s < p; ) {
diff --git a/ext/Unicode/Normalize/t/illegal.t b/ext/Unicode/Normalize/t/illegal.t

index 76cd833..9d18aad 100644 (file)
--- a/ext/Unicode/Normalize/t/illegal.t
+++ b/ext/Unicode/Normalize/t/illegal.t
@@ -14,9 +14,16 @@ BEGIN {
     }
 }
 
+BEGIN {
+    unless (5.006001 <= $]) {
+       print "1..0 # skipped: Perl 5.6.1 or later".
+               " needed for this test\n";
+       exit;
+    }
+}
+
 #########################
 
-use 5.006001;
 use Test;
 use strict;
 use warnings;
diff --git a/ext/Unicode/Normalize/t/short.t b/ext/Unicode/Normalize/t/short.t

index 1f185ac..d799f4a 100644 (file)
--- a/ext/Unicode/Normalize/t/short.t
+++ b/ext/Unicode/Normalize/t/short.t
@@ -14,6 +14,14 @@ BEGIN {
     }
 }
 
+BEGIN {
+    unless (5.006001 <= $]) {
+       print "1..0 # skipped: Perl 5.6.1 or later".
+               " needed for this test\n";
+       exit;
+    }
+}
+
 #########################
 
 use strict;
@@ -26,11 +34,8 @@ print "ok 1\n";
 
 no warnings qw(utf8);
 
-our $a = "\x{3042}"; # 3-byte length (in UTF-8/UTF-EBCDIC)
-{
-    use bytes;
-    substr($a,1,length($a), ''); # remove trailing octets
-}
+# U+3042 is 3-byte length (in UTF-8/UTF-EBCDIC)
+our $a = pack 'U0C', unpack 'C', "\x{3042}";
 
 print NFD($a) eq "\0"
    ? "ok" : "not ok", " 2\n";
diff --git a/ext/Unicode/Normalize/t/split.t b/ext/Unicode/Normalize/t/split.t

index 03b599e..6bf7ff6 100644 (file)
--- a/ext/Unicode/Normalize/t/split.t
+++ b/ext/Unicode/Normalize/t/split.t
@@ -14,6 +14,14 @@ BEGIN {
     }
 }
 
+BEGIN {
+    unless (5.006001 <= $]) {
+       print "1..0 # skipped: Perl 5.6.1 or later".
+               " needed for this test\n";
+       exit;
+    }
+}
+
 #########################
 
 use Test;
ext/Unicode/Normalize/Changes		patch \| blob \| blame \| history
ext/Unicode/Normalize/Normalize.pm		patch \| blob \| blame \| history
ext/Unicode/Normalize/Normalize.xs		patch \| blob \| blame \| history
ext/Unicode/Normalize/t/illegal.t		patch \| blob \| blame \| history
ext/Unicode/Normalize/t/short.t		patch \| blob \| blame \| history
ext/Unicode/Normalize/t/split.t		patch \| blob \| blame \| history