X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FUnicode%2FNormalize%2Ft%2Ftest.t;h=e07f6f09258d83aa89caf354d21d4b2de81adad7;hb=fe067ad959549a513d3f99948bd05deb85d6e222;hp=d02bcc0304a469eda95425b158c7dc082300174c;hpb=c983aa87c110c324b4ec293f7b7fd915959a9597;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t index d02bcc0..e07f6f0 100644 --- a/ext/Unicode/Normalize/t/test.t +++ b/ext/Unicode/Normalize/t/test.t @@ -1,47 +1,77 @@ -# Before `make install' is performed this script should be runnable with -# `make test'. After `make install' it should work as `perl test.pl' + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} ######################### use Test; use strict; use warnings; -BEGIN { plan tests => 20 }; +BEGIN { plan tests => 58 }; use Unicode::Normalize; ok(1); # If we made it this far, we're ok. +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } + ######################### -ok(NFC(""), ""); ok(NFD(""), ""); +ok(NFC(""), ""); +ok(NFKD(""), ""); +ok(NFKC(""), ""); + +ok(NFD("A"), "A"); +ok(NFC("A"), "A"); +ok(NFKD("A"), "A"); +ok(NFKC("A"), "A"); + +# don't modify the source +# don't modify the source +my $sNFD = "\x{FA19}"; +ok(NFD($sNFD), "\x{795E}"); +ok($sNFD, "\x{FA19}"); + +my $sNFC = "\x{FA1B}"; +ok(NFC($sNFC), "\x{798F}"); +ok($sNFC, "\x{FA1B}"); + +my $sNFKD = "\x{FA1E}"; +ok(NFKD($sNFKD), "\x{7FBD}"); +ok($sNFKD, "\x{FA1E}"); + +my $sNFKC = "\x{FA26}"; +ok(NFKC($sNFKC), "\x{90FD}"); +ok($sNFKC, "\x{FA26}"); + sub hexNFC { join " ", map sprintf("%04X", $_), - unpack 'U*', NFC pack 'U*', map hex(), split ' ', shift; + _unpack_U NFC _pack_U map hex, split ' ', shift; } sub hexNFD { join " ", map sprintf("%04X", $_), - unpack 'U*', NFD pack 'U*', map hex(), split ' ', shift; + _unpack_U NFD _pack_U map hex, split ' ', shift; } -my $ordA = ord("A"); -my $ASCII = $ordA == 0x41; -my $EBCDIC = $ordA == 0xc1; - -if ($ASCII) { - ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); - ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); - ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); -} elsif ($EBCDIC) { - # A WITH GRAVE is 0044 in EBCDIC, not 00E0 - # SMALL LATIN B is 0082 in EBCDIC, not 0062 - ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "0044 05AE 05C4 0315 0082"); - ok(hexNFC("00E0 05AE 05C4 0315 0062"), "0044 05AE 05C4 0315 0082"); - ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "0044 05AE 05C4 0315 0082"); -} else { - skip("Neither ASCII nor EBCDIC based") for 1..3; -} +ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8"); +ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF"); +ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); @@ -49,39 +79,48 @@ ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); - -if ($ASCII) { - ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); - ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); -} elsif ($EBCDIC) { - # SMALL LATIN A is 0081 in EBCDIC, not 0061 - # SMALL LATIN B is 0082 in EBCDIC, not 0062 - ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0081 05AE 05C4 0300 0315 0082"); - ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0081 05AE 05C4 0300 0315 0082"); -} else { - skip("Neither ASCII nor EBCDIC based") for 1..2; -} - +ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); -if ($ASCII) { - ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); -} elsif ($EBCDIC) { - # CAPITAL LATIN A is 00C1 in EBCDIC, not 0041 - ok(hexNFC("0000 0041 0000 0000"), "0000 00C1 0000 0000"); -} else { - skip("Neither ASCII nor EBCDIC based"); -} +ok(hexNFC("AC00 11A7"), "AC00 11A7"); +ok(hexNFC("AC00 11A8"), "AC01"); +ok(hexNFC("AC00 11A9"), "AC02"); +ok(hexNFC("AC00 11C2"), "AC1B"); +ok(hexNFC("AC00 11C3"), "AC00 11C3"); -ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); +# Test Cases from Public Review Issue #29: Normalization Issue +# cf. http://www.unicode.org/review/pr-29.html +ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E"); +ok(hexNFC("1100 0300 1161"), "1100 0300 1161"); +ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300"); +ok(hexNFC("1100 1161 0300"), "AC00 0300"); +ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327"); +ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327"); + +ok(hexNFC("0300 0041"), "0300 0041"); +ok(hexNFC("0300 0301 0041"), "0300 0301 0041"); +ok(hexNFC("0301 0300 0041"), "0301 0300 0041"); +ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301"); +ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300"); + +ok(hexNFC("0327 0061 0300"), "0327 00E0"); +ok(hexNFC("0301 0061 0300"), "0301 00E0"); +ok(hexNFC("0315 0061 0300"), "0315 00E0"); +ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0"); +ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0"); +ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0"); + +# NFC() should be unary. +my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62); +my $str12 = _pack_U(0x1EA4, 0x62); +ok(NFC $str11 eq $str12); + +# NFD() should be unary. +my $str21 = _pack_U(0xE0, 0xAC00); +my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161); +ok(NFD $str21 eq $str22); -# should be unary. -if ($ASCII) { - ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x62"); -} elsif ($EBCDIC) { - ok(NFC "\x{41}\x{0302}\x{0301}\x62" eq "\x{1EA4}\x82"); -} else { - skip("Neither ASCII nor EBCDIC based"); -} -ok(NFD "\x{E0}\x{AC00}" eq "\x{61}\x{0300}\x{1100}\x{1161}");