Move Encode from ext/ to cpan/
[p5sagit/p5-mst-13.2.git] / cpan / Encode / t / gsm0338.t
CommitLineData
e74d7437 1BEGIN {
2 if ($ENV{'PERL_CORE'}){
3 chdir 't';
4 unshift @INC, '../lib';
5 }
6 require Config; import Config;
7 if ($Config{'extensions'} !~ /\bEncode\b/) {
8 print "1..0 # Skip: Encode was not built\n";
9 exit 0;
10 }
11 $| = 1;
12}
13
14use strict;
44b3b9c7 15use utf8;
16use Test::More tests => 778;
e74d7437 17use Encode;
44b3b9c7 18use Encode::GSM0338;
e74d7437 19
20# The specification of GSM 03.38 is not awfully clear.
21# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT)
22# The various combinations of 0x00 and 0x1B as leading bytes
23# are unclear, as is the semantics of those bytes as standalone
24# or as final single bytes.
25
44b3b9c7 26
27my $chk = Encode::LEAVE_SRC();
28
29# escapes
30# see http://www.csoft.co.uk/sms/character_sets/gsm.htm
31my %esc_seq = (
32 "\x{20ac}" => "\x1b\x65",
33 "\x0c" => "\x1b\x0A",
34 "[" => "\x1b\x3C",
35 "\\" => "\x1b\x2F",
36 "]" => "\x1b\x3E",
37 "^" => "\x1b\x14",
38 "{" => "\x1b\x28",
39 "|" => "\x1b\x40",
40 "}" => "\x1b\x29",
41 "~" => "\x1b\x3D",
42);
43
44my %unesc_seq = reverse %esc_seq;
45
46
47sub eu{
48 $_[0] =~ /[\x00-\x1f]/ ?
49 sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]);
50
51}
52
53for my $c ( map { chr } 0 .. 127 ) {
54 my $u = $Encode::GSM0338::GSM2UNI{$c};
55
56 # default character set
57 is decode( "gsm0338", $c, $chk ), $u,
58 sprintf( "decode \\x%02X", ord($c) );
59 eval { decode( "gsm0338", $c . "\xff", $chk ) };
60 ok( $@, $@ );
61 is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) );
62 eval { encode( "gsm0338", $u . "\x{3000}", $chk ) };
63 ok( $@, $@ );
64
65 # nasty atmark
66 if ( $c eq "\x00" ) {
67 is decode( "gsm0338", "\x00" . $c, $chk ), "\x00",
68 sprintf( '@@ =>: \x00+\x%02X', ord($c) );
69 }
70 else {
71 is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ),
72 sprintf( '@: decode \x00+\x%02X', ord($c) );
73 }
74
75 # escape seq.
76 my $ecs = "\x1b" . $c;
77 if ( $unesc_seq{$ecs} ) {
78 is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs},
79 sprintf( "ESC: decode ESC+\\x%02X", ord($c) );
80 is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs,
81 sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) );
82 }
83 else {
84 is decode( "gsm0338", $ecs, $chk ),
85 "\xA0" . decode( "gsm0338", $c ),
86 sprintf( "decode ESC+\\x%02X", ord($c) );
87 }
88}
89
90__END__
91for my $c (map { chr } 0..127){
92 my $b = "\x1b$c";
93 my $u = $Encode::GSM0338::GSM2UNI{$b};
94 next unless $u;
95 $u ||= "\xA0" . $Encode::GSM0338::GSM2UNI{$c};
96 is decode("gsm0338", $b), $u, sprintf("decode ESC+\\x%02X", ord($c) );
97}
98
99__END__
100# old test follows
101ub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) }
e74d7437 102
103# t("\x00", "\x00"); # ???
104
105# "Round-trip".
106t("\x41", "\x41");
107
108t("\x01", "\xA3");
109t("\x02", "\x24");
110t("\x03", "\xA5");
111t("\x09", "\xE7");
112
113t("\x00\x00", "\x00\x00"); # Maybe?
114t("\x00\x1B", "\x40\xA0"); # Maybe?
115t("\x00\x41", "\x40\x41");
116
117# t("\x1B", "\x1B"); # ???
118
119# Escape with no special second byte is just a NBSP.
120t("\x1B\x41", "\xA0\x41");
121
122t("\x1B\x00", "\xA0\x40"); # Maybe?
123
124# Special escape characters.
125t("\x1B\x0A", "\x0C");
126t("\x1B\x14", "\x5E");
127t("\x1B\x28", "\x7B");
128t("\x1B\x29", "\x7D");
129t("\x1B\x2F", "\x5C");
130t("\x1B\x3C", "\x5B");
131t("\x1B\x3D", "\x7E");
132t("\x1B\x3E", "\x5D");
133t("\x1B\x40", "\x7C");
134t("\x1B\x40", "\x7C");
135t("\x1B\x65", "\x{20AC}");