Commit | Line | Data |
e74d7437 |
1 | BEGIN { |
2 | if ($ENV{'PERL_CORE'}){ |
3 | chdir 't'; |
4 | unshift @INC, '../lib'; |
5 | } |
6 | require Config; import Config; |
7 | if ($Config{'extensions'} !~ /\bEncode\b/) { |
8 | print "1..0 # Skip: Encode was not built\n"; |
9 | exit 0; |
10 | } |
11 | $| = 1; |
12 | } |
13 | |
14 | use strict; |
44b3b9c7 |
15 | use utf8; |
16 | use Test::More tests => 778; |
e74d7437 |
17 | use Encode; |
44b3b9c7 |
18 | use Encode::GSM0338; |
e74d7437 |
19 | |
20 | # The specification of GSM 03.38 is not awfully clear. |
21 | # (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT) |
22 | # The various combinations of 0x00 and 0x1B as leading bytes |
23 | # are unclear, as is the semantics of those bytes as standalone |
24 | # or as final single bytes. |
25 | |
44b3b9c7 |
26 | |
27 | my $chk = Encode::LEAVE_SRC(); |
28 | |
29 | # escapes |
30 | # see http://www.csoft.co.uk/sms/character_sets/gsm.htm |
31 | my %esc_seq = ( |
32 | "\x{20ac}" => "\x1b\x65", |
33 | "\x0c" => "\x1b\x0A", |
34 | "[" => "\x1b\x3C", |
35 | "\\" => "\x1b\x2F", |
36 | "]" => "\x1b\x3E", |
37 | "^" => "\x1b\x14", |
38 | "{" => "\x1b\x28", |
39 | "|" => "\x1b\x40", |
40 | "}" => "\x1b\x29", |
41 | "~" => "\x1b\x3D", |
42 | ); |
43 | |
44 | my %unesc_seq = reverse %esc_seq; |
45 | |
46 | |
47 | sub eu{ |
48 | $_[0] =~ /[\x00-\x1f]/ ? |
49 | sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]); |
50 | |
51 | } |
52 | |
53 | for my $c ( map { chr } 0 .. 127 ) { |
54 | my $u = $Encode::GSM0338::GSM2UNI{$c}; |
55 | |
56 | # default character set |
57 | is decode( "gsm0338", $c, $chk ), $u, |
58 | sprintf( "decode \\x%02X", ord($c) ); |
59 | eval { decode( "gsm0338", $c . "\xff", $chk ) }; |
60 | ok( $@, $@ ); |
61 | is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) ); |
62 | eval { encode( "gsm0338", $u . "\x{3000}", $chk ) }; |
63 | ok( $@, $@ ); |
64 | |
65 | # nasty atmark |
66 | if ( $c eq "\x00" ) { |
67 | is decode( "gsm0338", "\x00" . $c, $chk ), "\x00", |
68 | sprintf( '@@ =>: \x00+\x%02X', ord($c) ); |
69 | } |
70 | else { |
71 | is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ), |
72 | sprintf( '@: decode \x00+\x%02X', ord($c) ); |
73 | } |
74 | |
75 | # escape seq. |
76 | my $ecs = "\x1b" . $c; |
77 | if ( $unesc_seq{$ecs} ) { |
78 | is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs}, |
79 | sprintf( "ESC: decode ESC+\\x%02X", ord($c) ); |
80 | is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs, |
81 | sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) ); |
82 | } |
83 | else { |
84 | is decode( "gsm0338", $ecs, $chk ), |
85 | "\xA0" . decode( "gsm0338", $c ), |
86 | sprintf( "decode ESC+\\x%02X", ord($c) ); |
87 | } |
88 | } |
89 | |
90 | __END__ |
91 | for my $c (map { chr } 0..127){ |
92 | my $b = "\x1b$c"; |
93 | my $u = $Encode::GSM0338::GSM2UNI{$b}; |
94 | next unless $u; |
95 | $u ||= "\xA0" . $Encode::GSM0338::GSM2UNI{$c}; |
96 | is decode("gsm0338", $b), $u, sprintf("decode ESC+\\x%02X", ord($c) ); |
97 | } |
98 | |
99 | __END__ |
100 | # old test follows |
101 | ub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) } |
e74d7437 |
102 | |
103 | # t("\x00", "\x00"); # ??? |
104 | |
105 | # "Round-trip". |
106 | t("\x41", "\x41"); |
107 | |
108 | t("\x01", "\xA3"); |
109 | t("\x02", "\x24"); |
110 | t("\x03", "\xA5"); |
111 | t("\x09", "\xE7"); |
112 | |
113 | t("\x00\x00", "\x00\x00"); # Maybe? |
114 | t("\x00\x1B", "\x40\xA0"); # Maybe? |
115 | t("\x00\x41", "\x40\x41"); |
116 | |
117 | # t("\x1B", "\x1B"); # ??? |
118 | |
119 | # Escape with no special second byte is just a NBSP. |
120 | t("\x1B\x41", "\xA0\x41"); |
121 | |
122 | t("\x1B\x00", "\xA0\x40"); # Maybe? |
123 | |
124 | # Special escape characters. |
125 | t("\x1B\x0A", "\x0C"); |
126 | t("\x1B\x14", "\x5E"); |
127 | t("\x1B\x28", "\x7B"); |
128 | t("\x1B\x29", "\x7D"); |
129 | t("\x1B\x2F", "\x5C"); |
130 | t("\x1B\x3C", "\x5B"); |
131 | t("\x1B\x3D", "\x7E"); |
132 | t("\x1B\x3E", "\x5D"); |
133 | t("\x1B\x40", "\x7C"); |
134 | t("\x1B\x40", "\x7C"); |
135 | t("\x1B\x65", "\x{20AC}"); |