ext/Encode/t/gsm0338.t

   1 BEGIN {
   2     if ($ENV{'PERL_CORE'}){
   3         chdir 't';
   4         unshift @INC, '../lib';
   5     }
   6     require Config; import Config;
   7     if ($Config{'extensions'} !~ /\bEncode\b/) {
   8       print "1..0 # Skip: Encode was not built\n";
   9       exit 0;
  10     }
  11     $| = 1;
  12 }
  13
  14 use strict;
  15 use utf8;
  16 use Test::More tests => 778;
  17 use Encode;
  18 use Encode::GSM0338;
  19
  20 # The specification of GSM 03.38 is not awfully clear.
  21 # (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT)
  22 # The various combinations of 0x00 and 0x1B as leading bytes
  23 # are unclear, as is the semantics of those bytes as standalone
  24 # or as final single bytes.
  25
  26
  27 my $chk = Encode::LEAVE_SRC();
  28
  29 # escapes
  30 # see http://www.csoft.co.uk/sms/character_sets/gsm.htm
  31 my %esc_seq = (
  32                "\x{20ac}" => "\x1b\x65",
  33                "\x0c"     => "\x1b\x0A",
  34                "["        => "\x1b\x3C",
  35                "\\"       => "\x1b\x2F",
  36                "]"        => "\x1b\x3E",
  37                "^"        => "\x1b\x14",
  38                "{"        => "\x1b\x28",
  39                "|"        => "\x1b\x40",
  40                "}"        => "\x1b\x29",
  41                "~"        => "\x1b\x3D",
  42 );
  43
  44 my %unesc_seq = reverse %esc_seq;
  45
  46
  47 sub eu{
  48     $_[0] =~ /[\x00-\x1f]/ ?
  49         sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]);
  50
  51 }
  52
  53 for my $c ( map { chr } 0 .. 127 ) {
  54     my $u = $Encode::GSM0338::GSM2UNI{$c};
  55
  56     # default character set
  57     is decode( "gsm0338", $c, $chk ), $u,
  58       sprintf( "decode \\x%02X", ord($c) );
  59     eval { decode( "gsm0338", $c . "\xff", $chk ) };
  60     ok( $@, $@ );
  61     is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) );
  62     eval { encode( "gsm0338", $u . "\x{3000}", $chk ) };
  63     ok( $@, $@ );
  64
  65     # nasty atmark
  66     if ( $c eq "\x00" ) {
  67         is decode( "gsm0338", "\x00" . $c, $chk ), "\x00",
  68           sprintf( '@@ =>: \x00+\x%02X', ord($c) );
  69     }
  70     else {
  71         is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ),
  72           sprintf( '@: decode \x00+\x%02X', ord($c) );
  73     }
  74
  75     # escape seq.
  76     my $ecs = "\x1b" . $c;
  77     if ( $unesc_seq{$ecs} ) {
  78         is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs},
  79           sprintf( "ESC: decode ESC+\\x%02X", ord($c) );
  80         is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs,
  81           sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) );
  82     }
  83     else {
  84         is decode( "gsm0338", $ecs, $chk ),
  85           "\xA0" . decode( "gsm0338", $c ),
  86           sprintf( "decode ESC+\\x%02X", ord($c) );
  87     }
  88 }
  89
  90 __END__
  91 for my $c (map { chr } 0..127){
  92     my $b = "\x1b$c";
  93     my $u =  $Encode::GSM0338::GSM2UNI{$b};
  94     next unless $u;
  95     $u ||= "\xA0" . $Encode::GSM0338::GSM2UNI{$c};
  96     is decode("gsm0338", $b), $u, sprintf("decode ESC+\\x%02X", ord($c) );
  97 }
  98
  99 __END__
 100 # old test follows
 101 ub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) }
 102
 103 # t("\x00",     "\x00"); # ???
 104
 105 # "Round-trip".
 106 t("\x41",     "\x41");
 107
 108 t("\x01",     "\xA3");
 109 t("\x02",     "\x24");
 110 t("\x03",     "\xA5");
 111 t("\x09",     "\xE7");
 112
 113 t("\x00\x00", "\x00\x00"); # Maybe?
 114 t("\x00\x1B", "\x40\xA0"); # Maybe?
 115 t("\x00\x41", "\x40\x41");
 116
 117 # t("\x1B",     "\x1B"); # ???
 118
 119 # Escape with no special second byte is just a NBSP.
 120 t("\x1B\x41", "\xA0\x41");
 121
 122 t("\x1B\x00", "\xA0\x40"); # Maybe?
 123
 124 # Special escape characters.
 125 t("\x1B\x0A", "\x0C");
 126 t("\x1B\x14", "\x5E");
 127 t("\x1B\x28", "\x7B");
 128 t("\x1B\x29", "\x7D");
 129 t("\x1B\x2F", "\x5C");
 130 t("\x1B\x3C", "\x5B");
 131 t("\x1B\x3D", "\x7E");
 132 t("\x1B\x3E", "\x5D");
 133 t("\x1B\x40", "\x7C");
 134 t("\x1B\x40", "\x7C");
 135 t("\x1B\x65", "\x{20AC}");