Support $! stringification of socket error codes on Windows.
[p5sagit/p5-mst-13.2.git] / cpan / Encode / bin / ucmlint
CommitLineData
fcb875d4 1#!/usr/local/bin/perl
2#
0dbed2e5 3# $Id: ucmlint,v 2.2 2008/03/12 09:51:11 dankogai Exp $
fcb875d4 4#
5
6use strict;
0dbed2e5 7our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
fcb875d4 8
9use Getopt::Std;
10our %Opt;
11getopts("Dehfv", \%Opt);
12
13if ($Opt{e}){
14 eval{ require Encode; };
15 $@ and die "can't load Encode : $@";
16}
17
18$Opt{h} and help();
19@ARGV or help();
20
21sub help{
22 print <<"";
23$0 -[Dehfv] [ucm files ...]
24 -D debug mode on
25 -e test with Encode module also (requires perl 5.7.3 or higher)
26 -h shows this message
27 -f forces roundtrip check even for |[123]
28 -v verbose mode
29
30}
31
32$| = 1;
0dbed2e5 33my (%Hdr, %U2E, %E2U, %Fallback);
fcb875d4 34my $in_charmap = 0;
35my $nerror = 0;
36my $nwarning = 0;
37
38sub nit($;$){
39 my ($msg, $level) = @_;
40 my $lstr;
41 if ($level == 2){
0dbed2e5 42 $lstr = 'notice';
fcb875d4 43 }elsif ($level == 1){
0dbed2e5 44 $lstr = 'warning'; $nwarning++;
fcb875d4 45 }else{
0dbed2e5 46 $lstr = 'error'; $nerror++;
fcb875d4 47 }
48 print "$ARGV:$lstr in line $.: $msg\n";
49}
50
51for $ARGV (@ARGV){
52 open UCM, $ARGV or die "$ARGV:$!";
0dbed2e5 53 %Hdr = %U2E = %E2U = %Fallback = ();
fcb875d4 54 $in_charmap = $nerror = $nwarning = 0;
55 $. = 0;
56 while(<UCM>){
0dbed2e5 57 chomp;
58 s/\s*#.*$//o; /^$/ and next;
59 if ($_ eq "CHARMAP"){
60 $in_charmap = 1;
61 for my $must (qw/code_set_name mb_cur_min mb_cur_max/){
62 exists $Hdr{$must} or nit "<$must> nonexistent";
d1256cb1 63 }
0dbed2e5 64 $Hdr{mb_cur_min} > $Hdr{mb_cur_max}
65 and nit sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
66 $Hdr{mb_cur_min},$Hdr{mb_cur_max});
67 $in_charmap = 1;
68 next;
d1256cb1 69 }
0dbed2e5 70 unless ($in_charmap){
71 my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next;
72 $Opt{D} and warn "$hkey => $hvalue";
73 if ($hkey eq "code_set_name"){ # name check
74 exists $Hdr{code_set_name}
75 and nit "Duplicate <code_set_name>: $hkey";
76 }
77 if ($hkey eq "code_set_alias"){ # alias check
78 $hvalue eq $Hdr{code_set_name}
79 and nit qq(alias "$hvalue" is already in <code_set_name>);
80 }
81 $Hdr{$hkey} = $hvalue;
d1256cb1 82 }else{
0dbed2e5 83 my $name = $Hdr{code_set_name};
84 my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s(\S+)/o or next;
85 $Opt{v} and nit $_, 2;
86 my $uni = uniparse($unistr);
87 my $enc = encparse($encstr);
88 $fb =~ /^\|([0123])$/ or nit "malformed fallback: $fb";
89 $fb = $1;
90 $Opt{f} and $fb = 0;
91 unless ($fb == 3){ # check uni -> enc
92 if (exists $U2E{$uni}){
93 nit "dupe encode map: U$uni => $U2E{$uni} and $enc", 1;
94 }else{
95 $U2E{$uni} = $enc;
96 $Fallback{$uni}{$enc} = 1 if $fb == 1;
97 if ($Opt{e}) {
98 my $e = hex2enc($enc);
99 my $u = hex2uni($uni);
100 my $eu = Encode::encode($name, $u);
101 $e eq $eu
102 or nit qq(encode('$name', $uni) != $enc);
103 }
104 }
d1256cb1 105 }
0dbed2e5 106 unless ($fb == 1){ # check enc -> uni
107 if (exists $E2U{$enc}){
108 nit "dupe decode map: $enc => U$E2U{$enc} and U$uni", 1;
109 }else{
110 $E2U{$enc} = $uni;
111 $Fallback{$enc}{$uni} = 1 if $fb == 3;
112 if ($Opt{e}) {
113 my $e = hex2enc($enc);
114 my $u = hex2uni($uni);
115 $Opt{D} and warn "$uni, $enc";
116 my $de = Encode::decode($name, $e);
117 $de eq $u
118 or nit qq(decode('$name', $enc) != $uni);
119 }
120 }
121 }
122 # warn "$uni, $enc, $fb";
d1256cb1 123 }
fcb875d4 124 }
125 $in_charmap or nit "Where is CHARMAP?";
126 checkRT();
127 printf ("$ARGV: %s error%s found\n",
0dbed2e5 128 ($nerror == 0 ? 'no' : $nerror),
129 ($nerror > 1 ? 's' : ''));
fcb875d4 130}
131
132exit;
133
134sub hex2enc{
135 pack("C*", map {hex($_)} split(",", shift));
136}
137sub hex2uni{
138 join("", map { chr(hex($_)) } split(",", shift));
139}
140
141sub checkRT{
142 for my $uni (keys %E2U){
0dbed2e5 143 my $enc = $U2E{$uni} or next; # okay
144 $E2U{$U2E{$uni}} eq $uni or $Fallback{$uni}{$enc} or
145 nit "RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}";
fcb875d4 146 }
147 for my $enc (keys %E2U){
0dbed2e5 148 my $uni = $E2U{$enc} or next; # okay
149 $U2E{$E2U{$enc}} eq $enc or $Fallback{$enc}{$uni} or
150 nit "RT failure: $enc => U$uni => $U2E{$E2U{$enc}}";
fcb875d4 151 }
152}
153
154
155sub uniparse{
156 my $str = shift;
157 my @u;
158 push @u, $1 while($str =~ /\G<U(.*?)>/ig);
159 for my $u (@u){
0dbed2e5 160 $u =~ /^([0-9A-Za-z]+)$/o
161 or nit "malformed Unicode character: $u";
fcb875d4 162 }
163 return join(',', @u);
164}
165
166sub encparse{
167 my $str = shift;
168 my @e;
169 for my $e (split /\\x/io, $str){
0dbed2e5 170 $e or next; # first \x
171 $e =~ /^([0-9A-Za-z]{1,2})$/io
172 or nit "Hex $e in $str is bogus";
173 push @e, $1;
fcb875d4 174 }
175 return join(',', @e);
176}
177
178
179
180__END__
181
0ab8f81e 182A UCM file looks like this.
fcb875d4 183
184 #
185 # Comments
186 #
187 <code_set_name> "US-ascii" # Required
188 <code_set_alias> "ascii" # Optional
189 <mb_cur_min> 1 # Required; usually 1
190 <mb_cur_max> 1 # Max. # of bytes/char
191 <subchar> \x3F # Substitution char
192 #
193 CHARMAP
194 <U0000> \x00 |0 # <control>
195 <U0001> \x01 |0 # <control>
196 <U0002> \x02 |0 # <control>
197 ....
198 <U007C> \x7C |0 # VERTICAL LINE
199 <U007D> \x7D |0 # RIGHT CURLY BRACKET
200 <U007E> \x7E |0 # TILDE
201 <U007F> \x7F |0 # <control>
202 END CHARMAP
203