From: Nick Ing-Simmons Date: Mon, 5 Mar 2001 22:35:07 +0000 (+0000) Subject: More Encode alias tidying. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=016cb72cf9c105870bee6a8400914494bc6b9d12;p=p5sagit%2Fp5-mst-13.2.git More Encode alias tidying. p4raw-id: //depot/perlio@9051 --- diff --git a/MANIFEST b/MANIFEST index 2ca30f8..69d9390 100644 --- a/MANIFEST +++ b/MANIFEST @@ -279,6 +279,7 @@ ext/Encode/Encode/jis0201.enc Encoding tables ext/Encode/Encode/jis0208.enc Encoding tables ext/Encode/Encode/jis0212.enc Encoding tables ext/Encode/Encode/koi8-r.enc Encoding tables +ext/Encode/Encode/koi8-r.ucm Encoding tables ext/Encode/Encode/ksc5601.enc Encoding tables ext/Encode/Encode/macCentEuro.enc Encoding tables ext/Encode/Encode/macCroatian.enc Encoding tables diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 296ddc3..b5ba929 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -96,28 +96,26 @@ sub define_alias } } -my %isolatin2num = - ( - 1 => 1, - 2 => 2, - 3 => 3, - 4 => 4, - 5 => 9, - 6 => 10, - 7 => 13, - 8 => 14, - 9 => 15, - 10 => 16, - ); - +# Allow variants of iso-8859-1 etc. define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); -define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i => - '"iso-8859-$isolatin2num{$1}"' ); -define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); -#define_alias( sub { return /^iso-(\d+-\d+)$/i ? "iso$1" : '' } ); -define_alias( 'ascii' => 'US-ascii'); + +# Allow latin-1 style names as well + # 0 1 2 3 4 5 6 7 8 9 10 +my @latin2iso_num = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); +define_alias( qr/^latin[-_]?(\d+)$/i => '"iso-8859-$latin2iso_num[$1]"' ); + +# Common names for non-latin prefered MIME names +define_alias( 'ascii' => 'US-ascii', + 'cyrillic' => 'iso-8859-5', + 'arabic' => 'iso-8859-6', + 'greek' => 'iso-8859-7', + 'hebrew' => 'iso-8859-8'); + define_alias( 'ibm-1047' => 'cp1047'); +# Map white space and _ to '-' +define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); + sub define_encoding { my $obj = shift; diff --git a/ext/Encode/Encode/koi8-r.ucm b/ext/Encode/Encode/koi8-r.ucm new file mode 100644 index 0000000..376ce5f --- /dev/null +++ b/ext/Encode/Encode/koi8-r.ucm @@ -0,0 +1,265 @@ +# Written $Id: //depot/perlio/ext/Encode/compile#16 $ +# ./compile -n koi8-r -o Encode/koi8-r.ucm Encode/koi8-r.enc + "koi8-r" + 1 + 1 + \x3F +# +CHARMAP + \x00 |0 # + \x01 |0 # + \x02 |0 # + \x03 |0 # + \x04 |0 # + \x05 |0 # + \x06 |0 # + \x07 |0 # + \x08 |0 # + \x09 |0 # + \x0A |0 # + \x0B |0 # + \x0C |0 # + \x0D |0 # + \x0E |0 # + \x0F |0 # + \x10 |0 # + \x11 |0 # + \x12 |0 # + \x13 |0 # + \x14 |0 # + \x15 |0 # + \x16 |0 # + \x17 |0 # + \x18 |0 # + \x19 |0 # + \x1A |0 # + \x1B |0 # + \x1C |0 # + \x1D |0 # + \x1E |0 # + \x1F |0 # + \x20 |0 # SPACE + \x21 |0 # EXCLAMATION MARK + \x22 |0 # QUOTATION MARK + \x23 |0 # NUMBER SIGN + \x24 |0 # DOLLAR SIGN + \x25 |0 # PERCENT SIGN + \x26 |0 # AMPERSAND + \x27 |0 # APOSTROPHE + \x28 |0 # LEFT PARENTHESIS + \x29 |0 # RIGHT PARENTHESIS + \x2A |0 # ASTERISK + \x2B |0 # PLUS SIGN + \x2C |0 # COMMA + \x2D |0 # HYPHEN-MINUS + \x2E |0 # FULL STOP + \x2F |0 # SOLIDUS + \x30 |0 # DIGIT ZERO + \x31 |0 # DIGIT ONE + \x32 |0 # DIGIT TWO + \x33 |0 # DIGIT THREE + \x34 |0 # DIGIT FOUR + \x35 |0 # DIGIT FIVE + \x36 |0 # DIGIT SIX + \x37 |0 # DIGIT SEVEN + \x38 |0 # DIGIT EIGHT + \x39 |0 # DIGIT NINE + \x3A |0 # COLON + \x3B |0 # SEMICOLON + \x3C |0 # LESS-THAN SIGN + \x3D |0 # EQUALS SIGN + \x3E |0 # GREATER-THAN SIGN + \x3F |0 # QUESTION MARK + \x40 |0 # COMMERCIAL AT + \x41 |0 # LATIN CAPITAL LETTER A + \x42 |0 # LATIN CAPITAL LETTER B + \x43 |0 # LATIN CAPITAL LETTER C + \x44 |0 # LATIN CAPITAL LETTER D + \x45 |0 # LATIN CAPITAL LETTER E + \x46 |0 # LATIN CAPITAL LETTER F + \x47 |0 # LATIN CAPITAL LETTER G + \x48 |0 # LATIN CAPITAL LETTER H + \x49 |0 # LATIN CAPITAL LETTER I + \x4A |0 # LATIN CAPITAL LETTER J + \x4B |0 # LATIN CAPITAL LETTER K + \x4C |0 # LATIN CAPITAL LETTER L + \x4D |0 # LATIN CAPITAL LETTER M + \x4E |0 # LATIN CAPITAL LETTER N + \x4F |0 # LATIN CAPITAL LETTER O + \x50 |0 # LATIN CAPITAL LETTER P + \x51 |0 # LATIN CAPITAL LETTER Q + \x52 |0 # LATIN CAPITAL LETTER R + \x53 |0 # LATIN CAPITAL LETTER S + \x54 |0 # LATIN CAPITAL LETTER T + \x55 |0 # LATIN CAPITAL LETTER U + \x56 |0 # LATIN CAPITAL LETTER V + \x57 |0 # LATIN CAPITAL LETTER W + \x58 |0 # LATIN CAPITAL LETTER X + \x59 |0 # LATIN CAPITAL LETTER Y + \x5A |0 # LATIN CAPITAL LETTER Z + \x5B |0 # LEFT SQUARE BRACKET + \x5C |0 # REVERSE SOLIDUS + \x5D |0 # RIGHT SQUARE BRACKET + \x5E |0 # CIRCUMFLEX ACCENT + \x5F |0 # LOW LINE + \x60 |0 # GRAVE ACCENT + \x61 |0 # LATIN SMALL LETTER A + \x62 |0 # LATIN SMALL LETTER B + \x63 |0 # LATIN SMALL LETTER C + \x64 |0 # LATIN SMALL LETTER D + \x65 |0 # LATIN SMALL LETTER E + \x66 |0 # LATIN SMALL LETTER F + \x67 |0 # LATIN SMALL LETTER G + \x68 |0 # LATIN SMALL LETTER H + \x69 |0 # LATIN SMALL LETTER I + \x6A |0 # LATIN SMALL LETTER J + \x6B |0 # LATIN SMALL LETTER K + \x6C |0 # LATIN SMALL LETTER L + \x6D |0 # LATIN SMALL LETTER M + \x6E |0 # LATIN SMALL LETTER N + \x6F |0 # LATIN SMALL LETTER O + \x70 |0 # LATIN SMALL LETTER P + \x71 |0 # LATIN SMALL LETTER Q + \x72 |0 # LATIN SMALL LETTER R + \x73 |0 # LATIN SMALL LETTER S + \x74 |0 # LATIN SMALL LETTER T + \x75 |0 # LATIN SMALL LETTER U + \x76 |0 # LATIN SMALL LETTER V + \x77 |0 # LATIN SMALL LETTER W + \x78 |0 # LATIN SMALL LETTER X + \x79 |0 # LATIN SMALL LETTER Y + \x7A |0 # LATIN SMALL LETTER Z + \x7B |0 # LEFT CURLY BRACKET + \x7C |0 # VERTICAL LINE + \x7D |0 # RIGHT CURLY BRACKET + \x7E |0 # TILDE + \x7F |0 # + \x80 |0 # BOX DRAWINGS LIGHT HORIZONTAL + \x81 |0 # BOX DRAWINGS LIGHT VERTICAL + \x82 |0 # BOX DRAWINGS LIGHT DOWN AND RIGHT + \x83 |0 # BOX DRAWINGS LIGHT DOWN AND LEFT + \x84 |0 # BOX DRAWINGS LIGHT UP AND RIGHT + \x85 |0 # BOX DRAWINGS LIGHT UP AND LEFT + \x86 |0 # BOX DRAWINGS LIGHT VERTICAL AND RIGHT + \x87 |0 # BOX DRAWINGS LIGHT VERTICAL AND LEFT + \x88 |0 # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + \x89 |0 # BOX DRAWINGS LIGHT UP AND HORIZONTAL + \x8A |0 # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + \x8B |0 # UPPER HALF BLOCK + \x8C |0 # LOWER HALF BLOCK + \x8D |0 # FULL BLOCK + \x8E |0 # LEFT HALF BLOCK + \x8F |0 # RIGHT HALF BLOCK + \x90 |0 # LIGHT SHADE + \x91 |0 # MEDIUM SHADE + \x92 |0 # DARK SHADE + \x93 |0 # TOP HALF INTEGRAL + \x94 |0 # BLACK SQUARE + \x95 |0 # BULLET OPERATOR + \x96 |0 # SQUARE ROOT + \x97 |0 # ALMOST EQUAL TO + \x98 |0 # LESS-THAN OR EQUAL TO + \x99 |0 # GREATER-THAN OR EQUAL TO + \x9A |0 # NO-BREAK SPACE + \x9B |0 # BOTTOM HALF INTEGRAL + \x9C |0 # DEGREE SIGN + \x9D |0 # SUPERSCRIPT TWO + \x9E |0 # MIDDLE DOT + \x9F |0 # DIVISION SIGN + \xA0 |0 # BOX DRAWINGS DOUBLE HORIZONTAL + \xA1 |0 # BOX DRAWINGS DOUBLE VERTICAL + \xA2 |0 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + \xA3 |0 # CYRILLIC SMALL LETTER IO + \xA4 |0 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + \xA5 |0 # BOX DRAWINGS DOUBLE DOWN AND RIGHT + \xA6 |0 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + \xA7 |0 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + \xA8 |0 # BOX DRAWINGS DOUBLE DOWN AND LEFT + \xA9 |0 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + \xAA |0 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + \xAB |0 # BOX DRAWINGS DOUBLE UP AND RIGHT + \xAC |0 # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + \xAD |0 # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + \xAE |0 # BOX DRAWINGS DOUBLE UP AND LEFT + \xAF |0 # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + \xB0 |0 # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + \xB1 |0 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + \xB2 |0 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + \xB3 |0 # CYRILLIC CAPITAL LETTER IO + \xB4 |0 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + \xB5 |0 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT + \xB6 |0 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + \xB7 |0 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + \xB8 |0 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + \xB9 |0 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + \xBA |0 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + \xBB |0 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL + \xBC |0 # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + \xBD |0 # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + \xBE |0 # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + \xBF |0 # COPYRIGHT SIGN + \xC0 |0 # CYRILLIC SMALL LETTER YU + \xC1 |0 # CYRILLIC SMALL LETTER A + \xC2 |0 # CYRILLIC SMALL LETTER BE + \xC3 |0 # CYRILLIC SMALL LETTER TSE + \xC4 |0 # CYRILLIC SMALL LETTER DE + \xC5 |0 # CYRILLIC SMALL LETTER IE + \xC6 |0 # CYRILLIC SMALL LETTER EF + \xC7 |0 # CYRILLIC SMALL LETTER GHE + \xC8 |0 # CYRILLIC SMALL LETTER HA + \xC9 |0 # CYRILLIC SMALL LETTER I + \xCA |0 # CYRILLIC SMALL LETTER SHORT I + \xCB |0 # CYRILLIC SMALL LETTER KA + \xCC |0 # CYRILLIC SMALL LETTER EL + \xCD |0 # CYRILLIC SMALL LETTER EM + \xCE |0 # CYRILLIC SMALL LETTER EN + \xCF |0 # CYRILLIC SMALL LETTER O + \xD0 |0 # CYRILLIC SMALL LETTER PE + \xD1 |0 # CYRILLIC SMALL LETTER YA + \xD2 |0 # CYRILLIC SMALL LETTER ER + \xD3 |0 # CYRILLIC SMALL LETTER ES + \xD4 |0 # CYRILLIC SMALL LETTER TE + \xD5 |0 # CYRILLIC SMALL LETTER U + \xD6 |0 # CYRILLIC SMALL LETTER ZHE + \xD7 |0 # CYRILLIC SMALL LETTER VE + \xD8 |0 # CYRILLIC SMALL LETTER SOFT SIGN + \xD9 |0 # CYRILLIC SMALL LETTER YERU + \xDA |0 # CYRILLIC SMALL LETTER ZE + \xDB |0 # CYRILLIC SMALL LETTER SHA + \xDC |0 # CYRILLIC SMALL LETTER E + \xDD |0 # CYRILLIC SMALL LETTER SHCHA + \xDE |0 # CYRILLIC SMALL LETTER CHE + \xDF |0 # CYRILLIC SMALL LETTER HARD SIGN + \xE0 |0 # CYRILLIC CAPITAL LETTER YU + \xE1 |0 # CYRILLIC CAPITAL LETTER A + \xE2 |0 # CYRILLIC CAPITAL LETTER BE + \xE3 |0 # CYRILLIC CAPITAL LETTER TSE + \xE4 |0 # CYRILLIC CAPITAL LETTER DE + \xE5 |0 # CYRILLIC CAPITAL LETTER IE + \xE6 |0 # CYRILLIC CAPITAL LETTER EF + \xE7 |0 # CYRILLIC CAPITAL LETTER GHE + \xE8 |0 # CYRILLIC CAPITAL LETTER HA + \xE9 |0 # CYRILLIC CAPITAL LETTER I + \xEA |0 # CYRILLIC CAPITAL LETTER SHORT I + \xEB |0 # CYRILLIC CAPITAL LETTER KA + \xEC |0 # CYRILLIC CAPITAL LETTER EL + \xED |0 # CYRILLIC CAPITAL LETTER EM + \xEE |0 # CYRILLIC CAPITAL LETTER EN + \xEF |0 # CYRILLIC CAPITAL LETTER O + \xF0 |0 # CYRILLIC CAPITAL LETTER PE + \xF1 |0 # CYRILLIC CAPITAL LETTER YA + \xF2 |0 # CYRILLIC CAPITAL LETTER ER + \xF3 |0 # CYRILLIC CAPITAL LETTER ES + \xF4 |0 # CYRILLIC CAPITAL LETTER TE + \xF5 |0 # CYRILLIC CAPITAL LETTER U + \xF6 |0 # CYRILLIC CAPITAL LETTER ZHE + \xF7 |0 # CYRILLIC CAPITAL LETTER VE + \xF8 |0 # CYRILLIC CAPITAL LETTER SOFT SIGN + \xF9 |0 # CYRILLIC CAPITAL LETTER YERU + \xFA |0 # CYRILLIC CAPITAL LETTER ZE + \xFB |0 # CYRILLIC CAPITAL LETTER SHA + \xFC |0 # CYRILLIC CAPITAL LETTER E + \xFD |0 # CYRILLIC CAPITAL LETTER SHCHA + \xFE |0 # CYRILLIC CAPITAL LETTER CHE + \xFF |0 # CYRILLIC CAPITAL LETTER HARD SIGN +END CHARMAP diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index 0b20c48..97254ac 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -1,6 +1,6 @@ use ExtUtils::MakeMaker; -my %tables = (iso8859 => ['ascii.ucm', 'cp1250.ucm'], +my %tables = (iso8859 => ['ascii.ucm', 'cp1250.ucm', 'koi8-r.ucm' ], EBCDIC => ['cp1047.ucm','cp37.ucm','posix-bc.ucm'], Symbols => ['symbol.ucm','dingbats.ucm'], );