From: Jarkko Hietaniemi Date: Fri, 1 Aug 2003 13:45:10 +0000 (+0000) Subject: Encode pre-1.98 update from Dan Kogai, sent from X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f9674d8395f3ae151e95db6523de9d832afc2b2b;p=p5sagit%2Fp5-mst-13.2.git Encode pre-1.98 update from Dan Kogai, sent from the ashes of his smoldering motherboard. p4raw-id: //depot/perl@20432 --- diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index b565a0f..9cf8dd4 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -34,6 +34,7 @@ Michael G Schwern Nicholas Clark Nick Ing-Simmons Paul Marquess +Peter Prymmer Philip Newton Robin Barker SADAHIRO Tomoyuki diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 5f8b52c..5c5cf6f 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -3,6 +3,15 @@ # $Id: Changes,v 1.97 2003/07/08 21:52:14 dankogai Exp $ # $Revision: 1.97 $ $Date: 2003/07/08 21:52:14 $ +! t/enc_eucjp.t t/enc_utf8.t AUTHORS + Encode test fixes for VMS by Peter Prymmer + Message-ID: +! lib/Encode/Alias.pm t/Aliases.t + koi-8 aliases bug detected and patched by sadahiro. + Further fix and test suite by dankogai + Message-Id: <20030713102228.C76A.BQW10602@nifty.com> + +1.97 2003/07/08 21:52:14 ! encoding.pm lib/Encode/Guess.pm lib/Encode/Alias.pm lib/Encode/JP/JIS7.pm lib/Encode/Encoder.pm Encode.pm $DEBUG replaced with DEBUG() so perl optimizes better, diff --git a/ext/Encode/META.yml b/ext/Encode/META.yml index 1216c72..2e50ff6 100644 --- a/ext/Encode/META.yml +++ b/ext/Encode/META.yml @@ -6,4 +6,4 @@ installdirs: perl requires: distribution_type: module -generated_by: ExtUtils::MakeMaker version 6.10_05 +generated_by: ExtUtils::MakeMaker version 6.12 diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 5eeb125..49ae79e 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -193,8 +193,7 @@ sub init_aliases # Standardize on the dashed versions. # define_alias( qr/\butf8$/i => '"utf-8"' ); - define_alias( qr/\bkoi8r$/i => '"koi8-r"' ); - define_alias( qr/\bkoi8u$/i => '"koi8-u"' ); + define_alias( qr/\bkoi8[\s-_]*([ru])$/i => '"koi8-$1"' ); unless ($Encode::ON_EBCDIC){ # for Encode::CN diff --git a/ext/Encode/t/Aliases.t b/ext/Encode/t/Aliases.t index c270c0e..64a42a4 100644 --- a/ext/Encode/t/Aliases.t +++ b/ext/Encode/t/Aliases.t @@ -51,6 +51,8 @@ sub init_a2c{ 'WinArabic' => 'cp1256', 'WinBaltic' => 'cp1257', 'WinVietnamese' => 'cp1258', + 'koi8r' => 'koi8-r', + 'koi8u' => 'koi8-u', 'ja_JP.euc' => $ON_EBCDIC ? '' : 'euc-jp', 'x-euc-jp' => $ON_EBCDIC ? '' : 'euc-jp', 'zh_CN.euc' => $ON_EBCDIC ? '' : 'euc-cn', diff --git a/ext/Encode/t/perlio.t b/ext/Encode/t/perlio.t index 1b1de89..012bbe6 100644 --- a/ext/Encode/t/perlio.t +++ b/ext/Encode/t/perlio.t @@ -122,10 +122,10 @@ for my $src (sort keys %e) { dump2file("$pfile.$seq", $dtext); } } - unless ( $DEBUG ) { - 1 while unlink $sfile; - 1 while unlink $pfile; - } + if ( ! $DEBUG ) { + 1 while unlink ($sfile); + 1 while unlink ($pfile); + } } } diff --git a/ext/Encode/ucm/macArabic.ucm b/ext/Encode/ucm/macArabic.ucm index 5dbb37e..5ac3a2b 100644 --- a/ext/Encode/ucm/macArabic.ucm +++ b/ext/Encode/ucm/macArabic.ucm @@ -41,16 +41,38 @@ CHARMAP \x1D |0 # \x1E |0 # \x1F |0 # - \x30 |0 # DIGIT ZERO - \x31 |0 # DIGIT ONE - \x32 |0 # DIGIT TWO - \x33 |0 # DIGIT THREE - \x34 |0 # DIGIT FOUR - \x35 |0 # DIGIT FIVE - \x36 |0 # DIGIT SIX - \x37 |0 # DIGIT SEVEN - \x38 |0 # DIGIT EIGHT - \x39 |0 # DIGIT NINE + \xA0 |0 # SPACE, right-left + \xA1 |0 # EXCLAMATION MARK, right-left + \xA2 |0 # QUOTATION MARK, right-left + \xA3 |0 # NUMBER SIGN, right-left + \xA4 |0 # DOLLAR SIGN, right-left + \x25 |0 # PERCENT SIGN, left-right + \xA6 |0 # AMPERSAND, right-left + \xA7 |0 # APOSTROPHE, right-left + \xA8 |0 # LEFT PARENTHESIS, right-left + \xA9 |0 # RIGHT PARENTHESIS, right-left + \xAA |0 # ASTERISK, right-left + \xAB |0 # PLUS SIGN, right-left + \x2C |0 # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + \xAD |0 # HYPHEN-MINUS, right-left + \xAE |0 # FULL STOP, right-left + \xAF |0 # SOLIDUS, right-left + \x30 |0 # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO + \x31 |0 # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE + \x32 |0 # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO + \x33 |0 # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE + \x34 |0 # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR + \x35 |0 # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE + \x36 |0 # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX + \x37 |0 # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN + \x38 |0 # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT + \x39 |0 # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE + \xBA |0 # COLON, right-left + \x3B |0 # SEMICOLON, left-right + \xBC |0 # LESS-THAN SIGN, right-left + \xBD |0 # EQUALS SIGN, right-left + \xBE |0 # GREATER-THAN SIGN, right-left + \x3F |0 # QUESTION MARK, left-right \x40 |0 # COMMERCIAL AT \x41 |0 # LATIN CAPITAL LETTER A \x42 |0 # LATIN CAPITAL LETTER B @@ -78,6 +100,11 @@ CHARMAP \x58 |0 # LATIN CAPITAL LETTER X \x59 |0 # LATIN CAPITAL LETTER Y \x5A |0 # LATIN CAPITAL LETTER Z + \xDB |0 # LEFT SQUARE BRACKET, right-left + \xDC |0 # REVERSE SOLIDUS, right-left + \xDD |0 # RIGHT SQUARE BRACKET, right-left + \xDE |0 # CIRCUMFLEX ACCENT, right-left + \xDF |0 # LOW LINE, right-left \x60 |0 # GRAVE ACCENT \x61 |0 # LATIN SMALL LETTER A \x62 |0 # LATIN SMALL LETTER B @@ -105,7 +132,13 @@ CHARMAP \x78 |0 # LATIN SMALL LETTER X \x79 |0 # LATIN SMALL LETTER Y \x7A |0 # LATIN SMALL LETTER Z + \xFB |0 # LEFT CURLY BRACKET, right-left + \xFC |0 # VERTICAL LINE, right-left + \xFD |0 # RIGHT CURLY BRACKET, right-left \x7E |0 # TILDE + \x81 |0 # NO-BREAK SPACE, right-left + \x8C |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + \x98 |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left \x80 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS \x82 |0 # LATIN CAPITAL LETTER C WITH CEDILLA \x83 |0 # LATIN CAPITAL LETTER E WITH ACUTE @@ -128,6 +161,7 @@ CHARMAP \x97 |0 # LATIN SMALL LETTER O WITH ACUTE \x99 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX \x9A |0 # LATIN SMALL LETTER O WITH DIAERESIS + \x9B |0 # DIVISION SIGN, right-left \x9D |0 # LATIN SMALL LETTER U WITH GRAVE \x9C |0 # LATIN SMALL LETTER U WITH ACUTE \x9E |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX @@ -179,6 +213,16 @@ CHARMAP \xEF |0 # ARABIC DAMMA \xF0 |0 # ARABIC KASRA \xF1 |0 # ARABIC SHADDA + \xB0 |0 # ARABIC-INDIC DIGIT ZERO, right-left (need override) + \xB1 |0 # ARABIC-INDIC DIGIT ONE, right-left (need override) + \xB2 |0 # ARABIC-INDIC DIGIT TWO, right-left (need override) + \xB3 |0 # ARABIC-INDIC DIGIT THREE, right-left (need override) + \xB4 |0 # ARABIC-INDIC DIGIT FOUR, right-left (need override) + \xB5 |0 # ARABIC-INDIC DIGIT FIVE, right-left (need override) + \xB6 |0 # ARABIC-INDIC DIGIT SIX, right-left (need override) + \xB7 |0 # ARABIC-INDIC DIGIT SEVEN, right-left (need override) + \xB8 |0 # ARABIC-INDIC DIGIT EIGHT, right-left (need override) + \xB9 |0 # ARABIC-INDIC DIGIT NINE, right-left (need override) \xF2 |0 # ARABIC SUKUN \xA5 |0 # ARABIC PERCENT SIGN \xF4 |0 # ARABIC LETTER TTEH @@ -192,4 +236,6 @@ CHARMAP \x8B |0 # ARABIC LETTER NOON GHUNNA \xFF |0 # ARABIC LETTER YEH BARREE \xF6 |0 # ARABIC LETTER AE + \x93 |0 # HORIZONTAL ELLIPSIS, right-left + \xC0 |0 # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left END CHARMAP diff --git a/ext/Encode/ucm/macFarsi.ucm b/ext/Encode/ucm/macFarsi.ucm index 889bb91..46da8f4 100644 --- a/ext/Encode/ucm/macFarsi.ucm +++ b/ext/Encode/ucm/macFarsi.ucm @@ -41,16 +41,38 @@ CHARMAP \x1D |0 # \x1E |0 # \x1F |0 # - \x30 |0 # DIGIT ZERO - \x31 |0 # DIGIT ONE - \x32 |0 # DIGIT TWO - \x33 |0 # DIGIT THREE - \x34 |0 # DIGIT FOUR - \x35 |0 # DIGIT FIVE - \x36 |0 # DIGIT SIX - \x37 |0 # DIGIT SEVEN - \x38 |0 # DIGIT EIGHT - \x39 |0 # DIGIT NINE + \xA0 |0 # SPACE, right-left + \xA1 |0 # EXCLAMATION MARK, right-left + \xA2 |0 # QUOTATION MARK, right-left + \xA3 |0 # NUMBER SIGN, right-left + \xA4 |0 # DOLLAR SIGN, right-left + \x25 |0 # PERCENT SIGN, left-right + \xA6 |0 # AMPERSAND, right-left + \xA7 |0 # APOSTROPHE, right-left + \xA8 |0 # LEFT PARENTHESIS, right-left + \xA9 |0 # RIGHT PARENTHESIS, right-left + \xAA |0 # ASTERISK, right-left + \xAB |0 # PLUS SIGN, right-left + \x2C |0 # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR + \xAD |0 # HYPHEN-MINUS, right-left + \xAE |0 # FULL STOP, right-left + \xAF |0 # SOLIDUS, right-left + \x30 |0 # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO + \x31 |0 # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE + \x32 |0 # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO + \x33 |0 # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE + \x34 |0 # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR + \x35 |0 # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE + \x36 |0 # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX + \x37 |0 # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN + \x38 |0 # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT + \x39 |0 # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE + \xBA |0 # COLON, right-left + \x3B |0 # SEMICOLON, left-right + \xBC |0 # LESS-THAN SIGN, right-left + \xBD |0 # EQUALS SIGN, right-left + \xBE |0 # GREATER-THAN SIGN, right-left + \x3F |0 # QUESTION MARK, left-right \x40 |0 # COMMERCIAL AT \x41 |0 # LATIN CAPITAL LETTER A \x42 |0 # LATIN CAPITAL LETTER B @@ -78,6 +100,11 @@ CHARMAP \x58 |0 # LATIN CAPITAL LETTER X \x59 |0 # LATIN CAPITAL LETTER Y \x5A |0 # LATIN CAPITAL LETTER Z + \xDB |0 # LEFT SQUARE BRACKET, right-left + \xDC |0 # REVERSE SOLIDUS, right-left + \xDD |0 # RIGHT SQUARE BRACKET, right-left + \xDE |0 # CIRCUMFLEX ACCENT, right-left + \xDF |0 # LOW LINE, right-left \x60 |0 # GRAVE ACCENT \x61 |0 # LATIN SMALL LETTER A \x62 |0 # LATIN SMALL LETTER B @@ -105,7 +132,13 @@ CHARMAP \x78 |0 # LATIN SMALL LETTER X \x79 |0 # LATIN SMALL LETTER Y \x7A |0 # LATIN SMALL LETTER Z + \xFB |0 # LEFT CURLY BRACKET, right-left + \xFC |0 # VERTICAL LINE, right-left + \xFD |0 # RIGHT CURLY BRACKET, right-left \x7E |0 # TILDE + \x81 |0 # NO-BREAK SPACE, right-left + \x8C |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left + \x98 |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left \x80 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS \x82 |0 # LATIN CAPITAL LETTER C WITH CEDILLA \x83 |0 # LATIN CAPITAL LETTER E WITH ACUTE @@ -128,6 +161,7 @@ CHARMAP \x97 |0 # LATIN SMALL LETTER O WITH ACUTE \x99 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX \x9A |0 # LATIN SMALL LETTER O WITH DIAERESIS + \x9B |0 # DIVISION SIGN, right-left \x9D |0 # LATIN SMALL LETTER U WITH GRAVE \x9C |0 # LATIN SMALL LETTER U WITH ACUTE \x9E |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX @@ -192,4 +226,16 @@ CHARMAP \x8B |0 # ARABIC LETTER NOON GHUNNA \xFF |0 # ARABIC LETTER YEH BARREE \xF6 |0 # ARABIC LETTER AE + \xB0 |0 # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) + \xB1 |0 # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) + \xB2 |0 # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) + \xB3 |0 # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) + \xB4 |0 # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) + \xB5 |0 # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) + \xB6 |0 # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) + \xB7 |0 # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) + \xB8 |0 # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) + \xB9 |0 # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) + \x93 |0 # HORIZONTAL ELLIPSIS, right-left + \xC0 |0 # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left END CHARMAP diff --git a/ext/Encode/ucm/macHebrew.ucm b/ext/Encode/ucm/macHebrew.ucm index 99c3003..77e24d8 100644 --- a/ext/Encode/ucm/macHebrew.ucm +++ b/ext/Encode/ucm/macHebrew.ucm @@ -41,7 +41,38 @@ CHARMAP \x1D |0 # \x1E |0 # \x1F |0 # + \xA0 |0 # SPACE, right-left + \xA1 |0 # EXCLAMATION MARK, right-left + \xA2 |0 # QUOTATION MARK, right-left + \xA3 |0 # NUMBER SIGN, right-left + \xA4 |0 # DOLLAR SIGN, right-left + \xA5 |0 # PERCENT SIGN, right-left \x26 |0 # AMPERSAND + \xA7 |0 # APOSTROPHE, right-left + \xA9 |0 # LEFT PARENTHESIS, right-left + \xA8 |0 # RIGHT PARENTHESIS, right-left + \xAA |0 # ASTERISK, right-left + \xAB |0 # PLUS SIGN, right-left + \xAC |0 # COMMA, right-left + \xAD |0 # HYPHEN-MINUS, right-left + \xAE |0 # FULL STOP, right-left + \xAF |0 # SOLIDUS, right-left + \xB0 |0 # DIGIT ZERO, right-left (need override) + \xB1 |0 # DIGIT ONE, right-left (need override) + \xB2 |0 # DIGIT TWO, right-left (need override) + \xB3 |0 # DIGIT THREE, right-left (need override) + \xB4 |0 # DIGIT FOUR, right-left (need override) + \xB5 |0 # DIGIT FIVE, right-left (need override) + \xB6 |0 # DIGIT SIX, right-left (need override) + \xB7 |0 # DIGIT SEVEN, right-left (need override) + \xB8 |0 # DIGIT EIGHT, right-left (need override) + \xB9 |0 # DIGIT NINE, right-left (need override) + \xBA |0 # COLON, right-left + \xBB |0 # SEMICOLON, right-left + \xBC |0 # LESS-THAN SIGN, right-left + \xBD |0 # EQUALS SIGN, right-left + \xBE |0 # GREATER-THAN SIGN, right-left + \xBF |0 # QUESTION MARK, right-left \x40 |0 # COMMERCIAL AT \x41 |0 # LATIN CAPITAL LETTER A \x42 |0 # LATIN CAPITAL LETTER B @@ -69,7 +100,9 @@ CHARMAP \x58 |0 # LATIN CAPITAL LETTER X \x59 |0 # LATIN CAPITAL LETTER Y \x5A |0 # LATIN CAPITAL LETTER Z + \xFE |0 # LEFT SQUARE BRACKET, right-left \x5C |0 # REVERSE SOLIDUS + \xFC |0 # RIGHT SQUARE BRACKET, right-left \x5E |0 # CIRCUMFLEX ACCENT \x5F |0 # LOW LINE \x60 |0 # GRAVE ACCENT @@ -99,7 +132,11 @@ CHARMAP \x78 |0 # LATIN SMALL LETTER X \x79 |0 # LATIN SMALL LETTER Y \x7A |0 # LATIN SMALL LETTER Z + \xFD |0 # LEFT CURLY BRACKET, right-left + \xFF |0 # VERTICAL LINE, right-left + \xFB |0 # RIGHT CURLY BRACKET, right-left \x7E |0 # TILDE + \xCA |0 # NO-BREAK SPACE, right-left \x80 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS \x82 |0 # LATIN CAPITAL LETTER C WITH CEDILLA \x83 |0 # LATIN CAPITAL LETTER E WITH ACUTE @@ -172,13 +209,21 @@ CHARMAP \xF8 |0 # HEBREW LETTER RESH \xF9 |0 # HEBREW LETTER SHIN \xFA |0 # HEBREW LETTER TAV + \x81 |3 # HEBREW LIGATURE YIDDISH YOD YOD PATAH + \xD0 |0 # EN DASH, right-left + \xD1 |0 # EM DASH, right-left + \xD4 |0 # LEFT SINGLE QUOTATION MARK, right-left + \xD5 |0 # RIGHT SINGLE QUOTATION MARK, right-left + \xD2 |0 # LEFT DOUBLE QUOTATION MARK, right-left + \xD3 |0 # RIGHT DOUBLE QUOTATION MARK, right-left + \xC1 |0 # DOUBLE LOW-9 QUOTATION MARK, right-left + \xC9 |0 # HORIZONTAL ELLIPSIS, right-left \xA6 |0 # NEW SHEQEL SIGN \xC0 |3 # Hebrew ligature lamed holam \xC2 |0 # Hebrew canoral 1 \xC3 |0 # Hebrew canoral 2 \xC4 |0 # Hebrew canoral 3 \xC5 |0 # Hebrew canoral 4 - \x81 |0 # HEBREW LIGATURE YIDDISH YOD YOD PATAH \xD6 |0 # HEBREW LETTER SHIN WITH SHIN DOT \xD7 |0 # HEBREW LETTER SHIN WITH SIN DOT \xC8 |0 # HEBREW LETTER VAV WITH DAGESH