[p5sagit/p5-mst-13.2.git] / ext / Encode / JP / JP.pm

package Encode::JP;

# Refuse to load on EBCDIC platforms.  A BEGIN block runs as soon as it
# has been parsed, i.e. before any of the "use" lines below are processed,
# so the guard fires before any other module is pulled in.
# ord("A") is 193 on EBCDIC code pages (it is 65 on ASCII-based ones).
BEGIN {
    if ( ord("A") == 193 ) {
        die "Encode::JP not supported on EBCDIC\n";
    }
}
use strict;
use warnings;
use Encode;

# Derive the module version from the RCS "Revision" keyword: the first
# number is printed as-is and every following number is zero-padded to two
# digits, so "Revision: 2.2" yields "2.02".  The statement is deliberately
# kept on a single line so that tools which extract $VERSION by evaluating
# the assigning line (e.g. ExtUtils::MakeMaker's VERSION_FROM) still work.
our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

# Load the compiled (XS) part of this package, passing $VERSION along to
# the loader.
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );

# Also load Encode::JP::JIS7 whenever this package is loaded.
use Encode::JP::JIS7;

1;
__END__

=head1 NAME

Encode::JP - Japanese Encodings

=head1 SYNOPSIS

    use Encode qw/encode decode/; 
    $euc_jp = encode("euc-jp", $utf8);   # loads Encode::JP implicitly
    $utf8   = decode("euc-jp", $euc_jp); # ditto

=head1 ABSTRACT

This module implements Japanese charset encodings.  Encodings
supported are as follows.

  Canonical   Alias             Description
  --------------------------------------------------------------------
  euc-jp      /\beuc.*jp$/i     EUC (Extended Unix Code)
              /\bjp.*euc/i
              /\bujis$/i
  shiftjis    /\bshift.*jis$/i  Shift JIS (aka MS Kanji)
              /\bsjis$/i
  7bit-jis    /\bjis$/i         7bit JIS
  iso-2022-jp                   ISO-2022-JP                  [RFC1468]
                                = 7bit JIS with all Halfwidth Kana
                                  converted to Fullwidth
  iso-2022-jp-1                 ISO-2022-JP-1                [RFC2237]
                                = ISO-2022-JP with JIS X 0212-1990
                                  support.  See below
  MacJapanese                   Shift JIS + Apple vendor mappings
  cp932       /\bwindows-31j$/i Code Page 932
                                = Shift JIS + MS/IBM vendor mappings
  jis0201-raw                   JIS0201, raw format
  jis0208-raw                   JIS0208, raw format
  jis0212-raw                   JIS0212, raw format
  --------------------------------------------------------------------

=head1 DESCRIPTION

To find out how to use this module in detail, see L<Encode>.

=head1 Note on ISO-2022-JP(-1)?

ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
adds support for JIS X 0212-1990.  That means you can use the same
code to decode to utf8 but not vice versa.

  $utf8 = decode('iso-2022-jp-1', $stream);

and

  $utf8 = decode('iso-2022-jp',   $stream);

yield the same result but

  $with_0212 = encode('iso-2022-jp-1', $utf8);

is now different from

  $without_0212 = encode('iso-2022-jp', $utf8 );

In the latter case, characters that map to 0212 are first converted
to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or
'geta mark') then fed to the decoding engine.  U+FFFD is not used,
in order to preserve text layout as much as possible.

=head1 BUGS

The ASCII region (0x00-0x7f) is preserved for all encodings, even
though this conflicts with mappings by the Unicode Consortium.  See

L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en>

to find out why it is implemented that way.

=head1 SEE ALSO

L<Encode>

=cut
Commit	Line	Data
0e567a6c	1	package Encode::JP;
d1256cb1	2
0f3b375a	3	BEGIN {
d1256cb1	4	if ( ord("A") == 193 ) {
d1256cb1	5	die "Encode::JP not supported on EBCDIC\n";
0f3b375a	6	}
0f3b375a	7	}
0e567a6c	8	use Encode;
d1256cb1	9	our $VERSION = do { my @r = ( q$Revision: 2.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
ee981de6	10
0e567a6c	11	use XSLoader;
d1256cb1	12	XSLoader::load( __PACKAGE__, $VERSION );
0e567a6c	13
aae85ceb	14	use Encode::JP::JIS7;
0e567a6c	15
	16	1;
	17	__END__
67d7b5ef	18
557e5ea9	19	=head1 NAME
	20
	21	Encode::JP - Japanese Encodings
	22
	23	=head1 SYNOPSIS
	24
1b2c56c8	25	use Encode qw/encode decode/;
2b217bf7	26	$euc_jp = encode("euc-jp", $utf8); # loads Encode::JP implicitly
ee981de6	27	$utf8 = decode("euc-jp", $euc_jp); # ditto
557e5ea9	28
	29	=head1 ABSTRACT
	30
	31	This module implements Japanese charset encodings. Encodings
	32	supported are as follows.
	33
fab31126	34	Canonical Alias Description
fab31126	35	--------------------------------------------------------------------
67d7b5ef	36	euc-jp /\beuc.*jp$/i EUC (Extended Unix Character)
67d7b5ef	37	/\bjp.*euc/i
d1256cb1	38	/\bujis$/i
67d7b5ef	39	shiftjis /\bshift.*jis$/i Shift JIS (aka MS Kanji)
d1256cb1	40	/\bsjis$/i
67d7b5ef	41	7bit-jis /\bjis$/i 7bit JIS
67d7b5ef	42	iso-2022-jp ISO-2022-JP [RFC1468]
d1256cb1	43	= 7bit JIS with all Halfwidth Kana
d1256cb1	44	converted to Fullwidth
67d7b5ef	45	iso-2022-jp-1 ISO-2022-JP-1 [RFC2237]
a999c27c	46	= ISO-2022-JP with JIS X 0212-1990
d1256cb1	47	support. See below
a999c27c	48	MacJapanese Shift JIS + Apple vendor mappings
8f1ed24a	49	cp932 /\bwindows-31j$/i Code Page 932
a999c27c	50	= Shift JIS + MS/IBM vendor mappings
f2a2953c	51	jis0201-raw JIS0201, raw format
	52	jis0208-raw JIS0201, raw format
	53	jis0212-raw JIS0201, raw format
5129552c	54	--------------------------------------------------------------------
557e5ea9	55
	56	=head1 DESCRIPTION
	57
0ab8f81e	58	To find out how to use this module in detail, see L<Encode>.
557e5ea9	59
a63c962f	60	=head1 Note on ISO-2022-JP(-1)?
	61
	62	ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
	63	adds support for JIS X 0212-1990. That means you can use the same
	64	code to decode to utf8 but not vice versa.
	65
	66	$utf8 = decode('iso-2022-jp-1', $stream);
0ab8f81e	67
	68	and
	69
a63c962f	70	$utf8 = decode('iso-2022-jp', $stream);
a63c962f	71
0ab8f81e	72	yield the same result but
a63c962f	73
	74	$with_0212 = encode('iso-2022-jp-1', $utf8);
	75
	76	is now different from
	77
	78	$without_0212 = encode('iso-2022-jp', $utf8 );
	79
0ab8f81e	80	In the latter case, characters that map to 0212 are first converted
	81	to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu' or
	82	'geta mark') then fed to the decoding engine. U+FFFD is not used,
	83	in order to preserve text layout as much as possible.
a63c962f	84
557e5ea9	85	=head1 BUGS
557e5ea9	86
0ab8f81e	87	The ASCII region (0x00-0x7f) is preserved for all encodings, even
0ab8f81e	88	though this conflicts with mappings by the Unicode Consortium. See
557e5ea9	89
a63c962f	90	L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en>
557e5ea9	91
0ab8f81e	92	to find out why it is implemented that way.
557e5ea9	93
	94	=head1 SEE ALSO
	95
85982a32	96	L<Encode>
557e5ea9	97
557e5ea9	98	=cut