[p5sagit/p5-mst-13.2.git] / ext / Encode / JP / JP.pm

package Encode::JP;

# Entry point for the Japanese encodings: refuses to load on EBCDIC,
# computes $VERSION, loads the XS part of Encode::JP via XSLoader and
# then loads the JIS / ISO-2022-JP submodules.
use strict;
use warnings;

BEGIN {
    # ord("A") is 193 on EBCDIC platforms.  Bail out at compile time,
    # before any of the modules below are loaded.
    if (ord("A") == 193) {
        die "Encode::JP not supported on EBCDIC\n";
    }
}
use Encode;

# Build the version string from the revision keyword: the first number is
# printed as-is and every following number is zero-padded to two digits,
# so revision "0.98" yields "0.98".
# NOTE: keep this assignment on a single line; tools that extract a
# module's version (e.g. MakeMaker's VERSION_FROM) evaluate only the line
# that assigns $VERSION.
our $VERSION = do { my @r = (q$Revision: 0.98 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

use XSLoader;
# Load the compiled part of this package; $VERSION is handed to the loader
# together with the module name.
XSLoader::load('Encode::JP',$VERSION);

use Encode::JP::JIS;
use Encode::JP::ISO_2022_JP;
use Encode::JP::ISO_2022_JP_1;

1;
__END__
=head1 NAME

Encode::JP - Japanese Encodings

=head1 SYNOPSIS

    use Encode qw/encode decode/; 
    $euc_jp = encode("euc-jp", $utf8);   # loads Encode::JP implicitly
    $utf8   = decode("euc-jp", $euc_jp); # ditto

=head1 ABSTRACT

This module implements Japanese charset encodings.  Encodings
supported are as follows.

  Canonical   Alias		Description
  --------------------------------------------------------------------
  euc-jp      /euc.*jp$/i	EUC (Extended Unix Code)
              /jp.*euc/i   
	      /ujis$/i
  shiftjis    /shift.*jis$/i	Shift JIS (aka MS Kanji)
	      /sjis$/i
  7bit-jis    /^jis$/i		7bit JIS
  iso-2022-jp			ISO-2022-JP 
				(7bit JIS with all Halfwidth Kana 
				 converted to Fullwidth)
  iso-2022-jp-1			ISO-2022-JP-1
                                (ISO-2022-JP with JIS X 0212-1990
				 support. See below)
  macjapan      Mac Japan	(Shift JIS + Apple vendor mappings)
  cp932         Code Page 932	(Shift JIS + MS/IBM vendor mappings)
  --------------------------------------------------------------------

=head1 DESCRIPTION

To find out how to use this module in detail, see L<Encode>.

=head1 Note on ISO-2022-JP(-1)?

ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
adds support for JIS X 0212-1990.  That means you can use the same
code to decode to utf8 but not vice versa.

  $utf8 = decode('iso-2022-jp-1', $stream);
  $utf8 = decode('iso-2022-jp',   $stream);

Yields the same result but

  $with_0212 = encode('iso-2022-jp-1', $utf8);

is now different from

  $without_0212 = encode('iso-2022-jp', $utf8 );

In the latter case, characters that map to 0212 are first converted
to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu') and
then fed to the decoding engine.  U+FFFD is not used, in order to
preserve the text layout as much as possible.

=head1 BUGS

The ASCII part (0x00-0x7f) is preserved for all encodings, even though
this conflicts with the mappings by the Unicode Consortium.  See

L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en>

to find out why it is implemented that way.

=head1 SEE ALSO

L<Encode>

=cut
Commit	Line	Data
0e567a6c	1	package Encode::JP;
0f3b375a	2	BEGIN {
	3	if (ord("A") == 193) {
	4	die "Encode::JP not supported on EBCDIC\n";
	5	}
	6	}
0e567a6c	7	use Encode;
a63c962f	8	our $VERSION = do { my @r = (q$Revision: 0.98 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
ee981de6	9
0e567a6c	10	use XSLoader;
	11	XSLoader::load('Encode::JP',$VERSION);
	12
	13	use Encode::JP::JIS;
	14	use Encode::JP::ISO_2022_JP;
a63c962f	15	use Encode::JP::ISO_2022_JP_1;
0e567a6c	16
	17	1;
	18	__END__
557e5ea9	19	=head1 NAME
	20
	21	Encode::JP - Japanese Encodings
	22
	23	=head1 SYNOPSIS
	24
1b2c56c8	25	use Encode qw/encode decode/;
2b217bf7	26	$euc_jp = encode("euc-jp", $utf8); # loads Encode::JP implicitly
ee981de6	27	$utf8 = decode("euc-jp", $euc_jp); # ditto
557e5ea9	28
	29	=head1 ABSTRACT
	30
	31	This module implements Japanese charset encodings. Encodings
	32	supported are as follows.
	33
fab31126	34	Canonical Alias Description
	35	--------------------------------------------------------------------
	36	euc-jp /euc.*jp$/i EUC (Extended Unix Character)
	37	/jp.*euc/i
	38	/ujis$/i
	39	shiftjis /shift.*jis$/i Shift JIS (aka MS Kanji)
	40	/sjis$/i
	41	7bit-jis /^jis$/i 7bit JIS
	42	iso-2022-jp ISO-2022-JP
	43	(7bit JIS with all Halfwidth Kana
	44	converted to Fullwidth)
a63c962f	45	iso-2022-jp-1 ISO-2022-JP-1
	46	(ISO-2022-JP with JIS X 0212-1990
	47	support. See below)
fab31126	48	macjapan Mac Japan (Shift JIS + Apple vendor mappings)
5129552c	49	cp932 Code Page 932 (Shift JIS + MS/IBM vendor mappings)
5129552c	50	--------------------------------------------------------------------
557e5ea9	51
	52	=head1 DESCRIPTION
	53
	54	To find how to use this module in detail, see L<Encode>.
	55
a63c962f	56	=head1 Note on ISO-2022-JP(-1)?
	57
	58	ISO-2022-JP-1 (RFC2237) is a superset of ISO-2022-JP (RFC1468) which
	59	adds support for JIS X 0212-1990. That means you can use the same
	60	code to decode to utf8 but not vice versa.
	61
	62	$utf8 = decode('iso-2022-jp-1', $stream);
	63	$utf8 = decode('iso-2022-jp', $stream);
	64
	65	Yields the same result but
	66
	67	$with_0212 = encode('iso-2022-jp-1', $utf8);
	68
	69	is now different from
	70
	71	$without_0212 = encode('iso-2022-jp', $utf8 );
	72
	73	In the latter case, characters that map to 0212 are at first converted
	74	to U+3013 (0xA2AE in EUC-JP; a white square also known as 'Tofu') then
	75	fed to decoding engine. U+FFFD is not used to preserve text layout as
	76	much as possible.
	77
557e5ea9	78	=head1 BUGS
557e5ea9	79
557e5ea9	80	ASCII part (0x00-0x7f) is preserved for all encodings, even though it
	81	conflicts with mappings by the Unicode Consortium. See
	82
a63c962f	83	L<http://www.debian.or.jp/~kubota/unicode-symbols.html.en>
557e5ea9	84
	85	to find why it is implemented that way.
	86
	87	=head1 SEE ALSO
	88
	89	L<Encode>
	90
	91	=cut