X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FEncode%2FUnicode%2FUnicode.pm;h=fa508ebb7bac077a7fbc2b8291981999566e6c1c;hb=4b291ae6c6eed18b8842058ee57489c11dec5862;hp=fdf826ef16e368ec01debff30afd3daafb23ffe5;hpb=0ab8f81ed97bef3f6feac6e615e45b8291ca05fa;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm index fdf826e..fa508eb 100644 --- a/ext/Encode/Unicode/Unicode.pm +++ b/ext/Encode/Unicode/Unicode.pm @@ -3,7 +3,7 @@ package Encode::Unicode; use strict; use warnings; -our $VERSION = do { my @r = (q$Revision: 1.34 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.37 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; XSLoader::load(__PACKAGE__,$VERSION); @@ -13,6 +13,7 @@ XSLoader::load(__PACKAGE__,$VERSION); # require Encode; + for my $name (qw(UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE UCS-2BE UCS-2LE)) @@ -37,23 +38,7 @@ for my $name (qw(UTF-16 UTF-16BE UTF-16LE } -sub name { shift->{'Name'} } -sub new_sequence -{ - my $self = shift; - # Return the original if endian known - return $self if ($self->{endian}); - # Return a clone - return bless {%$self},ref($self); -} - -sub needs_lines { 0 }; - -sub perlio_ok { - exists $INC{"PerlIO/encoding.pm"} or return 0; - return 1; -} - +use base qw(Encode::Encoding); # # three implementations of (en|de)code exist. The XS version is the @@ -303,7 +288,8 @@ for UTF-8, which is a native format in perl). I<Character Encoding Scheme> A character encoding form plus byte serialization. There are seven character encoding schemes in Unicode: -UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE and UTF-32LE. +UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32 (UCS-4), UTF-32BE (UCS-4BE) and +UTF-32LE (UCS-4LE). =item Quick Reference @@ -345,7 +331,7 @@ form a character. Bogus surrogates result in death. When \x{10000} or above is encountered during encode(), it C<ensurrogate>s them and pushes the surrogate pair to the output stream. 
-UTF-32 is a fixed-length encoding with each character taking 32 bits. +UTF-32 (UCS-4) is a fixed-length encoding with each character taking 32 bits. Since it is 32-bit, there is no need for I<surrogate pairs>. =head2 by endianness @@ -449,10 +435,11 @@ every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>. =head1 SEE ALSO L, L, +L, RFC 2781 L, -L +The whole Unicode standard L Ch. 15, pp. 403 of C<Programming Perl (3rd Edition)> by Larry Wall, Tom Christiansen, Jon Orwant;