Re: [PATCH] xsubpp prototypes warnings

[p5sagit/p5-mst-13.2.git] / lib / encoding.pm
diff --git a/lib/encoding.pm b/lib/encoding.pm

index 4938bfd..44fc2fd 100644 (file)
--- a/lib/encoding.pm
+++ b/lib/encoding.pm
@@ -4,6 +4,13 @@ our $VERSION = '1.00';
 
 use Encode;
 
+BEGIN {
+    if (ord("A") == 193) {
+       require Carp;
+       Carp::croak "encoding pragma does not support EBCDIC platforms";
+    }
+}
+
 sub import {
     my ($class, $name) = @_;
     $name = $ENV{PERL_ENCODING} if @_ < 2;
@@ -45,10 +52,15 @@ encoding - pragma to control the conversion of legacy data into Unicode
 
     print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
 
-    # but pack/unpack are not affected, in case you still
+    # ... as are eq and cmp ...
+
+    print "peta\n" if "\x{3af}" eq  pack("C", 0xdf);
+    print "exa\n"  if ("\x{3af}" cmp pack("C", 0xdf)) == 0;
+
+    # ... but pack/unpack C are not affected, in case you still
     # want back to your native encoding
 
-    print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
+    print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
 
 =head1 DESCRIPTION
 
@@ -77,6 +89,13 @@ since the C<\xDF> on the left will B<not> be upgraded to C<\x{3af}>
 because of the C<\x{100}> on the left.  You should not be mixing your
 legacy data and Unicode in the same string.
 
+This pragma also affects encoding of the 0x80..0xFF code point range:
+normally characters in that range are left as eight-bit bytes (unless
+they are combined with characters with code points 0x100 or larger,
+in which case all characters need to become UTF-8 encoded), but if
+the C<encoding> pragma is present, even the 0x80..0xFF range always
+gets UTF-8 encoded.
+
 If no encoding is specified, the environment variable L<PERL_ENCODING>
 is consulted.  If that fails, "latin1" (ISO 8859-1) is assumed.  If no
 encoding can be found, C<Unknown encoding '...'> error will be thrown.
@@ -87,6 +106,8 @@ For native multibyte encodings (either fixed or variable length)
 the current implementation of the regular expressions may introduce
 recoding errors for longer regular expression literals than 127 bytes.
 
+The encoding pragma is not supported on EBCDIC platforms.
+
 =head1 SEE ALSO
 
 L<perlunicode>, L<Encode>