lib/utf8.pm

   1 package utf8;
   2
   3 sub import {
         # Hook invoked by "use utf8": $_[0] is the class name and $_[1] is
         # the optional first argument given to "use".
   4     $^H |= 0x00800000;    # switch on this pragma's bit in the $^H hints
         # Store a true argument in %enc under the caller() key, i.e. the
         # package that issued the "use".
         # NOTE(review): nothing in this file reads %enc; presumably
         # utf8_heavy.pl consumes it -- confirm.
   5     $enc{caller()} = $_[1] if $_[1];
   6 }
   7
   8 sub unimport {
         # Hook invoked by "no utf8": clears the $^H bit that import sets
         # and leaves every other hint bit as it was.
   9     $^H &= ~0x00800000;
  10 }
  11
  12 sub AUTOLOAD {
         # Catch-all for subs of this package that are not defined in this
         # file: load utf8_heavy.pl (require does the work only once), then
         # hand the call over to the sub named in $AUTOLOAD with the
         # caller's @_ intact.
  13     require "utf8_heavy.pl";
         # Jump only when the load really defined the requested sub.  A goto
         # to a still-undefined name would be dispatched straight back to
         # this AUTOLOAD and loop without end, so report the error instead.
  14     goto &$AUTOLOAD if defined &$AUTOLOAD;
         require Carp;
         Carp::croak("Undefined subroutine $AUTOLOAD called");
  15 }
  16
  17 1;
  18 __END__
  19
  20 =head1 NAME
  21
  22 utf8 - Perl pragma to enable/disable UTF-8 in source code
  23
  24 =head1 SYNOPSIS
  25
  26     use utf8;
  27     no utf8;
  28
  29 =head1 DESCRIPTION
  30
  31 WARNING: The implementation of Unicode support in Perl is incomplete.
  32 Expect sudden and unannounced changes!
  33
  34 The C<use utf8> pragma tells the Perl parser to allow UTF-8 in the
  35 program text in the current lexical scope.  The C<no utf8> pragma
  36 tells Perl to switch back to treating the source text as literal
  37 bytes in the current lexical scope.
  38
  39 This pragma is primarily a compatibility device.  Perl versions
  40 earlier than 5.6 allowed arbitrary bytes in source code, whereas
  41 in future we would like to standardize on the UTF-8 encoding for
  42 source text.  Until UTF-8 becomes the default format for source
  43 text, this pragma should be used to recognize UTF-8 in the source.
  44 When UTF-8 becomes the standard source format, this pragma will
  45 effectively become a no-op.
  46
  47 Enabling the C<utf8> pragma has the following effects:
  48
  49 =over
  50
  51 =item *
  52
  53 Bytes in the source text that have their high bit set will be treated
  54 as being part of a literal UTF-8 character.  This applies to identifiers
  55 and package names as well as to most literals, such as string constants
  56 and constant regular expression patterns.
  57
  58 =item *
  59
  60 In the absence of inputs marked as UTF-8, regular expressions within the
  61 scope of this pragma will default to using character semantics instead
  62 of byte semantics.
  63
  64     @bytes_or_chars = split //, $data;  # may split to bytes if
  65                                         # $data isn't UTF-8
  66     {
  67         use utf8;                       # force char semantics
  68         @chars = split //, $data;       # splits characters
  69     }
  70
     =back

  71 =head1 SEE ALSO
  72
  73 L<perlunicode>, L<bytes>
  74
  75 =cut