ext/Encode/Encode.pm

   1 package Encode;
   2
   3 $VERSION = 0.01;
   4
   5 require DynaLoader;
   6 require Exporter;
   7
   8 @ISA = qw(Exporter DynaLoader);
   9
  10 # Public, encouraged API is exported by default
  11 @EXPORT = qw (
  12   encode
  13   decode
  14   encode_utf8
  15   decode_utf8
  16   find_encoding
  17 );
  18
  19 @EXPORT_OK =
  20     qw(
  21        encodings
  22        from_to
  23        is_utf8
  24        is_8bit
  25        is_16bit
  26        utf8_upgrade
  27        utf8_downgrade
  28        _utf8_on
  29        _utf8_off
  30       );
  31
  32 bootstrap Encode ();
  33
  34 # Documentation moved after __END__ for speed - NI-S
  35
  36 use Carp;
  37
  38 # The global hash is declared in XS code
  39 $encoding{Unicode}      = bless({},'Encode::Unicode');
  40 $encoding{utf8}         = bless({},'Encode::utf8');
  41 $encoding{'iso10646-1'} = bless({},'Encode::iso10646_1');
  42
  43 sub encodings
  44 {
  45  my ($class) = @_;
  46  foreach my $dir (@INC)
  47   {
  48    if (opendir(my $dh,"$dir/Encode"))
  49     {
  50      while (defined(my $name = readdir($dh)))
  51       {
  52        if ($name =~ /^(.*)\.enc$/)
  53         {
  54          next if exists $encoding{$1};
  55          $encoding{$1} = "$dir/$name";
  56         }
  57       }
  58      closedir($dh);
  59     }
  60   }
  61  return keys %encoding;
  62 }
  63
  64 sub loadEncoding
  65 {
  66  my ($class,$name,$file) = @_;
  67  if (open(my $fh,$file))
  68   {
  69    my $type;
  70    while (1)
  71     {
  72      my $line = <$fh>;
  73      $type = substr($line,0,1);
  74      last unless $type eq '#';
  75     }
  76    $class .= ('::'.(($type eq 'E') ? 'Escape' : 'Table'));
  77    #warn "Loading $file";
  78    return $class->read($fh,$name,$type);
  79   }
  80  else
  81   {
  82    return undef;
  83   }
  84 }
  85
  86 sub getEncoding
  87 {
  88  my ($class,$name) = @_;
  89  my $enc;
  90  unless (ref($enc = $encoding{$name}))
  91   {
  92    $enc = $class->loadEncoding($name,$enc) if defined $enc;
  93    unless (ref($enc))
  94     {
  95      foreach my $dir (@INC)
  96       {
  97        last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
  98       }
  99     }
 100    $encoding{$name} = $enc;
 101   }
 102  return $enc;
 103 }
 104
 105 sub find_encoding
 106 {
 107  my ($name) = @_;
 108  return __PACKAGE__->getEncoding($name);
 109 }
 110
 111 sub encode
 112 {
 113  my ($name,$string,$check) = @_;
 114  my $enc = find_encoding($name);
 115  croak("Unknown encoding '$name'") unless defined $enc;
 116  my $octets = $enc->fromUnicode($string,$check);
 117  return undef if ($check && length($string));
 118  return $octets;
 119 }
 120
 121 sub decode
 122 {
 123  my ($name,$octets,$check) = @_;
 124  my $enc = find_encoding($name);
 125  croak("Unknown encoding '$name'") unless defined $enc;
 126  my $string = $enc->toUnicode($octets,$check);
 127  return undef if ($check && length($octets));
 128  return $string;
 129 }
 130
 131 sub from_to
 132 {
 133  my ($string,$from,$to,$check) = @_;
 134  my $f = find_encoding($from);
 135  croak("Unknown encoding '$from'") unless defined $f;
 136  my $t = find_encoding($to);
 137  croak("Unknown encoding '$to'") unless defined $t;
 138  my $uni = $f->toUnicode($string,$check);
 139  return undef if ($check && length($string));
 140  $string = $t->fromUnicode($uni,$check);
 141  return undef if ($check && length($uni));
 142  return length($_[0] = $string);
 143 }
 144
 145 sub encode_utf8
 146 {
 147  my ($str) = @_;
 148  utf8_encode($str);
 149  return $str;
 150 }
 151
 152 sub decode_utf8
 153 {
 154  my ($str) = @_;
 155  return undef unless utf8_decode($str);
 156  return $str;
 157 }
 158
 159 package Encode::Unicode;
 160
 161 # Dummy package that provides the encode interface but leaves data
 162 # as UTF-8 encoded. It is here so that from_to() works.
 163
 164 sub name { 'Unicode' }
 165
 166 sub toUnicode
 167 {
 168  my ($obj,$str,$chk) = @_;
 169  Encode::utf8_upgrade($str);
 170  $_[1] = '' if $chk;
 171  return $str;
 172 }
 173
 174 *fromUnicode = \&toUnicode;
 175
 176 package Encode::utf8;
 177
 178 # package to allow long-hand
 179 #   $octets = encode( utf8 => $string );
 180 #
 181
 182 sub name { 'utf8' }
 183
 184 sub toUnicode
 185 {
 186  my ($obj,$octets,$chk) = @_;
 187  my $str = Encode::decode_utf8($octets);
 188  if (defined $str)
 189   {
 190    $_[1] = '' if $chk;
 191    return $str;
 192   }
 193  return undef;
 194 }
 195
 196 sub fromUnicode
 197 {
 198  my ($obj,$string,$chk) = @_;
 199  my $octets = Encode::encode_utf8($string);
 200  $_[1] = '' if $chk;
 201  return $octets;
 202 }
 203
 204 package Encode::Table;
 205
 206 sub read
 207 {
 208  my ($class,$fh,$name,$type) = @_;
 209  my $rep = $class->can("rep_$type");
 210  my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
 211  my @touni;
 212  my %fmuni;
 213  my $count = 0;
 214  $def = hex($def);
 215  while ($pages--)
 216   {
 217    my $line = <$fh>;
 218    chomp($line);
 219    my $page = hex($line);
 220    my @page;
 221    my $ch = $page * 256;
 222    for (my $i = 0; $i < 16; $i++)
 223     {
 224      my $line = <$fh>;
 225      for (my $j = 0; $j < 16; $j++)
 226       {
 227        my $val = hex(substr($line,0,4,''));
 228        if ($val || !$ch)
 229         {
 230          my $uch = chr($val);
 231          push(@page,$uch);
 232          $fmuni{$uch} = $ch;
 233          $count++;
 234         }
 235        else
 236         {
 237          push(@page,undef);
 238         }
 239        $ch++;
 240       }
 241     }
 242    $touni[$page] = \@page;
 243   }
 244
 245  return bless {Name  => $name,
 246                Rep   => $rep,
 247                ToUni => \@touni,
 248                FmUni => \%fmuni,
 249                Def   => $def,
 250                Num   => $count,
 251               },$class;
 252 }
 253
 254 sub name { shift->{'Name'} }
 255
 256 sub rep_S { 'C' }
 257
 258 sub rep_D { 'n' }
 259
 260 sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
 261
 262 sub representation
 263 {
 264  my ($obj,$ch) = @_;
 265  $ch = 0 unless @_ > 1;
 266  $obj-{'Rep'}->($ch);
 267 }
 268
 269 sub toUnicode
 270 {
 271  my ($obj,$str,$chk) = @_;
 272  my $rep   = $obj->{'Rep'};
 273  my $touni = $obj->{'ToUni'};
 274  my $uni   = '';
 275  while (length($str))
 276   {
 277    my $ch = ord(substr($str,0,1,''));
 278    my $x;
 279    if (&$rep($ch) eq 'C')
 280     {
 281      $x = $touni->[0][$ch];
 282     }
 283    else
 284     {
 285      $x = $touni->[$ch][ord(substr($str,0,1,''))];
 286     }
 287    unless (defined $x)
 288     {
 289      last if $chk;
 290      # What do we do here ?
 291      $x = '';
 292     }
 293    $uni .= $x;
 294   }
 295  $_[1] = $str if $chk;
 296  return $uni;
 297 }
 298
 299 sub fromUnicode
 300 {
 301  my ($obj,$uni,$chk) = @_;
 302  my $fmuni = $obj->{'FmUni'};
 303  my $str   = '';
 304  my $def   = $obj->{'Def'};
 305  my $rep   = $obj->{'Rep'};
 306  while (length($uni))
 307   {
 308    my $ch = substr($uni,0,1,'');
 309    my $x  = $fmuni->{chr(ord($ch))};
 310    unless (defined $x)
 311     {
 312      last if ($chk);
 313      $x = $def;
 314     }
 315    $str .= pack(&$rep($x),$x);
 316   }
 317  $_[1] = $uni if $chk;
 318  return $str;
 319 }
 320
 321 package Encode::iso10646_1;
 322 # Encoding is 16-bit network order Unicode
 323 # Used for X font encodings
 324
 325 sub name { 'iso10646-1' }
 326
 327 sub toUnicode
 328 {
 329  my ($obj,$str,$chk) = @_;
 330  my $uni   = '';
 331  while (length($str))
 332   {
 333    my $code = unpack('n',substr($str,0,2,'')) & 0xffff;
 334    $uni .= chr($code);
 335   }
 336  $_[1] = $str if $chk;
 337  Encode::utf8_upgrade($uni);
 338  return $uni;
 339 }
 340
 341 sub fromUnicode
 342 {
 343  my ($obj,$uni,$chk) = @_;
 344  my $str   = '';
 345  while (length($uni))
 346   {
 347    my $ch = substr($uni,0,1,'');
 348    my $x  = ord($ch);
 349    unless ($x < 32768)
 350     {
 351      last if ($chk);
 352      $x = 0;
 353     }
 354    $str .= pack('n',$x);
 355   }
 356  $_[1] = $uni if $chk;
 357  return $str;
 358 }
 359
 360
 361 package Encode::Escape;
 362 use Carp;
 363
 364 sub read
 365 {
 366  my ($class,$fh,$name) = @_;
 367  my %self = (Name => $name, Num => 0);
 368  while (<$fh>)
 369   {
 370    my ($key,$val) = /^(\S+)\s+(.*)$/;
 371    $val =~ s/^\{(.*?)\}/$1/g;
 372    $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
 373    $self{$key} = $val;
 374   }
 375  return bless \%self,$class;
 376 }
 377
 378 sub name { shift->{'Name'} }
 379
 380 sub toUnicode
 381 {
 382  croak("Not implemented yet");
 383 }
 384
 385 sub fromUnicode
 386 {
 387  croak("Not implemented yet");
 388 }
 389
 390 # switch back to Encode package in case we ever add AutoLoader
 391 package Encode;
 392
 393 1;
 394
 395 __END__
 396
 397 =head1 NAME
 398
 399 Encode - character encodings
 400
 401 =head1 SYNOPSIS
 402
 403     use Encode;
 404
 405 =head1 DESCRIPTION
 406
 407 The C<Encode> module provides the interfaces between perl's strings
 408 and the rest of the system. Perl strings are sequences of B<characters>.
 409
 410 The repertoire of characters that Perl can represent is at least that
 411 defined by the Unicode Consortium. On most platforms the ordinal values
 412 of the  characters (as returned by C<ord(ch)>) is the "Unicode codepoint" for
 413 the character (the exceptions are those platforms where the legacy
 414 encoding is some variant of EBCDIC rather than a super-set of ASCII
 415 - see L<perlebcdic>).
 416
 417 Traditionaly computer data has been moved around in 8-bit chunks
 418 often called "bytes". These chunks are also known as "octets" in
 419 networking standards. Perl is widely used to manipulate data of
 420 many types - not only strings of characters representing human or
 421 computer languages but also "binary" data being the machines representation
 422 of numbers, pixels in an image - or just about anything.
 423
 424 When perl is processing "binary data" the programmer wants perl to process
 425 "sequences of bytes". This is not a problem for perl - as a byte has 256
 426 possible values it easily fits in perl's much larger "logical character".
 427
 428 =head2 TERMINOLOGY
 429
 430 =over
 431
 432 =item *
 433
 434 I<character>: a character in the range 0..(2**32-1) (or more).
 435 (What perl's strings are made of.)
 436
 437 =item *
 438
 439 I<byte>: a character in the range 0..255
 440 (A special case of a perl character.)
 441
 442 =item *
 443
 444 I<octet>: 8 bits of data, with ordinal values 0..255
 445 (Term for bytes passed to or from a non-perl context, e.g. disk file.)
 446
 447 =back
 448
 449 The marker [INTERNAL] marks Internal Implementation Details, in
 450 general meant only for those who think they know what they are doing,
 451 and such details may change in future releases.
 452
 453 =head1 ENCODINGS
 454
 455 =head2 Characteristics of an Encoding
 456
 457 An encoding has a "repertoire" of characters that it can represent,
 458 and for each representable character there is at least one sequence of
 459 octets that represents it.
 460
 461 =head2 Types of Encodings
 462
 463 Encodings can be divided into the following types:
 464
 465 =over 4
 466
 467 =item * Fixed length 8-bit (or less) encodings.
 468
 469 Each character is a single octet so may have a repertoire of up to
 470 256 characters. ASCII and iso-8859-* are typical examples.
 471
 472 =item * Fixed length 16-bit encodings
 473
 474 Each character is two octets so may have a repertoire of up to
 475 65,536 characters. Unicode's UCS-2 is an example. Also used for
 476 encodings for East Asian languages.
 477
 478 =item * Fixed length 32-bit encodings.
 479
 480 Not really very "encoded" encodings. The Unicode code points
 481 are just represented as 4-octet integers. None the less because
 482 different architectures use different representations of integers
 483 (so called "endian") there at least two disctinct encodings.
 484
 485 =item * Multi-byte encodings
 486
 487 The number of octets needed to represent a character varies.
 488 UTF-8 is a particularly complex but regular case of a multi-byte
 489 encoding. Several East Asian countries use a multi-byte encoding
 490 where 1-octet is used to cover western roman characters and Asian
 491 characters get 2-octets.
 492 (UTF-16 is strictly a multi-byte encoding taking either 2 or 4 octets
 493 to represent a Unicode code point.)
 494
 495 =item * "Escape" encodings.
 496
 497 These encodings embed "escape sequences" into the octet sequence
 498 which describe how the following octets are to be interpreted.
 499 The iso-2022-* family is typical. Following the escape sequence
 500 octets are encoded by an "embedded" encoding (which will be one
 501 of the above types) until another escape sequence switches to
 502 a different "embedded" encoding.
 503
 504 These schemes are very flexible and can handle mixed languages but are
 505 very complex to process (and have state).
 506 No escape encodings are implemented for perl yet.
 507
 508 =back
 509
 510 =head2 Specifying Encodings
 511
 512 Encodings can be specified to the API described below in two ways:
 513
 514 =over 4
 515
 516 =item 1. By name
 517
 518 Encoding names are strings with characters taken from a restricted repertoire.
 519 See L</"Encoding Names">.
 520
 521 =item 2. As an object
 522
 523 Encoding objects are returned by C<find_encoding($name)>.
 524
 525 =back
 526
 527 =head2 Encoding Names
 528
 529 Encoding names are case insensitive. White space in names is ignored.
 530 In addition an encoding may have aliases. Each encoding has one "canonical" name.
 531 The "canonical" name is chosen from the names of the encoding by picking
 532 the first in the following sequence:
 533
 534 =over 4
 535
 536 =item * The MIME name as defined in IETF RFC-XXXX.
 537
 538 =item * The name in the IANA registry.
 539
 540 =item * The name used by the the organization that defined it.
 541
 542 =back
 543
 544 Because of all the alias issues, and because in the general case
 545 encodings have state C<Encode> uses the encoding object internally
 546 once an operation is in progress.
 547
 548 I<Aliasing is not yet implemented.>
 549
 550 =head1 PERL ENCODING API
 551
 552 =head2 Generic Encoding Interface
 553
 554 =over 4
 555
 556 =item *
 557
 558         $bytes  = encode(ENCODING, $string[, CHECK])
 559
 560 Encodes string from perl's internal form into I<ENCODING> and returns a
 561 sequence of octets.
 562 See L</"Handling Malformed Data">.
 563
 564 =item *
 565
 566         $string = decode(ENCODING, $bytes[, CHECK])
 567
 568 Decode sequence of octets assumed to be in I<ENCODING> into perls internal
 569 form and returns the resuting string.
 570 See L</"Handling Malformed Data">.
 571
 572 =back
 573
 574 =head2 Handling Malformed Data
 575
 576 If CHECK is not set, C<undef> is returned.  If the data is supposed to
 577 be UTF-8, an optional lexical warning (category utf8) is given.
 578 If CHECK is true but not a code reference, dies.
 579
 580 It would desirable to have a way to indicate that transform should use the
 581 encodings "replacement character" - no such mechanism is defined yet.
 582
 583 It is also planned to allow I<CHECK> to be a code reference.
 584
 585 This is not yet implemented as there are design issues with what its arguments
 586 should be and how it returns its results.
 587
 588 =over 4
 589
 590 =item Scheme 1
 591
 592 Passed remaining fragment of string being processed.
 593 Modifies it in place to remove bytes/characters it can understand
 594 and returns a string used to represent them.
 595 e.g.
 596
 597  sub fixup {
 598    my $ch = substr($_[0],0,1,'');
 599    return sprintf("\x{%02X}",ord($ch);
 600  }
 601
 602 This scheme is close to how underlying C code for Encode works, but gives
 603 the fixup routine very little context.
 604
 605 =item Scheme 2
 606
 607 Passed original string, and an index into it of the problem area,
 608 and output string so far.
 609 Appends what it will to output string and returns new index into
 610 original string.
 611 e.g.
 612
 613  sub fixup {
 614    # my ($s,$i,$d) = @_;
 615    my $ch = substr($_[0],$_[1],1);
 616    $_[2] .= sprintf("\x{%02X}",ord($ch);
 617    return $_[1]+1;
 618  }
 619
 620 This scheme gives maximal control to the fixup routine but is more complicated
 621 to code, and may need internals of Encode to be tweaked to keep original
 622 string intact.
 623
 624 =item Other Schemes
 625
 626 Hybrids of above.
 627
 628 Multiple return values rather than in-place modifications.
 629
 630 Index into the string could be pos($str) allowing s/\G...//.
 631
 632 =back
 633
 634 =head2 UTF-8 / utf8
 635
 636 The Unicode consortium defines the UTF-8 standard as a way of encoding
 637 the entire Unicode repertiore as sequences of octets. This encoding
 638 is expected to become very widespread. Perl can use this form internaly
 639 to represent strings, so conversions to and from this form are particularly
 640 efficient (as octets in memory do not have to change, just the meta-data
 641 that tells perl how to treat them).
 642
 643 =over 4
 644
 645 =item *
 646
 647         $bytes = encode_utf8($string);
 648
 649 The characters that comprise string are encoded in perl's superset of UTF-8
 650 and the resulting octets returned as a sequence of bytes. All possible
 651 characters have a UTF-8 representation so this function cannot fail.
 652
 653 =item *
 654
 655         $string = decode_utf8($bytes [,CHECK]);
 656
 657 The sequence of octets represented by $bytes is decoded from UTF-8 into
 658 a sequence of logical characters. Not all sequences of octets form valid
 659 UTF-8 encodings, so it is possible for this call to fail.
 660 See L</"Handling Malformed Data">.
 661
 662 =back
 663
 664 =head2 Other Encodings of Unicode
 665
 666 UTF-16 is similar to UCS-2, 16 bit or 2-byte chunks.
 667 UCS-2 can only represent 0..0xFFFF, while UTF-16 has a "surogate pair"
 668 scheme which allows it to cover the whole Unicode range.
 669
 670 Encode implements big-endian UCS-2 as the encoding "iso10646-1" as that
 671 happens to be the name used by that representation when used with X11 fonts.
 672
 673 UTF-32 or UCS-4 is 32-bit or 4-byte chunks.  Perl's logical characters
 674 can be considered as being in this form without encoding. An encoding
 675 to transfer strings in this form (e.g. to write them to a file) would need to
 676
 677      pack('L',map(chr($_),split(//,$string)));   # native
 678   or
 679      pack('V',map(chr($_),split(//,$string)));   # little-endian
 680   or
 681      pack('N',map(chr($_),split(//,$string)));   # big-endian
 682
 683 depending on the endian required.
 684
 685 No UTF-32 encodings are not yet implemented.
 686
 687 Both UCS-2 and UCS-4 style encodings can have "byte order marks" by representing
 688 the code point 0xFFFE as the very first thing in a file.
 689
 690 =head1 Encoding and IO
 691
 692 It is very common to want to do encoding transformations when
 693 reading or writing files, network connections, pipes etc.
 694 If perl is configured to use the new 'perlio' IO system then
 695 C<Encode> provides a "layer" (See L<perliol>) which can transform
 696 data as it is read or written.
 697
 698      open(my $ilyad,'>:encoding(iso8859-7)','ilyad.greek');
 699      print $ilyad @epic;
 700
 701 In addition the new IO system can also be configured to read/write
 702 UTF-8 encoded characters (as noted above this is efficient):
 703
 704      open(my $fh,'>:utf8','anything');
 705      print $fh "Any \x{0021} string \N{SMILEY FACE}\n";
 706
 707 Either of the above forms of "layer" specifications can be made the default
 708 for a lexical scope with the C<use open ...> pragma. See L<open>.
 709
 710 Once a handle is open is layers can be altered using C<binmode>.
 711
 712 Without any such configuration, or if perl itself is built using
 713 system's own IO, then write operations assume that file handle accepts
 714 only I<bytes> and will C<die> if a character larger than 255 is
 715 written to the handle. When reading, each octet from the handle
 716 becomes a byte-in-a-character. Note that this default is the same
 717 behaviour as bytes-only languages (including perl before v5.6) would have,
 718 and is sufficient to handle native 8-bit encodings e.g. iso-8859-1,
 719 EBCDIC etc. and any legacy mechanisms for handling other encodings
 720 and binary data.
 721
 722 In other cases it is the programs responsibility
 723 to transform characters into bytes using the API above before
 724 doing writes, and to transform the bytes read from a handle into characters
 725 before doing "character operations" (e.g. C<lc>, C</\W+/>, ...).
 726
 727 =head1 Encoding How to ...
 728
 729 To do:
 730
 731 =over 4
 732
 733 =item * IO with mixed content (faking iso-2020-*)
 734
 735 =item * MIME's Content-Length:
 736
 737 =item * UTF-8 strings in binary data.
 738
 739 =item * perl/Encode wrappers on non-Unicode XS modules.
 740
 741 =back
 742
 743 =head1 Messing with Perl's Internals
 744
 745 The following API uses parts of perl's internals in the current implementation.
 746 As such they are efficient, but may change.
 747
 748 =over 4
 749
 750 =item *
 751
 752         $num_octets = utf8_upgrade($string);
 753
 754 Converts internal representation of string to the UTF-8 form.
 755 Returns the number of octets necessary to represent the string as UTF-8.
 756
 757 =item * utf8_downgrade($string[, CHECK])
 758
 759 Converts internal representation of string to be un-encoded bytes.
 760
 761 =item * is_utf8(STRING [, CHECK])
 762
 763 [INTERNAL] Test whether the UTF-8 flag is turned on in the STRING.
 764 If CHECK is true, also checks the data in STRING for being
 765 well-formed UTF-8.  Returns true if successful, false otherwise.
 766
 767 =item * valid_utf8(STRING)
 768
 769 [INTERNAL] Test whether STRING is in a consistent state.
 770 Will return true if string is held as bytes, or is well-formed UTF-8
 771 and has the UTF-8 flag on.
 772 Main reason for this routine is to allow perl's testsuite to check
 773 that operations have left strings in a consistent state.
 774
 775 =item *
 776
 777         _utf8_on(STRING)
 778
 779 [INTERNAL] Turn on the UTF-8 flag in STRING.  The data in STRING is
 780 B<not> checked for being well-formed UTF-8.  Do not use unless you
 781 B<know> that the STRING is well-formed UTF-8.  Returns the previous
 782 state of the UTF-8 flag (so please don't test the return value as
 783 I<not> success or failure), or C<undef> if STRING is not a string.
 784
 785 =item *
 786
 787         _utf8_off(STRING)
 788
 789 [INTERNAL] Turn off the UTF-8 flag in STRING.  Do not use frivolously.
 790 Returns the previous state of the UTF-8 flag (so please don't test the
 791 return value as I<not> success or failure), or C<undef> if STRING is
 792 not a string.
 793
 794 =back
 795
 796 =head1 SEE ALSO
 797
 798 L<perlunicode>, L<perlebcdic>, L<perlfunc/open>
 799
 800 =cut
 801
 802
 803