2 # $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp $
4 package Encode::Unicode::UTF7;
7 no warnings 'redefine';
8 use base qw(Encode::Encoding);
9 __PACKAGE__->Define('UTF-7');
10 our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
15 # Algorithms taken from Unicode::String by Gisle Aas
18 our $OPTIONAL_DIRECT_CHARS = 1;
19 my $specials = quotemeta "\'(),-./:?";
20 $OPTIONAL_DIRECT_CHARS
21 and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
23 # \s will not work because it matches U+3000 IDEOGRAPHIC SPACE
24 # We use qr/[\n\r\t\ ]/ instead
25 my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
26 my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
27 my $e_utf16 = find_encoding("UTF-16BE");
32 my ( $obj, $str, $chk ) = @_;
33 my $len = length($str);
36 while ( pos($str) < $len ) {
37 if ( $str =~ /\G($re_asis+)/ogc ) {
40 elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
46 my $base64 = encode_base64( $e_utf16->encode($s), '' );
48 $bytes .= "+$base64-";
52 die "This should not happen! (pos=" . pos($str) . ")";
60 my ( $obj, $bytes, $chk ) = @_;
61 my $len = length($bytes);
63 no warnings 'uninitialized';
64 while ( pos($bytes) < $len ) {
65 if ( $bytes =~ /\G([^+]+)/ogc ) {
68 elsif ( $bytes =~ /\G\+-/ogc ) {
71 elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
73 my $pad = length($base64) % 4;
74 $base64 .= "=" x ( 4 - $pad ) if $pad;
75 $str .= $e_utf16->decode( decode_base64($base64) );
77 elsif ( $bytes =~ /\G\+/ogc ) {
78 $^W and warn "Bad UTF7 data escape";
82 die "This should not happen " . pos($bytes);
93 Encode::Unicode::UTF7 -- UTF-7 encoding
97 use Encode qw/encode decode/;
98 $utf7 = encode("UTF-7", $utf8);
99 $utf8 = decode("UTF-7", $utf7);
103 This module implements UTF-7 encoding documented in RFC 2152. UTF-7,
104 as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It
105 is designed to be MTA-safe and expected to be a standard way to
106 exchange Unicode text via mail. But with the advent of UTF-8 and
107 8-bit compliant MTAs, UTF-7 is hardly ever used.
109 UTF-7 was not supported by Encode until version 1.95 because of that.
110 But because Unicode::String, a module by Gisle Aas which adds Unicode support
111 to non-utf8-savvy perl, did support UTF-7, UTF-7 support was added
112 so Encode can supersede Unicode::String 100%.
116 When you want to encode Unicode for mails and web pages, however, do
117 not use UTF-7 unless you are sure your recipients and readers can
118 handle it. Very few MUAs and WWW Browsers support it these days (only
119 Mozilla seems to support it). For general cases, use UTF-8 for
120 message body and MIME-Header for header instead.
124 L<Encode>, L<Encode::Unicode>, L<Unicode::String>
126 RFC 2152 L<http://www.ietf.org/rfc/rfc2152.txt>