Upgrade to Encode 2.18
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / Unicode / UTF7.pm
CommitLineData
1485817e 1#
656ebd29 2# $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp dankogai $
1485817e 3#
4package Encode::Unicode::UTF7;
5use strict;
656ebd29 6use warnings;
1485817e 7no warnings 'redefine';
8use base qw(Encode::Encoding);
9__PACKAGE__->Define('UTF-7');
656ebd29 10our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
1485817e 11use MIME::Base64;
12use Encode;
13
14#
15# Algorithms taken from Unicode::String by Gisle Aas
16#
17
18our $OPTIONAL_DIRECT_CHARS = 1;
d1256cb1 19my $specials = quotemeta "\'(),-./:?";
20$OPTIONAL_DIRECT_CHARS
21 and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
22
1485817e 23# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
d1256cb1 24# We use qr/[\n\r\t\ ] instead
25my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
842a5aa6 26my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
d1256cb1 27my $e_utf16 = find_encoding("UTF-16BE");
1485817e 28
d1256cb1 29sub needs_lines { 1 }
1485817e 30
d1256cb1 31sub encode($$;$) {
32 my ( $obj, $str, $chk ) = @_;
1485817e 33 my $len = length($str);
34 pos($str) = 0;
35 my $bytes = '';
d1256cb1 36 while ( pos($str) < $len ) {
37 if ( $str =~ /\G($re_asis+)/ogc ) {
38 $bytes .= $1;
39 }
40 elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
41 if ( $1 eq "+" ) {
42 $bytes .= "+-";
43 }
44 else {
45 my $s = $1;
46 my $base64 = encode_base64( $e_utf16->encode($s), '' );
47 $base64 =~ s/=+$//;
48 $bytes .= "+$base64-";
49 }
50 }
51 else {
52 die "This should not happen! (pos=" . pos($str) . ")";
53 }
1485817e 54 }
55 $_[1] = '' if $chk;
56 return $bytes;
57}
d1256cb1 58
59sub decode($$;$) {
60 my ( $obj, $bytes, $chk ) = @_;
1485817e 61 my $len = length($bytes);
62 my $str = "";
0a8c69ed 63 no warnings 'uninitialized';
d1256cb1 64 while ( pos($bytes) < $len ) {
65 if ( $bytes =~ /\G([^+]+)/ogc ) {
66 $str .= $1;
67 }
68 elsif ( $bytes =~ /\G\+-/ogc ) {
69 $str .= "+";
70 }
71 elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) {
72 my $base64 = $1;
73 my $pad = length($base64) % 4;
74 $base64 .= "=" x ( 4 - $pad ) if $pad;
75 $str .= $e_utf16->decode( decode_base64($base64) );
76 }
77 elsif ( $bytes =~ /\G\+/ogc ) {
78 $^W and warn "Bad UTF7 data escape";
79 $str .= "+";
80 }
81 else {
82 die "This should not happen " . pos($bytes);
83 }
1485817e 84 }
85 $_[1] = '' if $chk;
86 return $str;
87}
881;
89__END__
90
91=head1 NAME
92
93Encode::Unicode::UTF7 -- UTF-7 encoding
94
95=head1 SYNOPSIS
96
97 use Encode qw/encode decode/;
98 $utf7 = encode("UTF-7", $utf8);
99 $utf8 = decode("UTF-7", $ucs2);
100
101=head1 ABSTRACT
102
103This module implements UTF-7 encoding documented in RFC 2152. UTF-7,
104as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It
105is designed to be MTA-safe and expected to be a standard way to
106exchange Unicoded mails via mails. But with the advent of UTF-8 and
1078-bit compliant MTAs, UTF-7 is hardly ever used.
108
109UTF-7 was not supported by Encode until version 1.95 because of that.
110But Unicode::String, a module by Gisle Aas which adds Unicode supports
111to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
112so Encode can supersede Unicode::String 100%.
113
114=head1 In Practice
115
116When you want to encode Unicode for mails and web pages, however, do
117not use UTF-7 unless you are sure your recipients and readers can
118handle it. Very few MUAs and WWW Browsers support these days (only
119Mozilla seems to support one). For general cases, use UTF-8 for
120message body and MIME-Header for header instead.
121
122=head1 SEE ALSO
123
124L<Encode>, L<Encode::Unicode>, L<Unicode::String>
125
126RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
127
128=cut