Upgrade to Encode 2.00.
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / Unicode / UTF7.pm
CommitLineData
1485817e 1#
7237418a 2# $Id: UTF7.pm,v 2.0 2004/05/16 20:55:17 dankogai Exp $
1485817e 3#
4package Encode::Unicode::UTF7;
5use strict;
6no warnings 'redefine';
7use base qw(Encode::Encoding);
8__PACKAGE__->Define('UTF-7');
7237418a 9our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
1485817e 10use MIME::Base64;
11use Encode;
12
13#
14# Algorithms taken from Unicode::String by Gisle Aas
15#
16
17our $OPTIONAL_DIRECT_CHARS = 1;
842a5aa6 18my $specials = quotemeta "\'(),-./:?";
1485817e 19$OPTIONAL_DIRECT_CHARS and
20 $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}";
21# \s will not work because it matches U+3000 DEOGRAPHIC SPACE
03871ea6 22# We use qr/[\n\r\t\ ] instead
842a5aa6 23my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/;
24my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/;
1485817e 25my $e_utf16 = find_encoding("UTF-16BE");
26
27sub needs_lines { 1 };
28
29sub encode($$;$){
30 my ($obj, $str, $chk) = @_;
31 my $len = length($str);
32 pos($str) = 0;
33 my $bytes = '';
34 while (pos($str) < $len){
35 if ($str =~ /\G($re_asis+)/ogc){
36 $bytes .= $1;
37 }elsif($str =~ /\G($re_encoded+)/ogsc){
38 if ($1 eq "+"){
39 $bytes .= "+-";
40 }else{
41 my $base64 = encode_base64($e_utf16->encode($1), '');
42 $base64 =~ s/=+$//;
43 $bytes .= "+$base64-";
44 }
45 }else{
46 die "This should not happen! (pos=" . pos($str) . ")";
47 }
48 }
49 $_[1] = '' if $chk;
50 return $bytes;
51}
52
53sub decode{
54 my ($obj, $bytes, $chk) = @_;
55 my $len = length($bytes);
56 my $str = "";
57 while (pos($bytes) < $len) {
58 if ($bytes =~ /\G([^+]+)/ogc) {
59 $str .= $1;
60 }elsif($bytes =~ /\G\+-/ogc) {
61 $str .= "+";
62 }elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) {
63 my $base64 = $1;
64 my $pad = length($base64) % 4;
65 $base64 .= "=" x (4 - $pad) if $pad;
66 $str .= $e_utf16->decode(decode_base64($base64));
67 }elsif($bytes =~ /\G\+/ogc) {
68 $^W and warn "Bad UTF7 data escape";
69 $str .= "+";
70 }else{
71 die "This should not happen " . pos($bytes);
72 }
73 }
74 $_[1] = '' if $chk;
75 return $str;
76}
771;
78__END__
79
80=head1 NAME
81
82Encode::Unicode::UTF7 -- UTF-7 encoding
83
84=head1 SYNOPSIS
85
86 use Encode qw/encode decode/;
87 $utf7 = encode("UTF-7", $utf8);
88 $utf8 = decode("UTF-7", $ucs2);
89
90=head1 ABSTRACT
91
92This module implements UTF-7 encoding documented in RFC 2152. UTF-7,
93as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It
94is designed to be MTA-safe and expected to be a standard way to
95exchange Unicoded mails via mails. But with the advent of UTF-8 and
968-bit compliant MTAs, UTF-7 is hardly ever used.
97
98UTF-7 was not supported by Encode until version 1.95 because of that.
99But Unicode::String, a module by Gisle Aas which adds Unicode supports
100to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added
101so Encode can supersede Unicode::String 100%.
102
103=head1 In Practice
104
105When you want to encode Unicode for mails and web pages, however, do
106not use UTF-7 unless you are sure your recipients and readers can
107handle it. Very few MUAs and WWW Browsers support these days (only
108Mozilla seems to support one). For general cases, use UTF-8 for
109message body and MIME-Header for header instead.
110
111=head1 SEE ALSO
112
113L<Encode>, L<Encode::Unicode>, L<Unicode::String>
114
115RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt>
116
117=cut