Commit | Line | Data |
1485817e |
1 | # |
2 | # $Id: UTF7.pm,v 0.1 2003/05/16 18:06:24 dankogai Exp dankogai $ |
3 | # |
4 | package Encode::Unicode::UTF7; |
5 | use strict; |
6 | no warnings 'redefine'; |
7 | use base qw(Encode::Encoding); |
8 | __PACKAGE__->Define('UTF-7'); |
9 | our $VERSION = do { my @r = (q$Revision: 0.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; |
10 | use MIME::Base64; |
11 | use Encode; |
12 | |
13 | # |
14 | # Algorithms taken from Unicode::String by Gisle Aas |
15 | # |
16 | |
17 | our $OPTIONAL_DIRECT_CHARS = 1; |
18 | my $specials = quotemeta "\'(),-.:?"; |
19 | $OPTIONAL_DIRECT_CHARS and |
20 | $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}"; |
21 | # \s will not work because it matches U+3000 DEOGRAPHIC SPACE |
22 | # We use \x00-\x20 instead (controls + space) |
23 | my $re_asis = qr/(?:[\x00-\x20A-Za-z0-9$specials])/; |
24 | my $re_encoded = qr/(?:[^\x00-\x20A-Za-z0-9$specials])/; |
25 | my $e_utf16 = find_encoding("UTF-16BE"); |
26 | |
27 | sub needs_lines { 1 }; |
28 | |
29 | sub encode($$;$){ |
30 | my ($obj, $str, $chk) = @_; |
31 | my $len = length($str); |
32 | pos($str) = 0; |
33 | my $bytes = ''; |
34 | while (pos($str) < $len){ |
35 | if ($str =~ /\G($re_asis+)/ogc){ |
36 | $bytes .= $1; |
37 | }elsif($str =~ /\G($re_encoded+)/ogsc){ |
38 | if ($1 eq "+"){ |
39 | $bytes .= "+-"; |
40 | }else{ |
41 | my $base64 = encode_base64($e_utf16->encode($1), ''); |
42 | $base64 =~ s/=+$//; |
43 | $bytes .= "+$base64-"; |
44 | } |
45 | }else{ |
46 | die "This should not happen! (pos=" . pos($str) . ")"; |
47 | } |
48 | } |
49 | $_[1] = '' if $chk; |
50 | return $bytes; |
51 | } |
52 | |
53 | sub decode{ |
54 | my ($obj, $bytes, $chk) = @_; |
55 | my $len = length($bytes); |
56 | my $str = ""; |
57 | while (pos($bytes) < $len) { |
58 | if ($bytes =~ /\G([^+]+)/ogc) { |
59 | $str .= $1; |
60 | }elsif($bytes =~ /\G\+-/ogc) { |
61 | $str .= "+"; |
62 | }elsif($bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc) { |
63 | my $base64 = $1; |
64 | my $pad = length($base64) % 4; |
65 | $base64 .= "=" x (4 - $pad) if $pad; |
66 | $str .= $e_utf16->decode(decode_base64($base64)); |
67 | }elsif($bytes =~ /\G\+/ogc) { |
68 | $^W and warn "Bad UTF7 data escape"; |
69 | $str .= "+"; |
70 | }else{ |
71 | die "This should not happen " . pos($bytes); |
72 | } |
73 | } |
74 | $_[1] = '' if $chk; |
75 | return $str; |
76 | } |
77 | 1; |
78 | __END__ |
79 | |
80 | =head1 NAME |
81 | |
82 | Encode::Unicode::UTF7 -- UTF-7 encoding |
83 | |
84 | =head1 SYNOPSIS |
85 | |
86 | use Encode qw/encode decode/; |
87 | $utf7 = encode("UTF-7", $utf8); |
88 | $utf8 = decode("UTF-7", $ucs2); |
89 | |
90 | =head1 ABSTRACT |
91 | |
92 | This module implements UTF-7 encoding documented in RFC 2152. UTF-7, |
93 | as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It |
94 | is designed to be MTA-safe and expected to be a standard way to |
95 | exchange Unicoded mails via mails. But with the advent of UTF-8 and |
96 | 8-bit compliant MTAs, UTF-7 is hardly ever used. |
97 | |
98 | UTF-7 was not supported by Encode until version 1.95 because of that. |
99 | But Unicode::String, a module by Gisle Aas which adds Unicode supports |
100 | to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added |
101 | so Encode can supersede Unicode::String 100%. |
102 | |
103 | =head1 In Practice |
104 | |
105 | When you want to encode Unicode for mails and web pages, however, do |
106 | not use UTF-7 unless you are sure your recipients and readers can |
107 | handle it. Very few MUAs and WWW Browsers support these days (only |
108 | Mozilla seems to support one). For general cases, use UTF-8 for |
109 | message body and MIME-Header for header instead. |
110 | |
111 | =head1 SEE ALSO |
112 | |
113 | L<Encode>, L<Encode::Unicode>, L<Unicode::String> |
114 | |
115 | RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt> |
116 | |
117 | =cut |