Commit | Line | Data |
1485817e |
1 | # |
51e4e64d |
2 | # $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp $ |
1485817e |
3 | # |
4 | package Encode::Unicode::UTF7; |
5 | use strict; |
656ebd29 |
6 | use warnings; |
1485817e |
7 | no warnings 'redefine'; |
8 | use base qw(Encode::Encoding); |
9 | __PACKAGE__->Define('UTF-7'); |
656ebd29 |
10 | our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; |
1485817e |
11 | use MIME::Base64; |
12 | use Encode; |
13 | |
14 | # |
15 | # Algorithms taken from Unicode::String by Gisle Aas |
16 | # |
17 | |
18 | our $OPTIONAL_DIRECT_CHARS = 1; |
d1256cb1 |
19 | my $specials = quotemeta "\'(),-./:?"; |
20 | $OPTIONAL_DIRECT_CHARS |
21 | and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}"; |
22 | |
1485817e |
23 | # \s will not work because it matches U+3000 DEOGRAPHIC SPACE |
d1256cb1 |
24 | # We use qr/[\n\r\t\ ] instead |
25 | my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/; |
842a5aa6 |
26 | my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/; |
d1256cb1 |
27 | my $e_utf16 = find_encoding("UTF-16BE"); |
1485817e |
28 | |
d1256cb1 |
29 | sub needs_lines { 1 } |
1485817e |
30 | |
d1256cb1 |
31 | sub encode($$;$) { |
32 | my ( $obj, $str, $chk ) = @_; |
1485817e |
33 | my $len = length($str); |
34 | pos($str) = 0; |
35 | my $bytes = ''; |
d1256cb1 |
36 | while ( pos($str) < $len ) { |
37 | if ( $str =~ /\G($re_asis+)/ogc ) { |
38 | $bytes .= $1; |
39 | } |
40 | elsif ( $str =~ /\G($re_encoded+)/ogsc ) { |
41 | if ( $1 eq "+" ) { |
42 | $bytes .= "+-"; |
43 | } |
44 | else { |
45 | my $s = $1; |
46 | my $base64 = encode_base64( $e_utf16->encode($s), '' ); |
47 | $base64 =~ s/=+$//; |
48 | $bytes .= "+$base64-"; |
49 | } |
50 | } |
51 | else { |
52 | die "This should not happen! (pos=" . pos($str) . ")"; |
53 | } |
1485817e |
54 | } |
55 | $_[1] = '' if $chk; |
56 | return $bytes; |
57 | } |
d1256cb1 |
58 | |
59 | sub decode($$;$) { |
60 | my ( $obj, $bytes, $chk ) = @_; |
1485817e |
61 | my $len = length($bytes); |
62 | my $str = ""; |
0a8c69ed |
63 | no warnings 'uninitialized'; |
d1256cb1 |
64 | while ( pos($bytes) < $len ) { |
65 | if ( $bytes =~ /\G([^+]+)/ogc ) { |
66 | $str .= $1; |
67 | } |
68 | elsif ( $bytes =~ /\G\+-/ogc ) { |
69 | $str .= "+"; |
70 | } |
71 | elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) { |
72 | my $base64 = $1; |
73 | my $pad = length($base64) % 4; |
74 | $base64 .= "=" x ( 4 - $pad ) if $pad; |
75 | $str .= $e_utf16->decode( decode_base64($base64) ); |
76 | } |
77 | elsif ( $bytes =~ /\G\+/ogc ) { |
78 | $^W and warn "Bad UTF7 data escape"; |
79 | $str .= "+"; |
80 | } |
81 | else { |
82 | die "This should not happen " . pos($bytes); |
83 | } |
1485817e |
84 | } |
85 | $_[1] = '' if $chk; |
86 | return $str; |
87 | } |
88 | 1; |
89 | __END__ |
90 | |
91 | =head1 NAME |
92 | |
93 | Encode::Unicode::UTF7 -- UTF-7 encoding |
94 | |
95 | =head1 SYNOPSIS |
96 | |
97 | use Encode qw/encode decode/; |
98 | $utf7 = encode("UTF-7", $utf8); |
99 | $utf8 = decode("UTF-7", $ucs2); |
100 | |
101 | =head1 ABSTRACT |
102 | |
103 | This module implements UTF-7 encoding documented in RFC 2152. UTF-7, |
104 | as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It |
105 | is designed to be MTA-safe and expected to be a standard way to |
106 | exchange Unicoded mails via mails. But with the advent of UTF-8 and |
107 | 8-bit compliant MTAs, UTF-7 is hardly ever used. |
108 | |
109 | UTF-7 was not supported by Encode until version 1.95 because of that. |
110 | But Unicode::String, a module by Gisle Aas which adds Unicode supports |
111 | to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added |
112 | so Encode can supersede Unicode::String 100%. |
113 | |
114 | =head1 In Practice |
115 | |
116 | When you want to encode Unicode for mails and web pages, however, do |
117 | not use UTF-7 unless you are sure your recipients and readers can |
118 | handle it. Very few MUAs and WWW Browsers support these days (only |
119 | Mozilla seems to support one). For general cases, use UTF-8 for |
120 | message body and MIME-Header for header instead. |
121 | |
122 | =head1 SEE ALSO |
123 | |
124 | L<Encode>, L<Encode::Unicode>, L<Unicode::String> |
125 | |
126 | RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt> |
127 | |
128 | =cut |