Commit | Line | Data |
6fba102d |
1 | # |
8be5f608 |
2 | # $Id: QuotedPrint.pm,v 2.17 2003/10/09 19:04:29 gisle Exp $ |
6fba102d |
3 | |
4 | package MIME::QuotedPrint; |
5 | |
6 | =head1 NAME |
7 | |
8 | MIME::QuotedPrint - Encoding and decoding of quoted-printable strings |
9 | |
10 | =head1 SYNOPSIS |
11 | |
12 | use MIME::QuotedPrint; |
13 | |
14 | $encoded = encode_qp($decoded); |
15 | $decoded = decode_qp($encoded); |
16 | |
17 | =head1 DESCRIPTION |
18 | |
19 | This module provides functions to encode and decode strings into the |
20 | Quoted-Printable encoding specified in RFC 2045 - I<MIME (Multipurpose |
21 | Internet Mail Extensions)>. The Quoted-Printable encoding is intended |
22 | to represent data that largely consists of bytes that correspond to |
23 | printable characters in the ASCII character set. Non-printable |
24 | characters (as defined by english americans) are represented by a |
25 | triplet consisting of the character "=" followed by two hexadecimal |
26 | digits. |
27 | |
28 | The following functions are provided: |
29 | |
30 | =over 4 |
31 | |
32 | =item encode_qp($str) |
33 | |
6a63fb82 |
34 | =item encode_qp($str, $eol) |
6fba102d |
35 | |
6a63fb82 |
36 | This function will return an encoded version of the string given as |
8be5f608 |
37 | argument. |
38 | |
39 | The second argument is the line ending sequence to use. It is |
40 | optional and defaults to "\n". Every occurence of "\n" will be |
41 | replaced with this string and it will also be used for additional |
42 | "soft line breaks" to ensure that no line is longer than 76 |
43 | characters. You might want to pass it as "\015\012" to produce data |
44 | suitable external consumption. The string "\r\n" will produce the |
45 | same result on many platforms, but not all. |
46 | |
47 | An $eol of "" special. If passed no "soft line breaks" are introduced |
48 | and any literal "\n" in the original data is encoded as well. |
6fba102d |
49 | |
50 | =item decode_qp($str); |
51 | |
52 | This function will return the plain text version of the string given |
8be5f608 |
53 | as argument. The lines of the result will be "\n" terminated even it |
54 | the $str argument contains "\r\n" terminated lines. |
6fba102d |
55 | |
56 | =back |
57 | |
58 | |
59 | If you prefer not to import these routines into your namespace you can |
60 | call them as: |
61 | |
62 | use MIME::QuotedPrint (); |
63 | $encoded = MIME::QuotedPrint::encode($decoded); |
64 | $decoded = MIME::QuotedPrint::decode($encoded); |
65 | |
8be5f608 |
66 | Perl v5.6 and better allow extended Unicode characters in strings. |
67 | Such strings cannot be encoded directly as the quoted-printable |
68 | encoding is only defined for bytes. The solution is to use the Encode |
69 | module to select the byte encoding you want. For example: |
70 | |
71 | use MIME::QuotedPrint qw(encode_qp); |
72 | use Encode qw(encode); |
73 | |
74 | $encoded = encode_qp(encode("UTF-8", "\x{FFFF}\n")); |
75 | print $encoded; |
76 | |
6fba102d |
77 | =head1 COPYRIGHT |
78 | |
6a63fb82 |
79 | Copyright 1995-1997,2002-2003 Gisle Aas. |
6fba102d |
80 | |
81 | This library is free software; you can redistribute it and/or |
82 | modify it under the same terms as Perl itself. |
83 | |
8be5f608 |
84 | =head1 SEE ALSO |
85 | |
86 | L<MIME::Base64> |
87 | |
6fba102d |
88 | =cut |
89 | |
90 | use strict; |
91 | use vars qw(@ISA @EXPORT $VERSION); |
95635e5f |
92 | if (ord('A') == 193) { # on EBCDIC machines we need translation help |
46891979 |
93 | require Encode; |
95635e5f |
94 | } |
6fba102d |
95 | |
96 | require Exporter; |
97 | @ISA = qw(Exporter); |
98 | @EXPORT = qw(encode_qp decode_qp); |
99 | |
8be5f608 |
100 | $VERSION = "2.21"; |
6fba102d |
101 | |
6a63fb82 |
102 | use MIME::Base64; # try to load XS version of encode_qp |
103 | unless (defined &encode_qp) { |
104 | *encode_qp = \&old_encode_qp; |
105 | *decode_qp = \&old_decode_qp; |
106 | } |
107 | |
108 | sub old_encode_qp ($;$) |
6fba102d |
109 | { |
110 | my $res = shift; |
b9e0df4c |
111 | if ($] >= 5.006) { |
112 | require bytes; |
113 | if (bytes::length($res) > length($res) || |
8be5f608 |
114 | ($] >= 5.008 && $res =~ /[^\0-\xFF]/)) |
115 | { |
116 | require Carp; |
117 | Carp::croak("The Quoted-Printable encoding is only defined for bytes"); |
b9e0df4c |
118 | } |
119 | } |
b1387238 |
120 | |
6a63fb82 |
121 | my $eol = shift; |
8be5f608 |
122 | $eol = "\n" unless defined $eol; |
6a63fb82 |
123 | |
95635e5f |
124 | # Do not mention ranges such as $res =~ s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg; |
125 | # since that will not even compile on an EBCDIC machine (where ord('!') > ord('<')). |
126 | if (ord('A') == 193) { # EBCDIC style machine |
127 | if (ord('[') == 173) { |
128 | $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp1047',$1))))/eg; # rule #2,#3 |
129 | $res =~ s/([ \t]+)$/ |
130 | join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp1047',$_)))) } |
131 | split('', $1) |
132 | )/egm; # rule #3 (encode whitespace at eol) |
133 | } |
134 | elsif (ord('[') == 187) { |
135 | $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('posix-bc',$1))))/eg; # rule #2,#3 |
136 | $res =~ s/([ \t]+)$/ |
137 | join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('posix-bc',$_)))) } |
138 | split('', $1) |
139 | )/egm; # rule #3 (encode whitespace at eol) |
140 | } |
141 | elsif (ord('[') == 186) { |
142 | $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp37',$1))))/eg; # rule #2,#3 |
143 | $res =~ s/([ \t]+)$/ |
144 | join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp37',$_)))) } |
145 | split('', $1) |
146 | )/egm; # rule #3 (encode whitespace at eol) |
147 | } |
148 | } |
149 | else { # ASCII style machine |
150 | $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord($1))/eg; # rule #2,#3 |
8be5f608 |
151 | $res =~ s/\n/=0A/g unless length($eol); |
95635e5f |
152 | $res =~ s/([ \t]+)$/ |
153 | join('', map { sprintf("=%02X", ord($_)) } |
154 | split('', $1) |
155 | )/egm; # rule #3 (encode whitespace at eol) |
156 | } |
6fba102d |
157 | |
8be5f608 |
158 | return $res unless length($eol); |
159 | |
6fba102d |
160 | # rule #5 (lines must be shorter than 76 chars, but we are not allowed |
161 | # to break =XX escapes. This makes things complicated :-( ) |
162 | my $brokenlines = ""; |
6a63fb82 |
163 | $brokenlines .= "$1=$eol" |
6fba102d |
164 | while $res =~ s/(.*?^[^\n]{73} (?: |
165 | [^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n |
166 | |[^=\n] (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n |
167 | | (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n |
168 | ))//xsm; |
8be5f608 |
169 | $res =~ s/\n\z/$eol/; |
6fba102d |
170 | |
171 | "$brokenlines$res"; |
172 | } |
173 | |
174 | |
6a63fb82 |
175 | sub old_decode_qp ($) |
6fba102d |
176 | { |
177 | my $res = shift; |
6a63fb82 |
178 | $res =~ s/\r\n/\n/g; # normalize newlines |
179 | $res =~ s/[ \t]+\n/\n/g; # rule #3 (trailing space must be deleted) |
180 | $res =~ s/=\n//g; # rule #5 (soft line breaks) |
95635e5f |
181 | if (ord('A') == 193) { # EBCDIC style machine |
182 | if (ord('[') == 173) { |
183 | $res =~ s/=([\da-fA-F]{2})/Encode::encode('cp1047',Encode::decode('iso-8859-1',pack("C", hex($1))))/ge; |
184 | } |
185 | elsif (ord('[') == 187) { |
186 | $res =~ s/=([\da-fA-F]{2})/Encode::encode('posix-bc',Encode::decode('iso-8859-1',pack("C", hex($1))))/ge; |
187 | } |
188 | elsif (ord('[') == 186) { |
189 | $res =~ s/=([\da-fA-F]{2})/Encode::encode('cp37',Encode::decode('iso-8859-1',pack("C", hex($1))))/ge; |
190 | } |
191 | } |
192 | else { # ASCII style machine |
193 | $res =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; |
194 | } |
6fba102d |
195 | $res; |
196 | } |
197 | |
198 | # Set up aliases so that these functions also can be called as |
199 | # |
200 | # MIME::QuotedPrint::encode(); |
201 | # MIME::QuotedPrint::decode(); |
202 | |
203 | *encode = \&encode_qp; |
204 | *decode = \&decode_qp; |
205 | |
206 | 1; |