Be more clear. Use simple words.
[p5sagit/p5-mst-13.2.git] / ext / MIME / Base64 / QuotedPrint.pm
CommitLineData
6fba102d 1#
691d66bd 2# $Id: QuotedPrint.pm,v 2.19 2004/01/08 14:07:26 gisle Exp $
6fba102d 3
4package MIME::QuotedPrint;
5
6=head1 NAME
7
8MIME::QuotedPrint - Encoding and decoding of quoted-printable strings
9
10=head1 SYNOPSIS
11
12 use MIME::QuotedPrint;
13
14 $encoded = encode_qp($decoded);
15 $decoded = decode_qp($encoded);
16
17=head1 DESCRIPTION
18
691d66bd 19This module provides functions to encode and decode strings into and from the
20quoted-printable encoding specified in RFC 2045 - I<MIME (Multipurpose
21Internet Mail Extensions)>. The quoted-printable encoding is intended
6fba102d 22to represent data that largely consists of bytes that correspond to
691d66bd 23printable characters in the ASCII character set. Each non-printable
24character (as defined by English Americans) is represented by a
6fba102d 25triplet consisting of the character "=" followed by two hexadecimal
26digits.
27
28The following functions are provided:
29
30=over 4
31
32=item encode_qp($str)
33
6a63fb82 34=item encode_qp($str, $eol)
6fba102d 35
691d66bd 36This function returns an encoded version of the string given as
8be5f608 37argument.
38
691d66bd 39The second argument is the line-ending sequence to use. It is
40optional and defaults to "\n". Every occurrence of "\n" is
41replaced with this string, and it is also used for additional
8be5f608 42"soft line breaks" to ensure that no line is longer than 76
43characters. You might want to pass it as "\015\012" to produce data
691d66bd 44suitable for external consumption. The string "\r\n" produces the
8be5f608 45same result on many platforms, but not all.
46
691d66bd 47An $eol of "" (the empty string) is special. In this case, no "soft line breaks" are introduced
8be5f608 48and any literal "\n" in the original data is encoded as well.
6fba102d 49
50=item decode_qp($str);
51
691d66bd 52This function returns the plain text version of the string given
53as argument. The lines of the result are "\n" terminated, even if
8be5f608 54the $str argument contains "\r\n" terminated lines.
6fba102d 55
56=back
57
58
691d66bd 59If you prefer not to import these routines into your namespace, you can
6fba102d 60call them as:
61
62 use MIME::QuotedPrint ();
63 $encoded = MIME::QuotedPrint::encode($decoded);
64 $decoded = MIME::QuotedPrint::decode($encoded);
65
8be5f608 66Perl v5.6 and better allow extended Unicode characters in strings.
691d66bd 67Such strings cannot be encoded directly, as the quoted-printable
68encoding is only defined for single-byte characters. The solution is to use the Encode
8be5f608 69module to select the byte encoding you want. For example:
70
71 use MIME::QuotedPrint qw(encode_qp);
72 use Encode qw(encode);
73
74 $encoded = encode_qp(encode("UTF-8", "\x{FFFF}\n"));
75 print $encoded;
76
6fba102d 77=head1 COPYRIGHT
78
691d66bd 79Copyright 1995-1997,2002-2004 Gisle Aas.
6fba102d 80
81This library is free software; you can redistribute it and/or
82modify it under the same terms as Perl itself.
83
8be5f608 84=head1 SEE ALSO
85
86L<MIME::Base64>
87
6fba102d 88=cut
89
90use strict;
91use vars qw(@ISA @EXPORT $VERSION);
95635e5f 92if (ord('A') == 193) { # on EBCDIC machines we need translation help
46891979 93 require Encode;
95635e5f 94}
6fba102d 95
96require Exporter;
97@ISA = qw(Exporter);
98@EXPORT = qw(encode_qp decode_qp);
99
8be5f608 100$VERSION = "2.21";
6fba102d 101
6a63fb82 102use MIME::Base64; # try to load XS version of encode_qp
103unless (defined &encode_qp) {
104 *encode_qp = \&old_encode_qp;
105 *decode_qp = \&old_decode_qp;
106}
107
108sub old_encode_qp ($;$)
6fba102d 109{
110 my $res = shift;
b9e0df4c 111 if ($] >= 5.006) {
112 require bytes;
113 if (bytes::length($res) > length($res) ||
8be5f608 114 ($] >= 5.008 && $res =~ /[^\0-\xFF]/))
115 {
116 require Carp;
117 Carp::croak("The Quoted-Printable encoding is only defined for bytes");
b9e0df4c 118 }
119 }
b1387238 120
6a63fb82 121 my $eol = shift;
8be5f608 122 $eol = "\n" unless defined $eol;
6a63fb82 123
95635e5f 124 # Do not mention ranges such as $res =~ s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg;
125 # since that will not even compile on an EBCDIC machine (where ord('!') > ord('<')).
126 if (ord('A') == 193) { # EBCDIC style machine
127 if (ord('[') == 173) {
128 $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp1047',$1))))/eg; # rule #2,#3
129 $res =~ s/([ \t]+)$/
130 join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp1047',$_)))) }
131 split('', $1)
132 )/egm; # rule #3 (encode whitespace at eol)
133 }
134 elsif (ord('[') == 187) {
135 $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('posix-bc',$1))))/eg; # rule #2,#3
136 $res =~ s/([ \t]+)$/
137 join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('posix-bc',$_)))) }
138 split('', $1)
139 )/egm; # rule #3 (encode whitespace at eol)
140 }
141 elsif (ord('[') == 186) {
142 $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp37',$1))))/eg; # rule #2,#3
143 $res =~ s/([ \t]+)$/
144 join('', map { sprintf("=%02X", ord(Encode::encode('iso-8859-1',Encode::decode('cp37',$_)))) }
145 split('', $1)
146 )/egm; # rule #3 (encode whitespace at eol)
147 }
148 }
149 else { # ASCII style machine
150 $res =~ s/([^ \t\n!"#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~])/sprintf("=%02X", ord($1))/eg; # rule #2,#3
8be5f608 151 $res =~ s/\n/=0A/g unless length($eol);
95635e5f 152 $res =~ s/([ \t]+)$/
153 join('', map { sprintf("=%02X", ord($_)) }
154 split('', $1)
155 )/egm; # rule #3 (encode whitespace at eol)
156 }
6fba102d 157
8be5f608 158 return $res unless length($eol);
159
6fba102d 160 # rule #5 (lines must be shorter than 76 chars, but we are not allowed
161 # to break =XX escapes. This makes things complicated :-( )
162 my $brokenlines = "";
6a63fb82 163 $brokenlines .= "$1=$eol"
6fba102d 164 while $res =~ s/(.*?^[^\n]{73} (?:
165 [^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n
166 |[^=\n] (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n
167 | (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n
168 ))//xsm;
8be5f608 169 $res =~ s/\n\z/$eol/;
6fba102d 170
171 "$brokenlines$res";
172}
173
174
6a63fb82 175sub old_decode_qp ($)
6fba102d 176{
177 my $res = shift;
6a63fb82 178 $res =~ s/\r\n/\n/g; # normalize newlines
179 $res =~ s/[ \t]+\n/\n/g; # rule #3 (trailing space must be deleted)
180 $res =~ s/=\n//g; # rule #5 (soft line breaks)
95635e5f 181 if (ord('A') == 193) { # EBCDIC style machine
182 if (ord('[') == 173) {
183 $res =~ s/=([\da-fA-F]{2})/Encode::encode('cp1047',Encode::decode('iso-8859-1',pack("C", hex($1))))/ge;
184 }
185 elsif (ord('[') == 187) {
186 $res =~ s/=([\da-fA-F]{2})/Encode::encode('posix-bc',Encode::decode('iso-8859-1',pack("C", hex($1))))/ge;
187 }
188 elsif (ord('[') == 186) {
189 $res =~ s/=([\da-fA-F]{2})/Encode::encode('cp37',Encode::decode('iso-8859-1',pack("C", hex($1))))/ge;
190 }
191 }
192 else { # ASCII style machine
193 $res =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge;
194 }
6fba102d 195 $res;
196}
197
198# Set up aliases so that these functions also can be called as
199#
200# MIME::QuotedPrint::encode();
201# MIME::QuotedPrint::decode();
202
203*encode = \&encode_qp;
204*decode = \&decode_qp;
205
2061;