Re: [perl #20912] UTF8 related glitch + fix
[p5sagit/p5-mst-13.2.git] / ext / MIME / Base64 / Base64.xs
CommitLineData
6a63fb82 1/* $Id: Base64.xs,v 1.32 2003/01/05 07:49:07 gisle Exp $
6fba102d 2
6a63fb82 3Copyright 1997-2003 Gisle Aas
6fba102d 4
5This library is free software; you can redistribute it and/or
6modify it under the same terms as Perl itself.
7
8
9The tables and some of the code that used to be here was borrowed from
10metamail, which comes with this message:
11
12 Copyright (c) 1991 Bell Communications Research, Inc. (Bellcore)
13
7d85a32c 14 Permission to use, copy, modify, and distribute this material
15 for any purpose and without fee is hereby granted, provided
16 that the above copyright notice and this permission notice
17 appear in all copies, and that the name of Bellcore not be
18 used in advertising or publicity pertaining to this
19 material without the specific, prior written permission
20 of an authorized representative of Bellcore. BELLCORE
21 MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
22 OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS",
6fba102d 23 WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
24
25*/
26
27
28#ifdef __cplusplus
29extern "C" {
30#endif
31#include "EXTERN.h"
32#include "perl.h"
33#include "XSUB.h"
34#ifdef __cplusplus
35}
36#endif
37
6a63fb82 38#include "patchlevel.h"
39#if PATCHLEVEL <= 4 && !defined(PL_dowarn)
40 #define PL_dowarn dowarn
41#endif
6fba102d 42
43#define MAX_LINE 76 /* size of encoded lines */
44
45static char basis_64[] =
46 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
47
48#define XX 255 /* illegal base64 char */
49#define EQ 254 /* padding */
50#define INVALID XX
51
52static unsigned char index_64[256] = {
53 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
54 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
55 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, XX,XX,XX,63,
56 52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,EQ,XX,XX,
57 XX, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
58 15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
59 XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
60 41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
61
62 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
63 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
64 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
65 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
66 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
67 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
68 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
69 XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
70};
71
6a63fb82 72#ifdef SvPVbyte
73# if PERL_REVISION == 5 && PERL_VERSION < 7
74 /* SvPVbyte does not work in perl-5.6.1, borrowed version for 5.7.3 */
75# undef SvPVbyte
76# define SvPVbyte(sv, lp) \
77 ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \
78 ? ((lp = SvCUR(sv)), SvPVX(sv)) : my_sv_2pvbyte(aTHX_ sv, &lp))
79 static char *
80 my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp)
81 {
82 sv_utf8_downgrade(sv,0);
83 return SvPV(sv,*lp);
84 }
85# endif
86#else
87# define SvPVbyte SvPV
88#endif
6fba102d 89
6a63fb82 90#ifndef NATIVE_TO_ASCII
91# define NATIVE_TO_ASCII(ch) (ch)
92#endif
6fba102d 93
94MODULE = MIME::Base64 PACKAGE = MIME::Base64
95
96SV*
97encode_base64(sv,...)
98 SV* sv
99 PROTOTYPE: $;$
100
101 PREINIT:
102 char *str; /* string to encode */
103 SSize_t len; /* length of the string */
104 char *eol; /* the end-of-line sequence to use */
105 STRLEN eollen; /* length of the EOL sequence */
106 char *r; /* result string */
107 STRLEN rlen; /* length of result string */
108 unsigned char c1, c2, c3;
109 int chunk;
110
111 CODE:
6a63fb82 112#if PERL_REVISION == 5 && PERL_VERSION >= 6
6fba102d 113 sv_utf8_downgrade(sv, FALSE);
6a63fb82 114#endif
6fba102d 115 str = SvPV(sv, rlen); /* SvPV(sv, len) gives warning for signed len */
116 len = (SSize_t)rlen;
117
118 /* set up EOL from the second argument if present, default to "\n" */
119 if (items > 1 && SvOK(ST(1))) {
120 eol = SvPV(ST(1), eollen);
121 } else {
122 eol = "\n";
123 eollen = 1;
124 }
125
126 /* calculate the length of the result */
127 rlen = (len+2) / 3 * 4; /* encoded bytes */
128 if (rlen) {
129 /* add space for EOL */
130 rlen += ((rlen-1) / MAX_LINE + 1) * eollen;
131 }
132
133 /* allocate a result buffer */
134 RETVAL = newSV(rlen ? rlen : 1);
135 SvPOK_on(RETVAL);
136 SvCUR_set(RETVAL, rlen);
137 r = SvPVX(RETVAL);
138
139 /* encode */
140 for (chunk=0; len > 0; len -= 3, chunk++) {
141 if (chunk == (MAX_LINE/4)) {
142 char *c = eol;
143 char *e = eol + eollen;
144 while (c < e)
145 *r++ = *c++;
146 chunk = 0;
147 }
148 c1 = *str++;
149 c2 = *str++;
150 *r++ = basis_64[c1>>2];
151 *r++ = basis_64[((c1 & 0x3)<< 4) | ((c2 & 0xF0) >> 4)];
152 if (len > 2) {
153 c3 = *str++;
154 *r++ = basis_64[((c2 & 0xF) << 2) | ((c3 & 0xC0) >>6)];
155 *r++ = basis_64[c3 & 0x3F];
156 } else if (len == 2) {
157 *r++ = basis_64[(c2 & 0xF) << 2];
158 *r++ = '=';
159 } else { /* len == 1 */
160 *r++ = '=';
161 *r++ = '=';
162 }
163 }
164 if (rlen) {
165 /* append eol to the result string */
166 char *c = eol;
167 char *e = eol + eollen;
168 while (c < e)
169 *r++ = *c++;
170 }
171 *r = '\0'; /* every SV in perl should be NUL-terminated */
172
173 OUTPUT:
174 RETVAL
175
176SV*
177decode_base64(sv)
178 SV* sv
179 PROTOTYPE: $
180
181 PREINIT:
182 STRLEN len;
7d85a32c 183 register unsigned char *str = (unsigned char*)SvPVbyte(sv, len);
6fba102d 184 unsigned char const* end = str + len;
185 char *r;
186 unsigned char c[4];
187
188 CODE:
189 {
190 /* always enough, but might be too much */
191 STRLEN rlen = len * 3 / 4;
192 RETVAL = newSV(rlen ? rlen : 1);
193 }
194 SvPOK_on(RETVAL);
195 r = SvPVX(RETVAL);
196
197 while (str < end) {
198 int i = 0;
199 do {
7d85a32c 200 unsigned char uc = index_64[NATIVE_TO_ASCII(*str++)];
6fba102d 201 if (uc != INVALID)
202 c[i++] = uc;
203
204 if (str == end) {
205 if (i < 4) {
206 if (i && PL_dowarn)
207 warn("Premature end of base64 data");
208 if (i < 2) goto thats_it;
209 if (i == 2) c[2] = EQ;
210 c[3] = EQ;
211 }
212 break;
213 }
214 } while (i < 4);
7d85a32c 215
6fba102d 216 if (c[0] == EQ || c[1] == EQ) {
217 if (PL_dowarn) warn("Premature padding of base64 data");
218 break;
219 }
c6c619a9 220 /* printf("c0=%d,c1=%d,c2=%d,c3=%d\n", c[0],c[1],c[2],c[3]);*/
6fba102d 221
222 *r++ = (c[0] << 2) | ((c[1] & 0x30) >> 4);
223
224 if (c[2] == EQ)
225 break;
226 *r++ = ((c[1] & 0x0F) << 4) | ((c[2] & 0x3C) >> 2);
227
228 if (c[3] == EQ)
229 break;
230 *r++ = ((c[2] & 0x03) << 6) | c[3];
231 }
232
233 thats_it:
234 SvCUR_set(RETVAL, r - SvPVX(RETVAL));
235 *r = '\0';
236
237 OUTPUT:
238 RETVAL
6a63fb82 239
240
241MODULE = MIME::Base64 PACKAGE = MIME::QuotedPrint
242
243#define qp_isplain(c) ((c) == '\t' || ((c) >= ' ' && (c) <= '~') && (c) != '=')
244
245SV*
246encode_qp(sv,...)
247 SV* sv
248 PROTOTYPE: $;$
249
250 PREINIT:
251 char *eol;
252 STRLEN eol_len;
253 STRLEN sv_len;
254 STRLEN linelen;
255 char *beg;
256 char *end;
257 char *p;
258 char *p_beg;
259 STRLEN p_len;
260
261 CODE:
262#if PERL_REVISION == 5 && PERL_VERSION >= 6
263 sv_utf8_downgrade(sv, FALSE);
264#endif
265 /* set up EOL from the second argument if present, default to "\n" */
266 if (items > 1 && SvOK(ST(1))) {
267 eol = SvPV(ST(1), eol_len);
268 } else {
269 eol = "\n";
270 eol_len = 1;
271 }
272
273 beg = SvPV(sv, sv_len);
274 end = beg + sv_len;
275
276 RETVAL = newSV(sv_len + 1);
277 sv_setpv(RETVAL, "");
278 linelen = 0;
279
280 p = beg;
281 while (1) {
282 p_beg = p;
283
284 /* skip past as much plain text as possible */
285 while (p < end && qp_isplain(*p)) {
286 p++;
287 }
288 if (*p == '\n' || p == end) {
289 /* whitespace at end of line must be encoded */
290 while (p > p_beg && (*(p - 1) == '\t' || *(p - 1) == ' '))
291 p--;
292 }
293
294 p_len = p - p_beg;
295 if (p_len) {
296 /* output plain text (with line breaks) */
297 if (eol_len) {
298 STRLEN max_last_line = (*p == '\n' || p == end)
299 ? MAX_LINE /* .......\n */
300 : (*(p + 1) == '\n' || (p + 1) == end)
301 ? MAX_LINE - 3 /* ....=XX\n */
302 : MAX_LINE - 4; /* ...=XX=\n */
303 while (p_len + linelen > max_last_line) {
304 STRLEN len = MAX_LINE - 1 - linelen;
305 if (len > p_len)
306 len = p_len;
307 sv_catpvn(RETVAL, p_beg, len);
308 p_beg += len;
309 p_len -= len;
310 sv_catpvn(RETVAL, "=", 1);
311 sv_catpvn(RETVAL, eol, eol_len);
312 linelen = 0;
313 }
314 }
315 if (p_len) {
316 sv_catpvn(RETVAL, p_beg, p_len);
317 linelen += p_len;
318 }
319 }
320
321 if (*p == '\n') {
322 sv_catpvn(RETVAL, eol, eol_len);
323 p++;
324 linelen = 0;
325 }
326 else if (p < end) {
327 /* output escaped char (with line breaks) */
328 if (eol_len && linelen > MAX_LINE - 4) {
329 sv_catpvn(RETVAL, "=", 1);
330 sv_catpvn(RETVAL, eol, eol_len);
331 linelen = 0;
332 }
333 sv_catpvf(RETVAL, "=%02X", (unsigned char)*p);
334 p++;
335 linelen += 3;
336 }
337 else {
338 assert(p == end);
339 break;
340 }
341
342 /* optimize reallocs a bit */
343 if (SvLEN(RETVAL) > 80 && SvLEN(RETVAL) - SvCUR(RETVAL) < 3) {
344 STRLEN expected_len = (SvCUR(RETVAL) * sv_len) / (p - beg);
345 SvGROW(RETVAL, expected_len);
346 }
347 }
348
349 OUTPUT:
350 RETVAL
351
352SV*
353decode_qp(sv)
354 SV* sv
355 PROTOTYPE: $
356
357 PREINIT:
358 STRLEN len;
7235ff25 359 char *str = SvPVbyte(sv, len);
6a63fb82 360 char const* end = str + len;
361 char *r;
362 char *whitespace = 0;
363
364 CODE:
365 RETVAL = newSV(len ? len : 1);
366 SvPOK_on(RETVAL);
367 r = SvPVX(RETVAL);
368 while (str < end) {
369 if (*str == ' ' || *str == '\t') {
370 if (!whitespace)
371 whitespace = str;
372 str++;
373 }
374 else if (*str == '\r' && (str + 1) < end && str[1] == '\n') {
375 str++;
376 }
377 else if (*str == '\n') {
378 whitespace = 0;
379 *r++ = *str++;
380 }
381 else {
382 if (whitespace) {
383 while (whitespace < str) {
384 *r++ = *whitespace++;
385 }
386 whitespace = 0;
387 }
388 if (*str == '=' && (str + 2) < end && isxdigit(str[1]) && isxdigit(str[2])) {
389 char buf[3];
390 str++;
391 buf[0] = *str++;
392 buf[1] = *str++;
393 buf[2] = '\0';
394 *r++ = (char)strtol(buf, 0, 16);
395 }
396 else if (*str == '=' && (str + 1) < end && str[1] == '\n') {
397 str += 2;
398 }
399 else if (*str == '=' && (str + 2) < end && str[1] == '\r' && str[2] == '\n') {
400 str += 3;
401 }
402 else {
403 *r++ = *str++;
404 }
405 }
406 }
407 *r = '\0';
408 SvCUR_set(RETVAL, r - SvPVX(RETVAL));
409
410 OUTPUT:
411 RETVAL
412
413
414MODULE = MIME::Base64 PACKAGE = MIME::Base64