Digest::MD5 on UTF8 strings
[p5sagit/p5-mst-13.2.git] / ext / Digest / MD5 / MD5.xs
CommitLineData
3357b1b1 1/*
2 * This library is free software; you can redistribute it and/or
3 * modify it under the same terms as Perl itself.
4 *
5 * Copyright 1998-2000 Gisle Aas.
6 * Copyright 1995-1996 Neil Winton.
7 * Copyright 1991-1992 RSA Data Security, Inc.
8 *
9 * This code is derived from Neil Winton's MD5-1.7 Perl module, which in
10 * turn is derived from the reference implementation in RFC 1321 which
11 * comes with this message:
12 *
13 * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
14 * rights reserved.
15 *
16 * License to copy and use this software is granted provided that it
17 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
18 * Algorithm" in all material mentioning or referencing this software
19 * or this function.
20 *
21 * License is also granted to make and use derivative works provided
22 * that such works are identified as "derived from the RSA Data
23 * Security, Inc. MD5 Message-Digest Algorithm" in all material
24 * mentioning or referencing the derived work.
25 *
26 * RSA Data Security, Inc. makes no representations concerning either
27 * the merchantability of this software or the suitability of this
28 * software for any particular purpose. It is provided "as is"
29 * without express or implied warranty of any kind.
30 *
31 * These notices must be retained in any copies of any part of this
32 * documentation and/or software.
33 */
34
35#ifdef __cplusplus
36extern "C" {
37#endif
38#include "EXTERN.h"
39#include "perl.h"
40#include "XSUB.h"
41#ifdef __cplusplus
42}
43#endif
44
45/*#define MD5_DEBUG /**/
46
/* U32 is not guaranteed by Perl to be exactly 32 bits wide.  Some
 * systems have no integral type of exactly that width; a Cray, for
 * instance, has short, int and long all at 64 bits.  TO32(x) yields x
 * reduced to its low 32 bits and TRUNC32(x) reduces the lvalue x in
 * place.  When no reduction is selected both macros are no-ops.
 */
#if !(BYTEORDER > 0x4321 || defined(TRUNCATE_U32))
  #define TO32(x)    (x)
  #define TRUNC32(x) /*nothing*/
#else
  #define TO32(x)    ((x) & 0xFFFFffff)
  #define TRUNC32(x) ((x) &= 0xFFFFffff)
#endif
60
/* The MD5 algorithm is defined in terms of little endian 32-bit
 * values.  BYTESWAP(x) converts between a native integer and such a
 * value.  It is only provided when U32_ALIGNMENT_REQUIRED is not
 * defined and BYTEORDER is one of the two 32-bit layouts below; in
 * every other configuration it is left undefined.
 */
#undef BYTESWAP
#if !defined(U32_ALIGNMENT_REQUIRED)
  #if BYTEORDER == 0x4321      /* 32-bit big endian: reverse the bytes */
    #define BYTESWAP(x) ( (((x) & 0xFF) << 24)       \
                        | (((x) >> 24) & 0xFF)       \
                        | (((x) & 0x0000FF00) << 8)  \
                        | (((x) & 0x00FF0000) >> 8) )

  #elif BYTEORDER == 0x1234    /* 32-bit little endian */
    #define BYTESWAP(x) (x)    /* no-op */
  #endif
#endif
77
78#ifndef BYTESWAP
79static void u2s(U32 u, U8* s)
80{
81 *s++ = u & 0xFF;
82 *s++ = (u >> 8) & 0xFF;
83 *s++ = (u >> 16) & 0xFF;
84 *s = (u >> 24) & 0xFF;
85}
86
87#define s2u(s,u) ((u) = (U32)(*s) | \
88 ((U32)(*(s+1)) << 8) | \
89 ((U32)(*(s+2)) << 16) | \
90 ((U32)(*(s+3)) << 24))
91#endif
92
93#define MD5_CTX_SIGNATURE 200003165
94
95/* This stucture keeps the current state of algorithm.
96 */
97typedef struct {
98 U32 signature; /* safer cast in get_md5_ctx() */
99 U32 A, B, C, D; /* current digest */
100 U32 bytes_low; /* counts bytes in message */
101 U32 bytes_high; /* turn it into a 64-bit counter */
102 U8 buffer[128]; /* collect complete 64 byte blocks */
103} MD5_CTX;
104
105
/* Padding is added at the end of the message in order to fill a
 * complete 64 byte block (- 8 bytes for the message length).  The
 * padding is also the reason the buffer in MD5_CTX has to be
 * 128 bytes.
 *
 * The table is a single 0x80 byte followed by 63 zero bytes; the
 * remaining elements are zero-initialized implicitly.  It is never
 * written to, hence const.
 */
static const unsigned char PADDING[64] = { 0x80 };
116
/* Rotation amounts for the MD5Transform routine.  Sij is the j:th
 * rotation count used in round i.
 */

/* round 1 */
#define S11  7
#define S12 12
#define S13 17
#define S14 22

/* round 2 */
#define S21  5
#define S22  9
#define S23 14
#define S24 20

/* round 3 */
#define S31  4
#define S32 11
#define S33 16
#define S34 23

/* round 4 */
#define S41  6
#define S42 10
#define S43 15
#define S44 21
135
/* F, G, H and I are basic MD5 functions.
 *
 * F selects bits from y where x is set and from z where x is clear,
 * written as z ^ (x & (y ^ z)).  G is F with its arguments rotated.
 * Every macro parameter is parenthesized so that compound expressions
 * (e.g. "a | b") can safely be passed as arguments.
 */
#define F(x, y, z) ((((x) & ((y) ^ (z))) ^ (z)))
#define G(x, y, z) F((z), (x), (y))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))
142
/* ROTATE_LEFT rotates x left n bits, treating x as a 32-bit quantity.
 * x is evaluated twice.
 */
#define ROTATE_LEFT(x, n) ( ((x) << (n)) | ((x) >> (32 - (n))) )
146
/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
 * Rotation is separate from addition to prevent recomputation.
 *
 * Each macro updates the lvalue a from b, c, d, one message word and
 * the additive constant ac, rotating by s bits.  FF takes its message
 * word from NEXTx, which must be defined at the point of use; the
 * other three read X[x].  The bodies are wrapped in do { } while (0)
 * so that each invocation behaves as exactly one statement; callers
 * supply the terminating semicolon.
 */
#define FF(a, b, c, d, s, ac) do {                           \
        (a) += F ((b), (c), (d)) + (NEXTx) + (U32)(ac);      \
        TRUNC32((a));                                        \
        (a) = ROTATE_LEFT ((a), (s));                        \
        (a) += (b);                                          \
        TRUNC32((a));                                        \
    } while (0)

#define GG(a, b, c, d, x, s, ac) do {                        \
        (a) += G ((b), (c), (d)) + X[x] + (U32)(ac);         \
        TRUNC32((a));                                        \
        (a) = ROTATE_LEFT ((a), (s));                        \
        (a) += (b);                                          \
        TRUNC32((a));                                        \
    } while (0)

#define HH(a, b, c, d, x, s, ac) do {                        \
        (a) += H ((b), (c), (d)) + X[x] + (U32)(ac);         \
        TRUNC32((a));                                        \
        (a) = ROTATE_LEFT ((a), (s));                        \
        (a) += (b);                                          \
        TRUNC32((a));                                        \
    } while (0)

#define II(a, b, c, d, x, s, ac) do {                        \
        (a) += I ((b), (c), (d)) + X[x] + (U32)(ac);         \
        TRUNC32((a));                                        \
        (a) = ROTATE_LEFT ((a), (s));                        \
        (a) += (b);                                          \
        TRUNC32((a));                                        \
    } while (0)

177
178
179static void
180MD5Init(MD5_CTX *ctx)
181{
182 /* Start state */
183 ctx->A = 0x67452301;
184 ctx->B = 0xefcdab89;
185 ctx->C = 0x98badcfe;
186 ctx->D = 0x10325476;
187
188 /* message length */
189 ctx->bytes_low = ctx->bytes_high = 0;
190}
191
192
193static void
194MD5Transform(MD5_CTX* ctx, const U8* buf, STRLEN blocks)
195{
196 static int tcount = 0;
197
198 U32 A = ctx->A;
199 U32 B = ctx->B;
200 U32 C = ctx->C;
201 U32 D = ctx->D;
202
203#ifndef U32_ALIGNMENT_REQUIRED
204 const U32 *x = (U32*)buf; /* really just type casting */
205#endif
206
207 do {
208 U32 a = A;
209 U32 b = B;
210 U32 c = C;
211 U32 d = D;
212
213#if BYTEORDER == 0x1234 && !defined(U32_ALIGNMENT_REQUIRED)
214 const U32 *X = x;
215 #define NEXTx (*x++)
216#else
217 U32 X[16]; /* converted values, used in round 2-4 */
218 U32 *uptr = X;
219 U32 tmp;
220 #ifdef BYTESWAP
221 #define NEXTx (tmp=*x++, *uptr++ = BYTESWAP(tmp))
222 #else
223 #define NEXTx (s2u(buf,tmp), buf += 4, *uptr++ = tmp)
224 #endif
225#endif
226
227#ifdef MD5_DEBUG
228 if (buf == ctx->buffer)
229 fprintf(stderr,"%5d: Transform ctx->buffer", ++tcount);
230 else
231 fprintf(stderr,"%5d: Transform %p (%d)", ++tcount, buf, blocks);
232
233 {
234 int i;
235 fprintf(stderr,"[");
236 for (i = 0; i < 16; i++) {
237 fprintf(stderr,"%x,", x[i]);
238 }
239 fprintf(stderr,"]\n");
240 }
241#endif
242
243 /* Round 1 */
244 FF (a, b, c, d, S11, 0xd76aa478); /* 1 */
245 FF (d, a, b, c, S12, 0xe8c7b756); /* 2 */
246 FF (c, d, a, b, S13, 0x242070db); /* 3 */
247 FF (b, c, d, a, S14, 0xc1bdceee); /* 4 */
248 FF (a, b, c, d, S11, 0xf57c0faf); /* 5 */
249 FF (d, a, b, c, S12, 0x4787c62a); /* 6 */
250 FF (c, d, a, b, S13, 0xa8304613); /* 7 */
251 FF (b, c, d, a, S14, 0xfd469501); /* 8 */
252 FF (a, b, c, d, S11, 0x698098d8); /* 9 */
253 FF (d, a, b, c, S12, 0x8b44f7af); /* 10 */
254 FF (c, d, a, b, S13, 0xffff5bb1); /* 11 */
255 FF (b, c, d, a, S14, 0x895cd7be); /* 12 */
256 FF (a, b, c, d, S11, 0x6b901122); /* 13 */
257 FF (d, a, b, c, S12, 0xfd987193); /* 14 */
258 FF (c, d, a, b, S13, 0xa679438e); /* 15 */
259 FF (b, c, d, a, S14, 0x49b40821); /* 16 */
260
261 /* Round 2 */
262 GG (a, b, c, d, 1, S21, 0xf61e2562); /* 17 */
263 GG (d, a, b, c, 6, S22, 0xc040b340); /* 18 */
264 GG (c, d, a, b, 11, S23, 0x265e5a51); /* 19 */
265 GG (b, c, d, a, 0, S24, 0xe9b6c7aa); /* 20 */
266 GG (a, b, c, d, 5, S21, 0xd62f105d); /* 21 */
267 GG (d, a, b, c, 10, S22, 0x2441453); /* 22 */
268 GG (c, d, a, b, 15, S23, 0xd8a1e681); /* 23 */
269 GG (b, c, d, a, 4, S24, 0xe7d3fbc8); /* 24 */
270 GG (a, b, c, d, 9, S21, 0x21e1cde6); /* 25 */
271 GG (d, a, b, c, 14, S22, 0xc33707d6); /* 26 */
272 GG (c, d, a, b, 3, S23, 0xf4d50d87); /* 27 */
273 GG (b, c, d, a, 8, S24, 0x455a14ed); /* 28 */
274 GG (a, b, c, d, 13, S21, 0xa9e3e905); /* 29 */
275 GG (d, a, b, c, 2, S22, 0xfcefa3f8); /* 30 */
276 GG (c, d, a, b, 7, S23, 0x676f02d9); /* 31 */
277 GG (b, c, d, a, 12, S24, 0x8d2a4c8a); /* 32 */
278
279 /* Round 3 */
280 HH (a, b, c, d, 5, S31, 0xfffa3942); /* 33 */
281 HH (d, a, b, c, 8, S32, 0x8771f681); /* 34 */
282 HH (c, d, a, b, 11, S33, 0x6d9d6122); /* 35 */
283 HH (b, c, d, a, 14, S34, 0xfde5380c); /* 36 */
284 HH (a, b, c, d, 1, S31, 0xa4beea44); /* 37 */
285 HH (d, a, b, c, 4, S32, 0x4bdecfa9); /* 38 */
286 HH (c, d, a, b, 7, S33, 0xf6bb4b60); /* 39 */
287 HH (b, c, d, a, 10, S34, 0xbebfbc70); /* 40 */
288 HH (a, b, c, d, 13, S31, 0x289b7ec6); /* 41 */
289 HH (d, a, b, c, 0, S32, 0xeaa127fa); /* 42 */
290 HH (c, d, a, b, 3, S33, 0xd4ef3085); /* 43 */
291 HH (b, c, d, a, 6, S34, 0x4881d05); /* 44 */
292 HH (a, b, c, d, 9, S31, 0xd9d4d039); /* 45 */
293 HH (d, a, b, c, 12, S32, 0xe6db99e5); /* 46 */
294 HH (c, d, a, b, 15, S33, 0x1fa27cf8); /* 47 */
295 HH (b, c, d, a, 2, S34, 0xc4ac5665); /* 48 */
296
297 /* Round 4 */
298 II (a, b, c, d, 0, S41, 0xf4292244); /* 49 */
299 II (d, a, b, c, 7, S42, 0x432aff97); /* 50 */
300 II (c, d, a, b, 14, S43, 0xab9423a7); /* 51 */
301 II (b, c, d, a, 5, S44, 0xfc93a039); /* 52 */
302 II (a, b, c, d, 12, S41, 0x655b59c3); /* 53 */
303 II (d, a, b, c, 3, S42, 0x8f0ccc92); /* 54 */
304 II (c, d, a, b, 10, S43, 0xffeff47d); /* 55 */
305 II (b, c, d, a, 1, S44, 0x85845dd1); /* 56 */
306 II (a, b, c, d, 8, S41, 0x6fa87e4f); /* 57 */
307 II (d, a, b, c, 15, S42, 0xfe2ce6e0); /* 58 */
308 II (c, d, a, b, 6, S43, 0xa3014314); /* 59 */
309 II (b, c, d, a, 13, S44, 0x4e0811a1); /* 60 */
310 II (a, b, c, d, 4, S41, 0xf7537e82); /* 61 */
311 II (d, a, b, c, 11, S42, 0xbd3af235); /* 62 */
312 II (c, d, a, b, 2, S43, 0x2ad7d2bb); /* 63 */
313 II (b, c, d, a, 9, S44, 0xeb86d391); /* 64 */
314
315 A += a; TRUNC32(A);
316 B += b; TRUNC32(B);
317 C += c; TRUNC32(C);
318 D += d; TRUNC32(D);
319
320 } while (--blocks);
321 ctx->A = A;
322 ctx->B = B;
323 ctx->C = C;
324 ctx->D = D;
325}
326
327
#ifdef MD5_DEBUG
/* Debug helper: format the digest words and byte counters of ctx.
 * The text is written to a static buffer that is overwritten on each
 * call, so the returned pointer is only valid until the next call.
 */
static char*
ctx_dump(MD5_CTX* ctx)
{
    static char text[1024];

    sprintf(text, "{A=%x,B=%x,C=%x,D=%x,%d,%d(%d)}",
            ctx->A,
            ctx->B,
            ctx->C,
            ctx->D,
            ctx->bytes_low,
            ctx->bytes_high,
            (ctx->bytes_low&0x3F));   /* bytes waiting in ctx->buffer */
    return text;
}
#endif
339
340
341static void
342MD5Update(MD5_CTX* ctx, const U8* buf, STRLEN len)
343{
344 STRLEN blocks;
345 STRLEN fill = ctx->bytes_low & 0x3F;
346
347#ifdef MD5_DEBUG
348 static int ucount = 0;
349 fprintf(stderr,"%5i: Update(%s, %p, %d)\n", ++ucount, ctx_dump(ctx),
350 buf, len);
351#endif
352
353 ctx->bytes_low += len;
354 if (ctx->bytes_low < len) /* wrap around */
355 ctx->bytes_high++;
356
357 if (fill) {
358 STRLEN missing = 64 - fill;
359 if (len < missing) {
360 Copy(buf, ctx->buffer + fill, len, U8);
361 return;
362 }
363 Copy(buf, ctx->buffer + fill, missing, U8);
364 MD5Transform(ctx, ctx->buffer, 1);
365 buf += missing;
366 len -= missing;
367 }
368
369 blocks = len >> 6;
370 if (blocks)
371 MD5Transform(ctx, buf, blocks);
372 if ( (len &= 0x3F)) {
373 Copy(buf + (blocks << 6), ctx->buffer, len, U8);
374 }
375}
376
377
378static void
379MD5Final(U8* digest, MD5_CTX *ctx)
380{
381 STRLEN fill = ctx->bytes_low & 0x3F;
382 STRLEN padlen = (fill < 56 ? 56 : 120) - fill;
383 U32 bits_low, bits_high;
384#ifdef MD5_DEBUG
385 fprintf(stderr," Final: %s\n", ctx_dump(ctx));
386#endif
387 Copy(PADDING, ctx->buffer + fill, padlen, U8);
388 fill += padlen;
389
390 bits_low = ctx->bytes_low << 3;
391 bits_high = (ctx->bytes_high << 3) | (ctx->bytes_low >> 29);
392#ifdef BYTESWAP
393 *(U32*)(ctx->buffer + fill) = BYTESWAP(bits_low); fill += 4;
394 *(U32*)(ctx->buffer + fill) = BYTESWAP(bits_high); fill += 4;
395#else
396 u2s(bits_low, ctx->buffer + fill); fill += 4;
397 u2s(bits_high, ctx->buffer + fill); fill += 4;
398#endif
399
400 MD5Transform(ctx, ctx->buffer, fill >> 6);
401#ifdef MD5_DEBUG
402 fprintf(stderr," Result: %s\n", ctx_dump(ctx));
403#endif
404
405#ifdef BYTESWAP
406 *(U32*)digest = BYTESWAP(ctx->A); digest += 4;
407 *(U32*)digest = BYTESWAP(ctx->B); digest += 4;
408 *(U32*)digest = BYTESWAP(ctx->C); digest += 4;
409 *(U32*)digest = BYTESWAP(ctx->D);
410#else
411 u2s(ctx->A, digest);
412 u2s(ctx->B, digest+4);
413 u2s(ctx->C, digest+8);
414 u2s(ctx->D, digest+12);
415#endif
416}
417
/* Fallback used only when INT2PTR is not already defined: a plain
 * cast of d to the type any.
 */
#if !defined(INT2PTR)
#  define INT2PTR(any,d)	(any)(d)
#endif
421
422static MD5_CTX* get_md5_ctx(SV* sv)
423{
424 if (SvROK(sv)) {
425 sv = SvRV(sv);
426 if (SvIOK(sv)) {
427 MD5_CTX* ctx = INT2PTR(MD5_CTX*, SvIV(sv));
428 if (ctx && ctx->signature == MD5_CTX_SIGNATURE) {
429 return ctx;
430 }
431 }
432 }
433 croak("Not a reference to a Digest::MD5 object");
434 return (MD5_CTX*)0; /* some compilers insist on a return value */
435}
436
437
/* Write the 16 bytes at from as 32 lowercase hex digits followed by a
 * NUL into to, which must have room for 33 chars.  Returns to.
 */
static char* hex_16(const unsigned char* from, char* to)
{
    static const char digits[] = "0123456789abcdef";
    int i;

    for (i = 0; i < 16; i++) {
        unsigned char byte = from[i];
        to[2 * i]     = digits[byte >> 4];
        to[2 * i + 1] = digits[byte & 0x0F];
    }
    to[32] = '\0';
    return to;
}
452
/* Write the 16 bytes at from as 22 base64 characters (no '=' padding)
 * followed by a NUL into to, which must have room for 23 chars.
 * Returns to.
 */
static char* base64_16(const unsigned char* from, char* to)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    char *out = to;
    unsigned char last;
    int i;

    /* five complete groups of three bytes -> four characters each */
    for (i = 0; i < 15; i += 3) {
        unsigned char b0 = from[i];
        unsigned char b1 = from[i + 1];
        unsigned char b2 = from[i + 2];

        *out++ = alphabet[b0 >> 2];
        *out++ = alphabet[((b0 & 0x3) << 4) | ((b1 & 0xF0) >> 4)];
        *out++ = alphabet[((b1 & 0xF) << 2) | ((b2 & 0xC0) >> 6)];
        *out++ = alphabet[b2 & 0x3F];
    }

    /* the sixteenth byte stands alone -> two characters */
    last = from[15];
    *out++ = alphabet[last >> 2];
    *out++ = alphabet[(last & 0x3) << 4];

    *out = '\0';
    return to;
}
477
/* Formats: selectors for the digest encoding produced by
 * make_mortal_sv(); also used as the ALIAS values of the XSUBs.
 */
#define F_BIN 0   /* 16 raw bytes */
#define F_HEX 1   /* 32 hex digits */
#define F_B64 2   /* 22 base64 characters */
482
483static SV* make_mortal_sv(const unsigned char *src, int type)
484{
485 STRLEN len;
486 char result[33];
487 char *ret;
488
489 switch (type) {
490 case F_BIN:
491 ret = (char*)src;
492 len = 16;
493 break;
494 case F_HEX:
495 ret = hex_16(src, result);
496 len = 32;
497 break;
498 case F_B64:
499 ret = base64_16(src, result);
500 len = 22;
501 break;
502 default:
503 croak("Bad convertion type (%d)", type);
504 break;
505 }
506 return sv_2mortal(newSVpv(ret,len));
507}
508
509
510/********************************************************************/
511
512typedef PerlIO* InputStream;
513
514MODULE = Digest::MD5 PACKAGE = Digest::MD5
515
516PROTOTYPES: DISABLE
517
void
new(xclass)
        SV* xclass
    PREINIT:
        MD5_CTX* context;
    PPCODE:
        if (SvROK(xclass)) {
            /* invoked on an existing object: reset it in place */
            context = get_md5_ctx(xclass);
        }
        else {
            /* invoked with a class name: allocate a context and
             * return a read-only blessed reference to it */
            STRLEN my_na;
            char *sclass = SvPV(xclass, my_na);
            New(55, context, 1, MD5_CTX);
            context->signature = MD5_CTX_SIGNATURE;
            ST(0) = sv_newmortal();
            sv_setref_pv(ST(0), sclass, (void*)context);
            SvREADONLY_on(SvRV(ST(0)));
        }
        MD5Init(context);
        XSRETURN(1);
537
void
DESTROY(context)
        MD5_CTX* context
    CODE:
        /* give back the memory obtained with New() in new() */
        Safefree(context);
543
void
add(self, ...)
        SV* self
    PREINIT:
        MD5_CTX* context = get_md5_ctx(self);
        STRLEN len;
        unsigned char *data;
        int i;
    PPCODE:
        /* ST(0) is self; every following argument is message data */
        i = 1;
        while (i < items) {
            data = (unsigned char *)(SvPVbyte(ST(i), len));
            MD5Update(context, data, len);
            i++;
        }
        XSRETURN(1);  /* self */
558
void
addfile(self, fh)
        SV* self
        InputStream fh
    PREINIT:
        MD5_CTX* context = get_md5_ctx(self);
        STRLEN fill = context->bytes_low & 0x3F;
        unsigned char buffer[4096];
        int n;
    CODE:
        if (fh) {
            /* n stays positive until a read reports EOF (0) or an
             * error (negative).  A negative count must never reach
             * MD5Update(), where it would be taken as a huge length.
             */
            n = 1;
            if (fill) {
                /* The MD5Update() function is faster if it can work with
                 * complete blocks.  This will fill up any buffered block
                 * first.
                 */
                STRLEN missing = 64 - fill;
                n = PerlIO_read(fh, buffer, missing);
                if (n > 0)
                    MD5Update(context, buffer, n);
            }
            /* Process blocks until EOF or error */
            while (n > 0
                   && (n = PerlIO_read(fh, buffer, sizeof(buffer))) > 0) {
                MD5Update(context, buffer, n);
            }
            /* do not hand back a digest of silently truncated input */
            if (PerlIO_error(fh)) {
                croak("Reading from filehandle failed");
            }
        }
        XSRETURN(1);  /* self */
588
void
digest(context)
        MD5_CTX* context
    ALIAS:
        Digest::MD5::digest    = F_BIN
        Digest::MD5::hexdigest = F_HEX
        Digest::MD5::b64digest = F_B64
    PREINIT:
        unsigned char digeststr[16];
    PPCODE:
        /* ix is the ALIAS value, i.e. the requested output format */
        MD5Final(digeststr, context);
        MD5Init(context);  /* In case it is reused */
        ST(0) = make_mortal_sv(digeststr, ix);
        XSRETURN(1);
603
void
md5(...)
    ALIAS:
        Digest::MD5::md5        = F_BIN
        Digest::MD5::md5_hex    = F_HEX
        Digest::MD5::md5_base64 = F_B64
    PREINIT:
        MD5_CTX ctx;
        unsigned char digeststr[16];
        unsigned char *data;
        STRLEN len;
        int i;
    PPCODE:
        /* one-shot digest over all arguments, using a context on
         * the C stack; ix (the ALIAS value) picks the output format */
        MD5Init(&ctx);
        i = 0;
        while (i < items) {
            data = (unsigned char *)(SvPVbyte(ST(i), len));
            MD5Update(&ctx, data, len);
            i++;
        }
        MD5Final(digeststr, &ctx);
        ST(0) = make_mortal_sv(digeststr, ix);
        XSRETURN(1);