lib/Digest.pm

   1 package Digest;
   2
   3 use strict;
   4 use vars qw($VERSION %MMAP $AUTOLOAD);
   5
   # Module version, kept as a string so it is reported exactly as written.
   $VERSION = "1.10";

   # %MMAP maps a public algorithm name onto its implementation(s).  A value
   # is either a plain class name, or an array reference of candidates that
   # new() tries in order.  A candidate is itself either a class name or an
   # array reference [CLASS, @leading_constructor_args].
   %MMAP = (
       "SHA-1"      => ["Digest::SHA1", ["Digest::SHA", 1], ["Digest::SHA2", 1]],
       "HMAC-MD5"   => "Digest::HMAC_MD5",
       "HMAC-SHA-1" => "Digest::HMAC_SHA1",
   );

   # The SHA-2 family: both backends take the digest size in bits as their
   # first constructor argument, Digest::SHA being tried first.
   for my $bits (256, 384, 512) {
       $MMAP{"SHA-$bits"} = [map { [$_, $bits] } "Digest::SHA", "Digest::SHA2"];
   }

   # The CRC family: a single backend selected with a "type" argument whose
   # value is "crc" followed by the lower-cased variant name.
   for my $variant ("16", "32", "CCITT") {
       $MMAP{"CRC-$variant"} = [["Digest::CRC", type => "crc" . lc($variant)]];
   }
  19
  # Digest->new($algorithm, @args)
  #
  # Factory constructor.  Looks $algorithm up in %MMAP to get an ordered list
  # of candidate implementation classes; an algorithm that is not listed is
  # mapped to "Digest::<name>" after every non-word character has been
  # removed from the name.  The first candidate that is already loaded (its
  # package has a VERSION symbol) or that can be require()d is instantiated
  # with any arguments recorded in %MMAP followed by @args, and that object
  # is returned.
  #
  # Dies with the error of the first candidate that failed to load when no
  # candidate is usable.  The caller's $@ is left untouched on success.
  sub new
  {
      shift;  # class ignored
      my $algorithm = shift;
      my $impl = $MMAP{$algorithm} || do {
          # /g matters: without it only the first run of non-word characters
          # is stripped and e.g. "A-B-C" would become the invalid "AB-C".
          $algorithm =~ s/\W+//g;
          "Digest::$algorithm";
      };
      $impl = [$impl] unless ref($impl);
      my $err;
      for my $candidate (@$impl) {
          my ($class, @args) = ref($candidate) ? @$candidate : ($candidate);
          no strict 'refs';
          unless (exists ${"$class\::"}{"VERSION"}) {
              # Load by file name inside a block eval.  A string eval of
              # "require $class" would execute text derived from the
              # caller-supplied algorithm name as Perl code.
              (my $pm_file = "$class.pm") =~ s{::}{/}g;
              my $load_err;
              {
                  # Localized only around the probe, so the caller's $@
                  # survives and the final die below is not affected.
                  local $@;
                  unless (eval { require $pm_file; 1 }) {
                      $load_err = $@ || "Failed to load $pm_file";
                  }
              }
              if (defined $load_err) {
                  $err ||= $load_err;  # report the first failure only
                  next;
              }
          }
          return $class->new(@args, @_);
      }
      die $err;
  }
  46
  # Catch-all for undefined class methods, so that Digest->XXX(@args) acts
  # as shorthand for Digest->new("XXX", @args).
  sub AUTOLOAD
  {
      my $invocant = shift;
      # $AUTOLOAD holds the fully qualified name of the method that was
      # called; everything after the last '::' is used as the algorithm name.
      my $name_start = rindex($AUTOLOAD, '::') + 2;
      my $algorithm = substr($AUTOLOAD, $name_start);
      return $invocant->new($algorithm, @_);
  }
  53
  54 1;
  55
  56 __END__
  57
  58 =head1 NAME
  59
  60 Digest - Modules that calculate message digests
  61
  62 =head1 SYNOPSIS
  63
  64   $md5  = Digest->new("MD5");
  65   $sha1 = Digest->new("SHA-1");
  66   $sha256 = Digest->new("SHA-256");
  67   $sha384 = Digest->new("SHA-384");
  68   $sha512 = Digest->new("SHA-512");
  69
  70   $hmac = Digest->HMAC_MD5($key);
  71
  72 =head1 DESCRIPTION
  73
  74 The C<Digest::> modules calculate digests, also called "fingerprints"
  75 or "hashes", of some data, called a message.  The digest is (usually)
  76 some small/fixed size string.  The actual size of the digest depends on
  77 the algorithm used.  The message is simply a sequence of arbitrary
  78 bytes or bits.
  79
  80 An important property of the digest algorithms is that the digest is
  81 I<likely> to change if the message changes in some way.  Another
  82 property is that digest functions are one-way functions, i.e. it
  83 should be I<hard> to find a message that corresponds to some given
  84 digest.  Algorithms differ in how "likely" and how "hard", as well as
  85 how efficient they are to compute.
  86
  87 All C<Digest::> modules provide the same programming interface.  A
  88 functional interface for simple use, as well as an object oriented
  89 interface that can handle messages of arbitrary length and which can
  90 read files directly.
  91
  92 The digest can be delivered in three formats:
  93
  94 =over 8
  95
  96 =item I<binary>
  97
  98 This is the most compact form, but it is not well suited for printing
  99 or embedding in places that can't handle arbitrary data.
 100
 101 =item I<hex>
 102
 103 A twice as long string of lowercase hexadecimal digits.
 104
 105 =item I<base64>
 106
 107 A string of portable printable characters.  This is the base64 encoded
 108 representation of the digest with any trailing padding removed.  The
 109 string will be about 30% longer than the binary version.
 110 L<MIME::Base64> tells you more about this encoding.
 111
 112 =back
 113
 114
 115 The functional interface is simply importable functions with the same
 116 name as the algorithm.  The functions take the message as argument and
 117 return the digest.  Example:
 118
 119   use Digest::MD5 qw(md5);
 120   $digest = md5($message);
 121
 122 There are also versions of the functions with "_hex" or "_base64"
 123 appended to the name, which return the digest in the indicated form.
 124
 125 =head1 OO INTERFACE
 126
 127 The following methods are available for all C<Digest::> modules:
 128
 129 =over 4
 130
 131 =item $ctx = Digest->XXX($arg,...)
 132
 133 =item $ctx = Digest->new(XXX => $arg,...)
 134
 135 =item $ctx = Digest::XXX->new($arg,...)
 136
 137 The constructor returns some object that encapsulates the state of the
 138 message-digest algorithm.  You can add data to the object and finally
 139 ask for the digest.  The "XXX" should of course be replaced by the proper
 140 name of the digest algorithm you want to use.
 141
 142 The two first forms are simply syntactic sugar which automatically
 143 load the right module on first use.  The second form allows you to use
 144 algorithm names which contain characters that are not legal in perl
 145 identifiers, e.g. "SHA-1".  If no implementation for the given algorithm
 146 can be found, then an exception is raised.
 147
 148 If new() is called as an instance method (i.e. $ctx->new) it will just
 149 reset the state of the object to the state of a newly created object.  No
 150 new object is created in this case, and the return value is the
 151 reference to the object (i.e. $ctx).
 152
 153 =item $other_ctx = $ctx->clone
 154
 155 The clone method creates a copy of the digest state object and returns
 156 a reference to the copy.
 157
 158 =item $ctx->reset
 159
 160 This is just an alias for $ctx->new.
 161
 162 =item $ctx->add( $data, ... )
 163
 164 The $data provided as argument are appended to the message we
 165 calculate the digest for.  The return value is the $ctx object itself.
 166
 167 =item $ctx->addfile( $io_handle )
 168
 169 The $io_handle is read until EOF and the content is appended to the
 170 message we calculate the digest for.  The return value is the $ctx
 171 object itself.
 172
 173 =item $ctx->add_bits( $data, $nbits )
 174
 175 =item $ctx->add_bits( $bitstring )
 176
 177 The bits provided are appended to the message we calculate the digest
 178 for.  The return value is the $ctx object itself.
 179
 180 The two argument form of add_bits() will add the first $nbits bits
 181 from data.  For the last potentially partial byte only the high order
 182 C<< $nbits % 8 >> bits are used.  If $nbits is greater than C<<
 183 length($data) * 8 >>, then this method would do the same as C<<
 184 $ctx->add($data) >>, i.e. $nbits is silently ignored.
 185
 186 The one argument form of add_bits() takes a $bitstring of "1" and "0"
 187 chars as argument.  It's a shorthand for C<< $ctx->add_bits(pack("B*",
 188 $bitstring), length($bitstring)) >>.
 189
 190 This example shows two calls that should have the same effect:
 191
 192    $ctx->add_bits("111100001010");
 193    $ctx->add_bits("\xF0\xA0", 12);
 194
 195 Most digest algorithms are byte based.  For those it is not possible
 196 to add bits that are not a multiple of 8, and the add_bits() method
 197 will croak if you try.
 198
 199 =item $ctx->digest
 200
 201 Return the binary digest for the message.
 202
 203 Note that the C<digest> operation is effectively a destructive,
 204 read-once operation. Once it has been performed, the $ctx object is
 205 automatically C<reset> and can be used to calculate another digest
 206 value.  Call $ctx->clone->digest if you want to calculate the digest
 207 without resetting the digest state.
 208
 209 =item $ctx->hexdigest
 210
 211 Same as $ctx->digest, but will return the digest in hexadecimal form.
 212
 213 =item $ctx->b64digest
 214
 215 Same as $ctx->digest, but will return the digest as a base64 encoded
 216 string.
 217
 218 =back
 219
 220 =head1 Digest speed
 221
 222 This table should give some indication on the relative speed of
 223 different algorithms.  It is sorted by throughput based on a benchmark
 224 done with some implementations of this API:
 225
 226  Algorithm      Size    Implementation                  MB/s
 227
 228  MD4            128     Digest::MD4 v1.3               165.0
 229  MD5            128     Digest::MD5 v2.33               98.8
 230  SHA-256        256     Digest::SHA2 v1.1.0             66.7
 231  SHA-1          160     Digest::SHA v4.3.1              58.9
 232  SHA-1          160     Digest::SHA1 v2.10              48.8
 233  SHA-256        256     Digest::SHA v4.3.1              41.3
 234  Haval-256      256     Digest::Haval256 v1.0.4         39.8
 235  SHA-384        384     Digest::SHA2 v1.1.0             19.6
 236  SHA-512        512     Digest::SHA2 v1.1.0             19.3
 237  SHA-384        384     Digest::SHA v4.3.1              19.2
 238  SHA-512        512     Digest::SHA v4.3.1              19.2
 239  Whirlpool      512     Digest::Whirlpool v1.0.2        13.0
 240  MD2            128     Digest::MD2 v2.03                9.5
 241
 242  Adler-32        32     Digest::Adler32 v0.03            1.3
 243  CRC-16          16     Digest::CRC v0.05                1.1
 244  CRC-32          32     Digest::CRC v0.05                1.1
 245  MD5            128     Digest::Perl::MD5 v1.5           1.0
 246  CRC-CCITT       16     Digest::CRC v0.05                0.8
 247
 248 These numbers were achieved in Apr 2004 with ActivePerl-5.8.3 running
 249 under Linux on a P4 2.8 GHz CPU.  The last 5 entries differ by being
 250 pure perl implementations of the algorithms, which explains why they
 251 are so slow.
 252
 253 =head1 SEE ALSO
 254
 255 L<Digest::Adler32>, L<Digest::CRC>, L<Digest::Haval256>,
 256 L<Digest::HMAC>, L<Digest::MD2>, L<Digest::MD4>, L<Digest::MD5>,
 257 L<Digest::SHA>, L<Digest::SHA1>, L<Digest::SHA2>, L<Digest::Whirlpool>
 258
 259 New digest implementations should consider subclassing from L<Digest::base>.
 260
 261 L<MIME::Base64>
 262
 263 =head1 AUTHOR
 264
 265 Gisle Aas <gisle@aas.no>
 266
 267 The C<Digest::> interface is based on the interface originally
 268 developed by Neil Winton for his C<MD5> module.
 269
 270 This library is free software; you can redistribute it and/or
 271 modify it under the same terms as Perl itself.
 272
 273     Copyright 1998-2001,2003-2004 Gisle Aas.
 274     Copyright 1995-1996 Neil Winton.
 275
 276 =cut