From: Jarkko Hietaniemi Date: Mon, 3 Sep 2001 19:50:57 +0000 (+0000) Subject: Make crypt() do something more sane for Unicode X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=85c16d835facb3e1567f0ad453769c0d9a8da60b;p=p5sagit%2Fp5-mst-13.2.git Make crypt() do something more sane for Unicode (take crypt() of the low eight bits of the characters, instead of taking crypt() of the UTF-8 of the scalar); add a test for crypt(). p4raw-id: //depot/perl@11852 --- diff --git a/MANIFEST b/MANIFEST index 3a93a81..8f39648 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2019,6 +2019,7 @@ t/op/cmp.t See if the various string and numeric compare work t/op/concat.t See if string concatenation works t/op/cond.t See if conditional expressions work t/op/context.t See if context propagation works +t/op/crypt.t See if crypt works t/op/defins.t See if auto-insert of defined() works t/op/delete.t See if delete works t/op/die.t See if die works diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 1626f6e..ebac4b7 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -806,17 +806,29 @@ extirpated as a potential munition). This can prove useful for checking the password file for lousy passwords, amongst other things. Only the guys wearing white hats should do this. -Note that C<crypt> is intended to be a one-way function, much like breaking -eggs to make an omelette. There is no (known) corresponding decrypt -function. As a result, this function isn't all that useful for +Note that C<crypt> is intended to be a one-way function, much like +breaking eggs to make an omelette. There is no (known) corresponding +decrypt function (in other words, the crypt() is a one-way hash +function). As a result, this function isn't all that useful for cryptography. (For that, see your nearby CPAN mirror.) -When verifying an existing encrypted string you should use the encrypted -text as the salt (like C<crypt($plain, $crypted) eq $crypted>). This -allows your code to work with the standard C<crypt> and with more -exotic implementations.
When choosing a new salt create a random two -character string whose characters come from the set C<[./0-9A-Za-z]> -(like C<join '', ('.', '/', 0..9, 'A'..'Z', 'a'..'z')[rand 64, rand 64]>). +When verifying an existing encrypted string you should use the +encrypted text as the salt (like C<crypt($plain, $crypted) eq +$crypted>). This allows your code to work with the standard C<crypt> +and with more exotic implementations. In other words, do not assume +anything about the returned string itself, or how many bytes in +the encrypted string matter. + +Traditionally the result is a string of 13 bytes: two first bytes of +the salt, followed by 11 bytes from the set C<[./0-9A-Za-z]>, and only +the first eight bytes of the encrypted string mattered, but +alternative hashing schemes (like MD5), higher level security schemes +(like C2), and implementations on non-UNIX platforms may produce +different strings. + +When choosing a new salt create a random two character string whose +characters come from the set C<[./0-9A-Za-z]> (like C<join '', ('.', +'/', 0..9, 'A'..'Z', 'a'..'z')[rand 64, rand 64]>). Here's an example that makes sure that whoever runs this program knows their own password: @@ -844,6 +856,11 @@ back. Look at the F<by-module/Crypt> and F<by-module/PGP> directories on your favorite CPAN mirror for a slew of potentially useful modules. +If using crypt() on an Unicode string (which potentially has +characters with codepoints above 255), Perl tries to make sense of +the situation by using only the low eight bits of the characters when +calling crypt(). + =item dbmclose HASH [This function has been largely superseded by the C<untie> function.] diff --git a/pp.c b/pp.c index 5538cf4..8b09a52 100644 --- a/pp.c +++ b/pp.c @@ -3095,12 +3095,28 @@ PP(pp_crypt) dSP; dTARGET; dPOPTOPssrl; STRLEN n_a; #ifdef HAS_CRYPT - char *tmps = SvPV(left, n_a); + STRLEN len; + char *tmps = SvPV(left, len); + char *t = 0; + if (DO_UTF8(left)) { + /* If Unicode take the crypt() of the low 8 bits + * of the characters of the string.
*/ + char *s = tmps; + char *send = tmps + len; + STRLEN i = 0; + Newz(688, t, len, char); + while (s < send) { + t[i++] = utf8_to_uvchr((U8*)s, 0) & 0xFF; + s += UTF8SKIP(s); + } + tmps = t; + } #ifdef FCRYPT sv_setpv(TARG, fcrypt(tmps, SvPV(right, n_a))); #else sv_setpv(TARG, PerlProc_crypt(tmps, SvPV(right, n_a))); #endif + Safefree(t); #else DIE(aTHX_ "The crypt() function is unimplemented due to excessive paranoia."); diff --git a/t/op/crypt.t b/t/op/crypt.t new file mode 100644 index 0000000..26eb06a --- /dev/null +++ b/t/op/crypt.t @@ -0,0 +1,15 @@ +use Test::More tests => 2; + +# Can't assume too much about the string returned by crypt(), +# and about how many bytes of the encrypted (really, hashed) +# string matter. +# +# HISTORICALLY the results started with the first two bytes of the salt, +# followed by 11 bytes from the set [./0-9A-Za-z], and only the first +# eight characters mattered, but those are probably no more safe +# bets, given alternative encryption/hashing schemes like MD5, +# C2 (or higher) security schemes, and non-UNIX platforms. + +ok(substr(crypt("ab", "cd"), 2) ne substr(crypt("ab", "ce"), 2), "salt"); + +ok(crypt("HI", "HO") eq crypt(v4040.4041, "HO"), "Unicode");