* string.c (hash): use Bob Jenkins' hash algorithm.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@11255 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2006-11-01 14:23:42 +00:00
parent dd937ba9dc
commit 11e562f34a
2 changed files with 118 additions and 88 deletions

View File

@ -1,3 +1,7 @@
Wed Nov 1 23:01:55 2006 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (hash): use Bob Jenkins' hash algorithm.
Wed Nov 1 02:22:31 2006 Akinori MUSHA <knu@iDaemons.org> Wed Nov 1 02:22:31 2006 Akinori MUSHA <knu@iDaemons.org>
* ext/digest/lib/digest/hmac.rb (Digest::HMAC::update): Minor * ext/digest/lib/digest/hmac.rb (Digest::HMAC::update): Minor

198
string.c
View File

@ -879,102 +879,128 @@ rb_str_concat(VALUE str1, VALUE str2)
return rb_str_append(str1, str2); return rb_str_append(str1, str2);
} }
/* typedef unsigned int ub4; /* unsigned 4-byte quantities */
* hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code typedef unsigned char ub1; /* unsigned 1-byte quantities */
*
* @(#) $hash_32.Revision: 1.1 $ #define hashsize(n) ((ub4)1<<(n))
* @(#) $hash_32.Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $ #define hashmask(n) (hashsize(n)-1)
* @(#) $hash_32.Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $
*
***
*
* Fowler/Noll/Vo hash
*
* The basis of this hash algorithm was taken from an idea sent
* as reviewer comments to the IEEE POSIX P1003.2 committee by:
*
* Phong Vo (http://www.research.att.com/info/kpv/)
* Glenn Fowler (http://www.research.att.com/~gsf/)
*
* In a subsequent ballot round:
*
* Landon Curt Noll (http://www.isthe.com/chongo/)
*
* improved on their algorithm. Some people tried this hash
* and found that it worked rather well. In an EMail message
* to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
*
* FNV hashes are designed to be fast while maintaining a low
* collision rate. The FNV speed allows one to quickly hash lots
* of data while maintaining a reasonable collision rate. See:
*
* http://www.isthe.com/chongo/tech/comp/fnv/index.html
*
* for more details as well as other forms of the FNV hash.
***
*
* To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
* Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
*
***
*
* Please do not copyright this code. This code is in the public domain.
*
* LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
* EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
* USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*
* By:
* chongo <Landon Curt Noll> /\oo/\
* http://www.isthe.com/chongo/
*
* Share and Enjoy! :-)
*/
/* /*
* 32 bit FNV-1 and FNV-1a non-zero initial basis --------------------------------------------------------------------
* mix -- mix 3 32-bit values reversibly.
* The FNV-1 initial basis is the FNV-0 hash of the following 32 octets: For every delta with one or two bits set, and the deltas of all three
* high bits or all three low bits, whether the original value of a,b,c
* chongo <Landon Curt Noll> /\../\ is almost all zero or is uniformly distributed,
* * If mix() is run forward or backward, at least 32 bits in a,b,c
* NOTE: The \'s above are not back-slashing escape characters. have at least 1/4 probability of changing.
* They are literal ASCII backslash 0x5c characters. * If mix() is run forward, every bit of c will change between 1/3 and
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
* NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition. mix() was built out of 36 single-cycle latency instructions in a
structure that could support 2x parallelism, like so:
a -= b;
a -= c; x = (c>>13);
b -= c; a ^= x;
b -= a; x = (a<<8);
c -= a; b ^= x;
c -= b; x = (b>>13);
...
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
of that parallelism. They've also turned some of those single-cycle
latency instructions into multi-cycle latency instructions. Still,
this is the fastest good hash I could find. There were about 2^^68
to choose from. I only looked at a billion or so.
--------------------------------------------------------------------
*/ */
#define FNV1_32A_INIT 0x811c9dc5 #define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
/* /*
* 32 bit magic FNV-1a prime --------------------------------------------------------------------
hash() -- hash a variable-length key into a 32-bit value
k : the key (the unaligned variable-length array of bytes)
len : the length of the key, counting by bytes
initval : can be any 4-byte value
Returns a 32-bit value. Every bit of the key affects every bit of
the return value. Every 1-bit and 2-bit delta achieves avalanche.
About 6*len+35 instructions.
The best hash table sizes are powers of 2. There is no need to do
mod a prime (mod is sooo slow!). If you need less than 32 bits,
use a bitmask. For example, if you need only 10 bits, do
h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.
If you are hashing n strings (ub1 **)k, do it like this:
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
code any way you wish, private, educational, or commercial. It's free.
See http://burtleburtle.net/bob/hash/evahash.html
Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable. Do NOT use for cryptographic purposes.
--------------------------------------------------------------------
*/ */
#define FNV_32_PRIME 0x01000193
/*
 * hash -- fold a byte string into a single ub4 value.
 *
 *   k       : pointer to the key bytes
 *   length  : number of bytes in the key
 *   initval : seed; the previous hash when chaining, or any value
 *
 * The key is consumed in 12-byte groups.  Each group is packed, lowest
 * byte first, into three words that are added to the running state and
 * stirred with mix().  The total length and the 0..11 leftover bytes
 * are folded in afterwards, followed by one last mix().  The third
 * state word is the result.
 */
static ub4
hash(const ub1 *k, ub4 length, ub4 initval)
{
    ub4 va = 0x9e3779b9;    /* the golden ratio; an arbitrary value */
    ub4 vb = 0x9e3779b9;    /* same starting constant as va */
    ub4 vc = initval;       /* seeded with the caller's value */
    ub4 rest;               /* bytes of the key not yet consumed */

    /* Bulk of the key: one 12-byte group per iteration. */
    for (rest = length; rest >= 12; rest -= 12, k += 12) {
        va += (k[0] + ((ub4)k[1] << 8) + ((ub4)k[2] << 16) + ((ub4)k[3] << 24));
        vb += (k[4] + ((ub4)k[5] << 8) + ((ub4)k[6] << 16) + ((ub4)k[7] << 24));
        vc += (k[8] + ((ub4)k[9] << 8) + ((ub4)k[10] << 16) + ((ub4)k[11] << 24));
        mix(va, vb, vc);
    }

    /*
     * The full key length is added to vc, so the leftover bytes that
     * land in vc (k[8]..k[10]) are shifted up by one byte position.
     */
    vc += length;

    /*
     * Leftover bytes, highest index first.  Every test is independent:
     * a tail of N bytes takes all branches for indexes below N.
     */
    if (rest > 10) vc += ((ub4)k[10] << 24);
    if (rest > 9)  vc += ((ub4)k[9] << 16);
    if (rest > 8)  vc += ((ub4)k[8] << 8);
    if (rest > 7)  vb += ((ub4)k[7] << 24);
    if (rest > 6)  vb += ((ub4)k[6] << 16);
    if (rest > 5)  vb += ((ub4)k[5] << 8);
    if (rest > 4)  vb += k[4];
    if (rest > 3)  va += ((ub4)k[3] << 24);
    if (rest > 2)  va += ((ub4)k[2] << 16);
    if (rest > 1)  va += ((ub4)k[1] << 8);
    if (rest > 0)  va += k[0];
    /* rest == 0: nothing left to add */

    mix(va, vb, vc);

    /* Report the result. */
    return vc;
}
int int
rb_memhash(const void *ptr, long len) rb_memhash(const void *ptr, long len)
{ {
register const unsigned char *p = ptr; return hash(ptr, len, 0);
register unsigned int hval = FNV1_32A_INIT;
/*
* FNV-1a hash each octet in the buffer
*/
while (len--) {
/* xor the bottom with the current octet */
hval ^= (unsigned int)*p++;
/* multiply by the 32 bit FNV magic prime mod 2^32 */
#if defined(FNV_GCC_OPTIMIZATION)
hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
#else
hval *= FNV_32_PRIME;
#endif
}
return hval;
} }
int int