Use faster any_hash logic in rb_hash

From the documentation of rb_obj_hash:

> Certain core classes such as Integer use built-in hash calculations and
> do not call the #hash method when used as a hash key.

So if you override, say, Integer#hash, the override won't be used by
rb_hash_aref and similar functions. This avoids method lookups in many
common cases.

This commit uses the same optimization in rb_hash, a C function used
internally and in the C API to get the hash value of an object. Usually
this is used to build the hash of an object based on its elements.
Previously it would always do a method lookup for 'hash'.

This is primarily intended to speed up hashing of Arrays and Hashes,
which call rb_hash for each element.

    compare-ruby: ruby 3.0.1p64 (2021-04-05 revision 0fb782ee38) [x86_64-linux]
    built-ruby: ruby 3.1.0dev (2021-09-29T02:13:24Z fast_hash d670bf88b2) [x86_64-linux]
    # Iteration per second (i/s)

    |                 |compare-ruby|built-ruby|
    |:----------------|-----------:|---------:|
    |hash_aref_array  |       1.008|     1.769|
    |                 |           -|     1.76x|
This commit is contained in:
John Hawthorn 2021-09-28 19:13:24 -07:00 committed by Aaron Patterson
parent 529fc204af
commit bb488a1a7f
Notes: git 2021-10-01 05:07:19 +09:00
2 changed files with 35 additions and 29 deletions

View File

@ -0,0 +1,5 @@
# Benchmark: Hash lookups (Hash#[]) using Arrays as keys.
# Builds a Hash keyed by ten 10-element Integer arrays, then reads every
# key back 200_000 times. Nothing is printed; only the elapsed time matters.
h = {}
# (0..99) cut into consecutive slices of 10 => 10 arrays of 10 integers each.
arrays = (0..99).each_slice(10).to_a
# Debug aid, left disabled so it does not affect the measurement:
#STDERR.puts arrays.inspect
# Populate the table: each array serves as both key and value.
arrays.each { |s| h[s] = s }
# Measured loop: 200_000 passes over all ten keys; lookup results are discarded.
200_000.times { arrays.each { |s| h[s] } }

59
hash.c
View File

@ -122,33 +122,6 @@ hash_recursive(VALUE obj, VALUE arg, int recurse)
return rb_funcallv(obj, id_hash, 0, 0); return rb_funcallv(obj, id_hash, 0, 0);
} }
/*
 * rb_hash -- the implementation removed by this commit.
 *
 * Returns the hash value of obj as a Fixnum VALUE: the loop below only
 * exits once FIXNUM_P(hval) holds, and hval is returned unchanged.
 *
 * obj: the object whose hash value is wanted.
 */
VALUE
rb_hash(VALUE obj)
{
/* First attempt: rb_check_funcall_basic_kw for id_hash against rb_mKernel,
 * with no arguments. NOTE(review): Qundef presumably means "the call was
 * not made" -- confirm against rb_check_funcall_basic_kw's definition. */
VALUE hval = rb_check_funcall_basic_kw(obj, id_hash, rb_mKernel, 0, 0, 0);
if (hval == Qundef) {
/* Fallback: run hash_recursive through rb_exec_recursive_outer. */
hval = rb_exec_recursive_outer(hash_recursive, obj, 0);
}
/* Coerce whatever came back until it is a Fixnum. */
while (!FIXNUM_P(hval)) {
if (RB_BIGNUM_TYPE_P(hval)) {
int sign;
unsigned long ul;
/* Pack the Bignum into a single unsigned long word (native byte order);
 * the return value is kept as the sign. */
sign = rb_integer_pack(hval, &ul, 1, sizeof(ul), 0,
INTEGER_PACK_NATIVE_BYTE_ORDER);
if (sign < 0) {
/* Negative: OR the word with FIXNUM_MIN before wrapping it. */
hval = LONG2FIX(ul | FIXNUM_MIN);
}
else {
/* Non-negative: mask the word with FIXNUM_MAX before wrapping it. */
hval = LONG2FIX(ul & FIXNUM_MAX);
}
}
/* Runs on every iteration, including right after the Bignum branch. */
hval = rb_to_int(hval);
}
return hval;
}
static long rb_objid_hash(st_index_t index); static long rb_objid_hash(st_index_t index);
static st_index_t static st_index_t
@ -216,8 +189,29 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
/*
 * obj_any_hash -- shown as a side-by-side diff. Unchanged lines appear
 * twice on one line (left = before, right = after). On the first body line
 * the left half is the removed statement and the right half is the added
 * one; every following single-column line except the removed
 * `return FIX2LONG(obj);` belongs to the added body.
 *
 * Before: set obj to rb_hash(obj) and returned FIX2LONG(obj).
 * After:  obtains a hash VALUE for obj itself, coerces it to a Fixnum in
 *         the loop below, and returns FIX2LONG of that Fixnum.
 */
static st_index_t static st_index_t
obj_any_hash(VALUE obj) obj_any_hash(VALUE obj)
{ {
obj = rb_hash(obj); VALUE hval = rb_check_funcall_basic_kw(obj, id_hash, rb_mKernel, 0, 0, 0);
return FIX2LONG(obj);
/* NOTE(review): Qundef presumably means rb_check_funcall_basic_kw did not
 * make the call -- confirm against its definition. Fallback runs
 * hash_recursive through rb_exec_recursive_outer. */
if (hval == Qundef) {
hval = rb_exec_recursive_outer(hash_recursive, obj, 0);
}
/* Coerce the result until it is a Fixnum. */
while (!FIXNUM_P(hval)) {
if (RB_TYPE_P(hval, T_BIGNUM)) {
int sign;
unsigned long ul;
/* Pack the Bignum into one unsigned long word (native byte order); the
 * return value is kept as the sign. */
sign = rb_integer_pack(hval, &ul, 1, sizeof(ul), 0,
INTEGER_PACK_NATIVE_BYTE_ORDER);
if (sign < 0) {
hval = LONG2FIX(ul | FIXNUM_MIN);
}
else {
hval = LONG2FIX(ul & FIXNUM_MAX);
}
}
/* Runs on every iteration, including right after the Bignum branch. */
hval = rb_to_int(hval);
}
return FIX2LONG(hval);
} }
static st_index_t static st_index_t
@ -226,6 +220,13 @@ rb_any_hash(VALUE a)
return any_hash(a, obj_any_hash); return any_hash(a, obj_any_hash);
} }
/*
 * rb_hash -- the implementation added by this commit.
 *
 * Returns the hash value of obj as a VALUE built with LONG2FIX.
 * Delegates to any_hash, passing obj_any_hash as its function-pointer
 * argument (named other_func in any_hash's signature); this is the same
 * call rb_any_hash makes, with the result wrapped by LONG2FIX.
 * Per the commit message, this lets certain core classes use built-in
 * hash calculations instead of a method lookup for 'hash'.
 *
 * obj: the object whose hash value is wanted.
 */
VALUE
rb_hash(VALUE obj)
{
return LONG2FIX(any_hash(obj, obj_any_hash));
}
/* Here is a hash function for 64-bit key. It is about 5 times faster /* Here is a hash function for 64-bit key. It is about 5 times faster
(2 times faster when uint128 type is absent) on Haswell than (2 times faster when uint128 type is absent) on Haswell than
tailored Spooky or City hash function can be. */ tailored Spooky or City hash function can be. */