Implement Hash ST tables on VWA

This commit is contained in:
Peter Zhu 2023-04-19 16:02:36 -04:00
parent 5199f2aaf9
commit 0938964ba1
Notes: git 2023-05-17 13:20:12 +00:00
6 changed files with 103 additions and 100 deletions

10
array.c
View File

@ -6416,6 +6416,12 @@ rb_ary_count(int argc, VALUE *argv, VALUE ary)
static VALUE static VALUE
flatten(VALUE ary, int level) flatten(VALUE ary, int level)
{ {
static const rb_data_type_t flatten_memo_data_type = {
.wrap_struct_name = "array_flatten_memo_data_type",
.function = { NULL, (RUBY_DATA_FUNC)st_free_table },
NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
long i; long i;
VALUE stack, result, tmp = 0, elt, vmemo; VALUE stack, result, tmp = 0, elt, vmemo;
st_table *memo = 0; st_table *memo = 0;
@ -6441,10 +6447,8 @@ flatten(VALUE ary, int level)
rb_ary_push(stack, LONG2NUM(i + 1)); rb_ary_push(stack, LONG2NUM(i + 1));
if (level < 0) { if (level < 0) {
vmemo = rb_hash_new();
RBASIC_CLEAR_CLASS(vmemo);
memo = st_init_numtable(); memo = st_init_numtable();
rb_hash_st_table_set(vmemo, memo); vmemo = TypedData_Wrap_Struct(0, &flatten_memo_data_type, memo);
st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue); st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue);
st_insert(memo, (st_data_t)tmp, (st_data_t)Qtrue); st_insert(memo, (st_data_t)tmp, (st_data_t)Qtrue);
} }

18
gc.c
View File

@ -3568,9 +3568,12 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
RB_DEBUG_COUNTER_INC(obj_hash_st); RB_DEBUG_COUNTER_INC(obj_hash_st);
} }
#endif #endif
if (!RHASH_AR_TABLE_P(obj)) {
GC_ASSERT(RHASH_ST_TABLE_P(obj)); if (RHASH_ST_TABLE_P(obj)) {
st_free_table(RHASH(obj)->as.st); st_table *tab = RHASH_ST_TABLE(obj);
if (tab->bins != NULL) free(tab->bins);
free(tab->entries);
} }
break; break;
case T_REGEXP: case T_REGEXP:
@ -4909,11 +4912,10 @@ obj_memsize_of(VALUE obj, int use_all_types)
size += rb_ary_memsize(obj); size += rb_ary_memsize(obj);
break; break;
case T_HASH: case T_HASH:
if (RHASH_AR_TABLE_P(obj)) { if (RHASH_ST_TABLE_P(obj)) {
}
else {
VM_ASSERT(RHASH_ST_TABLE(obj) != NULL); VM_ASSERT(RHASH_ST_TABLE(obj) != NULL);
size += st_memsize(RHASH_ST_TABLE(obj)); /* st_table is in the slot */
size += st_memsize(RHASH_ST_TABLE(obj)) - sizeof(st_table);
} }
break; break;
case T_REGEXP: case T_REGEXP:
@ -8402,7 +8404,7 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V
break; break;
case T_HASH: case T_HASH:
obj_size = rb_hash_size_as_embedded(src); obj_size = RHASH_SLOT_SIZE;
break; break;
default: default:

110
hash.c
View File

@ -378,27 +378,6 @@ typedef st_index_t st_hash_t;
#define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n]) #define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n])
#define RHASH_AR_CLEARED_HINT 0xff #define RHASH_AR_CLEARED_HINT 0xff
typedef struct ar_table_pair_struct {
VALUE key;
VALUE val;
} ar_table_pair;
typedef struct ar_table_struct {
/* 64bit CPU: 8B * 2 * 8 = 128B */
ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
} ar_table;
#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table))
size_t
rb_hash_size_as_embedded(VALUE hash)
{
if (RHASH_AR_TABLE_P(hash)) {
return RHASH_EMBED_SIZE;
}
return sizeof(struct RHash);
}
static inline st_hash_t static inline st_hash_t
ar_do_hash(st_data_t key) ar_do_hash(st_data_t key)
{ {
@ -539,6 +518,7 @@ hash_verify_(VALUE hash, const char *file, int line)
HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0); HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0); HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0);
} }
return hash; return hash;
} }
@ -564,25 +544,25 @@ RHASH_TABLE_EMPTY_P(VALUE hash)
return RHASH_SIZE(hash) == 0; return RHASH_SIZE(hash) == 0;
} }
#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
static void
hash_st_table_init(VALUE hash, const struct st_hash_type *type, st_index_t size)
{
st_init_existing_table_with_size(RHASH_ST_TABLE(hash), type, size);
RHASH_SET_ST_FLAG(hash);
}
void void
rb_hash_st_table_set(VALUE hash, st_table *st) rb_hash_st_table_set(VALUE hash, st_table *st)
{ {
HASH_ASSERT(st != NULL); HASH_ASSERT(st != NULL);
FL_SET_RAW((hash), RHASH_ST_TABLE_FLAG); RHASH_SET_ST_FLAG(hash);
RHASH(hash)->as.st = st;
}
static void *RHASH_ST_TABLE(hash) = *st;
hash_ar_table_set(VALUE hash, ar_table *ar)
{
HASH_ASSERT(RHASH_AR_TABLE_P(hash));
*(RHASH_AR_TABLE(hash)) = *ar;
hash_verify(hash);
} }
#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
static inline void static inline void
RHASH_AR_TABLE_BOUND_SET(VALUE h, st_index_t n) RHASH_AR_TABLE_BOUND_SET(VALUE h, st_index_t n)
{ {
@ -646,9 +626,6 @@ ar_alloc_table(VALUE hash)
ar_table *tab = RHASH_AR_TABLE(hash); ar_table *tab = RHASH_AR_TABLE(hash);
memset(tab, 0, sizeof(ar_table)); memset(tab, 0, sizeof(ar_table));
// RHASH_UNSET_ST_FLAG(hash);
// hash_ar_table_set(hash, tab);
RHASH_AR_TABLE_SIZE_SET(hash, 0); RHASH_AR_TABLE_SIZE_SET(hash, 0);
RHASH_AR_TABLE_BOUND_SET(hash, 0); RHASH_AR_TABLE_BOUND_SET(hash, 0);
@ -730,28 +707,29 @@ ar_try_convert_table(VALUE hash)
const unsigned size = RHASH_AR_TABLE_SIZE(hash); const unsigned size = RHASH_AR_TABLE_SIZE(hash);
st_table *new_tab;
st_index_t i;
if (size < RHASH_AR_TABLE_MAX_SIZE) { if (size < RHASH_AR_TABLE_MAX_SIZE) {
return; return;
} }
new_tab = st_init_table_with_size(&objhash, size * 2); st_table tab;
st_table *new_tab = &tab;
rb_st_init_existing_table_with_size(new_tab, &objhash, size * 2);
for (i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) { for (st_index_t i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) {
ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i); ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i);
st_add_direct(new_tab, pair->key, pair->val); st_add_direct(new_tab, pair->key, pair->val);
} }
ar_free_and_clear_table(hash); ar_free_and_clear_table(hash);
RHASH_ST_TABLE_SET(hash, new_tab); RHASH_ST_TABLE_SET(hash, new_tab);
return;
} }
static st_table * static st_table *
ar_force_convert_table(VALUE hash, const char *file, int line) ar_force_convert_table(VALUE hash, const char *file, int line)
{ {
st_table *new_tab; st_table *new_tab;
st_table tab;
new_tab = &tab;
if (RHASH_ST_TABLE_P(hash)) { if (RHASH_ST_TABLE_P(hash)) {
return RHASH_ST_TABLE(hash); return RHASH_ST_TABLE(hash);
@ -760,13 +738,7 @@ ar_force_convert_table(VALUE hash, const char *file, int line)
if (RHASH_AR_TABLE(hash)) { if (RHASH_AR_TABLE(hash)) {
unsigned i, bound = RHASH_AR_TABLE_BOUND(hash); unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
#if defined(RHASH_CONVERT_TABLE_DEBUG) && RHASH_CONVERT_TABLE_DEBUG rb_st_init_existing_table_with_size(new_tab, &objhash, RHASH_AR_TABLE_SIZE(hash));
rb_obj_info_dump(hash);
fprintf(stderr, "force_convert: %s:%d\n", file, line);
RB_DEBUG_COUNTER_INC(obj_hash_force_convert);
#endif
new_tab = st_init_table_with_size(&objhash, RHASH_AR_TABLE_SIZE(hash));
for (i = 0; i < bound; i++) { for (i = 0; i < bound; i++) {
if (ar_cleared_entry(hash, i)) continue; if (ar_cleared_entry(hash, i)) continue;
@ -777,10 +749,13 @@ ar_force_convert_table(VALUE hash, const char *file, int line)
ar_free_and_clear_table(hash); ar_free_and_clear_table(hash);
} }
else { else {
new_tab = st_init_table(&objhash); rb_st_init_existing_table_with_size(new_tab, &objhash, 0);
} }
RHASH_ST_TABLE_SET(hash, new_tab); RHASH_ST_TABLE_SET(hash, new_tab);
new_tab = RHASH_ST_TABLE(hash);
return new_tab; return new_tab;
} }
@ -1193,7 +1168,6 @@ ar_copy(VALUE hash1, VALUE hash2)
RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word; RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word;
RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2)); RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2)); RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
hash_ar_table_set(hash1, new_tab);
rb_gc_writebarrier_remember(hash1); rb_gc_writebarrier_remember(hash1);
@ -1454,7 +1428,7 @@ static VALUE
hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone) hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone)
{ {
const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0); const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0);
NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0); NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_SLOT_SIZE, 0);
RHASH_SET_IFNONE((VALUE)hash, ifnone); RHASH_SET_IFNONE((VALUE)hash, ifnone);
@ -1498,7 +1472,7 @@ rb_hash_new_with_size(st_index_t size)
/* do nothing */ /* do nothing */
} }
else if (size > RHASH_AR_TABLE_MAX_SIZE) { else if (size > RHASH_AR_TABLE_MAX_SIZE) {
RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size)); hash_st_table_init(ret, &objhash, size);
} }
return ret; return ret;
} }
@ -1981,10 +1955,12 @@ rb_hash_rehash(VALUE hash)
else if (RHASH_ST_TABLE_P(hash)) { else if (RHASH_ST_TABLE_P(hash)) {
st_table *old_tab = RHASH_ST_TABLE(hash); st_table *old_tab = RHASH_ST_TABLE(hash);
tmp = hash_alloc(0); tmp = hash_alloc(0);
tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries);
RHASH_ST_TABLE_SET(tmp, tbl); hash_st_table_init(tmp, old_tab->type, old_tab->num_entries);
tbl = RHASH_ST_TABLE(tmp);
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
st_free_table(old_tab);
RHASH_ST_TABLE_SET(hash, tbl); RHASH_ST_TABLE_SET(hash, tbl);
RHASH_ST_CLEAR(tmp); RHASH_ST_CLEAR(tmp);
} }
@ -2925,13 +2901,12 @@ rb_hash_replace(VALUE hash, VALUE hash2)
ar_free_and_clear_table(hash); ar_free_and_clear_table(hash);
} }
else { else {
st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_CLEAR(hash); RHASH_ST_CLEAR(hash);
} }
hash_copy(hash, hash2); hash_copy(hash, hash2);
if (RHASH_EMPTY_P(hash2) && RHASH_ST_TABLE_P(hash2)) { if (RHASH_EMPTY_P(hash2) && RHASH_ST_TABLE_P(hash2)) {
/* ident hash */ /* ident hash */
RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), 0)); hash_st_table_init(hash, RHASH_TYPE(hash2), 0);
} }
rb_gc_writebarrier_remember(hash); rb_gc_writebarrier_remember(hash);
@ -4306,8 +4281,6 @@ rb_hash_compact_bang(VALUE hash)
return Qnil; return Qnil;
} }
static st_table *rb_init_identtable_with_size(st_index_t size);
/* /*
* call-seq: * call-seq:
* hash.compare_by_identity -> self * hash.compare_by_identity -> self
@ -4349,10 +4322,11 @@ rb_hash_compare_by_id(VALUE hash)
HASH_ASSERT(RHASH_ST_TABLE_P(hash)); HASH_ASSERT(RHASH_ST_TABLE_P(hash));
tmp = hash_alloc(0); tmp = hash_alloc(0);
identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); hash_st_table_init(tmp, &identhash, RHASH_SIZE(hash));
RHASH_ST_TABLE_SET(tmp, identtable); identtable = RHASH_ST_TABLE(tmp);
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_TABLE_SET(hash, identtable); RHASH_ST_TABLE_SET(hash, identtable);
RHASH_ST_CLEAR(tmp); RHASH_ST_CLEAR(tmp);
@ -4376,7 +4350,7 @@ VALUE
rb_ident_hash_new(void) rb_ident_hash_new(void)
{ {
VALUE hash = rb_hash_new(); VALUE hash = rb_hash_new();
RHASH_ST_TABLE_SET(hash, st_init_table(&identhash)); hash_st_table_init(hash, &identhash, 0);
return hash; return hash;
} }
@ -4384,7 +4358,7 @@ VALUE
rb_ident_hash_new_with_size(st_index_t size) rb_ident_hash_new_with_size(st_index_t size)
{ {
VALUE hash = rb_hash_new(); VALUE hash = rb_hash_new();
RHASH_ST_TABLE_SET(hash, st_init_table_with_size(&identhash, size)); hash_st_table_init(hash, &identhash, size);
return hash; return hash;
} }
@ -4394,12 +4368,6 @@ rb_init_identtable(void)
return st_init_table(&identhash); return st_init_table(&identhash);
} }
static st_table *
rb_init_identtable_with_size(st_index_t size)
{
return st_init_table_with_size(&identhash, size);
}
static int static int
any_p_i(VALUE key, VALUE value, VALUE arg) any_p_i(VALUE key, VALUE value, VALUE arg)
{ {

View File

@ -104,6 +104,8 @@ st_table *rb_st_init_table(const struct st_hash_type *);
#define st_init_table rb_st_init_table #define st_init_table rb_st_init_table
st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t); st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t);
#define st_init_table_with_size rb_st_init_table_with_size #define st_init_table_with_size rb_st_init_table_with_size
st_table *rb_st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size);
#define st_init_existing_table_with_size rb_st_init_existing_table_with_size
st_table *rb_st_init_numtable(void); st_table *rb_st_init_numtable(void);
#define st_init_numtable rb_st_init_numtable #define st_init_numtable rb_st_init_numtable
st_table *rb_st_init_numtable_with_size(st_index_t); st_table *rb_st_init_numtable_with_size(st_index_t);

View File

@ -40,6 +40,16 @@ enum ruby_rhash_flags {
RHASH_LEV_MAX = 127, /* 7 bits */ RHASH_LEV_MAX = 127, /* 7 bits */
}; };
typedef struct ar_table_pair_struct {
VALUE key;
VALUE val;
} ar_table_pair;
typedef struct ar_table_struct {
/* 64bit CPU: 8B * 2 * 8 = 128B */
ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
} ar_table;
struct RHash { struct RHash {
struct RBasic basic; struct RBasic basic;
const VALUE ifnone; const VALUE ifnone;
@ -47,13 +57,16 @@ struct RHash {
ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE]; ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE];
VALUE word; VALUE word;
} ar_hint; } ar_hint;
union {
st_table *st;
} as;
}; };
#define RHASH(obj) ((struct RHash *)(obj)) #define RHASH(obj) ((struct RHash *)(obj))
#ifndef MAX
# define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
#define RHASH_SLOT_SIZE (sizeof(struct RHash) + MAX(sizeof(ar_table), sizeof(st_table)))
#ifdef RHASH_IFNONE #ifdef RHASH_IFNONE
# undef RHASH_IFNONE # undef RHASH_IFNONE
#endif #endif
@ -85,8 +98,6 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu
int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg); int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg);
VALUE rb_ident_hash_new_with_size(st_index_t size); VALUE rb_ident_hash_new_with_size(st_index_t size);
size_t rb_hash_size_as_embedded(VALUE hash);
static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h); static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h);
static inline VALUE RHASH_IFNONE(VALUE h); static inline VALUE RHASH_IFNONE(VALUE h);
static inline size_t RHASH_SIZE(VALUE h); static inline size_t RHASH_SIZE(VALUE h);
@ -126,13 +137,13 @@ RHASH_AR_TABLE_P(VALUE h)
static inline struct ar_table_struct * static inline struct ar_table_struct *
RHASH_AR_TABLE(VALUE h) RHASH_AR_TABLE(VALUE h)
{ {
return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st)); return (struct ar_table_struct *)((uintptr_t)h + sizeof(struct RHash));
} }
static inline st_table * static inline st_table *
RHASH_ST_TABLE(VALUE h) RHASH_ST_TABLE(VALUE h)
{ {
return RHASH(h)->as.st; return (st_table *)((uintptr_t)h + sizeof(struct RHash));
} }
static inline VALUE static inline VALUE
@ -173,7 +184,7 @@ RHASH_ST_SIZE(VALUE h)
static inline void static inline void
RHASH_ST_CLEAR(VALUE h) RHASH_ST_CLEAR(VALUE h)
{ {
RHASH(h)->as.st = NULL; memset(RHASH_ST_TABLE(h), 0, sizeof(st_table));
FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG); FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG);
} }

36
st.c
View File

@ -509,13 +509,9 @@ stat_col(void)
} }
#endif #endif
/* Create and return table with TYPE which can hold at least SIZE
entries. The real number of entries which the table can hold is
the nearest power of two for SIZE. */
st_table * st_table *
st_init_table_with_size(const struct st_hash_type *type, st_index_t size) st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
{ {
st_table *tab;
int n; int n;
#ifdef HASH_LOG #ifdef HASH_LOG
@ -536,11 +532,7 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
if (n < 0) if (n < 0)
return NULL; return NULL;
#endif #endif
tab = (st_table *) malloc(sizeof (st_table));
#ifndef RUBY
if (tab == NULL)
return NULL;
#endif
tab->type = type; tab->type = type;
tab->entry_power = n; tab->entry_power = n;
tab->bin_power = features[n].bin_power; tab->bin_power = features[n].bin_power;
@ -569,6 +561,30 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
return tab; return tab;
} }
/* Create and return table with TYPE which can hold at least SIZE
entries. The real number of entries which the table can hold is
the nearest power of two for SIZE. */
st_table *
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
{
st_table *tab = malloc(sizeof(st_table));
#ifndef RUBY
if (tab == NULL)
return NULL;
#endif
#ifdef RUBY
st_init_existing_table_with_size(tab, type, size);
#else
if (st_init_existing_table_with_size(tab, type, size) == NULL) {
free(tab);
return NULL;
}
#endif
return tab;
}
size_t size_t
st_table_size(const struct st_table *tbl) st_table_size(const struct st_table *tbl)
{ {