Implement Hash ST tables on VWA

This commit is contained in:
Peter Zhu 2023-04-19 16:02:36 -04:00
parent 5199f2aaf9
commit 0938964ba1
Notes: git 2023-05-17 13:20:12 +00:00
6 changed files with 103 additions and 100 deletions

10
array.c
View File

@ -6416,6 +6416,12 @@ rb_ary_count(int argc, VALUE *argv, VALUE ary)
static VALUE
flatten(VALUE ary, int level)
{
static const rb_data_type_t flatten_memo_data_type = {
.wrap_struct_name = "array_flatten_memo_data_type",
.function = { NULL, (RUBY_DATA_FUNC)st_free_table },
NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
long i;
VALUE stack, result, tmp = 0, elt, vmemo;
st_table *memo = 0;
@ -6441,10 +6447,8 @@ flatten(VALUE ary, int level)
rb_ary_push(stack, LONG2NUM(i + 1));
if (level < 0) {
vmemo = rb_hash_new();
RBASIC_CLEAR_CLASS(vmemo);
memo = st_init_numtable();
rb_hash_st_table_set(vmemo, memo);
vmemo = TypedData_Wrap_Struct(0, &flatten_memo_data_type, memo);
st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue);
st_insert(memo, (st_data_t)tmp, (st_data_t)Qtrue);
}

18
gc.c
View File

@ -3568,9 +3568,12 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
RB_DEBUG_COUNTER_INC(obj_hash_st);
}
#endif
if (!RHASH_AR_TABLE_P(obj)) {
GC_ASSERT(RHASH_ST_TABLE_P(obj));
st_free_table(RHASH(obj)->as.st);
if (RHASH_ST_TABLE_P(obj)) {
st_table *tab = RHASH_ST_TABLE(obj);
if (tab->bins != NULL) free(tab->bins);
free(tab->entries);
}
break;
case T_REGEXP:
@ -4909,11 +4912,10 @@ obj_memsize_of(VALUE obj, int use_all_types)
size += rb_ary_memsize(obj);
break;
case T_HASH:
if (RHASH_AR_TABLE_P(obj)) {
}
else {
if (RHASH_ST_TABLE_P(obj)) {
VM_ASSERT(RHASH_ST_TABLE(obj) != NULL);
size += st_memsize(RHASH_ST_TABLE(obj));
/* st_table is in the slot */
size += st_memsize(RHASH_ST_TABLE(obj)) - sizeof(st_table);
}
break;
case T_REGEXP:
@ -8402,7 +8404,7 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V
break;
case T_HASH:
obj_size = rb_hash_size_as_embedded(src);
obj_size = RHASH_SLOT_SIZE;
break;
default:

110
hash.c
View File

@ -378,27 +378,6 @@ typedef st_index_t st_hash_t;
#define RHASH_AR_TABLE_REF(hash, n) (&RHASH_AR_TABLE(hash)->pairs[n])
#define RHASH_AR_CLEARED_HINT 0xff
typedef struct ar_table_pair_struct {
VALUE key;
VALUE val;
} ar_table_pair;
typedef struct ar_table_struct {
/* 64bit CPU: 8B * 2 * 8 = 128B */
ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
} ar_table;
#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table))
size_t
rb_hash_size_as_embedded(VALUE hash)
{
if (RHASH_AR_TABLE_P(hash)) {
return RHASH_EMBED_SIZE;
}
return sizeof(struct RHash);
}
static inline st_hash_t
ar_do_hash(st_data_t key)
{
@ -539,6 +518,7 @@ hash_verify_(VALUE hash, const char *file, int line)
HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0);
}
return hash;
}
@ -564,25 +544,25 @@ RHASH_TABLE_EMPTY_P(VALUE hash)
return RHASH_SIZE(hash) == 0;
}
#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
static void
hash_st_table_init(VALUE hash, const struct st_hash_type *type, st_index_t size)
{
st_init_existing_table_with_size(RHASH_ST_TABLE(hash), type, size);
RHASH_SET_ST_FLAG(hash);
}
void
rb_hash_st_table_set(VALUE hash, st_table *st)
{
HASH_ASSERT(st != NULL);
FL_SET_RAW((hash), RHASH_ST_TABLE_FLAG);
RHASH(hash)->as.st = st;
}
RHASH_SET_ST_FLAG(hash);
static void
hash_ar_table_set(VALUE hash, ar_table *ar)
{
HASH_ASSERT(RHASH_AR_TABLE_P(hash));
*(RHASH_AR_TABLE(hash)) = *ar;
hash_verify(hash);
*RHASH_ST_TABLE(hash) = *st;
}
#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG)
#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG)
static inline void
RHASH_AR_TABLE_BOUND_SET(VALUE h, st_index_t n)
{
@ -646,9 +626,6 @@ ar_alloc_table(VALUE hash)
ar_table *tab = RHASH_AR_TABLE(hash);
memset(tab, 0, sizeof(ar_table));
// RHASH_UNSET_ST_FLAG(hash);
// hash_ar_table_set(hash, tab);
RHASH_AR_TABLE_SIZE_SET(hash, 0);
RHASH_AR_TABLE_BOUND_SET(hash, 0);
@ -730,28 +707,29 @@ ar_try_convert_table(VALUE hash)
const unsigned size = RHASH_AR_TABLE_SIZE(hash);
st_table *new_tab;
st_index_t i;
if (size < RHASH_AR_TABLE_MAX_SIZE) {
return;
}
new_tab = st_init_table_with_size(&objhash, size * 2);
st_table tab;
st_table *new_tab = &tab;
rb_st_init_existing_table_with_size(new_tab, &objhash, size * 2);
for (i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) {
for (st_index_t i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) {
ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i);
st_add_direct(new_tab, pair->key, pair->val);
}
ar_free_and_clear_table(hash);
RHASH_ST_TABLE_SET(hash, new_tab);
return;
}
static st_table *
ar_force_convert_table(VALUE hash, const char *file, int line)
{
st_table *new_tab;
st_table tab;
new_tab = &tab;
if (RHASH_ST_TABLE_P(hash)) {
return RHASH_ST_TABLE(hash);
@ -760,13 +738,7 @@ ar_force_convert_table(VALUE hash, const char *file, int line)
if (RHASH_AR_TABLE(hash)) {
unsigned i, bound = RHASH_AR_TABLE_BOUND(hash);
#if defined(RHASH_CONVERT_TABLE_DEBUG) && RHASH_CONVERT_TABLE_DEBUG
rb_obj_info_dump(hash);
fprintf(stderr, "force_convert: %s:%d\n", file, line);
RB_DEBUG_COUNTER_INC(obj_hash_force_convert);
#endif
new_tab = st_init_table_with_size(&objhash, RHASH_AR_TABLE_SIZE(hash));
rb_st_init_existing_table_with_size(new_tab, &objhash, RHASH_AR_TABLE_SIZE(hash));
for (i = 0; i < bound; i++) {
if (ar_cleared_entry(hash, i)) continue;
@ -777,10 +749,13 @@ ar_force_convert_table(VALUE hash, const char *file, int line)
ar_free_and_clear_table(hash);
}
else {
new_tab = st_init_table(&objhash);
rb_st_init_existing_table_with_size(new_tab, &objhash, 0);
}
RHASH_ST_TABLE_SET(hash, new_tab);
new_tab = RHASH_ST_TABLE(hash);
return new_tab;
}
@ -1193,7 +1168,6 @@ ar_copy(VALUE hash1, VALUE hash2)
RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word;
RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
hash_ar_table_set(hash1, new_tab);
rb_gc_writebarrier_remember(hash1);
@ -1454,7 +1428,7 @@ static VALUE
hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone)
{
const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0);
NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0);
NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_SLOT_SIZE, 0);
RHASH_SET_IFNONE((VALUE)hash, ifnone);
@ -1498,7 +1472,7 @@ rb_hash_new_with_size(st_index_t size)
/* do nothing */
}
else if (size > RHASH_AR_TABLE_MAX_SIZE) {
RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size));
hash_st_table_init(ret, &objhash, size);
}
return ret;
}
@ -1981,10 +1955,12 @@ rb_hash_rehash(VALUE hash)
else if (RHASH_ST_TABLE_P(hash)) {
st_table *old_tab = RHASH_ST_TABLE(hash);
tmp = hash_alloc(0);
tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries);
RHASH_ST_TABLE_SET(tmp, tbl);
hash_st_table_init(tmp, old_tab->type, old_tab->num_entries);
tbl = RHASH_ST_TABLE(tmp);
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
st_free_table(old_tab);
RHASH_ST_TABLE_SET(hash, tbl);
RHASH_ST_CLEAR(tmp);
}
@ -2925,13 +2901,12 @@ rb_hash_replace(VALUE hash, VALUE hash2)
ar_free_and_clear_table(hash);
}
else {
st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_CLEAR(hash);
}
hash_copy(hash, hash2);
if (RHASH_EMPTY_P(hash2) && RHASH_ST_TABLE_P(hash2)) {
/* ident hash */
RHASH_ST_TABLE_SET(hash, st_init_table_with_size(RHASH_TYPE(hash2), 0));
hash_st_table_init(hash, RHASH_TYPE(hash2), 0);
}
rb_gc_writebarrier_remember(hash);
@ -4306,8 +4281,6 @@ rb_hash_compact_bang(VALUE hash)
return Qnil;
}
static st_table *rb_init_identtable_with_size(st_index_t size);
/*
* call-seq:
* hash.compare_by_identity -> self
@ -4349,10 +4322,11 @@ rb_hash_compare_by_id(VALUE hash)
HASH_ASSERT(RHASH_ST_TABLE_P(hash));
tmp = hash_alloc(0);
identtable = rb_init_identtable_with_size(RHASH_SIZE(hash));
RHASH_ST_TABLE_SET(tmp, identtable);
hash_st_table_init(tmp, &identhash, RHASH_SIZE(hash));
identtable = RHASH_ST_TABLE(tmp);
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
st_free_table(RHASH_ST_TABLE(hash));
RHASH_ST_TABLE_SET(hash, identtable);
RHASH_ST_CLEAR(tmp);
@ -4376,7 +4350,7 @@ VALUE
rb_ident_hash_new(void)
{
VALUE hash = rb_hash_new();
RHASH_ST_TABLE_SET(hash, st_init_table(&identhash));
hash_st_table_init(hash, &identhash, 0);
return hash;
}
@ -4384,7 +4358,7 @@ VALUE
rb_ident_hash_new_with_size(st_index_t size)
{
VALUE hash = rb_hash_new();
RHASH_ST_TABLE_SET(hash, st_init_table_with_size(&identhash, size));
hash_st_table_init(hash, &identhash, size);
return hash;
}
@ -4394,12 +4368,6 @@ rb_init_identtable(void)
return st_init_table(&identhash);
}
static st_table *
rb_init_identtable_with_size(st_index_t size)
{
return st_init_table_with_size(&identhash, size);
}
static int
any_p_i(VALUE key, VALUE value, VALUE arg)
{

View File

@ -104,6 +104,8 @@ st_table *rb_st_init_table(const struct st_hash_type *);
#define st_init_table rb_st_init_table
st_table *rb_st_init_table_with_size(const struct st_hash_type *, st_index_t);
#define st_init_table_with_size rb_st_init_table_with_size
st_table *rb_st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size);
#define st_init_existing_table_with_size rb_st_init_existing_table_with_size
st_table *rb_st_init_numtable(void);
#define st_init_numtable rb_st_init_numtable
st_table *rb_st_init_numtable_with_size(st_index_t);

View File

@ -40,6 +40,16 @@ enum ruby_rhash_flags {
RHASH_LEV_MAX = 127, /* 7 bits */
};
typedef struct ar_table_pair_struct {
VALUE key;
VALUE val;
} ar_table_pair;
typedef struct ar_table_struct {
/* 64bit CPU: 8B * 2 * 8 = 128B */
ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
} ar_table;
struct RHash {
struct RBasic basic;
const VALUE ifnone;
@ -47,13 +57,16 @@ struct RHash {
ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE];
VALUE word;
} ar_hint;
union {
st_table *st;
} as;
};
#define RHASH(obj) ((struct RHash *)(obj))
#ifndef MAX
# define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
#define RHASH_SLOT_SIZE (sizeof(struct RHash) + MAX(sizeof(ar_table), sizeof(st_table)))
#ifdef RHASH_IFNONE
# undef RHASH_IFNONE
#endif
@ -85,8 +98,6 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu
int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg);
VALUE rb_ident_hash_new_with_size(st_index_t size);
size_t rb_hash_size_as_embedded(VALUE hash);
static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h);
static inline VALUE RHASH_IFNONE(VALUE h);
static inline size_t RHASH_SIZE(VALUE h);
@ -126,13 +137,13 @@ RHASH_AR_TABLE_P(VALUE h)
static inline struct ar_table_struct *
RHASH_AR_TABLE(VALUE h)
{
return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st));
return (struct ar_table_struct *)((uintptr_t)h + sizeof(struct RHash));
}
static inline st_table *
RHASH_ST_TABLE(VALUE h)
{
return RHASH(h)->as.st;
return (st_table *)((uintptr_t)h + sizeof(struct RHash));
}
static inline VALUE
@ -173,7 +184,7 @@ RHASH_ST_SIZE(VALUE h)
static inline void
RHASH_ST_CLEAR(VALUE h)
{
RHASH(h)->as.st = NULL;
memset(RHASH_ST_TABLE(h), 0, sizeof(st_table));
FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG);
}

36
st.c
View File

@ -509,13 +509,9 @@ stat_col(void)
}
#endif
/* Create and return table with TYPE which can hold at least SIZE
entries. The real number of entries which the table can hold is
the nearest power of two for SIZE. */
st_table *
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
{
st_table *tab;
int n;
#ifdef HASH_LOG
@ -536,11 +532,7 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
if (n < 0)
return NULL;
#endif
tab = (st_table *) malloc(sizeof (st_table));
#ifndef RUBY
if (tab == NULL)
return NULL;
#endif
tab->type = type;
tab->entry_power = n;
tab->bin_power = features[n].bin_power;
@ -569,6 +561,30 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
return tab;
}
/* Create and return table with TYPE which can hold at least SIZE
entries. The real number of entries which the table can hold is
the nearest power of two for SIZE. */
st_table *
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
{
st_table *tab = malloc(sizeof(st_table));
#ifndef RUBY
if (tab == NULL)
return NULL;
#endif
#ifdef RUBY
st_init_existing_table_with_size(tab, type, size);
#else
if (st_init_existing_table_with_size(tab, type, size) == NULL) {
free(tab);
return NULL;
}
#endif
return tab;
}
size_t
st_table_size(const struct st_table *tbl)
{