From 7ba2506232d3fa6c4e82e3708c0ff746a1a8de5c Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Fri, 15 Dec 2023 03:42:40 +0900 Subject: [PATCH] check modifcation whil ar->st * delete `ar_try_convert` but use `ar_force_convert_table` to make program simple. * `ar_force_convert_table` checks hash modification while calling `#hash` method with the following strategy: 1. copy keys (and vals) of ar_table 2. calc hashes from keys 3. check copied keys and hash's keys. if not matched, repeat from 1 fix [Bug #20050] --- hash.c | 93 +++++++++++++++++++++++++++++++---------------------- parser_st.c | 1 + st.c | 7 ++++ 3 files changed, 62 insertions(+), 39 deletions(-) diff --git a/hash.c b/hash.c index 787b824da2..5d210a5e6e 100644 --- a/hash.c +++ b/hash.c @@ -694,61 +694,75 @@ hash_ar_free_and_clear_table(VALUE hash) HASH_ASSERT(RHASH_AR_TABLE_BOUND(hash) == 0); } -static void -ar_try_convert_table(VALUE hash) +void rb_st_add_direct_with_hash(st_table *tab, st_data_t key, st_data_t value, st_hash_t hash); // st.c + +enum ar_each_key_type { + ar_each_key_copy, + ar_each_key_cmp, + ar_each_key_insert, +}; + +static inline int +ar_each_key(ar_table *ar, int max, enum ar_each_key_type type, st_data_t *dst_keys, st_table *new_tab, st_hash_t *hashes) { - if (!RHASH_AR_TABLE_P(hash)) return; + for (int i = 0; i < max; i++) { + ar_table_pair *pair = &ar->pairs[i]; - const unsigned size = RHASH_AR_TABLE_SIZE(hash); - - if (size < RHASH_AR_TABLE_MAX_SIZE) { - return; + switch (type) { + case ar_each_key_copy: + dst_keys[i] = pair->key; + break; + case ar_each_key_cmp: + if (dst_keys[i] != pair->key) return 1; + break; + case ar_each_key_insert: + if (UNDEF_P(pair->key)) continue; // deleted entry + rb_st_add_direct_with_hash(new_tab, pair->key, pair->val, hashes[i]); + break; + } } - st_table tab; - st_table *new_tab = &tab; - rb_st_init_existing_table_with_size(new_tab, &objhash, size * 2); - - for (st_index_t i = 0; i < RHASH_AR_TABLE_MAX_BOUND; i++) { - ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i); - st_add_direct(new_tab, pair->key, pair->val); - } - - hash_ar_free_and_clear_table(hash); - RHASH_ST_TABLE_SET(hash, new_tab); + return 0; } static st_table * ar_force_convert_table(VALUE hash, const char *file, int line) { - st_table *new_tab; - st_table tab; - new_tab = &tab; - if (RHASH_ST_TABLE_P(hash)) { return RHASH_ST_TABLE(hash); } + else { + ar_table *ar = RHASH_AR_TABLE(hash); + st_hash_t hashes[RHASH_AR_TABLE_MAX_SIZE]; + unsigned int bound, size; - RUBY_ASSERT(RHASH_AR_TABLE(hash)); - { - unsigned i, bound = RHASH_AR_TABLE_BOUND(hash); + // prepare hash values + do { + st_data_t keys[RHASH_AR_TABLE_MAX_SIZE]; + bound = RHASH_AR_TABLE_BOUND(hash); + size = RHASH_AR_TABLE_SIZE(hash); + ar_each_key(ar, bound, ar_each_key_copy, keys, NULL, NULL); - rb_st_init_existing_table_with_size(new_tab, &objhash, RHASH_AR_TABLE_SIZE(hash)); + for (unsigned int i = 0; i < bound; i++) { + // do_hash calls #hash method and it can modify hash object + hashes[i] = UNDEF_P(keys[i]) ? 0 : ar_do_hash(keys[i]); + } - for (i = 0; i < bound; i++) { - if (ar_cleared_entry(hash, i)) continue; + // check if modified + if (UNLIKELY(!RHASH_AR_TABLE_P(hash))) return RHASH_ST_TABLE(hash); + if (UNLIKELY(RHASH_AR_TABLE_BOUND(hash) != bound)) continue; + if (UNLIKELY(ar_each_key(ar, bound, ar_each_key_cmp, keys, NULL, NULL))) continue; + } while (0); - ar_table_pair *pair = RHASH_AR_TABLE_REF(hash, i); - st_add_direct(new_tab, pair->key, pair->val); - } + // make st + st_table tab; + st_table *new_tab = &tab; + rb_st_init_existing_table_with_size(new_tab, &objhash, size); + ar_each_key(ar, bound, ar_each_key_insert, NULL, new_tab, hashes); hash_ar_free_and_clear_table(hash); + RHASH_ST_TABLE_SET(hash, new_tab); + return RHASH_ST_TABLE(hash); } - - RHASH_ST_TABLE_SET(hash, new_tab); - - new_tab = RHASH_ST_TABLE(hash); - - return new_tab; } static int @@ -1638,7 +1652,7 @@ rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, if (RHASH_AR_TABLE_P(hash)) { int result = ar_update(hash, key, func, arg); if (result == -1) { - ar_try_convert_table(hash); + ar_force_convert_table(hash, __FILE__, __LINE__); } else { return result; @@ -4704,8 +4718,9 @@ rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) if (ret != -1) { return ret; } - ar_try_convert_table(hash); + ar_force_convert_table(hash, __FILE__, __LINE__); } + tbl = RHASH_TBL_RAW(hash); return st_update(tbl, (st_data_t)key, add_new_i, (st_data_t)args); diff --git a/parser_st.c b/parser_st.c index c7a45d25e6..17f669e763 100644 --- a/parser_st.c +++ b/parser_st.c @@ -107,6 +107,7 @@ nonempty_memcpy(void *dest, const void *src, size_t n) #define st_get_key rb_parser_st_get_key #undef st_add_direct #define st_add_direct rb_parser_st_add_direct +#define rb_st_add_direct_with_hash rb_parser_st_add_direct_with_hash #undef st_insert2 #define st_insert2 rb_parser_st_insert2 #undef st_replace diff --git a/st.c b/st.c index 2825ed1eb6..b921b1bf3b 100644 --- a/st.c +++ b/st.c @@ -1173,6 +1173,13 @@ st_add_direct_with_hash(st_table *tab, } } +void +rb_st_add_direct_with_hash(st_table *tab, + st_data_t key, st_data_t value, st_hash_t hash) +{ + st_add_direct_with_hash(tab, key, value, hash); +} + /* Insert (KEY, VALUE) into table TAB. The table should not have entry with KEY before the insertion. */ void