From 543ebbcf8e07b3c84e7a920ffa597aa81f3655b6 Mon Sep 17 00:00:00 2001
From: Jitesh Chawla
Date: Sun, 9 Mar 2025 19:39:27 +0530
Subject: [PATCH] MDEV-35876 - speedup collation/charset lookup

Replaces O(n) linear scans for collation lookups with O(1) hash lookups to
eliminate performance bottlenecks as collation counts grow.
---
 mysys/charset.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/mysys/charset.c b/mysys/charset.c
index 9a18e48c2b0..69132d574f7 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -30,6 +30,7 @@
 #endif
 
 static HASH charset_name_hash;
+static HASH collation_name_hash;
 
 /*
   The code below implements this functionality:
@@ -44,17 +45,9 @@ static HASH charset_name_hash;
 static uint
 get_collation_number_internal(const char *name)
 {
-
-  CHARSET_INFO **cs;
-  for (cs= all_charsets;
-       cs < all_charsets + array_elements(all_charsets);
-       cs++)
-  {
-    if (cs[0] && cs[0]->coll_name.str &&
-        !my_strcasecmp_latin1(cs[0]->coll_name.str, name))
-      return cs[0]->number;
-  }
-  return 0;
+  CHARSET_INFO *cs= (CHARSET_INFO*) my_hash_search(&collation_name_hash,
+                                                   (uchar*) name, strlen(name));
+  return cs ? cs->number : 0;
 }
 
 
@@ -805,6 +798,14 @@ static const uchar *get_charset_key(const void *object, size_t *size,
   return (const uchar*) cs->cs_name.str;
 }
 
+static const uchar *get_collation_key(const void *object, size_t *length,
+                                      my_bool not_used __attribute__((unused)))
+{
+  CHARSET_INFO *cs= (CHARSET_INFO*) object;
+  *length= cs->coll_name.length;
+  return (const uchar*) cs->coll_name.str;
+}
+
 static void init_available_charsets(void)
 {
   char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
@@ -819,6 +820,10 @@ static void init_available_charsets(void)
                 &my_charset_latin1, 64, 0, 0, get_charset_key,
                 0, 0, HASH_UNIQUE);
 
+  my_hash_init2(key_memory_charsets, &collation_name_hash, 16,
+                &my_charset_latin1, 64, 0, 0, get_collation_key,
+                0, 0, HASH_UNIQUE);
+
   init_compiled_charsets(MYF(0));
 
   /* Copy compiled charsets */
@@ -839,6 +844,18 @@ static void init_available_charsets(void)
   my_charset_loader_init_mysys(&loader);
   strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
   my_read_charset_file(&loader, fname, MYF(0));
+
+  /* Populate the collation_name_hash */
+  for (cs= (struct charset_info_st**) all_charsets;
+       cs < (struct charset_info_st**) all_charsets +
+       array_elements(all_charsets);
+       cs++)
+  {
+    CHARSET_INFO *c= *cs;
+    if (c && c->coll_name.str)
+      my_hash_insert(&collation_name_hash, (uchar*) c);
+  }
+
   DBUG_VOID_RETURN;
 }
 
@@ -847,6 +864,7 @@ void free_charsets(void)
 {
   charsets_initialized= charsets_template;
   my_hash_free(&charset_name_hash);
+  my_hash_free(&collation_name_hash);
 }
 
 