From c94133a71d7691ad8e6aa5781475002f5676a550 Mon Sep 17 00:00:00 2001 From: bsrikanth-mariadb Date: Sat, 19 Apr 2025 18:02:51 -0400 Subject: [PATCH 1/9] MDEV-21510: In Optimizer Trace, print index name in chosen_access_method --- mysql-test/main/opt_trace.result | 13 ++++++++++ .../main/opt_trace_index_merge_innodb.result | 1 + mysql-test/main/opt_trace_selectivity.result | 1 + .../selectivity_innodb_notembedded.result | 1 + .../main/selectivity_notembedded.result | 1 + sql/opt_trace.cc | 25 ++++++++++++++----- 6 files changed, 36 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 1082cf1a492..5efe9010c22 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -1285,6 +1285,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b { ], "chosen_access_method": { "type": "ref", + "index": "a", "rows_read": 1, "rows_out": 1, "cost": 0.1821659, @@ -1340,6 +1341,7 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b { ], "chosen_access_method": { "type": "ref", + "index": "a", "rows_read": 1, "rows_out": 1, "cost": 0.1821659, @@ -2533,6 +2535,7 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 { ], "chosen_access_method": { "type": "ref", + "index": "a_b", "rows_read": 41, "rows_out": 41, "cost": 0.051379171, @@ -3012,6 +3015,7 @@ explain select * from t1 left join t2 on t2.a=t1.a { ], "chosen_access_method": { "type": "eq_ref", + "index": "PRIMARY", "rows_read": 1, "rows_out": 1, "cost": 0.007120904, @@ -3980,6 +3984,7 @@ explain select * from t1 where pk = 2 and a=5 and b=1 { ], "chosen_access_method": { "type": "ref", + "index": "pk_a_b", "rows_read": 1, "rows_out": 1, "cost": 0.000928812, @@ -4676,6 +4681,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { ], "chosen_access_method": { "type": "range", + "index": "a", "rows_read": 3, "rows_out": 3, "cost": 0.001755494, @@ -4702,6 +4708,7 @@ explain delete t0,t1 from t0, 
t1 where t0.a=t1.a and t1.a<3 { ], "chosen_access_method": { "type": "range", + "index": "a", "rows_read": 3, "rows_out": 3, "cost": 0.001755494, @@ -4744,6 +4751,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { ], "chosen_access_method": { "type": "ref", + "index": "a", "rows_read": 1, "rows_out": 1, "cost": 0.002376836, @@ -4795,6 +4803,7 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { ], "chosen_access_method": { "type": "ref", + "index": "a", "rows_read": 1.166666667, "rows_out": 1.166666667, "cost": 0.002392836, @@ -11275,6 +11284,7 @@ JS "chosen_access_method": { "type": "ref", + "index": "b", "rows_read": 1, "rows_out": 1, "cost": 0.01901531, @@ -11521,6 +11531,7 @@ JS "chosen_access_method": { "type": "ref", + "index": "a", "rows_read": 1, "rows_out": 1, "cost": 0.01840091, @@ -12906,6 +12917,7 @@ json_detailed(json_extract(trace, '$**.choose_best_splitting')) "chosen_access_method": { "type": "ref", + "index": "idx_a", "rows_read": 1.8367, "rows_out": 1.8367, "cost": 0.002051185, @@ -13201,6 +13213,7 @@ explain select * from t1 where a<10 and b between 10 and 50 and c < 10 { ], "chosen_access_method": { "type": "range", + "index": "a", "rows_read": 0.189, "rows_out": 0.017766, "cost": 0.006364199, diff --git a/mysql-test/main/opt_trace_index_merge_innodb.result b/mysql-test/main/opt_trace_index_merge_innodb.result index 02509aa9610..79d62df3846 100644 --- a/mysql-test/main/opt_trace_index_merge_innodb.result +++ b/mysql-test/main/opt_trace_index_merge_innodb.result @@ -227,6 +227,7 @@ explain select * from t1 where pk1 != 0 and key1 = 1 { ], "chosen_access_method": { "type": "ref", + "index": "key1", "rows_read": 1, "rows_out": 1, "cost": 0.00345856, diff --git a/mysql-test/main/opt_trace_selectivity.result b/mysql-test/main/opt_trace_selectivity.result index d6abad79637..26d5671f064 100644 --- a/mysql-test/main/opt_trace_selectivity.result +++ b/mysql-test/main/opt_trace_selectivity.result @@ -186,6 +186,7 @@ JS 
"chosen_access_method": { "type": "ref", + "index": "a", "rows_read": 6, "rows_out": 0.6, "cost": 0.005388489, diff --git a/mysql-test/main/selectivity_innodb_notembedded.result b/mysql-test/main/selectivity_innodb_notembedded.result index 8cb25772ee6..cf965d1c08c 100644 --- a/mysql-test/main/selectivity_innodb_notembedded.result +++ b/mysql-test/main/selectivity_innodb_notembedded.result @@ -298,6 +298,7 @@ JS "chosen_access_method": { "type": "range", + "index": "PRIMARY", "rows_read": 5, "rows_out": 2.490196078, "cost": 0.00948507, diff --git a/mysql-test/main/selectivity_notembedded.result b/mysql-test/main/selectivity_notembedded.result index ba7d72eca57..633e2b233c9 100644 --- a/mysql-test/main/selectivity_notembedded.result +++ b/mysql-test/main/selectivity_notembedded.result @@ -293,6 +293,7 @@ JS "chosen_access_method": { "type": "range", + "index": "PRIMARY", "rows_read": 5, "rows_out": 2.490196078, "cost": 0.010014472, diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc index 93090acfb96..449146bbaf6 100644 --- a/sql/opt_trace.cc +++ b/sql/opt_trace.cc @@ -712,12 +712,25 @@ void print_best_access_for_table(THD *thd, POSITION *pos) DBUG_ASSERT(thd->trace_started()); Json_writer_object obj(thd, "chosen_access_method"); - obj. - add("type", pos->type == JT_ALL ? "scan" : join_type_str[pos->type]). - add("rows_read", pos->records_read). - add("rows_out", pos->records_out). - add("cost", pos->read_time). - add("uses_join_buffering", pos->use_join_buffer); + + obj.add("type", pos->type == JT_ALL ? 
"scan" : join_type_str[pos->type]); + + if (pos->type == JT_EQ_REF || pos->type == JT_REF || pos->type == JT_FT) + { + obj.add("index", pos->key->table->key_info[pos->key->key].name); + } + + if (pos->type == JT_RANGE) + { + obj.add("index", + pos->table->table->key_info[pos->table->quick->index].name); + } + + obj.add("rows_read", pos->records_read) + .add("rows_out", pos->records_out) + .add("cost", pos->read_time) + .add("uses_join_buffering", pos->use_join_buffer); + if (pos->range_rowid_filter_info) { uint key_no= pos->range_rowid_filter_info->get_key_no(); From da5a4d05b9da58705498a42b6ffa5d9211f446af Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 15 Jan 2025 09:18:12 +1100 Subject: [PATCH 2/9] MDEV-35850 make HOSTNAME a cmake configure variable As seen with openwrt and some other distros, the determination of hostname can sometimes need alternate commands. This provides a cmake option HOSTNAME for non-windows machines for the mariadb-install-db and mariadbd-safe scripts and the support-files init scripts. 
--- CMakeLists.txt | 1 + scripts/CMakeLists.txt | 1 - support-files/CMakeLists.txt | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 56975f82b32..943d4b4f0c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -379,6 +379,7 @@ IF(WIN32) ELSE() SET(DEFAULT_MYSQL_HOME ${CMAKE_INSTALL_PREFIX}) SET(SHAREDIR ${INSTALL_MYSQLSHAREDIRABS}) + SET(HOSTNAME "uname -n" CACHE STRING "Command for determining hostname") ENDIF() SET(DEFAULT_BASEDIR "${DEFAULT_MYSQL_HOME}") diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 9e50ae833b1..5c463e52107 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -177,7 +177,6 @@ ELSE() SET(CHECK_PID "kill -s SIGCONT $PID > /dev/null 2> /dev/null") ENDIF() -SET(HOSTNAME "uname -n") SET(MYSQLD_USER "mysql") SET(MYSQLD_GROUP "mysql") ENDIF(UNIX) diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index ee1d420e126..a44ed21bdbe 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -31,7 +31,6 @@ ELSE() SET(MYSQLD_USER "mysql") SET(MYSQLD_GROUP "mysql") SET(ini_file_extension "cnf") - SET(HOSTNAME "uname -n") # Define directly here, as cmake/install_layout.cmake has no LOGDIR to be inherited SET(su_user "su mysql mysql") From 791fcea1d7938cf88bd4645c8f1f5b2e52d06306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 May 2025 12:27:36 +0300 Subject: [PATCH 3/9] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 283afc0001c..a46cc7020c5 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=11 -MYSQL_VERSION_PATCH=12 +MYSQL_VERSION_PATCH=13 SERVER_MATURITY=stable From a7278a30248cb53173957558aaa3374a0753e6b8 Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Mon, 28 Apr 2025 10:22:58 -0600 Subject: [PATCH 4/9] MDEV-36663: Testcase Fixup There were two issues with the test: 1. 
A race between wait_for_status_var.inc and a status variable, where the status variable Rpl_semi_sync_master_status could be ON before the semi-sync connection finished establishing, resulting in Rpl_semi_sync_master_clients showing 0 (instead of 1). To fix this, we simply instead wait for Rpl_semi_sync_master_clients to be 1 before proceeding. 2. Another race between wait_condition.inc and a status variable, where the wait_condition waited on a process_list command of 'BINLOG DUMP' to disappear to infer the binlog dump thread was killed, after which we verified semi-sync state was correct using status variables. However, the 'BINLOG DUMP' command is overridden with a killed status before the semi-sync tear-down happens, and thereby we could see invalid values. The fix for this is to change the wait_condition to instead wait until the connection with the replication user is gone, because that stays through the binlog dump thread tear-down life-cycle. --- mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test b/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test index cab9caf8ac4..e9d6e241f84 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test @@ -49,8 +49,8 @@ SET @@GLOBAL.rpl_semi_sync_slave_enabled= 1; --connection master --echo # Verify Semi-Sync is active ---let $status_var= Rpl_semi_sync_master_status ---let $status_var_value= ON +--let $status_var= Rpl_semi_sync_master_clients +--let $status_var_value= 1 --source include/wait_for_status_var.inc SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; @@ -67,7 +67,7 @@ STOP SLAVE; --echo # MDEV-36663: Verifying dump thread connection is killed.. # Prior to MDEV-36663 fixes, this would time out and # Rpl_semi_sync_master_clients would remain 1. 
---let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.PROCESSLIST WHERE COMMAND = 'Binlog Dump' +--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.PROCESSLIST WHERE USER = 'replssl' --source include/wait_condition.inc --let $n_master_clients= query_get_value(SHOW STATUS LIKE 'Rpl_semi_sync_master_clients', Value, 1) From 0c18e5a2927369fd5acb2a4b6a333ad928bffd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 8 May 2025 11:18:16 +0300 Subject: [PATCH 5/9] MDEV-36760 log_t::append_prepare_wait(): Bogus assertion on write_lsn log_t::append_prepare_wait(): Do not attempt to read log_sys.write_lsn because it is not protected by log_sys.latch but by write_lock, which we cannot hold here. The assertion could fail if log_t::write_buf() is executing concurrently, and it has not yet executed log_write_buf() or updated log_sys.write_lsn. Fixes up commit acd071f599f416ddb4821dec485c4d912844213f (MDEV-21923) --- storage/innobase/mtr/mtr0mtr.cc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 716dac624d5..3c984da3d5d 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -916,17 +916,16 @@ ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept { got_ex: const uint64_t l= write_lsn_offset.load(std::memory_order_relaxed); - const lsn_t lsn{base_lsn.load(std::memory_order_relaxed)}; - ut_d(lsn_t ll= lsn + (l & (WRITE_BACKOFF - 1))); - ut_ad(is_mmap() - ? 
ll - get_flushed_lsn(std::memory_order_relaxed) < capacity() - : ll - write_lsn - ((write_size - 1) & (write_lsn - first_lsn)) < - buf_size); + const lsn_t lsn= base_lsn.load(std::memory_order_relaxed) + + (l & (WRITE_BACKOFF - 1)); waits++; #ifdef HAVE_PMEM const bool is_pmem{is_mmap()}; if (is_pmem) - persist(lsn + (l & (WRITE_BACKOFF - 1))); + { + ut_ad(lsn - get_flushed_lsn(std::memory_order_relaxed) < capacity()); + persist(lsn); + } #endif latch.wr_unlock(); /* write_buf() or persist() will clear the WRITE_BACKOFF flag, @@ -934,7 +933,7 @@ ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept #ifdef HAVE_PMEM if (!is_pmem) #endif - log_write_up_to(lsn + (l & (WRITE_BACKOFF - 1)), false); + log_write_up_to(lsn, false); if (ex) { latch.wr_lock(SRW_LOCK_CALL); From 56e0be34bc5d1e967ad610a9b8e24c3f5553bdd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 May 2025 12:27:42 +0300 Subject: [PATCH 6/9] MDEV-36780: InnoDB buffer pool reserves all assigned memory In commit b6923420f326ac030e4f3ef89a2acddb45eccb30 (MDEV-29445) we started to specify the MAP_POPULATE flag for allocating the InnoDB buffer pool. This would cause a lot of time to be spent on __mm_populate() inside the Linux kernel, such as 16 seconds to pre-fault or commit innodb_buffer_pool_size=64G. Let us revert to the previous way of allocating the buffer pool at startup. Note: An attempt to increase the buffer pool size by SET GLOBAL innodb_buffer_pool_size (up to innodb_buffer_pool_size_max) will invoke my_virtual_mem_commit(), which will use MAP_POPULATE to zero-fill and prefault the requested additional memory area, blocking buf_pool.mutex. Before MDEV-29445 we allocated the InnoDB buffer pool by invoking mmap(2) once (via my_large_malloc()). After the change, we would invoke mmap(2) twice, first via my_virtual_mem_reserve() and then via my_virtual_mem_commit(). 
Outside Microsoft Windows, we are reverting to my_large_malloc()-like allocation. my_virtual_mem_reserve(): Define only for Microsoft Windows. Other platforms should invoke my_large_virtual_alloc() and update_malloc_size() instead of my_virtual_mem_reserve() and my_virtual_mem_commit(). my_large_virtual_alloc(): Define only outside Microsoft Windows. Do not specify MAP_NORESERVE nor MAP_POPULATE, to preserve compatibility with my_large_malloc(). Were MAP_POPULATE specified, the mmap() system call would be significantly slower, for example 18 seconds to reserve 64 GiB upfront. --- include/my_sys.h | 4 ++- include/my_virtual_mem.h | 2 ++ mysys/my_largepage.c | 46 +++------------------------------ mysys/my_virtual_mem.c | 12 +++------ storage/innobase/buf/buf0buf.cc | 8 ++++++ 5 files changed, 19 insertions(+), 53 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index 5eca29b3274..23799f661c4 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -177,7 +177,9 @@ extern my_bool my_use_large_pages; int my_init_large_pages(void); uchar *my_large_malloc(size_t *size, myf my_flags); -#if defined _WIN32 || defined HAVE_MMAP +#ifdef _WIN32 +/* On Windows, use my_virtual_mem_reserve() and my_virtual_mem_commit(). 
*/ +#else char *my_large_virtual_alloc(size_t *size); #endif void my_large_free(void *ptr, size_t size); diff --git a/include/my_virtual_mem.h b/include/my_virtual_mem.h index 56b2f03b329..8f9f6660e3b 100644 --- a/include/my_virtual_mem.h +++ b/include/my_virtual_mem.h @@ -24,7 +24,9 @@ extern "C" { #endif +# ifdef _WIN32 char *my_virtual_mem_reserve(size_t *size); +# endif char *my_virtual_mem_commit(char *ptr, size_t size); void my_virtual_mem_decommit(char *ptr, size_t size); void my_virtual_mem_release(char *ptr, size_t size); diff --git a/mysys/my_largepage.c b/mysys/my_largepage.c index 240c8e84fc7..22561ddc564 100644 --- a/mysys/my_largepage.c +++ b/mysys/my_largepage.c @@ -423,7 +423,7 @@ uchar *my_large_malloc(size_t *size, myf my_flags) DBUG_RETURN(ptr); } -#ifdef _WIN32 +#ifndef _WIN32 /** Special large pages allocator, with possibility to commit to allocating more memory later. @@ -434,37 +434,10 @@ char *my_large_virtual_alloc(size_t *size) char *ptr; DBUG_ENTER("my_large_virtual_alloc"); - if (my_use_large_pages) - { - size_t s= *size; - s= MY_ALIGN(s, (size_t) my_large_page_size); - ptr= VirtualAlloc(NULL, s, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, - PAGE_READWRITE); - if (ptr) - { - *size= s; - DBUG_RETURN(ptr); - } - } - - DBUG_RETURN(VirtualAlloc(NULL, *size, MEM_RESERVE, PAGE_READWRITE)); -} -#elif defined HAVE_MMAP -/** - Special large pages allocator, with possibility to commit to allocating - more memory later. - Every implementation returns a zero filled buffer here. 
-*/ -char *my_large_mmap(size_t *size, int prot) -{ - char *ptr; - DBUG_ENTER("my_large_virtual_alloc"); - if (my_use_large_pages) { size_t large_page_size; int page_i= 0; - prot= PROT_READ | PROT_WRITE; while ((large_page_size= my_next_large_page_size(*size, &page_i)) != 0) { @@ -488,7 +461,7 @@ char *my_large_mmap(size_t *size, int prot) OS_MAP_ANON; size_t aligned_size= MY_ALIGN(*size, (size_t) large_page_size); - ptr= mmap(NULL, aligned_size, prot, mapflag, -1, 0); + ptr= mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, mapflag, -1, 0); if (ptr == (void*) -1) { ptr= NULL; @@ -511,10 +484,7 @@ char *my_large_mmap(size_t *size, int prot) } } - ptr= mmap(NULL, *size, prot, -# ifdef MAP_NORESERVE - MAP_NORESERVE | -# endif + ptr= mmap(NULL, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | OS_MAP_ANON, -1, 0); if (ptr == MAP_FAILED) { @@ -524,16 +494,6 @@ char *my_large_mmap(size_t *size, int prot) DBUG_RETURN(ptr); } - -/** - Special large pages allocator, with possibility to commit to allocating - more memory later. - Every implementation returns a zero filled buffer here. -*/ -char *my_large_virtual_alloc(size_t *size) -{ - return my_large_mmap(size, PROT_READ | PROT_WRITE); -} #endif /** diff --git a/mysys/my_virtual_mem.c b/mysys/my_virtual_mem.c index 47e3a29788a..649d8c693ff 100644 --- a/mysys/my_virtual_mem.c +++ b/mysys/my_virtual_mem.c @@ -34,13 +34,9 @@ We try to respect use_large_pages setting, on Windows and Linux */ -#ifndef _WIN32 -char *my_large_mmap(size_t *size, int prot); -#endif - +#ifdef _WIN32 char *my_virtual_mem_reserve(size_t *size) { -#ifdef _WIN32 DWORD flags= my_use_large_pages ? 
MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT : MEM_RESERVE; @@ -53,10 +49,8 @@ char *my_virtual_mem_reserve(size_t *size) my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size); } return ptr; -#else - return my_large_mmap(size, PROT_NONE); -#endif } +#endif #if defined _WIN32 && !defined DBUG_OFF static my_bool is_memory_committed(char *ptr, size_t size) @@ -88,7 +82,7 @@ char *my_virtual_mem_commit(char *ptr, size_t size) } #else if (my_use_large_pages) - /* my_large_mmap() already created a read/write mapping. */; + /* my_large_virtual_alloc() already created a read/write mapping. */; else { # ifdef _AIX diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 261a796141d..b2fde057c43 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1336,7 +1336,11 @@ bool buf_pool_t::create() noexcept retry: { NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; +#ifdef _WIN32 memory_unaligned= my_virtual_mem_reserve(&size); +#else + memory_unaligned= my_large_virtual_alloc(&size); +#endif } if (!memory_unaligned) @@ -1370,6 +1374,7 @@ bool buf_pool_t::create() noexcept #ifdef UNIV_PFS_MEMORY PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, actual_size, &owner); #endif +#ifdef _WIN32 if (!my_virtual_mem_commit(memory, actual_size)) { my_virtual_mem_release(memory_unaligned, size_unaligned); @@ -1377,6 +1382,9 @@ bool buf_pool_t::create() noexcept memory_unaligned= nullptr; goto oom; } +#else + update_malloc_size(actual_size, 0); +#endif #ifdef HAVE_LIBNUMA if (srv_numa_interleave) From bb48d7bc812baf7cbd71c9e41b29fac6288cec97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 May 2025 12:27:46 +0300 Subject: [PATCH 7/9] MDEV-36781: Assertion i < BUF_BUDDY_SIZES failed in buf_buddy_shrink() buf_buddy_shrink(): Properly cover the case when KEY_BLOCK_SIZE corresponds to the innodb_page_size, that is, the ROW_FORMAT=COMPRESSED page frame is directly allocated from the buffer pool, not via the 
binary buddy allocator. buf_LRU_check_size_of_non_data_objects(): Avoid a crash when the buffer pool is being shrunk. buf_pool_t::shrink(): Abort if over 95% of the shrunk buffer pool would be occupied by the adaptive hash index or record locks. --- .../suite/innodb/r/innodb_buffer_pool_resize.result | 12 +++++++++++- .../suite/innodb/t/innodb_buffer_pool_resize.test | 11 ++++++++--- storage/innobase/buf/buf0buddy.cc | 13 ++++--------- storage/innobase/buf/buf0buf.cc | 3 +++ storage/innobase/buf/buf0lru.cc | 5 ++++- storage/innobase/include/buf0buddy.h | 2 +- 6 files changed, 31 insertions(+), 15 deletions(-) diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result index 5db74a71636..66b36f18bc8 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result @@ -12,12 +12,19 @@ select @@innodb_buffer_pool_size; 10485760 create table t1 (id int primary key, val int not null) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +create table t2 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$kbs; SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t2 SELECT seq*4,seq*4 FROM seq_1_to_16384; set global innodb_buffer_pool_size = 7340032; select count(val) from t1; count(val) 262144 +select count(val) from t2; +count(val) +16384 set global innodb_adaptive_hash_index=OFF; set global innodb_buffer_pool_size = 24117248; set global innodb_buffer_pool_size = 26214400; @@ -29,7 +36,10 @@ select @@innodb_buffer_pool_size; select count(val) from t1; count(val) 262144 -drop table t1; +select count(val) from t2; +count(val) +16384 +drop table t1,t2; SET GLOBAL innodb_max_purge_lag_wait = 0; SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; SET @save_pct_lwm= 
@@GLOBAL.innodb_max_dirty_pages_pct_lwm; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test index 612a0c1be64..4cbbdba9974 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test @@ -21,6 +21,7 @@ set global innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; select @@innodb_buffer_pool_size; +let $kbs=`SELECT CAST(@@innodb_page_size / 1024 AS INT)`; # fill buffer pool --disable_query_log @@ -29,9 +30,13 @@ SET GLOBAL innodb_read_only_compressed=OFF; --enable_query_log create table t1 (id int primary key, val int not null) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +evalp create table t2 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$kbs; SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t2 SELECT seq*4,seq*4 FROM seq_1_to_16384; --disable_query_log SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; @@ -42,6 +47,7 @@ SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; set global innodb_buffer_pool_size = 7340032; select count(val) from t1; +select count(val) from t2; set global innodb_adaptive_hash_index=OFF; @@ -52,8 +58,9 @@ set global innodb_buffer_pool_size = 26214400; select @@innodb_buffer_pool_size; select count(val) from t1; +select count(val) from t2; -drop table t1; +drop table t1,t2; SET GLOBAL innodb_max_purge_lag_wait = 0; SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; @@ -66,8 +73,6 @@ SELECT variable_value = 0 FROM information_schema.global_status WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; --source include/wait_condition.inc -# this may occasionally be aborted on a heavily loaded builder ---error 0,ER_WRONG_USAGE SET GLOBAL 
innodb_buffer_pool_size = @old_innodb_buffer_pool_size; SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index 6da47d84307..a56f9047eea 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -637,7 +637,7 @@ func_exit: buf_buddy_add_to_free(reinterpret_cast(buf), i); } -/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::resize(). +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::shrink(). @param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page @param block uncompressed block for storage @return block @@ -672,10 +672,9 @@ buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept bpage->zip.data= static_cast(dst); buf_pool.buddy_stat[i].relocated++; - for (;;) + while (i < BUF_BUDDY_SIZES) { MEM_UNDEFINED(src, BUF_BUDDY_LOW << i); - ut_ad(i < BUF_BUDDY_SIZES); /* Try to combine adjacent blocks. 
*/ buf_buddy_free_t *buddy= reinterpret_cast (buf_buddy_get(static_cast(src), BUF_BUDDY_LOW << i)); @@ -684,20 +683,16 @@ buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept { ut_ad(!buf_pool.contains_zip(src, BUF_BUDDY_LOW_SHIFT + i)); buf_buddy_add_to_free(static_cast(src), i); - break; + return block; } /* The buddy is free: recombine */ buf_buddy_remove_from_free(buddy, i); i++; src= ut_align_down(src, BUF_BUDDY_LOW << i); - if (i == BUF_BUDDY_SIZES) - { - buf_buddy_block_free(src); - break; - } } + buf_buddy_block_free(src); return block; } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index b2fde057c43..f9769f3fb18 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1796,6 +1796,9 @@ ATTRIBUTE_COLD buf_pool_t::shrink_status buf_pool_t::shrink(size_t size) goto next; } + if (UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < usable_size() / 20) + return SHRINK_ABORT; + mysql_mutex_lock(&flush_list_mutex); if (LRU_warned && !UT_LIST_GET_FIRST(free)) diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 1f5d6eab259..bbf2014608d 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -276,7 +276,10 @@ static void buf_LRU_check_size_of_non_data_objects() noexcept auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU); - if (s < curr_size / 20) + if (s >= curr_size / 20); + else if (buf_pool.is_shrinking()) + buf_pool.LRU_warn(); + else { sql_print_error("[FATAL] InnoDB: Over 95 percent of the buffer pool is" " occupied by lock heaps" diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h index 9ac26c7d4be..11c42307b47 100644 --- a/storage/innobase/include/buf0buddy.h +++ b/storage/innobase/include/buf0buddy.h @@ -76,7 +76,7 @@ inline void buf_buddy_free(void* buf, ulint size) noexcept } ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) -/** Reallocate a 
ROW_FORMAT=COMPRESSED page frame during buf_pool_t::resize(). +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::shrink(). @param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page @param block uncompressed block for storage @return block From 8fb09426b98583916ccfd4f8c49741adc115bac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 May 2025 12:27:50 +0300 Subject: [PATCH 8/9] MDEV-36759: Huge performance drop In commit b6923420f326ac030e4f3ef89a2acddb45eccb30 (MDEV-29445) some hash tables were accidentally created with the minimum size (101 entries) instead of correctly deriving the size from the initial innodb_buffer_pool_size. This led to very long hash bucket chains, which are very slow to traverse. ut_find_prime(): Assert that the size is nonzero in order to catch this type of regression in the future. innodb_init_params(): Do not bother reading buf_pool.curr_size() when it is known to be 0. srv_start(): Correctly initialize srv_lock_table_size to 5 times buf_pool.curr_size(), that is, the buffer pool size in pages, between invoking buf_pool.create() and lock_sys.create(). btr_search_enable(), dict_sys_t::create(), dict_sys_t::resize(): Correctly refer to buf_pool.curr_pool_size(), that is, innodb_buffer_pool_size in bytes, when calculating the hash table size. In MDEV-29445 the expressions buf_pool_get_curr_size() were accidentally replaced with buf_pool.curr_size(). 
--- storage/innobase/btr/btr0sea.cc | 2 +- storage/innobase/dict/dict0dict.cc | 4 ++-- storage/innobase/handler/ha_innodb.cc | 1 - storage/innobase/srv/srv0start.cc | 2 +- storage/innobase/ut/ut0rnd.cc | 2 ++ 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 75256526a32..4bfea0f3100 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -276,7 +276,7 @@ ATTRIBUTE_COLD void btr_search_enable(bool resize) } btr_search_x_lock_all(); - ulint hash_size = buf_pool.curr_size() / sizeof(void *) / 64; + ulint hash_size = buf_pool.curr_pool_size() / sizeof(void *) / 64; if (btr_search_sys.parts[0].heap) { ut_ad(btr_search_enabled); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index a155ce4c0e1..7d6b704f086 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -933,7 +933,7 @@ void dict_sys_t::create() noexcept UT_LIST_INIT(table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(table_non_LRU, &dict_table_t::table_LRU); - const ulint hash_size = buf_pool.curr_size() + const ulint hash_size = buf_pool.curr_pool_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); @@ -4399,7 +4399,7 @@ void dict_sys_t::resize() noexcept table_id_hash.free(); temp_id_hash.free(); - const ulint hash_size = buf_pool.curr_size() + const ulint hash_size = buf_pool.curr_pool_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); table_id_hash.create(hash_size); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f1c48ccfcb7..d03ef08d1db 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4075,7 +4075,6 @@ static int innodb_init_params() #else ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); #endif - srv_lock_table_size = 5 * buf_pool.curr_size(); 
DBUG_RETURN(0); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 3577cd7b118..ba8b202191d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1311,7 +1311,7 @@ dberr_t srv_start(bool create_new_db) log_sys.create(); recv_sys.create(); - lock_sys.create(srv_lock_table_size); + lock_sys.create(srv_lock_table_size = 5 * buf_pool.curr_size()); srv_startup_is_before_trx_rollback_phase = true; diff --git a/storage/innobase/ut/ut0rnd.cc b/storage/innobase/ut/ut0rnd.cc index a2e569514cb..0e0e0004bb1 100644 --- a/storage/innobase/ut/ut0rnd.cc +++ b/storage/innobase/ut/ut0rnd.cc @@ -48,6 +48,8 @@ ut_find_prime( ulint pow2; ulint i; + ut_ad(n); + n += 100; pow2 = 1; From f5b5de9cf9e4b68cbfee863e1453e66635cba1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 May 2025 13:43:53 +0300 Subject: [PATCH 9/9] bump the VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0a37eafcf46..f046bd823a0 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=11 MYSQL_VERSION_MINOR=4 -MYSQL_VERSION_PATCH=6 +MYSQL_VERSION_PATCH=7 SERVER_MATURITY=stable