From 4bf7c966b362eb9767266f04b04388aa77e22b36 Mon Sep 17 00:00:00 2001 From: Monty Date: Sat, 27 Jul 2024 13:34:26 +0300 Subject: [PATCH] MDEV-34664: Add an option to fix InnoDB's doubling of secondary index cardinalities (With trivial fixes by sergey@mariadb.com) Added option fix_innodb_cardinality to optimizer_adjust_secondary_key_costs Using fix_innodb_cardinality disables the 'divide by 2' of rec_per_key_int in InnoDB that in effect doubles the Cardinality for secondary keys. This has the biggest effect for indexes where a few rows has the same key value. Using this may also cause table scans for very small tables (which in some cases may be better than an index scan). The user visible effect is that 'SHOW INDEX FROM table_name' will for InnoDB show the true Cardinality (and not 2x the real value). It will also allow the optimizer to chose a better index in some cases as the division by 2 could have a bad effect for tables with 2-5 identical values per key. A few notes about using fix_innodb_cardinality: - It has direct affect for SHOW INDEX FROM table_name. SHOW INDEX will also update the statistics in table share. - The effect of fix_innodb_cardinality for query plans or EXPLAIN is only visible after first open of the table. This is why one must do a flush tables or use SHOW INDEX for the option to take effect. - Using fix_innodb_cardinality can thus affect all user in their query plans if they are using the same tables. Because of this, it is strongly recommended that one uses optimizer_adjust_secondary_key_costs=fix_innodb_cardinality mainly in configuration files to not cause issues for other users. --- mysql-test/main/mysqld--help.result | 7 ++- mysql-test/main/secondary_key_costs.result | 63 +++++++++++++++++++ mysql-test/main/secondary_key_costs.test | 38 +++++++++++ .../sys_vars/r/sysvars_server_embedded.result | 4 +- .../r/sysvars_server_notembedded.result | 4 +- sql/sql_class.cc | 11 ++++ sql/sql_class.h | 1 + sql/sql_priv.h | 1 + sql/sys_vars.cc | 15 +++-- storage/innobase/handler/ha_innodb.cc | 3 +- 10 files changed, 133 insertions(+), 14 deletions(-) diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 79c0d7005f2..cabf8037508 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -719,9 +719,10 @@ The following specify which files/extra groups are read (specified before remain costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic - forced index in GROUP BY. This variable will be deleted - in MariaDB 11.0 as it is not needed with the new 11.0 - optimizer. + forced index in GROUP BY. fix_innodb_cardinality = + Disable doubling of the Cardinality for InnoDB secondary + keys. This variable will be deleted in MariaDB 11.0 as it + is not needed with the new 11.0 optimizer. Use 'ALL' to set all combinations. --optimizer-max-sel-arg-weight=# The maximum weight of the SEL_ARG graph. Set to 0 for no diff --git a/mysql-test/main/secondary_key_costs.result b/mysql-test/main/secondary_key_costs.result index dbdaaa3ed95..b246b666115 100644 --- a/mysql-test/main/secondary_key_costs.result +++ b/mysql-test/main/secondary_key_costs.result @@ -115,3 +115,66 @@ b sum(d) 6 125005000 8 125015000 drop table t1; +# +# MDEV-34664: fix_innodb_cardinality +# +set @save_userstat=@@global.userstat; +set @save_ispsp=@@global.innodb_stats_persistent_sample_pages; +set @@global.innodb_stats_persistent_sample_pages=20; +set @@global.userstat=on; +set use_stat_tables=PREFERABLY_FOR_QUERIES; +create or replace table t1 (a int primary key, b int, c int, d int, key(b,c,d)) engine=innodb; +insert into t1 select seq,seq/100,seq/60,seq/10 from seq_1_to_1000; +create or replace table t2 (a int); +insert into t2 values (1),(2),(3); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select count(distinct b),count(distinct b,c), count(distinct b,c,d) from t1; +count(distinct b) count(distinct b,c) count(distinct b,c,d) +11 25 125 +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored +t1 0 PRIMARY 1 a A 1000 NULL NULL BTREE NO +t1 1 b 1 b A 22 NULL NULL YES BTREE NO +t1 1 b 2 c A 50 NULL NULL YES BTREE NO +t1 1 b 3 d A 250 NULL NULL YES BTREE NO +explain select * from t1,t2 where t1.b=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 Using where +1 SIMPLE t1 ref b b 5 test.t2.a 45 Using index +set @@optimizer_adjust_secondary_key_costs=8; +explain select * from t1,t2 where t1.b=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 Using where +1 SIMPLE t1 ref b b 5 test.t2.a 45 Using index +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored +t1 0 PRIMARY 1 a A 1000 NULL NULL BTREE NO +t1 1 b 1 b A 11 NULL NULL YES BTREE NO +t1 1 b 2 c A 25 NULL NULL YES BTREE NO +t1 1 b 3 d A 125 NULL NULL YES BTREE NO +flush tables; +explain select * from t1,t2 where t1.b=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 Using where +1 SIMPLE t1 ref b b 5 test.t2.a 90 Using index +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored +t1 0 PRIMARY 1 a A 1000 NULL NULL BTREE NO +t1 1 b 1 b A 11 NULL NULL YES BTREE NO +t1 1 b 2 c A 25 NULL NULL YES BTREE NO +t1 1 b 3 d A 125 NULL NULL YES BTREE NO +connect user2, localhost, root,,; +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored +t1 0 PRIMARY 1 a A 1000 NULL NULL BTREE NO +t1 1 b 1 b A 22 NULL NULL YES BTREE NO +t1 1 b 2 c A 50 NULL NULL YES BTREE NO +t1 1 b 3 d A 250 NULL NULL YES BTREE NO +connection default; +disconnect user2; +drop table t1,t2; +set global userstat=@save_userstat; +set global innodb_stats_persistent_sample_pages=@save_ispsp; +set @@optimizer_adjust_secondary_key_costs=default; diff --git a/mysql-test/main/secondary_key_costs.test b/mysql-test/main/secondary_key_costs.test index bf662d4078d..32e30156a08 100644 --- a/mysql-test/main/secondary_key_costs.test +++ b/mysql-test/main/secondary_key_costs.test @@ -1,6 +1,8 @@ --source include/have_sequence.inc --source include/not_embedded.inc --source include/have_innodb.inc +# Testcase for MDEV-33306 takes ~6 minutes with valgrind: +--source include/not_valgrind.inc # # Show the costs for rowid filter @@ -72,3 +74,39 @@ set @@optimizer_adjust_secondary_key_costs="disable_forced_index_in_group_by"; explain select b, sum(d) from t1 where c=0 group by b; select b, sum(d) from t1 where c=0 group by b; drop table t1; + +--echo # +--echo # MDEV-34664: fix_innodb_cardinality +--echo # + +set @save_userstat=@@global.userstat; +set @save_ispsp=@@global.innodb_stats_persistent_sample_pages; +set @@global.innodb_stats_persistent_sample_pages=20; +set @@global.userstat=on; +set use_stat_tables=PREFERABLY_FOR_QUERIES; + +create or replace table t1 (a int primary key, b int, c int, d int, key(b,c,d)) engine=innodb; +insert into t1 select seq,seq/100,seq/60,seq/10 from seq_1_to_1000; +create or replace table t2 (a int); +insert into t2 values (1),(2),(3); +analyze table t1; +select count(distinct b),count(distinct b,c), count(distinct b,c,d) from t1; +show index from t1; +explain select * from t1,t2 where t1.b=t2.a; +set @@optimizer_adjust_secondary_key_costs=8; +explain select * from t1,t2 where t1.b=t2.a; +show index from t1; +# Flush tables or show index is needed to refresh the data in table share +flush tables; +explain select * from t1,t2 where t1.b=t2.a; +show index from t1; +# Check that the option does not affect other usage +connect (user2, localhost, root,,); +show index from t1; +connection default; +disconnect user2; +drop table t1,t2; +set global userstat=@save_userstat; +set global innodb_stats_persistent_sample_pages=@save_ispsp; + +set @@optimizer_adjust_secondary_key_costs=default; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index b05f4d494e9..b5c47ac5ecd 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -2275,11 +2275,11 @@ COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS VARIABLE_SCOPE SESSION VARIABLE_TYPE SET -VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by +ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 9c9384f9533..59b9cbb8d96 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -2435,11 +2435,11 @@ COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS VARIABLE_SCOPE SESSION VARIABLE_TYPE SET -VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by +ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_MAX_SEL_ARGS diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 6419a58fbe4..539ab77db63 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5374,6 +5374,17 @@ extern "C" int thd_current_status(MYSQL_THD thd) } +extern "C" int thd_double_innodb_cardinality(MYSQL_THD thd) +{ + /* + The original behavior was to double the cardinality. + OPTIMIZER_FIX_INNODB_CARDINALITY means do not double. + */ + return !(thd->variables.optimizer_adjust_secondary_key_costs & + OPTIMIZER_FIX_INNODB_CARDINALITY); +} + + extern "C" enum enum_server_command thd_current_command(MYSQL_THD thd) { return thd->get_command(); diff --git a/sql/sql_class.h b/sql/sql_class.h index a9d89871025..a347700f72f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -214,6 +214,7 @@ extern "C" const char *thd_client_ip(MYSQL_THD thd); extern "C" LEX_CSTRING *thd_current_db(MYSQL_THD thd); extern "C" int thd_current_status(MYSQL_THD thd); extern "C" enum enum_server_command thd_current_command(MYSQL_THD thd); +extern "C" int thd_double_innodb_cardinality(MYSQL_THD thd); /** @class CSET_STRING diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 7a7a0241ab0..0bcb09a80ad 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -274,6 +274,7 @@ #define OPTIMIZER_ADJ_SEC_KEY_COST (1) #define OPTIMIZER_ADJ_DISABLE_MAX_SEEKS (2) #define OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY (4) +#define OPTIMIZER_FIX_INNODB_CARDINALITY (8) /* Replication uses 8 bytes to store SQL_MODE in the binary log. The day you diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 86c1b50e9af..42ec52b38a9 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2823,18 +2823,21 @@ static Sys_var_ulong Sys_optimizer_trace_max_mem_size( */ static const char *adjust_secondary_key_cost[]= { - "adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", 0 + "adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", "fix_innodb_cardinality",0 }; static Sys_var_set Sys_optimizer_adjust_secondary_key_costs( "optimizer_adjust_secondary_key_costs", "A bit field with the following values: " - "adjust_secondary_key_cost = Update secondary key costs for ranges to be at least " - "5x of clustered primary key costs. " - "disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight " - "adjustment of filter cost. " - "disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. " + "adjust_secondary_key_cost = Update secondary key costs for ranges to be " + "at least 5x of clustered primary key costs. " + "disable_max_seek = Disable 'max_seek optimization' for secondary keys and " + "slight adjustment of filter cost. " + "disable_forced_index_in_group_by = Disable automatic forced index in " + "GROUP BY. " + "fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB " + "secondary keys. " "This variable will be deleted in MariaDB 11.0 as it is not needed with the " "new 11.0 optimizer.", SESSION_VAR(optimizer_adjust_secondary_key_costs), CMD_LINE(REQUIRED_ARG), diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 790274f46b3..b99d5873bf5 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -15013,7 +15013,8 @@ ha_innobase::info_low( index selectivity is 2 times better than our estimate: */ - rec_per_key_int = rec_per_key_int / 2; + rec_per_key_int /= 1 + + thd_double_innodb_cardinality(m_user_thd); if (rec_per_key_int == 0) { rec_per_key_int = 1;