From 4941ac9192394a1489f0bf01b6dd2ee5ec2906c9 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Tue, 19 Sep 2023 18:22:49 +0300 Subject: [PATCH] MDEV-32113: utf8mb3_key_col=utf8mb4_value cannot be used for ref (Variant#3: Allow cross-charset comparisons, use a special CHARSET_INFO to create lookup keys. Review input addressed.) Equalities that compare utf8mb{3,4}_general_ci strings, like: WHERE ... utf8mb3_key_col=utf8mb4_value (MB3-4-CMP) can now be used to construct ref[const] access and also participate in multiple-equalities. This means that utf8mb3_key_col can be used for key-lookups when compared with a utf8mb4 constant, field or expression using '=' or '<=>' comparison operators. This is controlled by optimizer_switch='cset_narrowing=on', which is OFF by default. IMPLEMENTATION Item value comparison in (MB3-4-CMP) is done using utf8mb4_general_ci. This is valid as any utf8mb3 value is also a utf8mb4 value. When making the index lookup value for utf8mb3_key_col, we do "Charset Narrowing": characters that are in the Basic Multilingual Plane (=BMP) are copied as-is, as they can be represented in utf8mb3. Characters that are outside the BMP cannot be represented in utf8mb3 and are replaced with U+FFFD, the "Replacement Character". In utf8mb4_general_ci, the Replacement Character compares as equal to any character that's not in the BMP. Because of this, the constructed lookup value will find all index records that would be considered equal by the original condition (MB3-4-CMP). 
Approved-by: Monty --- include/m_ctype.h | 3 + libmysqld/CMakeLists.txt | 1 + mysql-test/main/cset_narrowing.result | 470 ++++++++++++++++++ mysql-test/main/cset_narrowing.test | 150 ++++++ mysql-test/main/mysqld--help.result | 2 +- .../main/mysqltest_tracking_info.result | 2 +- .../sys_vars/r/optimizer_switch_basic.result | 32 +- .../sys_vars/r/sysvars_server_embedded.result | 2 +- .../r/sysvars_server_notembedded.result | 2 +- sql/CMakeLists.txt | 4 +- sql/cset_narrowing.cc | 35 ++ sql/cset_narrowing.h | 143 ++++++ sql/field.cc | 15 +- sql/item.cc | 26 +- sql/item.h | 41 +- sql/item_cmpfunc.cc | 34 +- sql/opt_range.cc | 19 +- sql/sql_priv.h | 1 + sql/sql_select.cc | 3 +- sql/sql_select.h | 34 +- sql/sql_type.h | 8 +- sql/sys_vars.cc | 1 + strings/ctype-utf8.c | 12 + 23 files changed, 1001 insertions(+), 39 deletions(-) create mode 100644 mysql-test/main/cset_narrowing.result create mode 100644 mysql-test/main/cset_narrowing.test create mode 100644 sql/cset_narrowing.cc create mode 100644 sql/cset_narrowing.h diff --git a/include/m_ctype.h b/include/m_ctype.h index cda43d81b39..d5a274367a8 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -1711,6 +1711,9 @@ my_well_formed_length(CHARSET_INFO *cs, const char *b, const char *e, #include "t_ctype.h" #endif +int my_wc_mb_utf8mb4_bmp_only(CHARSET_INFO *cs, my_wc_t wc, uchar *r, + uchar *e); + #ifdef __cplusplus } #endif diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index 6cff07ee132..b414903f705 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -50,6 +50,7 @@ ENDIF() SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc libmysql.c ../sql-common/errmsg.c ../sql-common/client.c + ../sql/cset_narrowing.cc ../sql-common/my_user.c ../sql-common/pack.c ../sql-common/client_plugin.c ../sql/password.c ../sql/discover.cc ../sql/derror.cc diff --git a/mysql-test/main/cset_narrowing.result b/mysql-test/main/cset_narrowing.result new file mode 100644 index 
00000000000..b7cb32bc86c --- /dev/null +++ b/mysql-test/main/cset_narrowing.result @@ -0,0 +1,470 @@ +set +@tmp_csetn_os= @@optimizer_switch, +optimizer_switch='cset_narrowing=on'; +set names utf8mb4; +create table t1 ( +mb3name varchar(32), +mb3 varchar(32) collate utf8mb3_general_ci, +key(mb3) +); +insert into t1 select seq, seq from seq_1_to_10000; +insert into t1 values ('mb3-question-mark', '?'); +insert into t1 values ('mb3-replacement-char', _utf8mb3 0xEFBFBD); +create table t10 ( +pk int auto_increment primary key, +mb4name varchar(32), +mb4 varchar(32) character set utf8mb4 collate utf8mb4_general_ci +); +insert into t10 (mb4name, mb4) values +('mb4-question-mark','?'), +('mb4-replacement-char', _utf8mb4 0xEFBFBD), +('mb4-smiley', _utf8mb4 0xF09F988A), +('1', '1'); +analyze table t1,t10 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t10 analyze status Engine-independent statistics collected +test.t10 analyze status OK +# +# Check that constants are already handled: the following should use +# ref/range, because constants are converted into utf8mb3. 
+# +select collation('abc'); +collation('abc') +utf8mb4_general_ci +explain select * from t1 force index (mb3) where t1.mb3='abc'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref mb3 mb3 99 const 1 Using index condition +explain select * from t1 force index (mb3) where t1.mb3 in ('abc','cde','xyz'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range mb3 mb3 99 NULL 3 Using index condition +explain select * from t1 force index (mb3) where t1.mb3 between 'abc' and 'acc'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition +explain select * from t1 force index (mb3) where t1.mb3 <'000'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range mb3 mb3 99 NULL 1 Using index condition +# If a constant can't be represented in utf8mb3, an error is produced: +explain select * from t1 force index (mb3) where t1.mb3='😊'; +ERROR HY000: Illegal mix of collations (utf8mb3_general_ci,IMPLICIT) and (utf8mb4_general_ci,COERCIBLE) for operation '=' +# +# Check ref access on mb3_field=mb4_field +# +explain format=json +select * from t10,t1 where t10.mb4=t1.mb3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "ALL", + "rows": 4, + "filtered": 100, + "attached_condition": "t10.mb4 is not null" + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["mb3"], + "key": "mb3", + "key_length": "99", + "used_key_parts": ["mb3"], + "ref": ["test.t10.mb4"], + "rows": 1, + "filtered": 100, + "index_condition": "t1.mb3 = t10.mb4" + } + } +} +select * from t10,t1 where t10.mb4=t1.mb3; +pk mb4name mb4 mb3name mb3 +1 mb4-question-mark ? mb3-question-mark ? 
+2 mb4-replacement-char � mb3-replacement-char � +3 mb4-smiley 😊 mb3-replacement-char � +4 1 1 1 1 +select * from t10,t1 use index() where t10.mb4=t1.mb3; +pk mb4name mb4 mb3name mb3 +4 1 1 1 1 +1 mb4-question-mark ? mb3-question-mark ? +2 mb4-replacement-char � mb3-replacement-char � +3 mb4-smiley 😊 mb3-replacement-char � +explain format=json +select * from t10,t1 where t10.mb4<=>t1.mb3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "ALL", + "rows": 4, + "filtered": 100 + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["mb3"], + "key": "mb3", + "key_length": "99", + "used_key_parts": ["mb3"], + "ref": ["test.t10.mb4"], + "rows": 1, + "filtered": 100, + "index_condition": "t10.mb4 <=> t1.mb3" + } + } +} +select * from t10,t1 where t10.mb4<=>t1.mb3; +pk mb4name mb4 mb3name mb3 +1 mb4-question-mark ? mb3-question-mark ? +2 mb4-replacement-char � mb3-replacement-char � +3 mb4-smiley 😊 mb3-replacement-char � +4 1 1 1 1 +set statement optimizer_switch='cset_narrowing=off', join_cache_level=0 for +explain format=json +select * from t10,t1 where t10.mb4=t1.mb3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "ALL", + "rows": 4, + "filtered": 100 + }, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 10002, + "filtered": 100, + "attached_condition": "t10.mb4 = convert(t1.mb3 using utf8mb4)" + } + } +} +# +# Check ref access on mb3_field=mb4_expr +# +explain format=json +select * from t10,t1 where t1.mb3=concat('',t10.mb4); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "ALL", + "rows": 4, + "filtered": 100 + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["mb3"], + "key": "mb3", + "key_length": "99", + "used_key_parts": ["mb3"], + "ref": ["func"], + "rows": 1, + "filtered": 100, + "index_condition": "t1.mb3 = 
concat('',t10.mb4)" + } + } +} +select * from t10,t1 where t1.mb3=concat('',t10.mb4); +pk mb4name mb4 mb3name mb3 +1 mb4-question-mark ? mb3-question-mark ? +2 mb4-replacement-char � mb3-replacement-char � +3 mb4-smiley 😊 mb3-replacement-char � +4 1 1 1 1 +select * from t10,t1 use index() where t1.mb3=concat('',t10.mb4); +pk mb4name mb4 mb3name mb3 +4 1 1 1 1 +1 mb4-question-mark ? mb3-question-mark ? +2 mb4-replacement-char � mb3-replacement-char � +3 mb4-smiley 😊 mb3-replacement-char � +# Check that ref optimizer gets the right constant. +# We need a const table for that, because key=const is handled by +# coercing the constant. +# +# So, we take the smiley: +select * from t10 where t10.pk=3; +pk mb4name mb4 +3 mb4-smiley 😊 +set optimizer_trace=1; +# And see that we've got the Replacement Character in the ranges: +explain +select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 const PRIMARY PRIMARY 4 const 1 +1 SIMPLE t1 ref mb3 mb3 99 const 3 Using index condition +select +json_detailed(json_extract(trace, '$**.range_scan_alternatives')) as JS +from +information_schema.optimizer_trace; +JS +[ + [ + { + "index": "mb3", + "ranges": + ["(�) <= (mb3) <= (�)"], + "rowid_ordered": true, + "using_mrr": false, + "index_only": false, + "rows": 3, + "cost": 3.760377105, + "chosen": true + } + ] +] +select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3; +pk mb4name mb4 mb3name mb3 +3 mb4-smiley 😊 mb3-replacement-char � +# +# Will range optimizer handle t1.mb3>t10.mb4? No... 
+# +explain format=json +select * from t10, t1 where (t1.mb3=t10.mb4 or t1.mb3='hello') and t10.pk=3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "const", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["pk"], + "ref": ["const"], + "rows": 1, + "filtered": 100 + }, + "table": { + "table_name": "t1", + "access_type": "range", + "possible_keys": ["mb3"], + "key": "mb3", + "key_length": "99", + "used_key_parts": ["mb3"], + "rows": 4, + "filtered": 100, + "index_condition": "t1.mb3 = '????' or t1.mb3 = 'hello'" + } + } +} +explain format=json +select * from t10, t1 where t1.mb3>t10.mb4 and t10.pk=3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "const", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["pk"], + "ref": ["const"], + "rows": 1, + "filtered": 100 + }, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 10002, + "filtered": 100, + "attached_condition": "convert(t1.mb3 using utf8mb4) > '????'" + } + } +} +# For comparison, it will handle it when collations match: +create table t2 ( +mb4name varchar(32), +mb4 varchar(32) collate utf8mb4_general_ci, +key(mb4) +); +insert into t2 select * from t1; +explain format=json +select * from t10, t2 where t2.mb4>t10.mb4 and t10.pk=3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "const", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["pk"], + "ref": ["const"], + "rows": 1, + "filtered": 100 + }, + "table": { + "table_name": "t2", + "access_type": "range", + "possible_keys": ["mb4"], + "key": "mb4", + "key_length": "131", + "used_key_parts": ["mb4"], + "rows": 3, + "filtered": 100, + "index_condition": "t2.mb4 > '????'" + } + } +} +# +# Check multiple equalities +# +# - ref acccess lookup keys do use 
equality substitution, +# - concat() arguments don't +explain format=json +select straight_join * from t10,t1 force index(mb3),t2 +where +t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "ALL", + "rows": 4, + "filtered": 100, + "attached_condition": "t10.mb4 is not null and t10.mb4 is not null" + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["mb3"], + "key": "mb3", + "key_length": "99", + "used_key_parts": ["mb3"], + "ref": ["test.t10.mb4"], + "rows": 1, + "filtered": 100, + "index_condition": "t1.mb3 = t10.mb4" + }, + "table": { + "table_name": "t2", + "access_type": "ref", + "possible_keys": ["mb4"], + "key": "mb4", + "key_length": "131", + "used_key_parts": ["mb4"], + "ref": ["test.t10.mb4"], + "rows": 1, + "filtered": 100, + "index_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'" + } + } +} +select json_detailed(json_extract(trace, '$**.condition_processing')) as JS +from information_schema.optimizer_trace; +JS +[ + { + "condition": "WHERE", + "original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'", + "steps": + [ + { + "transformation": "equality_propagation", + "resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" + }, + { + "transformation": "constant_propagation", + "resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" + }, + { + "transformation": "trivial_condition_removal", + "resulting_condition": "concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" + } + ] + } +] +select straight_join * from t10,t1 force index(mb3),t2 +where +t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and 
concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; +pk mb4name mb4 mb3name mb3 mb4name mb4 +1 mb4-question-mark ? mb3-question-mark ? mb3-question-mark ? +2 mb4-replacement-char � mb3-replacement-char � mb3-replacement-char � +3 mb4-smiley 😊 mb3-replacement-char � mb3-replacement-char � +4 1 1 1 1 1 1 +# Equality substitution doesn't happen for constants, for both narrowing +# and non-narrowing comparisons: +explain format=json +select * from t10,t1,t2 +where +t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and t10.mb4='hello' and +concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t10", + "access_type": "ALL", + "rows": 4, + "filtered": 0.78125, + "attached_condition": "t10.mb4 = 'hello'" + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["mb3"], + "key": "mb3", + "key_length": "99", + "used_key_parts": ["mb3"], + "ref": ["const"], + "rows": 1, + "filtered": 100, + "index_condition": "t1.mb3 = t10.mb4" + }, + "table": { + "table_name": "t2", + "access_type": "ref", + "possible_keys": ["mb4"], + "key": "mb4", + "key_length": "131", + "used_key_parts": ["mb4"], + "ref": ["const"], + "rows": 1, + "filtered": 100, + "index_condition": "t2.mb4 = t10.mb4 and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'" + } + } +} +select json_detailed(json_extract(trace, '$**.condition_processing')) as JS +from information_schema.optimizer_trace; +JS +[ + { + "condition": "WHERE", + "original_condition": "t1.mb3 = t2.mb4 and t2.mb4 = t10.mb4 and t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe'", + "steps": + [ + { + "transformation": "equality_propagation", + "resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" + }, + { + "transformation": "constant_propagation", + "resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using 
utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" + }, + { + "transformation": "trivial_condition_removal", + "resulting_condition": "t10.mb4 = 'hello' and concat(convert(t1.mb3 using utf8mb4),t2.mb4,t10.mb4) <> 'Bebebe' and multiple equal(t1.mb3, t2.mb4, t10.mb4)" + } + ] + } +] +drop table t2; +drop table t1, t10; +set optimizer_switch=@tmp_csetn_os; diff --git a/mysql-test/main/cset_narrowing.test b/mysql-test/main/cset_narrowing.test new file mode 100644 index 00000000000..4d0947f6376 --- /dev/null +++ b/mysql-test/main/cset_narrowing.test @@ -0,0 +1,150 @@ +# +# Test character set narrowing +# + +--source include/have_utf8mb4.inc +--source include/have_sequence.inc +--source include/not_embedded.inc + +set + @tmp_csetn_os= @@optimizer_switch, + optimizer_switch='cset_narrowing=on'; + +set names utf8mb4; +create table t1 ( + mb3name varchar(32), + mb3 varchar(32) collate utf8mb3_general_ci, + key(mb3) +); +insert into t1 select seq, seq from seq_1_to_10000; +insert into t1 values ('mb3-question-mark', '?'); +insert into t1 values ('mb3-replacement-char', _utf8mb3 0xEFBFBD); + +create table t10 ( + pk int auto_increment primary key, + mb4name varchar(32), + mb4 varchar(32) character set utf8mb4 collate utf8mb4_general_ci +); + +insert into t10 (mb4name, mb4) values + ('mb4-question-mark','?'), + ('mb4-replacement-char', _utf8mb4 0xEFBFBD), + ('mb4-smiley', _utf8mb4 0xF09F988A), + ('1', '1'); + +analyze table t1,t10 persistent for all; +--echo # +--echo # Check that constants are already handled: the following should use +--echo # ref/range, because constants are converted into utf8mb3. 
+--echo # +select collation('abc'); +explain select * from t1 force index (mb3) where t1.mb3='abc'; +explain select * from t1 force index (mb3) where t1.mb3 in ('abc','cde','xyz'); +explain select * from t1 force index (mb3) where t1.mb3 between 'abc' and 'acc'; +explain select * from t1 force index (mb3) where t1.mb3 <'000'; + +--echo # If a constant can't be represented in utf8mb3, an error is produced: +--error ER_CANT_AGGREGATE_2COLLATIONS +explain select * from t1 force index (mb3) where t1.mb3='😊'; + +--echo # +--echo # Check ref access on mb3_field=mb4_field +--echo # +explain format=json +select * from t10,t1 where t10.mb4=t1.mb3; + +select * from t10,t1 where t10.mb4=t1.mb3; + +select * from t10,t1 use index() where t10.mb4=t1.mb3; + +explain format=json +select * from t10,t1 where t10.mb4<=>t1.mb3; + +select * from t10,t1 where t10.mb4<=>t1.mb3; + +set statement optimizer_switch='cset_narrowing=off', join_cache_level=0 for +explain format=json +select * from t10,t1 where t10.mb4=t1.mb3; + +--echo # +--echo # Check ref access on mb3_field=mb4_expr +--echo # +explain format=json +select * from t10,t1 where t1.mb3=concat('',t10.mb4); + +select * from t10,t1 where t1.mb3=concat('',t10.mb4); + +select * from t10,t1 use index() where t1.mb3=concat('',t10.mb4); + +--echo # Check that ref optimizer gets the right constant. +--echo # We need a const table for that, because key=const is handled by +--echo # coercing the constant. +--echo # +--echo # So, we take the smiley: +select * from t10 where t10.pk=3; +set optimizer_trace=1; + +--echo # And see that we've got the Replacement Character in the ranges: +explain +select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3; + +select + json_detailed(json_extract(trace, '$**.range_scan_alternatives')) as JS +from + information_schema.optimizer_trace; + +select * from t10, t1 where t10.mb4=t1.mb3 and t10.pk=3; + +--echo # +--echo # Will range optimizer handle t1.mb3>t10.mb4? No... 
+--echo # + +explain format=json +select * from t10, t1 where (t1.mb3=t10.mb4 or t1.mb3='hello') and t10.pk=3; + +explain format=json +select * from t10, t1 where t1.mb3>t10.mb4 and t10.pk=3; + +--echo # For comparison, it will handle it when collations match: +create table t2 ( + mb4name varchar(32), + mb4 varchar(32) collate utf8mb4_general_ci, + key(mb4) +); +insert into t2 select * from t1; +explain format=json +select * from t10, t2 where t2.mb4>t10.mb4 and t10.pk=3; + +--echo # +--echo # Check multiple equalities +--echo # + +--echo # - ref acccess lookup keys do use equality substitution, +--echo # - concat() arguments don't +explain format=json +select straight_join * from t10,t1 force index(mb3),t2 +where + t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; +select json_detailed(json_extract(trace, '$**.condition_processing')) as JS +from information_schema.optimizer_trace; + +select straight_join * from t10,t1 force index(mb3),t2 +where + t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; + +--echo # Equality substitution doesn't happen for constants, for both narrowing +--echo # and non-narrowing comparisons: +explain format=json +select * from t10,t1,t2 +where + t1.mb3=t2.mb4 and t2.mb4=t10.mb4 and t10.mb4='hello' and + concat(t1.mb3, t2.mb4, t10.mb4)<>'Bebebe'; + +select json_detailed(json_extract(trace, '$**.condition_processing')) as JS +from information_schema.optimizer_trace; + +drop table t2; +drop table t1, t10; + +set optimizer_switch=@tmp_csetn_os; + diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 01167912b37..1c851b9d06c 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -755,7 +755,7 @@ The following specify which files/extra groups are read (specified before remain condition_pushdown_for_derived, split_materialized, condition_pushdown_for_subquery, rowid_filter, condition_pushdown_from_having, 
not_null_range_scan, - hash_join_cardinality + hash_join_cardinality, cset_narrowing --optimizer-trace=name Controls tracing of the Optimizer: optimizer_trace=option=val[,option=val...], where option diff --git a/mysql-test/main/mysqltest_tracking_info.result b/mysql-test/main/mysqltest_tracking_info.result index 5436dd87122..cad96ef80c4 100644 --- a/mysql-test/main/mysqltest_tracking_info.result +++ b/mysql-test/main/mysqltest_tracking_info.result @@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch'; set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off'; -- Tracker : SESSION_TRACK_SYSTEM_VARIABLES -- optimizer_switch --- 
index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off Warnings: Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release diff --git a/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result b/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result index 7f2e1c6586c..8f5d9071e1e 100644 --- 
a/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result +++ b/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result @@ -1,60 +1,60 @@ set @@global.optimizer_switch=@@optimizer_switch; select @@global.optimizer_switch; @@global.optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select @@session.optimizer_switch; @@session.optimizer_switch 
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off show global variables like 'optimizer_switch'; Variable_name Value -optimizer_switch 
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off show session variables like 'optimizer_switch'; Variable_name Value -optimizer_switch 
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select * from information_schema.global_variables where variable_name='optimizer_switch'; VARIABLE_NAME VARIABLE_VALUE -OPTIMIZER_SWITCH 
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +OPTIMIZER_SWITCH index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select * from information_schema.session_variables where variable_name='optimizer_switch'; VARIABLE_NAME VARIABLE_VALUE -OPTIMIZER_SWITCH 
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off +OPTIMIZER_SWITCH index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off set global optimizer_switch=4101; set session optimizer_switch=2058; select @@global.optimizer_switch; @@global.optimizer_switch 
-index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select @@session.optimizer_switch; @@session.optimizer_switch 
-index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off set global optimizer_switch="index_merge_sort_union=on"; set session optimizer_switch="index_merge=off"; select @@global.optimizer_switch; @@global.optimizer_switch 
-index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select @@session.optimizer_switch; @@session.optimizer_switch 
-index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off show global variables like 'optimizer_switch'; Variable_name Value -optimizer_switch 
index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +optimizer_switch index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off show session variables like 'optimizer_switch'; Variable_name Value -optimizer_switch 
index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +optimizer_switch index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select * from information_schema.global_variables where variable_name='optimizer_switch'; VARIABLE_NAME VARIABLE_VALUE -OPTIMIZER_SWITCH 
index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +OPTIMIZER_SWITCH index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off select * from information_schema.session_variables where variable_name='optimizer_switch'; VARIABLE_NAME VARIABLE_VALUE -OPTIMIZER_SWITCH 
index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +OPTIMIZER_SWITCH index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off set session optimizer_switch="default"; select @@session.optimizer_switch; @@session.optimizer_switch 
-index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off +index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,hash_join_cardinality=off,cset_narrowing=off set optimizer_switch = replace(@@optimizer_switch, '=off', '=on'); Warnings: Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release select @@optimizer_switch; @@optimizer_switch 
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=on,hash_join_cardinality=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=on,hash_join_cardinality=on,cset_narrowing=on set global optimizer_switch=1.1; ERROR 42000: Incorrect argument type to variable 'optimizer_switch' set global optimizer_switch=1e1; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index a3f33afc416..a6ed72520e5 100644 --- 
a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -2329,7 +2329,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default +ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,cset_narrowing,default READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_TRACE diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 17e8339a352..2ade2a9649a 
100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -2489,7 +2489,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default +ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,cset_narrowing,default READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_TRACE diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index c667920edff..557fd5eb506 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -90,7 +90,9 
@@ ENDIF() SET (SQL_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.cc ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.cc - ../sql-common/client.c compat56.cc derror.cc des_key_file.cc + ../sql-common/client.c + cset_narrowing.cc + compat56.cc derror.cc des_key_file.cc discover.cc ../sql-common/errmsg.c field.cc field_conv.cc field_comp.cc filesort_utils.cc diff --git a/sql/cset_narrowing.cc b/sql/cset_narrowing.cc new file mode 100644 index 00000000000..abdaec16416 --- /dev/null +++ b/sql/cset_narrowing.cc @@ -0,0 +1,35 @@ +/* + Copyright (c) 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" + +Charset_utf8narrow utf8mb3_from_mb4; + +bool Utf8_narrow::should_do_narrowing(const THD *thd, + CHARSET_INFO *field_cset, + CHARSET_INFO *value_cset) +{ + return optimizer_flag(thd, OPTIMIZER_SWITCH_CSET_NARROWING) && + field_cset == &my_charset_utf8mb3_general_ci && + value_cset == &my_charset_utf8mb4_general_ci; +} + diff --git a/sql/cset_narrowing.h b/sql/cset_narrowing.h new file mode 100644 index 00000000000..bb0a3960b8e --- /dev/null +++ b/sql/cset_narrowing.h @@ -0,0 +1,143 @@ +/* + Copyright (c) 2023, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef CSET_NARROWING_H_INCLUDED +#define CSET_NARROWING_H_INCLUDED + +/* + A singleton class to provide "utf8mb3_from_mb4.charset()". + + This is a variant of utf8mb3_general_ci that one can use when they have data + in MB4 and want to make index lookup keys in MB3. +*/ +extern +class Charset_utf8narrow +{ + struct my_charset_handler_st cset_handler; + struct charset_info_st cset; +public: + Charset_utf8narrow() : + cset_handler(*my_charset_utf8mb3_general_ci.cset), + cset(my_charset_utf8mb3_general_ci) /* Copy the CHARSET_INFO structure */ + { + /* Insert our function wc_mb */ + cset_handler.wc_mb= my_wc_mb_utf8mb4_bmp_only; + cset.cset=&cset_handler; + + /* Charsets are compared by their name, so assign a different name */ + LEX_CSTRING tmp= {STRING_WITH_LEN("utf8_mb4_to_mb3")}; + cset.cs_name= tmp; + } + + CHARSET_INFO *charset() { return &cset; } + +} utf8mb3_from_mb4; + + +/* + A class to temporarily change a field that uses utf8mb3_general_ci to enable + correct lookup key construction from string value in utf8mb4_general_ci + + Intended usage: + + // can do this in advance: + bool do_narrowing= Utf8_narrow::should_do_narrowing(field, value_cset); + ...
+ + // This sets the field to do narrowing if necessary: + Utf8_narrow narrow(field, do_narrowing); + + // write to 'field' here + // item->save_in_field(field) or something else + + // Stop doing narrowing + narrow.stop(); +*/ + +class Utf8_narrow +{ + Field *field; + DTCollation save_collation; + +public: + static bool should_do_narrowing(const THD *thd, CHARSET_INFO *field_cset, + CHARSET_INFO *value_cset); + + static bool should_do_narrowing(const Field *field, CHARSET_INFO *value_cset) + { + CHARSET_INFO *field_cset= field->charset(); + THD *thd= field->table->in_use; + return should_do_narrowing(thd, field_cset, value_cset); + } + + Utf8_narrow(Field *field_arg, bool is_applicable) + { + field= NULL; + if (is_applicable) + { + DTCollation mb3_from_mb4= utf8mb3_from_mb4.charset(); + field= field_arg; + save_collation= field->dtcollation(); + field->change_charset(mb3_from_mb4); + } + } + + void stop() + { + if (field) + field->change_charset(save_collation); +#ifndef NDEBUG + field= NULL; +#endif + } + + ~Utf8_narrow() + { + DBUG_ASSERT(!field); + } +}; + + +/* + @brief + Check if two fields can participate in a multiple equality using charset + narrowing. + + @detail + Normally, check_simple_equality() checks this by calling: + + left_field->eq_def(right_field) + + This function does the same but takes into account we might use charset + narrowing: + - collations are not the same but rather an utf8mb{3,4}_general_ci pair + - for field lengths, should compare # characters, not #bytes. 
+*/ + +inline +bool fields_equal_using_narrowing(const THD *thd, const Field *left, const Field *right) +{ + return + dynamic_cast<const Field_longstr*>(left) && + dynamic_cast<const Field_longstr*>(right) && + left->real_type() == right->real_type() && + (Utf8_narrow::should_do_narrowing(left, right->charset()) || + Utf8_narrow::should_do_narrowing(right, left->charset())) && + left->char_length() == right->char_length(); +}; + + +#endif /* CSET_NARROWING_H_INCLUDED */ diff --git a/sql/field.cc b/sql/field.cc index b52f481170e..44445e08d5d 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -7508,7 +7508,8 @@ Field_longstr::cmp_to_string_with_stricter_collation(const Item_bool_func *cond, return !cmp_is_done_using_type_handler_of_this(cond, item) ? Data_type_compatibility::INCOMPATIBLE_DATA_TYPE : (charset() != cond->compare_collation() && - !(cond->compare_collation()->state & MY_CS_BINSORT)) ? + !(cond->compare_collation()->state & MY_CS_BINSORT) && + !Utf8_narrow::should_do_narrowing(this, cond->compare_collation())) ? Data_type_compatibility::INCOMPATIBLE_COLLATION : Data_type_compatibility::OK; } @@ -7519,6 +7520,18 @@ Field_longstr::can_optimize_keypart_ref(const Item_bool_func *cond, const Item *item) const { DBUG_ASSERT(cmp_type() == STRING_RESULT); + /* + So, we have an equality: tbl.string_key = 'abc' + + The comparison is the string comparison. Can we use index lookups to + find matching rows? We can do that when: + - The comparison uses the same collation as tbl.string_key + - the comparison uses binary collation, while tbl.string_key + uses some other collation. + In this case, we will find matches in some collation. For example, for + 'abc' we may find 'abc', 'ABC', and 'äbc'. + But we're certain that we will find the row with the identical binary, 'abc'.
+ */ return cmp_to_string_with_stricter_collation(cond, item); } diff --git a/sql/item.cc b/sql/item.cc index 1e03614ebf9..f810d7e88ce 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -2522,7 +2522,8 @@ bool DTCollation::aggregate(const DTCollation &dt, uint flags) /******************************/ static -void my_coll_agg_error(DTCollation &c1, DTCollation &c2, const char *fname) +void my_coll_agg_error(const DTCollation &c1, const DTCollation &c2, + const char *fname) { my_error(ER_CANT_AGGREGATE_2COLLATIONS,MYF(0), c1.collation->coll_name.str, c1.derivation_name(), @@ -2605,10 +2606,17 @@ bool Type_std_attributes::agg_item_collations(DTCollation &c, } +/* + @param single_err When nargs==1, use *single_err as the second aggregated + collation when producing error message. +*/ + bool Type_std_attributes::agg_item_set_converter(const DTCollation &coll, const LEX_CSTRING &fname, Item **args, uint nargs, - uint flags, int item_sep) + uint flags, int item_sep, + const Single_coll_err + *single_err) { THD *thd= current_thd; if (thd->lex->is_ps_or_view_context_analysis()) @@ -2646,7 +2654,19 @@ bool Type_std_attributes::agg_item_set_converter(const DTCollation &coll, args[0]= safe_args[0]; args[item_sep]= safe_args[1]; } - my_coll_agg_error(args, nargs, fname.str, item_sep); + if (nargs == 1 && single_err) + { + /* + Use *single_err to produce an error message mentioning two + collations. 
+ */ + if (single_err->first) + my_coll_agg_error(args[0]->collation, single_err->coll, fname.str); + else + my_coll_agg_error(single_err->coll, args[0]->collation, fname.str); + } + else + my_coll_agg_error(args, nargs, fname.str, item_sep); return TRUE; } diff --git a/sql/item.h b/sql/item.h index 3be7cd7283a..3c6ebfb7d56 100644 --- a/sql/item.h +++ b/sql/item.h @@ -30,6 +30,8 @@ #include "sql_time.h" #include "mem_root_array.h" +#include "cset_narrowing.h" + C_MODE_START #include @@ -5399,8 +5401,10 @@ protected: public: // This method is used by Arg_comparator - bool agg_arg_charsets_for_comparison(CHARSET_INFO **cs, Item **a, Item **b) + bool agg_arg_charsets_for_comparison(CHARSET_INFO **cs, Item **a, Item **b, + bool allow_narrowing) { + THD *thd= current_thd; DTCollation tmp; if (tmp.set((*a)->collation, (*b)->collation, MY_COLL_CMP_CONV) || tmp.derivation == DERIVATION_NONE) @@ -5413,11 +5417,40 @@ public: func_name()); return true; } + + if (allow_narrowing && + (*a)->collation.derivation == (*b)->collation.derivation) + { + // allow_narrowing==true only for = and <=> comparisons. + if (Utf8_narrow::should_do_narrowing(thd, (*a)->collation.collation, + (*b)->collation.collation)) + { + // a is a subset, b is a superset (e.g. utf8mb3 vs utf8mb4) + *cs= (*b)->collation.collation; // Compare using the wider cset + return false; + } + else + if (Utf8_narrow::should_do_narrowing(thd, (*b)->collation.collation, + (*a)->collation.collation)) + { + // a is a superset, b is a subset (e.g. 
utf8mb4 vs utf8mb3) + *cs= (*a)->collation.collation; // Compare using the wider cset + return false; + } + } + /* + If necessary, convert both *a and *b to the collation in tmp: + */ + Single_coll_err error_for_a= {(*b)->collation, true}; + Single_coll_err error_for_b= {(*a)->collation, false}; + if (agg_item_set_converter(tmp, func_name_cstring(), - a, 1, MY_COLL_CMP_CONV, 1) || + a, 1, MY_COLL_CMP_CONV, 1, + /*just for error message*/ &error_for_a) || agg_item_set_converter(tmp, func_name_cstring(), - b, 1, MY_COLL_CMP_CONV, 1)) - return true; + b, 1, MY_COLL_CMP_CONV, 1, + /*just for error message*/ &error_for_b)) + return true; *cs= tmp.collation; return false; } diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 0ea012f82b2..ed6417a4747 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -463,10 +463,24 @@ bool Item_func::setup_args_and_comparator(THD *thd, Arg_comparator *cmp) if (args[0]->cmp_type() == STRING_RESULT && args[1]->cmp_type() == STRING_RESULT) { - DTCollation tmp; - if (agg_arg_charsets_for_comparison(tmp, args, 2)) + CHARSET_INFO *tmp; + /* + Use charset narrowing only for equalities, as that would allow + to construct ref access. + Non-equality comparisons with constants work without charset narrowing, + the constant gets converted. + Non-equality comparisons with non-constants would need narrowing to + enable range optimizer to handle e.g. + t1.mb3key_col <= const_table.mb4_col + But this doesn't look important. 
+ */ + bool allow_narrowing= MY_TEST(functype()==Item_func::EQ_FUNC || + functype()==Item_func::EQUAL_FUNC); + + if (agg_arg_charsets_for_comparison(&tmp, &args[0], &args[1], + allow_narrowing)) return true; - cmp->m_compare_collation= tmp.collation; + cmp->m_compare_collation= tmp; } // Convert constants when compared to int/year field DBUG_ASSERT(functype() != LIKE_FUNC); @@ -589,9 +603,19 @@ bool Arg_comparator::set_cmp_func_string(THD *thd) { /* We must set cmp_collation here as we may be called from for an automatic - generated item, like in natural join + generated item, like in natural join. + Allow reinterpreted superset as subset. */ - if (owner->agg_arg_charsets_for_comparison(&m_compare_collation, a, b)) + bool allow_narrowing= false; + if (owner->type() == Item::FUNC_ITEM) + { + Item_func::Functype ftype= ((Item_func*)owner)->functype(); + if (ftype == Item_func::EQUAL_FUNC || ftype==Item_func::EQ_FUNC) + allow_narrowing= true; + } + + if (owner->agg_arg_charsets_for_comparison(&m_compare_collation, a, b, + allow_narrowing)) return true; if ((*a)->type() == Item::FUNC_ITEM && diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 09b19944bb5..1180b2acda2 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8998,11 +8998,28 @@ SEL_ARG *Field_str::get_mm_leaf(RANGE_OPT_PARAM *prm, KEY_PART *key_part, const Item_bool_func *cond, scalar_comparison_op op, Item *value) { + int err; DBUG_ENTER("Field_str::get_mm_leaf"); if (can_optimize_scalar_range(prm, key_part, cond, op, value) != Data_type_compatibility::OK) DBUG_RETURN(0); - int err= value->save_in_field_no_warnings(this, 1); + + { + /* + Do CharsetNarrowing if necessary + This means that we are temporarily changing the character set of the + current key field to make key lookups possible. + This is needed when comparing an utf8mb3 key field with an utf8mb4 value. + See cset_narrowing.h for more details.
+ */ + bool do_narrowing= + Utf8_narrow::should_do_narrowing(this, value->collation.collation); + Utf8_narrow narrow(this, do_narrowing); + + err= value->save_in_field_no_warnings(this, 1); + narrow.stop(); + } + if ((op != SCALAR_CMP_EQUAL && is_real_null()) || err < 0) DBUG_RETURN(&null_element); if (err > 0) diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 549f279e95a..a1adcb58079 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -235,6 +235,7 @@ #define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34) #define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35) #define OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY (1ULL << 36) +#define OPTIMIZER_SWITCH_CSET_NARROWING (1ULL << 37) #define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 52eee901243..9f2e65cb59f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -15702,7 +15702,8 @@ bool check_simple_equality(THD *thd, const Item::Context &ctx, Field *left_field= ((Item_field*) left_item)->field; Field *right_field= ((Item_field*) right_item)->field; - if (!left_field->eq_def(right_field)) + if (!left_field->eq_def(right_field) && + !fields_equal_using_narrowing(thd, left_field, right_field)) return FALSE; /* Search for multiple equalities containing field1 and/or field2 */ diff --git a/sql/sql_select.h b/sql/sql_select.h index f8358a12708..02a6a846bc4 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -34,6 +34,8 @@ #include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ #include "filesort.h" +#include "cset_narrowing.h" + typedef struct st_join_table JOIN_TAB; /* Values in optimize */ #define KEY_OPTIMIZE_EXISTS 1U @@ -1921,7 +1923,14 @@ public: { enum_check_fields org_count_cuted_fields= thd->count_cuted_fields; Use_relaxed_field_copy urfc(to_field->table->in_use); + + /* If needed, perform CharsetNarrowing for making ref access lookup keys. 
*/ + Utf8_narrow do_narrow(to_field, do_cset_narrowing); + store_key_result result= copy_inner(); + + do_narrow.stop(); + thd->count_cuted_fields= org_count_cuted_fields; return result; } @@ -1931,6 +1940,12 @@ public: uchar *null_ptr; uchar err; + /* + This is set to true if we need to do Charset Narrowing when making a lookup + key. + */ + bool do_cset_narrowing= false; + virtual enum store_key_result copy_inner()=0; }; @@ -1950,6 +1965,7 @@ class store_key_field: public store_key if (to_field) { copy_field.set(to_field,from_field,0); + setup_charset_narrowing(); } } @@ -1960,6 +1976,15 @@ class store_key_field: public store_key { copy_field.set(to_field, fld_item->field, 0); field_name= fld_item->full_name(); + setup_charset_narrowing(); + } + + /* Setup CharsetNarrowing if necessary */ + void setup_charset_narrowing() + { + do_cset_narrowing= + Utf8_narrow::should_do_narrowing(copy_field.to_field, + copy_field.from_field->charset()); } protected: @@ -2000,7 +2025,12 @@ public: :store_key(thd, to_field_arg, ptr, null_ptr_arg ? null_ptr_arg : item_arg->maybe_null() ? 
&err : (uchar*) 0, length), item(item_arg), use_value(val) - {} + { + /* Setup CharsetNarrowing to be done if necessary */ + do_cset_narrowing= + Utf8_narrow::should_do_narrowing(to_field, + item->collation.collation); + } store_key_item(store_key &arg, Item *new_item, bool val) :store_key(arg), item(new_item), use_value(val) {} @@ -2388,7 +2418,7 @@ Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, extern bool test_if_ref(Item *, Item_field *left_item,Item *right_item); -inline bool optimizer_flag(THD *thd, ulonglong flag) +inline bool optimizer_flag(const THD *thd, ulonglong flag) { return (thd->variables.optimizer_switch & flag); } diff --git a/sql/sql_type.h b/sql/sql_type.h index e7f02eb6b13..9992c8bf0bd 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -3268,10 +3268,16 @@ public: bool agg_item_collations(DTCollation &c, const LEX_CSTRING &name, Item **items, uint nitems, uint flags, int item_sep); + struct Single_coll_err + { + const DTCollation& coll; /* the other collation to name in the error message */ + bool first; /* true: the item's own collation is passed first and coll second; false: the reverse */ + }; bool agg_item_set_converter(const DTCollation &coll, const LEX_CSTRING &name, Item **args, uint nargs, - uint flags, int item_sep); + uint flags, int item_sep, + const Single_coll_err *single_item_err= NULL); /* Collect arguments' character sets together. diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index aea757fdb3e..7550084456b 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2767,6 +2767,7 @@ export const char *optimizer_switch_names[]= "condition_pushdown_from_having", "not_null_range_scan", "hash_join_cardinality", + "cset_narrowing", "default", NullS }; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 68e37e699ad..2f8fe534468 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -7973,4 +7973,16 @@ struct charset_info_st my_charset_utf8mb4_nopad_bin= &my_collation_utf8mb4_nopad_bin_handler }; +/* + Take a my_wc_t character and convert it to utf8mb3 representation.
+ Characters that are not in Basic Multilingual Plane are replaced with + MY_CS_REPLACEMENT_CHARACTER. +*/ +int my_wc_mb_utf8mb4_bmp_only(CHARSET_INFO *cs, my_wc_t wc, uchar *r, uchar *e) +{ + if (wc > 0xFFFF) + wc= MY_CS_REPLACEMENT_CHARACTER; + return my_wc_mb_utf8mb4(cs, wc, r, e); +} + #endif /* HAVE_CHARSET_utf8mb4 */