MDEV-9959: A serious MariaDB server performance bug

If a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the upper select knows that ref access to the table will produce one row.
2019-04-30 21:05:50 +05:30 · 2019-04-30 21:05:50 +05:30 · cb9fa1a08b
commit cb9fa1a08b
parent 8c8bee0a56
10 changed files with 153 additions and 7 deletions
--- a/mysql-test/main/cte_nonrecursive.result
+++ b/mysql-test/main/cte_nonrecursive.result
@ -244,7 +244,7 @@ with t as (select distinct a from t1 where b >= 'c')
 select * from t as r1, t as r2 where r1.a=r2.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	8	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	2	
+1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	1	
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 explain
@ -253,7 +253,7 @@ select * from (select distinct a from t1 where b >= 'c') as r1,
 where r1.a=r2.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	<derived2>	ALL	NULL	NULL	NULL	NULL	8	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	2	
+1	PRIMARY	<derived3>	ref	key0	key0	5	r1.a	1	
 3	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary
 # two references to t specified by a query
@ -369,7 +369,7 @@ select c as a from t2 where c < 4)
 select * from t2,t where t2.c=t.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	2	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	1	
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where
 3	UNION	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
 NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
@ -381,7 +381,7 @@ select c as a from t2 where c < 4) as t
 where t2.c=t.a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
-1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	2	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.c	1	
 2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	8	Using where
 3	UNION	t2	ALL	NULL	NULL	NULL	NULL	4	Using where
 NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
--- a/mysql-test/main/derived.result
+++ b/mysql-test/main/derived.result
@ -1195,3 +1195,57 @@ drop table t1,t2,t3;
 #
 # End of 10.2 tests
 #
+#
+# MDEV-9959: A serious MariaDB server performance bug
+#
+create table t1(a int);
+insert into t1 values (1),(2),(3),(4),(5),(6);
+create table t2(a int, b int,c int);
+insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
+create table t3(a int, b int);
+insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
+table "<derived2>" should have type=ref and rows=1
+one select in derived table
+with distinct
+analyze select * from t1 , ((select distinct t2.a from t2 order by c))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	1.00	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using temporary; Using filesort
+analyze select * from t1 , ((select distinct t2.a, t2.b from t2 order by c))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	2	1.00	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using temporary; Using filesort
+# multiple selects in derived table
+# NO UNION ALL
+analyze select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	1.00	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+3	UNION	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	6.00	NULL	NULL	
+select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+a	a
+1	1
+2	2
+3	3
+4	4
+5	5
+6	6
+# UNION ALL and EXCEPT
+analyze select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	r_rows	filtered	r_filtered	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.a	1	0.50	100.00	100.00	
+2	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+3	UNION	t2	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+4	EXCEPT	t3	ALL	NULL	NULL	NULL	NULL	6	6.00	100.00	100.00	
+NULL	UNIT RESULT	<unit2,3,4>	ALL	NULL	NULL	NULL	NULL	NULL	3.00	NULL	NULL	
+select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+a	a
+3	3
+4	4
+6	6
+drop table t1,t2,t3;
--- a/mysql-test/main/derived.test
+++ b/mysql-test/main/derived.test
@ -1032,3 +1032,33 @@ drop table t1,t2,t3;
 --echo #
 --echo # End of 10.2 tests
 --echo #
+
+--echo #
+--echo # MDEV-9959: A serious MariaDB server performance bug
+--echo #
+
+create table t1(a int);
+insert into t1 values (1),(2),(3),(4),(5),(6);
+create table t2(a int, b int,c int);
+insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5);
+create table t3(a int, b int);
+insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2);
+
+--echo table "<derived2>" should have type=ref and rows=1
+--echo one select in derived table
+
+--echo with distinct
+analyze select * from t1 , ((select distinct t2.a from t2 order by c))q  where t1.a=q.a; 
+analyze select * from t1 , ((select distinct t2.a, t2.b from t2 order by c))q  where t1.a=q.a;
+
+--echo # multiple selects in derived table
+--echo # NO UNION ALL
+analyze select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+select * from t1 , ( (select t2.a from t2 order by c) union  (select t2.a from t2 order by c))q  where t1.a=q.a;
+
+--echo # UNION ALL and EXCEPT
+analyze select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+
+select * from t1 , ( (select t2.a from t2 order by c) union all (select  t2.a from t2 order by c) except(select t3.a from t3 order by b))q  where t1.a=q.a;
+
+drop table t1,t2,t3;
--- a/mysql-test/main/derived_view.result
+++ b/mysql-test/main/derived_view.result
@ -1525,7 +1525,7 @@ EXPLAIN
 SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	2	FirstMatch(t1)
+1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	1	FirstMatch(t1)
 3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
 4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
--- a/mysql-test/main/subselect_extra.result
+++ b/mysql-test/main/subselect_extra.result
@ -409,7 +409,7 @@ EXPLAIN
 SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	2	FirstMatch(t1)
+1	PRIMARY	<derived3>	ref	key0	key0	10	test.t1.a,test.t1.b	1	FirstMatch(t1)
 3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
 4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
--- a/mysql-test/main/subselect_extra_no_semijoin.result
+++ b/mysql-test/main/subselect_extra_no_semijoin.result
@ -411,7 +411,7 @@ EXPLAIN
 SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
-2	DEPENDENT SUBQUERY	<derived3>	index_subquery	key0	key0	10	func,func	2	Using where
+2	DEPENDENT SUBQUERY	<derived3>	index_subquery	key0	key0	10	func,func	1	Using where
 3	DERIVED	t2	ALL	NULL	NULL	NULL	NULL	6	
 4	UNION	t3	ALL	NULL	NULL	NULL	NULL	4	
 NULL	UNION RESULT	<union3,4>	ALL	NULL	NULL	NULL	NULL	NULL	
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@ -847,6 +847,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier, bool skip_global_sys_var_lock)
  invoker.init();
  prepare_derived_at_open= FALSE;
  create_tmp_table_for_derived= FALSE;
+  force_read_stats= FALSE;
  save_prep_leaf_list= FALSE;
  org_charset= 0;
  /* Restore THR_THD */
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@ -896,6 +896,7 @@ public:
  bool union_needs_tmp_table();

  void set_unique_exclude();
+  bool check_distinct_in_union();

  friend struct LEX;
  friend int subselect_union_engine::exec();
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@ -2049,3 +2049,43 @@ void st_select_lex_unit::set_unique_exclude()
    }
  }
 }
+
+/**
+  @brief
+  Check if the derived table is guaranteed to have distinct rows because of
+  UNION operations used to populate it.
+
+  @detail
+    UNION operation removes duplicate rows from its output. That is, a query like
+
+      select * from t1 UNION select * from t2
+
+    will not produce duplicate rows in its output, even if table t1 (and/or t2)
+    contain duplicate rows. EXCEPT and INTERSECT operations also have this
+    property.
+
+    On the other hand, UNION ALL operation doesn't remove duplicates. (The SQL
+    standard also defines EXCEPT ALL and INTERSECT ALL, but we don't support
+    them).
+
+    st_select_lex_unit computes its value left to right. That is, if there is
+     a st_select_lex_unit object describing
+
+      (select #1) OP1 (select #2) OP2 (select #3)
+
+    then ((select #1) OP1 (select #2)) is computed first, and OP2 is computed
+    second.
+
+    How can one tell if st_select_lex_unit is guaranteed to have distinct
+    output rows? This depends on whether the last operation was duplicate-
+    removing or not:
+    - UNION ALL is not duplicate-removing
+    - all other operations are duplicate-removing
+*/
+
+bool st_select_lex_unit::check_distinct_in_union()
+{
+  if (union_distinct && !union_distinct->next_select())
+    return true;
+  return false;
+}
--- a/sql/table.cc
+++ b/sql/table.cc
@ -7270,6 +7270,26 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
    key_part_info++;
  }

+  /*
+    For the case when there is a derived table that would give distinct rows,
+    the index statistics are passed to the join optimizer to tell that a ref
+    access to all the fields of the derived table will produce only one row.
+  */
+
+  st_select_lex_unit* derived= pos_in_table_list ?
+                               pos_in_table_list->derived: NULL;
+  if (derived)
+  {
+    st_select_lex* first= derived->first_select();
+    uint select_list_items= first->get_item_list()->elements;
+    if (key_parts == select_list_items)
+    {
+      if ((!first->is_part_of_union() && (first->options & SELECT_DISTINCT)) ||
+          derived->check_distinct_in_union())
+        keyinfo->rec_per_key[key_parts - 1]= 1;
+    }
+  }
+
  set_if_bigger(s->max_key_length, keyinfo->key_length);
  s->keys++;
  return FALSE;