From 3beeb5d045241afb9d3733fba8dd4ec6e5b9dd86 Mon Sep 17 00:00:00 2001
From: Martin Hansson <martin.hansson@sun.com>
Date: Mon, 13 Sep 2010 13:33:19 +0200
Subject: [PATCH] Bug #50394: Regression in EXPLAIN with index scan, LIMIT,
 GROUP BY and ORDER BY computed col

GROUP BY implies ORDER BY in the MySQL dialect of SQL. Therefore, when an
index on the first table in the query is used, and that index satisfies
ordering according to the GROUP BY clause, the query optimizer estimates the
number of tuples that need to be read from this index. If there is a LIMIT
clause, table statistics on tables following this 'sort table' are employed.

There may be a separate ORDER BY clause however, which mandates reading the
whole 'sort table' anyway. But the previous estimate was left untouched.

Fixed by removing the estimate from EXPLAIN output if GROUP BY is used in
conjunction with an ORDER BY clause that mandates using a temporary table.
---
 mysql-test/r/order_by.result | 21 +++++++++++++++++++++
 mysql-test/t/order_by.test   | 25 +++++++++++++++++++++++++
 sql/sql_select.cc            |  9 +++++++++
 3 files changed, 55 insertions(+)

diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result
index 6827fd0bc76..ba639fa9763 100644
--- a/mysql-test/r/order_by.result
+++ b/mysql-test/r/order_by.result
@@ -1617,4 +1617,25 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	range	a	a	5	NULL	2	Using where; Using temporary; Using filesort
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
 DROP TABLE t1, t2;
+#
+# Bug #50394: Regression in EXPLAIN with index scan, LIMIT, GROUP BY and
+# ORDER BY computed col
+#
+CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL, KEY( a, b ) );
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t1 SELECT a + 5, b + 5 FROM t1;
+CREATE TABLE t2( a INT PRIMARY KEY, b INT );
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t2 SELECT a + 5, b + 5 FROM t2;
+EXPLAIN
+SELECT count(*) AS c, t1.a
+FROM t1 JOIN t2 ON t1.b = t2.a
+WHERE t2.b = 1
+GROUP BY t1.a
+ORDER by c
+LIMIT 2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	8	NULL	10	Using index; Using temporary; Using filesort
+1	SIMPLE	t2	eq_ref	PRIMARY	PRIMARY	4	test.t1.b	1	Using where
+DROP TABLE t1, t2;
 End of 5.1 tests
diff --git a/mysql-test/t/order_by.test b/mysql-test/t/order_by.test
index 36b6015c5d8..a8a6ad00648 100644
--- a/mysql-test/t/order_by.test
+++ b/mysql-test/t/order_by.test
@@ -1467,4 +1467,29 @@ SELECT * FROM t1 FORCE INDEX FOR JOIN (a), t2 WHERE t1.a < 2 ORDER BY t1.a;
 
 DROP TABLE t1, t2;
 
+--echo #
+--echo # Bug #50394: Regression in EXPLAIN with index scan, LIMIT, GROUP BY and
+--echo # ORDER BY computed col
+--echo #
+CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL, KEY( a, b ) );
+
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t1 SELECT a + 5, b + 5 FROM t1;
+
+CREATE TABLE t2( a INT PRIMARY KEY, b INT );
+
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t2 SELECT a + 5, b + 5 FROM t2;
+
+EXPLAIN
+SELECT count(*) AS c, t1.a
+FROM t1 JOIN t2 ON t1.b = t2.a
+WHERE t2.b = 1
+GROUP BY t1.a
+ORDER by c
+LIMIT 2;
+
+DROP TABLE t1, t2;
+
+
 --echo End of 5.1 tests
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index f550f75c8b8..605819a1646 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1486,6 +1486,15 @@ JOIN::optimize()
     }
     if (order)
     {
+      /*
+        Do we need a temporary table due to the ORDER BY not being equal to
+        the GROUP BY? The call to test_if_skip_sort_order above tests for the
+        GROUP BY clause only and hence is not valid in this case. So the
+        estimated number of rows to be read from the first table is not valid.
+        We clear it here so that it doesn't show up in EXPLAIN.
+       */
+      if (need_tmp && (select_options & SELECT_DESCRIBE) != 0)
+        join_tab[const_tables].limit= 0;
       /*
         Force using of tmp table if sorting by a SP or UDF function due to
         their expensive and probably non-deterministic nature.