diff --git a/mysql-test/r/subselect3.result b/mysql-test/r/subselect3.result index f055b40116a..d5fb1a7bbde 100644 --- a/mysql-test/r/subselect3.result +++ b/mysql-test/r/subselect3.result @@ -895,3 +895,72 @@ t1.a < (select t4.a+10 from t4, t5 limit 2)); ERROR 21000: Subquery returns more than 1 row drop table t0, t1, t2, t3, t4, t5; +# +# BUG#48177 - SELECTs with NOT IN subqueries containing NULL +# values return too many records +# +CREATE TABLE t1 ( +i1 int DEFAULT NULL, +i2 int DEFAULT NULL +) ; +INSERT INTO t1 VALUES (1, NULL); +INSERT INTO t1 VALUES (2, 3); +INSERT INTO t1 VALUES (4, NULL); +INSERT INTO t1 VALUES (4, 0); +INSERT INTO t1 VALUES (NULL, NULL); +CREATE TABLE t2 ( +i1 int DEFAULT NULL, +i2 int DEFAULT NULL +) ; +INSERT INTO t2 VALUES (4, NULL); +INSERT INTO t2 VALUES (5, 0); + +Data in t1 +SELECT i1, i2 FROM t1; +i1 i2 +1 NULL +2 3 +4 NULL +4 0 +NULL NULL + +Data in subquery (should be filtered out) +SELECT i1, i2 FROM t2 ORDER BY i1; +i1 i2 +4 NULL +5 0 +FLUSH STATUS; + +SELECT i1, i2 +FROM t1 +WHERE (i1, i2) +NOT IN (SELECT i1, i2 FROM t2); +i1 i2 +1 NULL +2 3 + +# Check that the subquery only has to be evaluated once +# for all-NULL values even though there are two (NULL,NULL) records +# Baseline: +SHOW STATUS LIKE '%Handler_read_rnd_next'; +Variable_name Value +Handler_read_rnd_next 17 + +INSERT INTO t1 VALUES (NULL, NULL); +FLUSH STATUS; + +SELECT i1, i2 +FROM t1 +WHERE (i1, i2) +NOT IN (SELECT i1, i2 FROM t2); +i1 i2 +1 NULL +2 3 + +# Handler_read_rnd_next should be one more than baseline +# (read record from t1, but do not read from t2) +SHOW STATUS LIKE '%Handler_read_rnd_next'; +Variable_name Value +Handler_read_rnd_next 18 +DROP TABLE t1,t2; +End of 5.1 tests diff --git a/mysql-test/t/subselect3.test b/mysql-test/t/subselect3.test index 7a2a9f328ef..fab0a462157 100644 --- a/mysql-test/t/subselect3.test +++ b/mysql-test/t/subselect3.test @@ -728,3 +728,69 @@ where from t4, t5 limit 2)); drop table t0, t1, t2, t3, t4, t5; + +--echo # +--echo # BUG#48177 - SELECTs with NOT IN subqueries containing NULL +--echo # values return too many records +--echo # + +CREATE TABLE t1 ( + i1 int DEFAULT NULL, + i2 int DEFAULT NULL +) ; + +INSERT INTO t1 VALUES (1, NULL); +INSERT INTO t1 VALUES (2, 3); +INSERT INTO t1 VALUES (4, NULL); +INSERT INTO t1 VALUES (4, 0); +INSERT INTO t1 VALUES (NULL, NULL); + +CREATE TABLE t2 ( + i1 int DEFAULT NULL, + i2 int DEFAULT NULL +) ; + +INSERT INTO t2 VALUES (4, NULL); +INSERT INTO t2 VALUES (5, 0); + +--echo +--echo Data in t1 +SELECT i1, i2 FROM t1; + +--echo +--echo Data in subquery (should be filtered out) +SELECT i1, i2 FROM t2 ORDER BY i1; + +FLUSH STATUS; + +--echo +SELECT i1, i2 +FROM t1 +WHERE (i1, i2) + NOT IN (SELECT i1, i2 FROM t2); + +--echo +--echo # Check that the subquery only has to be evaluated once +--echo # for all-NULL values even though there are two (NULL,NULL) records +--echo # Baseline: +SHOW STATUS LIKE '%Handler_read_rnd_next'; + +--echo +INSERT INTO t1 VALUES (NULL, NULL); +FLUSH STATUS; + +--echo +SELECT i1, i2 +FROM t1 +WHERE (i1, i2) + NOT IN (SELECT i1, i2 FROM t2); + +--echo +--echo # Handler_read_rnd_next should be one more than baseline +--echo # (read record from t1, but do not read from t2) +SHOW STATUS LIKE '%Handler_read_rnd_next'; + + +DROP TABLE t1,t2; + +--echo End of 5.1 tests diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index df92c165f2d..c6b88cd8188 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -1585,61 +1585,73 @@ longlong Item_in_optimizer::val_int() if (cache->null_value) { + /* + We're evaluating + " [NOT] IN (SELECT ...)" + where one or more of the outer values is NULL. + */ if (((Item_in_subselect*)args[1])->is_top_level_item()) { /* - We're evaluating "NULL IN (SELECT ...)". The result can be NULL or - FALSE, and we can return one instead of another. Just return NULL. + We're evaluating a top level item, e.g. + " IN (SELECT ...)", + and in this case a NULL value in the outer_value_list means + that the result shall be NULL/FALSE (makes no difference for + top level items). The cached value is NULL, so just return + NULL. */ null_value= 1; } else { - if (!((Item_in_subselect*)args[1])->is_correlated && - result_for_null_param != UNKNOWN) + /* + We're evaluating an item where a NULL value in either the + outer or inner value list does not automatically mean that we + can return NULL/FALSE. An example of such a query is + " NOT IN (SELECT ...)" + The result when there is at least one NULL value is: NULL if the + SELECT evaluated over the non-NULL values produces at least + one row, FALSE otherwise + */ + Item_in_subselect *item_subs=(Item_in_subselect*)args[1]; + bool all_left_cols_null= true; + const uint ncols= cache->cols(); + + /* + Turn off the predicates that are based on column compares for + which the left part is currently NULL + */ + for (uint i= 0; i < ncols; i++) { - /* Use cached value from previous execution */ - null_value= result_for_null_param; + if (cache->element_index(i)->null_value) + item_subs->set_cond_guard_var(i, FALSE); + else + all_left_cols_null= false; } - else + + if (!((Item_in_subselect*)args[1])->is_correlated && + all_left_cols_null && result_for_null_param != UNKNOWN) { - /* - We're evaluating "NULL IN (SELECT ...)". The result is: - FALSE if SELECT produces an empty set, or - NULL otherwise. - We disable the predicates we've pushed down into subselect, run the - subselect and see if it has produced any rows. + /* + This is a non-correlated subquery, all values in the outer + value list are NULL, and we have already evaluated the + subquery for all NULL values: Return the same result we + did last time without evaluating the subquery. */ - Item_in_subselect *item_subs=(Item_in_subselect*)args[1]; - if (cache->cols() == 1) - { - item_subs->set_cond_guard_var(0, FALSE); - (void) args[1]->val_bool_result(); - result_for_null_param= null_value= !item_subs->engine->no_rows(); - item_subs->set_cond_guard_var(0, TRUE); - } - else - { - uint i; - uint ncols= cache->cols(); - /* - Turn off the predicates that are based on column compares for - which the left part is currently NULL - */ - for (i= 0; i < ncols; i++) - { - if (cache->element_index(i)->null_value) - item_subs->set_cond_guard_var(i, FALSE); - } - - (void) args[1]->val_bool_result(); - result_for_null_param= null_value= !item_subs->engine->no_rows(); - - /* Turn all predicates back on */ - for (i= 0; i < ncols; i++) - item_subs->set_cond_guard_var(i, TRUE); - } + null_value= result_for_null_param; + } + else + { + /* The subquery has to be evaluated */ + (void) args[1]->val_bool_result(); + null_value= !item_subs->engine->no_rows(); + if (all_left_cols_null) + result_for_null_param= null_value; } + + /* Turn all predicates back on */ + for (uint i= 0; i < ncols; i++) + item_subs->set_cond_guard_var(i, TRUE); } return 0; }