diff --git a/mysql-test/r/subselect4.result b/mysql-test/r/subselect4.result index fad65d3f000..63265970c4b 100644 --- a/mysql-test/r/subselect4.result +++ b/mysql-test/r/subselect4.result @@ -77,6 +77,92 @@ Note 1249 Select 2 was reduced during optimization CREATE VIEW v1 AS SELECT 1 LIKE ( 1 IN ( SELECT 1 ) ); CREATE VIEW v2 AS SELECT 1 LIKE '%' ESCAPE ( 1 IN ( SELECT 1 ) ); DROP VIEW v1, v2; +# +# Bug#51070: Query with a NOT IN subquery predicate returns a wrong +# result set +# +CREATE TABLE t1 ( a INT, b INT ); +INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL ); +CREATE TABLE t2 ( c INT, d INT ); +INSERT INTO t2 VALUES ( NULL, 3 ), ( NULL, 4 ); +CREATE TABLE t3 ( e INT, f INT ); +INSERT INTO t3 VALUES ( NULL, NULL ), ( NULL, NULL ); +CREATE TABLE t4 ( a INT ); +INSERT INTO t4 VALUES (1), (2), (3); +CREATE TABLE t5 ( a INT ); +INSERT INTO t5 VALUES (NULL), (2); +EXPLAIN +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ); +id select_type table type possible_keys key key_len ref rows Extra +x PRIMARY x x x x x x x x +x DEPENDENT SUBQUERY x x x x x x x x +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ); +a b +EXPLAIN +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where +2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using where +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL; +a b +1 NULL +2 NULL +SELECT * FROM t1 WHERE ( a, b ) IN ( SELECT c, d FROM t2 ) IS NULL; +a b +1 NULL +2 NULL +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS UNKNOWN; +a b +1 NULL +2 NULL +SELECT * FROM t1 WHERE (( a, b ) NOT IN ( SELECT c, d FROM t2 )) IS UNKNOWN; +a b +1 NULL +2 NULL +SELECT * FROM t1 WHERE 1 = 1 AND ( a, b ) NOT IN ( SELECT c, d FROM t2 ); +a b +EXPLAIN +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 ); +id select_type table type possible_keys key key_len ref rows Extra +x PRIMARY x x x x x x x x +x DEPENDENT SUBQUERY x x x x x x x x +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 ); +a b +EXPLAIN +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 ); +id select_type table type possible_keys key key_len ref rows Extra +x PRIMARY x x x x x x x x +x DEPENDENT SUBQUERY x x x x x x x x +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 ); +c d +EXPLAIN +SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 ); +id select_type table type possible_keys key key_len ref rows Extra +x PRIMARY x x x x x x x x +x DEPENDENT SUBQUERY x x x x x x x x +SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 ); +e f +EXPLAIN +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 ); +id select_type table type possible_keys key key_len ref rows Extra +x PRIMARY x x x x x x x x +x DEPENDENT SUBQUERY x x x x x x x x +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 ); +c d +SELECT * FROM t1 WHERE ( a, b ) NOT IN +( SELECT c, d FROM t2 WHERE c = 1 AND c <> 1 ); +a b +1 NULL +2 NULL +SELECT * FROM t1 WHERE b NOT IN ( SELECT c FROM t2 WHERE c = 1 ); +a b +1 NULL +2 NULL +SELECT * FROM t1 WHERE NULL NOT IN ( SELECT c FROM t2 WHERE c = 1 AND c <> 1 ); +a b +1 NULL +2 NULL +DROP TABLE t1, t2, t3, t4, t5; # # End of 5.1 tests. # diff --git a/mysql-test/t/subselect4.test b/mysql-test/t/subselect4.test index 2c6efdbaac2..eb8baf9bac8 100644 --- a/mysql-test/t/subselect4.test +++ b/mysql-test/t/subselect4.test @@ -74,6 +74,68 @@ CREATE VIEW v1 AS SELECT 1 LIKE ( 1 IN ( SELECT 1 ) ); CREATE VIEW v2 AS SELECT 1 LIKE '%' ESCAPE ( 1 IN ( SELECT 1 ) ); DROP VIEW v1, v2; +--echo # +--echo # Bug#51070: Query with a NOT IN subquery predicate returns a wrong +--echo # result set +--echo # +CREATE TABLE t1 ( a INT, b INT ); +INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL ); + +CREATE TABLE t2 ( c INT, d INT ); +INSERT INTO t2 VALUES ( NULL, 3 ), ( NULL, 4 ); + +CREATE TABLE t3 ( e INT, f INT ); +INSERT INTO t3 VALUES ( NULL, NULL ), ( NULL, NULL ); + +CREATE TABLE t4 ( a INT ); +INSERT INTO t4 VALUES (1), (2), (3); + +CREATE TABLE t5 ( a INT ); +INSERT INTO t5 VALUES (NULL), (2); + +--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x +EXPLAIN +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ); +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ); + +EXPLAIN +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL; +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL; +SELECT * FROM t1 WHERE ( a, b ) IN ( SELECT c, d FROM t2 ) IS NULL; +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS UNKNOWN; +SELECT * FROM t1 WHERE (( a, b ) NOT IN ( SELECT c, d FROM t2 )) IS UNKNOWN; + +SELECT * FROM t1 WHERE 1 = 1 AND ( a, b ) NOT IN ( SELECT c, d FROM t2 ); + +--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x +EXPLAIN +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 ); +SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 ); + +--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x +EXPLAIN +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 ); +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 ); + +--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x +EXPLAIN +SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 ); +SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 ); + +--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x +EXPLAIN +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 ); +SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 ); + +SELECT * FROM t1 WHERE ( a, b ) NOT IN + ( SELECT c, d FROM t2 WHERE c = 1 AND c <> 1 ); + +SELECT * FROM t1 WHERE b NOT IN ( SELECT c FROM t2 WHERE c = 1 ); + +SELECT * FROM t1 WHERE NULL NOT IN ( SELECT c FROM t2 WHERE c = 1 AND c <> 1 ); + +DROP TABLE t1, t2, t3, t4, t5; + --echo # --echo # End of 5.1 tests. diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index fe4616f64d7..605d12742f8 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -1751,6 +1751,76 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref) } +/** + The implementation of optimized \ [NOT] IN \ + predicates. The implementation works as follows. + + For the current value of the outer expression + + - If it contains only NULL values, the original (before rewrite by the + Item_in_subselect rewrite methods) inner subquery is non-correlated and + was previously executed, there is no need to re-execute it, and the + previous return value is returned. + + - If it contains NULL values, check if there is a partial match for the + inner query block by evaluating it. For clarity we repeat here the + transformation previously performed on the sub-query. The expression + + + ( oc_1, ..., oc_n ) + \ + ( SELECT ic_1, ..., ic_n + FROM \ + WHERE \ + ) + + + was transformed into + + + ( oc_1, ..., oc_n ) + \ + ( SELECT ic_1, ..., ic_n + FROM \ + WHERE \ AND ... ( ic_k = oc_k OR ic_k IS NULL ) + HAVING ... NOT ic_k IS NULL + ) + + + The evaluation will now proceed according to special rules set up + elsewhere. These rules include: + + - The HAVING NOT \ IS NULL conditions added by the + aforementioned rewrite methods will detect whether they evaluated (and + rejected) a NULL value and if so, will cause the subquery to evaluate + to NULL. + + - The added WHERE and HAVING conditions are present only for those inner + columns that correspond to outer column that are not NULL at the moment. + + - If there is an eligible index for executing the subquery, the special + access method "Full scan on NULL key" is employed which ensures that + the inner query will detect if there are NULL values resulting from the + inner query. This access method will quietly resort to table scan if it + needs to find NULL values as well. + + - Under these conditions, the sub-query need only be evaluated in order to + find out whether it produced any rows. + + - If it did, we know that there was a partial match since there are + NULL values in the outer row expression. + + - If it did not, the result is FALSE or UNKNOWN. If at least one of the + HAVING sub-predicates rejected a NULL value corresponding to an outer + non-NULL, and hence the inner query block returns UNKNOWN upon + evaluation, there was a partial match and the result is UNKNOWN. + + - If it contains no NULL values, the call is forwarded to the inner query + block. + + @see Item_in_subselect::val_bool() + @see Item_is_not_null_test::val_int() + */ longlong Item_in_optimizer::val_int() { bool tmp; @@ -1804,7 +1874,7 @@ longlong Item_in_optimizer::val_int() all_left_cols_null= false; } - if (!((Item_in_subselect*)args[1])->is_correlated && + if (!item_subs->is_correlated && all_left_cols_null && result_for_null_param != UNKNOWN) { /* @@ -1818,8 +1888,11 @@ longlong Item_in_optimizer::val_int() else { /* The subquery has to be evaluated */ - (void) args[1]->val_bool_result(); - null_value= !item_subs->engine->no_rows(); + (void) item_subs->val_bool_result(); + if (item_subs->engine->no_rows()) + null_value= item_subs->null_value; + else + null_value= TRUE; if (all_left_cols_null) result_for_null_param= null_value; } diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index b93ea6f241b..d0c933df845 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -47,7 +47,7 @@ Item_subselect::Item_subselect(): item value is NULL if select_subselect not changed this value (i.e. some rows will be found returned) */ - null_value= 1; + null_value= TRUE; } @@ -427,9 +427,9 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type) void Item_singlerow_subselect::reset() { - null_value= 1; + null_value= TRUE; if (value) - value->null_value= 1; + value->null_value= TRUE; } @@ -574,7 +574,7 @@ double Item_singlerow_subselect::val_real() DBUG_ASSERT(fixed == 1); if (!exec() && !value->null_value) { - null_value= 0; + null_value= FALSE; return value->val_real(); } else @@ -589,7 +589,7 @@ longlong Item_singlerow_subselect::val_int() DBUG_ASSERT(fixed == 1); if (!exec() && !value->null_value) { - null_value= 0; + null_value= FALSE; return value->val_int(); } else @@ -603,7 +603,7 @@ String *Item_singlerow_subselect::val_str(String *str) { if (!exec() && !value->null_value) { - null_value= 0; + null_value= FALSE; return value->val_str(str); } else @@ -618,7 +618,7 @@ my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value) { if (!exec() && !value->null_value) { - null_value= 0; + null_value= FALSE; return value->val_decimal(decimal_value); } else @@ -633,7 +633,7 @@ bool Item_singlerow_subselect::val_bool() { if (!exec() && !value->null_value) { - null_value= 0; + null_value= FALSE; return value->val_bool(); } else @@ -651,7 +651,7 @@ Item_exists_subselect::Item_exists_subselect(st_select_lex *select_lex): bool val_bool(); init(select_lex, new select_exists_subselect(this)); max_columns= UINT_MAX; - null_value= 0; //can't be NULL + null_value= FALSE; //can't be NULL maybe_null= 0; //can't be NULL value= 0; DBUG_VOID_RETURN; @@ -814,15 +814,14 @@ double Item_in_subselect::val_real() */ DBUG_ASSERT(0); DBUG_ASSERT(fixed == 1); - null_value= 0; + null_value= was_null= FALSE; if (exec()) { reset(); - null_value= 1; return 0; } if (was_null && !value) - null_value= 1; + null_value= TRUE; return (double) value; } @@ -835,15 +834,14 @@ longlong Item_in_subselect::val_int() */ DBUG_ASSERT(0); DBUG_ASSERT(fixed == 1); - null_value= 0; + null_value= was_null= FALSE; if (exec()) { reset(); - null_value= 1; return 0; } if (was_null && !value) - null_value= 1; + null_value= TRUE; return value; } @@ -856,16 +854,15 @@ String *Item_in_subselect::val_str(String *str) */ DBUG_ASSERT(0); DBUG_ASSERT(fixed == 1); - null_value= 0; + null_value= was_null= FALSE; if (exec()) { reset(); - null_value= 1; return 0; } if (was_null && !value) { - null_value= 1; + null_value= TRUE; return 0; } str->set((ulonglong)value, &my_charset_bin); @@ -876,20 +873,14 @@ String *Item_in_subselect::val_str(String *str) bool Item_in_subselect::val_bool() { DBUG_ASSERT(fixed == 1); - null_value= 0; + null_value= was_null= FALSE; if (exec()) { reset(); - /* - Must mark the IN predicate as NULL so as to make sure an enclosing NOT - predicate will return FALSE. See the comments in - subselect_uniquesubquery_engine::copy_ref_key for further details. - */ - null_value= 1; return 0; } if (was_null && !value) - null_value= 1; + null_value= TRUE; return value; } @@ -900,16 +891,15 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value) method should not be used */ DBUG_ASSERT(0); - null_value= 0; + null_value= was_null= FALSE; DBUG_ASSERT(fixed == 1); if (exec()) { reset(); - null_value= 1; return 0; } if (was_null && !value) - null_value= 1; + null_value= TRUE; int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value); return decimal_value; }