From 8ee7e48de8a12e506496278d79daa053f3a354cd Mon Sep 17 00:00:00 2001 From: "thek@adventure.(none)" <> Date: Thu, 12 Jul 2007 13:29:51 +0200 Subject: [PATCH] Bug#28249 Query Cache returns wrong result with concurrent insert / certain lock A race condition in the integration between MyISAM and the query cache code caused the query cache to fail to invalidate itself on concurrently inserted data. This patch fix this problem by using the existing handler interface which, upon each statement cache attempt, compare the size of the table as viewed from the cache writing thread and with any snap shot of the global table state. If the two sizes are different the global table size is unknown and the current statement can't be cached. --- mysql-test/r/query_cache.result | 39 +++++++++++++++++ mysql-test/t/query_cache.test | 73 ++++++++++++++++++++++++++++++++ sql/ha_myisam.cc | 74 +++++++++++++++++++++++++++++++++ sql/ha_myisam.h | 7 ++++ sql/handler.h | 45 +++++++++++++++++--- 5 files changed, 232 insertions(+), 6 deletions(-) diff --git a/mysql-test/r/query_cache.result b/mysql-test/r/query_cache.result index 79471ee5c02..58b8aad6fc9 100644 --- a/mysql-test/r/query_cache.result +++ b/mysql-test/r/query_cache.result @@ -1409,3 +1409,42 @@ set GLOBAL query_cache_type=default; set GLOBAL query_cache_limit=default; set GLOBAL query_cache_min_res_unit=default; set GLOBAL query_cache_size= default; +Bug#28249 Query Cache returns wrong result with concurrent insert/ certain lock +set GLOBAL query_cache_type=1; +set GLOBAL query_cache_limit=10000; +set GLOBAL query_cache_min_res_unit=0; +set GLOBAL query_cache_size= 100000; +flush tables; +drop table if exists t1, t2; +create table t1 (a int); +create table t2 (a int); +insert into t1 values (1),(2),(3); +Locking table T2 with a write lock. +lock table t2 write; +Select blocked by write lock. +select *, (select count(*) from t2) from t1;; +Sleeing is ok, because selecting should be done very fast. +Inserting into table T1. +insert into t1 values (4); +Unlocking the tables. +unlock tables; +Collecting result from previously blocked select. +Next select should contain 4 rows, as the insert is long finished. +select *, (select count(*) from t2) from t1; +a (select count(*) from t2) +1 0 +2 0 +3 0 +4 0 +reset query cache; +select *, (select count(*) from t2) from t1; +a (select count(*) from t2) +1 0 +2 0 +3 0 +4 0 +drop table t1,t2; +set GLOBAL query_cache_type=default; +set GLOBAL query_cache_limit=default; +set GLOBAL query_cache_min_res_unit=default; +set GLOBAL query_cache_size=default; diff --git a/mysql-test/t/query_cache.test b/mysql-test/t/query_cache.test index 1ef104f820b..17073e039c2 100644 --- a/mysql-test/t/query_cache.test +++ b/mysql-test/t/query_cache.test @@ -970,4 +970,77 @@ set GLOBAL query_cache_limit=default; set GLOBAL query_cache_min_res_unit=default; set GLOBAL query_cache_size= default; +# +# Bug #28249 Query Cache returns wrong result with concurrent insert / certain lock +# +--echo Bug#28249 Query Cache returns wrong result with concurrent insert/ certain lock +connect (user1,localhost,root,,test,,); +connect (user2,localhost,root,,test,,); +connect (user3,localhost,root,,test,,); + +connection user1; + +set GLOBAL query_cache_type=1; +set GLOBAL query_cache_limit=10000; +set GLOBAL query_cache_min_res_unit=0; +set GLOBAL query_cache_size= 100000; + +flush tables; +--disable_warnings +drop table if exists t1, t2; +--enable_warnings +create table t1 (a int); +create table t2 (a int); +insert into t1 values (1),(2),(3); +connection user2; +--echo Locking table T2 with a write lock. +lock table t2 write; + +connection user1; +--echo Select blocked by write lock. +--send select *, (select count(*) from t2) from t1; +--echo Sleeing is ok, because selecting should be done very fast. +sleep 5; + +connection user3; +--echo Inserting into table T1. +insert into t1 values (4); + +connection user2; +--echo Unlocking the tables. +unlock tables; + +connection user1; +--echo Collecting result from previously blocked select. +# +# Since the lock ordering rule in thr_multi_lock depends on +# pointer values, from execution to execution we might have +# different lock order, and therefore, sometimes lock t1 and block +# on t2, and sometimes block on t2 right away. In the second case, +# the following insert succeeds, and only then this select can +# proceed, and we actually test nothing, as the very first select +# returns 4 rows right away. +# It's fine to have a test case that covers the problematic area +# at least once in a while. +# We, however, need to disable the result log here to make the +# test repeatable. +--disable_result_log +--reap +--enable_result_log +--echo Next select should contain 4 rows, as the insert is long finished. +select *, (select count(*) from t2) from t1; +reset query cache; +select *, (select count(*) from t2) from t1; + +drop table t1,t2; + +connection default; +disconnect user1; +disconnect user2; +disconnect user3; +set GLOBAL query_cache_type=default; +set GLOBAL query_cache_limit=default; +set GLOBAL query_cache_min_res_unit=default; +set GLOBAL query_cache_size=default; # End of 5.0 tests + diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 6d8a770175d..ebaa1d6bd3b 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -1922,3 +1922,77 @@ uint ha_myisam::checksum() const return (uint)file->state->checksum; } +#ifdef HAVE_QUERY_CACHE +/** + @brief Register a named table with a call back function to the query cache. + + @param thd The thread handle + @param table_key A pointer to the table name in the table cache + @param key_length The length of the table name + @param[out] engine_callback The pointer to the storage engine call back + function, currently 0 + @param[out] engine_data Engine data will be set to 0. + + @note Despite the name of this function, it is used to check each statement + before it is cached and not to register a table or callback function. + + @see handler::register_query_cache_table + + @return The error code. The engine_data and engine_callback will be set to 0. + @retval TRUE Success + @retval FALSE An error occured +*/ + +my_bool ha_myisam::register_query_cache_table(THD *thd, char *table_name, + uint table_name_len, + qc_engine_callback + *engine_callback, + ulonglong *engine_data) +{ + /* + No call back function is needed to determine if a cached statement + is valid or not. + */ + *engine_callback= 0; + + /* + No engine data is needed. + */ + *engine_data= 0; + + /* + If a concurrent INSERT has happened just before the currently processed + SELECT statement, the total size of the table is unknown. + + To determine if the table size is known, the current thread's snap shot of + the table size with the actual table size are compared. + + If the table size is unknown the SELECT statement can't be cached. + */ + ulonglong actual_data_file_length; + ulonglong current_data_file_length; + + /* + POSIX visibility rules specify that "2. Whatever memory values a + thread can see when it unlocks a mutex <...> can also be seen by any + thread that later locks the same mutex". In this particular case, + concurrent insert thread had modified the data_file_length in + MYISAM_SHARE before it has unlocked (or even locked) + structure_guard_mutex. So, here we're guaranteed to see at least that + value after we've locked the same mutex. We can see a later value + (modified by some other thread) though, but it's ok, as we only want + to know if the variable was changed, the actual new value doesn't matter + */ + actual_data_file_length= file->s->state.state.data_file_length; + current_data_file_length= file->save_state.data_file_length; + + if (current_data_file_length != actual_data_file_length) + { + /* Don't cache current statement. */ + return FALSE; + } + + /* It is ok to try to cache current statement. */ + return TRUE; +} +#endif diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index b186d9c7bb8..536ea211820 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -127,4 +127,11 @@ class ha_myisam: public handler int dump(THD* thd, int fd); int net_read_dump(NET* net); #endif +#ifdef HAVE_QUERY_CACHE + my_bool register_query_cache_table(THD *thd, char *table_key, + uint key_length, + qc_engine_callback + *engine_callback, + ulonglong *engine_data); +#endif }; diff --git a/sql/handler.h b/sql/handler.h index 9863d541b5f..d25796d8546 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -841,16 +841,49 @@ public: /* Type of table for caching query */ virtual uint8 table_cache_type() { return HA_CACHE_TBL_NONTRANSACT; } - /* ask handler about permission to cache table when query is to be cached */ + + + /** + @brief Register a named table with a call back function to the query cache. + + @param thd The thread handle + @param table_key A pointer to the table name in the table cache + @param key_length The length of the table name + @param[out] engine_callback The pointer to the storage engine call back + function + @param[out] engine_data Storage engine specific data which could be + anything + + This method offers the storage engine, the possibility to store a reference + to a table name which is going to be used with query cache. + The method is called each time a statement is written to the cache and can + be used to verify if a specific statement is cachable. It also offers + the possibility to register a generic (but static) call back function which + is called each time a statement is matched against the query cache. + + @note If engine_data supplied with this function is different from + engine_data supplied with the callback function, and the callback returns + FALSE, a table invalidation on the current table will occur. + + @return Upon success the engine_callback will point to the storage engine + call back function, if any, and engine_data will point to any storage + engine data used in the specific implementation. + @retval TRUE Success + @retval FALSE The specified table or current statement should not be + cached + */ + virtual my_bool register_query_cache_table(THD *thd, char *table_key, - uint key_length, - qc_engine_callback - *engine_callback, - ulonglong *engine_data) + uint key_length, + qc_engine_callback + *engine_callback, + ulonglong *engine_data) { *engine_callback= 0; - return 1; + return TRUE; } + + /* RETURN true Primary key (if there is one) is clustered key covering all fields