From b6fe4713fe14c059c4bceca60ed1863a90b7e512 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 23 Aug 2010 12:46:25 +0300 Subject: [PATCH 1/3] Fix for LP#612894 Some aggregate functions (such as MIN MAX) work incorrectly in subqueries after getting NULL value mysql-test/r/group_by.result: Added test that showed problems that no_rows_in_results() didn't work for expressions mysql-test/r/subselect4.result: Test case for LP#612894 mysql-test/t/group_by.test: Added test that showed problems that no_rows_in_results() didn't work for expressions mysql-test/t/subselect4.test: Test case for LP#612894 sql/item.h: Added restore_to_before_no_rows_in_result() Added function processor for no_rows_in_results() and restore_to_before_no_rows_in_results() to ensure it works with functions Fix that above functions are handled by Item_ref() sql/item_func.h: Ensure that no_rows_in_results() and restore_to_before_no_rows_in_result() are called for all function arguments sql/item_sum.cc: Added restore_to_before_no_rows_in_result() to restore settings after Item_sum_hybrid::no_rows_in_result() was called. This is needed to handle the case where we have made 'make_const()' on the item in opt_sum(), but the item will be reused again in a sub query. Ignore multiple calls to no_rows_in_result() as Item_ref is calling it twice. sql/item_sum.h: Added restore_to_before_no_rows_in_result(); sql/sql_select.cc: Added reset of no_rows_in_result() for JOIN::reinit() sql/sql_select.h: Added marker if no_rows_in_result() is called. --- mysql-test/r/group_by.result | 17 +++++++++++++++++ mysql-test/r/subselect4.result | 23 +++++++++++++++++++++++ mysql-test/t/group_by.test | 17 +++++++++++++++++ mysql-test/t/subselect4.test | 26 ++++++++++++++++++++++++++ sql/item.h | 24 ++++++++++++++++++++++++ sql/item_func.h | 15 +++++++++++++++ sql/item_sum.cc | 18 ++++++++++++++++-- sql/item_sum.h | 4 +++- sql/sql_select.cc | 13 +++++++++++++ sql/sql_select.h | 29 ++++++++++++++++------------- 10 files changed, 170 insertions(+), 16 deletions(-) diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result index 645dd460735..deda00364aa 100644 --- a/mysql-test/r/group_by.result +++ b/mysql-test/r/group_by.result @@ -1809,5 +1809,22 @@ SELECT MAX(t2.a) FROM t2 LEFT JOIN t1 ON t2.a = t1.a; MAX(t2.a) 2 DROP TABLE t1, t2; +CREATE TABLE t1 (a int(11) NOT NULL); +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 ( +key_col int(11) NOT NULL, +KEY (key_col) +); +INSERT INTO t2 VALUES (1),(2); +select min(t2.key_col) from t1,t2 where t1.a=1; +min(t2.key_col) +1 +select min(t2.key_col) from t1,t2 where t1.a > 1000; +min(t2.key_col) +NULL +select min(t2.key_col)+1 from t1,t2 where t1.a> 1000; +min(t2.key_col)+1 +NULL +drop table t1,t2; # # End of 5.1 tests diff --git a/mysql-test/r/subselect4.result b/mysql-test/r/subselect4.result index 482e0045840..153fbed9622 100644 --- a/mysql-test/r/subselect4.result +++ b/mysql-test/r/subselect4.result @@ -59,3 +59,26 @@ FROM t3 WHERE 1 = 0 GROUP BY 1; (SELECT 1 FROM t1,t2 WHERE t2.b > t3.b) DROP TABLE t1,t2,t3; End of 5.0 tests. +CREATE TABLE t1 (col_int_nokey int(11) NOT NULL, col_varchar_nokey varchar(1) NOT NULL) engine=myisam; +INSERT INTO t1 VALUES (2,'s'),(0,'v'),(2,'s'); +CREATE TABLE t2 ( +pk int(11) NOT NULL AUTO_INCREMENT, +`col_int_key` int(11) NOT NULL, +col_varchar_key varchar(1) NOT NULL, +PRIMARY KEY (pk), +KEY `col_int_key` (`col_int_key`), +KEY `col_varchar_key` (`col_varchar_key`) +) ENGINE=MyISAM; +INSERT INTO t2 VALUES (4,10,'g'), (5,20,'v'); +SELECT t1.col_int_nokey,(SELECT MIN( t2_a.col_int_key ) FROM t2 t2_a, t2 t2_b, t1 t1_a WHERE t1_a.col_varchar_nokey = t2_b.col_varchar_key and t1.col_int_nokey ) as sub FROM t1; +col_int_nokey sub +2 10 +0 NULL +2 10 +SELECT t1.col_int_nokey,(SELECT MIN( t2_a.col_int_key ) +1 FROM t2 t2_a, t2 t2_b, t1 t1_a WHERE t1_a.col_varchar_nokey = t2_b.col_varchar_key and t1.col_int_nokey ) as sub FROM t1; +col_int_nokey sub +2 11 +0 NULL +2 11 +DROP TABLE t1,t2; +End of 5.1 tests. diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test index c5b27ee1a62..c640a58d597 100644 --- a/mysql-test/t/group_by.test +++ b/mysql-test/t/group_by.test @@ -1219,6 +1219,23 @@ EXPLAIN SELECT MAX(t2.a) FROM t2 LEFT JOIN t1 ON t2.a = t1.a; SELECT MAX(t2.a) FROM t2 LEFT JOIN t1 ON t2.a = t1.a; DROP TABLE t1, t2; +# +# min() returns wrong value when used in expression when there is no matching +# rows +# + +CREATE TABLE t1 (a int(11) NOT NULL); +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 ( + key_col int(11) NOT NULL, + KEY (key_col) +); +INSERT INTO t2 VALUES (1),(2); + +select min(t2.key_col) from t1,t2 where t1.a=1; +select min(t2.key_col) from t1,t2 where t1.a > 1000; +select min(t2.key_col)+1 from t1,t2 where t1.a> 1000; +drop table t1,t2; --echo # diff --git a/mysql-test/t/subselect4.test b/mysql-test/t/subselect4.test index 440eca22828..c287091ab54 100644 --- a/mysql-test/t/subselect4.test +++ b/mysql-test/t/subselect4.test @@ -62,3 +62,29 @@ FROM t3 WHERE 1 = 0 GROUP BY 1; DROP TABLE t1,t2,t3; --echo End of 5.0 tests. + +# +# Fix for LP#612894 +# Some aggregate functions (such as MIN MAX) work incorrectly in subqueries +# after getting NULL value +# + +CREATE TABLE t1 (col_int_nokey int(11) NOT NULL, col_varchar_nokey varchar(1) NOT NULL) engine=myisam; +INSERT INTO t1 VALUES (2,'s'),(0,'v'),(2,'s'); +CREATE TABLE t2 ( + pk int(11) NOT NULL AUTO_INCREMENT, + `col_int_key` int(11) NOT NULL, + col_varchar_key varchar(1) NOT NULL, + PRIMARY KEY (pk), + KEY `col_int_key` (`col_int_key`), + KEY `col_varchar_key` (`col_varchar_key`) +) ENGINE=MyISAM; +INSERT INTO t2 VALUES (4,10,'g'), (5,20,'v'); + +SELECT t1.col_int_nokey,(SELECT MIN( t2_a.col_int_key ) FROM t2 t2_a, t2 t2_b, t1 t1_a WHERE t1_a.col_varchar_nokey = t2_b.col_varchar_key and t1.col_int_nokey ) as sub FROM t1; + +SELECT t1.col_int_nokey,(SELECT MIN( t2_a.col_int_key ) +1 FROM t2 t2_a, t2 t2_b, t1 t1_a WHERE t1_a.col_varchar_nokey = t2_b.col_varchar_key and t1.col_int_nokey ) as sub FROM t1; + +DROP TABLE t1,t2; + +--echo End of 5.1 tests. diff --git a/sql/item.h b/sql/item.h index 45cfb4881dd..af0d7648689 100644 --- a/sql/item.h +++ b/sql/item.h @@ -852,6 +852,7 @@ public: set value of aggregate function in case of no rows for grouping were found */ virtual void no_rows_in_result() {} + virtual void restore_to_before_no_rows_in_result() {} virtual Item *copy_or_same(THD *thd) { return this; } virtual Item *copy_andor_structure(THD *thd) { return this; } virtual Item *real_item() { return this; } @@ -908,6 +909,21 @@ public: virtual bool register_field_in_read_map(uchar *arg) { return 0; } virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; } virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; } + + /* To call bool function for all arguments */ + struct bool_func_call_args + { + Item *original_func_item; + void (Item::*bool_function)(); + }; + bool call_bool_func_processor(uchar *org_item) + { + bool_func_call_args *info= (bool_func_call_args*) org_item; + /* Avoid recursion, as walk also calls for original item */ + if (info->original_func_item != this) + (this->*(info->bool_function))(); + return FALSE; + } /* Check if a partition function is allowed SYNOPSIS @@ -2321,6 +2337,14 @@ public: return (*ref)->walk(processor, walk_subquery, arg) || (this->*processor)(arg); } + void no_rows_in_result() + { + (*ref)->no_rows_in_result(); + } + void restore_to_before_no_rows_in_result() + { + (*ref)->restore_to_before_no_rows_in_result(); + } virtual void print(String *str, enum_query_type query_type); bool result_as_longlong() { diff --git a/sql/item_func.h b/sql/item_func.h index b7994bd941e..2b466960af2 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -217,6 +217,21 @@ public: { return functype() == *(Functype *) arg; } + + void no_rows_in_result() + { + bool_func_call_args info; + info.original_func_item= this; + info.bool_function= &Item::no_rows_in_result; + walk(&Item::call_bool_func_processor, FALSE, (uchar*) &info); + } + void restore_to_before_no_rows_in_result() + { + bool_func_call_args info; + info.original_func_item= this; + info.bool_function= &Item::restore_to_before_no_rows_in_result; + walk(&Item::call_bool_func_processor, FALSE, (uchar*) &info); + } }; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 2f79fd65ff3..b3b07119066 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1638,8 +1638,22 @@ void Item_sum_hybrid::cleanup() void Item_sum_hybrid::no_rows_in_result() { - was_values= FALSE; - clear(); + /* We may be called here twice in case of ref field in function */ + if (was_values) + { + was_values= FALSE; + was_null_value= value->null_value; + clear(); + } +} + +void Item_sum_hybrid::restore_to_before_no_rows_in_result() +{ + if (!was_values) + { + was_values= TRUE; + null_value= value->null_value= was_null_value; + } } diff --git a/sql/item_sum.h b/sql/item_sum.h index b82f2d961f1..ac6a56400a4 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -496,7 +496,7 @@ public: enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; } void reset_field() {} // not used void update_field() {} // not used - virtual void no_rows_in_result() {} + void no_rows_in_result() {} void fix_length_and_dec(); enum Item_result result_type () const { return val.traits->type(); } virtual void calculate_val_and_count(); @@ -845,6 +845,7 @@ protected: enum_field_types hybrid_field_type; int cmp_sign; bool was_values; // Set if we have found at least one row (for max/min only) + bool was_null_value; public: Item_sum_hybrid(Item *item_par,int sign) @@ -876,6 +877,7 @@ protected: void cleanup(); bool any_value() { return was_values; } void no_rows_in_result(); + void restore_to_before_no_rows_in_result(); Field *create_tmp_field(bool group, TABLE *table, uint convert_blob_length); }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 60da06fadf1..50fe78492c0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1688,6 +1688,16 @@ JOIN::reinit() func->clear(); } + if (no_rows_in_result_called) + { + /* Reset effect of possible no_rows_in_result() */ + List_iterator_fast it(fields_list); + Item *item; + + no_rows_in_result_called= 0; + while ((item= it++)) + item->restore_to_before_no_rows_in_result(); + } DBUG_RETURN(0); } @@ -12681,8 +12691,11 @@ end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), { List_iterator_fast it(*join->fields); Item *item; + DBUG_PRINT("info", ("no matching rows")); + /* No matching rows for group function */ join->clear(); + join->no_rows_in_result_called= 1; while ((item= it++)) item->no_rows_in_result(); diff --git a/sql/sql_select.h b/sql/sql_select.h index 285019b4a5c..83aa1ec1661 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -365,24 +365,26 @@ public: the number of rows in it may vary from one subquery execution to another. */ bool no_const_tables; + bool no_rows_in_result_called; /** Copy of this JOIN to be used with temporary tables. - tmp_join is used when the JOIN needs to be "reusable" (e.g. in a subquery - that gets re-executed several times) and we know will use temporary tables - for materialization. The materialization to a temporary table overwrites the - JOIN structure to point to the temporary table after the materialization is - done. This is where tmp_join is used : it's a copy of the JOIN before the - materialization and is used in restoring before re-execution by overwriting - the current JOIN structure with the saved copy. - Because of this we should pay extra care of not freeing up helper structures - that are referenced by the original contents of the JOIN. We can check for - this by making sure the "current" join is not the temporary copy, e.g. - !tmp_join || tmp_join != join + tmp_join is used when the JOIN needs to be "reusable" (e.g. in a + subquery that gets re-executed several times) and we know will use + temporary tables for materialization. The materialization to a + temporary table overwrites the JOIN structure to point to the + temporary table after the materialization is done. This is where + tmp_join is used : it's a copy of the JOIN before the + materialization and is used in restoring before re-execution by + overwriting the current JOIN structure with the saved copy. + Because of this we should pay extra care of not freeing up helper + structures that are referenced by the original contents of the + JOIN. We can check for this by making sure the "current" join is + not the temporary copy, e.g. !tmp_join || tmp_join != join - We should free these sub-structures at JOIN::destroy() if the "current" join - has a copy is not that copy. + We should free these sub-structures at JOIN::destroy() if the + "current" join has a copy is not that copy. */ JOIN *tmp_join; ROLLUP rollup; ///< Used with rollup @@ -512,6 +514,7 @@ public: optimized= 0; cond_equal= 0; group_optimized_away= 0; + no_rows_in_result_called= 0; all_fields= fields_arg; if (&fields_list != &fields_arg) /* Avoid valgrind-warning */ From 53310c6ee7d602768998856c85519b9420820f46 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 23 Aug 2010 12:52:57 +0300 Subject: [PATCH 2/3] Trivial fixes, more safe DBUG_ASSERT()'s and some more DBUG_ - CTRL-C now aborts 'source' commands in mysql client - Fix that thread id's are removed in convert-debug-for-diff.sh client/mysql.cc: CTRL-C now aborts 'source' commands scripts/convert-debug-for-diff.sh: Fix expression to remove thread id storage/maria/ha_maria.cc: Don't call DBUG_ASSERT() when we kill a query during REPAIR / ALTER TABLE storage/maria/ma_bitmap.c: Added DBUG_ASSERT() if we call _ma_bitmap_set_full_page_bits() storage/maria/ma_key_recover.c: Don't do an assert if table is marked crashed. storage/maria/ma_recovery.c: Added DBUG_ENTER --- client/mysql.cc | 15 +++++++++++---- scripts/convert-debug-for-diff.sh | 2 +- storage/maria/ha_maria.cc | 2 +- storage/maria/ma_bitmap.c | 7 +++++++ storage/maria/ma_key_recover.c | 5 +++-- storage/maria/ma_recovery.c | 13 +++++++------ 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/client/mysql.cc b/client/mysql.cc index fb090cd74d5..39ca04ccbb9 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -156,6 +156,7 @@ static my_bool ignore_errors=0,wait_flag=0,quick=0, static my_bool debug_info_flag, debug_check_flag, batch_abort_on_error; static my_bool column_types_flag; static my_bool preserve_comments= 0; +static my_bool in_com_source, aborted= 0; static ulong opt_max_allowed_packet, opt_net_buffer_length; static uint verbose=0,opt_silent=0,opt_mysql_port=0, opt_local_infile=0; static uint my_end_arg; @@ -1087,6 +1088,7 @@ int main(int argc,char *argv[]) "\\N [\\d]> ",MYF(MY_WME)); current_prompt = my_strdup(default_prompt,MYF(MY_WME)); prompt_counter=0; + aborted= 0; outfile[0]=0; // no (default) outfile strmov(pager, "stdout"); // the default, if --pager wasn't given @@ -1281,8 +1283,10 @@ sig_handler mysql_end(int sig) /* This function handles sigint calls If query is in process, kill query + If 'source' is executed, abort source command no query in process, terminate like previous behavior */ + sig_handler handle_sigint(int sig) { char kill_buffer[40]; @@ -1321,7 +1325,8 @@ sig_handler handle_sigint(int sig) mysql_close(kill_mysql); tee_fprintf(stdout, "Ctrl-C -- query killed. Continuing normally.\n"); interrupted_query= 0; - + if (in_com_source) + aborted= 1; // Abort source command return; err: @@ -1886,7 +1891,7 @@ static int read_and_execute(bool interactive) bool truncated= 0; status.exit_status=1; - for (;;) + while (!aborted) { if (!interactive) { @@ -4066,17 +4071,19 @@ static int com_source(String *buffer, char *line) status.file_name=source_name; glob_buffer.length(0); // Empty command buffer ignore_errors= !batch_abort_on_error; + in_com_source= 1; error= read_and_execute(false); ignore_errors= save_ignore_errors; status=old_status; // Continue as before + in_com_source= aborted= 0; my_fclose(sql_file,MYF(0)); batch_readline_end(line_buff); /* If we got an error during source operation, don't abort the client if ignore_errors is set */ - if (error && batch_abort_on_error && ignore_errors) - error= -1; + if (error && !batch_abort_on_error && ignore_errors) + error= -1; // Ignore error return error; } diff --git a/scripts/convert-debug-for-diff.sh b/scripts/convert-debug-for-diff.sh index 2c4c86acd3c..b6c837504cb 100755 --- a/scripts/convert-debug-for-diff.sh +++ b/scripts/convert-debug-for-diff.sh @@ -18,7 +18,7 @@ while (<>) { - s/^T@[0-9]+://g; + s/^T@[0-9]+\s*://g; s/0x[0-9a-f]+(\s|\n|\))/#$1/g; s/size: [0-9]+/size: #/g; print $_; diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 1d117dddab5..bbfe2c3ccbb 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -1814,7 +1814,7 @@ int ha_maria::enable_indexes(uint mode) "retrying", my_errno, param.db_name, param.table_name); /* This should never fail normally */ - DBUG_ASSERT(0); + DBUG_ASSERT(thd->killed != 0); /* Repairing by sort failed. Now try standard repair method. */ param.testflag &= ~T_REP_BY_SORT; error= (repair(thd, ¶m, 0) != HA_ADMIN_OK); diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index cec7cd782be..a76b36e1115 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -2063,6 +2063,13 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info, safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock); bitmap_page= page - page % bitmap->pages_covered; + if (page == bitmap_page || + page + page_count >= bitmap_page + bitmap->pages_covered) + { + DBUG_ASSERT(0); /* Wrong in data */ + DBUG_RETURN(1); + } + if (bitmap_page != bitmap->page && _ma_change_bitmap_page(info, bitmap, bitmap_page)) DBUG_RETURN(1); diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index 6d6cc2b4da1..82f8099fe6a 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -64,8 +64,9 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) builds. */ #ifdef EXTRA_DEBUG - DBUG_ASSERT(!pinned_page->changed || - undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional); + DBUG_ASSERT((!pinned_page->changed || + undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional) || + (info->s->state.changed & STATE_CRASHED)); #endif pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, pinned_page->unlock, PAGECACHE_UNPIN, diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 4a62ae8948d..a10fac9a15d 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -2397,6 +2397,7 @@ static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) struct st_translog_scanner_data scanner; int len; uint i; + DBUG_ENTER("run_redo_phase"); /* install hooks for execution */ #define install_redo_exec_hook(R) \ @@ -2461,7 +2462,7 @@ static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) { tprint(tracef, "checkpoint address refers to the log end log or " "log is empty, nothing to do.\n"); - return 0; + DBUG_RETURN(0); } len= translog_read_record_header(lsn, &rec); @@ -2469,12 +2470,12 @@ static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) if (len == RECHEADER_READ_ERROR) { eprint(tracef, "Failed to read header of the first record."); - return 1; + DBUG_RETURN(1); } if (translog_scanner_init(lsn, 1, &scanner, 1)) { tprint(tracef, "Scanner init failed\n"); - return 1; + DBUG_RETURN(1); } for (i= 1;;i++) { @@ -2527,7 +2528,7 @@ static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) LSN_IN_PARTS(rec2.lsn)); translog_destroy_scanner(&scanner); translog_free_record_header(&rec); - return(0); + DBUG_RETURN(0); } if (translog_scanner_init(rec2.lsn, 1, &scanner2, 1)) @@ -2625,12 +2626,12 @@ static int run_redo_phase(LSN lsn, LSN lsn_end, enum maria_apply_log_way apply) fflush(stderr); procent_printed= 1; } - return 0; + DBUG_RETURN(0); err: translog_destroy_scanner(&scanner); translog_free_record_header(&rec); - return 1; + DBUG_RETURN(1); } From ecf95c6e7f9d62b2c01b9f0af2f7b3fdb2e51a2e Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 24 Aug 2010 20:17:17 +0300 Subject: [PATCH 3/3] Fix of LP#616253 Crash in _ma_bitmap_set_full_page_bits on Aria recovery The bug was based on wrong undo data in recovery file and not enough checking of bad data. sql/sql_select.h: Added comment storage/maria/ma_blockrec.c: - Removed wrong sanity checks (didn't work for UNDO records) - More sanity checks and DBUG_ASSERT - More DBUG_ENTER and DBUG_PRINT - Removed filler blocks in extent_to_bitmap_blocks() as it caused problems in write_block_record(). This was the main cause of the bug. (This change can make records generated by UNDO slightly smaller than original record, which we have to fix by correcting row_pos.length before calling write_block_record()) - Fixed some problems in write_block_record() while doing UNDO. - Store header_length without TRANSID_SIZE into recovery log (as UNDO entires doesn't have TRANSID_SIZE) - Mark table crashed if something goes wrong during UNDO storage/maria/maria_def.h: Added header_length --- sql/sql_select.h | 5 ++ storage/maria/ma_blockrec.c | 152 ++++++++++++++++++++++++++---------- storage/maria/maria_def.h | 3 +- 3 files changed, 118 insertions(+), 42 deletions(-) diff --git a/sql/sql_select.h b/sql/sql_select.h index 83aa1ec1661..fe3cc1af400 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -365,6 +365,11 @@ public: the number of rows in it may vary from one subquery execution to another. */ bool no_const_tables; + /* + This flag is set if we call no_rows_in_result() as par of end_group(). + This is used as a simple speed optimization to avoiding calling + restore_no_rows_in_result() in ::reinit() + */ bool no_rows_in_result_called; /** diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index ac04eca4398..0779564c8af 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -334,6 +334,7 @@ typedef struct st_maria_extent_cursor } \ } while (0) + static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails); static my_bool delete_head_or_tail(MARIA_HA *info, pgcache_page_no_t page, uint record_number, @@ -1850,16 +1851,11 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, goto err; } + /* + The following dir entry is unused in case of insert / update but + not in case of undo_update / undo_delete + */ dir= dir_entry_pos(buff, block_size, rownr); -#ifdef SANITY_CHECKS - /* Tail's should always be unused */ - if (page_type == TAIL_PAGE && max_entry > rownr && - (dir[0] != 0 || dir[1] != 0)) - { - DBUG_ASSERT(0); - goto err; - } -#endif if (extend_area_on_page(page_type == HEAD_PAGE ? info : 0, buff, dir, rownr, block_size, length, @@ -2236,6 +2232,8 @@ static void store_extent_info(uchar *to, MARIA_BITMAP_BLOCK *block, *end_block; uint copy_length; my_bool first_found= 0; + DBUG_ENTER("store_extent_info"); + DBUG_PRINT("enter", ("count: %u", count)); for (block= first_block, end_block= first_block+count ; block < end_block; block++) @@ -2255,6 +2253,7 @@ static void store_extent_info(uchar *to, page_count|= START_EXTENT_BIT; } pagerange_store(to + PAGE_STORE_SIZE, page_count); + DBUG_DUMP("extent", to, ROW_EXTENT_SIZE); to+= ROW_EXTENT_SIZE; if (!first_found) { @@ -2269,6 +2268,7 @@ static void store_extent_info(uchar *to, data. */ bzero(to, (size_t) (row_extents_second_part + copy_length - to)); + DBUG_VOID_RETURN; } @@ -2287,7 +2287,8 @@ static void store_extent_info(uchar *to, @return @retval 0 ok - @retval 1 Error (out of memory or disk error changing bitmap) + @retval 1 Error (out of memory or disk error changing bitmap) or + wrong information in extent information */ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, @@ -2298,7 +2299,7 @@ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, { MARIA_BITMAP_BLOCK *block, *start_block; MARIA_SHARE *share= info->s; - uint i; + uint i, tail_page; DBUG_ENTER("extent_to_bitmap_blocks"); if (allocate_dynamic(&info->bitmap_blocks, extent_count + 2)) @@ -2324,13 +2325,36 @@ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, page_count&= ~START_EXTENT_BIT; start_block->sub_blocks= (uint) (block - start_block); start_block= block; - } block->page= page_korr(extent_info); block->page_count= page_count; block->sub_blocks= 0; + if (block->page_count == 0) + { + /* Extend allocated but not used by write_block_record() */ + DBUG_ASSERT(block->page == 0); + /* This is the last block */ + blocks->count= i; + break; + } + if ((tail_page= page_count & TAIL_BIT)) + page_count= 1; - if (page_count & TAIL_BIT) + /* Check if wrong data */ + if (block->page == 0 || page_count == 0 || + (block->page + page_count) * share->block_size > + share->state.state.data_file_length) + { + DBUG_PRINT("error", ("page: %lu page_count: %u tail: %u length: %ld data_length: %ld", + (ulong) block->page, + (block->page_count & ~TAIL_BIT), + (uint) test(block->page_count & TAIL_BIT), + (ulong) ((block->page + (page_count & ~TAIL_BIT)) * + share->block_size), + (ulong) share->state.state.data_file_length)); + DBUG_RETURN(1); + } + if (tail_page) { block->org_bitmap_value= _ma_bitmap_get_page_bits(info, &share->bitmap, block->page); @@ -2342,7 +2366,7 @@ static my_bool extent_to_bitmap_blocks(MARIA_HA *info, my_bool res; pthread_mutex_lock(&share->bitmap.bitmap_lock); res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, - block->page, block->page_count); + block->page, page_count); pthread_mutex_unlock(&share->bitmap.bitmap_lock); if (res) DBUG_RETURN(1); @@ -2764,9 +2788,16 @@ static my_bool write_block_record(MARIA_HA *info, sizeof(char*)); memcpy(data, tmp_pos, *blob_lengths); data+= *blob_lengths; - /* Skip over tail page that was to be used to store blob */ - block++; - bitmap_blocks->tail_page_skipped= 1; + /* + The following is not true when we want to insert data into original + place. In this case we don't have any extra blocks allocated + */ + if (likely(undo_lsn == LSN_ERROR)) + { + /* Skip over tail page that was prepared for storing blob */ + block++; + bitmap_blocks->tail_page_skipped= 1; + } } if (head_block->sub_blocks > 1) { @@ -2779,7 +2810,9 @@ static my_bool write_block_record(MARIA_HA *info, /* Update page directory */ head_length= (uint) (data - row_pos->data); - DBUG_PRINT("info", ("Used head length on page: %u", head_length)); + DBUG_PRINT("info", ("Used head length on page: %u header_length: %u", + head_length, + (uint) (flag & ROW_FLAG_TRANSID ? TRANSID_SIZE : 0))); DBUG_ASSERT(data <= end_of_data); if (head_length < share->base.min_block_length) { @@ -2789,6 +2822,7 @@ static my_bool write_block_record(MARIA_HA *info, data+= diff_length; head_length= share->base.min_block_length; } + DBUG_ASSERT(undo_lsn == LSN_ERROR || head_length == row_pos->length); int2store(row_pos->dir + 2, head_length); /* update empty space at start of block */ row_pos->empty_space-= head_length; @@ -2852,11 +2886,13 @@ static my_bool write_block_record(MARIA_HA *info, { /* Set only a bit, to not cause bitmap code to believe a block is full - when there is still a lot of entries in it + when there is still a lot of entries in it. */ block->used|= BLOCKUSED_USED; } } + DBUG_ASSERT((undo_lsn == LSN_ERROR || + block == bitmap_blocks->block + bitmap_blocks->count)); column= save_column; block= save_block; blob_lengths= save_blob_lengths; @@ -3250,9 +3286,10 @@ static my_bool write_block_record(MARIA_HA *info, else { uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + - PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + 2 + HA_CHECKSUM_STORE_SIZE + 2 + PAGERANGE_STORE_SIZE + ROW_EXTENT_SIZE]; + uchar *log_pos; ha_checksum checksum_delta; /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_UPDATE share same header */ @@ -3262,18 +3299,17 @@ static my_bool write_block_record(MARIA_HA *info, dirpos_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE, row_pos->rownr); - - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= - (LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + - DIRPOS_STORE_SIZE); + log_pos= (log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE); store_checksum_in_rec(share, checksum_delta, row->checksum - old_record_checksum, - log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + - PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - log_array[TRANSLOG_INTERNAL_PARTS + 0].length); + log_pos, log_pos); compile_time_assert(sizeof(ha_checksum) == HA_CHECKSUM_STORE_SIZE); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - + log_data); + if (!old_record) { /* Store undo_lsn in case we are aborting the insert */ @@ -3292,16 +3328,18 @@ static my_bool write_block_record(MARIA_HA *info, else { /* Write UNDO log record for the UPDATE */ - uchar *log_pos= (log_data + - log_array[TRANSLOG_INTERNAL_PARTS + 0].length); size_t row_length, extents_length; - uint row_parts_count; + uint row_parts_count, cur_head_length; /* Write head length and extents of the original row so that we - during UNDO can put it back in the original position + during UNDO can put it back in the original position. + We don't store size for TRANSID, as we don't write this during + UNDO. */ - int2store(log_pos, info->cur_row.head_length); + cur_head_length= (info->cur_row.head_length - + info->cur_row.header_length); + int2store(log_pos, cur_head_length); pagerange_store(log_pos + 2, info->cur_row.extents_count); log_pos+= 2 + PAGERANGE_STORE_SIZE; log_array[TRANSLOG_INTERNAL_PARTS + 0].length+= (2 + @@ -3783,6 +3821,8 @@ err: This is the main reason we don't make a lot of subfunctions that are common between _ma_update_block_record2() and this function. + + Note: If something goes wrong we mark the file crashed */ static my_bool _ma_update_at_original_place(MARIA_HA *info, @@ -3838,6 +3878,10 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, if ((org_empty_size + cur_row->head_length) < length_on_head_page) { + DBUG_PRINT("error", + ("org_empty_size: %u head_length: %u length_on_page: %u", + org_empty_size, (uint) cur_row->head_length, + length_on_head_page)); my_errno= HA_ERR_WRONG_IN_RECORD; goto err; } @@ -3857,7 +3901,6 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, row_pos.empty_space= empty_size; row_pos.dir= dir; row_pos.data= buff + rec_offset; - row_pos.length= length_on_head_page; /* Delete old row */ if (*cur_row->tail_positions && @@ -3887,12 +3930,17 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, max(new_row->total_length, share->base.min_block_length) <= length_on_head_page); + /* Store same amount of data on head page as on original page */ + row_pos.length= (length_on_head_page - + (extent_count + 1 - blocks->count) * ROW_EXTENT_SIZE); + set_if_bigger(row_pos.length, share->base.min_block_length); if ((res= write_block_record(info, oldrec, record, new_row, blocks, 1, &row_pos, undo_lsn, old_checksum))) goto err; DBUG_RETURN(0); err: + _ma_mark_file_crashed(share); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); @@ -4223,7 +4271,8 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) log_pos= log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE; dirpos_store(log_pos, record_number); log_pos+= DIRPOS_STORE_SIZE; - int2store(log_pos, info->cur_row.head_length); + int2store(log_pos, info->cur_row.head_length - + info->cur_row.header_length); log_pos+= 2; pagerange_store(log_pos, info->cur_row.extents_count); log_pos+= PAGERANGE_STORE_SIZE; @@ -4601,6 +4650,8 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, cur_row->head_length= (uint) (end_of_data - data); cur_row->full_page_count= cur_row->tail_count= 0; cur_row->blob_length= 0; + /* Number of bytes in header that we don't need to write during undo */ + cur_row->header_length= total_header_size[(flag & PRECALC_HEADER_BITMASK)]-1; if (flag & ROW_FLAG_TRANSID) { @@ -4612,7 +4663,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, } /* Skip trans header (for now, until we have MVCC csupport) */ - data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)]; + data+= cur_row->header_length + 1 ; if (flag & ROW_FLAG_NULLS_EXTENDED) cur_null_bytes+= data[-1]; @@ -5008,7 +5059,10 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record, uint offset; uint block_size= share->block_size; DBUG_ENTER("_ma_read_block_record"); - DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u", + (ulong) record_pos, + (ulong) ma_recordpos_to_page(record_pos), + ma_recordpos_to_dir_entry(record_pos))); offset= ma_recordpos_to_dir_entry(record_pos); @@ -6756,7 +6810,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, pgcache_page_no_t page; uint rownr; uchar *buff; - my_bool res= 1; + my_bool res; MARIA_PINNED_PAGE page_link; MARIA_SHARE *share= info->s; ha_checksum checksum; @@ -6802,11 +6856,16 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, goto err; res= 0; -err: +end: if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, lsn); DBUG_RETURN(res); + +err: + res= 1; + _ma_mark_file_crashed(share); + goto end; } @@ -7029,6 +7088,10 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, { DBUG_ASSERT(row.checksum == (share->calc_checksum)(info, record)); } + /* Store same amount of data on head page as on original page */ + row_pos.length= (length_on_head_page - + (extent_count + 1 - blocks->count) * ROW_EXTENT_SIZE); + set_if_bigger(row_pos.length, share->base.min_block_length); if (write_block_record(info, (uchar*) 0, record, &row, blocks, blocks->block->org_bitmap_value != 0, &row_pos, undo_lsn, 0)) @@ -7038,6 +7101,7 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, DBUG_RETURN(0); err: + _ma_mark_file_crashed(share); if (info->non_flushable_state) _ma_bitmap_flushable(info, -1); _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE); @@ -7068,7 +7132,7 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, pgcache_page_no_t page; ha_checksum checksum_delta; uint rownr, field_length_header, extent_count, length_on_head_page; - int error= 1; + int error; DBUG_ENTER("_ma_apply_undo_row_update"); LINT_INIT(checksum_delta); @@ -7076,6 +7140,7 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, header+= PAGE_STORE_SIZE; rownr= dirpos_korr(header); header+= DIRPOS_STORE_SIZE; + record_pos= ma_recordpos(page, rownr); DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u", (ulong) record_pos, (ulong) page, rownr)); @@ -7205,9 +7270,14 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, goto err; error= 0; -err: +end: my_free(current_record, MYF(0)); DBUG_RETURN(error); + +err: + error= 1; + _ma_mark_file_crashed(share); + goto end; } diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 8c2ce447a36..bde04186931 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -459,8 +459,9 @@ typedef struct st_maria_row uint *null_field_lengths; /* All null field lengths */ ulong *blob_lengths; /* Length for each blob */ ulong min_length, normal_length, char_length, varchar_length; - ulong blob_length, head_length, total_length; + ulong blob_length, total_length; size_t extents_buffer_length; /* Size of 'extents' buffer */ + uint head_length, header_length; uint field_lengths_length; /* Length of data in field_lengths */ uint extents_count; /* number of extents in 'extents' */ uint full_page_count, tail_count; /* For maria_chk */