From 3c313312bbcd76cf9f8e34fd82622240b6a7c337 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Sun, 19 Sep 2010 18:46:39 -0700 Subject: [PATCH 1/8] Fixed bug #56862 (lp bug #640419). Made sure that rr_quick is used to read the next record whenever a quick select is used to retrieve the table records. --- mysql-test/r/index_merge_innodb.result | 55 ++++++++++++++++++++++++++ mysql-test/t/index_merge_innodb.test | 53 +++++++++++++++++++++++++ sql/records.cc | 3 +- 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result index ff00654aed8..bd49af16105 100644 --- a/mysql-test/r/index_merge_innodb.result +++ b/mysql-test/r/index_merge_innodb.result @@ -581,3 +581,58 @@ WHERE `RUNID`= '' AND `SUBMITNR`= '' AND `ORDERNR`='' AND `PROGRAMM`='' AND `TESTID`='' AND `UCCHECK`=''; drop table t1; +# +# BUG#56862/640419: Wrong result with sort_union index merge when one +# of the merged index scans is the primary key scan +# +CREATE TABLE t1 ( +pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, +a int, +b int, +INDEX idx(a)) +ENGINE=INNODB; +INSERT INTO t1(a,b) VALUES +(11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), +(3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), +(6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), +(13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); +INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; +INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; +INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1 VALUES (1000000, 0, 0); +SET SESSION 
sort_buffer_size = 1024*36; +EXPLAIN +SELECT COUNT(*) FROM +(SELECT * FROM t1 +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away +2 DERIVED t1 index_merge PRIMARY,idx idx,PRIMARY 5,4 NULL 11419 Using sort_union(idx,PRIMARY); Using where +SELECT COUNT(*) FROM +(SELECT * FROM t1 +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +COUNT(*) +6145 +EXPLAIN +SELECT COUNT(*) FROM +(SELECT * FROM t1 IGNORE INDEX(idx) +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY NULL NULL NULL NULL NULL NULL # Select tables optimized away +2 DERIVED t1 ALL PRIMARY NULL NULL NULL # Using where +SELECT COUNT(*) FROM +(SELECT * FROM t1 IGNORE INDEX(idx) +WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +COUNT(*) +6145 +DROP TABLE t1; diff --git a/mysql-test/t/index_merge_innodb.test b/mysql-test/t/index_merge_innodb.test index 7d4a27d9204..e68f414e0bc 100644 --- a/mysql-test/t/index_merge_innodb.test +++ b/mysql-test/t/index_merge_innodb.test @@ -29,3 +29,56 @@ let $merge_table_support= 0; --source include/index_merge2.inc --source include/index_merge_2sweeps.inc --source include/index_merge_ror_cpk.inc + +--echo # +--echo # BUG#56862/640419: Wrong result with sort_union index merge when one +--echo # of the merged index scans is the primary key scan +--echo # + +CREATE TABLE t1 ( + pk int NOT NULL AUTO_INCREMENT PRIMARY KEY, + a int, + b int, + INDEX idx(a)) +ENGINE=INNODB; + +INSERT INTO t1(a,b) VALUES + (11, 1100), (2, 200), (1, 100), (14, 1400), (5, 500), + (3, 300), (17, 1700), (4, 400), (12, 1200), (8, 800), + (6, 600), (18, 1800), (9, 900), (10, 1000), (7, 700), + (13, 1300), (15, 1500), (19, 1900), (16, 1600), (20, 2000); +INSERT INTO t1(a,b) SELECT a+20, b+2000 FROM t1; +INSERT INTO t1(a,b) SELECT a+40, b+4000 FROM t1; +INSERT INTO t1(a,b) SELECT a+80, b+8000 FROM t1; +INSERT 
INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1(a,b) SELECT a,b FROM t1; +INSERT INTO t1 VALUES (1000000, 0, 0); + +SET SESSION sort_buffer_size = 1024*36; + +EXPLAIN +SELECT COUNT(*) FROM + (SELECT * FROM t1 + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +SELECT COUNT(*) FROM + (SELECT * FROM t1 + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; + +--replace_column 9 # +EXPLAIN +SELECT COUNT(*) FROM + (SELECT * FROM t1 IGNORE INDEX(idx) + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; +SELECT COUNT(*) FROM + (SELECT * FROM t1 IGNORE INDEX(idx) + WHERE a BETWEEN 2 AND 7 OR pk=1000000) AS t; + +DROP TABLE t1; diff --git a/sql/records.cc b/sql/records.cc index 2fc5a26a210..827450201c9 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -196,7 +196,8 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table, tempfile= &select->file; else tempfile= table->sort.io_cache; - if (tempfile && my_b_inited(tempfile)) // Test if ref-records was used + if (tempfile && my_b_inited(tempfile) && + !(select && select->quick)) { DBUG_PRINT("info",("using rr_from_tempfile")); info->read_record= (table->sort.addon_field ? From 992ee8e1c0e36da0058e1d31c432a45a6cbd80e3 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 20 Sep 2010 21:22:00 -0700 Subject: [PATCH 2/8] Fixed bug #53161. The implementation of the virtual method not_null_tables for the class Item_outer_ref must always return 0. 
--- mysql-test/r/join_outer.result | 41 ++++++++++++++++++++++++++++++++++ mysql-test/t/join_outer.test | 38 +++++++++++++++++++++++++++++++ sql/item.h | 1 + 3 files changed, 80 insertions(+) diff --git a/mysql-test/r/join_outer.result b/mysql-test/r/join_outer.result index c1cc03729d2..10425e6082a 100644 --- a/mysql-test/r/join_outer.result +++ b/mysql-test/r/join_outer.result @@ -1411,4 +1411,45 @@ select * from t1 left join t2 on t1.b=t2.b where 1=1; a b a b 1 NULL NULL NULL drop table t1,t2; +# +# Bug#53161: outer join in the derived table is erroneously converted +# into an inner join for a query with a group by clause +# +create table t1 (pk int not null primary key, a int not null); +create table t2 like t1; +create table t3 like t1; +create table t4 (pk int not null primary key); +insert into t1 values (1000, 1), (1001, 1); +insert into t2 values (2000, 2), (2001, 2); +insert into t3 values (3000, 3), (3001, 2); +insert into t4 values (4000), (4001); +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch='table_elimination=off'; +explain extended +select t2.pk, +(select t3.pk +from t3 left join t4 on t4.pk=t3.pk +where t3.pk=t2.pk+1000 limit 1 ) as t +from t1,t2 +where t2.pk=t1.pk+1000 and t1.pk>1000 +group by t2.pk; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 index PRIMARY PRIMARY 4 NULL 2 100.00 Using where; Using index; Using temporary; Using filesort +1 PRIMARY t2 eq_ref PRIMARY PRIMARY 4 func 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t3 eq_ref PRIMARY PRIMARY 4 func 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t4 eq_ref PRIMARY PRIMARY 4 test.t3.pk 1 100.00 Using where; Using index +Warnings: +Note 1276 Field or reference 'test.t2.pk' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t2`.`pk` AS `pk`,(select `test`.`t3`.`pk` from `test`.`t3` left join `test`.`t4` on((`test`.`t4`.`pk` = `test`.`t3`.`pk`)) where (`test`.`t3`.`pk` = (`test`.`t2`.`pk` + 
1000)) limit 1) AS `t` from `test`.`t1` join `test`.`t2` where ((`test`.`t2`.`pk` = (`test`.`t1`.`pk` + 1000)) and (`test`.`t1`.`pk` > 1000)) group by `test`.`t2`.`pk` +select t2.pk, +(select t3.pk +from t3 left join t4 on t4.pk=t3.pk +where t3.pk=t2.pk+1000 limit 1 ) as t +from t1,t2 +where t2.pk=t1.pk+1000 and t1.pk>1000 +group by t2.pk; +pk t +2001 3001 +set @@optimizer_switch=@save_optimizer_switch; +drop table t1,t2,t3,t4; End of 5.1 tests diff --git a/mysql-test/t/join_outer.test b/mysql-test/t/join_outer.test index 77a352fe532..5694692dd47 100644 --- a/mysql-test/t/join_outer.test +++ b/mysql-test/t/join_outer.test @@ -996,4 +996,42 @@ select * from t1 left join t2 on t1.b=t2.b where 1=1; drop table t1,t2; +--echo # +--echo # Bug#53161: outer join in the derived table is erroneously converted +--echo # into an inner join for a query with a group by clause +--echo # + +create table t1 (pk int not null primary key, a int not null); +create table t2 like t1; +create table t3 like t1; +create table t4 (pk int not null primary key); +insert into t1 values (1000, 1), (1001, 1); +insert into t2 values (2000, 2), (2001, 2); +insert into t3 values (3000, 3), (3001, 2); +insert into t4 values (4000), (4001); + +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch='table_elimination=off'; + +explain extended +select t2.pk, + (select t3.pk + from t3 left join t4 on t4.pk=t3.pk + where t3.pk=t2.pk+1000 limit 1 ) as t + from t1,t2 + where t2.pk=t1.pk+1000 and t1.pk>1000 + group by t2.pk; + +select t2.pk, + (select t3.pk + from t3 left join t4 on t4.pk=t3.pk + where t3.pk=t2.pk+1000 limit 1 ) as t + from t1,t2 + where t2.pk=t1.pk+1000 and t1.pk>1000 + group by t2.pk; + +set @@optimizer_switch=@save_optimizer_switch; + +drop table t1,t2,t3,t4; + --echo End of 5.1 tests diff --git a/sql/item.h b/sql/item.h index 05fde79ce31..d923fb932cb 100644 --- a/sql/item.h +++ b/sql/item.h @@ -2505,6 +2505,7 @@ public: { return (*ref)->const_item() ? 
0 : OUTER_REF_TABLE_BIT; } + table_map not_null_tables() const { return 0; } virtual Ref_Type ref_type() { return OUTER_REF; } bool check_inner_refs_processor(uchar * arg); }; From a77b145b06e4054182832584a8b52cf4fa294acb Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 21 Sep 2010 17:32:36 +0200 Subject: [PATCH 3/8] clarified mtr treatment of the --plugin-load option in the mysql-test/README.suites file. --- mysql-test/README.suites | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mysql-test/README.suites b/mysql-test/README.suites index 076fc216a0e..de49bef2715 100644 --- a/mysql-test/README.suites +++ b/mysql-test/README.suites @@ -32,6 +32,16 @@ files cannot. Special options are --timezone, --plugin-load, --result-file, --config-file-template, --default-time-zone, --force-restart +In particular, all --plugin-load instances on the command line (on the +combined command line, assembled from different .opt and combinations +files) are merged into one. That is, if, say, test-master.opt file contains +--plugin-load=aaa.so and suite.opt has --plugin-load=bbb.so then mysqld +will get --plugin-load=aaa.so:bbb.so. Also, empty --plugin-load options are +removed from the command line. Which means that one can safely specify +--plugin-load=$AAA_SO and if aaa.so was not built (perhaps, the plugin was +statically linked into the server), the .opt file will not result in the +invalid command line option that can cause the server to refuse to start. + ========================== A suite can have suite.pm file in the suitedir. It must declare a package that inherits from My::Suite. From 0f1b52c6638a0a7a67e8b5945521c429740dcf11 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Sat, 25 Sep 2010 09:00:01 -0700 Subject: [PATCH 4/8] Changed the test case for bug #53161 to make it independent of the setting of optimizer switch for table elimination. 
--- mysql-test/r/join_outer.result | 9 +++------ mysql-test/t/join_outer.test | 9 ++------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/mysql-test/r/join_outer.result b/mysql-test/r/join_outer.result index 10425e6082a..8c9c121c9be 100644 --- a/mysql-test/r/join_outer.result +++ b/mysql-test/r/join_outer.result @@ -1423,11 +1423,9 @@ insert into t1 values (1000, 1), (1001, 1); insert into t2 values (2000, 2), (2001, 2); insert into t3 values (3000, 3), (3001, 2); insert into t4 values (4000), (4001); -set @save_optimizer_switch=@@optimizer_switch; -set @@optimizer_switch='table_elimination=off'; explain extended select t2.pk, -(select t3.pk +(select t3.pk+if(isnull(t4.pk),0,t4.pk) from t3 left join t4 on t4.pk=t3.pk where t3.pk=t2.pk+1000 limit 1 ) as t from t1,t2 @@ -1440,9 +1438,9 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t4 eq_ref PRIMARY PRIMARY 4 test.t3.pk 1 100.00 Using where; Using index Warnings: Note 1276 Field or reference 'test.t2.pk' of SELECT #2 was resolved in SELECT #1 -Note 1003 select `test`.`t2`.`pk` AS `pk`,(select `test`.`t3`.`pk` from `test`.`t3` left join `test`.`t4` on((`test`.`t4`.`pk` = `test`.`t3`.`pk`)) where (`test`.`t3`.`pk` = (`test`.`t2`.`pk` + 1000)) limit 1) AS `t` from `test`.`t1` join `test`.`t2` where ((`test`.`t2`.`pk` = (`test`.`t1`.`pk` + 1000)) and (`test`.`t1`.`pk` > 1000)) group by `test`.`t2`.`pk` +Note 1003 select `test`.`t2`.`pk` AS `pk`,(select (`test`.`t3`.`pk` + if(isnull(`test`.`t4`.`pk`),0,`test`.`t4`.`pk`)) from `test`.`t3` left join `test`.`t4` on((`test`.`t4`.`pk` = `test`.`t3`.`pk`)) where (`test`.`t3`.`pk` = (`test`.`t2`.`pk` + 1000)) limit 1) AS `t` from `test`.`t1` join `test`.`t2` where ((`test`.`t2`.`pk` = (`test`.`t1`.`pk` + 1000)) and (`test`.`t1`.`pk` > 1000)) group by `test`.`t2`.`pk` select t2.pk, -(select t3.pk +(select t3.pk+if(isnull(t4.pk),0,t4.pk) from t3 left join t4 on t4.pk=t3.pk where t3.pk=t2.pk+1000 limit 1 ) as t 
from t1,t2 @@ -1450,6 +1448,5 @@ where t2.pk=t1.pk+1000 and t1.pk>1000 group by t2.pk; pk t 2001 3001 -set @@optimizer_switch=@save_optimizer_switch; drop table t1,t2,t3,t4; End of 5.1 tests diff --git a/mysql-test/t/join_outer.test b/mysql-test/t/join_outer.test index 5694692dd47..cbc65d66624 100644 --- a/mysql-test/t/join_outer.test +++ b/mysql-test/t/join_outer.test @@ -1010,12 +1010,9 @@ insert into t2 values (2000, 2), (2001, 2); insert into t3 values (3000, 3), (3001, 2); insert into t4 values (4000), (4001); -set @save_optimizer_switch=@@optimizer_switch; -set @@optimizer_switch='table_elimination=off'; - explain extended select t2.pk, - (select t3.pk + (select t3.pk+if(isnull(t4.pk),0,t4.pk) from t3 left join t4 on t4.pk=t3.pk where t3.pk=t2.pk+1000 limit 1 ) as t from t1,t2 @@ -1023,15 +1020,13 @@ select t2.pk, group by t2.pk; select t2.pk, - (select t3.pk + (select t3.pk+if(isnull(t4.pk),0,t4.pk) from t3 left join t4 on t4.pk=t3.pk where t3.pk=t2.pk+1000 limit 1 ) as t from t1,t2 where t2.pk=t1.pk+1000 and t1.pk>1000 group by t2.pk; -set @@optimizer_switch=@save_optimizer_switch; - drop table t1,t2,t3,t4; --echo End of 5.1 tests From 716e84164a62396041251ea6d38bcb87d82330a4 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Sun, 26 Sep 2010 09:12:34 -0700 Subject: [PATCH 5/8] Fixed bug #57024. The condition over the outer tables is now extracted from the on condition of any outer join. This condition is saved in a special field of the JOIN_TAB structure for the first inner table of the outer join. The condition is checked before the first inner table is accessed. If it turns out to be false the table is not accessed at all and a null complemented row is generated immediately. 
--- mysql-test/r/join_outer.result | 59 ++++++++++++++++++++++++++++++++++ mysql-test/t/join_outer.test | 44 +++++++++++++++++++++++++ sql/sql_select.cc | 28 ++++++++++------ sql/sql_select.h | 4 ++- 4 files changed, 124 insertions(+), 11 deletions(-) diff --git a/mysql-test/r/join_outer.result b/mysql-test/r/join_outer.result index 8c9c121c9be..f9fb545bd0e 100644 --- a/mysql-test/r/join_outer.result +++ b/mysql-test/r/join_outer.result @@ -1449,4 +1449,63 @@ group by t2.pk; pk t 2001 3001 drop table t1,t2,t3,t4; +# +# Bug#57024: Poor performance when conjunctive condition over the outer +# table is used in the on condition of an outer join +# +create table t1 (a int); +insert into t1 values (NULL), (NULL), (NULL), (NULL); +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 values (4), (2), (1), (3); +create table t2 like t1; +insert into t2 select if(t1.a is null, 10, t1.a) from t1; +create table t3 (a int, b int, index idx(a)); +insert into t3 values (1, 100), (3, 301), (4, 402), (1, 102), (1, 101); +analyze table t1,t2,t3; +Table Op Msg_type Msg_text +test.t1 analyze status OK +test.t2 analyze status OK +test.t3 analyze status OK +flush status; +select sum(t3.b) from t1 left join t3 on t3.a=t1.a and t1.a is not null; +sum(t3.b) +1006 +show status like "handler_read%"; +Variable_name Value +Handler_read_first 0 +Handler_read_key 4 +Handler_read_next 5 +Handler_read_prev 0 +Handler_read_rnd 0 
+Handler_read_rnd_next 1048581 +flush status; +select sum(t3.b) from t2 left join t3 on t3.a=t2.a and t2.a <> 10; +sum(t3.b) +1006 +show status like "handler_read%"; +Variable_name Value +Handler_read_first 0 +Handler_read_key 4 +Handler_read_next 5 +Handler_read_prev 0 +Handler_read_rnd 0 +Handler_read_rnd_next 1048581 +drop table t1,t2,t3; End of 5.1 tests diff --git a/mysql-test/t/join_outer.test b/mysql-test/t/join_outer.test index cbc65d66624..6d1ef15337f 100644 --- a/mysql-test/t/join_outer.test +++ b/mysql-test/t/join_outer.test @@ -1029,4 +1029,48 @@ select t2.pk, drop table t1,t2,t3,t4; +--echo # +--echo # Bug#57024: Poor performance when conjunctive condition over the outer +--echo # table is used in the on condition of an outer join +--echo # + +create table t1 (a int); +insert into t1 values (NULL), (NULL), (NULL), (NULL); +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 values (4), (2), (1), (3); + +create table t2 like t1; +insert into t2 select if(t1.a is null, 10, t1.a) from t1; + +create table t3 (a int, b int, index idx(a)); +insert into t3 values (1, 100), (3, 301), (4, 402), (1, 102), (1, 101); + +analyze table t1,t2,t3; + +flush status; +select sum(t3.b) from t1 left join t3 on t3.a=t1.a and t1.a is not null; +show status like "handler_read%"; +flush status; +select sum(t3.b) from t2 left join t3 on t3.a=t2.a and t2.a <> 10; +show status like "handler_read%"; + +drop table 
t1,t2,t3; + --echo End of 5.1 tests diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8df9b6aff3f..5471b35e250 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -6632,6 +6632,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (tmp_cond) { JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab; + Item **sel_cond_ref= tab < first_inner_tab ? + &first_inner_tab->on_precond : + &tab->select_cond; /* First add the guards for match variables of all embedding outer join operations. @@ -6654,14 +6657,14 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tmp_cond->quick_fix_field(); /* Add the predicate to other pushed down predicates */ DBUG_PRINT("info", ("Item_cond_and")); - cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond : - new Item_cond_and(cond_tab->select_cond, - tmp_cond); + *sel_cond_ref= !(*sel_cond_ref) ? + tmp_cond : + new Item_cond_and(*sel_cond_ref, tmp_cond); DBUG_PRINT("info", ("Item_cond_and 0x%lx", - (ulong)cond_tab->select_cond)); - if (!cond_tab->select_cond) - DBUG_RETURN(1); - cond_tab->select_cond->quick_fix_field(); + (ulong)(*sel_cond_ref))); + if (!(*sel_cond_ref)) + DBUG_RETURN(1); + (*sel_cond_ref)->quick_fix_field(); } } first_inner_tab= first_inner_tab->first_upper; @@ -11646,7 +11649,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) return (*join_tab->next_select)(join,join_tab+1,end_of_records); int error; - enum_nested_loop_state rc; + enum_nested_loop_state rc= NESTED_LOOP_OK; READ_RECORD *info= &join_tab->read_record; if (join->resume_nested_loop) @@ -11674,11 +11677,16 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) /* Set first_unmatched for the last inner table of this group */ join_tab->last_inner->first_unmatched= join_tab; + if (join_tab->on_precond && !join_tab->on_precond->val_int()) + rc= NESTED_LOOP_NO_MORE_ROWS; } join->thd->row_count= 0; - error= (*join_tab->read_first_record)(join_tab); - rc= evaluate_join_record(join, join_tab, 
error); + if (rc != NESTED_LOOP_NO_MORE_ROWS) + { + error= (*join_tab->read_first_record)(join_tab); + rc= evaluate_join_record(join, join_tab, error); + } } while (rc == NESTED_LOOP_OK) diff --git a/sql/sql_select.h b/sql/sql_select.h index fe3cc1af400..b3938fbbbb6 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -154,7 +154,9 @@ typedef struct st_join_table { TABLE *table; KEYUSE *keyuse; /**< pointer to first used key */ SQL_SELECT *select; - COND *select_cond; + COND *select_cond; + COND *on_precond; /**< part of on condition to check before + accessing the first inner table */ QUICK_SELECT_I *quick; Item **on_expr_ref; /**< pointer to the associated on expression */ COND_EQUAL *cond_equal; /**< multiple equalities for the on expression */ From 8b833274cad540f756039c8b730a5e692219d11c Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 28 Sep 2010 16:05:45 +0300 Subject: [PATCH 6/8] Merge with 1.0.11-7 Pre-GA - 2010-09-09 Updated results for failing test cases (In all cases the estimated number of rows was different) --- .../suite/pbxt/r/negation_elimination.result | 2 +- mysql-test/suite/pbxt/r/select.result | 38 +-- storage/pbxt/ChangeLog | 44 ++- storage/pbxt/src/Makefile.am | 4 +- storage/pbxt/src/cache_xt.cc | 21 +- storage/pbxt/src/cache_xt.h | 8 + storage/pbxt/src/database_xt.h | 1 + storage/pbxt/src/datadic_xt.cc | 9 +- storage/pbxt/src/datalog_xt.cc | 8 +- storage/pbxt/src/discover_xt.cc | 15 +- storage/pbxt/src/ha_pbxt.cc | 140 +++++++++- storage/pbxt/src/index_xt.cc | 124 ++++++--- storage/pbxt/src/index_xt.h | 2 +- storage/pbxt/src/myxt_xt.cc | 8 +- storage/pbxt/src/myxt_xt.h | 12 +- storage/pbxt/src/restart_xt.cc | 58 ++++ storage/pbxt/src/strutil_xt.cc | 2 +- storage/pbxt/src/table_xt.cc | 251 +++++++++++++++++- storage/pbxt/src/table_xt.h | 2 + storage/pbxt/src/thread_xt.cc | 9 +- storage/pbxt/src/xaction_xt.cc | 4 +- storage/pbxt/src/xt_defs.h | 24 +- 22 files changed, 683 insertions(+), 103 deletions(-) diff --git 
a/mysql-test/suite/pbxt/r/negation_elimination.result b/mysql-test/suite/pbxt/r/negation_elimination.result index fdf09954e18..f3edfccf67e 100644 --- a/mysql-test/suite/pbxt/r/negation_elimination.result +++ b/mysql-test/suite/pbxt/r/negation_elimination.result @@ -388,7 +388,7 @@ Table Op Msg_type Msg_text test.t1 analyze status OK explain extended select a, not(not(a)), not(a <= 2 and not(a)), not(a not like "1"), not (a not in (1,2)), not(a != 2) from t1 where not(not(a)) having not(not(a)); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 index NULL a 5 NULL 21 100.00 Using where; Using index +1 SIMPLE t1 index NULL a 5 NULL 5 100.00 Using where; Using index Warnings: Note 1003 select `test`.`t1`.`a` AS `a`,(`test`.`t1`.`a` <> 0) AS `not(not(a))`,((`test`.`t1`.`a` > 2) or `test`.`t1`.`a`) AS `not(a <= 2 and not(a))`,(`test`.`t1`.`a` like '1') AS `not(a not like "1")`,(`test`.`t1`.`a` in (1,2)) AS `not (a not in (1,2))`,(`test`.`t1`.`a` = 2) AS `not(a != 2)` from `test`.`t1` where `test`.`t1`.`a` having `test`.`t1`.`a` drop table t1; diff --git a/mysql-test/suite/pbxt/r/select.result b/mysql-test/suite/pbxt/r/select.result index f85e6ccab1d..11244cd3689 100644 --- a/mysql-test/suite/pbxt/r/select.result +++ b/mysql-test/suite/pbxt/r/select.result @@ -1384,52 +1384,52 @@ Table Op Msg_type Msg_text test.t2 analyze status OK explain select t2.companynr,companyname from t4 left join t2 using (companynr) where t2.companynr > 0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 Using where 1 SIMPLE t4 eq_ref PRIMARY PRIMARY 1 test.t2.companynr 1 explain select t2.companynr,companyname from t4 left join t2 using (companynr) where t2.companynr > 0 or t2.companynr < 0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 Using where +1 SIMPLE t2 ALL NULL NULL NULL 
NULL 1199 Using where 1 SIMPLE t4 eq_ref PRIMARY PRIMARY 1 test.t2.companynr 1 explain select t2.companynr,companyname from t4 left join t2 using (companynr) where t2.companynr > 0 and t4.companynr > 0; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 Using where 1 SIMPLE t4 eq_ref PRIMARY PRIMARY 1 test.t2.companynr 1 explain select companynr,companyname from t4 left join t2 using (companynr) where companynr > 0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 12 Using where -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 explain select companynr,companyname from t4 left join t2 using (companynr) where companynr > 0 or companynr < 0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 12 Using where -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 explain select companynr,companyname from t4 left join t2 using (companynr) where companynr > 0 and companynr > 0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 12 Using where -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 explain select t2.companynr,companyname from t4 left join t2 using (companynr) where t2.companynr > 0 or t2.companynr is null; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL NULL NULL NULL NULL 12 -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 Using where explain select t2.companynr,companyname from t4 left join t2 using (companynr) where t2.companynr > 0 or t2.companynr < 0 or t4.companynr > 0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 12 -1 SIMPLE t2 ALL NULL NULL 
NULL NULL 1200 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 Using where explain select t2.companynr,companyname from t4 left join t2 using (companynr) where ifnull(t2.companynr,1)>0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL NULL NULL NULL NULL 12 -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 Using where explain select companynr,companyname from t4 left join t2 using (companynr) where companynr > 0 or companynr is null; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 12 Using where -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 explain select companynr,companyname from t4 left join t2 using (companynr) where companynr > 0 or companynr < 0 or companynr > 0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 12 Using where -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 explain select companynr,companyname from t4 left join t2 using (companynr) where ifnull(companynr,1)>0; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 ALL NULL NULL NULL NULL 12 Using where -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 select distinct t2.companynr,t4.companynr from t2,t4 where t2.companynr=t4.companynr+1; companynr companynr 37 36 @@ -1437,7 +1437,7 @@ companynr companynr explain select distinct t2.companynr,t4.companynr from t2,t4 where t2.companynr=t4.companynr+1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t4 index NULL PRIMARY 1 NULL 12 Using index; Using temporary -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 Using where; Using join buffer +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 Using where; Using join buffer select t2.fld1,t2.companynr,fld3,period from t3,t2 where t2.fld1 = 38208 and t2.fld1=t3.t2nr 
and period = 1008 or t2.fld1 = 38008 and t2.fld1 =t3.t2nr and period = 1008; fld1 companynr fld3 period 038008 37 reporters 1008 @@ -1511,7 +1511,7 @@ count(*) min(fld4) max(fld4) sum(fld1) avg(fld1) std(fld1) variance(fld1) 70 absentee vest 17788966 254128.0857 3272.5940 10709871.3069 explain extended select count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1),variance(fld1) from t2 where companynr = 34 and fld4<>""; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 100.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 100.00 Using where Warnings: Note 1003 select count(0) AS `count(*)`,min(`test`.`t2`.`fld4`) AS `min(fld4)`,max(`test`.`t2`.`fld4`) AS `max(fld4)`,sum(`test`.`t2`.`fld1`) AS `sum(fld1)`,avg(`test`.`t2`.`fld1`) AS `avg(fld1)`,std(`test`.`t2`.`fld1`) AS `std(fld1)`,variance(`test`.`t2`.`fld1`) AS `variance(fld1)` from `test`.`t2` where ((`test`.`t2`.`companynr` = 34) and (`test`.`t2`.`fld4` <> '')) select companynr,count(*),min(fld4),max(fld4),sum(fld1),avg(fld1),std(fld1),variance(fld1) from t2 group by companynr limit 3; @@ -1955,7 +1955,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE explain select fld3 from t2 where fld1=fld1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1200 +1 SIMPLE t2 ALL NULL NULL NULL NULL 1199 select companynr,fld1 from t2 HAVING fld1=250501 or fld1=250502; companynr fld1 34 250501 @@ -2007,7 +2007,7 @@ count(*) 4181 explain select min(fld1),max(fld1),count(*) from t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL fld1 4 NULL 1200 Using index +1 SIMPLE t2 index NULL fld1 4 NULL 1199 Using index select min(fld1),max(fld1),count(*) from t2; min(fld1) max(fld1) count(*) 0 1232609 1199 @@ -2093,9 +2093,9 @@ show full columns from t2 from test like 's%'; Field 
Type Collation Null Key Default Extra Privileges Comment show keys from t2; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t2 0 PRIMARY 1 auto A 1200 NULL NULL BTREE -t2 0 fld1 1 fld1 A 1200 NULL NULL BTREE -t2 1 fld3 1 fld3 A 1200 NULL NULL BTREE +t2 0 PRIMARY 1 auto A 1199 NULL NULL BTREE +t2 0 fld1 1 fld1 A 1199 NULL NULL BTREE +t2 1 fld3 1 fld3 A 1199 NULL NULL BTREE drop table t4, t3, t2, t1; DO 1; DO benchmark(100,1+1),1,1; diff --git a/storage/pbxt/ChangeLog b/storage/pbxt/ChangeLog index b6023d26139..f0f9864d0d5 100644 --- a/storage/pbxt/ChangeLog +++ b/storage/pbxt/ChangeLog @@ -1,7 +1,49 @@ PBXT Release Notes ================== -+------- 1.0.11 Pre-GA - 2010-05-11 +------- 1.0.11-7 Pre-GA - 2010-09-09 + +RN336: Compiled and tested with MySQL 5.1.50. + +RN335: Fixed bug #523994: Deleting all records does not update table statistics. + +RN334: Made a change to reduce the time that only temporary tables exist during the ALTER TABLE and REPAIR TABLE statements. This increases the chance of recovery if a crash occurs during these operations. + +RN333: Log name of table when PBXT recovers an index on startup. If an error occurs during index recovery, the index is set to "repair pending". + +RN332: Fixed an infinite loop when a record in a row is corrupt. Added logging and set the table to "repair pending" in this case. + +RN331: Fixed bug #626890: Crash on truncate table operation. + +RN330: Added additional checks for corruption of the index free list. + +------- 1.0.11-6 Pre-GA - 2010-07-08 + +RN329: Fixed bug #601245: make fails. PBXT did not compile if the partition engine was disabled in the MySQL build. + +------- 1.0.11-5 Pre-GA - 2010-06-18 + +RN328: Fixed bug #595478: Compile fails (1.0.11-4). + +------- 1.0.11-4 Pre-GA - 2010-06-15 + +RN327: Fixed a bug that caused a crash during delete on the index. 
The crash occurred due to memory overwrite when a long key is promoted after a shorter key is deleted, and the difference causes a node size overflow. + +------- 1.0.11-3 Pre-GA - 2010-06-11 + +RN326: Fixed bug #587740: pbxt-1.0.11-pre2-ga first time create partition table error. This was not a new bug. The problem was the PBXT system table's .frm files are corrupted when the first PBXT table created is a partition table. + +RN325: Fixed the "to-sweep" column output in xtstat. + +------- 1.0.11-2 Pre-GA - 2010-05-26 + +RN324: Fixed bug #584070:pbxt-1.0.11-pre-ga does not work with mysql 5.1.47. This bug fix removes a hack which was done to avoid running into the LOCK_plugin lock. + +------- 1.0.11-1 Pre-GA - 2010-05-19 + +RN323: Detect corruption of a key length in an index page. This bug fix avoids a possible crash due to index page corruption. + +------- 1.0.11 Pre-GA - 2010-05-11 RN322: Creating a table the references a non-existing table can now only be done if you set: foreign_key_checks = 0. Also fixed a failure when creating tables with recursive foreign key declarations. 
diff --git a/storage/pbxt/src/Makefile.am b/storage/pbxt/src/Makefile.am index 20399aee484..fc4c4ef8f1e 100644 --- a/storage/pbxt/src/Makefile.am +++ b/storage/pbxt/src/Makefile.am @@ -40,8 +40,8 @@ libpbxt_la_LDFLAGS = -module # These are the warning Drizzle uses: # DRIZZLE_WARNINGS = -W -Wall -Wextra -pedantic -Wundef -Wredundant-decls -Wno-strict-aliasing -Wno-long-long -Wno-unused-parameter -libpbxt_la_CXXFLAGS = -shared $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN -libpbxt_la_CFLAGS = -shared $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -std=c99 +libpbxt_la_CXXFLAGS = $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN +libpbxt_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN -std=c99 EXTRA_LIBRARIES = libpbxt.a noinst_LIBRARIES = libpbxt.a diff --git a/storage/pbxt/src/cache_xt.cc b/storage/pbxt/src/cache_xt.cc index 85eea41dd79..24e42d9e984 100644 --- a/storage/pbxt/src/cache_xt.cc +++ b/storage/pbxt/src/cache_xt.cc @@ -667,6 +667,9 @@ xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size) #ifdef XT_USE_DIRECT_IO_ON_INDEX block->cb_data = buffer; buffer += XT_INDEX_PAGE_SIZE; +#endif +#ifdef CHECK_BLOCK_TRAILERS + XT_SET_DISK_4(block->cp_check, 0xDEADBEEF); #endif ind_cac_globals.cg_free_list = block; block++; @@ -684,6 +687,19 @@ xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size) cont_(a); } +#ifdef CHECK_BLOCK_TRAILERS +xtPublic void check_block_trailers() +{ + XTIndBlockPtr block; + + block = ind_cac_globals.cg_blocks; + for (u_int i=0; icp_check) == 0xDEADBEEF); + block++; + } +} +#endif + xtPublic void xt_ind_exit(XTThreadPtr self) { #ifdef XT_USE_MYSYS @@ -1283,7 +1299,7 @@ static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNod * Conditionally count the number of deleted entries in the index: * We do this before other threads can read the block. 
*/ - if (ind->mi_lazy_delete && read_data) + if (ind && ind->mi_lazy_delete && read_data) xt_ind_count_deleted_items(ot->ot_table, ind, block); /* Add to the hash table: */ @@ -1357,6 +1373,9 @@ xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID ad IDX_CAC_UNLOCK(seg, ot->ot_thread); #ifdef XT_TRACK_INDEX_UPDATES ot->ot_ind_changed++; +#endif +#ifdef CHECK_BLOCK_TRAILERS + check_block_trailers(); #endif return OK; } diff --git a/storage/pbxt/src/cache_xt.h b/storage/pbxt/src/cache_xt.h index ca796ab1a74..63a5164e466 100644 --- a/storage/pbxt/src/cache_xt.h +++ b/storage/pbxt/src/cache_xt.h @@ -33,6 +33,7 @@ struct XTIdxReadBuffer; #ifdef DEBUG //#define XT_USE_CACHE_DEBUG_SIZES +//#define CHECK_BLOCK_TRAILERS #endif #ifdef XT_USE_CACHE_DEBUG_SIZES @@ -116,6 +117,9 @@ typedef struct XTIndBlock { #else xtWord1 cb_data[XT_INDEX_PAGE_SIZE]; #endif +#ifdef CHECK_BLOCK_TRAILERS + xtWord1 cp_check[4]; +#endif } XTIndBlockRec, *XTIndBlockPtr; typedef struct XTIndReference { @@ -177,6 +181,10 @@ xtBool xt_ind_copy_on_write(XTIndReferencePtr iref); XTIndHandlePtr xt_ind_get_handle(struct XTOpenTable *ot, XTIndexPtr ind, XTIndReferencePtr iref); void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread); +#ifdef CHECK_BLOCK_TRAILERS +extern void check_block_trailers(); +#endif + #ifdef DEBUG //#define DEBUG_CHECK_IND_CACHE #endif diff --git a/storage/pbxt/src/database_xt.h b/storage/pbxt/src/database_xt.h index 1b1863d2045..7744aeeac31 100644 --- a/storage/pbxt/src/database_xt.h +++ b/storage/pbxt/src/database_xt.h @@ -117,6 +117,7 @@ typedef struct XTDatabase : public XTHeap { XTSortedListPtr db_table_by_id; XTSortedListPtr db_table_paths; /* A list of table paths used by this database. */ xtBool db_multi_path; + XTSortedListPtr db_error_list; /* A list of errors already reported. 
*/ /* The open table pool: */ XTAllTablePoolsRec db_ot_pool; diff --git a/storage/pbxt/src/datadic_xt.cc b/storage/pbxt/src/datadic_xt.cc index 1a1ffeeb096..6a58d23d980 100644 --- a/storage/pbxt/src/datadic_xt.cc +++ b/storage/pbxt/src/datadic_xt.cc @@ -396,7 +396,7 @@ void XTToken::expectNumber(XTThreadPtr self) struct charset_info_st; class XTTokenizer { - struct charset_info_st *tkn_charset; + MX_CONST_CHARSET_INFO *tkn_charset; char *tkn_cstring; char *tkn_curr_pos; XTToken *tkn_current; @@ -1324,7 +1324,7 @@ void XTParseTable::parseDropIndex(XTThreadPtr self) class XTCreateTable : public XTParseTable { public: bool ct_convert; - struct charset_info_st *ct_charset; + MX_CONST_CHARSET_INFO *ct_charset; XTPathStrPtr ct_tab_path; u_int ct_contraint_no; XTDDTable *ct_curr_table; @@ -2039,11 +2039,6 @@ void XTDDTableRef::deleteAllRows(XTThreadPtr self) if (!(ot = xt_db_open_table_using_tab(tr_fkey->co_table->dt_table, self))) xt_throw(self); - /* {FREE-ROWS-BAD} */ - /* - row_count = ((xtInt8) ot->ot_table->tab_row_eof_id) - 1; - row_count -= (xtInt8) ot->ot_table->tab_row_fnum; - */ /* Check if there are any rows in the referencing table: */ if (!xt_tab_seq_init(ot)) goto failed; diff --git a/storage/pbxt/src/datalog_xt.cc b/storage/pbxt/src/datalog_xt.cc index ff58a122e10..3238f0cbd17 100644 --- a/storage/pbxt/src/datalog_xt.cc +++ b/storage/pbxt/src/datalog_xt.cc @@ -1249,7 +1249,7 @@ xtBool XTDataLogBuffer::dlb_write_thru_log(xtLogID XT_NDEBUG_UNUSED(log_id), xtL */ dlb_data_log->dlf_log_eof += size; #ifdef DEBUG - if ((ulonglong) (log_offset + size) > (ulonglong) dlb_max_write_offset) + if (log_offset + (xtLogOffset) size > (xtLogOffset) dlb_max_write_offset) dlb_max_write_offset = log_offset + size; #endif dlb_flush_required = TRUE; @@ -1291,7 +1291,7 @@ xtBool XTDataLogBuffer::dlb_append_log(xtLogID XT_NDEBUG_UNUSED(log_id), xtLogOf if (!xt_pwrite_file(dlb_data_log->dlf_log_file, log_offset, size, data, &thread->st_statistics.st_data, thread)) return FAILED; 
#ifdef DEBUG - if ((ulonglong) (log_offset + size) > (ulonglong) dlb_max_write_offset) + if (log_offset + (xtLogOffset) size > (xtLogOffset) dlb_max_write_offset) dlb_max_write_offset = log_offset + size; #endif dlb_flush_required = TRUE; @@ -1734,8 +1734,8 @@ static xtBool dl_collect_garbage(XTThreadPtr self, XTDatabaseHPtr db, XTDataLogF xtLogOffset src_log_offset; xtLogID curr_log_id; xtLogOffset curr_log_offset; - xtLogID dest_log_id= 0; - xtLogOffset dest_log_offset= 0; + xtLogID dest_log_id = 0; + xtLogOffset dest_log_offset = 0; off_t garbage_count = 0; memset(&cs, 0, sizeof(XTCompactorStateRec)); diff --git a/storage/pbxt/src/discover_xt.cc b/storage/pbxt/src/discover_xt.cc index 2a42c77ac69..7f7281d8c30 100644 --- a/storage/pbxt/src/discover_xt.cc +++ b/storage/pbxt/src/discover_xt.cc @@ -1622,7 +1622,11 @@ int xt_create_table_frm(handlerton *hton, THD* thd, const char *db, const char * COLUMN_FORMAT_TYPE_FIXED, #endif NULL /*default_value*/, NULL /*on_update_value*/, &comment, NULL /*change*/, - NULL /*interval_list*/, info->field_charset, 0 /*uint_geom_type*/)) + NULL /*interval_list*/, info->field_charset, 0 /*uint_geom_type*/ +#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID > 50200 + , NULL /*vcol_info*/, NULL /* create options */ +#endif + )) #endif goto error; @@ -1654,8 +1658,17 @@ int xt_create_table_frm(handlerton *hton, THD* thd, const char *db, const char * if (mysql_create_table_no_lock(thd, db, name, &create_info, &table_proto, &stmt->alter_info, 1, 0)) goto error; #else +#ifdef WITH_PARTITION_STORAGE_ENGINE + partition_info *part_info; + + part_info = thd->work_part_info; + thd->work_part_info = NULL; +#endif if (mysql_create_table_no_lock(thd, db, name, &mylex.create_info, &mylex.alter_info, 1, 0)) goto error; +#ifdef WITH_PARTITION_STORAGE_ENGINE + thd->work_part_info = part_info; +#endif #endif noerror: diff --git a/storage/pbxt/src/ha_pbxt.cc b/storage/pbxt/src/ha_pbxt.cc index 5bf2069f656..ef0ae582c07 100644 --- 
a/storage/pbxt/src/ha_pbxt.cc +++ b/storage/pbxt/src/ha_pbxt.cc @@ -1232,6 +1232,11 @@ static int pbxt_init(void *p) THD *thd = NULL; #ifndef DRIZZLED +#if MYSQL_VERSION_ID < 50147 + /* A hack which is no longer required after 5.1.46 */ + extern myxt_mutex_t LOCK_plugin; +#endif + /* {MYSQL QUIRK} * I have to release this lock for PBXT recovery to * work, because it needs to open .frm files. @@ -1248,8 +1253,7 @@ static int pbxt_init(void *p) * Only real problem, 2 threads try to load the same * plugin at the same time. */ -#if MYSQL_VERSION_ID <= 50146 - extern myxt_mutex_t LOCK_plugin; +#if MYSQL_VERSION_ID < 50147 myxt_mutex_unlock(&LOCK_plugin); #endif #endif @@ -1285,8 +1289,10 @@ static int pbxt_init(void *p) if (thd) myxt_destroy_thread(thd, FALSE); -#if MYSQL_VERSION_ID <= 50146 && !defined(DRIZZLED) +#ifndef DRIZZLED +#if MYSQL_VERSION_ID < 50147 myxt_mutex_lock(&LOCK_plugin); +#endif #endif } #endif @@ -1948,8 +1954,13 @@ xtPublic int ha_pbxt::reopen() * selectity of the indices, as soon as the number of rows * exceeds 200 (see [**]) */ +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150; +#else /* {FREE-ROWS-BAD} */ pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150; +#endif } /* I am not doing this anymore because it was only required @@ -2006,7 +2017,7 @@ static int pbxt_statistics_fill_table(THD *thd, TABLE_LIST *tables, COND *cond) xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL); } - err = myxt_statistics_fill_table(self, thd, tables, cond, system_charset_info); + err = myxt_statistics_fill_table(self, thd, tables, cond, (void*) system_charset_info); } catch_(a) { err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE); @@ -2296,6 +2307,36 @@ void ha_pbxt::internal_close(THD *thd, struct XTThread *self) */ if (!thd || thd_sql_command(thd) == 
SQLCOM_FLUSH) // FLUSH TABLES xt_sync_flush_table(self, ot); + else { + /* This change is a result of a problem mentioned by Arjen. + * REPAIR and ALTER lead to the following sequence: + * 1. tab -- copy --> tmp1 + * 2. tab -- rename --> tmp2 + * 3. tmp1 -- rename --> tab + * 4. delete tmp2 + * + * PBXT flushes a table before rename. + * In the sequence above results in a table flush in step 3 which can + * take a very long time. + * + * The problem is, during this time frame we have only temp tables. + * A crash in this state leaves the database in a bad state. + * + * To reduce the time in this state, the flush needs to be done + * elsewhere. The code below causes the flish to occur after + * step 1: + */ + switch (thd_sql_command(thd)) { + case SQLCOM_REPAIR: + case SQLCOM_RENAME_TABLE: + case SQLCOM_OPTIMIZE: + case SQLCOM_ANALYZE: + case SQLCOM_ALTER_TABLE: + case SQLCOM_CREATE_INDEX: + xt_sync_flush_table(self, ot); + break; + } + } } freer_(); // xt_db_return_table_to_pool(ot); } @@ -2356,9 +2397,15 @@ int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(te #else xt_tab_load_row_pointers(self, pb_open_tab); #endif + xt_ind_set_index_selectivity(pb_open_tab, self); +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150; +#else /* {FREE-ROWS-BAD} */ pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150; +#endif } init_auto_increment(0); @@ -3929,6 +3976,8 @@ int ha_pbxt::info(uint flag) if ((ot = pb_open_tab)) { if (flag & HA_STATUS_VARIABLE) { + register XTTableHPtr tab = ot->ot_table; + /* {FREE-ROWS-BAD} * Free row count is not reliable, so ignore it. * The problem is if tab_row_fnum > tab_row_eof_id - 1 then @@ -3955,11 +4004,26 @@ int ha_pbxt::info(uint flag) * the actual number of vectors. 
But it must assume that it has at * least EXTRA_RECORDS vectors. */ - stats.deleted = /* ot->ot_table->tab_row_fnum */ 0; - stats.records = (ha_rows) (ot->ot_table->tab_row_eof_id - 1 /* - stats.deleted */); - stats.data_file_length = xt_rec_id_to_rec_offset(ot->ot_table, ot->ot_table->tab_rec_eof_id); - stats.index_file_length = xt_ind_node_to_offset(ot->ot_table, ot->ot_table->tab_ind_eof); - stats.delete_length = ot->ot_table->tab_rec_fnum * ot->ot_rec_size; +#ifdef XT_ROW_COUNT_CORRECTED + if (tab->tab_row_eof_id <= tab->tab_row_fnum || + (!tab->tab_row_free_id && tab->tab_row_fnum)) + xt_tab_check_free_lists(NULL, ot, false, true); + stats.records = (ha_rows) tab->tab_row_eof_id - 1; + if (stats.records >= tab->tab_row_fnum) { + stats.deleted = tab->tab_row_fnum; + stats.records -= stats.deleted; + } + else { + stats.deleted = 0; + stats.records = 2; + } +#else + stats.deleted = /* tab->tab_row_fnum */ 0; + stats.records = (ha_rows) (tab->tab_row_eof_id - 1 /* - stats.deleted */); +#endif + stats.data_file_length = xt_rec_id_to_rec_offset(tab, tab->tab_rec_eof_id); + stats.index_file_length = xt_ind_node_to_offset(tab, tab->tab_ind_eof); + stats.delete_length = tab->tab_rec_fnum * ot->ot_rec_size; //check_time = info.check_time; stats.mean_rec_length = (ulong) ot->ot_rec_size; } @@ -4584,13 +4648,24 @@ xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type) } if (pb_share->sh_recalc_selectivity) { +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + if ((pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) >= 200) +#else /* {FREE-ROWS-BAD} */ - if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) { + if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) +#endif + { /* [**] */ pb_share->sh_recalc_selectivity = FALSE; xt_ind_set_index_selectivity(pb_open_tab, self); +#ifdef XT_ROW_COUNT_CORRECTED + /* {CORRECTED-ROW-COUNT} */ + 
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150; +#else /* {FREE-ROWS-BAD} */ pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150; +#endif } } } @@ -4638,6 +4713,17 @@ xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type) goto complete; } cont_(a); + + /* Occurs if you do: + * truncate table t1; + * truncate table t1; + */ + if (!pb_open_tab) { + if ((err = reopen())) { + pb_ex_in_use = 0; + goto complete; + } + } } else { pb_ex_in_use = 1; @@ -6076,6 +6162,40 @@ mysql_declare_plugin(pbxt) drizzle_declare_plugin_end; #else mysql_declare_plugin_end; +#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID > 50200 +maria_declare_plugin(pbxt) +{ /* PBXT */ + MYSQL_STORAGE_ENGINE_PLUGIN, + &pbxt_storage_engine, + "PBXT", + "Paul McCullagh, PrimeBase Technologies GmbH", + "High performance, multi-versioning transactional engine", + PLUGIN_LICENSE_GPL, + pbxt_init, /* Plugin Init */ + pbxt_end, /* Plugin Deinit */ + 0x0001 /* 0.1 */, + NULL, /* status variables */ + pbxt_system_variables, /* system variables */ + "1.0.11-7 Pre-GA", /* string version */ + MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ +}, +{ /* PBXT_STATISTICS */ + MYSQL_INFORMATION_SCHEMA_PLUGIN, + &pbxt_statitics, + "PBXT_STATISTICS", + "Paul McCullagh, PrimeBase Technologies GmbH", + "PBXT internal system statitics", + PLUGIN_LICENSE_GPL, + pbxt_init_statistics, /* plugin init */ + pbxt_exit_statistics, /* plugin deinit */ + 0x0005, + NULL, /* status variables */ + NULL, /* system variables */ + "1.0.11-7 Pre-GA", /* string version */ + MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ +} +maria_declare_plugin_end; +#endif #endif #if defined(XT_WIN) && defined(XT_COREDUMP) diff --git a/storage/pbxt/src/index_xt.cc b/storage/pbxt/src/index_xt.cc index c8995fe253c..f6c4b4d8aa3 100644 --- a/storage/pbxt/src/index_xt.cc +++ b/storage/pbxt/src/index_xt.cc @@ -272,10 +272,17 
@@ static xtBool idx_new_branch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID *a } if ((XT_NODE_ID(wrote_pos) = XT_NODE_ID(tab->tab_ind_free))) { + xtIndexNodeID next_node; + /* Use the block on the free list: */ - if (!xt_ind_read_bytes(ot, ind, wrote_pos, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) + if (!xt_ind_read_bytes(ot, NULL, wrote_pos, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) goto failed; - XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_8(free_block.if_next_block_8); + XT_NODE_ID(next_node) = (xtIndexNodeID) XT_GET_DISK_8(free_block.if_next_block_8); + if (XT_NODE_ID(next_node) >= XT_NODE_ID(tab->tab_ind_eof)) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name); + goto failed; + } + XT_NODE_ID(tab->tab_ind_free) = XT_NODE_ID(next_node); xt_unlock_mutex_ns(&tab->tab_ind_lock); *address = wrote_pos; TRACK_BLOCK_ALLOC(wrote_pos); @@ -1415,30 +1422,45 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt if (idx_is_item_deleted(iref.ir_branch, &item->i_pos)) iref.ir_block->cp_del_count--; } - memmove(&iref.ir_branch->tb_data[item->i_pos.i_item_offset + item_size], - &iref.ir_branch->tb_data[item->i_pos.i_item_offset + item->i_pos.i_item_size], - item->i_pos.i_total_size - item->i_pos.i_item_offset - item->i_pos.i_item_size); - memcpy(&iref.ir_branch->tb_data[item->i_pos.i_item_offset], - item_buf, item_size); - if (ind->mi_lazy_delete) { - if (idx_is_item_deleted(iref.ir_branch, &item->i_pos)) - iref.ir_block->cp_del_count++; - } - item->i_pos.i_total_size = item->i_pos.i_total_size + item_size - item->i_pos.i_item_size; - XT_SET_DISK_2(iref.ir_branch->tb_size_2, XT_MAKE_NODE_SIZE(item->i_pos.i_total_size)); - IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(iref.ir_branch->tb_size_2)); - iref.ir_updated = TRUE; + + if (item->i_pos.i_total_size + item_size - item->i_pos.i_item_size <= XT_INDEX_PAGE_DATA_SIZE) { + /* The new item is larger than the 
old, this can result + * in overflow of the node! + */ + memmove(&iref.ir_branch->tb_data[item->i_pos.i_item_offset + item_size], + &iref.ir_branch->tb_data[item->i_pos.i_item_offset + item->i_pos.i_item_size], + item->i_pos.i_total_size - item->i_pos.i_item_offset - item->i_pos.i_item_size); + memcpy(&iref.ir_branch->tb_data[item->i_pos.i_item_offset], + item_buf, item_size); + if (ind->mi_lazy_delete) { + if (idx_is_item_deleted(iref.ir_branch, &item->i_pos)) + iref.ir_block->cp_del_count++; + } + item->i_pos.i_total_size = item->i_pos.i_total_size + item_size - item->i_pos.i_item_size; + XT_SET_DISK_2(iref.ir_branch->tb_size_2, XT_MAKE_NODE_SIZE(item->i_pos.i_total_size)); + IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(iref.ir_branch->tb_size_2)); + iref.ir_updated = TRUE; #ifdef DEBUG - if (ind->mi_lazy_delete) ASSERT_NS(item->i_pos.i_total_size <= XT_INDEX_PAGE_DATA_SIZE); #endif - if (item->i_pos.i_total_size <= XT_INDEX_PAGE_DATA_SIZE) return xt_ind_release(ot, ind, XT_UNLOCK_W_UPDATE, &iref); + } /* The node has overflowed!! 
*/ result.sr_item = item->i_pos; + memcpy(ot->ot_ind_wbuf.tb_data, iref.ir_branch->tb_data, item->i_pos.i_item_offset); // First part of the buffer + memcpy(&ot->ot_ind_wbuf.tb_data[item->i_pos.i_item_offset], item_buf, item_size); // The new item + memcpy(&ot->ot_ind_wbuf.tb_data[item->i_pos.i_item_offset + item_size], + &iref.ir_branch->tb_data[item->i_pos.i_item_offset + item->i_pos.i_item_size], + item->i_pos.i_total_size - item->i_pos.i_item_offset - item->i_pos.i_item_size); + item->i_pos.i_total_size += item_size - item->i_pos.i_item_size; + item->i_pos.i_item_size = item_size; + XT_SET_DISK_2(ot->ot_ind_wbuf.tb_size_2, XT_MAKE_LEAF_SIZE(item->i_pos.i_total_size)); + IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(ot->ot_ind_wbuf.tb_size_2)); + ASSERT_NS(item->i_pos.i_total_size > XT_INDEX_PAGE_DATA_SIZE && item->i_pos.i_total_size <= XT_INDEX_PAGE_DATA_SIZE*2); + /* Adjust the stack (we want the parents of the delete node): */ for (;;) { if (idx_pop(stack) == item) @@ -1448,7 +1470,7 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt /* We assume that value can be overwritten (which is the case) */ key_value.sv_flags = XT_SEARCH_WHOLE_KEY; key_value.sv_key = key_buf; - if (!idx_get_middle_branch_item(ot, ind, iref.ir_branch, &key_value, &result)) + if (!idx_get_middle_branch_item(ot, ind, &ot->ot_ind_wbuf, &key_value, &result)) goto failed_1; if (!idx_new_branch(ot, ind, &new_branch)) @@ -1456,7 +1478,6 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt /* Split the node: */ new_size = result.sr_item.i_total_size - result.sr_item.i_item_offset - result.sr_item.i_item_size; - // TODO: Are 2 buffers now required? 
new_branch_ptr = (XTIdxBranchDPtr) &ot->ot_ind_wbuf.tb_data[XT_INDEX_PAGE_DATA_SIZE]; memmove(new_branch_ptr->tb_data, &iref.ir_branch->tb_data[result.sr_item.i_item_offset + result.sr_item.i_item_size], new_size); @@ -1466,10 +1487,10 @@ static xtBool idx_replace_node_key(XTOpenTablePtr ot, XTIndexPtr ind, IdxStackIt goto failed_2; /* Change the size of the old branch: */ - XT_SET_DISK_2(iref.ir_branch->tb_size_2, XT_MAKE_NODE_SIZE(result.sr_item.i_item_offset)); - IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(iref.ir_branch->tb_size_2)); + XT_SET_DISK_2(ot->ot_ind_wbuf.tb_size_2, XT_MAKE_NODE_SIZE(result.sr_item.i_item_offset)); + IDX_TRACE("%d-> %x\n", (int) XT_NODE_ID(current), (int) XT_GET_DISK_2(ot->ot_ind_wbuf.tb_size_2)); + memcpy(iref.ir_branch, &ot->ot_ind_wbuf, offsetof(XTIdxBranchDRec, tb_data) + result.sr_item.i_item_offset); iref.ir_updated = TRUE; - xt_ind_release(ot, ind, XT_UNLOCK_W_UPDATE, &iref); /* Insert the new branch into the parent node, using the new middle key value: */ @@ -2071,6 +2092,11 @@ xtPublic xtBool xt_idx_insert(XTOpenTablePtr ot, XTIndexPtr ind, xtRowID row_id, if (!idx_new_branch(ot, ind, &new_branch)) goto failed_1; + if (XT_NODE_ID(current) == XT_NODE_ID(new_branch)) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + goto failed_1; + } + /* Copy and write the rest of the data to the new node: */ new_size = result.sr_item.i_total_size - result.sr_item.i_item_offset - result.sr_item.i_item_size; new_branch_ptr = (XTIdxBranchDPtr) &ot->ot_ind_wbuf.tb_data[XT_INDEX_PAGE_DATA_SIZE]; @@ -2723,6 +2749,10 @@ xtPublic xtBool xt_idx_search(XTOpenTablePtr ot, XTIndexPtr ind, register XTIdxS #endif ASSERT_NS(iref.ir_xlock == 2); ASSERT_NS(iref.ir_updated == 2); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -2874,6 +2904,10 @@ xtPublic xtBool 
xt_idx_search_prev(XTOpenTablePtr ot, XTIndexPtr ind, register X //idx_check_index(ot, ind, TRUE); //idx_check_on_key(ot); #endif + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -2964,6 +2998,10 @@ xtPublic xtBool xt_idx_next(register XTOpenTablePtr ot, register XTIndexPtr ind, if (!(XT_NODE_ID(current) = XT_NODE_ID(ind->mi_root))) { XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; } @@ -3071,6 +3109,10 @@ xtPublic xtBool xt_idx_next(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_rec_id = 0; ot->ot_curr_row_id = 0; XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; } @@ -3112,6 +3154,10 @@ xtPublic xtBool xt_idx_next(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_row_id = result.sr_row_id; ot->ot_ind_state = result.sr_item; + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -3178,6 +3224,10 @@ xtPublic xtBool xt_idx_prev(register XTOpenTablePtr ot, register XTIndexPtr ind, if (!(XT_NODE_ID(current) = XT_NODE_ID(ind->mi_root))) { XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; } @@ -3274,6 +3324,10 @@ xtPublic xtBool xt_idx_prev(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_row_id = 0; XT_INDEX_UNLOCK(ind, ot); + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; unlock_check_on_key: @@ -3302,6 +3356,10 @@ xtPublic xtBool 
xt_idx_prev(register XTOpenTablePtr ot, register XTIndexPtr ind, ot->ot_curr_rec_id = result.sr_rec_id; ot->ot_curr_row_id = result.sr_row_id; ot->ot_ind_state = result.sr_item; + if (ind->mi_key_corrupted) { + xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name); + return FAILED; + } return OK; failed: @@ -3648,7 +3706,7 @@ xtPublic void xt_check_indices(XTOpenTablePtr ot) track_block_exists(current); #endif printf("%d ", (int) XT_NODE_ID(current)); - if (!xt_ind_read_bytes(ot, *ind, current, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) { + if (!xt_ind_read_bytes(ot, NULL, current, sizeof(XTIndFreeBlockRec), (xtWord1 *) &free_block)) { xt_log_and_clear_exception_ns(); break; } @@ -4141,11 +4199,18 @@ void XTIndexLogPool::ilp_init(struct XTThread *self, struct XTDatabase *db, size if (!ilp_open_log(&il, log_id, FALSE, self)) goto failed; if (il->il_tab_id && il->il_log_eof) { + char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; + if (!il->il_open_table(&ot)) goto failed; if (ot) { - if (!il->il_apply_log(ot)) - goto failed; + xt_tab_make_table_name(ot->ot_table, table_name, sizeof(table_name)); + xt_logf(XT_NT_INFO, "PBXT: Recovering index, table: %s, bytes to read: %llu\n", table_name, (u_llong) il->il_log_eof); + if (!il->il_apply_log(ot)) { + /* If recovery of an index fails, then it is corrupt! */ + xt_tab_disable_index(ot->ot_table, XT_INDEX_CORRUPTED); + xt_log_and_clear_exception_ns(); + } ot->ot_thread = self; il->il_close_table(ot); } @@ -4468,8 +4533,7 @@ xtBool XTIndexLog::il_apply_log(struct XTOpenTable *ot) /* Corrupt log?! 
*/ if (il_buffer_len < req_size) { xt_register_ixterr(XT_REG_CONTEXT, XT_ERR_INDEX_LOG_CORRUPT, xt_file_path(il_of)); - xt_log_and_clear_exception_ns(); - return OK; + return FAILED; } if (!xt_pread_file(il_of, offset, il_buffer_len, il_buffer_len, il_buffer, NULL, &ot->ot_thread->st_statistics.st_ilog, ot->ot_thread)) return FAILED; @@ -4548,8 +4612,7 @@ xtBool XTIndexLog::il_apply_log(struct XTOpenTable *ot) /* Corrupt log?! */ if (il_buffer_len < req_size) { xt_register_ixterr(XT_REG_CONTEXT, XT_ERR_INDEX_LOG_CORRUPT, xt_file_path(il_of)); - xt_log_and_clear_exception_ns(); - return OK; + return FAILED; } if (!xt_pread_file(il_of, offset, il_buffer_len, il_buffer_len, il_buffer, NULL, &ot->ot_thread->st_statistics.st_ilog, ot->ot_thread)) return FAILED; @@ -4597,8 +4660,7 @@ xtBool XTIndexLog::il_apply_log(struct XTOpenTable *ot) break; default: xt_register_ixterr(XT_REG_CONTEXT, XT_ERR_INDEX_LOG_CORRUPT, xt_file_path(il_of)); - xt_log_and_clear_exception_ns(); - return OK; + return FAILED; } } diff --git a/storage/pbxt/src/index_xt.h b/storage/pbxt/src/index_xt.h index 52f8f32dd33..a56e7b3cdb7 100644 --- a/storage/pbxt/src/index_xt.h +++ b/storage/pbxt/src/index_xt.h @@ -312,7 +312,7 @@ typedef struct XTIndex { u_int mi_flags; u_int mi_key_size; u_int mi_max_items; /* The maximum number of items that can fit in a leaf node. */ - xtBool mi_low_byte_first; + xtBool mi_key_corrupted; /* Set to TRUE if a currupted index key is detected. */ xtBool mi_fix_key; xtBool mi_lazy_delete; /* TRUE if index entries are "lazy deleted". */ u_int mi_single_type; /* Used when the index contains a single field. 
*/ diff --git a/storage/pbxt/src/myxt_xt.cc b/storage/pbxt/src/myxt_xt.cc index 51490fc00f5..410bf2d2f3c 100644 --- a/storage/pbxt/src/myxt_xt.cc +++ b/storage/pbxt/src/myxt_xt.cc @@ -1088,7 +1088,10 @@ xtPublic u_int myxt_get_key_length(XTIndexPtr ind, xtWord1 *key_buf) } end: - return (xtWord1 *) key_data - key_buf; + u_int ilen = (xtWord1 *) key_data - key_buf; + if (ilen > XT_INDEX_MAX_KEY_SIZE) + ind->mi_key_corrupted = TRUE; + return ilen; } /* Derived from ha_key_cmp */ @@ -2183,7 +2186,8 @@ static XTIndexPtr my_create_index(XTThreadPtr self, TABLE *table_arg, u_int idx, xt_spinlock_init_with_autoname(self, &ind->mi_dirty_lock); ind->mi_index_no = idx; ind->mi_flags = (index->flags & (HA_NOSAME | HA_NULL_ARE_EQUAL | HA_UNIQUE_CHECK)); - ind->mi_low_byte_first = TS(table_arg)->db_low_byte_first; + //ind->mi_low_byte_first = TS(table_arg)->db_low_byte_first; + ind->mi_key_corrupted = FALSE; ind->mi_fix_key = TRUE; ind->mi_select_total = 0; ind->mi_subset_of = 0; diff --git a/storage/pbxt/src/myxt_xt.h b/storage/pbxt/src/myxt_xt.h index 546b57c6e84..3898c8e30c6 100644 --- a/storage/pbxt/src/myxt_xt.h +++ b/storage/pbxt/src/myxt_xt.h @@ -69,17 +69,17 @@ void myxt_free_dictionary(XTThreadPtr self, XTDictionary *dic); void myxt_move_dictionary(XTDictionaryPtr dic, XTDictionaryPtr source_dic); XTDDTable *myxt_create_table_from_table(XTThreadPtr self, STRUCT_TABLE *my_tab); -void myxt_static_convert_identifier(XTThreadPtr self, struct charset_info_st *cs, char *from, char *to, size_t to_len); -char *myxt_convert_identifier(XTThreadPtr self, struct charset_info_st *cs, char *from); +void myxt_static_convert_identifier(XTThreadPtr self, MX_CONST_CHARSET_INFO *cs, char *from, char *to, size_t to_len); +char *myxt_convert_identifier(XTThreadPtr self, MX_CONST_CHARSET_INFO *cs, char *from); void myxt_static_convert_table_name(XTThreadPtr self, char *from, char *to, size_t to_len); void myxt_static_convert_file_name(char *from, char *to, size_t to_len); char 
*myxt_convert_table_name(XTThreadPtr self, char *from); int myxt_strcasecmp(char * a, char *b); -int myxt_isspace(struct charset_info_st *cs, char a); -int myxt_ispunct(struct charset_info_st *cs, char a); -int myxt_isdigit(struct charset_info_st *cs, char a); +int myxt_isspace(MX_CONST_CHARSET_INFO *cs, char a); +int myxt_ispunct(MX_CONST_CHARSET_INFO *cs, char a); +int myxt_isdigit(MX_CONST_CHARSET_INFO *cs, char a); -struct charset_info_st *myxt_getcharset(bool convert); +MX_CONST_CHARSET_INFO *myxt_getcharset(bool convert); void *myxt_create_thread(); void myxt_destroy_thread(void *thread, xtBool end_threads); diff --git a/storage/pbxt/src/restart_xt.cc b/storage/pbxt/src/restart_xt.cc index b0c8f2854ae..93720f2b113 100644 --- a/storage/pbxt/src/restart_xt.cc +++ b/storage/pbxt/src/restart_xt.cc @@ -1359,6 +1359,57 @@ static xtBool xres_sync_operations(XTThreadPtr self, XTDatabaseHPtr db, XTWriter return op_synced; } +#ifdef XT_CORRECT_TABLE_FREE_COUNT +#define CORRECT_COUNT TRUE +#else +#define CORRECT_COUNT FALSE +#endif +#ifdef XT_CHECK_RECORD_FREE_COUNT +#define CHECK_RECS TRUE +#else +#define CHECK_RECS FALSE +#endif +#if defined(XT_CHECK_RECORD_FREE_COUNT) || defined(XT_CHECK_ROW_FREE_COUNT) +#define RECOVER_FREE_COUNTS +#endif + +#ifdef RECOVER_FREE_COUNTS +/* {CORRECTED-ROW-COUNT} + * This error can be repeated by crashing the server during + * high activity, after flush table writes the table header + * + * On recovery, the free count "from the future" is used as + * the starting point for subsequent allocation and frees. + * The count is wrong after that point. + * + * The recovery of the count only works correctly if a + * checkpoint is completed successfully after that table + * header is flushed. Basically the writing of the table + * header should be synchronized with the writing of the + * end of the checkpoint. + * + * Another solution would be to log the count, along with + * the allocate and free commands. 
+ * + * The 3rd solution is the one used here. The count is corrected + * after recovery. + */ +static void xres_recover_table_free_counts(XTThreadPtr self, XTDatabaseHPtr db, XTWriterStatePtr ws) +{ + u_int edx; + XTTableEntryPtr te_ptr; + XTTableHPtr tab; + + xt_enum_tables_init(&edx); + while ((te_ptr = xt_enum_tables_next(self, db, &edx))) { + if ((tab = te_ptr->te_table)) { + if (xres_open_table(self, ws, te_ptr->te_tab_id)) + xt_tab_check_free_lists(self, ws->ws_ot, CHECK_RECS, CORRECT_COUNT); + } + } +} +#endif + /* * Operations from the log are applied in sequence order. * If the operations are out of sequence, they are buffered @@ -2175,6 +2226,13 @@ xtBool XTXactRestart::xres_restart(XTThreadPtr self, xtLogID *log_id, xtLogOffse /* This is true because if no transaction was placed in RAM then * the next transaction in RAM will have the next ID: */ db->db_xn_min_ram_id = db->db_xn_curr_id + 1; + +#ifdef RECOVER_FREE_COUNTS + if (xres_cp_log_id != *log_id || xres_cp_log_offset != *log_offset) { + /* Recovery took place, correct the row count! */ + xres_recover_table_free_counts(self, db, &ws); + } +#endif } failed: diff --git a/storage/pbxt/src/strutil_xt.cc b/storage/pbxt/src/strutil_xt.cc index 02132fbb06b..8183034a204 100644 --- a/storage/pbxt/src/strutil_xt.cc +++ b/storage/pbxt/src/strutil_xt.cc @@ -380,7 +380,7 @@ xtPublic void xt_int8_to_byte_size(xtInt8 value, char *string) /* Version number must also be set in configure.in! */ xtPublic c_char *xt_get_version(void) { - return "1.0.11 Pre-GA"; + return "1.0.11-7 Pre-GA"; } /* Copy and URL decode! 
*/ diff --git a/storage/pbxt/src/table_xt.cc b/storage/pbxt/src/table_xt.cc index b01f4404ce3..2d93f161ac9 100644 --- a/storage/pbxt/src/table_xt.cc +++ b/storage/pbxt/src/table_xt.cc @@ -78,6 +78,65 @@ #define XT_MAX_TABLE_FILE_NAME_SIZE (XT_TABLE_NAME_SIZE+6+40) +/* + * ----------------------------------------------------------------------- + * Handle Error Detected in a Table + */ + +struct XTTableError { + xtTableID ter_tab_id; + xtRecordID ter_rec_id; +}; + +static int tab_comp_tab_error(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b) +{ + XTTableError *ter_a = ((XTTableError *) a); + XTTableError *ter_b = (XTTableError *) b; + + if (ter_a->ter_tab_id < ter_b->ter_tab_id) + return -1; + if (ter_a->ter_tab_id == ter_b->ter_tab_id) { + if (ter_a->ter_rec_id < ter_b->ter_rec_id) + return -1; + if (ter_a->ter_rec_id == ter_b->ter_rec_id) + return 0; + return 1; + } + return 1; +} + +static xtBool tab_record_corrupt(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, bool not_valid, int where) +{ + XTTableHPtr tab = ot->ot_table; + XTDatabaseHPtr db = tab->tab_db; + XTTableError ter; + XTTableError *ter_ptr; + + ter.ter_tab_id = tab->tab_id; + ter.ter_rec_id = rec_id; + + xt_sl_lock_ns(db->db_error_list, ot->ot_thread); + if (!(ter_ptr = (XTTableError *) xt_sl_find(NULL, db->db_error_list, &ter))) { + xtBool ok; + char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; + + ok = xt_sl_insert(NULL, db->db_error_list, &ter, &ter); + xt_sl_unlock_ns(db->db_error_list); + if (!ok) + return FAILED; + xt_tab_set_table_repair_pending(tab); + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_logf(XT_NT_ERROR, "#%d Table %s: row %llu, record %llu, is %s, REPAIR TABLE required.\n", where, + table_name, + (u_llong) row_id, + (u_llong) rec_id, + not_valid ? 
"not valid" : "free"); + } + else + xt_sl_unlock_ns(db->db_error_list); + return OK; +} + /* * ----------------------------------------------------------------------- * Compare paths: @@ -425,6 +484,7 @@ xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db) db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE); db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE); db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE); + db->db_error_list = xt_new_sortedlist(self, sizeof(XTTableError), 20, 20, tab_comp_tab_error, db, NULL, TRUE, FALSE); if (db->db_multi_path) { XTOpenFilePtr of; @@ -649,6 +709,10 @@ xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db) xt_free_sortedlist(self, db->db_table_paths); db->db_table_paths = NULL; } + if (db->db_error_list) { + xt_free_sortedlist(self, db->db_error_list); + db->db_error_list = NULL; + } } static void tab_check_table(XTThreadPtr self, XTTableHPtr XT_UNUSED(tab)) @@ -1713,6 +1777,116 @@ xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop exit_(); } +xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool check_recs, bool correct_count) +{ + char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; + register XTTableHPtr tab = ot->ot_table; + xtRowID prev_row_id; + xtRowID row_id; + xtRefID next_row_id; + u_llong free_count; + + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); + if (check_recs) { + xtRecordID prev_rec_id; + xtRecordID rec_id; + XTTabRecExtDRec rec_buf; + + xt_lock_mutex_ns(&tab->tab_rec_lock); + /* Checking the free list: */ + prev_rec_id = 0; + free_count = 0; + rec_id = tab->tab_rec_free_id; + while (rec_id) { + if (rec_id >= tab->tab_rec_eof_id) { + xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free list: %llu, ", table_name, (u_llong) 
rec_id); + if (prev_rec_id) + xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_rec_id); + else + xt_logf(XT_NT_ERROR, "reference by list head pointer\n"); + xt_tab_set_table_repair_pending(tab); + break; + } + if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) &rec_buf)) { + if (self) + xt_throw(self); + else + xt_log_and_clear_warning(ot->ot_thread); + break; + } + if ((rec_buf.tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED) + xt_logf(XT_NT_INFO, "Table %s: record, %llu, on free list is not free\n", table_name, (u_llong) rec_id); + free_count++; + prev_rec_id = rec_id; + rec_id = XT_GET_DISK_4(rec_buf.tr_prev_rec_id_4); + } + if (free_count != tab->tab_rec_fnum) { + if (correct_count) { + tab->tab_rec_fnum = free_count; + tab->tab_head_rec_fnum = free_count; + tab->tab_flush_pending = TRUE; + xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) has been set to the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count); + } + else + xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) differs from the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count); + } + xt_unlock_mutex_ns(&tab->tab_rec_lock); + } + + /* Check the row free list: */ + xt_lock_mutex_ns(&tab->tab_row_lock); + + prev_row_id = 0; + free_count = 0; + row_id = tab->tab_row_free_id; + while (row_id) { + if (row_id >= tab->tab_row_eof_id) { + xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free row: %llu, ", table_name, (u_llong) row_id); + if (prev_row_id) + xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_row_id); + else + xt_logf(XT_NT_ERROR, "reference by list head pointer\n"); + xt_tab_set_table_repair_pending(tab); + break; + } + if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) { + if (self) + xt_throw(self); + else + xt_log_and_clear_warning(ot->ot_thread); + break; + } + free_count++; + 
prev_row_id = row_id; + row_id = next_row_id; + } + if (free_count != tab->tab_row_fnum) { + if (correct_count) { + /* tab_row_fnum is the current value, and tab_head_row_fnum is the value on + * disk. tab_head_row_fnum is set by the writer as the changes are applied + * to the database. + * + * This is the value then stored in the header of the file. This value + * is in sync with other changes to the file. + * + * So the fact that I am setting both value means this will not work at + * runtime, unless all changes have been applied by the writer. + * + * The correct way to do this at run time would be to add the change to the + * transaction log, so that it is applied by the writer. + */ + tab->tab_row_fnum = free_count; + tab->tab_head_row_fnum = free_count; + tab->tab_flush_pending = TRUE; + xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) has been set to the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count); + } + else + xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) differs from the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count); + } + + xt_unlock_mutex_ns(&tab->tab_row_lock); +} + /* * Record buffer size: * ------------------- @@ -2010,7 +2184,7 @@ xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot) prec_id = rec_id; rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4); } - if (free_count2 < free_rec_count) + if (free_count2 != free_rec_count) xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2); freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock); @@ -2042,6 +2216,29 @@ xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot) rec_id++; } + prec_id = 0; + free_count2 = 0; + row_id = tab->tab_row_free_id; + while (row_id) { + if (row_id >= tab->tab_row_eof_id) { + xt_logf(XT_INFO, "Table %s: invalid reference on free row: %llu, ", 
tab->tab_name, (u_llong) row_id); + if (prec_id) + xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id); + else + xt_logf(XT_INFO, "reference by list head pointer\n"); + break; + } + if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &ref_id, self)) { + xt_log_and_clear_exception(self); + break; + } + free_count2++; + prec_id = row_id; + row_id = ref_id; + } + if (free_count2 != tab->tab_row_fnum) + xt_logf(XT_INFO, "Table %s: free row count (%llu) differs from the number of row on the list: %llu\n", tab->tab_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count2); + freer_(); // xt_unlock_mutex(&tab->tab_row_lock); #ifdef CHECK_INDEX_ON_CHECK_TABLE @@ -3117,10 +3314,18 @@ static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xt #endif break; case XT_XN_REREAD: + /* {RETRY-READ} + * TODO: This is not as "correct" as it could be. + * Such records should be considered to be aborted, + * and removed from the list. + */ if (invalid_rec != var_rec_id) { invalid_rec = var_rec_id; goto retry_3; } + if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 1)) + goto failed; + /* Assume end of list. */ #ifdef XT_CRASH_DEBUG /* Should not happen! */ @@ -3308,6 +3513,8 @@ xtPublic int xt_tab_visible(XTOpenTablePtr ot) /* Avoid infinite loop: */ if (read_again) { /* Should not happen! */ + if (!tab_record_corrupt(ot, row_id, ot->ot_curr_rec_id, true, 2)) + return XT_ERR; #ifdef XT_CRASH_DEBUG /* Generate a core dump! */ xt_crash_me(); @@ -3364,6 +3571,8 @@ xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer) /* Avoid infinite loop: */ if (read_again) { /* Should not happen! */ + if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4), ot->ot_curr_rec_id, true, 3)) + return XT_ERR; #ifdef XT_CRASH_DEBUG /* Generate a core dump! 
*/ xt_crash_me(); @@ -3580,6 +3789,7 @@ xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_ } tab->tab_row_free_id = row_id; tab->tab_row_fnum++; + ASSERT_NS(tab->tab_row_fnum < tab->tab_row_eof_id); xt_unlock_mutex_ns(&tab->tab_row_lock); if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread)) @@ -3776,7 +3986,7 @@ xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock); if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) { xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock); - return FAILED; + return XT_ERR; } xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock); @@ -3824,7 +4034,7 @@ xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id); if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) { xt_unlock_mutex_ns(&tab->tab_rec_lock); - return FAILED; + return XT_ERR; } tab->tab_rec_free_id = rec_id; ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id); @@ -3832,7 +4042,9 @@ xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 xt_unlock_mutex_ns(&tab->tab_rec_lock); free_rec->rf_rec_type_1 = old_rec_type; - return xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_REC_REMOVED_BI, op_seq, (xtRecordID) new_rec_type, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread); + if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_REC_REMOVED_BI, op_seq, (xtRecordID) new_rec_type, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread)) + return XT_ERR; + return OK; } static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab) @@ -3851,6 +4063,7 @@ static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab) return 0; } tab->tab_row_free_id = next_row_id; + ASSERT_NS(tab->tab_row_fnum > 0); tab->tab_row_fnum--; } else { @@ 
-4170,9 +4383,12 @@ static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordI return FAILED; if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) goto locked; - if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) + if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) { /* Should not happen: */ + if (!tab_record_corrupt(ot, row_id, var_rec_id, false, 4)) + return FAILED; goto record_invalid; + } xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4); switch (xt_xn_status(ot, xn_id, var_rec_id)) { case XT_XN_VISIBLE: @@ -4195,6 +4411,8 @@ static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordI XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread); goto retry; case XT_XN_REREAD: + if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 5)) + return FAILED; goto record_invalid; } var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4); @@ -4206,9 +4424,10 @@ static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordI return FAILED; record_invalid: + /* {RETRY-READ} */ /* Prevent an infinite loop due to a bad record: */ if (invalid_rec != var_rec_id) { - var_rec_id = invalid_rec; + invalid_rec = var_rec_id; goto retry; } /* The record is invalid, it will be "overwritten"... */ @@ -4280,9 +4499,12 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact #ifdef TRACE_VARIATIONS_IN_DUP_CHECK t_type="Re-read"; #endif + /* {RETRY-READ} */ /* Avoid infinite loop: */ if (invalid_rec == rec_id) { /* Should not happen! */ + if (!tab_record_corrupt(ot, XT_GET_DISK_4(rec_head.tr_row_id_4), rec_id, true, 6)) + goto failed; #ifdef XT_CRASH_DEBUG /* Generate a core dump! */ xt_crash_me(); @@ -4327,7 +4549,7 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) { /* Should not happen: */ if (invalid_rec != var_rec_id) { - var_rec_id = invalid_rec; + invalid_rec = var_rec_id; goto retry; } /* Assume end of list. 
*/ @@ -4364,11 +4586,14 @@ xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXact } break; case XT_XN_REREAD: + /* {RETRY-READ} */ if (invalid_rec != var_rec_id) { - var_rec_id = invalid_rec; + invalid_rec = var_rec_id; goto retry; } /* Assume end of list. */ + if (!tab_record_corrupt(ot, row_id, invalid_rec, true, 7)) + goto failed; #ifdef XT_CRASH_DEBUG /* Should not happen! */ xt_crash_me(); @@ -5068,6 +5293,8 @@ xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof) ot->ot_on_page = FALSE; goto next_page; } + if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) buff_ptr)->tr_row_id_4), invalid_rec, true, 8)) + return XT_ERR; #ifdef XT_CRASH_DEBUG /* Should not happen! */ xt_crash_me(); @@ -5240,7 +5467,7 @@ static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_n return FALSE; } -static void tab_make_table_name(XTTableHPtr tab, char *table_name, size_t size) +xtPublic void xt_tab_make_table_name(XTTableHPtr tab, char *table_name, size_t size) { char *nptr; @@ -5316,7 +5543,7 @@ xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab) { char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; - tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name); } @@ -5326,7 +5553,7 @@ xtPublic void xt_tab_table_repaired(XTTableHPtr tab) char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; tab->tab_repair_pending = FALSE; - tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_tab_make_table_name(tab, table_name, sizeof(table_name)); tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name); } } @@ -5337,7 +5564,7 @@ xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab) char table_name[XT_IDENTIFIER_NAME_SIZE*3+3]; tab->tab_repair_pending = TRUE; - tab_make_table_name(tab, table_name, sizeof(table_name)); + xt_tab_make_table_name(tab, table_name, 
sizeof(table_name)); tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name); } } diff --git a/storage/pbxt/src/table_xt.h b/storage/pbxt/src/table_xt.h index 83f2168dd6e..f6c32587419 100644 --- a/storage/pbxt/src/table_xt.h +++ b/storage/pbxt/src/table_xt.h @@ -507,6 +507,7 @@ int xt_tab_compare_names(const char *n1, const char *n2); int xt_tab_compare_paths(char *n1, char *n2); void xt_tab_init_db(struct XTThread *self, struct XTDatabase *db); void xt_tab_exit_db(struct XTThread *self, struct XTDatabase *db); +void xt_tab_check_free_lists(struct XTThread *self, XTOpenTablePtr ot, bool check_recs, bool correct_count); void xt_check_tables(struct XTThread *self); char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name); @@ -572,6 +573,7 @@ xtBool xt_tab_get_rec_data(register XTOpenTablePtr ot, xtRecordID rec_id, siz void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error); void xt_tab_set_index_error(XTTableHPtr tab); +void xt_tab_make_table_name(XTTableHPtr tab, char *table_name, size_t size); xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab); void xt_tab_table_repaired(XTTableHPtr tab); void xt_tab_set_table_repair_pending(XTTableHPtr tab); diff --git a/storage/pbxt/src/thread_xt.cc b/storage/pbxt/src/thread_xt.cc index ac42896d22f..52c2c6c29c5 100644 --- a/storage/pbxt/src/thread_xt.cc +++ b/storage/pbxt/src/thread_xt.cc @@ -224,11 +224,16 @@ static void thr_log_va(XTThreadPtr self, c_char *func, c_char *file, u_int line, #else /* Use the buffer, unless it is too small */ va_list ap2; + int bufsize; va_copy(ap2, ap); - if (vsnprintf(buffer, DEFAULT_LOG_BUFFER_SIZE, fmt, ap) >= DEFAULT_LOG_BUFFER_SIZE) { - if (vasprintf(&log_string, fmt, ap2) == -1) + bufsize = vsnprintf(buffer, DEFAULT_LOG_BUFFER_SIZE, fmt, ap); + if (bufsize >= DEFAULT_LOG_BUFFER_SIZE) { + log_string = (char *) malloc(bufsize + 1); + if (log_string && vsnprintf(log_string, bufsize + 1, fmt, ap2) > bufsize) { /* malloc may fail: never format into NULL, leave log_string NULL instead */ + free(log_string); log_string = NULL; + } } else log_string = buffer; diff
--git a/storage/pbxt/src/xaction_xt.cc b/storage/pbxt/src/xaction_xt.cc index 7281eafd8db..48abc5d2b66 100644 --- a/storage/pbxt/src/xaction_xt.cc +++ b/storage/pbxt/src/xaction_xt.cc @@ -1558,6 +1558,8 @@ xtPublic int xt_xn_status(XTOpenTablePtr ot, xtXactID xn_id, xtRecordID XT_UNUSE * Because we are only here because the record was valid but not * clean (you can confirm this by looking at the code that * calls this function). + * + * See {RETRY-READ} */ return XT_XN_REREAD; } @@ -1743,7 +1745,7 @@ xtPublic xtWord8 xt_xn_bytes_to_sweep(XTDatabaseHPtr db, XTThreadPtr thread) } else { xn_log_id = x_log_id; - x_log_offset = x_log_offset; + xn_log_offset = x_log_offset; } } xn_id++; diff --git a/storage/pbxt/src/xt_defs.h b/storage/pbxt/src/xt_defs.h index 4a03f941cf8..3c77415265c 100644 --- a/storage/pbxt/src/xt_defs.h +++ b/storage/pbxt/src/xt_defs.h @@ -397,6 +397,24 @@ typedef struct XTPathStr { */ #define XT_XLOG_FLUSH_FREQ 1000 +/* + * Define here if you want to check (and correct) the table free list + * counts. The free list counts are not durable, because they are not + * written to the log. + * + * The row free count is most critical because it can be used to + * estimate the number of rows in the table.
+ */ +#define XT_CHECK_ROW_FREE_COUNT +#ifdef DEBUG +#define XT_CHECK_RECORD_FREE_COUNT +#endif +#define XT_CORRECT_TABLE_FREE_COUNT + +#if defined(XT_CHECK_ROW_FREE_COUNT) && defined(XT_CORRECT_TABLE_FREE_COUNT) +#define XT_ROW_COUNT_CORRECTED +#endif + /* ---------------------------------------------------------------------- * GLOBAL CONSTANTS */ @@ -873,7 +891,11 @@ extern "C" void session_mark_transaction_to_rollback(Session *session, bool all) #define MX_ULONGLONG_T ulonglong #define MX_LONGLONG_T longlong #define MX_CHARSET_INFO CHARSET_INFO -#define MX_CONST_CHARSET_INFO struct charset_info_st +#if defined(MARIADB_BASE_VERSION) && MYSQL_VERSION_ID > 50200 +#define MX_CONST_CHARSET_INFO const struct charset_info_st +#else +#define MX_CONST_CHARSET_INFO struct charset_info_st +#endif #define MX_CONST #define MX_BITMAP MY_BITMAP #define MX_BIT_SIZE() n_bits From 5f3c0f568ec34f7c964ef286c75dfecc9756d5a0 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 3 Oct 2010 17:04:46 +0200 Subject: [PATCH 7/8] Don't use deprecated --skip-locking option in example config files. 
--- support-files/my-huge.cnf.sh | 2 +- support-files/my-large.cnf.sh | 2 +- support-files/my-medium.cnf.sh | 2 +- support-files/my-small.cnf.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/support-files/my-huge.cnf.sh b/support-files/my-huge.cnf.sh index 17a7ddb5855..27ef0e5cc07 100644 --- a/support-files/my-huge.cnf.sh +++ b/support-files/my-huge.cnf.sh @@ -25,7 +25,7 @@ socket = @MYSQL_UNIX_ADDR@ [mysqld] port = @MYSQL_TCP_PORT@ socket = @MYSQL_UNIX_ADDR@ -skip-locking +skip-external-locking key_buffer_size = 384M max_allowed_packet = 1M table_open_cache = 512 diff --git a/support-files/my-large.cnf.sh b/support-files/my-large.cnf.sh index bbdfdb32a96..812009136f1 100644 --- a/support-files/my-large.cnf.sh +++ b/support-files/my-large.cnf.sh @@ -25,7 +25,7 @@ socket = @MYSQL_UNIX_ADDR@ [mysqld] port = @MYSQL_TCP_PORT@ socket = @MYSQL_UNIX_ADDR@ -skip-locking +skip-external-locking key_buffer_size = 256M max_allowed_packet = 1M table_open_cache = 256 diff --git a/support-files/my-medium.cnf.sh b/support-files/my-medium.cnf.sh index 88113d0a8d4..8fd62eaf958 100644 --- a/support-files/my-medium.cnf.sh +++ b/support-files/my-medium.cnf.sh @@ -26,7 +26,7 @@ socket = @MYSQL_UNIX_ADDR@ [mysqld] port = @MYSQL_TCP_PORT@ socket = @MYSQL_UNIX_ADDR@ -skip-locking +skip-external-locking key_buffer_size = 16M max_allowed_packet = 1M table_open_cache = 64 diff --git a/support-files/my-small.cnf.sh b/support-files/my-small.cnf.sh index 7dce9e60ed9..c0a86ae9671 100644 --- a/support-files/my-small.cnf.sh +++ b/support-files/my-small.cnf.sh @@ -26,7 +26,7 @@ socket = @MYSQL_UNIX_ADDR@ [mysqld] port = @MYSQL_TCP_PORT@ socket = @MYSQL_UNIX_ADDR@ -skip-locking +skip-external-locking key_buffer_size = 16K max_allowed_packet = 1M table_open_cache = 4 From 9f35e98a4baa518e8e0a464ebea8571f362ac641 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 6 Oct 2010 11:45:30 +0300 Subject: [PATCH 8/8] Fix for Bug#43152 "Assertion 
`bitmap_is_set_all(&table->s->all_set)' failed in handler::ha_reset" The reason for this was that some bitmap test functions changed the bitmap, which caused problems when the same bitmap was used by multiple threads. include/my_bitmap.h: Changed order of elements to get better alignment. mysys/my_bitmap.c: Change bitmap test functions to not modify the bitmap. Fixed compiler errors in test_bitmap --- include/my_bitmap.h | 4 +- mysys/my_bitmap.c | 179 ++++++++++++++++++++++++-------------------- 2 files changed, 100 insertions(+), 83 deletions(-) diff --git a/include/my_bitmap.h b/include/my_bitmap.h index 994c5cb4299..39ee1d2f7fc 100644 --- a/include/my_bitmap.h +++ b/include/my_bitmap.h @@ -25,8 +25,6 @@ typedef uint32 my_bitmap_map; typedef struct st_bitmap { my_bitmap_map *bitmap; - uint n_bits; /* number of bits occupied by the above */ - my_bitmap_map last_word_mask; my_bitmap_map *last_word_ptr; /* mutex will be acquired for the duration of each bitmap operation if @@ -36,6 +34,8 @@ typedef struct st_bitmap #ifdef THREAD pthread_mutex_t *mutex; #endif + my_bitmap_map last_word_mask; + uint32 n_bits; /* number of bits occupied by the above */ } MY_BITMAP; #ifdef __cplusplus diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index e7e5f75f486..0c3f45be374 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -40,16 +40,31 @@ #include #include + +/* Create a mask of the significant bits for the last byte (1,3,7,..255) */ + +static inline uchar last_byte_mask(uint bits) +{ + /* Get the number of used bits-1 (0..7) in the last byte */ + unsigned int const used= (bits - 1U) & 7U; + /* Return bitmask for the significant bits */ + return ((2U << used) - 1); +} + +/* + Create a mask with the upper 'unused' bits set and the lower 'used' + bits clear. The bits within each byte is stored in big-endian order. 
+*/ + +static inline uchar invers_last_byte_mask(uint bits) +{ + return last_byte_mask(bits) ^ 255; +} + + void create_last_word_mask(MY_BITMAP *map) { - /* Get the number of used bits (1..8) in the last byte */ - unsigned int const used= 1U + ((map->n_bits-1U) & 0x7U); - - /* - Create a mask with the upper 'unused' bits set and the lower 'used' - bits clear. The bits within each byte is stored in big-endian order. - */ - unsigned char const mask= (~((1 << used) - 1)) & 255; + unsigned char const mask= invers_last_byte_mask(map->n_bits); /* The first bytes are to be set to zero since they represent real bits @@ -267,40 +282,41 @@ void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size) my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size) { - uint prefix_bits= prefix_size & 0x7, res; - uchar *m= (uchar*)map->bitmap; - uchar *end_prefix= m+prefix_size/8; + uint prefix_mask= last_byte_mask(prefix_size); + uchar *m= (uchar*) map->bitmap; + uchar *end_prefix= m+(prefix_size-1)/8; uchar *end; DBUG_ASSERT(m && prefix_size <= map->n_bits); - end= m+no_bytes_in_map(map); + + /* Empty prefix is always true */ + if (!prefix_size) + return 1; while (m < end_prefix) if (*m++ != 0xff) return 0; - *map->last_word_ptr&= ~map->last_word_mask; /*Clear bits*/ - res= 0; - if (prefix_bits && *m++ != (1 << prefix_bits)-1) - goto ret; + end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1; + if (m == end) + return ((*m & last_byte_mask(map->n_bits)) == prefix_mask); - while (m < end) - if (*m++ != 0) - goto ret; - res= 1; -ret: - return res; + if (*m != prefix_mask) + return 0; + + while (++m < end) + if (*m != 0) + return 0; + return ((*m & last_byte_mask(map->n_bits)) == 0); } - my_bool bitmap_is_set_all(const MY_BITMAP *map) { my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end= map->last_word_ptr; - *map->last_word_ptr |= map->last_word_mask; - for (; data_ptr <= end; data_ptr++) + for (; data_ptr < end; data_ptr++) if (*data_ptr != 0xFFFFFFFF) return FALSE; - 
return TRUE; + return (*data_ptr | map->last_word_mask) == 0xFFFFFFFF; } @@ -308,13 +324,11 @@ my_bool bitmap_is_clear_all(const MY_BITMAP *map) { my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end; - if (*map->last_word_ptr & ~map->last_word_mask) - return FALSE; end= map->last_word_ptr; for (; data_ptr < end; data_ptr++) if (*data_ptr) return FALSE; - return TRUE; + return (*data_ptr & ~map->last_word_mask) == 0; } /* Return TRUE if map1 is a subset of map2 */ @@ -327,14 +341,13 @@ my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2) map1->n_bits==map2->n_bits); end= map1->last_word_ptr; - *map1->last_word_ptr &= ~map1->last_word_mask; - *map2->last_word_ptr &= ~map2->last_word_mask; - while (m1 <= end) + while (m1 < end) { if ((*m1++) & ~(*m2++)) return 0; } - return 1; + /* here both maps have the same number of bits - see assert above */ + return ((*m1 & ~*m2 & ~map1->last_word_mask) ? 0 : 1); } /* True if bitmaps has any common bits */ @@ -347,14 +360,13 @@ my_bool bitmap_is_overlapping(const MY_BITMAP *map1, const MY_BITMAP *map2) map1->n_bits==map2->n_bits); end= map1->last_word_ptr; - *map1->last_word_ptr &= ~map1->last_word_mask; - *map2->last_word_ptr &= ~map2->last_word_mask; - while (m1 <= end) + while (m1 < end) { if ((*m1++) & (*m2++)) return 1; } - return 0; + /* here both maps have the same number of bits - see assert above */ + return ((*m1 & *m2 & ~map1->last_word_mask) ? 
1 : 0); } @@ -366,15 +378,15 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) DBUG_ASSERT(map->bitmap && map2->bitmap); end= to+min(len,len2); - *map2->last_word_ptr&= ~map2->last_word_mask; /*Clear last bits in map2*/ while (to < end) *to++ &= *from++; - if (len2 < len) + if (len2 <= len) { - end+=len-len2; + to[-1]&= ~map2->last_word_mask; /* Clear last not relevant bits */ + end+= len-len2; while (to < end) - *to++=0; + *to++= 0; } } @@ -386,13 +398,12 @@ my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2) DBUG_ASSERT(map1->bitmap && map2->bitmap && map1->n_bits==map2->n_bits); - *map1->last_word_ptr|= map1->last_word_mask; - end= map1->last_word_ptr; - while ( m1 <= end) + while ( m1 < end) if ((*m1++ | *m2++) != 0xFFFFFFFF) return FALSE; - return TRUE; + /* Both maps have the same number of bits (see assert above): TRUE only if the last word, with its unused bits forced on, is all ones */ + return ((*m1 | *m2 | map1->last_word_mask) == 0xFFFFFFFF); } @@ -479,14 +490,13 @@ void bitmap_invert(MY_BITMAP *map) uint bitmap_bits_set(const MY_BITMAP *map) { uchar *m= (uchar*)map->bitmap; - uchar *end= m + no_bytes_in_map(map); + uchar *end= m + no_bytes_in_map(map) - 1; uint res= 0; DBUG_ASSERT(map->bitmap); - *map->last_word_ptr&= ~map->last_word_mask; /*Reset last bits to zero*/ while (m < end) res+= my_count_bits_ushort(*m++); - return res; + return res + my_count_bits_ushort(*m & last_byte_mask(map->n_bits)); } @@ -510,27 +520,30 @@ uint bitmap_get_first_set(const MY_BITMAP *map) DBUG_ASSERT(map->bitmap); data_ptr= map->bitmap; - *map->last_word_ptr &= ~map->last_word_mask; - for (i=0; data_ptr <= end; data_ptr++, i++) - { + for (i=0; data_ptr < end; data_ptr++, i++) if (*data_ptr) + goto found; + if (!(*data_ptr & ~map->last_word_mask)) + return MY_BIT_NONE; + +found: + { + byte_ptr= (uchar*)data_ptr; + for (j=0; ; j++, byte_ptr++) { - byte_ptr= (uchar*)data_ptr; - for (j=0; ; j++, byte_ptr++) + if (*byte_ptr) { - if (*byte_ptr) + for (k=0; ; k++) { - for (k=0; ; k++) - { - if (*byte_ptr
& (1 << k)) - return (i*32) + (j*8) + k; - } + if (*byte_ptr & (1 << k)) + return (i*32) + (j*8) + k; } } } } - return MY_BIT_NONE; + DBUG_ASSERT(0); + return MY_BIT_NONE; /* Impossible */ } @@ -544,25 +557,29 @@ uint bitmap_get_first(const MY_BITMAP *map) data_ptr= map->bitmap; *map->last_word_ptr|= map->last_word_mask; - for (i=0; data_ptr <= end; data_ptr++, i++) - { + for (i=0; data_ptr < end; data_ptr++, i++) if (*data_ptr != 0xFFFFFFFF) + goto found; + if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF) + return MY_BIT_NONE; + +found: + { + byte_ptr= (uchar*)data_ptr; + for (j=0; ; j++, byte_ptr++) { - byte_ptr= (uchar*)data_ptr; - for (j=0; ; j++, byte_ptr++) + if (*byte_ptr != 0xFF) { - if (*byte_ptr != 0xFF) + for (k=0; ; k++) { - for (k=0; ; k++) - { - if (!(*byte_ptr & (1 << k))) - return (i*32) + (j*8) + k; - } + if (!(*byte_ptr & (1 << k))) + return (i*32) + (j*8) + k; } } } } - return MY_BIT_NONE; + DBUG_ASSERT(0); + return MY_BIT_NONE; /* Impossible */ } @@ -777,7 +794,7 @@ uint get_rand_bit(uint bitsize) return (rand() % bitsize); } -bool test_set_get_clear_bit(MY_BITMAP *map, uint bitsize) +my_bool test_set_get_clear_bit(MY_BITMAP *map, uint bitsize) { uint i, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -800,7 +817,7 @@ error2: return TRUE; } -bool test_flip_bit(MY_BITMAP *map, uint bitsize) +my_bool test_flip_bit(MY_BITMAP *map, uint bitsize) { uint i, test_bit; uint no_loops= bitsize > 128 ? 
128 : bitsize; @@ -823,13 +840,13 @@ error2: return TRUE; } -bool test_operators(MY_BITMAP *map __attribute__((unused)), +my_bool test_operators(MY_BITMAP *map __attribute__((unused)), uint bitsize __attribute__((unused))) { return FALSE; } -bool test_get_all_bits(MY_BITMAP *map, uint bitsize) +my_bool test_get_all_bits(MY_BITMAP *map, uint bitsize) { uint i; bitmap_set_all(map); @@ -871,7 +888,7 @@ error6: return TRUE; } -bool test_compare_operators(MY_BITMAP *map, uint bitsize) +my_bool test_compare_operators(MY_BITMAP *map, uint bitsize) { uint i, j, test_bit1, test_bit2, test_bit3,test_bit4; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -977,7 +994,7 @@ error5: return TRUE; } -bool test_count_bits_set(MY_BITMAP *map, uint bitsize) +my_bool test_count_bits_set(MY_BITMAP *map, uint bitsize) { uint i, bit_count=0, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1003,7 +1020,7 @@ error2: return TRUE; } -bool test_get_first_bit(MY_BITMAP *map, uint bitsize) +my_bool test_get_first_bit(MY_BITMAP *map, uint bitsize) { uint i, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1028,7 +1045,7 @@ error2: return TRUE; } -bool test_get_next_bit(MY_BITMAP *map, uint bitsize) +my_bool test_get_next_bit(MY_BITMAP *map, uint bitsize) { uint i, j, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1047,7 +1064,7 @@ error1: return TRUE; } -bool test_prefix(MY_BITMAP *map, uint bitsize) +my_bool test_prefix(MY_BITMAP *map, uint bitsize) { uint i, j, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1082,7 +1099,7 @@ error3: } -bool do_test(uint bitsize) +my_bool do_test(uint bitsize) { MY_BITMAP map; my_bitmap_map buf[1024];