From b970363acfae02be4667825501316b412ef6486a Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Mon, 10 Aug 2020 18:23:25 +1000 Subject: [PATCH 01/11] MDEV-23440: mysql_tzinfo_to_sql to use transactions Since MDEV-18778, timezone tables get changed to innodb to allow them to be replicated to other galera nodes. Even without galera, timezone tables could be declared innodb. With the standalone innodb tables, the mysql_tzinfo_to_sql takes approximately 27 seconds. With the transactions enabled in this patch, 1.2 seconds is the approximate load time. While explicit checks for the engine of the time zone tables could be done, or checks against !opt_skip_write_binlog, non-transactional storage engines will just ignore the transactional state without even a warning so its safe to enact globally. Leap seconds are pretty much ignored as they are a single insert statement and have gone out of favour as they have caused MariaDB stalls in the past. --- mysql-test/r/mysql_tzinfo_to_sql_symlink.result | 6 ++++++ mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink.result | 4 ++++ .../suite/wsrep/r/mysql_tzinfo_to_sql_symlink_skip.result | 4 ++++ sql/tztime.cc | 3 +++ 4 files changed, 17 insertions(+) diff --git a/mysql-test/r/mysql_tzinfo_to_sql_symlink.result b/mysql-test/r/mysql_tzinfo_to_sql_symlink.result index fc9ddce08b1..0de4a5ada95 100644 --- a/mysql-test/r/mysql_tzinfo_to_sql_symlink.result +++ b/mysql-test/r/mysql_tzinfo_to_sql_symlink.result @@ -15,6 +15,7 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; INSERT INTO time_zone (Use_leap_seconds) VALUES ('N'); SET @time_zone_id= LAST_INSERT_ID(); INSERT INTO time_zone_name (Name, Time_zone_id) VALUES ('GMT', @time_zone_id); @@ -32,6 +33,7 @@ INSERT INTO time_zone_transition_type (Time_zone_id, Transition_type_id, Offset, Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/garbage' as time zone. Skipping it. Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/ignored.tab' as time zone. Skipping it. Warning: Skipping directory 'MYSQLTEST_VARDIR/zoneinfo/posix/posix': to avoid infinite symlink recursion. +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; \d | @@ -57,6 +59,7 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; INSERT INTO time_zone (Use_leap_seconds) VALUES ('N'); SET @time_zone_id= LAST_INSERT_ID(); INSERT INTO time_zone_name (Name, Time_zone_id) VALUES ('GMT', @time_zone_id); @@ -71,6 +74,7 @@ INSERT INTO time_zone_transition_type (Time_zone_id, Transition_type_id, Offset, (@time_zone_id, 0, 0, 0, 'GMT') ; Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/garbage' as time zone. Skipping it. +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; \d | @@ -142,6 +146,8 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; \d | diff --git a/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink.result b/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink.result index 1e6ebbbd34d..d146b7187e4 100644 --- a/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink.result +++ b/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink.result @@ -15,6 +15,7 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; INSERT INTO time_zone (Use_leap_seconds) VALUES ('N'); SET @time_zone_id= LAST_INSERT_ID(); INSERT INTO time_zone_name (Name, Time_zone_id) VALUES ('GMT', @time_zone_id); @@ -32,6 +33,7 @@ INSERT INTO time_zone_transition_type (Time_zone_id, Transition_type_id, Offset, Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/garbage' as time zone. Skipping it. Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/ignored.tab' as time zone. Skipping it. Warning: Skipping directory 'MYSQLTEST_VARDIR/zoneinfo/posix/posix': to avoid infinite symlink recursion. +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; \d | @@ -57,6 +59,7 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; INSERT INTO time_zone (Use_leap_seconds) VALUES ('N'); SET @time_zone_id= LAST_INSERT_ID(); INSERT INTO time_zone_name (Name, Time_zone_id) VALUES ('GMT', @time_zone_id); @@ -71,6 +74,7 @@ INSERT INTO time_zone_transition_type (Time_zone_id, Transition_type_id, Offset, (@time_zone_id, 0, 0, 0, 'GMT') ; Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/garbage' as time zone. Skipping it. +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; \d | diff --git a/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink_skip.result b/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink_skip.result index 85c4d858be2..aff02cb413e 100644 --- a/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink_skip.result +++ b/mysql-test/suite/wsrep/r/mysql_tzinfo_to_sql_symlink_skip.result @@ -9,6 +9,7 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; INSERT INTO time_zone (Use_leap_seconds) VALUES ('N'); SET @time_zone_id= LAST_INSERT_ID(); INSERT INTO time_zone_name (Name, Time_zone_id) VALUES ('GMT', @time_zone_id); @@ -26,6 +27,7 @@ INSERT INTO time_zone_transition_type (Time_zone_id, Transition_type_id, Offset, Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/garbage' as time zone. Skipping it. Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/ignored.tab' as time zone. Skipping it. Warning: Skipping directory 'MYSQLTEST_VARDIR/zoneinfo/posix/posix': to avoid infinite symlink recursion. +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; # Silent run @@ -36,6 +38,7 @@ TRUNCATE TABLE time_zone; TRUNCATE TABLE time_zone_name; TRUNCATE TABLE time_zone_transition; TRUNCATE TABLE time_zone_transition_type; +START TRANSACTION; INSERT INTO time_zone (Use_leap_seconds) VALUES ('N'); SET @time_zone_id= LAST_INSERT_ID(); INSERT INTO time_zone_name (Name, Time_zone_id) VALUES ('GMT', @time_zone_id); @@ -50,6 +53,7 @@ INSERT INTO time_zone_transition_type (Time_zone_id, Transition_type_id, Offset, (@time_zone_id, 0, 0, 0, 'GMT') ; Warning: Unable to load 'MYSQLTEST_VARDIR/zoneinfo/posix/garbage' as time zone. Skipping it. +COMMIT; ALTER TABLE time_zone_transition ORDER BY Time_zone_id, Transition_time; ALTER TABLE time_zone_transition_type ORDER BY Time_zone_id, Transition_type_id; # diff --git a/sql/tztime.cc b/sql/tztime.cc index 960dde38237..fa5a24d3c8a 100644 --- a/sql/tztime.cc +++ b/sql/tztime.cc @@ -2748,9 +2748,11 @@ main(int argc, char **argv) printf("TRUNCATE TABLE time_zone_name;\n"); printf("TRUNCATE TABLE time_zone_transition;\n"); printf("TRUNCATE TABLE time_zone_transition_type;\n"); + printf("START TRANSACTION;\n"); if (scan_tz_dir(root_name_end, 0, opt_verbose)) { + printf("ROLLBACK;\n"); fflush(stdout); fprintf(stderr, "There were fatal errors during processing " @@ -2758,6 +2760,7 @@ main(int argc, char **argv) return 1; } + printf("COMMIT;\n"); printf("ALTER TABLE time_zone_transition " "ORDER BY Time_zone_id, Transition_time;\n"); printf("ALTER TABLE time_zone_transition_type " From 5796021174fd7096267003b999e02d6cf98f555b Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Tue, 11 Aug 2020 14:03:02 +0200 Subject: [PATCH 02/11] MDEV-21039: Server fails to start with unknown mysqld_safe options Adding any unknown option to the "[mysqld_safe]" section makes mysqld impossible to start with mysqld_multi. For example, after adding the unknown option "numa_interleave" to the "[mysqld_safe]" section, mysqld_multi exits with the following diagnostics: [ERROR] /usr/local/mysql/bin/mysqld: unknown option '--numa_interleave' To get rid of this behavior, this patch by default adds the "--loose-" prefix to all unknown (for mysqld_safe) options. This behavior can be enabled explicitly with the --ignore-unknown option and disabled with the --no-ignore-unknown option. --- scripts/mysqld_safe.sh | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index 87dc81e8b5f..3d3d4141dc5 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -24,6 +24,7 @@ unsafe_my_cnf=0 wsrep_on=0 dry_run=0 defaults_group_suffix= +ignore_unknown=1 # Initial logging status: error log is not open, and not using syslog logging=init @@ -383,11 +384,22 @@ parse_arguments() { --help) usage ;; + --ignore-unknown) ignore_unknown=1 ;; + --no-ignore-unknown|--not-ignore-unknown) ignore_unknown=0 ;; + *) - case "$unrecognized_handling" in - collect) append_arg_to_args "$arg" ;; - complain) log_error "unknown option '$arg'" ;; - esac + if test $ignore_unknown -eq 0 + then + case "$unrecognized_handling" in + collect) append_arg_to_args "$arg" ;; + complain) log_error "unknown option '$arg'" + esac + else + case "$arg" in + "--loose-"*) append_arg_to_args "$arg" ;; + *) append_arg_to_args "--loose-$arg" + esac + fi esac done } From ece0b0623c9c14a2a1de61c53be9e1bcc2b6216c Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Sun, 16 Aug 2020 22:14:59 +0200 Subject: [PATCH 03/11] MDEV-23491: __bss_start breaks compilation of various platforms Remove __bss_start & Co, because systen call "write" check buffer address and return EFAULT if it is wrong. --- mysys/stacktrace.c | 122 +++++----------------------------- unittest/mysys/CMakeLists.txt | 2 +- unittest/mysys/stacktrace-t.c | 67 +++++++++++++++++++ 3 files changed, 84 insertions(+), 107 deletions(-) create mode 100644 unittest/mysys/stacktrace-t.c diff --git a/mysys/stacktrace.c b/mysys/stacktrace.c index db28acb9f7e..339111f6fee 100644 --- a/mysys/stacktrace.c +++ b/mysys/stacktrace.c @@ -34,107 +34,16 @@ #include #endif -#ifdef __linux__ -#define PTR_SANE(p) ((p) && (char*)(p) >= heap_start && (char*)(p) <= heap_end) -static char *heap_start; -char *__bss_start; -#else -#define PTR_SANE(p) (p) -#endif /* __linux */ - void my_init_stacktrace() { -#ifdef __linux__ - heap_start = (char*) &__bss_start; -#endif /* __linux__ */ } -#ifdef __linux__ - -static void print_buffer(char *buffer, size_t count) -{ - const char s[]= " "; - for (; count && *buffer; --count) - { - my_write_stderr(isprint(*buffer) ? buffer : s, 1); - ++buffer; - } -} - -/** - Access the pages of this process through /proc/self/task//mem - in order to safely print the contents of a memory address range. - - @param addr The address at the start of the memory region. - @param max_len The length of the memory region. - - @return Zero on success. -*/ -static int safe_print_str(const char *addr, int max_len) -{ - int fd; - pid_t tid; - off_t offset; - ssize_t nbytes= 0; - size_t total, count; - char buf[256]; - - tid= (pid_t) syscall(SYS_gettid); - - sprintf(buf, "/proc/self/task/%d/mem", tid); - - if ((fd= open(buf, O_RDONLY)) < 0) - return -1; - - /* Ensure that off_t can hold a pointer. */ - compile_time_assert(sizeof(off_t) >= sizeof(intptr)); - - total= max_len; - offset= (intptr) addr; - - /* Read up to the maximum number of bytes. */ - while (total) - { - count= MY_MIN(sizeof(buf), total); - - if ((nbytes= pread(fd, buf, count, offset)) < 0) - { - /* Just in case... */ - if (errno == EINTR) - continue; - else - break; - } - - /* Advance offset into memory. */ - total-= nbytes; - offset+= nbytes; - addr+= nbytes; - - /* Output the printable characters. */ - print_buffer(buf, nbytes); - - /* Break if less than requested... */ - if ((count - nbytes)) - break; - } - - if (nbytes == -1) - my_safe_printf_stderr("Can't read from address %p", addr); - - close(fd); - - return 0; -} - -#endif /* Attempt to print a char * pointer as a string. SYNOPSIS - Prints either until the end of string ('\0'), or max_len characters have - been printed. + Prints until max_len characters have been printed. RETURN VALUE 0 Pointer was within the heap address space. @@ -149,24 +58,25 @@ static int safe_print_str(const char *addr, int max_len) int my_safe_print_str(const char* val, int max_len) { -#ifdef __linux__ - char *heap_end; - - // Try and make use of /proc filesystem to safely print memory contents. - if (!safe_print_str(val, max_len)) - return 0; - - heap_end= (char*) sbrk(0); -#endif - - if (!PTR_SANE(val)) + const char *orig_val= val; + if (!val) { - my_safe_printf_stderr("%s", "is an invalid pointer"); + my_safe_printf_stderr("%s", "(null)"); return 1; } - for (; max_len && PTR_SANE(val) && *val; --max_len) - my_write_stderr((val++), 1); + for (; max_len; --max_len) + { + if (my_write_stderr((val++), 1) != 1) + { + if ((errno == EFAULT) &&(val == orig_val + 1)) + { + // We can not read the address from very beginning + my_safe_printf_stderr("Can't access address %p", orig_val); + } + break; + } + } my_safe_printf_stderr("%s", "\n"); return 0; diff --git a/unittest/mysys/CMakeLists.txt b/unittest/mysys/CMakeLists.txt index 9ad7e76f547..dd1aa85c9c6 100644 --- a/unittest/mysys/CMakeLists.txt +++ b/unittest/mysys/CMakeLists.txt @@ -15,7 +15,7 @@ MY_ADD_TESTS(bitmap base64 my_atomic my_rdtsc lf my_malloc my_getopt dynstring aes - queues LINK_LIBRARIES mysys) + queues stacktrace LINK_LIBRARIES mysys) MY_ADD_TESTS(my_vsnprintf LINK_LIBRARIES strings mysys) ADD_DEFINITIONS(${SSL_DEFINES}) diff --git a/unittest/mysys/stacktrace-t.c b/unittest/mysys/stacktrace-t.c new file mode 100644 index 00000000000..8fa0db15b36 --- /dev/null +++ b/unittest/mysys/stacktrace-t.c @@ -0,0 +1,67 @@ + +/* Copyright (c) 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +#include +#include +#include + +char b_bss[10]; + +void test_my_safe_print_str() +{ + char b_stack[10]; + char *b_heap= strdup("LEGAL"); + memcpy(b_stack, "LEGAL", 6); + memcpy(b_bss, "LEGAL", 6); + +#ifndef __SANITIZE_ADDRESS__ + fprintf(stderr, "\n===== stack =====\n"); + my_safe_print_str(b_stack, 65535); + fprintf(stderr, "\n===== heap =====\n"); + my_safe_print_str(b_heap, 65535); + fprintf(stderr, "\n===== BSS =====\n"); + my_safe_print_str(b_bss, 65535); + fprintf(stderr, "\n===== data =====\n"); + my_safe_print_str("LEGAL", 65535); + fprintf(stderr, "\n===== Above is a junk, but it is expected. =====\n"); +#endif /*__SANITIZE_ADDRESS__*/ + fprintf(stderr, "\n===== Nornal length test =====\n"); + my_safe_print_str("LEGAL", 5); + fprintf(stderr, "\n===== NULL =====\n"); + my_safe_print_str(0, 5); +#ifndef __SANITIZE_ADDRESS__ + fprintf(stderr, "\n===== (const char*) 1 =====\n"); + my_safe_print_str((const char*)1, 5); +#endif /*__SANITIZE_ADDRESS__*/ + + free(b_heap); + + ok(1, "test_my_safe_print_str"); +} + + +int main(int argc __attribute__((unused)), char **argv) +{ + MY_INIT(argv[0]); + plan(1); + + test_my_safe_print_str(); + + my_end(0); + return exit_status(); +} From 362b18c53672c4c2f8c49545b8f5cc586e26a325 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Mon, 3 Aug 2020 16:51:41 +0530 Subject: [PATCH 04/11] MDEV-23380 InnoDB reads a page from disk despite parsing MLOG_INIT_FILE_PAGE2 record This problem is caused by 6697135c6d03935118c3dfa1c97faea7fa76afa6 (MDEV-21572). During recovery, InnoDB prefetches the siblings of change buffer index leaf page. It does asynchronous page read and recovery scenario wasn't handled in buf_read_page_background(). It leads to the refusal of startup of the server. Solution: ========= InnoDB shouldn't allow the change buffer index page siblings to be prefetched. --- storage/innobase/btr/btr0cur.cc | 5 +++-- storage/innobase/include/dict0mem.h | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index d7d15faa852..770c6d73585 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3145,7 +3145,7 @@ fail: /* prefetch siblings of the leaf for the pessimistic operation, if the page is leaf. */ - if (page_is_leaf(page)) { + if (page_is_leaf(page) && !index->is_ibuf()) { btr_cur_prefetch_siblings(block); } fail_err: @@ -4041,6 +4041,7 @@ btr_cur_optimistic_update( if (rec_offs_any_extern(*offsets)) { any_extern: + ut_ad(!index->is_ibuf()); /* Externally stored fields are treated in pessimistic update */ @@ -4220,7 +4221,7 @@ func_exit: } } - if (err != DB_SUCCESS) { + if (err != DB_SUCCESS && !index->is_ibuf()) { /* prefetch siblings of the leaf for the pessimistic operation. */ btr_cur_prefetch_siblings(block); diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 0d4ee9d23ec..804176e2d5b 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1034,6 +1034,9 @@ struct dict_index_t{ } } + /** @return whether this is the change buffer */ + bool is_ibuf() const { return UNIV_UNLIKELY(type & DICT_IBUF); } + #ifdef BTR_CUR_HASH_ADAPT /** @return a clone of this */ dict_index_t* clone() const; From 8268f26605c871f19cb78be08c84f621f4e0c4cb Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Fri, 7 Aug 2020 19:02:48 +0530 Subject: [PATCH 05/11] MDEV-22934 Table disappear after two alter table command Problem: ======= InnoDB drops the column which has foreign key relations on it. So it tries to load the foreign key during rename process of copy algorithm even though the foreign_key_check is disabled. Solution: ======== During alter copy algorithm, InnoDB ignores the error while loading the foreign key constraint if foreign key check is disabled. It should throw the warning about failure of the foreign key constraint when foreign key check is disabled. --- mysql-test/suite/innodb/r/foreign_key.result | 27 ++++++++++++++++++++ mysql-test/suite/innodb/r/innodb.result | 2 +- mysql-test/suite/innodb/t/foreign_key.test | 23 +++++++++++++++++ mysql-test/suite/innodb/t/innodb.test | 2 +- storage/innobase/row/row0mysql.cc | 10 +++++++- 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/mysql-test/suite/innodb/r/foreign_key.result b/mysql-test/suite/innodb/r/foreign_key.result index b0b4635157e..bab4ea16643 100644 --- a/mysql-test/suite/innodb/r/foreign_key.result +++ b/mysql-test/suite/innodb/r/foreign_key.result @@ -659,6 +659,7 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency; # MDEV-17187 table doesn't exist in engine after ALTER other tables # with CONSTRAINTs # +call mtr.add_suppression("\\[Warning\\] InnoDB: In ALTER TABLE `test`\\.`t2` has or is referenced in foreign key constraints which are not compatible with the new table definition."); set foreign_key_checks=on; create table t1 (id int not null primary key) engine=innodb; create table t2 (id int not null primary key, fid int not null, @@ -714,4 +715,30 @@ drop table t1,t2; ERROR 23000: Cannot delete or update a parent row: a foreign key constraint fails drop table t1,t2; ERROR 42S02: Unknown table 'test.t2' +# +# MDEV-22934 Table disappear after two alter table command +# +CREATE TABLE t1(f1 INT NOT NULL AUTO_INCREMENT, +f2 INT NOT NULL, +PRIMARY KEY (f1), INDEX (f2))ENGINE=InnoDB; +CREATE TABLE t2(f1 INT NOT NULL, +f2 INT NOT NULL, f3 INT NOT NULL, +PRIMARY KEY(f1, f2), UNIQUE KEY(f2), +CONSTRAINT `t2_ibfk_1` FOREIGN KEY (f2) REFERENCES t1(f2) ON DELETE CASCADE, +CONSTRAINT `t2_ibfk_2` FOREIGN KEY (f1) REFERENCES t1(f1) ON DELETE CASCADE +) ENGINE=InnoDB; +SET FOREIGN_KEY_CHECKS=0; +ALTER TABLE t2 DROP PRIMARY KEY, ADD PRIMARY KEY(f3), ALGORITHM=INPLACE; +ALTER TABLE t2 DROP INDEX `f2`, ALGORITHM=COPY; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `f1` int(11) NOT NULL, + `f2` int(11) NOT NULL, + `f3` int(11) NOT NULL, + PRIMARY KEY (`f3`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +CREATE TABLE t2 (f1 INT NOT NULL)ENGINE=InnoDB; +ERROR 42S01: Table 't2' already exists +DROP TABLE t2, t1; # End of 10.2 tests diff --git a/mysql-test/suite/innodb/r/innodb.result b/mysql-test/suite/innodb/r/innodb.result index 0d92968b095..921f9880d47 100644 --- a/mysql-test/suite/innodb/r/innodb.result +++ b/mysql-test/suite/innodb/r/innodb.result @@ -2598,7 +2598,6 @@ set foreign_key_checks=0; create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb; create table t1(a varchar(10) primary key) engine = innodb; alter table t1 modify column a int; -Got one of the listed errors set foreign_key_checks=1; drop table t2,t1; set foreign_key_checks=0; @@ -2607,6 +2606,7 @@ create table t1(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=latin alter table t1 convert to character set utf8; set foreign_key_checks=1; drop table t2,t1; +call mtr.add_suppression("\\[Warning\\] InnoDB: In ALTER TABLE `test`.`t1` has or is referenced in foreign key constraints which are not compatible with the new table definition."); set foreign_key_checks=0; create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=latin1; create table t3(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=utf8; diff --git a/mysql-test/suite/innodb/t/foreign_key.test b/mysql-test/suite/innodb/t/foreign_key.test index 27fa63f144e..40bc3a32a4f 100644 --- a/mysql-test/suite/innodb/t/foreign_key.test +++ b/mysql-test/suite/innodb/t/foreign_key.test @@ -657,6 +657,8 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency; --echo # with CONSTRAINTs --echo # +call mtr.add_suppression("\\[Warning\\] InnoDB: In ALTER TABLE `test`\\.`t2` has or is referenced in foreign key constraints which are not compatible with the new table definition."); + set foreign_key_checks=on; create table t1 (id int not null primary key) engine=innodb; create table t2 (id int not null primary key, fid int not null, @@ -698,6 +700,27 @@ drop table t1,t2; --error ER_BAD_TABLE_ERROR drop table t1,t2; +--echo # +--echo # MDEV-22934 Table disappear after two alter table command +--echo # +CREATE TABLE t1(f1 INT NOT NULL AUTO_INCREMENT, + f2 INT NOT NULL, + PRIMARY KEY (f1), INDEX (f2))ENGINE=InnoDB; +CREATE TABLE t2(f1 INT NOT NULL, + f2 INT NOT NULL, f3 INT NOT NULL, + PRIMARY KEY(f1, f2), UNIQUE KEY(f2), +CONSTRAINT `t2_ibfk_1` FOREIGN KEY (f2) REFERENCES t1(f2) ON DELETE CASCADE, +CONSTRAINT `t2_ibfk_2` FOREIGN KEY (f1) REFERENCES t1(f1) ON DELETE CASCADE +) ENGINE=InnoDB; + +SET FOREIGN_KEY_CHECKS=0; +ALTER TABLE t2 DROP PRIMARY KEY, ADD PRIMARY KEY(f3), ALGORITHM=INPLACE; +ALTER TABLE t2 DROP INDEX `f2`, ALGORITHM=COPY; +SHOW CREATE TABLE t2; +--error ER_TABLE_EXISTS_ERROR +CREATE TABLE t2 (f1 INT NOT NULL)ENGINE=InnoDB; +DROP TABLE t2, t1; + --echo # End of 10.2 tests --source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb/t/innodb.test b/mysql-test/suite/innodb/t/innodb.test index 661d7dff129..7aa146d7d99 100644 --- a/mysql-test/suite/innodb/t/innodb.test +++ b/mysql-test/suite/innodb/t/innodb.test @@ -1662,7 +1662,6 @@ drop table t1; set foreign_key_checks=0; create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb; create table t1(a varchar(10) primary key) engine = innodb; --- error 1025,1025 alter table t1 modify column a int; set foreign_key_checks=1; drop table t2,t1; @@ -1678,6 +1677,7 @@ drop table t2,t1; # test that RENAME does not allow invalid charsets when f_k_c is 0 +call mtr.add_suppression("\\[Warning\\] InnoDB: In ALTER TABLE `test`.`t1` has or is referenced in foreign key constraints which are not compatible with the new table definition."); set foreign_key_checks=0; create table t2 (a varchar(10), foreign key (a) references t1(a)) engine = innodb DEFAULT CHARSET=latin1; create table t3(a varchar(10) primary key) engine = innodb DEFAULT CHARSET=utf8; diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 7c61ad9b45b..3370cdca570 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -4536,12 +4536,20 @@ end: if (err != DB_SUCCESS) { if (old_is_tmp) { - ib::error() << "In ALTER TABLE " + /* In case of copy alter, ignore the + loading of foreign key constraint + when foreign_key_check is disabled */ + ib::error_or_warn(trx->check_foreigns) + << "In ALTER TABLE " << ut_get_name(trx, new_name) << " has or is referenced in foreign" " key constraints which are not" " compatible with the new table" " definition."; + if (!trx->check_foreigns) { + err = DB_SUCCESS; + goto funct_exit; + } } else { ib::error() << "In RENAME TABLE table " << ut_get_name(trx, new_name) From 4c50120d1400eab236af70d36f1c683d2b30d59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 14 Aug 2020 14:52:14 +0300 Subject: [PATCH 06/11] MDEV-23474 InnoDB fails to restart after SET GLOBAL innodb_log_checksums=OFF Regretfully, the parameter innodb_log_checksums was introduced in MySQL 5.7.9 (the first GA release of that series) by mysql/mysql-server@af0acedd885eb7103e319f79d25fda7386ef1506 which partly replaced a parameter that had been introduced in 5.7.8 mysql/mysql-server@22ba38218e1d76c24f69b5a5595ad3bf5933acb0 as innodb_log_checksum_algorithm. Given that the CRC-32C operations are accelerated on many processor implementations (AMD64 with SSE4.2; since MDEV-22669 also on IA-32 with SSE4.2, POWER 8 and later, ARMv8 with some extensions) and by lookup tables when only generic SISD instructions are available, there should be no valid reason to disable checksums. In MariaDB 10.5.2, as a preparation for MDEV-12353, MDEV-19543 deprecated and ignored the parameter innodb_log_checksums altogether. This should imply that after a clean shutdown with innodb_log_checksums=OFF one cannot upgrade to MariaDB Server 10.5 at all. Due to these problems, let us deprecate the parameter innodb_log_checksums and honor it only during server startup. The command SET GLOBAL innodb_log_checksums will always set the parameter to ON. --- extra/mariabackup/xtrabackup.cc | 4 --- .../encryption/r/innodb_encrypt_log.result | 2 +- .../innodb/r/innodb_xtradb_compat.result | 2 +- .../r/innodb_log_checksums_basic.result | 4 ++- .../suite/sys_vars/r/sysvars_innodb.result | 2 +- storage/innobase/handler/ha_innodb.cc | 26 +++++-------------- storage/innobase/handler/ha_xtradb.h | 23 +--------------- storage/innobase/include/log0log.h | 25 ++---------------- storage/innobase/include/log0log.ic | 25 +----------------- storage/innobase/log/log0log.cc | 9 +++---- storage/innobase/log/log0recv.cc | 5 +++- 11 files changed, 23 insertions(+), 104 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 02d232b59d6..9b3d9f9ea39 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2023,10 +2023,6 @@ innodb_init_param(void) srv_undo_dir = (char*) "."; } - log_checksum_algorithm_ptr = innodb_log_checksums || srv_encrypt_log - ? log_block_calc_checksum_crc32 - : log_block_calc_checksum_none; - return(FALSE); error: diff --git a/mysql-test/suite/encryption/r/innodb_encrypt_log.result b/mysql-test/suite/encryption/r/innodb_encrypt_log.result index b999e8cb34a..cd37330f2b0 100644 --- a/mysql-test/suite/encryption/r/innodb_encrypt_log.result +++ b/mysql-test/suite/encryption/r/innodb_encrypt_log.result @@ -7,7 +7,7 @@ # SET GLOBAL innodb_log_checksums=0; Warnings: -Warning 138 innodb_encrypt_log implies innodb_log_checksums +Warning 138 innodb_log_checksums is deprecated and has no effect outside recovery SELECT @@global.innodb_log_checksums; @@global.innodb_log_checksums 1 diff --git a/mysql-test/suite/innodb/r/innodb_xtradb_compat.result b/mysql-test/suite/innodb/r/innodb_xtradb_compat.result index 20b6ac4c892..5722120356a 100644 --- a/mysql-test/suite/innodb/r/innodb_xtradb_compat.result +++ b/mysql-test/suite/innodb/r/innodb_xtradb_compat.result @@ -148,7 +148,7 @@ Warnings: Warning 1287 Using innodb_log_checksum_algorithm is deprecated and the parameter may be removed in future releases. Ignoning the parameter. select @@innodb_log_checksum_algorithm, @@innodb_log_checksums; @@innodb_log_checksum_algorithm @@innodb_log_checksums -NONE 0 +STRICT_INNODB 1 set global innodb_log_checksum_algorithm=STRICT_INNODB; Warnings: Warning 1287 Using innodb_log_checksum_algorithm is deprecated and the parameter may be removed in future releases. Ignoning the parameter. diff --git a/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result b/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result index 6679ca87249..7724ef9c2ee 100644 --- a/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_log_checksums_basic.result @@ -28,9 +28,11 @@ SELECT @@global.innodb_log_checksums; @@global.innodb_log_checksums 1 SET GLOBAL innodb_log_checksums = OFF; +Warnings: +Warning 138 innodb_log_checksums is deprecated and has no effect outside recovery SELECT @@global.innodb_log_checksums; @@global.innodb_log_checksums -0 +1 SET GLOBAL innodb_log_checksums = default; SET GLOBAL innodb_log_checksums = ON; SELECT @@global.innodb_log_checksums; diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 98d303ce4bb..bed23a777a8 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -1499,7 +1499,7 @@ SESSION_VALUE NULL DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Whether to compute and require checksums for InnoDB redo log blocks +VARIABLE_COMMENT DEPRECATED. Whether to require checksums for InnoDB redo log blocks. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f9439ee1ce9..8b58ca57a74 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3599,8 +3599,7 @@ static const char* deprecated_mtflush_threads static my_bool innodb_instrument_semaphores; -/** Update log_checksum_algorithm_ptr with a pointer to the function -corresponding to whether checksums are enabled. +/** If applicable, emit a message that log checksums cannot be disabled. @param[in,out] thd client session, or NULL if at startup @param[in] check whether redo log block checksums are enabled @return whether redo log block checksums are enabled */ @@ -3608,34 +3607,21 @@ static inline bool innodb_log_checksums_func_update(THD* thd, bool check) { - static const char msg[] = "innodb_encrypt_log implies" - " innodb_log_checksums"; + static const char msg[] = "innodb_log_checksums is deprecated" + " and has no effect outside recovery"; ut_ad(!thd == !srv_was_started); if (!check) { - check = srv_encrypt_log; - if (!check) { - } else if (thd) { + if (thd) { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_UNSUPPORTED, msg); + check = true; } else { sql_print_warning(msg); } } - if (thd) { - log_mutex_enter(); - log_checksum_algorithm_ptr = check - ? log_block_calc_checksum_crc32 - : log_block_calc_checksum_none; - log_mutex_exit(); - } else { - log_checksum_algorithm_ptr = check - ? log_block_calc_checksum_crc32 - : log_block_calc_checksum_none; - } - return(check); } @@ -19905,7 +19891,7 @@ static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, static MYSQL_SYSVAR_BOOL(log_checksums, innodb_log_checksums, PLUGIN_VAR_RQCMDARG, - "Whether to compute and require checksums for InnoDB redo log blocks", + "DEPRECATED. Whether to require checksums for InnoDB redo log blocks.", NULL, innodb_log_checksums_update, TRUE); static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, diff --git a/storage/innobase/handler/ha_xtradb.h b/storage/innobase/handler/ha_xtradb.h index 9e898818a01..b049905613c 100644 --- a/storage/innobase/handler/ha_xtradb.h +++ b/storage/innobase/handler/ha_xtradb.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2018, MariaDB Corporation. +Copyright (c) 2017, 2020, MariaDB Corporation. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -507,18 +507,6 @@ set_log_checksum_algorithm(THD* thd, st_mysql_sys_var*, void*, const void* save) ER_WARN_DEPRECATED_SYNTAX, innodb_deprecated_msg, "innodb_log_checksum_algorithm"); - log_mutex_enter(); - srv_log_checksum_algorithm = *static_cast(save); - if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { - ib::info() << "Setting innodb_log_checksums = false"; - innodb_log_checksums = false; - log_checksum_algorithm_ptr = log_block_calc_checksum_none; - } else { - ib::info() << "Setting innodb_log_checksums = true"; - innodb_log_checksums = true; - log_checksum_algorithm_ptr = log_block_calc_checksum_crc32; - } - log_mutex_exit(); } static MYSQL_SYSVAR_ENUM(log_checksum_algorithm, srv_log_checksum_algorithm, PLUGIN_VAR_RQCMDARG, @@ -869,15 +857,6 @@ innodb_check_deprecated(void) if (srv_log_checksum_algorithm != SRV_CHECKSUM_ALGORITHM_DEPRECATED) { innodb_print_deprecation("innodb-log-checksum-algorithm"); - if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { - ib::info() << "Setting innodb_log_checksums = false"; - innodb_log_checksums = false; - log_checksum_algorithm_ptr = log_block_calc_checksum_none; - } else { - ib::info() << "Setting innodb_log_checksums = true"; - innodb_log_checksums = true; - log_checksum_algorithm_ptr = log_block_calc_checksum_crc32; - } } if (srv_max_changed_pages) { diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 0fd983a1a10..612a27976e7 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -2,7 +2,7 @@ Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2009, Google Inc. -Copyright (c) 2017, 2019, MariaDB Corporation. +Copyright (c) 2017, 2020, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -56,12 +56,6 @@ step which modifies the database, is started */ #define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE) #define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE) -typedef ulint (*log_checksum_func_t)(const byte* log_block); - -/** Pointer to the log checksum calculation function. Protected with -log_sys->mutex. */ -extern log_checksum_func_t log_checksum_algorithm_ptr; - /** Append a string to the log. @param[in] str string @param[in] len string length @@ -295,14 +289,6 @@ log_block_set_data_len( /*===================*/ byte* log_block, /*!< in/out: log block */ ulint len); /*!< in: data length */ -/************************************************************//** -Calculates the checksum for a log block. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum( -/*====================*/ - const byte* block); /*!< in: log block */ /** Calculates the checksum for a log block using the CRC32 algorithm. @param[in] block log block @@ -312,13 +298,6 @@ ulint log_block_calc_checksum_crc32( const byte* block); -/** Calculates the checksum for a log block using the "no-op" algorithm. -@param[in] block the redo log block -@return the calculated checksum value */ -UNIV_INLINE -ulint -log_block_calc_checksum_none(const byte* block); - /************************************************************//** Gets a log block checksum field value. @return checksum */ @@ -403,7 +382,7 @@ log_group_close_all(void); void log_shutdown(); -/** Whether to generate and require checksums on the redo log pages */ +/** Whether to require checksums on the redo log pages */ extern my_bool innodb_log_checksums; /* Values used as flags */ diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic index 36caaedfaa2..c366affcdef 100644 --- a/storage/innobase/include/log0log.ic +++ b/storage/innobase/include/log0log.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -190,18 +190,6 @@ log_block_convert_lsn_to_no( 0xFUL, 0x3FFFFFFFUL)) + 1); } -/************************************************************//** -Calculates the checksum for a log block. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum( -/*====================*/ - const byte* block) /*!< in: log block */ -{ - return(log_checksum_algorithm_ptr(block)); -} - /** Calculate the checksum for a log block using the pre-5.7.9 algorithm. @param[in] block log block @return checksum */ @@ -242,17 +230,6 @@ log_block_calc_checksum_crc32( return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE)); } -/** Calculates the checksum for a log block using the "no-op" algorithm. -@param[in] block log block -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum_none( - const byte* block) -{ - return(LOG_NO_CHECKSUM_MAGIC); -} - /************************************************************//** Gets a log block checksum field value. @return checksum */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 972bbe7c6c0..4c68f3743e9 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. -Copyright (c) 2014, 2019, MariaDB Corporation. +Copyright (c) 2014, 2020, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -83,12 +83,9 @@ reduce the size of the log. /** Redo log system */ log_t* log_sys = NULL; -/** Whether to generate and require checksums on the redo log pages */ +/** Whether to require checksums on the redo log pages */ my_bool innodb_log_checksums; -/** Pointer to the log checksum calculation function */ -log_checksum_func_t log_checksum_algorithm_ptr; - /* Next log block number to do dummy record filling if no log records written for a while */ static ulint next_lbn_to_pad = 0; @@ -857,7 +854,7 @@ log_block_store_checksum( /*=====================*/ byte* block) /*!< in/out: pointer to a log block */ { - log_block_set_checksum(block, log_block_calc_checksum(block)); + log_block_set_checksum(block, log_block_calc_checksum_crc32(block)); } /******************************************************//** diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 4980e92ea6d..c4fd5f84879 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1223,7 +1223,10 @@ recv_log_recover_10_3() % univ_page_size.physical()), OS_FILE_LOG_BLOCK_SIZE, buf, NULL); - if (log_block_calc_checksum(buf) != log_block_get_checksum(buf)) { + const ulint cksum = log_block_get_checksum(buf); + + if (cksum != LOG_NO_CHECKSUM_MAGIC + && cksum != log_block_calc_checksum_crc32(buf)) { return(DB_CORRUPTION); } From 1509363970e9cb574005e3af560299c055dda983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 18 Aug 2020 17:30:34 +0300 Subject: [PATCH 07/11] MDEV-23484 Rollback unnecessarily acquires dict_operation_lock for every row InnoDB transaction rollback includes an unnecessary work-around for a data corruption bug that was fixed by me in MySQL 5.6.12 mysql/mysql-server@935ba09d52c1908bde273ad1940b5ab919d9763d and ported to MariaDB 10.0.8 by commit c291ddfdf774b50c34b9741c6e39c57bae8fd1dc in 2013 and 2014, respectively. By acquiring and releasing dict_operation_lock in shared mode, row_undo() hopes to prevent the table from being dropped while the undo log record is being rolled back. But, thanks to mentioned fix, debug assertions (that we are adding) show that the rollback is protected by transactional locks (table IX lock, in addition to implicit or explicit exclusive locks on the records that had been modified). Because row_drop_table_for_mysql() would invoke row_add_table_to_background_drop_list() if any locks exist on the table, the mere existence of locks (which is guaranteed during ROLLBACK) is enough to protect the table from disappearing. Hence, acquiring and releasing dict_operation_lock for every row that is being rolled back is unnecessary. row_undo(): Remove the unnecessary acquisition and release of dict_operation_lock. Note: row_add_table_to_background_drop_list() is mostly working around bugs outside InnoDB: MDEV-21175 (insufficient MDL protection of FOREIGN KEY operations) MDEV-21602 (incorrect error handling of CREATE TABLE...SELECT). --- storage/innobase/row/row0uins.cc | 7 +++-- storage/innobase/row/row0umod.cc | 45 +++++--------------------------- storage/innobase/row/row0undo.cc | 17 ------------ 3 files changed, 11 insertions(+), 58 deletions(-) diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 54fa244fca6..7e5aac1ba9f 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -81,6 +81,7 @@ row_undo_ins_remove_clust_rec( mtr.set_log_mode(MTR_LOG_NO_REDO); } else { mtr.set_named_space(index->space); + ut_ad(lock_table_has_locks(index->table)); } /* This is similar to row_undo_mod_clust(). The DDL thread may @@ -91,8 +92,7 @@ row_undo_ins_remove_clust_rec( online = dict_index_is_online_ddl(index); if (online) { - ut_ad(node->trx->dict_operation_lock_mode - != RW_X_LATCH); + ut_ad(!node->trx->dict_operation_lock_mode); ut_ad(node->table->id != DICT_INDEXES_ID); mtr_s_lock(dict_index_get_lock(index), &mtr); } @@ -491,6 +491,9 @@ row_undo_ins( return(DB_SUCCESS); } + ut_ad(node->table->is_temporary() + || lock_table_has_locks(node->table)); + /* Iterate over all the indexes and undo the insert.*/ node->index = dict_table_get_first_index(node->table); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 80d90f40379..b1cc994cdd8 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -264,10 +264,7 @@ row_undo_mod_clust( bool online; ut_ad(thr_get_trx(thr) == node->trx); - ut_ad(node->trx->dict_operation_lock_mode); ut_ad(node->trx->in_rollback); - ut_ad(rw_lock_own_flagged(&dict_operation_lock, - RW_LOCK_FLAG_X | RW_LOCK_FLAG_S)); log_free_check(); pcur = &node->pcur; @@ -278,11 +275,12 @@ row_undo_mod_clust( mtr.set_log_mode(MTR_LOG_NO_REDO); } else { mtr.set_named_space(index->space); + ut_ad(lock_table_has_locks(index->table)); } online = dict_index_is_online_ddl(index); if (online) { - ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH); + ut_ad(!node->trx->dict_operation_lock_mode); mtr_s_lock(dict_index_get_lock(index), &mtr); } @@ -806,37 +804,6 @@ func_exit_no_pcur: return(err); } -/***********************************************************//** -Flags a secondary index corrupted. */ -static MY_ATTRIBUTE((nonnull)) -void -row_undo_mod_sec_flag_corrupted( -/*============================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_index_t* index) /*!< in: secondary index */ -{ - ut_ad(!dict_index_is_clust(index)); - - switch (trx->dict_operation_lock_mode) { - case RW_S_LATCH: - /* Because row_undo() is holding an S-latch - on the data dictionary during normal rollback, - we can only mark the index corrupted in the - data dictionary cache. TODO: fix this somehow.*/ - mutex_enter(&dict_sys->mutex); - dict_set_corrupted_index_cache_only(index); - mutex_exit(&dict_sys->mutex); - break; - default: - ut_ad(0); - /* fall through */ - case RW_X_LATCH: - /* This should be the rollback of a data dictionary - transaction. */ - dict_set_corrupted(index, trx, "rollback"); - } -} - /***********************************************************//** Undoes a modify in secondary indexes when undo record type is UPD_DEL. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ @@ -950,8 +917,7 @@ row_undo_mod_del_mark_sec( } if (err == DB_DUPLICATE_KEY) { - row_undo_mod_sec_flag_corrupted( - thr_get_trx(thr), index); + index->type |= DICT_CORRUPT; err = DB_SUCCESS; /* Do not return any error to the caller. The duplicate will be reported by ALTER TABLE or @@ -1097,8 +1063,7 @@ row_undo_mod_upd_exist_sec( } if (err == DB_DUPLICATE_KEY) { - row_undo_mod_sec_flag_corrupted( - thr_get_trx(thr), index); + index->type |= DICT_CORRUPT; err = DB_SUCCESS; } else if (err != DB_SUCCESS) { break; @@ -1250,6 +1215,8 @@ row_undo_mod( return(DB_SUCCESS); } + ut_ad(node->table->is_temporary() + || lock_table_has_locks(node->table)); node->index = dict_table_get_first_index(node->table); ut_ad(dict_index_is_clust(node->index)); /* Skip the clustered index (the first index) */ diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index b65b173fedb..34b55f25527 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -279,18 +279,6 @@ row_undo( ? UNDO_NODE_INSERT : UNDO_NODE_MODIFY; } - /* Prevent DROP TABLE etc. while we are rolling back this row. - If we are doing a TABLE CREATE or some other dictionary operation, - then we already have dict_operation_lock locked in x-mode. Do not - try to lock again, because that would cause a hang. */ - - const bool locked_data_dict = (trx->dict_operation_lock_mode == 0); - - if (locked_data_dict) { - - row_mysql_freeze_data_dictionary(trx); - } - dberr_t err; if (node->state == UNDO_NODE_INSERT) { @@ -303,11 +291,6 @@ row_undo( err = row_undo_mod(node, thr); } - if (locked_data_dict) { - - row_mysql_unfreeze_data_dictionary(trx); - } - /* Do some cleanup */ btr_pcur_close(&(node->pcur)); From 309302a3dad5f06cb62b0846dcb8a3671d91ff29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 19 Aug 2020 11:18:56 +0300 Subject: [PATCH 08/11] MDEV-23475 InnoDB performance regression for write-heavy workloads In commit fe39d02f51b96536dccca7ff89faf05e13548877 (MDEV-20638) we removed some wake-up signaling of the master thread that should have been there, to ensure a steady log checkpointing workload. Common sense suggests that the commit omitted some necessary calls to srv_inc_activity_count(). But, an attempt to add the call to trx_flush_log_if_needed_low() as well as to reinstate the function innobase_active_small() did not restore the performance for the case where sync_binlog=1 is set. Therefore, we will revert the entire commit in MariaDB Server 10.2. In MariaDB Server 10.5, adding a srv_inc_activity_count() call to trx_flush_log_if_needed_low() did restore the performance, so we will not revert MDEV-20638 across all versions. --- extra/mariabackup/xtrabackup.cc | 6 ++ storage/innobase/buf/buf0flu.cc | 4 +- storage/innobase/handler/ha_innodb.cc | 48 +++++++++++ storage/innobase/handler/handler0alter.cc | 5 ++ storage/innobase/include/srv0srv.h | 25 ++++-- storage/innobase/row/row0mysql.cc | 2 +- storage/innobase/row/row0trunc.cc | 2 +- storage/innobase/srv/srv0srv.cc | 100 +++++++++++++++++----- storage/innobase/srv/srv0start.cc | 4 +- storage/innobase/trx/trx0roll.cc | 3 + storage/innobase/trx/trx0trx.cc | 6 ++ 11 files changed, 170 insertions(+), 35 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 9b3d9f9ea39..4845f51e6b2 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -254,6 +254,12 @@ my_bool innobase_locks_unsafe_for_binlog; my_bool innobase_rollback_on_timeout; my_bool innobase_create_status_file; +/* The following counter is used to convey information to InnoDB +about server activity: in selects it is not sensible to call +srv_active_wake_master_thread after each fetch or search, we only do +it every INNOBASE_WAKE_INTERVAL'th step. */ + +#define INNOBASE_WAKE_INTERVAL 32 ulong innobase_active_counter = 0; diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 84f95f4b5ec..26610337d0d 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -3141,7 +3141,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*) /* The page_cleaner skips sleep if the server is idle and there are no pending IOs in the buffer pool and there is work to do. */ - if (srv_check_activity(&last_activity) + if (srv_check_activity(last_activity) || buf_get_n_pending_read_ios() || n_flushed == 0) { @@ -3233,7 +3233,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*) n_flushed = n_flushed_lru + n_flushed_list; - } else if (srv_check_activity(&last_activity)) { + } else if (srv_check_activity(last_activity)) { ulint n_to_flush; lsn_t lsn_limit = 0; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8b58ca57a74..4db7f58b513 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -446,6 +446,14 @@ static TYPELIB innodb_lock_schedule_algorithm_typelib = { NULL }; +/* The following counter is used to convey information to InnoDB +about server activity: in case of normal DML ops it is not +sensible to call srv_active_wake_master_thread after each +operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */ + +#define INNOBASE_WAKE_INTERVAL 32 +static ulong innobase_active_counter = 0; + /** Allowed values of innodb_change_buffering */ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = { "none", /* IBUF_USE_NONE */ @@ -1885,6 +1893,23 @@ thd_to_trx_id( } #endif /* WITH_WSREP */ +/********************************************************************//** +Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth +time calls srv_active_wake_master_thread. This function should be used +when a single database operation may introduce a small need for +server utility activity, like checkpointing. */ +inline +void +innobase_active_small(void) +/*=======================*/ +{ + innobase_active_counter++; + + if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) { + srv_active_wake_master_thread(); + } +} + /********************************************************************//** Converts an InnoDB error code to a MySQL error code and also tells to MySQL about a possible transaction rollback inside InnoDB caused by a lock wait @@ -6607,6 +6632,11 @@ ha_innobase::close() MONITOR_INC(MONITOR_TABLE_CLOSE); + /* Tell InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + DBUG_RETURN(0); } @@ -8321,6 +8351,8 @@ report_error: } func_exit: + innobase_active_small(); + DBUG_RETURN(error_result); } @@ -8991,6 +9023,11 @@ func_exit: error, m_prebuilt->table->flags, m_user_thd); } + /* Tell InnoDB server that there might be work for + utility threads: */ + + innobase_active_small(); + #ifdef WITH_WSREP if (error == DB_SUCCESS && trx->is_wsrep() && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE && @@ -9046,6 +9083,11 @@ ha_innobase::delete_row( innobase_srv_conc_exit_innodb(m_prebuilt); + /* Tell the InnoDB server that there might be work for + utility threads: */ + + innobase_active_small(); + #ifdef WITH_WSREP if (error == DB_SUCCESS && trx->is_wsrep() && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE @@ -12933,6 +12975,7 @@ create_table_info_t::create_table_update_dict() if (m_flags2 & DICT_TF2_FTS) { if (!innobase_fts_load_stopword(innobase_table, NULL, m_thd)) { dict_table_close(innobase_table, FALSE, FALSE); + srv_active_wake_master_thread(); trx_free_for_mysql(m_trx); DBUG_RETURN(-1); } @@ -13078,6 +13121,11 @@ ha_innobase::create( error = info.create_table_update_dict(); + /* Tell the InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + DBUG_RETURN(error); } diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 0f5ac81f663..f0aae491eae 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -8641,6 +8641,11 @@ foreign_fail: log_append_on_checkpoint(NULL); + /* Tell the InnoDB server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + if (fail) { for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) { diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 20130541d60..5214953f308 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -809,6 +809,19 @@ srv_reset_io_thread_op_info(); /** Wake up the purge threads if there is work to do. */ void srv_wake_purge_thread_if_not_active(); +/** Wake up the InnoDB master thread if it was suspended (not sleeping). */ +void +srv_active_wake_master_thread_low(); + +#define srv_active_wake_master_thread() \ + do { \ + if (!srv_read_only_mode) { \ + srv_active_wake_master_thread_low(); \ + } \ + } while (0) +/** Wake up the master thread if it is suspended or being suspended. */ +void +srv_wake_master_thread(); /******************************************************************//** Outputs to a file the output of the InnoDB Monitor. @@ -837,13 +850,13 @@ reading this value as it is only used in heuristics. ulint srv_get_activity_count(void); /*========================*/ - -/** Check if there has been any activity. -@param[in,out] activity_count recent activity count to be returned -if there is a change +/*******************************************************************//** +Check if there has been any activity. @return FALSE if no change in activity counter. */ -bool srv_check_activity(ulint *activity_count); - +ibool +srv_check_activity( +/*===============*/ + ulint old_activity_count); /*!< old activity count */ /******************************************************************//** Increment the server activity counter. */ void diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 3370cdca570..446895b8f09 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3820,7 +3820,7 @@ funct_exit: trx->op_info = ""; - srv_inc_activity_count(); + srv_wake_master_thread(); DBUG_RETURN(err); } diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc index 209767d0fe1..618e161bee4 100644 --- a/storage/innobase/row/row0trunc.cc +++ b/storage/innobase/row/row0trunc.cc @@ -1249,7 +1249,7 @@ row_truncate_complete( ut_ad(!trx_is_started(trx)); - srv_inc_activity_count(); + srv_wake_master_thread(); DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done", DBUG_SUICIDE();); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 6bf8e3dd8f6..f1216dcd51e 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1982,6 +1982,33 @@ srv_get_active_thread_type(void) return(ret); } +/** Wake up the InnoDB master thread if it was suspended (not sleeping). */ +void +srv_active_wake_master_thread_low() +{ + ut_ad(!srv_read_only_mode); + ut_ad(!srv_sys_mutex_own()); + + srv_inc_activity_count(); + + if (my_atomic_loadlint(&srv_sys.n_threads_active[SRV_MASTER]) == 0) { + srv_slot_t* slot; + + srv_sys_mutex_enter(); + + slot = &srv_sys.sys_threads[SRV_MASTER_SLOT]; + + /* Only if the master thread has been started. */ + + if (slot->in_use) { + ut_a(srv_slot_get_type(slot) == SRV_MASTER); + os_event_set(slot->event); + } + + srv_sys_mutex_exit(); + } +} + /** Wake up the purge threads if there is work to do. */ void srv_wake_purge_thread_if_not_active() @@ -1996,6 +2023,14 @@ srv_wake_purge_thread_if_not_active() } } +/** Wake up the master thread if it is suspended or being suspended. */ +void +srv_wake_master_thread() +{ + srv_inc_activity_count(); + srv_release_threads(SRV_MASTER, 1); +} + /*******************************************************************//** Get current server activity count. We don't hold srv_sys::mutex while reading this value as it is only used in heuristics. @@ -2007,20 +2042,15 @@ srv_get_activity_count(void) return(srv_sys.activity_count); } -/** Check if there has been any activity. -@param[in,out] activity_count recent activity count to be returned -if there is a change +/*******************************************************************//** +Check if there has been any activity. @return FALSE if no change in activity counter. */ -bool srv_check_activity(ulint *activity_count) +ibool +srv_check_activity( +/*===============*/ + ulint old_activity_count) /*!< in: old activity count */ { - ulint new_activity_count= srv_sys.activity_count; - if (new_activity_count != *activity_count) - { - *activity_count= new_activity_count; - return true; - } - - return false; + return(srv_sys.activity_count != old_activity_count); } /********************************************************************//** @@ -2427,30 +2457,52 @@ DECLARE_THREAD(srv_master_thread)( slot = srv_reserve_slot(SRV_MASTER); ut_a(slot == srv_sys.sys_threads); +loop: while (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) { srv_master_sleep(); MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP); - if (srv_check_activity(&old_activity_count)) { + if (srv_check_activity(old_activity_count)) { + old_activity_count = srv_get_activity_count(); srv_master_do_active_tasks(); } else { srv_master_do_idle_tasks(); } } - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS - || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); - - if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP - && srv_fast_shutdown < 2) { - srv_shutdown(srv_fast_shutdown == 0); + switch (srv_shutdown_state) { + case SRV_SHUTDOWN_NONE: + case SRV_SHUTDOWN_INITIATED: + break; + case SRV_SHUTDOWN_FLUSH_PHASE: + case SRV_SHUTDOWN_LAST_PHASE: + ut_ad(0); + /* fall through */ + case SRV_SHUTDOWN_EXIT_THREADS: + /* srv_init_abort() must have been invoked */ + case SRV_SHUTDOWN_CLEANUP: + if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP + && srv_fast_shutdown < 2) { + srv_shutdown(srv_fast_shutdown == 0); + } + srv_suspend_thread(slot); + my_thread_end(); + os_thread_exit(); } + srv_main_thread_op_info = "suspending"; + srv_suspend_thread(slot); - my_thread_end(); - os_thread_exit(); - OS_THREAD_DUMMY_RETURN; + + /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql() + waits for database activity to die down when converting < 4.1.x + databases, and relies on this string being exactly as it is. InnoDB + manual also mentions this string in several places. */ + srv_main_thread_op_info = "waiting for server activity"; + + srv_resume_thread(slot); + goto loop; } /** Check if purge should stop. @@ -2647,13 +2699,15 @@ srv_do_purge(ulint* n_total_purged ++n_use_threads; } - } else if (srv_check_activity(&old_activity_count) + } else if (srv_check_activity(old_activity_count) && n_use_threads > 1) { /* History length same or smaller since last snapshot, use fewer threads. */ --n_use_threads; + + old_activity_count = srv_get_activity_count(); } /* Ensure that the purge threads are less than what diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index a8f2b78d0a8..0d8ebbe98cd 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1238,7 +1238,7 @@ srv_shutdown_all_bg_threads() if (srv_start_state_is_set(SRV_START_STATE_MASTER)) { /* c. We wake the master thread so that it exits */ - srv_inc_activity_count(); + srv_wake_master_thread(); } if (srv_start_state_is_set(SRV_START_STATE_PURGE)) { @@ -2762,7 +2762,7 @@ srv_shutdown_bg_undo_sources() fts_optimize_shutdown(); dict_stats_shutdown(); while (row_get_background_drop_list_len_low()) { - srv_inc_activity_count(); + srv_wake_master_thread(); os_thread_yield(); } srv_undo_sources = false; diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index ef3d93cd65e..c5f70452bf2 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -124,6 +124,9 @@ trx_rollback_to_savepoint_low( mem_heap_free(heap); + /* There might be work for utility threads.*/ + srv_active_wake_master_thread(); + MONITOR_DEC(MONITOR_TRX_ACTIVE); } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 0dbd985b6c3..d4cd020b321 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1804,6 +1804,12 @@ trx_commit_in_memory( } trx->commit_lsn = lsn; + + /* Tell server some activity has happened, since the trx + does changes something. Background utility threads like + master thread, purge thread or page_cleaner thread might + have some work to do. */ + srv_active_wake_master_thread(); } ut_ad(!trx->rsegs.m_noredo.undo); From 22c4a7512f8dc3f2d2586a856b362ad97ab2bf7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 20 Aug 2020 08:34:55 +0300 Subject: [PATCH 09/11] MDEV-23514 Race conditions between ROLLBACK and ALTER TABLE Since commit 1509363970e9cb574005e3af560299c055dda983 (MDEV-23484) the rollback of InnoDB transactions is no longer protected by dict_operation_lock. Removing that protection revealed a race condition between transaction rollback and the rollback of an online table-rebuilding operation (OPTIMIZE TABLE, or any online ALTER TABLE that is rebuilding the table). row_undo_mod_clust(): Re-check dict_index_is_online_ddl() after acquiring index->lock, similar to how row_undo_ins_remove_clust_rec() is doing it. Because innobase_online_rebuild_log_free() is holding exclusive index->lock while invoking row_log_free(), this re-check will ensure that row_log_table_low() will not be invoked when index->online_log=NULL. A different race condition is possible between the rollback of a recovered transaction and the start of online secondary index creation. Because prepare_inplace_alter_table_dict() is not acquiring an InnoDB table lock in this case, and because recovered transactions are not covered by metadata locks (MDL), the dict_table_t::indexes could be modified by prepare_inplace_alter_table_dict() while the rollback of a recovered transaction is being executed. Normal transactions would be covered by MDL, and during prepare_inplace_alter_table_dict() we do hold MDL_EXCLUSIVE, that is, an online ALTER TABLE operation may not execute concurrently with other transactions that have accessed the table. row_undo(): To prevent a race condition with prepare_inplace_alter_table_dict(), acquire dict_operation_lock for all recovered transactions. Before MDEV-23484 we used to acquire it for all transactions, not only recovered ones. Note: row_merge_drop_indexes() would not invoke dict_index_remove_from_cache() while transactional locks exist on the table, or while any thread is holding an open table handle. OK, it does that for FULLTEXT INDEX, but ADD FULLTEXT INDEX is not supported as an online operation, and therefore prepare_inplace_alter_table_dict() would acquire a table S lock, which cannot succeed as long as recovered transactions on the table exist, because they would hold a conflicting IX lock on the table. --- storage/innobase/handler/handler0alter.cc | 4 ++-- storage/innobase/row/row0umod.cc | 12 +----------- storage/innobase/row/row0undo.cc | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index f0aae491eae..09bfc5f5503 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -8267,11 +8267,11 @@ ha_innobase::commit_inplace_alter_table( /* Exclusively lock the table, to ensure that no other transaction is holding locks on the table while we - change the table definition. The MySQL meta-data lock + change the table definition. The meta-data lock (MDL) should normally guarantee that no conflicting locks exist. However, FOREIGN KEY constraints checks and any transactions collected during crash recovery could be - holding InnoDB locks only, not MySQL locks. */ + holding InnoDB locks only, not MDL. */ dberr_t error = row_merge_lock_table( m_prebuilt->trx, ctx->old_table, LOCK_X); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index b1cc994cdd8..0de0760834e 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -319,17 +319,7 @@ row_undo_mod_clust( ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE); } - /* Online rebuild cannot be initiated while we are holding - dict_operation_lock and index->lock. (It can be aborted.) */ - ut_ad(online || !dict_index_is_online_ddl(index)); - - if (err == DB_SUCCESS && online) { - - ut_ad(rw_lock_own_flagged( - &index->lock, - RW_LOCK_FLAG_S | RW_LOCK_FLAG_X - | RW_LOCK_FLAG_SX)); - + if (err == DB_SUCCESS && online && dict_index_is_online_ddl(index)) { switch (node->rec_type) { case TRX_UNDO_DEL_MARK_REC: row_log_table_insert( diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 34b55f25527..185a71e670b 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -279,6 +279,19 @@ row_undo( ? UNDO_NODE_INSERT : UNDO_NODE_MODIFY; } + /* Prevent prepare_inplace_alter_table_dict() from adding + dict_table_t::indexes while we are processing the record. + Recovered transactions are not protected by MDL, and the + secondary index creation is not protected by table locks + for online operation. (A table lock would only be acquired + when committing the ALTER TABLE operation.) */ + const bool locked_data_dict = UNIV_UNLIKELY(trx->is_recovered) + && !trx->dict_operation_lock_mode; + + if (UNIV_UNLIKELY(locked_data_dict)) { + row_mysql_freeze_data_dictionary(trx); + } + dberr_t err; if (node->state == UNDO_NODE_INSERT) { @@ -291,6 +304,10 @@ row_undo( err = row_undo_mod(node, thr); } + if (UNIV_UNLIKELY(locked_data_dict)) { + row_mysql_unfreeze_data_dictionary(trx); + } + /* Do some cleanup */ btr_pcur_close(&(node->pcur)); From e9d6f1c7ac0b33f565301ca1f269a36adc35270b Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Tue, 18 Aug 2020 13:41:03 +0530 Subject: [PATCH 10/11] MDEV-23452 Assertion `buf_page_get_io_fix(bpage) == BUF_IO_NONE' failed in buf_page_set_sticky commit a1f899a8abb6bb0b046db28d6da9dd4b7fc3c8c4 (MDEV-23233) added the code to make page sticky. So that InnoDB can't allow the page to be grabbed by other thread while doing lazy drop of ahi. But the block could be in flush list and it could have io_fix value as BUF_IO_WRITE. It could lead to the failure in buf_page_set_sticky(). buf_page_create(): If btr_search_drop_page_hash_index() must be invoked, take x-latch on the block. If the block io_fix value is other than BUF_IO_NONE, release the buffer pool mutex and page hash lock and wait for I/O to complete. --- storage/innobase/buf/buf0buf.cc | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index ad53a11ea66..a1fd7c48301 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -5591,7 +5591,21 @@ buf_page_create( if (drop_hash_entry) { mutex_enter(&block->mutex); - buf_page_set_sticky(&block->page); + /* Avoid a hang if I/O is going on. Release + the buffer pool mutex and page hash lock + and wait for I/O to complete */ + while (buf_block_get_io_fix(block) != BUF_IO_NONE) { + buf_block_fix(block); + mutex_exit(&block->mutex); + buf_pool_mutex_exit(buf_pool); + rw_lock_x_unlock(hash_lock); + + buf_pool_mutex_enter(buf_pool); + rw_lock_x_lock(hash_lock); + mutex_enter(&block->mutex); + buf_block_unfix(block); + } + rw_lock_x_lock(&block->lock); mutex_exit(&block->mutex); } #endif @@ -5603,11 +5617,7 @@ buf_page_create( #ifdef BTR_CUR_HASH_ADAPT if (drop_hash_entry) { btr_search_drop_page_hash_index(block); - buf_pool_mutex_enter(buf_pool); - mutex_enter(&block->mutex); - buf_page_unset_sticky(&block->page); - mutex_exit(&block->mutex); - buf_pool_mutex_exit(buf_pool); + rw_lock_x_unlock(&block->lock); } #endif /* BTR_CUR_HASH_ADAPT */ From a79c25789474f32097e083d3b4953927c04909bc Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Thu, 20 Aug 2020 11:38:10 +0530 Subject: [PATCH 11/11] MDEV-23452 Assertion `buf_page_get_io_fix(bpage) == BUF_IO_NONE' failed in buf_page_set_sticky - Adding os_thread_yield() in buf_page_create() to avoid the continuous buffer pool mutex acquistions. --- storage/innobase/buf/buf0buf.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index a1fd7c48301..5858d9cd2d3 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -5600,6 +5600,8 @@ buf_page_create( buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(hash_lock); + os_thread_yield(); + buf_pool_mutex_enter(buf_pool); rw_lock_x_lock(hash_lock); mutex_enter(&block->mutex);