From f9ceb0a67ffb20631c936a7e8e8776c000d677ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Mon, 25 Nov 2019 15:22:21 +0200
Subject: [PATCH] MDEV-20190 Instant operation fails when add column and
 collation change on non-indexed column

We must relax too strict debug assertions. For latin1_swedish_ci,
mtype=DATA_CHAR or mtype=DATA_VARCHAR will be used instead of
mtype=DATA_MYSQL or mtype=DATA_VARMYSQL. Likewise, some changes of
dtype_get_charset_coll() do not affect the data type encoding,
but only any indexes that are defined on the column.

Charset::same_encoding(): Check whether two charset-collations have
the same character set encoding.

dict_col_t::same_encoding(): Check whether two character columns
have the same character set encoding.

dict_col_t::same_type(): Check whether two columns have a compatible
data type encoding.

dict_col_t::same_format(), dict_table_t::instant_column(): Do not
compare mtype or the charset-collation of prtype directly.
Rely on dict_col_t::same_type() instead.

dtype_get_charset_coll(): Narrow the return type to uint16_t.

This is a refined version of a fix that was developed by
Thirunarayanan Balathandayuthapani.
---
 .../suite/innodb/r/instant_alter_bugs.result  | 33 +++++++++++++
 .../suite/innodb/t/instant_alter_bugs.test    | 25 ++++++++++
 sql/sql_string.h                              |  6 ++-
 storage/innobase/dict/dict0mem.cc             |  9 ++++
 storage/innobase/handler/handler0alter.cc     |  3 +-
 storage/innobase/include/data0type.h          | 17 +++----
 storage/innobase/include/data0type.ic         | 12 -----
 storage/innobase/include/dict0mem.h           | 47 ++++++++++++++++++-
 8 files changed, 129 insertions(+), 23 deletions(-)

diff --git a/mysql-test/suite/innodb/r/instant_alter_bugs.result b/mysql-test/suite/innodb/r/instant_alter_bugs.result
index 19262246c9b..95efacf294b 100644
--- a/mysql-test/suite/innodb/r/instant_alter_bugs.result
+++ b/mysql-test/suite/innodb/r/instant_alter_bugs.result
@@ -324,4 +324,37 @@ InnoDB		0 transactions not purged
 SELECT * FROM t1;
 a
 DROP TABLE t1;
+#
+# MDEV-20190 Instant operation fails when add column and collation
+#            change on non-indexed column
+#
+CREATE TABLE t1 (a CHAR)ENGINE=INNODB;
+ALTER TABLE t1 DEFAULT COLLATE= latin1_general_cs;
+ALTER TABLE t1 ADD COLUMN b INT NOT NULL, MODIFY a CHAR, ALGORITHM=INSTANT;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` char(1) COLLATE latin1_general_cs DEFAULT NULL,
+  `b` int(11) NOT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs
+DROP TABLE t1;
+CREATE TABLE t1 (a CHAR NOT NULL) ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
+ALTER TABLE t1 DEFAULT COLLATE = latin1_general_cs;
+ALTER TABLE t1 MODIFY a CHAR, ALGORITHM=INSTANT;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` char(1) COLLATE latin1_general_cs DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs ROW_FORMAT=REDUNDANT
+DROP TABLE t1;
+CREATE TABLE t1 (a CHAR NOT NULL) CHARSET latin2 COLLATE latin2_bin
+ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
+ALTER TABLE t1 DEFAULT COLLATE = latin2_general_ci;
+ALTER TABLE t1 MODIFY a CHAR, ALGORITHM=INSTANT;
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` char(1) DEFAULT NULL
+) ENGINE=InnoDB DEFAULT CHARSET=latin2 ROW_FORMAT=REDUNDANT
+DROP TABLE t1;
 SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency;
diff --git a/mysql-test/suite/innodb/t/instant_alter_bugs.test b/mysql-test/suite/innodb/t/instant_alter_bugs.test
index 090a4aef787..ac93089e60e 100644
--- a/mysql-test/suite/innodb/t/instant_alter_bugs.test
+++ b/mysql-test/suite/innodb/t/instant_alter_bugs.test
@@ -348,4 +348,29 @@ ALTER TABLE t1 DROP b, DROP c, DROP d, DROP e;
 --source include/wait_all_purged.inc
 SELECT * FROM t1;
 DROP TABLE t1;
+
+--echo #
+--echo # MDEV-20190 Instant operation fails when add column and collation
+--echo #            change on non-indexed column
+--echo #
+
+CREATE TABLE t1 (a CHAR)ENGINE=INNODB;
+ALTER TABLE t1 DEFAULT COLLATE= latin1_general_cs;
+ALTER TABLE t1 ADD COLUMN b INT NOT NULL, MODIFY a CHAR, ALGORITHM=INSTANT;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a CHAR NOT NULL) ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
+ALTER TABLE t1 DEFAULT COLLATE = latin1_general_cs;
+ALTER TABLE t1 MODIFY a CHAR, ALGORITHM=INSTANT;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a CHAR NOT NULL) CHARSET latin2 COLLATE latin2_bin
+ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
+ALTER TABLE t1 DEFAULT COLLATE = latin2_general_ci;
+ALTER TABLE t1 MODIFY a CHAR, ALGORITHM=INSTANT;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
 SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency;
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 605480461f1..aad5314a973 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -3,7 +3,7 @@
 
 /*
    Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2008, 2018, MariaDB Corporation.
+   Copyright (c) 2008, 2019, MariaDB Corporation.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -164,6 +164,10 @@ public:
   {
     swap_variables(CHARSET_INFO*, m_charset, other.m_charset);
   }
+  bool same_encoding(const Charset &other) const
+  {
+    return !strcmp(m_charset->csname, other.m_charset->csname);
+  }
   /*
     Collation name without the character set name.
     For example, in case of "latin1_swedish_ci",
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 9e67f19e8b3..7d3f33d1ae2 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -37,6 +37,7 @@ Created 1/8/1996 Heikki Tuuri
 #include "lock0lock.h"
 #include "sync0sync.h"
 #include "row0row.h"
+#include "sql_string.h"
 #include <iostream>
 
 #define	DICT_HEAP_SIZE		100	/*!< initial memory heap size when
@@ -115,6 +116,14 @@ operator<<(
 	return(s << ut_get_name(NULL, table_name.m_name));
 }
 
+bool dict_col_t::same_encoding(uint16_t a, uint16_t b)
+{
+  if (const CHARSET_INFO *acs= get_charset(a, MYF(MY_WME)))
+    if (const CHARSET_INFO *bcs= get_charset(b, MYF(MY_WME)))
+      return Charset(acs).same_encoding(bcs);
+  return false;
+}
+
 /**********************************************************************//**
 Creates a table memory object.
 @return own: table object */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 7bb226b9a2e..d7b332e4dee 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -539,8 +539,9 @@ inline bool dict_table_t::instant_column(const dict_table_t& table,
 			c.def_val = o->def_val;
 			DBUG_ASSERT(!((c.prtype ^ o->prtype)
 				      & ~(DATA_NOT_NULL | DATA_VERSIONED
+					  | CHAR_COLL_MASK << 16
 					  | DATA_LONG_TRUE_VARCHAR)));
-			DBUG_ASSERT(c.mtype == o->mtype);
+			DBUG_ASSERT(c.same_type(*o));
 			DBUG_ASSERT(c.len >= o->len);
 
 			if (o->vers_sys_start()) {
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index 7168c104b1c..0e496085113 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -334,14 +334,15 @@ dtype_get_mblen(
 				multi-byte character */
 	ulint*	mbmaxlen);	/*!< out: maximum length of a
 				multi-byte character */
-/*********************************************************************//**
-Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
-	ulint	prtype);/*!< in: precise data type */
+/**
+Get the charset-collation code for string types.
+@param  prtype  InnoDB precise type
+@return charset-collation code */
+inline uint16_t dtype_get_charset_coll(ulint prtype)
+{
+  return static_cast<uint16_t>(prtype >> 16) & CHAR_COLL_MASK;
+}
+
 /** Form a precise type from the < 4.1.2 format precise type plus the
 charset-collation code.
 @param[in]	old_prtype	MySQL type code and the flags
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index f2c499716ce..037a71a9345 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -27,18 +27,6 @@ Created 1/16/1996 Heikki Tuuri
 #include "mach0data.h"
 #include "ha_prototypes.h"
 
-/*********************************************************************//**
-Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
-	ulint	prtype)	/*!< in: precise data type */
-{
-	return((prtype >> 16) & CHAR_COLL_MASK);
-}
-
 /*********************************************************************//**
 Determines if a MySQL string type is a subset of UTF-8.  This function
 may return false negatives, in case further character-set collation
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 5e1fa0033f7..6a0cb54c88f 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -674,18 +674,63 @@ public:
 		def_val.data = NULL;
 	}
 
+  /** @return whether two columns have compatible data type encoding */
+  bool same_type(const dict_col_t &other) const
+  {
+    if (mtype != other.mtype)
+    {
+      /* For latin1_swedish_ci, DATA_CHAR and DATA_VARCHAR
+      will be used instead of DATA_MYSQL and DATA_VARMYSQL.
+      As long as mtype,prtype are being written to InnoDB
+      data dictionary tables, we cannot simplify this. */
+      switch (mtype) {
+      default:
+        return false;
+      case DATA_VARCHAR:
+        if (other.mtype != DATA_VARMYSQL)
+          return false;
+        goto check_encoding;
+      case DATA_VARMYSQL:
+        if (other.mtype != DATA_VARCHAR)
+          return false;
+        goto check_encoding;
+      case DATA_CHAR:
+        if (other.mtype != DATA_MYSQL)
+          return false;
+        goto check_encoding;
+      case DATA_MYSQL:
+        if (other.mtype != DATA_CHAR)
+          return false;
+        goto check_encoding;
+      }
+    }
+    else if (dtype_is_string_type(mtype))
+    {
+    check_encoding:
+      const uint16_t cset= dtype_get_charset_coll(prtype);
+      const uint16_t ocset= dtype_get_charset_coll(other.prtype);
+      return cset == ocset || dict_col_t::same_encoding(cset, ocset);
+    }
+
+    return true;
+  }
+
+  /** @return whether two collations codes have the same character encoding */
+  static bool same_encoding(uint16_t a, uint16_t b);
+
 	/** Determine if the columns have the same format
 	except for is_nullable() and is_versioned().
 	@param[in]	other	column to compare to
 	@return	whether the columns have the same format */
 	bool same_format(const dict_col_t& other) const
 	{
-		return mtype == other.mtype
+		return same_type(other)
 			&& len >= other.len
 			&& mbminlen == other.mbminlen
 			&& mbmaxlen == other.mbmaxlen
 			&& !((prtype ^ other.prtype)
 			     & ~(DATA_NOT_NULL | DATA_VERSIONED
+				 | CHAR_COLL_MASK << 16
 				 | DATA_LONG_TRUE_VARCHAR));
 	}
 };