Backporting WL#1213

config/ac-macros/character_sets.m4: - Adding configure definitions for utf8mb4, utf16, utf32 include/config-win.h: - Enabling utf8mb4, utf16, utf32 in Windows build include/m_ctype.h: - Adding new flags - Adding new shared functions prototypes mysql-test/include/ctype_datetime.inc: - Adding test to check that datetime functions work with "real" multibyte character sets. mysql-test/include/ctype_like.inc: - Adding LIKE tests mysql-test/include/have_utf16.inc: New file mysql-test/include/have_utf32.inc: New file mysql-test/include/have_utf8mb4.inc: New file mysql-test/r/ctype_ldml.result: - Adding tests for utf8mb4, utf16, utf32 mysql-test/r/ctype_many.result: - Adding tests to check superset/subset relations between all Unicode character sets. mysql-test/r/ctype_utf16.result: New file mysql-test/r/ctype_utf16_uca.result: New file mysql-test/r/ctype_utf32.result: New file mysql-test/r/ctype_utf32_uca.result: New file mysql-test/r/ctype_utf8.result: - Adding tests for utf8mb3 alias mysql-test/r/ctype_utf8mb4.result: - Adding tests for utf8mb4 mysql-test/r/have_utf16.require: New file mysql-test/r/have_utf32.require: New file mysql-test/r/have_utf8mb4.require: New file mysql-test/std_data/Index.xml: - Adding tests for loadable utf8mb4, utf16, utf32 collations mysql-test/suite/sys_vars/r/character_set_client_basic.result: - Adding tests for utf16, utf32. 
- Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_connection_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_database_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_results_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/t/character_set_client_basic.test: - Adding tests for new character sets mysql-test/suite/sys_vars/t/character_set_connection_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_database_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_results_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/t/ctype_ldml.test: - Adding tests for dynamic utf8mb4, utf16, utf32 collations mysql-test/t/ctype_many.test: - Adding tests to check superset/subset relations between all Unicode character sets mysql-test/t/ctype_utf16.test: New file mysql-test/t/ctype_utf16_uca.test: New file mysql-test/t/ctype_utf32.test: New file mysql-test/t/ctype_utf32_uca.test: New file mysql-test/t/ctype_utf8.test: - Adding tests for utf8mb3 alias mysql-test/t/ctype_utf8mb4.test: New file mysys/charset-def.c: - Adding initialization of utf8mb4, utf16, utf32 built-in collations mysys/charset.c: - Adding initialization of utf8mb4, utf16, utf32 dynamic collations sql/field.cc: - Fixing "truncated" error with datetime functions: Force conversion in case of non-ascii character sets. 
sql/item.cc: - Adding superset/subset relation check for utf8mb4/utf8 sql/item_strfunc.cc: - Fixing a problem with CHAR(x USING utf32) sql/sql_string.cc: - Fixing problems with zero padding for UTF32 sql/sql_table.cc: - Fixing buffer size, to make utf32 comma fit. strings/ctype-mb.c: - Making handlers for multi-byte binary collations public strings/ctype-uca.c: - Adding definitions for utf8mb4, utf16, utf32 UCA collations strings/ctype-ucs2.c: - Adding functions which are shared between ucs2, utf16, utf32 - Adding utf16 implementation - Adding utf32 implementation strings/ctype-utf8.c: - Adding functions shared between utf8 and utf8mb4 - Adding implementation of utf8mb4
2010-02-24 13:15:34 +04:00 · 2010-02-24 13:15:34 +04:00 · 8994fad85d
commit 8994fad85d
parent d2af6c43c0
49 changed files with 19441 additions and 1208 deletions
--- a/config/ac-macros/character_sets.m4
+++ b/config/ac-macros/character_sets.m4
@ -13,11 +13,11 @@ define(CHARSETS_AVAILABLE1,armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257)
 define(CHARSETS_AVAILABLE2,cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8)
 define(CHARSETS_AVAILABLE3,greek hebrew hp8 keybcs2 koi8r koi8u)
 define(CHARSETS_AVAILABLE4,latin1 latin2 latin5 latin7 macce macroman)
-define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8)
+define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32)

 DEFAULT_CHARSET=latin1
 CHARSETS_AVAILABLE="CHARSETS_AVAILABLE0 CHARSETS_AVAILABLE1 CHARSETS_AVAILABLE2 CHARSETS_AVAILABLE3 CHARSETS_AVAILABLE4 CHARSETS_AVAILABLE5"
-CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8"
+CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32"

 AC_DIVERT_POP

@ -50,7 +50,7 @@ AC_ARG_WITH(extra-charsets,

 AC_MSG_CHECKING("character sets")

-CHARSETS="$default_charset latin1 utf8"
+CHARSETS="$default_charset latin1 utf8mb4 utf8"

 if test "$extra_charsets" = no; then
  CHARSETS="$CHARSETS"
@ -195,8 +195,23 @@ do
      AC_DEFINE([USE_MB], [1], [Use multi-byte character routines])
      AC_DEFINE(USE_MB_IDENT, 1)
      ;;
+    utf8mb4)
+      AC_DEFINE(HAVE_CHARSET_utf8mb4, 1, [Define to enable utf8mb4])
+      AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
+      AC_DEFINE(USE_MB_IDENT, 1)
+      ;;
    utf8)
-      AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable ut8])
+      AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable utf8])
+      AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
+      AC_DEFINE(USE_MB_IDENT, 1)
+      ;;
+    utf16)
+      AC_DEFINE(HAVE_CHARSET_utf16, 1, [Define to enable utf16])
+      AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
+      AC_DEFINE(USE_MB_IDENT, 1)
+      ;;
+    utf32)
+      AC_DEFINE(HAVE_CHARSET_utf32, 1, [Define to enable utf32])
      AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
      AC_DEFINE(USE_MB_IDENT, 1)
      ;;
@ -381,6 +396,48 @@ case $default_charset in
      fi
      default_charset_collations="$UTFC"
      ;;
+    utf8mb4)
+      default_charset_default_collation="utf8mb4_general_ci"
+      define(UTFC1, utf8mb4_general_ci utf8mb4_bin)
+      define(UTFC2, utf8mb4_czech_ci utf8mb4_danish_ci)
+      define(UTFC3, utf8mb4_esperanto_ci utf8mb4_estonian_ci utf8mb4_hungarian_ci)
+      define(UTFC4, utf8mb4_icelandic_ci utf8mb4_latvian_ci utf8mb4_lithuanian_ci)
+      define(UTFC5, utf8mb4_persian_ci utf8mb4_polish_ci utf8mb4_romanian_ci)
+      define(UTFC6, utf8mb4_sinhala_ci utf8mb4_slovak_ci utf8mb4_slovenian_ci)
+      define(UTFC7, utf8mb4_spanish2_ci utf8mb4_spanish_ci)
+      define(UTFC8, utf8mb4_swedish_ci utf8mb4_turkish_ci)
+      define(UTFC9, utf8mb4_unicode_ci)
+      UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
+      default_charset_collations="$UTFC"
+      ;;
+    utf16)
+      default_charset_default_collation="utf16_general_ci"
+      define(UTFC1, utf16_general_ci utf16_bin)
+      define(UTFC2, utf16_czech_ci utf16_danish_ci)
+      define(UTFC3, utf16_esperanto_ci utf16_estonian_ci utf16_hungarian_ci)
+      define(UTFC4, utf16_icelandic_ci utf16_latvian_ci utf16_lithuanian_ci)
+      define(UTFC5, utf16_persian_ci utf16_polish_ci utf16_romanian_ci)
+      define(UTFC6, utf16_sinhala_ci utf16_slovak_ci utf16_slovenian_ci)
+      define(UTFC7, utf16_spanish2_ci utf16_spanish_ci)
+      define(UTFC8, utf16_swedish_ci utf16_turkish_ci)
+      define(UTFC9, utf16_unicode_ci)
+      UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
+      default_charset_collations="$UTFC"
+      ;;
+    utf32)
+      default_charset_default_collation="utf32_general_ci"
+      define(UTFC1, utf32_general_ci utf32_bin)
+      define(UTFC2, utf32_czech_ci utf32_danish_ci)
+      define(UTFC3, utf32_esperanto_ci utf32_estonian_ci utf32_hungarian_ci)
+      define(UTFC4, utf32_icelandic_ci utf32_latvian_ci utf32_lithuanian_ci)
+      define(UTFC5, utf32_persian_ci utf32_polish_ci utf32_romanian_ci)
+      define(UTFC6, utf32_sinhala_ci utf32_slovak_ci utf32_slovenian_ci)
+      define(UTFC7, utf32_spanish2_ci utf32_spanish_ci)
+      define(UTFC8, utf32_swedish_ci utf32_turkish_ci)
+      define(UTFC9, utf32_unicode_ci)
+      UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
+      default_charset_collations="$UTFC"
+      ;;
    *)
      AC_MSG_ERROR([Charset $cs not available. (Available are: $CHARSETS_AVAILABLE).
      See the Installation chapter in the Reference Manual.])
--- a/include/config-win.h
+++ b/include/config-win.h
@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d)
 #define HAVE_CHARSET_ucs2 1
 #define HAVE_CHARSET_ujis 1
 #define HAVE_CHARSET_utf8 1
+#define HAVE_CHARSET_utf8mb4 1
+#define HAVE_CHARSET_utf16 1
+#define HAVE_CHARSET_utf32 1

 #define HAVE_UCA_COLLATIONS 1
 #define HAVE_BOOL 1
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
 #define MY_CS_BINSORT	16     /* if binary sort order           */
 #define MY_CS_PRIMARY	32     /* if primary collation           */
 #define MY_CS_STRNXFRM	64     /* if strnxfrm is used for sort   */
-#define MY_CS_UNICODE	128    /* is a charset is full unicode   */
+#define MY_CS_UNICODE	128    /* is a charset is BMP Unicode    */
 #define MY_CS_READY	256    /* if a charset is initialized    */
 #define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
 #define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
 #define MY_CS_HIDDEN	2048   /* don't display in SHOW          */	
 #define MY_CS_PUREASCII 4096   /* if a charset is pure ascii     */
 #define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
+#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
 #define MY_CHARSET_UNDEFINED 0

 /* Character repertoire flags */
@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
 #define MY_REPERTOIRE_EXTENDED   2 /* Extended characters:  U+0080..U+FFFF */
 #define MY_REPERTOIRE_UNICODE30  3 /* ASCII | EXTENDED:     U+0000..U+FFFF */

-
 typedef struct my_uni_idx_st
 {
  uint16 from;
@ -304,10 +304,14 @@ typedef struct charset_info_st


 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
+extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
+
 extern CHARSET_INFO my_charset_big5_chinese_ci;
 extern CHARSET_INFO my_charset_big5_bin;
 extern CHARSET_INFO my_charset_cp932_japanese_ci;
 extern CHARSET_INFO my_charset_cp932_bin;
+extern CHARSET_INFO my_charset_cp1250_czech_ci;
 extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
 extern CHARSET_INFO my_charset_eucjpms_bin;
 extern CHARSET_INFO my_charset_euckr_korean_ci;
@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci;
 extern CHARSET_INFO my_charset_gb2312_bin;
 extern CHARSET_INFO my_charset_gbk_chinese_ci;
 extern CHARSET_INFO my_charset_gbk_bin;
-extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
 extern CHARSET_INFO my_charset_latin1_german2_ci;
 extern CHARSET_INFO my_charset_latin1_bin;
 extern CHARSET_INFO my_charset_latin2_czech_ci;
@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin;
 extern CHARSET_INFO my_charset_ucs2_unicode_ci;
 extern CHARSET_INFO my_charset_ujis_japanese_ci;
 extern CHARSET_INFO my_charset_ujis_bin;
+extern CHARSET_INFO my_charset_utf16_bin;
+extern CHARSET_INFO my_charset_utf16_general_ci;
+extern CHARSET_INFO my_charset_utf16_unicode_ci;
+extern CHARSET_INFO my_charset_utf32_bin;
+extern CHARSET_INFO my_charset_utf32_general_ci;
+extern CHARSET_INFO my_charset_utf32_unicode_ci;
+
 extern CHARSET_INFO my_charset_utf8_general_ci;
 extern CHARSET_INFO my_charset_utf8_unicode_ci;
 extern CHARSET_INFO my_charset_utf8_bin;
-extern CHARSET_INFO my_charset_cp1250_czech_ci;
-extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
+extern CHARSET_INFO my_charset_utf8mb4_bin;
+extern CHARSET_INFO my_charset_utf8mb4_general_ci;
+extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
+#define MY_UTF8MB3                 "utf8"
+#define MY_UTF8MB4                 "utf8mb4"
+

 /* declarations for simple charsets */
 extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
@ -430,6 +444,19 @@ my_bool  my_like_range_ucs2(CHARSET_INFO *cs,
 			    char *min_str, char *max_str,
 			    size_t *min_length, size_t *max_length);

+my_bool  my_like_range_utf16(CHARSET_INFO *cs,
+			     const char *ptr, size_t ptr_length,
+			     pbool escape, pbool w_one, pbool w_many,
+			     size_t res_length,
+			     char *min_str, char *max_str,
+			     size_t *min_length, size_t *max_length);
+
+my_bool  my_like_range_utf32(CHARSET_INFO *cs,
+			     const char *ptr, size_t ptr_length,
+			     pbool escape, pbool w_one, pbool w_many,
+			     size_t res_length,
+			     char *min_str, char *max_str,
+			     size_t *min_length, size_t *max_length);

 int my_wildcmp_8bit(CHARSET_INFO *,
 		    const char *str,const char *str_end,
@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *,
                 const char *s, size_t s_length,
                 my_match_t *match, uint nmatch);

+int my_strnncoll_mb_bin(CHARSET_INFO * cs,
+                        const uchar *s, size_t slen,
+                        const uchar *t, size_t tlen,
+                        my_bool t_is_prefix);
+
+int my_strnncollsp_mb_bin(CHARSET_INFO *cs,
+                          const uchar *a, size_t a_length,
+                          const uchar *b, size_t b_length,
+                          my_bool diff_if_only_endspace_difference);
+
+int my_wildcmp_mb_bin(CHARSET_INFO *cs,
+                      const char *str,const char *str_end,
+                      const char *wildstr,const char *wildend,
+                      int escape, int w_one, int w_many);
+
+int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+                         const char *s, const char *t);
+
+void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
+                         const uchar *key, size_t len,ulong *nr1, ulong *nr2);
+
+size_t my_strnxfrm_unicode(CHARSET_INFO *,
+                           uchar *dst, size_t dstlen,
+                           const uchar *src, size_t srclen);
+
 int my_wildcmp_unicode(CHARSET_INFO *cs,
                       const char *str, const char *str_end,
                       const char *wildstr, const char *wildend,
--- a/mysql-test/include/ctype_datetime.inc
+++ b/mysql-test/include/ctype_datetime.inc
@ -0,0 +1,11 @@
+#
+# Bug#32390 Character sets: casting utf32 to/from date doesn't work
+#
+CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0;
+SET timestamp=1216359724;
+INSERT INTO t1 VALUES (current_date);
+INSERT INTO t1 VALUES (current_time);
+INSERT INTO t1 VALUES (current_timestamp);
+SELECT s1, hex(s1) FROM t1;
+DROP TABLE t1;
+SET timestamp=0;
--- a/mysql-test/include/ctype_like.inc
+++ b/mysql-test/include/ctype_like.inc
@ -0,0 +1,50 @@
+select @@collation_connection;
+
+#
+# Create a table with a nullable varchar(10) column
+#  using currect character_set_connection.
+create table t1 as select repeat(' ',10) as a union select null;
+alter table t1 add key(a);
+show create table t1;
+insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
+explain select * from t1 where a like 'abc%';
+explain select * from t1 where a like concat('abc','%');
+select * from t1 where a like "abc%";
+select * from t1 where a like concat("abc","%");
+select * from t1 where a like "ABC%";
+select * from t1 where a like "test%";
+select * from t1 where a like "te_t";
+select * from t1 where a like "%a%";
+select * from t1 where a like "%abcd%";
+select * from t1 where a like "%abc\d%";
+drop table t1;
+
+#
+# Bug #2619 ucs2 LIKE comparison fails in some cases
+#
+
+select 'AA' like 'AA'; 
+select 'AA' like 'A%A';
+select 'AA' like 'A%%A';
+select 'AA' like 'AA%';
+select 'AA' like '%AA%';
+select 'AA' like '%A';
+select 'AA' like '%AA';
+select 'AA' like 'A%A%';
+select 'AA' like '_%_%';
+select 'AA' like '%A%A';
+select 'AAA'like 'A%A%A';
+
+select 'AZ' like 'AZ'; 
+select 'AZ' like 'A%Z';
+select 'AZ' like 'A%%Z';
+select 'AZ' like 'AZ%';
+select 'AZ' like '%AZ%';
+select 'AZ' like '%Z';
+select 'AZ' like '%AZ';
+select 'AZ' like 'A%Z%';
+select 'AZ' like '_%_%';
+select 'AZ' like '%A%Z';
+select 'AZ' like 'A_';
+select 'AZ' like '_Z';
+select 'AMZ'like 'A%M%Z';
--- a/mysql-test/include/have_utf16.inc
+++ b/mysql-test/include/have_utf16.inc
@ -0,0 +1,4 @@
+-- require r/have_utf16.require
+disable_query_log;
+show collation like 'utf16_general_ci';
+enable_query_log;
--- a/mysql-test/include/have_utf32.inc
+++ b/mysql-test/include/have_utf32.inc
@ -0,0 +1,4 @@
+-- require r/have_utf32.require
+disable_query_log;
+show collation like 'utf32_general_ci';
+enable_query_log;
--- a/mysql-test/include/have_utf8mb4.inc
+++ b/mysql-test/include/have_utf8mb4.inc
@ -0,0 +1,7 @@
+--require r/have_utf8mb4.require
+
+--disable_query_log
+
+SHOW COLLATION LIKE 'utf8mb4_general_ci';
+
+--enable_query_log
--- a/mysql-test/r/ctype_ldml.result
+++ b/mysql-test/r/ctype_ldml.result
@ -53,6 +53,33 @@ select * from t1 where c1='b';
 c1
 a
 drop table t1;
+show collation like 'utf8mb4_test_ci';
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf8mb4_test_ci	utf8mb4	326			8
+create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
+insert into t1 values ('a');
+select * from t1 where c1='b';
+c1
+a
+drop table t1;
+show collation like 'utf16_test_ci';
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf16_test_ci	utf16	327			8
+create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
+insert into t1 values ('a');
+select * from t1 where c1='b';
+c1
+a
+drop table t1;
+show collation like 'utf32_test_ci';
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf32_test_ci	utf32	391			8
+create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
+insert into t1 values ('a');
+select * from t1 where c1='b';
+c1
+a
+drop table t1;
 CREATE TABLE t1 (
 col1 varchar(100) character set utf8 collate utf8_test_ci
 );
@ -373,16 +400,22 @@ select "foo" = "foo " collate latin1_test;
 The following tests check that two-byte collation IDs work
 select * from information_schema.collations where id>256 order by id;
 COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+utf8mb4_test_ci	utf8mb4	326			8
+utf16_test_ci	utf16	327			8
 utf8_phone_ci	utf8	352			8
 utf8_test_ci	utf8	353			8
 ucs2_test_ci	ucs2	358			8
 ucs2_vn_ci	ucs2	359			8
+utf32_test_ci	utf32	391			8
 utf8_maxuserid_ci	utf8	2047			8
 show collation like '%test%';
 Collation	Charset	Id	Default	Compiled	Sortlen
 latin1_test	latin1	99		Yes	1
 utf8_test_ci	utf8	353			8
 ucs2_test_ci	ucs2	358			8
+utf8mb4_test_ci	utf8mb4	326			8
+utf16_test_ci	utf16	327			8
+utf32_test_ci	utf32	391			8
 show collation like 'ucs2_vn_ci';
 Collation	Charset	Id	Default	Compiled	Sortlen
 ucs2_vn_ci	ucs2	359			8
--- a/mysql-test/r/ctype_many.result
+++ b/mysql-test/r/ctype_many.result
@ -1683,3 +1683,59 @@ ARMENIAN CAPIT DA	2
 ARMENIAN CAPIT ECH	2
 ARMENIAN CAPIT ZA	2
 DROP TABLE t1;
+#
+# WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
+# Testing that only utf8mb4 is superset for utf8
+# No other Unicode character set pairs have superset/subset relations
+#
+CREATE TABLE t1 (
+utf8 CHAR CHARACTER SET utf8,
+utf8mb4 CHAR CHARACTER SET utf8mb4,
+ucs2 CHAR CHARACTER SET ucs2,
+utf16 CHAR CHARACTER SET utf16,
+utf32 CHAR CHARACTER SET utf32
+);
+INSERT INTO t1 VALUES ('','','','','');
+SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
+CHARSET(CONCAT(utf8, utf8mb4))
+utf8mb4
+SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
+CHARSET(CONCAT(utf8mb4, utf8))
+utf8mb4
+SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
+ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
+ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
+ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
+ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
+SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
+ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
+DROP TABLE t1;
--- a/mysql-test/r/ctype_utf16.result
+++ b/mysql-test/r/ctype_utf16.result
--- a/mysql-test/r/ctype_utf16_uca.result
+++ b/mysql-test/r/ctype_utf16_uca.result
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
--- a/mysql-test/r/ctype_utf32_uca.result
+++ b/mysql-test/r/ctype_utf32_uca.result
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@ -1899,6 +1899,20 @@ CONVERT(a, CHAR)	CONVERT(b, CHAR)
 DROP TABLE t1;
 End of 5.0 tests
 Start of 5.4 tests
+SET NAMES utf8mb3;
+SHOW VARIABLES LIKE 'character_set_results%';
+Variable_name	Value
+character_set_results	utf8
+CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` char(1) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+SELECT _utf8mb3'test';
+test
+test
 CREATE TABLE t1 (
 clipid INT NOT NULL,
 Tape TINYTEXT,
--- a/mysql-test/r/ctype_utf8mb4.result
+++ b/mysql-test/r/ctype_utf8mb4.result
--- a/mysql-test/r/have_utf16.require
+++ b/mysql-test/r/have_utf16.require
@ -0,0 +1,2 @@
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf16_general_ci	utf16	54	Yes	Yes	1
--- a/mysql-test/r/have_utf32.require
+++ b/mysql-test/r/have_utf32.require
@ -0,0 +1,2 @@
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf32_general_ci	utf32	60	Yes	Yes	1
--- a/mysql-test/r/have_utf8mb4.require
+++ b/mysql-test/r/have_utf8mb4.require
@ -0,0 +1,2 @@
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf8mb4_general_ci	utf8mb4	45	Yes	Yes	1
--- a/mysql-test/std_data/Index.xml
+++ b/mysql-test/std_data/Index.xml
@ -33,6 +33,36 @@
    </collation>
  </charset>

+
+  <charset name="utf8mb4">
+    <collation name="utf8mb4_test_ci" id="326">
+      <rules>
+        <reset>a</reset>
+        <s>b</s>
+      </rules>
+    </collation>
+  </charset>
+
+  <charset name="utf16">
+    <collation name="utf16_test_ci" id="327">
+      <rules>
+        <reset>a</reset>
+        <s>b</s>
+      </rules>
+    </collation>
+  </charset>
+
+
+  <charset name="utf32">
+    <collation name="utf32_test_ci" id="391">
+      <rules>
+        <reset>a</reset>
+        <s>b</s>
+      </rules>
+    </collation>
+  </charset>
+
+
  <charset name="ucs2">
    <collation name="ucs2_test_ci" id="358">
      <rules>
--- a/mysql-test/suite/sys_vars/r/character_set_client_basic.result
+++ b/mysql-test/suite/sys_vars/r/character_set_client_basic.result
@ -162,8 +162,16 @@ SET @@character_set_client = utf8;
 SELECT @@character_set_client;
@@character_set_client
 utf8
+SET @@character_set_client = utf8mb4;
+SELECT @@character_set_client;
+@@character_set_client
+utf8mb4
 SET @@character_set_client = ucs2;
 ERROR 42000: Variable 'character_set_client' can't be set to the value of 'ucs2'
+SET @@character_set_client = utf16;
+ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf16'
+SET @@character_set_client = utf32;
+ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf32'
 SET @@character_set_client = cp866;
 SELECT @@character_set_client;
@@character_set_client
@ -422,7 +430,7 @@ ERROR 42000: Unknown character set: '100'
 SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
 SELECT @total_charset;
@total_charset
-36
+39
 '#--------------------FN_DYNVARS_010_10-------------------------#'
 SET @@character_set_client = abc;
 ERROR 42000: Unknown character set: 'abc'
--- a/mysql-test/suite/sys_vars/r/character_set_connection_basic.result
+++ b/mysql-test/suite/sys_vars/r/character_set_connection_basic.result
@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
 SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
 SELECT @total_charset;
@total_charset
-36
+39
 '#--------------------FN_DYNVARS_011_10-------------------------#'
 SET @@character_set_connection = abc;
 ERROR 42000: Unknown character set: 'abc'
--- a/mysql-test/suite/sys_vars/r/character_set_database_basic.result
+++ b/mysql-test/suite/sys_vars/r/character_set_database_basic.result
@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
 SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
 SELECT @total_charset;
@total_charset
-36
+39
 '#--------------------FN_DYNVARS_012_10-------------------------#'
 SET @@character_set_database = "grek";
 ERROR 42000: Unknown character set: 'grek'
--- a/mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result
+++ b/mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result
@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100'
 SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
 SELECT @total_charset;
@total_charset
-36
+39
 '#--------------------FN_DYNVARS_008_10-------------------------#'
 SET @@character_set_filesystem = abc;
 ERROR 42000: Unknown character set: 'abc'
--- a/mysql-test/suite/sys_vars/r/character_set_results_basic.result
+++ b/mysql-test/suite/sys_vars/r/character_set_results_basic.result
--- a/mysql-test/suite/sys_vars/t/character_set_client_basic.test
+++ b/mysql-test/suite/sys_vars/t/character_set_client_basic.test
@ -27,6 +27,9 @@
 --source include/have_sjis.inc
 --source include/have_utf8.inc
 --source include/have_ucs2.inc
+--source include/have_utf8mb4.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc

 --source include/load_sysvars.inc
 ###################################################
@ -163,9 +166,15 @@ SET @@character_set_client = armscii8;
 SELECT @@character_set_client;
 SET @@character_set_client = utf8;
 SELECT @@character_set_client;
+SET @@character_set_client = utf8mb4;
+SELECT @@character_set_client;

 --error ER_WRONG_VALUE_FOR_VAR
 SET @@character_set_client = ucs2;
+--error ER_WRONG_VALUE_FOR_VAR
+SET @@character_set_client = utf16;
+--error ER_WRONG_VALUE_FOR_VAR
+SET @@character_set_client = utf32;

 SET @@character_set_client = cp866;
 SELECT @@character_set_client;
--- a/mysql-test/suite/sys_vars/t/character_set_connection_basic.test
+++ b/mysql-test/suite/sys_vars/t/character_set_connection_basic.test
@ -27,6 +27,9 @@
 --source include/have_sjis.inc
 --source include/have_utf8.inc
 --source include/have_ucs2.inc
+--source include/have_utf8mb4.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc

 --source include/load_sysvars.inc
 ###################################################
--- a/mysql-test/suite/sys_vars/t/character_set_database_basic.test
+++ b/mysql-test/suite/sys_vars/t/character_set_database_basic.test
@ -27,6 +27,9 @@
 --source include/have_sjis.inc
 --source include/have_utf8.inc
 --source include/have_ucs2.inc
+--source include/have_utf8mb4.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc

 --source include/load_sysvars.inc
 ###################################################
--- a/mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test
+++ b/mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test
@ -27,6 +27,9 @@
 --source include/have_sjis.inc
 --source include/have_utf8.inc
 --source include/have_ucs2.inc
+--source include/have_utf8mb4.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc

 --source include/load_sysvars.inc

--- a/mysql-test/suite/sys_vars/t/character_set_results_basic.test
+++ b/mysql-test/suite/sys_vars/t/character_set_results_basic.test
@ -27,6 +27,9 @@
 --source include/have_sjis.inc
 --source include/have_utf8.inc
 --source include/have_ucs2.inc
+--source include/have_utf8mb4.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc

 --source include/load_sysvars.inc
 ################################################
--- a/mysql-test/t/ctype_ldml.test
+++ b/mysql-test/t/ctype_ldml.test
@ -1,4 +1,7 @@
 --source include/have_ucs2.inc
+--source include/have_utf8mb4.inc
+--source include/have_utf16.inc
+--source include/have_utf32.inc

 --disable_warnings
 drop table if exists t1;
@ -40,6 +43,24 @@ insert into t1 values ('a');
 select * from t1 where c1='b';
 drop table t1;

+show collation like 'utf8mb4_test_ci';
+create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
+insert into t1 values ('a');
+select * from t1 where c1='b';
+drop table t1;
+
+show collation like 'utf16_test_ci';
+create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
+insert into t1 values ('a');
+select * from t1 where c1='b';
+drop table t1;
+
+show collation like 'utf32_test_ci';
+create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
+insert into t1 values ('a');
+select * from t1 where c1='b';
+drop table t1;
+

 #
 # Bug#41084 full-text index added to custom UCA collation not working
--- a/mysql-test/t/ctype_many.test
+++ b/mysql-test/t/ctype_many.test
@ -1,4 +1,7 @@
 -- source include/have_ucs2.inc
+-- source include/have_utf8mb4.inc
+-- source include/have_utf16.inc
+-- source include/have_utf32.inc

 --disable_warnings
 DROP TABLE IF EXISTS t1;
@ -211,3 +214,73 @@ SELECT min(comment),count(*) FROM t1 GROUP BY ucs2_f;
 DROP TABLE t1;

 # End of 4.1 tests
+
+
+--echo #
+--echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
+--echo # Testing that only utf8mb4 is superset for utf8
+--echo # No other Unicode character set pairs have superset/subset relations
+--echo #
+
+CREATE TABLE t1 (
+  utf8 CHAR CHARACTER SET utf8,
+  utf8mb4 CHAR CHARACTER SET utf8mb4,
+  ucs2 CHAR CHARACTER SET ucs2,
+  utf16 CHAR CHARACTER SET utf16,
+  utf32 CHAR CHARACTER SET utf32
+);
+INSERT INTO t1 VALUES ('','','','','');
+
+# utf8 can be mixed only with utf8mb4, which is its superset
+SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
+
+
+# utf8mb4 is superset only for utf8
+SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
+
+
+# ucs2 is not a superset for the other Unicode character sets
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
+
+
+# utf16 is not a superset for the other Unicode character sets
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
+
+
+# utf32 is not a superset for the other Unicode character sets
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
+--error ER_CANT_AGGREGATE_2COLLATIONS
+SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
+
+DROP TABLE t1;
--- a/mysql-test/t/ctype_utf16.test
+++ b/mysql-test/t/ctype_utf16.test
@ -0,0 +1,731 @@
+-- source include/have_utf16.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+SET NAMES latin1;
+SET character_set_connection=utf16;
+select hex('a'), hex('a ');
+-- source include/endspace.inc
+
+
+# Check that incomplete utf16 characters in HEX notation
+# are left-padded with zeros
+#
+select hex(_utf16 0x44);
+select hex(_utf16 0x3344);
+select hex(_utf16 0x113344);
+
+
+# Check that 0x20 is only trimmed when it is 
+# a part of real SPACE character, not just a part
+# of a multibyte sequence.
+# Note, CYRILLIC LETTER ER is used as an example, which
+# is stored as 0x0420 in utf16, thus contains 0x20 in the
+# low byte. The second character is THREE-PER-M, U+2004,
+# which contains 0x20 in the high byte.
+
+CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
+INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
+SELECT hex(word) FROM t1 ORDER BY word;
+SELECT hex(word2) FROM t1 ORDER BY word2;
+DELETE FROM t1;
+
+#
+# Check that real spaces are correctly trimmed.
+#
+INSERT INTO t1 VALUES (X'042000200020',X'042000200020'), (X'200400200020', X'200400200020');
+SELECT hex(word) FROM t1 ORDER BY word;
+SELECT hex(word2) FROM t1 ORDER BY word2;
+DROP TABLE t1;
+
+
+#
+# Check LPAD/RPAD
+#
+SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'0421'));
+SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422'));
+SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
+SELECT hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
+SELECT hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
+SELECT hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
+
+SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'0421'));
+SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422'));
+SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
+SELECT hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
+SELECT hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
+SELECT hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
+
+CREATE TABLE t1 SELECT 
+LPAD(_utf16 X'0420',10,_utf16 X'0421') l,
+RPAD(_utf16 X'0420',10,_utf16 X'0421') r;
+SHOW CREATE TABLE t1;
+select hex(l), hex(r) from t1;
+DROP TABLE t1;
+
+create table t1 (f1 char(30));
+insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
+select lpad(f1, 12, "-o-/") from t1;
+drop table t1;
+
+######################################################
+#
+# Test of like
+#
+
+SET NAMES latin1;
+SET character_set_connection=utf16;
+--source include/ctype_like.inc
+
+SET NAMES utf8;
+SET character_set_connection=utf16;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
+INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
+INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
+INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
+INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
+SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16_bin ORDER BY BINARY a;
+DROP TABLE t1;
+
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+ENGINE=MyISAM CHARACTER SET utf16;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+SELECT * FROM t1 WHERE word LIKE "ca_";
+SELECT * FROM t1 WHERE word LIKE "cat";
+SELECT * FROM t1 WHERE word LIKE _utf16 x'00630025';     # "c%"
+SELECT * FROM t1 WHERE word LIKE _utf16 x'00630061005F'; # "ca_"
+DROP TABLE t1;
+
+
+#
+# Check that INSERT() works fine. 
+# This invokes charpos() function.
+select insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066);
+select insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066);
+
+########################################################
+#
+# Bug 1264
+#
+# Description: 
+#
+# When using a ucs2 table in MySQL, 
+# either with ucs2_general_ci or ucs2_bin collation,
+# words are returned in an incorrect order when using ORDER BY
+# on an _indexed_ CHAR or VARCHAR column. They are sorted with
+# the longest word *first* instead of last. I.E. The word "aardvark"
+# is in the results before the word "a".
+#
+# If there is no index for the column, the problem does not occur.
+#
+# Interestingly, if there is no second column, the words are returned
+# in the correct order. 
+#
+# According to EXPLAIN, it looks like when the output includes columns that
+# are not part of the index sorted on, it does a filesort, which fails. 
+# Using a straight index yields correct results.
+
+SET NAMES latin1;
+
+#
+# Two fields, index
+#
+
+CREATE TABLE t1 (
+   word VARCHAR(64),
+   bar INT(11) default 0,
+   PRIMARY KEY (word))
+   ENGINE=MyISAM
+   CHARSET utf16
+   COLLATE utf16_general_ci ;
+
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a");
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY word;
+SELECT * FROM t1 ORDER BY word;
+EXPLAIN SELECT word FROM t1 ORDER BY word;
+SELECT word FROM t1 ORDER by word;
+DROP TABLE t1;
+
+
+#
+# One field, index
+# 
+
+CREATE TABLE t1 (
+   word VARCHAR(64) ,
+   PRIMARY KEY (word))
+   ENGINE=MyISAM
+   CHARSET utf16
+   COLLATE utf16_general_ci;
+
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a");
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY WORD;
+SELECT * FROM t1 ORDER BY word;
+DROP TABLE t1;
+
+
+#
+# Two fields, no index
+#
+
+CREATE TABLE t1 (
+   word TEXT,
+   bar INT(11) AUTO_INCREMENT,
+   PRIMARY KEY (bar))
+   ENGINE=MyISAM
+   CHARSET utf16
+   COLLATE utf16_general_ci ;
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a" );
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY word;
+SELECT * FROM t1 ORDER BY word;
+EXPLAIN SELECT word FROM t1 ORDER BY word;
+SELECT word FROM t1 ORDER BY word;
+DROP TABLE t1;
+
+#
+# END OF Bug 1264 test
+#
+########################################################
+
+
+#
+# Check alignment for from-binary-conversion with CAST and CONVERT
+#
+SELECT hex(cast(0xAA as char character set utf16));
+SELECT hex(convert(0xAA using utf16));
+
+#
+# Check alignment for string types
+#
+CREATE TABLE t1 (a char(10) character set utf16);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a varchar(10) character set utf16);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a text character set utf16);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a mediumtext character set utf16);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a longtext character set utf16);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+##
+## Bug #5024 Server crashes with queries on fields
+##  with certain charset/collation settings
+##
+##
+#create table t1 (s1 char character set utf16 collate utf16_czech_ci);
+#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
+#select s1 from t1 where s1 > 'a' order by s1;
+#drop table t1;
+#
+
+#
+# Bug #5081 : UCS2 fields are filled with '0x2020'
+# after extending field length
+#
+create table t1(a char(1)) default charset utf16;
+insert into t1 values ('a'),('b'),('c');
+alter table t1 modify a char(5);
+select a, hex(a) from t1;
+drop table t1;
+
+#
+# Check prepare statement from an UTF16 string
+#
+set @ivar= 1234;
+set @str1 = 'select ?';
+set @str2 = convert(@str1 using utf16);
+prepare stmt1 from @str2;
+execute stmt1 using @ivar;
+
+#
+# Check that utf16 works with ENUM and SET type
+#
+set names utf8;
+create table t1 (a enum('x','y','z') character set utf16);
+show create table t1;
+insert into t1 values ('x');
+insert into t1 values ('y');
+insert into t1 values ('z');
+select a, hex(a) from t1 order by a;
+alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16;
+show create table t1;
+insert into t1 values ('D');
+insert into t1 values ('E ');
+insert into t1 values ('ä');
+insert into t1 values ('ö');
+insert into t1 values ('ü');
+select a, hex(a) from t1 order by a;
+drop table t1;
+
+create table t1 (a set ('x','y','z','ä','ö','ü') character set utf16);
+show create table t1;
+insert into t1 values ('x');
+insert into t1 values ('y');
+insert into t1 values ('z');
+insert into t1 values ('x,y');
+insert into t1 values ('x,y,z,ä,ö,ü');
+select a, hex(a) from t1 order by a;
+drop table t1;
+
+#
+# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
+#
+create table t1(a enum('a','b','c')) default character set utf16;
+insert into t1 values('a'),('b'),('c');
+alter table t1 add b char(1);
+show warnings;
+select * from t1 order by a;
+drop table t1;
+
+SET NAMES latin1;
+SET collation_connection='utf16_general_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+SET NAMES latin1;
+SET collation_connection='utf16_bin';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+
+#
+# Bug#10344 Some string functions fail for UCS2
+#
+select hex(substr(_utf16 0x00e400e50068,1));
+select hex(substr(_utf16 0x00e400e50068,2));
+select hex(substr(_utf16 0x00e400e50068,3));
+select hex(substr(_utf16 0x00e400e50068,-1));
+select hex(substr(_utf16 0x00e400e50068,-2));
+select hex(substr(_utf16 0x00e400e50068,-3));
+select hex(substr(_utf16 0x00e400e5D800DC00,1));
+select hex(substr(_utf16 0x00e400e5D800DC00,2));
+select hex(substr(_utf16 0x00e400e5D800DC00,3));
+select hex(substr(_utf16 0x00e400e5D800DC00,-1));
+select hex(substr(_utf16 0x00e400e5D800DC00,-2));
+select hex(substr(_utf16 0x00e400e5D800DC00,-3));
+
+SET NAMES latin1;
+
+##
+## Bug#8235
+##
+## This bug also helped to find another problem that
+## INSERT of a UCS2 string containing a negative number
+## into a unsigned int column didn't produce warnings.
+## This test covers both problems.
+##
+##SET collation_connection='ucs2_swedish_ci';
+##CREATE TABLE t1 (Field1 int(10) default '0');
+### no warnings, negative numbers are allowed
+##INSERT INTO t1 VALUES ('-1');
+##SELECT * FROM t1;
+##DROP TABLE t1;
+##CREATE TABLE t1 (Field1 int(10) unsigned default '0');
+### this should generate a "Data truncated" warning
+##INSERT INTO t1 VALUES ('-1');
+##DROP TABLE t1;
+##SET NAMES latin1;
+
+###
+### Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
+###
+##--disable_warnings
+##create table t1(f1 varchar(5) CHARACTER SET utf16 COLLATE utf16_bin NOT NULL) engine=InnoDB;
+##--enable_warnings
+##insert into t1 values('a');
+##create index t1f1 on t1(f1);
+##select f1 from t1 where f1 like 'a%';
+##drop table t1;
+
+#
+# Bug#9442 Set parameter make query fail if column character set is UCS2
+#
+create table t1 (utext varchar(20) character set utf16);
+insert into t1 values ("lily");
+insert into t1 values ("river");
+prepare stmt from 'select utext from t1 where utext like ?';
+set @param1='%%';
+execute stmt using @param1;
+execute stmt using @param1;
+select utext from t1 where utext like '%%';
+drop table t1;
+deallocate prepare stmt;
+
+#
+# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
+#
+create table t1 (
+  a char(10) character set utf16 not null, 
+  index a (a)
+) engine=myisam;
+insert into t1 values (repeat(0x201f, 10));
+insert into t1 values (repeat(0x2020, 10));
+insert into t1 values (repeat(0x2021, 10));
+# make sure "index read" is used
+explain select hex(a) from t1 order by a;
+select hex(a) from t1 order by a;
+alter table t1 drop index a;
+select hex(a) from t1 order by a;
+drop table t1;
+
+##
+## Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
+##             over a 'ucs2' field uses a temporary table 
+##
+##CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
+##INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
+##SELECT id, MIN(s) FROM t1 GROUP BY id;
+##DROP TABLE t1;
+
+###
+### Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
+###
+##
+##--disable_warnings
+##drop table if exists bug20536;
+##--enable_warnings
+##
+##set names latin1;
+##create table bug20536 (id bigint not null auto_increment primary key, name
+##varchar(255) character set ucs2 not null);
+##insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
+##select md5(name) from bug20536;
+##select sha1(name) from bug20536;
+##select make_set(3, name, upper(name)) from bug20536;
+##select export_set(5, name, upper(name)) from bug20536;
+##select export_set(5, name, upper(name), ",", 5) from bug20536;
+
+#
+# Bug #20108: corrupted default enum value for a ucs2 field              
+#
+
+CREATE TABLE t1 (
+  status enum('active','passive') character set utf16 collate utf16_general_ci 
+    NOT NULL default 'passive'
+);
+SHOW CREATE TABLE t1;
+ALTER TABLE t1 ADD a int NOT NULL AFTER status; 
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+##CREATE TABLE t2 (
+##  status enum('active','passive') collate ucs2_turkish_ci 
+##    NOT NULL default 'passive'
+##);
+##SHOW CREATE TABLE t2;
+##ALTER TABLE t2 ADD a int NOT NULL AFTER status; 
+##DROP TABLE t2;
+
+
+--echo End of 4.1 tests
+
+#
+# Conversion from an UTF16 string to a decimal column
+#
+CREATE TABLE t1 (a varchar(64) character set utf16, b decimal(10,3));
+INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
+update t1 set b=a;
+SELECT *, hex(a) FROM t1;
+DROP TABLE t1;
+
+#
+# Bug#9442 Set parameter make query fail if column character set is UCS2
+#
+create table t1 (utext varchar(20) character set utf16);
+insert into t1 values ("lily");
+insert into t1 values ("river");
+prepare stmt from 'select utext from t1 where utext like ?';
+set @param1='%%';
+execute stmt using @param1;
+execute stmt using @param1;
+select utext from t1 where utext like '%%';
+drop table t1;
+deallocate prepare stmt;
+
+#
+# Bug#22638 SOUNDEX broken for international characters
+#
+set names latin1;
+set character_set_connection=utf16;
+select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
+select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
+select 'mood' sounds like 'mud';
+# Cyrillic A, BE, VE
+select hex(soundex(_utf16 0x041004110412));
+# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
+select hex(soundex(_utf16 0x00BF00C0));
+set names latin1;
+
+#
+# Bug #14290: character_maximum_length for text fields
+#
+create table t1(a blob, b text charset utf16);
+select data_type, character_octet_length, character_maximum_length
+  from information_schema.columns where table_name='t1';
+drop table t1;
+
+
+set names latin1;
+set collation_connection=utf16_general_ci;
+#
+# Testing cs->coll->instr()
+#
+select position('bb' in 'abba');
+
+#
+# Testing cs->coll->hash_sort()
+#
+create table t1 (a varchar(10) character set utf16) engine=heap;
+insert into t1 values ('a'),('A'),('b'),('B');
+select * from t1 where a='a' order by binary a;
+select hex(min(binary a)),count(*) from t1 group by a;
+drop table t1;
+
+#
+# Testing cs->cset->numchars()
+#
+select char_length('abcd'), octet_length('abcd');
+select char_length(_utf16 0xD800DC00), octet_length(_utf16 0xD800DC00);
+select char_length(_utf16 0xD87FDFFF), octet_length(_utf16 0xD87FDFFF);
+
+#
+# Testing cs->cset->charpos()
+#
+select left('abcd',2);
+select hex(left(_utf16 0xD800DC00D87FDFFF, 1));
+select hex(right(_utf16 0xD800DC00D87FDFFF, 1));
+
+#
+# Testing cs->cset->well_formed_length()
+#
+create table t1 (a varchar(10) character set utf16);
+# Bad sequences
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf16 0xD800);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf16 0xDC00);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf16 0xD800D800);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf16 0xD800E800);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf16 0xD8000800);
+# Good sequences
+insert into t1 values (_utf16 0xD800DC00);
+insert into t1 values (_utf16 0xD800DCFF);
+insert into t1 values (_utf16 0xDBFFDC00);
+insert into t1 values (_utf16 0xDBFFDCFF);
+select hex(a) from t1;
+drop table t1;
+
+#
+# Bug#32393 Character sets: illegal characters in utf16 columns
+#
+# Tests that cs->cset->wc_mb() doesn't accept surrogate parts
+#
+# via alter
+#
+create table t1 (s1 varchar(50) character set ucs2);
+insert into t1 values (0xdf84);
+alter table t1 modify column s1 varchar(50) character set utf16;
+select hex(s1) from t1;
+drop table t1;
+#
+# via update
+#
+create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16);
+insert into t1 (s1) values (0xdf84);
+update t1 set s2 = s1;
+select hex(s2) from t1;
+drop table t1;
+
+
+
+#
+# Testing cs->cset->lengthsp()
+#
+create table t1 (a char(10)) character set utf16;
+insert into t1 values ('a   ');
+select hex(a) from t1;
+drop table t1;
+
+#
+# Testing cs->cset->caseup() and cs->cset->casedn()
+#
+select upper('abcd'), lower('ABCD');
+
+#
+# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf16
+# Testing cs->cset->snprintf()
+#
+#create table t1 (a date);
+#insert into t1 values ('2007-09-16');
+#select * from t1;
+#drop table t1;
+
+#
+# Testing cs->cset->l10tostr
+# !!! Not used in the code
+
+#
+# Testing cs->cset->ll10tostr
+#
+create table t1 (a varchar(10) character set utf16);
+insert into t1 values (123456);
+select a, hex(a) from t1;
+drop table t1;
+
+
+# Testing cs->cset->fill
+# SOUNDEX fills strings with DIGIT ZERO up to four characters
+select hex(soundex('a'));
+
+#
+# Testing cs->cset->strntol
+# !!! Not used in the code
+
+#
+# Testing cs->cset->strntoul
+#
+create table t1 (a enum ('a','b','c')) character set utf16;
+insert into t1 values ('1');
+select * from t1;
+drop table t1;
+
+#
+# Testing cs->cset->strntoll and cs->cset->strntoull
+#
+set names latin1;
+select hex(conv(convert('123' using utf16), -10, 16));
+select hex(conv(convert('123' using utf16), 10, 16));
+
+#
+# Testing cs->cset->strntod
+#
+set names latin1;
+set character_set_connection=utf16;
+select 1.1 + '1.2';
+select 1.1 + '1.2xxx';
+
+# Testing strntoll10_utf16
+# Testing cs->cset->strtoll10
+select left('aaa','1');
+
+#
+# Testing cs->cset->strntoull10rnd
+#
+create table t1 (a int);
+insert into t1 values ('-1234.1e2');
+insert into t1 values ('-1234.1e2xxxx');
+insert into t1 values ('-1234.1e2    ');
+select * from t1;
+drop table t1;
+
+#
+# Testing cs->cset->scan
+#
+create table t1 (a int);
+insert into t1 values ('1 ');
+insert into t1 values ('1 x');
+select * from t1;
+drop table t1;
+
+#
+# Testing auto-conversion to TEXT
+#
+create table t1 (a varchar(17000) character set utf16);
+show create table t1;
+drop table t1;
+
+#
+# Testing that maximum possible key length is 1000 bytes
+#
+create table t1 (a varchar(250) character set utf16 primary key);
+show create table t1;
+drop table t1;
+--error ER_TOO_LONG_KEY
+create table t1 (a varchar(334) character set utf16 primary key);
+
+#
+# Conversion to utf8mb4 and back to utf16
+#
+create table t1 (a char(1) character set utf16);
+insert into t1 values (0xD800DC00),(0xD800DCFF),(0xDB7FDC00),(0xDB7FDCFF);
+insert into t1 values (0x00C0), (0x00FF),(0xE000), (0xFFFF);
+select hex(a), hex(@a:=convert(a using utf8mb4)), hex(convert(@a using utf16)) from t1;
+drop table t1;
+
+#
+# Test basic regex functionality
+#
+set collation_connection=utf16_general_ci;
+--source include/ctype_regex.inc
+set names latin1;
+
+#
+# Test how character set works with date/time
+#
+SET collation_connection=utf16_general_ci;
+--source include/ctype_datetime.inc
+SET NAMES latin1;
+
+#
+# Bug#33073 Character sets: ordering fails with utf32
+#
+SET collation_connection=utf16_general_ci;
+CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
+SELECT * FROM t1 ORDER BY s1;
+SET max_sort_length=4;
+SELECT * FROM t1 ORDER BY s1;
+DROP TABLE t1;
+SET max_sort_length=DEFAULT;
+SET NAMES latin1;
+
+
+#
+## TODO: add tests for all engines
+#
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
--- a/mysql-test/t/ctype_utf16_uca.test
+++ b/mysql-test/t/ctype_utf16_uca.test
@ -0,0 +1,290 @@
+-- source include/have_utf16.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+set names utf8;
+set collation_connection=utf16_unicode_ci;
+select hex('a'), hex('a ');
+-- source include/endspace.inc
+
+#
+# Bug #6787 LIKE not working properly with _ and utf8 data
+#
+select 'c' like '\_' as want0; 
+
+#
+# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
+#
+CREATE TABLE t (
+  c char(20) NOT NULL
+) ENGINE=MyISAM DEFAULT CHARACTER SET utf16 COLLATE utf16_unicode_ci;
+INSERT INTO t VALUES ('a'),('ab'),('aba');
+ALTER TABLE t ADD INDEX (c);
+SELECT c FROM t WHERE c LIKE 'a%';
+DROP TABLE t;
+
+
+create table t1 (c1 char(10) character set utf16 collate utf16_bin);
+
+#
+# Basic Latin
+#
+insert into t1 values ('A'),('a');
+insert into t1 values ('B'),('b');
+insert into t1 values ('C'),('c');
+insert into t1 values ('D'),('d');
+insert into t1 values ('E'),('e');
+insert into t1 values ('F'),('f');
+insert into t1 values ('G'),('g');
+insert into t1 values ('H'),('h');
+insert into t1 values ('I'),('i');
+insert into t1 values ('J'),('j');
+insert into t1 values ('K'),('k');
+insert into t1 values ('L'),('l');
+insert into t1 values ('M'),('m');
+insert into t1 values ('N'),('n');
+insert into t1 values ('O'),('o');
+insert into t1 values ('P'),('p');
+insert into t1 values ('Q'),('q');
+insert into t1 values ('R'),('r');
+insert into t1 values ('S'),('s');
+insert into t1 values ('T'),('t');
+insert into t1 values ('U'),('u');
+insert into t1 values ('V'),('v');
+insert into t1 values ('W'),('w');
+insert into t1 values ('X'),('x');
+insert into t1 values ('Y'),('y');
+insert into t1 values ('Z'),('z');
+
+#
+# Latin1 supplement
+#
+insert into t1 values (0x00e0),(0x00c0);
+insert into t1 values (0x00e1),(0x00c1);
+insert into t1 values (0x00e2),(0x00c2);
+insert into t1 values (0x00e3),(0x00c3);
+insert into t1 values (0x00e4),(0x00c4);
+insert into t1 values (0x00e5),(0x00c5);
+insert into t1 values (0x00e6),(0x00c6);
+insert into t1 values (0x00e7),(0x00c7);
+insert into t1 values (0x00e8),(0x00c8);
+insert into t1 values (0x00e9),(0x00c9);
+insert into t1 values (0x00ea),(0x00ca);
+insert into t1 values (0x00eb),(0x00cb);
+insert into t1 values (0x00ec),(0x00cc);
+insert into t1 values (0x00ed),(0x00cd);
+insert into t1 values (0x00ee),(0x00ce);
+insert into t1 values (0x00ef),(0x00cf);
+
+insert into t1 values (0x00f0),(0x00d0);
+insert into t1 values (0x00f1),(0x00d1);
+insert into t1 values (0x00f2),(0x00d2);
+insert into t1 values (0x00f3),(0x00d3);
+insert into t1 values (0x00f4),(0x00d4);
+insert into t1 values (0x00f5),(0x00d5);
+insert into t1 values (0x00f6),(0x00d6);
+insert into t1 values (0x00f7),(0x00d7);
+insert into t1 values (0x00f8),(0x00d8);
+insert into t1 values (0x00f9),(0x00d9);
+insert into t1 values (0x00fa),(0x00da);
+insert into t1 values (0x00fb),(0x00db);
+insert into t1 values (0x00fc),(0x00dc);
+insert into t1 values (0x00fd),(0x00dd);
+insert into t1 values (0x00fe),(0x00de);
+insert into t1 values (0x00ff),(0x00df);
+
+#
+# Latin extended-A, 0100-017F
+#
+insert into t1 values (0x0100),(0x0101),(0x0102),(0x0103);
+insert into t1 values (0x0104),(0x0105),(0x0106),(0x0107);
+insert into t1 values (0x0108),(0x0109),(0x010a),(0x010b);
+insert into t1 values (0x010c),(0x010d),(0x010e),(0x010f);
+insert into t1 values (0x0110),(0x0111),(0x0112),(0x0113);
+insert into t1 values (0x0114),(0x0115),(0x0116),(0x0117);
+insert into t1 values (0x0118),(0x0119),(0x011a),(0x011b);
+insert into t1 values (0x011c),(0x011d),(0x011e),(0x011f);
+insert into t1 values (0x0120),(0x0121),(0x0122),(0x0123);
+insert into t1 values (0x0124),(0x0125),(0x0126),(0x0127);
+insert into t1 values (0x0128),(0x0129),(0x012a),(0x012b);
+insert into t1 values (0x012c),(0x012d),(0x012e),(0x012f);
+insert into t1 values (0x0130),(0x0131),(0x0132),(0x0133);
+insert into t1 values (0x0134),(0x0135),(0x0136),(0x0137);
+insert into t1 values (0x0138),(0x0139),(0x013a),(0x013b);
+insert into t1 values (0x013c),(0x013d),(0x013e),(0x013f);
+insert into t1 values (0x0140),(0x0141),(0x0142),(0x0143);
+insert into t1 values (0x0144),(0x0145),(0x0146),(0x0147);
+insert into t1 values (0x0148),(0x0149),(0x014a),(0x014b);
+insert into t1 values (0x014c),(0x014d),(0x014e),(0x014f);
+insert into t1 values (0x0150),(0x0151),(0x0152),(0x0153);
+insert into t1 values (0x0154),(0x0155),(0x0156),(0x0157);
+insert into t1 values (0x0158),(0x0159),(0x015a),(0x015b);
+insert into t1 values (0x015c),(0x015d),(0x015e),(0x015f);
+insert into t1 values (0x0160),(0x0161),(0x0162),(0x0163);
+insert into t1 values (0x0164),(0x0165),(0x0166),(0x0167);
+insert into t1 values (0x0168),(0x0169),(0x016a),(0x016b);
+insert into t1 values (0x016c),(0x016d),(0x016e),(0x016f);
+insert into t1 values (0x0170),(0x0171),(0x0172),(0x0173);
+insert into t1 values (0x0174),(0x0175),(0x0176),(0x0177);
+insert into t1 values (0x0178),(0x0179),(0x017a),(0x017b);
+insert into t1 values (0x017c),(0x017d),(0x017e),(0x017f);
+
+#
+# Latin extended-B, 0180-024F
+#
+insert into t1 values (0x0180),(0x0181),(0x0182),(0x0183);
+insert into t1 values (0x0184),(0x0185),(0x0186),(0x0187);
+insert into t1 values (0x0188),(0x0189),(0x018a),(0x018b);
+insert into t1 values (0x018c),(0x018d),(0x018e),(0x018f);
+insert into t1 values (0x0190),(0x0191),(0x0192),(0x0193);
+insert into t1 values (0x0194),(0x0195),(0x0196),(0x0197);
+insert into t1 values (0x0198),(0x0199),(0x019a),(0x019b);
+insert into t1 values (0x019c),(0x019d),(0x019e),(0x019f);
+insert into t1 values (0x01a0),(0x01a1),(0x01a2),(0x01a3);
+insert into t1 values (0x01a4),(0x01a5),(0x01a6),(0x01a7);
+insert into t1 values (0x01a8),(0x01a9),(0x01aa),(0x01ab);
+insert into t1 values (0x01ac),(0x01ad),(0x01ae),(0x01af);
+insert into t1 values (0x01b0),(0x01b1),(0x01b2),(0x01b3);
+insert into t1 values (0x01b4),(0x01b5),(0x01b6),(0x01b7);
+insert into t1 values (0x01b8),(0x01b9),(0x01ba),(0x01bb);
+insert into t1 values (0x01bc),(0x01bd),(0x01be),(0x01bf);
+insert into t1 values (0x01c0),(0x01c1),(0x01c2),(0x01c3);
+insert into t1 values (0x01c4),(0x01c5),(0x01c6),(0x01c7);
+insert into t1 values (0x01c8),(0x01c9),(0x01ca),(0x01cb);
+insert into t1 values (0x01cc),(0x01cd),(0x01ce),(0x01cf);
+insert into t1 values (0x01d0),(0x01d1),(0x01d2),(0x01d3);
+insert into t1 values (0x01d4),(0x01d5),(0x01d6),(0x01d7);
+insert into t1 values (0x01d8),(0x01d9),(0x01da),(0x01db);
+insert into t1 values (0x01dc),(0x01dd),(0x01de),(0x01df);
+insert into t1 values (0x01e0),(0x01e1),(0x01e2),(0x01e3);
+insert into t1 values (0x01e4),(0x01e5),(0x01e6),(0x01e7);
+insert into t1 values (0x01e8),(0x01e9),(0x01ea),(0x01eb);
+insert into t1 values (0x01ec),(0x01ed),(0x01ee),(0x01ef);
+insert into t1 values (0x01f0),(0x01f1),(0x01f2),(0x01f3);
+insert into t1 values (0x01f4),(0x01f5),(0x01f6),(0x01f7);
+insert into t1 values (0x01f8),(0x01f9),(0x01fa),(0x01fb);
+insert into t1 values (0x01fc),(0x01fd),(0x01fe),(0x01ff);
+
+
+insert into t1 values ('AA'),('Aa'),('aa'),('aA');
+insert into t1 values ('CH'),('Ch'),('ch'),('cH');
+insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
+insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
+insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
+insert into t1 values ('LL'),('Ll'),('ll'),('lL');
+insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
+insert into t1 values ('OE'),('Oe'),('oe'),('oE');
+insert into t1 values ('SS'),('Ss'),('ss'),('sS');
+insert into t1 values ('RR'),('Rr'),('rr'),('rR');
+
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_icelandic_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_latvian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_romanian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovenian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_polish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_estonian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_swedish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_turkish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_czech_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_danish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_lithuanian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovak_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish2_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_roman_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_esperanto_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_hungarian_ci;
+
+drop table t1;
+
+#
+# Bug#5324
+#
+SET NAMES utf8;
+
+CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_general_ci NOT NULL, INDEX (c));
+INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_general_ci;
+INSERT INTO t1 VALUES (0x039C03C903B4);
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
+COLLATE utf16_general_ci ORDER BY c;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
+INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
+INSERT INTO t1 VALUES (0x039C03C903B4);
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
+COLLATE utf16_unicode_ci ORDER BY c;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
+INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
+INSERT INTO t1 VALUES (0x039C03C903B4);
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
+COLLATE utf16_unicode_ci ORDER BY c;
+DROP TABLE t1;
+
+
+SET NAMES utf8;
+SET @test_character_set='utf16';
+SET @test_collation='utf16_swedish_ci';
+-- source include/ctype_common.inc
+
+
+SET collation_connection='utf16_unicode_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+
+--echo End of 4.1 tests
+
+#
+# Check UPPER/LOWER changing length
+#
+# Result shorter than argument
+CREATE TABLE t1 (id int, a varchar(30) character set utf16);
+INSERT INTO t1 VALUES (1, 0x01310069), (2, 0x01310131);
+INSERT INTO t1 VALUES (3, 0x00690069), (4, 0x01300049);
+INSERT INTO t1 VALUES (5, 0x01300130), (6, 0x00490049);
+SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
+FROM t1 ORDER BY id;
+ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf16 collate utf16_turkish_ci;
+SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
+FROM t1 ORDER BY id;
+DROP TABLE t1;
+
+#
+# Bug #27079 Crash while grouping empty ucs2 strings
+#
+CREATE TABLE t1 (
+ c1 text character set utf16 collate utf16_polish_ci NOT NULL
+) ENGINE=MyISAM;
+insert into t1 values (''),('a');
+SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
+DROP TABLE IF EXISTS t1;
+
+
+#
+# Test basic regex functionality
+#
+set collation_connection=utf16_unicode_ci;
+--source include/ctype_regex.inc
+
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@ -0,0 +1,784 @@
+-- source include/have_utf32.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+SET NAMES latin1;
+SET character_set_connection=utf32;
+select hex('a'), hex('a ');
+-- source include/endspace.inc
+
+#
+# Check that incomplete utf32 characters in HEX notation
+# are left-padded with zeros
+#
+select hex(_utf32 0x44);
+select hex(_utf32 0x3344);
+select hex(_utf32 0x103344);
+
+select hex(_utf32 X'44');
+select hex(_utf32 X'3344');
+select hex(_utf32 X'103344');
+
+
+#
+# Check that 0x20 is only trimmed when it is 
+# a part of real SPACE character, not just a part
+# of a multibyte sequence.
+# Note, CYRILLIC LETTER ER is used as an example, which
+# is stored as 0x00000420 in UTF-32, thus contains 0x20 in the
+# low byte. The second character is THREE-PER-EM SPACE, U+2004,
+# stored as 0x00002004, which contains 0x20 in the third byte.
+#
+
+CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32;
+INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
+SELECT hex(word) FROM t1 ORDER BY word;
+SELECT hex(word2) FROM t1 ORDER BY word2;
+DELETE FROM t1;
+
+#
+# Check that real spaces are correctly trimmed.
+#
+
+INSERT INTO t1 VALUES
+  (X'000004200000002000000020',X'000004200000002000000020'),
+  (X'000020040000002000000020',X'000020040000002000000020');
+SELECT hex(word) FROM t1 ORDER BY word;
+SELECT hex(word2) FROM t1 ORDER BY word2;
+DROP TABLE t1;
+
+#
+# Check LPAD/RPAD
+#
+SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421'));
+SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
+SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
+SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
+
+SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421'));
+SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
+SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
+SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
+
+CREATE TABLE t1 SELECT 
+LPAD(_utf32 X'0420',10,_utf32 X'0421') l,
+RPAD(_utf32 X'0420',10,_utf32 X'0421') r;
+SHOW CREATE TABLE t1;
+select hex(l), hex(r) from t1;
+DROP TABLE t1;
+
+create table t1 (f1 char(30));
+insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
+select lpad(f1, 12, "-o-/") from t1;
+drop table t1;
+
+######################################################
+#
+# Test of like
+#
+
+SET NAMES latin1;
+SET character_set_connection=utf32;
+--source include/ctype_like.inc
+
+SET NAMES utf8;
+SET character_set_connection=utf32;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
+INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
+INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
+INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
+SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
+SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a;
+DROP TABLE t1;
+
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+ENGINE=MyISAM CHARACTER SET utf32;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+SELECT * FROM t1 WHERE word LIKE "ca_";
+SELECT * FROM t1 WHERE word LIKE "cat";
+SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%"
+SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_"
+DROP TABLE t1;
+
+
+#
+# Check that INSERT() works fine. 
+# This invokes charpos() function.
+select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066);
+select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066);
+
+#######################################################
+
+#
+# Bug 1264
+#
+# Description: 
+#
+# When using a ucs2 table in MySQL, 
+# either with ucs2_general_ci or ucs2_bin collation,
+# words are returned in an incorrect order when using ORDER BY
+# on an _indexed_ CHAR or VARCHAR column. They are sorted with
+# the longest word *first* instead of last. I.E. The word "aardvark"
+# is in the results before the word "a".
+#
+# If there is no index for the column, the problem does not occur.
+#
+# Interestingly, if there is no second column, the words are returned
+# in the correct order. 
+#
+# According to EXPLAIN, it looks like when the output includes columns that
+# are not part of the index sorted on, it does a filesort, which fails. 
+# Using a straight index yields correct results.
+
+SET NAMES latin1;
+
+#
+# Two fields, index
+#
+
+CREATE TABLE t1 (
+   word VARCHAR(64),
+   bar INT(11) default 0,
+   PRIMARY KEY (word))
+   ENGINE=MyISAM
+   CHARSET utf32
+   COLLATE utf32_general_ci ;
+
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a");
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY word;
+SELECT * FROM t1 ORDER BY word;
+EXPLAIN SELECT word FROM t1 ORDER BY word;
+SELECT word FROM t1 ORDER by word;
+DROP TABLE t1;
+
+
+#
+# One field, index
+# 
+
+CREATE TABLE t1 (
+   word VARCHAR(64) ,
+   PRIMARY KEY (word))
+   ENGINE=MyISAM
+   CHARSET utf32
+   COLLATE utf32_general_ci;
+
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a");
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY WORD;
+SELECT * FROM t1 ORDER BY word;
+DROP TABLE t1;
+
+
+#
+# Two fields, no index
+#
+
+CREATE TABLE t1 (
+   word TEXT,
+   bar INT(11) AUTO_INCREMENT,
+   PRIMARY KEY (bar))
+   ENGINE=MyISAM
+   CHARSET utf32
+   COLLATE utf32_general_ci ;
+INSERT INTO t1 (word) VALUES ("aar");
+INSERT INTO t1 (word) VALUES ("a" );
+INSERT INTO t1 (word) VALUES ("aardvar");
+INSERT INTO t1 (word) VALUES ("aardvark");
+INSERT INTO t1 (word) VALUES ("aardvara");
+INSERT INTO t1 (word) VALUES ("aardvarz");
+EXPLAIN SELECT * FROM t1 ORDER BY word;
+SELECT * FROM t1 ORDER BY word;
+EXPLAIN SELECT word FROM t1 ORDER BY word;
+SELECT word FROM t1 ORDER BY word;
+DROP TABLE t1;
+
+#
+# END OF Bug 1264 test
+#
+########################################################
+
+
+#
+# Check alignment for from-binary-conversion with CAST and CONVERT
+#
+SELECT hex(cast(0xAA as char character set utf32));
+SELECT hex(convert(0xAA using utf32));
+
+#
+# Check alignment for string types
+#
+CREATE TABLE t1 (a char(10) character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a varchar(10) character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a text character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a mediumtext character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a longtext character set utf32);
+INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+##
+## Bug #5024 Server crashes with queries on fields
+##  with certain charset/collation settings
+##
+#
+#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`);
+#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
+#select s1 from t1 where s1 > 'a' order by s1;
+#drop table t1;
+
+#
+# Bug #5081 : UCS2 fields are filled with '0x2020'
+# after extending field length
+#
+create table t1(a char(1)) default charset utf32;
+insert into t1 values ('a'),('b'),('c');
+alter table t1 modify a char(5);
+select a, hex(a) from t1;
+drop table t1;
+
+#
+# Check prepare statement from a UTF32 string
+#
+set @ivar= 1234;
+set @str1 = 'select ?';
+set @str2 = convert(@str1 using utf32);
+prepare stmt1 from @str2;
+execute stmt1 using @ivar;
+
+#
+# Check that utf32 works with ENUM and SET type
+#
+set names utf8;
+create table t1 (a enum('x','y','z') character set utf32);
+show create table t1;
+insert into t1 values ('x');
+insert into t1 values ('y');
+insert into t1 values ('z');
+select a, hex(a) from t1 order by a;
+alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32;
+show create table t1;
+insert into t1 values ('D');
+insert into t1 values ('E ');
+insert into t1 values ('ä');
+insert into t1 values ('ö');
+insert into t1 values ('ü');
+select a, hex(a) from t1 order by a;
+drop table t1;
+
+create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32);
+show create table t1;
+insert into t1 values ('x');
+insert into t1 values ('y');
+insert into t1 values ('z');
+insert into t1 values ('x,y');
+insert into t1 values ('x,y,z,ä,ö,ü');
+select a, hex(a) from t1 order by a;
+drop table t1;
+
+#
+# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
+#
+create table t1(a enum('a','b','c')) default character set utf32;
+insert into t1 values('a'),('b'),('c');
+alter table t1 add b char(1);
+show warnings;
+select * from t1 order by a;
+drop table t1;
+
+SET NAMES latin1;
+SET collation_connection='utf32_general_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+SET NAMES latin1;
+SET collation_connection='utf32_bin';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+
+#
+# Bug#10344 Some string functions fail for UCS2
+#
+select hex(substr(_utf32 0x000000e4000000e500000068,1));
+select hex(substr(_utf32 0x000000e4000000e500000068,2));
+select hex(substr(_utf32 0x000000e4000000e500000068,3));
+select hex(substr(_utf32 0x000000e4000000e500000068,-1));
+select hex(substr(_utf32 0x000000e4000000e500000068,-2));
+select hex(substr(_utf32 0x000000e4000000e500000068,-3));
+
+#SET NAMES latin1;
+#
+# Bug#8235
+#
+# This bug also helped to find another problem that
+# INSERT of a UCS2 string containing a negative number
+# into a unsigned int column didn't produce warnings.
+# This test covers both problems.
+#
+#SET collation_connection='ucs2_swedish_ci';
+#CREATE TABLE t1 (Field1 int(10) default '0');
+## no warnings, negative numbers are allowed
+#INSERT INTO t1 VALUES ('-1');
+#SELECT * FROM t1;
+#DROP TABLE t1;
+#CREATE TABLE t1 (Field1 int(10) unsigned default '0');
+## this should generate a "Data truncated" warning
+#INSERT INTO t1 VALUES ('-1');
+#DROP TABLE t1;
+#SET NAMES latin1;
+
+#
+##
+## Bug#18691 Converting number to UNICODE string returns invalid result
+##
+#SELECT CONVERT(103, CHAR(50) UNICODE);
+#SELECT CONVERT(103.0, CHAR(50) UNICODE);
+#SELECT CONVERT(-103, CHAR(50) UNICODE);
+#SELECT CONVERT(-103.0, CHAR(50) UNICODE);
+
+#
+# Bug#9557 MyISAM utf8 table crash
+#
+CREATE TABLE t1 (
+  a varchar(250) NOT NULL default '',
+  KEY a (a)
+) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci;
+insert into t1 values (0x803d);
+insert into t1 values (0x005b);
+select hex(a) from t1;
+drop table t1;
+
+##
+## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
+##
+#--disable_warnings
+#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB;
+#--enable_warnings
+#insert into t1 values('a');
+#create index t1f1 on t1(f1);
+#select f1 from t1 where f1 like 'a%';
+#drop table t1;
+
+#
+# Bug#9442 Set parameter make query fail if column character set is UCS2
+#
+create table t1 (utext varchar(20) character set utf32);
+insert into t1 values ("lily");
+insert into t1 values ("river");
+prepare stmt from 'select utext from t1 where utext like ?';
+set @param1='%%';
+execute stmt using @param1;
+execute stmt using @param1;
+select utext from t1 where utext like '%%';
+drop table t1;
+deallocate prepare stmt;
+
+#
+# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
+#
+create table t1 (
+  a char(10) character set utf32 not null, 
+  index a (a)
+) engine=myisam;
+insert into t1 values (repeat(0x0000201f, 10));
+insert into t1 values (repeat(0x00002020, 10));
+insert into t1 values (repeat(0x00002021, 10));
+# make sure "index read" is used
+explain select hex(a) from t1 order by a;
+select hex(a) from t1 order by a;
+alter table t1 drop index a;
+select hex(a) from t1 order by a;
+drop table t1;
+
+#
+# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
+#             over a 'ucs2' field uses a temporary table 
+#
+#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
+#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
+#SELECT id, MIN(s) FROM t1 GROUP BY id;
+#DROP TABLE t1;
+
+##
+## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
+##
+#
+#--disable_warnings
+#drop table if exists bug20536;
+#--enable_warnings
+#
+#set names latin1;
+#create table bug20536 (id bigint not null auto_increment primary key, name
+#varchar(255) character set ucs2 not null);
+#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
+#select md5(name) from bug20536;
+#select sha1(name) from bug20536;
+#select make_set(3, name, upper(name)) from bug20536;
+#select export_set(5, name, upper(name)) from bug20536;
+#select export_set(5, name, upper(name), ",", 5) from bug20536;
+
+#
+# Bug #20108: corrupted default enum value for a ucs2 field              
+#
+
+CREATE TABLE t1 (
+  status enum('active','passive') character set utf32 collate utf32_general_ci 
+    NOT NULL default 'passive'
+);
+SHOW CREATE TABLE t1;
+ALTER TABLE t1 ADD a int NOT NULL AFTER status; 
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+#CREATE TABLE t2 (
+#  status enum('active','passive') collate ucs2_turkish_ci 
+#    NOT NULL default 'passive'
+#);
+#SHOW CREATE TABLE t2;
+#ALTER TABLE t2 ADD a int NOT NULL AFTER status; 
+#DROP TABLE t2;
+
+
+## Some broken functions:  add these tests just to document current behavior.
+#
+## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would
+## not be backwards compatible in all cases, so it's best to leave it alone
+#select password(name) from bug20536;
+#select old_password(name) from bug20536;
+#
+## Disable test case as encrypt relies on 'crypt' function.
+## "decrypt" is normally tested in func_crypt.test which has a
+## "have_crypt.inc" test
+#--disable_parsing
+## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it
+## doesn't return good results for strings with embedded 0 bytes.  It won't be
+## fixed unless we choose to re-implement the crypt() function ourselves to take
+## an extra size_t string_length argument.
+#select encrypt(name, 'SALT') from bug20536;
+#--enable_parsing
+#
+## QUOTE doesn't work with UCS2 data.  It would require a total rewrite
+## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is
+## supported fully as a client character set.
+#select quote(name) from bug20536;
+#
+#drop table bug20536;
+#
+--echo End of 4.1 tests
+
+
+#
+# Conversion from a UTF32 string to a decimal column
+#
+CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3));
+INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
+update t1 set b=a;
+SELECT *, hex(a) FROM t1;
+DROP TABLE t1;
+
+#
+# Bug#9442 Set parameter make query fail if column character set is UCS2
+#
+create table t1 (utext varchar(20) character set utf32);
+insert into t1 values ("lily");
+insert into t1 values ("river");
+prepare stmt from 'select utext from t1 where utext like ?';
+set @param1='%%';
+execute stmt using @param1;
+execute stmt using @param1;
+select utext from t1 where utext like '%%';
+drop table t1;
+deallocate prepare stmt;
+
+#
+# Bug#22638 SOUNDEX broken for international characters
+#
+set names latin1;
+set character_set_connection=utf32;
+select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
+select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
+select 'mood' sounds like 'mud';
+# Cyrillic A, BE, VE
+select hex(soundex(_utf32 0x000004100000041100000412));
+# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
+select hex(soundex(_utf32 0x000000BF000000C0));
+set names latin1;
+
+#
+# Bug #14290: character_maximum_length for text fields
+#
+create table t1(a blob, b text charset utf32);
+select data_type, character_octet_length, character_maximum_length
+  from information_schema.columns where table_name='t1';
+drop table t1;
+
+
+set names latin1;
+set collation_connection=utf32_general_ci;
+#
+# Testing cs->coll->instr()
+#
+select position('bb' in 'abba');
+
+#
+# Testing cs->coll->hash_sort()
+#
+create table t1 (a varchar(10) character set utf32) engine=heap;
+insert into t1 values ('a'),('A'),('b'),('B');
+select * from t1 where a='a' order by binary a;
+select hex(min(binary a)),count(*) from t1 group by a;
+drop table t1;
+
+#
+# Testing cs->cset->numchars()
+#
+select char_length('abcd'), octet_length('abcd');
+
+#
+# Testing cs->cset->charpos()
+#
+select left('abcd',2);
+
+#
+# Testing cs->cset->well_formed_length():
+# 0x0010FFFF is inserted normally; the four larger values expect ER_INVALID_CHARACTER_STRING.
+create table t1 (a varchar(10) character set utf32);
+insert into t1 values (_utf32 0x0010FFFF);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x00110000);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x00110101);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x01000101);
+--error ER_INVALID_CHARACTER_STRING
+insert into t1 values (_utf32 0x11000101);
+select hex(a) from t1;
+drop table t1;
+
+#
+# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns
+# NOTE(review): the last two inserts both use 0x00800037; "with pad" presumably meant 0x800037 - confirm.
+create table t1 (utf32 varchar(2) character set utf32);
+--echo Wrong character with pad
+insert into t1 values (0x110000);
+--echo Wrong chsaracter without pad
+insert into t1 values (0x00110000);
+--echo Wrong character with pad followed by another wrong character
+insert into t1 values (0x11000000110000);
+--echo Good character with pad followed by bad character
+insert into t1 values (0x10000000110000);
+--echo Good character without pad followed by bad character
+insert into t1 values (0x0010000000110000);
+--echo Wrong character with the second byte higher than 0x10
+insert into t1 values (0x00800037);
+--echo Wrong character with pad with the second byte higher than 0x10
+insert into t1 values (0x00800037);
+drop table t1;
+
+#
+# Bug#32394 Character sets: crash if comparison with 0xfffd
+#
+select _utf32'a' collate utf32_general_ci = 0xfffd;
+select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61));
+create table t1 (s1 varchar(5) character set utf32);
+insert into t1 values (0xfffd);
+select case when s1 = 0xfffd then 1 else 0 end from t1;
+select hex(s1) from t1 where s1 = 0xfffd;
+drop table t1;
+
+#
+# Testing cs->cset->lengthsp()
+#
+create table t1 (a char(10)) character set utf32;
+insert into t1 values ('a   ');
+select hex(a) from t1;
+drop table t1;
+
+#
+# Testing cs->cset->caseup() and cs->cset->casedn()
+#
+select upper('abcd'), lower('ABCD');
+
+#
+# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32
+# Testing cs->cset->snprintf()
+#
+#create table t1 (a date);
+#insert into t1 values ('2007-09-16');
+#select * from t1;
+#drop table t1;
+
+#
+# Testing cs->cset->l10tostr
+# !!! Not used in the code
+
+#
+# Testing cs->cset->ll10tostr
+#
+create table t1 (a varchar(10) character set utf32);
+insert into t1 values (123456);
+select a, hex(a) from t1;
+drop table t1;
+
+#
+# Testing cs->cset->fill
+# SOUNDEX fills strings with DIGIT ZERO up to four characters
+select hex(soundex('a'));
+
+#
+# Testing cs->cset->strntol
+# !!! Not used in the code
+
+#
+# Testing cs->cset->strntoul
+#
+create table t1 (a enum ('a','b','c')) character set utf32;
+insert into t1 values ('1');
+select * from t1;
+drop table t1;
+
+#
+# Testing cs->cset->strntoll and cs->cset->strntoull
+#
+set names latin1;
+select hex(conv(convert('123' using utf32), -10, 16));
+select hex(conv(convert('123' using utf32), 10, 16));
+
+#
+# Testing cs->cset->strntod
+#
+set names latin1;
+set character_set_connection=utf32;
+select 1.1 + '1.2';
+select 1.1 + '1.2xxx';
+
+# Testing strntoll10_utf32
+# Testing cs->cset->strtoll10
+select left('aaa','1');
+
+#
+# Testing cs->cset->strntoull10rnd
+#
+create table t1 (a int);
+insert into t1 values ('-1234.1e2');
+insert into t1 values ('-1234.1e2xxxx');
+insert into t1 values ('-1234.1e2    ');
+select * from t1;
+drop table t1;
+
+#
+# Testing cs->cset->scan
+#
+create table t1 (a int);
+insert into t1 values ('1 ');
+insert into t1 values ('1 x');
+select * from t1;
+drop table t1;
+
+#
+# Testing auto-conversion to TEXT
+#
+create table t1 (a varchar(17000) character set utf32);
+show create table t1;
+drop table t1;
+
+#
+# Testing that maximum possible key length is 1332 bytes
+# NOTE(review): the accepted case below is varchar(250), not varchar(333) - confirm the intended limit.
+create table t1 (a varchar(250) character set utf32 primary key);
+show create table t1;
+drop table t1;
+--error ER_TOO_LONG_KEY
+create table t1 (a varchar(334) character set utf32 primary key);
+
+#
+# Testing mi_check with long key values
+#
+create table t1 (a varchar(333) character set utf32, key(a));
+insert into t1 values (repeat('a',333)), (repeat('b',333));
+flush tables;
+check table t1;
+drop table t1;
+
+#
+# Test how character set works with date/time
+#
+SET collation_connection=utf32_general_ci;
+--source include/ctype_datetime.inc
+SET NAMES latin1;
+
+#
+# Test basic regex functionality
+#
+set collation_connection=utf32_general_ci;
+--source include/ctype_regex.inc
+set names latin1;
+
+
+# TODO: add tests for all engines
+
+#
+# Bug #36418 Character sets: crash if char(256 using utf32)
+#
+select hex(char(0x01 using utf32));
+select hex(char(0x0102 using utf32));
+select hex(char(0x010203 using utf32));
+select hex(char(0x01020304 using utf32));
+create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32);
+create index i on t1 (s1);
+insert into t1 values (char(256 using utf32), char(256 using utf32));
+select hex(s1), hex(s2) from t1;
+drop table t1;
+
+
+#
+# Bug#33073 Character sets: ordering fails with utf32
+#
+SET collation_connection=utf32_general_ci;
+CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
+SELECT * FROM t1 ORDER BY s1;
+SET max_sort_length=4;
+SELECT * FROM t1 ORDER BY s1;
+DROP TABLE t1;
+SET max_sort_length=DEFAULT;
+SET NAMES latin1;
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
--- a/mysql-test/t/ctype_utf32_uca.test
+++ b/mysql-test/t/ctype_utf32_uca.test
@ -0,0 +1,291 @@
+-- source include/have_ucs2.inc
+-- source include/have_utf32.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+
+set names utf8;
+set collation_connection=utf32_unicode_ci;
+select hex('a'), hex('a ');
+-- source include/endspace.inc
+
+#
+# Bug #6787 LIKE not working properly with _ and utf8 data
+#
+select 'c' like '\_' as want0; 
+
+#
+# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
+#
+CREATE TABLE t (
+  c char(20) NOT NULL
+) ENGINE=MyISAM DEFAULT CHARACTER SET utf32 COLLATE=utf32_unicode_ci;
+INSERT INTO t VALUES ('a'),('ab'),('aba');
+ALTER TABLE t ADD INDEX (c);
+SELECT c FROM t WHERE c LIKE 'a%';
+DROP TABLE t;
+
+
+create table t1 (c1 char(10) character set utf32 collate utf32_bin);
+
+#
+# Basic Latin
+#
+insert into t1 values ('A'),('a');
+insert into t1 values ('B'),('b');
+insert into t1 values ('C'),('c');
+insert into t1 values ('D'),('d');
+insert into t1 values ('E'),('e');
+insert into t1 values ('F'),('f');
+insert into t1 values ('G'),('g');
+insert into t1 values ('H'),('h');
+insert into t1 values ('I'),('i');
+insert into t1 values ('J'),('j');
+insert into t1 values ('K'),('k');
+insert into t1 values ('L'),('l');
+insert into t1 values ('M'),('m');
+insert into t1 values ('N'),('n');
+insert into t1 values ('O'),('o');
+insert into t1 values ('P'),('p');
+insert into t1 values ('Q'),('q');
+insert into t1 values ('R'),('r');
+insert into t1 values ('S'),('s');
+insert into t1 values ('T'),('t');
+insert into t1 values ('U'),('u');
+insert into t1 values ('V'),('v');
+insert into t1 values ('W'),('w');
+insert into t1 values ('X'),('x');
+insert into t1 values ('Y'),('y');
+insert into t1 values ('Z'),('z');
+
+#
+# Latin1 supplement
+#
+insert into t1 values (_ucs2 0x00e0),(_ucs2 0x00c0);
+insert into t1 values (_ucs2 0x00e1),(_ucs2 0x00c1);
+insert into t1 values (_ucs2 0x00e2),(_ucs2 0x00c2);
+insert into t1 values (_ucs2 0x00e3),(_ucs2 0x00c3);
+insert into t1 values (_ucs2 0x00e4),(_ucs2 0x00c4);
+insert into t1 values (_ucs2 0x00e5),(_ucs2 0x00c5);
+insert into t1 values (_ucs2 0x00e6),(_ucs2 0x00c6);
+insert into t1 values (_ucs2 0x00e7),(_ucs2 0x00c7);
+insert into t1 values (_ucs2 0x00e8),(_ucs2 0x00c8);
+insert into t1 values (_ucs2 0x00e9),(_ucs2 0x00c9);
+insert into t1 values (_ucs2 0x00ea),(_ucs2 0x00ca);
+insert into t1 values (_ucs2 0x00eb),(_ucs2 0x00cb);
+insert into t1 values (_ucs2 0x00ec),(_ucs2 0x00cc);
+insert into t1 values (_ucs2 0x00ed),(_ucs2 0x00cd);
+insert into t1 values (_ucs2 0x00ee),(_ucs2 0x00ce);
+insert into t1 values (_ucs2 0x00ef),(_ucs2 0x00cf);
+
+insert into t1 values (_ucs2 0x00f0),(_ucs2 0x00d0);
+insert into t1 values (_ucs2 0x00f1),(_ucs2 0x00d1);
+insert into t1 values (_ucs2 0x00f2),(_ucs2 0x00d2);
+insert into t1 values (_ucs2 0x00f3),(_ucs2 0x00d3);
+insert into t1 values (_ucs2 0x00f4),(_ucs2 0x00d4);
+insert into t1 values (_ucs2 0x00f5),(_ucs2 0x00d5);
+insert into t1 values (_ucs2 0x00f6),(_ucs2 0x00d6);
+insert into t1 values (_ucs2 0x00f7),(_ucs2 0x00d7);
+insert into t1 values (_ucs2 0x00f8),(_ucs2 0x00d8);
+insert into t1 values (_ucs2 0x00f9),(_ucs2 0x00d9);
+insert into t1 values (_ucs2 0x00fa),(_ucs2 0x00da);
+insert into t1 values (_ucs2 0x00fb),(_ucs2 0x00db);
+insert into t1 values (_ucs2 0x00fc),(_ucs2 0x00dc);
+insert into t1 values (_ucs2 0x00fd),(_ucs2 0x00dd);
+insert into t1 values (_ucs2 0x00fe),(_ucs2 0x00de);
+insert into t1 values (_ucs2 0x00ff),(_ucs2 0x00df);
+
+#
+# Latin extended-A, 0100-017F
+#
+insert into t1 values (_ucs2 0x0100),(_ucs2 0x0101),(_ucs2 0x0102),(_ucs2 0x0103);
+insert into t1 values (_ucs2 0x0104),(_ucs2 0x0105),(_ucs2 0x0106),(_ucs2 0x0107);
+insert into t1 values (_ucs2 0x0108),(_ucs2 0x0109),(_ucs2 0x010a),(_ucs2 0x010b);
+insert into t1 values (_ucs2 0x010c),(_ucs2 0x010d),(_ucs2 0x010e),(_ucs2 0x010f);
+insert into t1 values (_ucs2 0x0110),(_ucs2 0x0111),(_ucs2 0x0112),(_ucs2 0x0113);
+insert into t1 values (_ucs2 0x0114),(_ucs2 0x0115),(_ucs2 0x0116),(_ucs2 0x0117);
+insert into t1 values (_ucs2 0x0118),(_ucs2 0x0119),(_ucs2 0x011a),(_ucs2 0x011b);
+insert into t1 values (_ucs2 0x011c),(_ucs2 0x011d),(_ucs2 0x011e),(_ucs2 0x011f);
+insert into t1 values (_ucs2 0x0120),(_ucs2 0x0121),(_ucs2 0x0122),(_ucs2 0x0123);
+insert into t1 values (_ucs2 0x0124),(_ucs2 0x0125),(_ucs2 0x0126),(_ucs2 0x0127);
+insert into t1 values (_ucs2 0x0128),(_ucs2 0x0129),(_ucs2 0x012a),(_ucs2 0x012b);
+insert into t1 values (_ucs2 0x012c),(_ucs2 0x012d),(_ucs2 0x012e),(_ucs2 0x012f);
+insert into t1 values (_ucs2 0x0130),(_ucs2 0x0131),(_ucs2 0x0132),(_ucs2 0x0133);
+insert into t1 values (_ucs2 0x0134),(_ucs2 0x0135),(_ucs2 0x0136),(_ucs2 0x0137);
+insert into t1 values (_ucs2 0x0138),(_ucs2 0x0139),(_ucs2 0x013a),(_ucs2 0x013b);
+insert into t1 values (_ucs2 0x013c),(_ucs2 0x013d),(_ucs2 0x013e),(_ucs2 0x013f);
+insert into t1 values (_ucs2 0x0140),(_ucs2 0x0141),(_ucs2 0x0142),(_ucs2 0x0143);
+insert into t1 values (_ucs2 0x0144),(_ucs2 0x0145),(_ucs2 0x0146),(_ucs2 0x0147);
+insert into t1 values (_ucs2 0x0148),(_ucs2 0x0149),(_ucs2 0x014a),(_ucs2 0x014b);
+insert into t1 values (_ucs2 0x014c),(_ucs2 0x014d),(_ucs2 0x014e),(_ucs2 0x014f);
+insert into t1 values (_ucs2 0x0150),(_ucs2 0x0151),(_ucs2 0x0152),(_ucs2 0x0153);
+insert into t1 values (_ucs2 0x0154),(_ucs2 0x0155),(_ucs2 0x0156),(_ucs2 0x0157);
+insert into t1 values (_ucs2 0x0158),(_ucs2 0x0159),(_ucs2 0x015a),(_ucs2 0x015b);
+insert into t1 values (_ucs2 0x015c),(_ucs2 0x015d),(_ucs2 0x015e),(_ucs2 0x015f);
+insert into t1 values (_ucs2 0x0160),(_ucs2 0x0161),(_ucs2 0x0162),(_ucs2 0x0163);
+insert into t1 values (_ucs2 0x0164),(_ucs2 0x0165),(_ucs2 0x0166),(_ucs2 0x0167);
+insert into t1 values (_ucs2 0x0168),(_ucs2 0x0169),(_ucs2 0x016a),(_ucs2 0x016b);
+insert into t1 values (_ucs2 0x016c),(_ucs2 0x016d),(_ucs2 0x016e),(_ucs2 0x016f);
+insert into t1 values (_ucs2 0x0170),(_ucs2 0x0171),(_ucs2 0x0172),(_ucs2 0x0173);
+insert into t1 values (_ucs2 0x0174),(_ucs2 0x0175),(_ucs2 0x0176),(_ucs2 0x0177);
+insert into t1 values (_ucs2 0x0178),(_ucs2 0x0179),(_ucs2 0x017a),(_ucs2 0x017b);
+insert into t1 values (_ucs2 0x017c),(_ucs2 0x017d),(_ucs2 0x017e),(_ucs2 0x017f);
+
+#
+# Latin extended-B, 0180-024F
+#
+insert into t1 values (_ucs2 0x0180),(_ucs2 0x0181),(_ucs2 0x0182),(_ucs2 0x0183);
+insert into t1 values (_ucs2 0x0184),(_ucs2 0x0185),(_ucs2 0x0186),(_ucs2 0x0187);
+insert into t1 values (_ucs2 0x0188),(_ucs2 0x0189),(_ucs2 0x018a),(_ucs2 0x018b);
+insert into t1 values (_ucs2 0x018c),(_ucs2 0x018d),(_ucs2 0x018e),(_ucs2 0x018f);
+insert into t1 values (_ucs2 0x0190),(_ucs2 0x0191),(_ucs2 0x0192),(_ucs2 0x0193);
+insert into t1 values (_ucs2 0x0194),(_ucs2 0x0195),(_ucs2 0x0196),(_ucs2 0x0197);
+insert into t1 values (_ucs2 0x0198),(_ucs2 0x0199),(_ucs2 0x019a),(_ucs2 0x019b);
+insert into t1 values (_ucs2 0x019c),(_ucs2 0x019d),(_ucs2 0x019e),(_ucs2 0x019f);
+insert into t1 values (_ucs2 0x01a0),(_ucs2 0x01a1),(_ucs2 0x01a2),(_ucs2 0x01a3);
+insert into t1 values (_ucs2 0x01a4),(_ucs2 0x01a5),(_ucs2 0x01a6),(_ucs2 0x01a7);
+insert into t1 values (_ucs2 0x01a8),(_ucs2 0x01a9),(_ucs2 0x01aa),(_ucs2 0x01ab);
+insert into t1 values (_ucs2 0x01ac),(_ucs2 0x01ad),(_ucs2 0x01ae),(_ucs2 0x01af);
+insert into t1 values (_ucs2 0x01b0),(_ucs2 0x01b1),(_ucs2 0x01b2),(_ucs2 0x01b3);
+insert into t1 values (_ucs2 0x01b4),(_ucs2 0x01b5),(_ucs2 0x01b6),(_ucs2 0x01b7);
+insert into t1 values (_ucs2 0x01b8),(_ucs2 0x01b9),(_ucs2 0x01ba),(_ucs2 0x01bb);
+insert into t1 values (_ucs2 0x01bc),(_ucs2 0x01bd),(_ucs2 0x01be),(_ucs2 0x01bf);
+insert into t1 values (_ucs2 0x01c0),(_ucs2 0x01c1),(_ucs2 0x01c2),(_ucs2 0x01c3);
+insert into t1 values (_ucs2 0x01c4),(_ucs2 0x01c5),(_ucs2 0x01c6),(_ucs2 0x01c7);
+insert into t1 values (_ucs2 0x01c8),(_ucs2 0x01c9),(_ucs2 0x01ca),(_ucs2 0x01cb);
+insert into t1 values (_ucs2 0x01cc),(_ucs2 0x01cd),(_ucs2 0x01ce),(_ucs2 0x01cf);
+insert into t1 values (_ucs2 0x01d0),(_ucs2 0x01d1),(_ucs2 0x01d2),(_ucs2 0x01d3);
+insert into t1 values (_ucs2 0x01d4),(_ucs2 0x01d5),(_ucs2 0x01d6),(_ucs2 0x01d7);
+insert into t1 values (_ucs2 0x01d8),(_ucs2 0x01d9),(_ucs2 0x01da),(_ucs2 0x01db);
+insert into t1 values (_ucs2 0x01dc),(_ucs2 0x01dd),(_ucs2 0x01de),(_ucs2 0x01df);
+insert into t1 values (_ucs2 0x01e0),(_ucs2 0x01e1),(_ucs2 0x01e2),(_ucs2 0x01e3);
+insert into t1 values (_ucs2 0x01e4),(_ucs2 0x01e5),(_ucs2 0x01e6),(_ucs2 0x01e7);
+insert into t1 values (_ucs2 0x01e8),(_ucs2 0x01e9),(_ucs2 0x01ea),(_ucs2 0x01eb);
+insert into t1 values (_ucs2 0x01ec),(_ucs2 0x01ed),(_ucs2 0x01ee),(_ucs2 0x01ef);
+insert into t1 values (_ucs2 0x01f0),(_ucs2 0x01f1),(_ucs2 0x01f2),(_ucs2 0x01f3);
+insert into t1 values (_ucs2 0x01f4),(_ucs2 0x01f5),(_ucs2 0x01f6),(_ucs2 0x01f7);
+insert into t1 values (_ucs2 0x01f8),(_ucs2 0x01f9),(_ucs2 0x01fa),(_ucs2 0x01fb);
+insert into t1 values (_ucs2 0x01fc),(_ucs2 0x01fd),(_ucs2 0x01fe),(_ucs2 0x01ff);
+
+
+insert into t1 values ('AA'),('Aa'),('aa'),('aA');
+insert into t1 values ('CH'),('Ch'),('ch'),('cH');
+insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
+insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
+insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
+insert into t1 values ('LL'),('Ll'),('ll'),('lL');
+insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
+insert into t1 values ('OE'),('Oe'),('oe'),('oE');
+insert into t1 values ('SS'),('Ss'),('ss'),('sS');
+insert into t1 values ('RR'),('Rr'),('rr'),('rR');
+
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_icelandic_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_latvian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_romanian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovenian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_polish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_estonian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_swedish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_turkish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_czech_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_danish_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_lithuanian_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovak_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish2_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_roman_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_esperanto_ci;
+select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_hungarian_ci;
+
+drop table t1;
+
+#
+# Bug#5324
+#
+SET NAMES utf8;
+#test1
+CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_general_ci NOT NULL, INDEX (c));
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_general_ci;
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
+COLLATE utf32_general_ci ORDER BY c;
+DROP TABLE t1;
+#test2
+CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_unicode_ci;
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
+COLLATE utf32_unicode_ci ORDER BY c;
+DROP TABLE t1;
+#test 3
+CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
+INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
+#Check one row
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) COLLATE utf32_unicode_ci;
+INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
+#Check two rows
+SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32)
+COLLATE utf32_unicode_ci ORDER BY c;
+DROP TABLE t1;
+
+
+SET NAMES utf8;
+SET @test_character_set='utf32';
+SET @test_collation='utf32_swedish_ci';
+-- source include/ctype_common.inc
+
+
+SET collation_connection='utf32_unicode_ci';
+-- source include/ctype_filesort.inc
+-- source include/ctype_like_escape.inc
+
+--echo End of 4.1 tests
+
+#
+# Check UPPER/LOWER changing length
+#
+# Result shorter than argument
+CREATE TABLE t1 (id int, a varchar(30) character set utf32);
+INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131);
+INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049);
+INSERT INTO t1 VALUES (5, _ucs2 0x01300130), (6, _ucs2 0x00490049);
+SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
+FROM t1 ORDER BY id;
+ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf32 collate utf32_turkish_ci;
+SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
+FROM t1 ORDER BY id;
+DROP TABLE t1;
+
+#
+# Bug #27079 Crash while grouping empty ucs2 strings
+#
+CREATE TABLE t1 (
+ c1 text character set utf32 collate utf32_polish_ci NOT NULL
+) ENGINE=MyISAM;
+insert into t1 values (''),('a');
+SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
+DROP TABLE IF EXISTS t1;
+
+
+#
+# Test basic regex functionality
+#
+set collation_connection=utf32_unicode_ci;
+--source include/ctype_regex.inc
+
+
+--echo #
+--echo # End of 5.5 tests
+--echo #
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@ -1440,6 +1440,17 @@ DROP TABLE t1;


 --echo Start of 5.4 tests
+
+#
+# WL#1213: utf8mb3 is an alias for utf8
+#
+SET NAMES utf8mb3;
+SHOW VARIABLES LIKE 'character_set_results%'; 
+CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+SELECT _utf8mb3'test';
+
 #
 # Bug#26180: Can't add columns to tables created with utf8 text indexes
 #
--- a/mysql-test/t/ctype_utf8mb4.test
+++ b/mysql-test/t/ctype_utf8mb4.test
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@ -45,6 +45,53 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
 #endif

+
+#ifdef HAVE_CHARSET_utf32
+extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf32_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf32_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci;
+extern CHARSET_INFO my_charset_utf32_roman_uca_ci;
+extern CHARSET_INFO my_charset_utf32_persian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci;
+extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci;
+#endif /* HAVE_CHARSET_utf32 */
+
+
+#ifdef HAVE_CHARSET_utf16
+extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf16_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf16_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci;
+extern CHARSET_INFO my_charset_utf16_roman_uca_ci;
+extern CHARSET_INFO my_charset_utf16_persian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci;
+extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci;
+#endif  /* HAVE_CHARSET_utf16 */
+
+
 #ifdef HAVE_CHARSET_utf8
 extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
 extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
@ -70,6 +117,28 @@ extern CHARSET_INFO my_charset_utf8_general_cs;
 #endif
 #endif

+#ifdef HAVE_CHARSET_utf8mb4
+extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci;
+#endif /* HAVE_CHARSET_utf8mb4 */
+
 #endif /* HAVE_UCA_COLLATIONS */

 my_bool init_compiled_charsets(myf flags __attribute__((unused)))
@ -191,7 +260,91 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
  add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
  add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
 #endif
-#endif
+#endif /* HAVE_CHARSET_utf8 */
+
+
+#ifdef HAVE_CHARSET_utf8mb4
+  add_compiled_collation(&my_charset_utf8mb4_general_ci);
+  add_compiled_collation(&my_charset_utf8mb4_bin);
+#ifdef HAVE_UCA_COLLATIONS
+  add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
+  add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
+  add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
+#endif /* HAVE_UCA_COLLATIONS  */
+#endif /* HAVE_CHARSET_utf8mb4 */
+
+
+#ifdef HAVE_CHARSET_utf16
+  add_compiled_collation(&my_charset_utf16_general_ci);
+  add_compiled_collation(&my_charset_utf16_bin);
+#ifdef HAVE_UCA_COLLATIONS
+  add_compiled_collation(&my_charset_utf16_unicode_ci);
+  add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
+  add_compiled_collation(&my_charset_utf16_latvian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_romanian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_slovenian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_polish_uca_ci);
+  add_compiled_collation(&my_charset_utf16_estonian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_spanish_uca_ci);
+  add_compiled_collation(&my_charset_utf16_swedish_uca_ci);
+  add_compiled_collation(&my_charset_utf16_turkish_uca_ci);
+  add_compiled_collation(&my_charset_utf16_czech_uca_ci);
+  add_compiled_collation(&my_charset_utf16_danish_uca_ci);
+  add_compiled_collation(&my_charset_utf16_lithuanian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_slovak_uca_ci);
+  add_compiled_collation(&my_charset_utf16_spanish2_uca_ci);
+  add_compiled_collation(&my_charset_utf16_roman_uca_ci);
+  add_compiled_collation(&my_charset_utf16_persian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_esperanto_uca_ci);
+  add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
+  add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
+#endif /* HAVE_UCA_COLLATIONS */
+#endif /* HAVE_CHARSET_utf16 */
+
+
+#ifdef HAVE_CHARSET_utf32
+  add_compiled_collation(&my_charset_utf32_general_ci);
+  add_compiled_collation(&my_charset_utf32_bin);
+#ifdef HAVE_UCA_COLLATIONS
+  add_compiled_collation(&my_charset_utf32_unicode_ci);
+  add_compiled_collation(&my_charset_utf32_icelandic_uca_ci);
+  add_compiled_collation(&my_charset_utf32_latvian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_romanian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_slovenian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_polish_uca_ci);
+  add_compiled_collation(&my_charset_utf32_estonian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_spanish_uca_ci);
+  add_compiled_collation(&my_charset_utf32_swedish_uca_ci);
+  add_compiled_collation(&my_charset_utf32_turkish_uca_ci);
+  add_compiled_collation(&my_charset_utf32_czech_uca_ci);
+  add_compiled_collation(&my_charset_utf32_danish_uca_ci);
+  add_compiled_collation(&my_charset_utf32_lithuanian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_slovak_uca_ci);
+  add_compiled_collation(&my_charset_utf32_spanish2_uca_ci);
+  add_compiled_collation(&my_charset_utf32_roman_uca_ci);
+  add_compiled_collation(&my_charset_utf32_persian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_esperanto_uca_ci);
+  add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
+  add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
+#endif /* HAVE_UCA_COLLATIONS */
+#endif /* HAVE_CHARSET_utf32 */

  /* Copy compiled charsets */
  for (cs=compiled_charsets; cs->name; cs++)
--- a/mysys/charset.c
+++ b/mysys/charset.c
@ -252,13 +252,35 @@ static int add_collation(CHARSET_INFO *cs)
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
 #endif        
      }
-      else if (!strcmp(cs->csname, "utf8"))
+      else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
      {
 #if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
        newcs->ctype= my_charset_utf8_unicode_ci.ctype;
        if (init_state_maps(newcs))
          return MY_XML_ERROR;
+#endif
+      }
+      else if (!strcmp(cs->csname, "utf8mb4"))
+      {
+#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
+        copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
+        newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
+        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
+#endif
+      }
+      else if (!strcmp(cs->csname, "utf16"))
+      {
+#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
+        copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
+        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
+#endif
+      }
+      else if (!strcmp(cs->csname, "utf32"))
+      {
+#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
+        copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
+        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
 #endif
      }
      else
@ -433,17 +455,35 @@ static void init_available_charsets(void)
 }


-uint get_collation_number(const char *name)
+static const char*
+get_collation_name_alias(const char *name, char *buf, size_t bufsize)
 {
-  my_pthread_once(&charsets_initialized, init_available_charsets);
-  return get_collation_number_internal(name);
+  if (!strncasecmp(name, "utf8mb3_", 8))
+  {
+    my_snprintf(buf, bufsize, "utf8_%s", name + 8);
+    return buf;
+  }
+  return NULL;
 }


-uint get_charset_number(const char *charset_name, uint cs_flags)
+uint get_collation_number(const char *name)  /* 0 if neither name nor its alias resolves */
+{
+  uint id;
+  char alias[64];  /* scratch buffer filled by get_collation_name_alias() */
+  my_pthread_once(&charsets_initialized, init_available_charsets);
+  if ((id= get_collation_number_internal(name)))  /* try the name exactly as given first */
+    return id;
+  if ((name= get_collation_name_alias(name, alias, sizeof(alias))))  /* NULL means no alias exists */
+    return get_collation_number_internal(name);
+  return 0;
+}
+
+
+static uint
+get_charset_number_internal(const char *charset_name, uint cs_flags)
 {
  CHARSET_INFO **cs;
-  my_pthread_once(&charsets_initialized, init_available_charsets);
  
  for (cs= all_charsets;
       cs < all_charsets + array_elements(all_charsets);
@ -457,6 +497,27 @@ uint get_charset_number(const char *charset_name, uint cs_flags)
 }


+static const char*
+get_charset_name_alias(const char *name)  /* returns the aliased charset name, or NULL */
+{
+  if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))  /* the only alias handled here */
+    return "utf8";
+  return NULL;  /* name is not a known alias */
+}
+
+
+uint get_charset_number(const char *charset_name, uint cs_flags)  /* 0 if nothing matches */
+{
+  uint id;
+  my_pthread_once(&charsets_initialized, init_available_charsets);
+  if ((id= get_charset_number_internal(charset_name, cs_flags)))  /* try the name as given first */
+    return id;
+  if ((charset_name= get_charset_name_alias(charset_name)))  /* retry under the aliased name, if any */
+    return get_charset_number_internal(charset_name, cs_flags);
+  return 0;
+}
+                  
+
 const char *get_charset_name(uint charset_number)
 {
  CHARSET_INFO *cs;
--- a/sql/field.cc
+++ b/sql/field.cc
@ -1836,7 +1836,9 @@ int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
  ASSERT_COLUMN_MARKED_FOR_WRITE;
  char buff[MAX_DATE_STRING_REP_LENGTH];
  uint length= (uint) my_TIME_to_str(ltime, buff);
-  return store(buff, length, &my_charset_bin);
+  return store(buff, length,
+               (charset()->state & MY_CS_NONASCII) ?
+               &my_charset_latin1 : &my_charset_bin);
 }


--- a/sql/item.cc
+++ b/sql/item.cc
@ -854,7 +854,7 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
    cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen;
    return cnvitem;
  }
-  return NULL;
+  return Item::safe_charset_converter(tocs);
 }


@ -1436,7 +1436,12 @@ left_is_superset(DTCollation *left, DTCollation *right)
  if (left->collation->state & MY_CS_UNICODE &&
      (left->derivation < right->derivation ||
       (left->derivation == right->derivation &&
-        !(right->collation->state & MY_CS_UNICODE))))
+        (!(right->collation->state & MY_CS_UNICODE) ||
+         /* The code below makes 4-byte utf8 a superset over 3-byte utf8 */
+         (left->collation->state & MY_CS_UNICODE_SUPPLEMENT &&
+          !(right->collation->state & MY_CS_UNICODE_SUPPLEMENT) &&
+          left->collation->mbmaxlen > right->collation->mbmaxlen &&
+          left->collation->mbminlen == right->collation->mbminlen)))))
    return TRUE;
  /* Allow convert from ASCII */
  if (right->repertoire == MY_REPERTOIRE_ASCII &&
@ -1695,7 +1700,7 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
  {
    Item* conv;
    uint32 dummy_offset;
-    if (!String::needs_conversion(0, (*arg)->collation.collation,
+    if (!String::needs_conversion(1, (*arg)->collation.collation,
                                  coll.collation,
                                  &dummy_offset))
      continue;
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@ -2371,17 +2371,27 @@ String *Item_func_char::val_str(String *str)
    int32 num=(int32) args[i]->val_int();
    if (!args[i]->null_value)
    {
-      char char_num= (char) num;
-      if (num&0xFF000000L) {
-        str->append((char)(num>>24));
-        goto b2;
-      } else if (num&0xFF0000L) {
-    b2:        str->append((char)(num>>16));
-        goto b1;
-      } else if (num&0xFF00L) {
-    b1:        str->append((char)(num>>8));
+      char tmp[4];
+      if (num & 0xFF000000L)
+      {
+        mi_int4store(tmp, num);
+        str->append(tmp, 4, &my_charset_bin);
+      }
+      else if (num & 0xFF0000L)
+      {
+        mi_int3store(tmp, num);
+        str->append(tmp, 3, &my_charset_bin);
+      }
+      else if (num & 0xFF00L)
+      {
+        mi_int2store(tmp, num);
+        str->append(tmp, 2, &my_charset_bin);
+      }
+      else
+      {
+        tmp[0]= (char) num;
+        str->append(tmp, 1, &my_charset_bin);
      }
-      str->append(&char_num, 1);
    }
  }
  str->realloc(str->length());			// Add end 0 (for Purify)
@ -2769,7 +2779,8 @@ String *Item_func_conv_charset::val_str(String *str)
 void Item_func_conv_charset::fix_length_and_dec()
 {
  collation.set(conv_charset, DERIVATION_IMPLICIT);
-  max_length = args[0]->max_length*conv_charset->mbmaxlen;
+  max_length = args[0]->max_length / args[0]->collation.collation->mbmaxlen *
+               conv_charset->mbmaxlen;
 }

 void Item_func_conv_charset::print(String *str, enum_query_type query_type)
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@ -412,11 +412,25 @@ bool String::append(const char *s)

 bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
 {
-  uint32 dummy_offset;
+  uint32 offset;
  
-  if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
+  if (needs_conversion(arg_length, cs, str_charset, &offset))
  {
-    uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
+    uint32 add_length;
+    if ((cs == &my_charset_bin) && offset)
+    {
+      DBUG_ASSERT(str_charset->mbminlen > offset);
+      offset= str_charset->mbminlen - offset; // How many bytes to pad
+      add_length= arg_length + offset;
+      if (realloc(str_length + add_length))
+        return TRUE;
+      bzero((char*) Ptr + str_length, offset);
+      memcpy(Ptr + str_length + offset, s, arg_length);
+      str_length+= add_length;
+      return FALSE;
+    }
+
+    add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
    uint dummy_errors;
    if (realloc(str_length + add_length)) 
      return TRUE;
@ -966,6 +980,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
        uint pad_length= to_cs->mbminlen - from_offset;
        bzero(to, pad_length);
        memmove(to + pad_length, from, from_offset);
+        /*
+          In some cases left zero-padding can create an incorrect character.
+          For example:
+            INSERT INTO t1 (utf32_column) VALUES (0x110000);
+          We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
+          The valid characters range is limited to 0x00000000..0x0010FFFF.
+          
+          Make sure we didn't pad to an incorrect character.
+        */
+        if (to_cs->cset->well_formed_len(to_cs,
+                                         to, to + to_cs->mbminlen, 1,
+                                         &well_formed_error) !=
+                                         to_cs->mbminlen)
+        {
+          *from_end_pos= *well_formed_error_pos= from;
+          *cannot_convert_error_pos= NULL;
+          return 0;
+        }
        nchars--;
        from+= from_offset;
        from_length-= from_offset;
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@ -2776,7 +2776,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
                                               sql_field->interval_list);
        List_iterator<String> int_it(sql_field->interval_list);
        String conv, *tmp;
-        char comma_buf[2];
+        char comma_buf[4]; /* 4 bytes for utf32 */
        int comma_length= cs->cset->wc_mb(cs, ',', (uchar*) comma_buf,
                                          (uchar*) comma_buf + 
                                          sizeof(comma_buf));
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs,

 /* BINARY collations handlers for MB charsets */

-static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
-				const uchar *s, size_t slen,
-				const uchar *t, size_t tlen,
-                                my_bool t_is_prefix)
+int
+my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+                    const uchar *s, size_t slen,
+                    const uchar *t, size_t tlen,
+                    my_bool t_is_prefix)
 {
  size_t len=min(slen,tlen);
  int cmp= memcmp(s,t,len);
@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
    0 if strings are equal
 */

-static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
-                                 const uchar *a, size_t a_length, 
-                                 const uchar *b, size_t b_length,
-                                 my_bool diff_if_only_endspace_difference)
+int
+my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+                      const uchar *a, size_t a_length, 
+                      const uchar *b, size_t b_length,
+                      my_bool diff_if_only_endspace_difference)
 {
  const uchar *end;
  size_t length;
@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
 }


-static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
-		      const char *s, const char *t)
+int
+my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+                     const char *s, const char *t)
 {
  return strcmp(s,t);
 }

-static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
-		      const uchar *key, size_t len,ulong *nr1, ulong *nr2)
+
+void
+my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
+                    const uchar *key, size_t len,ulong *nr1, ulong *nr2)
 {
  const uchar *pos = key;
  
@ -787,10 +792,11 @@ fill_max_and_min:
 }


-static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
-		  const char *str,const char *str_end,
-		  const char *wildstr,const char *wildend,
-		  int escape, int w_one, int w_many)
+int
+my_wildcmp_mb_bin(CHARSET_INFO *cs,
+                  const char *str,const char *str_end,
+                  const char *wildstr,const char *wildend,
+                  int escape, int w_one, int w_many)
 {
  int result= -1;				/* Not found, using wildcards */

--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c