Backporting WL#1213
config/ac-macros/character_sets.m4: - Adding configure definitions for utf8mb4, utf16, utf32 include/config-win.h: - Enabling utf8mb4, utf16, utf32 in Windows build include/m_ctype.h: - Adding new flags - Adding new shared function prototypes mysql-test/include/ctype_datetime.inc: - Adding test to check that datetime functions work with "real" multibyte character sets. mysql-test/include/ctype_like.inc: - Adding LIKE tests mysql-test/include/have_utf16.inc: New file mysql-test/include/have_utf32.inc: New file mysql-test/include/have_utf8mb4.inc: New file mysql-test/r/ctype_ldml.result: - Adding tests for utf8mb4, utf16, utf32 mysql-test/r/ctype_many.result: - Adding tests to check superset/subset relations between all Unicode character sets. mysql-test/r/ctype_utf16.result: New file mysql-test/r/ctype_utf16_uca.result: New file mysql-test/r/ctype_utf32.result: New file mysql-test/r/ctype_utf32_uca.result: New file mysql-test/r/ctype_utf8.result: - Adding tests for utf8mb3 alias mysql-test/r/ctype_utf8mb4.result: - Adding tests for utf8mb4 mysql-test/r/have_utf16.require: New file mysql-test/r/have_utf32.require: New file mysql-test/r/have_utf8mb4.require: New file mysql-test/std_data/Index.xml: - Adding tests for loadable utf8mb4, utf16, utf32 collations mysql-test/suite/sys_vars/r/character_set_client_basic.result: - Adding tests for utf16, utf32.
- Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_connection_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_database_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_results_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/t/character_set_client_basic.test: - Adding tests for new character sets mysql-test/suite/sys_vars/t/character_set_connection_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_database_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_results_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/t/ctype_ldml.test: - Adding tests for dynamic utf8mb4, utf16, utf32 collations mysql-test/t/ctype_many.test: - Adding tests to check superset/subset relations between all Unicode character sets mysql-test/t/ctype_utf16.test: New file mysql-test/t/ctype_utf16_uca.test: New file mysql-test/t/ctype_utf32.test: New file mysql-test/t/ctype_utf32_uca.test: New file mysql-test/t/ctype_utf8.test: - Adding tests for utf8mb3 alias mysql-test/t/ctype_utf8mb4.test: New file mysys/charset-def.c: - Adding initialization of utf8mb4, utf16, utf32 built-in collations mysys/charset.c: - Adding initialization of utf8mb4, utf16, utf32 dynamic collations sql/field.cc: - Fixing "truncated" error with datetime functions: Force conversion in case of non-ascii character sets.
sql/item.cc: - Adding superset/subset relation check for utf8mb4/utf8 sql/item_strfunc.cc: - Fixing a problem with CHAR(x USING utf32) sql/sql_string.cc: - Fixing problems with zero padding for UTF32 sql/sql_table.cc: - Fixing buffer size, to make utf32 comma fit. strings/ctype-mb.c: - Making handlers for multi-byte binary collations public strings/ctype-uca.c: - Adding definitions for utf8mb4, utf16, utf32 UCA collations strings/ctype-ucs2.c: - Adding functions which are shared between ucs2, utf16, utf32 - Adding utf16 implementation - Adding utf32 implementation strings/ctype-utf8.c: - Adding functions shared between utf8 and utf8mb4 - Adding implementation of utf8mb4
This commit is contained in:
parent
d2af6c43c0
commit
8994fad85d
@ -13,11 +13,11 @@ define(CHARSETS_AVAILABLE1,armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257)
|
||||
define(CHARSETS_AVAILABLE2,cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8)
|
||||
define(CHARSETS_AVAILABLE3,greek hebrew hp8 keybcs2 koi8r koi8u)
|
||||
define(CHARSETS_AVAILABLE4,latin1 latin2 latin5 latin7 macce macroman)
|
||||
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8)
|
||||
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32)
|
||||
|
||||
DEFAULT_CHARSET=latin1
|
||||
CHARSETS_AVAILABLE="CHARSETS_AVAILABLE0 CHARSETS_AVAILABLE1 CHARSETS_AVAILABLE2 CHARSETS_AVAILABLE3 CHARSETS_AVAILABLE4 CHARSETS_AVAILABLE5"
|
||||
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8"
|
||||
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32"
|
||||
|
||||
AC_DIVERT_POP
|
||||
|
||||
@ -50,7 +50,7 @@ AC_ARG_WITH(extra-charsets,
|
||||
|
||||
AC_MSG_CHECKING("character sets")
|
||||
|
||||
CHARSETS="$default_charset latin1 utf8"
|
||||
CHARSETS="$default_charset latin1 utf8mb4 utf8"
|
||||
|
||||
if test "$extra_charsets" = no; then
|
||||
CHARSETS="$CHARSETS"
|
||||
@ -195,8 +195,23 @@ do
|
||||
AC_DEFINE([USE_MB], [1], [Use multi-byte character routines])
|
||||
AC_DEFINE(USE_MB_IDENT, 1)
|
||||
;;
|
||||
utf8mb4)
|
||||
AC_DEFINE(HAVE_CHARSET_utf8mb4, 1, [Define to enable utf8mb4])
|
||||
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
|
||||
AC_DEFINE(USE_MB_IDENT, 1)
|
||||
;;
|
||||
utf8)
|
||||
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable ut8])
|
||||
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable utf8])
|
||||
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
|
||||
AC_DEFINE(USE_MB_IDENT, 1)
|
||||
;;
|
||||
utf16)
|
||||
AC_DEFINE(HAVE_CHARSET_utf16, 1, [Define to enable utf16])
|
||||
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
|
||||
AC_DEFINE(USE_MB_IDENT, 1)
|
||||
;;
|
||||
utf32)
|
||||
AC_DEFINE(HAVE_CHARSET_utf32, 1, [Define to enable utf32])
|
||||
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
|
||||
AC_DEFINE(USE_MB_IDENT, 1)
|
||||
;;
|
||||
@ -381,6 +396,48 @@ case $default_charset in
|
||||
fi
|
||||
default_charset_collations="$UTFC"
|
||||
;;
|
||||
utf8mb4)
|
||||
default_charset_default_collation="utf8mb4_general_ci"
|
||||
define(UTFC1, utf8mb4_general_ci utf8mb4_bin)
|
||||
define(UTFC2, utf8mb4_czech_ci utf8mb4_danish_ci)
|
||||
define(UTFC3, utf8mb4_esperanto_ci utf8mb4_estonian_ci utf8mb4_hungarian_ci)
|
||||
define(UTFC4, utf8mb4_icelandic_ci utf8mb4_latvian_ci utf8mb4_lithuanian_ci)
|
||||
define(UTFC5, utf8mb4_persian_ci utf8mb4_polish_ci utf8mb4_romanian_ci)
|
||||
define(UTFC6, utf8mb4_sinhala_ci utf8mb4_slovak_ci utf8mb4_slovenian_ci)
|
||||
define(UTFC7, utf8mb4_spanish2_ci utf8mb4_spanish_ci)
|
||||
define(UTFC8, utf8mb4_swedish_ci utf8mb4_turkish_ci)
|
||||
define(UTFC9, utf8mb4_unicode_ci)
|
||||
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
|
||||
default_charset_collations="$UTFC"
|
||||
;;
|
||||
utf16)
|
||||
default_charset_default_collation="utf16_general_ci"
|
||||
define(UTFC1, utf16_general_ci utf16_bin)
|
||||
define(UTFC2, utf16_czech_ci utf16_danish_ci)
|
||||
define(UTFC3, utf16_esperanto_ci utf16_estonian_ci utf16_hungarian_ci)
|
||||
define(UTFC4, utf16_icelandic_ci utf16_latvian_ci utf16_lithuanian_ci)
|
||||
define(UTFC5, utf16_persian_ci utf16_polish_ci utf16_romanian_ci)
|
||||
define(UTFC6, utf16_sinhala_ci utf16_slovak_ci utf16_slovenian_ci)
|
||||
define(UTFC7, utf16_spanish2_ci utf16_spanish_ci)
|
||||
define(UTFC8, utf16_swedish_ci utf16_turkish_ci)
|
||||
define(UTFC9, utf16_unicode_ci)
|
||||
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
|
||||
default_charset_collations="$UTFC"
|
||||
;;
|
||||
utf32)
|
||||
default_charset_default_collation="utf32_general_ci"
|
||||
define(UTFC1, utf32_general_ci utf32_bin)
|
||||
define(UTFC2, utf32_czech_ci utf32_danish_ci)
|
||||
define(UTFC3, utf32_esperanto_ci utf32_estonian_ci utf32_hungarian_ci)
|
||||
define(UTFC4, utf32_icelandic_ci utf32_latvian_ci utf32_lithuanian_ci)
|
||||
define(UTFC5, utf32_persian_ci utf32_polish_ci utf32_romanian_ci)
|
||||
define(UTFC6, utf32_sinhala_ci utf32_slovak_ci utf32_slovenian_ci)
|
||||
define(UTFC7, utf32_spanish2_ci utf32_spanish_ci)
|
||||
define(UTFC8, utf32_swedish_ci utf32_turkish_ci)
|
||||
define(UTFC9, utf32_unicode_ci)
|
||||
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
|
||||
default_charset_collations="$UTFC"
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([Charset $cs not available. (Available are: $CHARSETS_AVAILABLE).
|
||||
See the Installation chapter in the Reference Manual.])
|
||||
|
@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d)
|
||||
#define HAVE_CHARSET_ucs2 1
|
||||
#define HAVE_CHARSET_ujis 1
|
||||
#define HAVE_CHARSET_utf8 1
|
||||
#define HAVE_CHARSET_utf8mb4 1
|
||||
#define HAVE_CHARSET_utf16 1
|
||||
#define HAVE_CHARSET_utf32 1
|
||||
|
||||
#define HAVE_UCA_COLLATIONS 1
|
||||
#define HAVE_BOOL 1
|
||||
|
@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
|
||||
#define MY_CS_BINSORT 16 /* if binary sort order */
|
||||
#define MY_CS_PRIMARY 32 /* if primary collation */
|
||||
#define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
|
||||
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
|
||||
#define MY_CS_UNICODE 128 /* is a charset is BMP Unicode */
|
||||
#define MY_CS_READY 256 /* if a charset is initialized */
|
||||
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
|
||||
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
|
||||
#define MY_CS_HIDDEN 2048 /* don't display in SHOW */
|
||||
#define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
|
||||
#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
|
||||
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
|
||||
#define MY_CHARSET_UNDEFINED 0
|
||||
|
||||
/* Character repertoire flags */
|
||||
@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
|
||||
#define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
|
||||
#define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
|
||||
|
||||
|
||||
typedef struct my_uni_idx_st
|
||||
{
|
||||
uint16 from;
|
||||
@ -304,10 +304,14 @@ typedef struct charset_info_st
|
||||
|
||||
|
||||
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
|
||||
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
|
||||
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
|
||||
|
||||
extern CHARSET_INFO my_charset_big5_chinese_ci;
|
||||
extern CHARSET_INFO my_charset_big5_bin;
|
||||
extern CHARSET_INFO my_charset_cp932_japanese_ci;
|
||||
extern CHARSET_INFO my_charset_cp932_bin;
|
||||
extern CHARSET_INFO my_charset_cp1250_czech_ci;
|
||||
extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
|
||||
extern CHARSET_INFO my_charset_eucjpms_bin;
|
||||
extern CHARSET_INFO my_charset_euckr_korean_ci;
|
||||
@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci;
|
||||
extern CHARSET_INFO my_charset_gb2312_bin;
|
||||
extern CHARSET_INFO my_charset_gbk_chinese_ci;
|
||||
extern CHARSET_INFO my_charset_gbk_bin;
|
||||
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
|
||||
extern CHARSET_INFO my_charset_latin1_german2_ci;
|
||||
extern CHARSET_INFO my_charset_latin1_bin;
|
||||
extern CHARSET_INFO my_charset_latin2_czech_ci;
|
||||
@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin;
|
||||
extern CHARSET_INFO my_charset_ucs2_unicode_ci;
|
||||
extern CHARSET_INFO my_charset_ujis_japanese_ci;
|
||||
extern CHARSET_INFO my_charset_ujis_bin;
|
||||
extern CHARSET_INFO my_charset_utf16_bin;
|
||||
extern CHARSET_INFO my_charset_utf16_general_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_unicode_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_bin;
|
||||
extern CHARSET_INFO my_charset_utf32_general_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_unicode_ci;
|
||||
|
||||
extern CHARSET_INFO my_charset_utf8_general_ci;
|
||||
extern CHARSET_INFO my_charset_utf8_unicode_ci;
|
||||
extern CHARSET_INFO my_charset_utf8_bin;
|
||||
extern CHARSET_INFO my_charset_cp1250_czech_ci;
|
||||
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_bin;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_general_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
|
||||
#define MY_UTF8MB3 "utf8"
|
||||
#define MY_UTF8MB4 "utf8mb4"
|
||||
|
||||
|
||||
/* declarations for simple charsets */
|
||||
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
|
||||
@ -430,6 +444,19 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
my_bool my_like_range_utf16(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
my_bool my_like_range_utf32(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
int my_wildcmp_8bit(CHARSET_INFO *,
|
||||
const char *str,const char *str_end,
|
||||
@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *,
|
||||
const char *s, size_t s_length,
|
||||
my_match_t *match, uint nmatch);
|
||||
|
||||
int my_strnncoll_mb_bin(CHARSET_INFO * cs,
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
my_bool t_is_prefix);
|
||||
|
||||
int my_strnncollsp_mb_bin(CHARSET_INFO *cs,
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference);
|
||||
|
||||
int my_wildcmp_mb_bin(CHARSET_INFO *cs,
|
||||
const char *str,const char *str_end,
|
||||
const char *wildstr,const char *wildend,
|
||||
int escape, int w_one, int w_many);
|
||||
|
||||
int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const char *s, const char *t);
|
||||
|
||||
void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *key, size_t len,ulong *nr1, ulong *nr2);
|
||||
|
||||
size_t my_strnxfrm_unicode(CHARSET_INFO *,
|
||||
uchar *dst, size_t dstlen,
|
||||
const uchar *src, size_t srclen);
|
||||
|
||||
int my_wildcmp_unicode(CHARSET_INFO *cs,
|
||||
const char *str, const char *str_end,
|
||||
const char *wildstr, const char *wildend,
|
||||
|
11
mysql-test/include/ctype_datetime.inc
Normal file
11
mysql-test/include/ctype_datetime.inc
Normal file
@ -0,0 +1,11 @@
|
||||
#
|
||||
# Bug#32390 Character sets: casting utf32 to/from date doesn't work
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0;
|
||||
SET timestamp=1216359724;
|
||||
INSERT INTO t1 VALUES (current_date);
|
||||
INSERT INTO t1 VALUES (current_time);
|
||||
INSERT INTO t1 VALUES (current_timestamp);
|
||||
SELECT s1, hex(s1) FROM t1;
|
||||
DROP TABLE t1;
|
||||
SET timestamp=0;
|
50
mysql-test/include/ctype_like.inc
Normal file
50
mysql-test/include/ctype_like.inc
Normal file
@ -0,0 +1,50 @@
|
||||
select @@collation_connection;
|
||||
|
||||
#
|
||||
# Create a table with a nullable varchar(10) column
|
||||
# using current character_set_connection.
|
||||
create table t1 as select repeat(' ',10) as a union select null;
|
||||
alter table t1 add key(a);
|
||||
show create table t1;
|
||||
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
|
||||
explain select * from t1 where a like 'abc%';
|
||||
explain select * from t1 where a like concat('abc','%');
|
||||
select * from t1 where a like "abc%";
|
||||
select * from t1 where a like concat("abc","%");
|
||||
select * from t1 where a like "ABC%";
|
||||
select * from t1 where a like "test%";
|
||||
select * from t1 where a like "te_t";
|
||||
select * from t1 where a like "%a%";
|
||||
select * from t1 where a like "%abcd%";
|
||||
select * from t1 where a like "%abc\d%";
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug #2619 ucs2 LIKE comparison fails in some cases
|
||||
#
|
||||
|
||||
select 'AA' like 'AA';
|
||||
select 'AA' like 'A%A';
|
||||
select 'AA' like 'A%%A';
|
||||
select 'AA' like 'AA%';
|
||||
select 'AA' like '%AA%';
|
||||
select 'AA' like '%A';
|
||||
select 'AA' like '%AA';
|
||||
select 'AA' like 'A%A%';
|
||||
select 'AA' like '_%_%';
|
||||
select 'AA' like '%A%A';
|
||||
select 'AAA'like 'A%A%A';
|
||||
|
||||
select 'AZ' like 'AZ';
|
||||
select 'AZ' like 'A%Z';
|
||||
select 'AZ' like 'A%%Z';
|
||||
select 'AZ' like 'AZ%';
|
||||
select 'AZ' like '%AZ%';
|
||||
select 'AZ' like '%Z';
|
||||
select 'AZ' like '%AZ';
|
||||
select 'AZ' like 'A%Z%';
|
||||
select 'AZ' like '_%_%';
|
||||
select 'AZ' like '%A%Z';
|
||||
select 'AZ' like 'A_';
|
||||
select 'AZ' like '_Z';
|
||||
select 'AMZ'like 'A%M%Z';
|
4
mysql-test/include/have_utf16.inc
Normal file
4
mysql-test/include/have_utf16.inc
Normal file
@ -0,0 +1,4 @@
|
||||
-- require r/have_utf16.require
|
||||
disable_query_log;
|
||||
show collation like 'utf16_general_ci';
|
||||
enable_query_log;
|
4
mysql-test/include/have_utf32.inc
Normal file
4
mysql-test/include/have_utf32.inc
Normal file
@ -0,0 +1,4 @@
|
||||
-- require r/have_utf32.require
|
||||
disable_query_log;
|
||||
show collation like 'utf32_general_ci';
|
||||
enable_query_log;
|
7
mysql-test/include/have_utf8mb4.inc
Normal file
7
mysql-test/include/have_utf8mb4.inc
Normal file
@ -0,0 +1,7 @@
|
||||
--require r/have_utf8mb4.require
|
||||
|
||||
--disable_query_log
|
||||
|
||||
SHOW COLLATION LIKE 'utf8mb4_general_ci';
|
||||
|
||||
--enable_query_log
|
@ -53,6 +53,33 @@ select * from t1 where c1='b';
|
||||
c1
|
||||
a
|
||||
drop table t1;
|
||||
show collation like 'utf8mb4_test_ci';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf8mb4_test_ci utf8mb4 326 8
|
||||
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
|
||||
insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
c1
|
||||
a
|
||||
drop table t1;
|
||||
show collation like 'utf16_test_ci';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf16_test_ci utf16 327 8
|
||||
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
|
||||
insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
c1
|
||||
a
|
||||
drop table t1;
|
||||
show collation like 'utf32_test_ci';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf32_test_ci utf32 391 8
|
||||
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
|
||||
insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
c1
|
||||
a
|
||||
drop table t1;
|
||||
CREATE TABLE t1 (
|
||||
col1 varchar(100) character set utf8 collate utf8_test_ci
|
||||
);
|
||||
@ -373,16 +400,22 @@ select "foo" = "foo " collate latin1_test;
|
||||
The following tests check that two-byte collation IDs work
|
||||
select * from information_schema.collations where id>256 order by id;
|
||||
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
|
||||
utf8mb4_test_ci utf8mb4 326 8
|
||||
utf16_test_ci utf16 327 8
|
||||
utf8_phone_ci utf8 352 8
|
||||
utf8_test_ci utf8 353 8
|
||||
ucs2_test_ci ucs2 358 8
|
||||
ucs2_vn_ci ucs2 359 8
|
||||
utf32_test_ci utf32 391 8
|
||||
utf8_maxuserid_ci utf8 2047 8
|
||||
show collation like '%test%';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
latin1_test latin1 99 Yes 1
|
||||
utf8_test_ci utf8 353 8
|
||||
ucs2_test_ci ucs2 358 8
|
||||
utf8mb4_test_ci utf8mb4 326 8
|
||||
utf16_test_ci utf16 327 8
|
||||
utf32_test_ci utf32 391 8
|
||||
show collation like 'ucs2_vn_ci';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
ucs2_vn_ci ucs2 359 8
|
||||
|
@ -1683,3 +1683,59 @@ ARMENIAN CAPIT DA 2
|
||||
ARMENIAN CAPIT ECH 2
|
||||
ARMENIAN CAPIT ZA 2
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
|
||||
# Testing that only utf8mb4 is superset for utf8
|
||||
# No other Unicode character set pairs have superset/subset relations
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
utf8 CHAR CHARACTER SET utf8,
|
||||
utf8mb4 CHAR CHARACTER SET utf8mb4,
|
||||
ucs2 CHAR CHARACTER SET ucs2,
|
||||
utf16 CHAR CHARACTER SET utf16,
|
||||
utf32 CHAR CHARACTER SET utf32
|
||||
);
|
||||
INSERT INTO t1 VALUES ('','','','','');
|
||||
SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
|
||||
CHARSET(CONCAT(utf8, utf8mb4))
|
||||
utf8mb4
|
||||
SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
|
||||
CHARSET(CONCAT(utf8mb4, utf8))
|
||||
utf8mb4
|
||||
SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
|
||||
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
|
||||
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
|
||||
DROP TABLE t1;
|
||||
|
1038
mysql-test/r/ctype_utf16.result
Normal file
1038
mysql-test/r/ctype_utf16.result
Normal file
File diff suppressed because it is too large
Load Diff
2373
mysql-test/r/ctype_utf16_uca.result
Normal file
2373
mysql-test/r/ctype_utf16_uca.result
Normal file
File diff suppressed because it is too large
Load Diff
1052
mysql-test/r/ctype_utf32.result
Normal file
1052
mysql-test/r/ctype_utf32.result
Normal file
File diff suppressed because it is too large
Load Diff
2373
mysql-test/r/ctype_utf32_uca.result
Normal file
2373
mysql-test/r/ctype_utf32_uca.result
Normal file
File diff suppressed because it is too large
Load Diff
@ -1899,6 +1899,20 @@ CONVERT(a, CHAR) CONVERT(b, CHAR)
|
||||
DROP TABLE t1;
|
||||
End of 5.0 tests
|
||||
Start of 5.4 tests
|
||||
SET NAMES utf8mb3;
|
||||
SHOW VARIABLES LIKE 'character_set_results%';
|
||||
Variable_name Value
|
||||
character_set_results utf8
|
||||
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
|
||||
SHOW CREATE TABLE t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`a` char(1) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
DROP TABLE t1;
|
||||
SELECT _utf8mb3'test';
|
||||
test
|
||||
test
|
||||
CREATE TABLE t1 (
|
||||
clipid INT NOT NULL,
|
||||
Tape TINYTEXT,
|
||||
|
2250
mysql-test/r/ctype_utf8mb4.result
Normal file
2250
mysql-test/r/ctype_utf8mb4.result
Normal file
File diff suppressed because it is too large
Load Diff
2
mysql-test/r/have_utf16.require
Normal file
2
mysql-test/r/have_utf16.require
Normal file
@ -0,0 +1,2 @@
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
2
mysql-test/r/have_utf32.require
Normal file
2
mysql-test/r/have_utf32.require
Normal file
@ -0,0 +1,2 @@
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf32_general_ci utf32 60 Yes Yes 1
|
2
mysql-test/r/have_utf8mb4.require
Normal file
2
mysql-test/r/have_utf8mb4.require
Normal file
@ -0,0 +1,2 @@
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
|
@ -33,6 +33,36 @@
|
||||
</collation>
|
||||
</charset>
|
||||
|
||||
|
||||
<charset name="utf8mb4">
|
||||
<collation name="utf8mb4_test_ci" id="326">
|
||||
<rules>
|
||||
<reset>a</reset>
|
||||
<s>b</s>
|
||||
</rules>
|
||||
</collation>
|
||||
</charset>
|
||||
|
||||
<charset name="utf16">
|
||||
<collation name="utf16_test_ci" id="327">
|
||||
<rules>
|
||||
<reset>a</reset>
|
||||
<s>b</s>
|
||||
</rules>
|
||||
</collation>
|
||||
</charset>
|
||||
|
||||
|
||||
<charset name="utf32">
|
||||
<collation name="utf32_test_ci" id="391">
|
||||
<rules>
|
||||
<reset>a</reset>
|
||||
<s>b</s>
|
||||
</rules>
|
||||
</collation>
|
||||
</charset>
|
||||
|
||||
|
||||
<charset name="ucs2">
|
||||
<collation name="ucs2_test_ci" id="358">
|
||||
<rules>
|
||||
|
@ -162,8 +162,16 @@ SET @@character_set_client = utf8;
|
||||
SELECT @@character_set_client;
|
||||
@@character_set_client
|
||||
utf8
|
||||
SET @@character_set_client = utf8mb4;
|
||||
SELECT @@character_set_client;
|
||||
@@character_set_client
|
||||
utf8mb4
|
||||
SET @@character_set_client = ucs2;
|
||||
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'ucs2'
|
||||
SET @@character_set_client = utf16;
|
||||
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf16'
|
||||
SET @@character_set_client = utf32;
|
||||
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf32'
|
||||
SET @@character_set_client = cp866;
|
||||
SELECT @@character_set_client;
|
||||
@@character_set_client
|
||||
@ -422,7 +430,7 @@ ERROR 42000: Unknown character set: '100'
|
||||
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
|
||||
SELECT @total_charset;
|
||||
@total_charset
|
||||
36
|
||||
39
|
||||
'#--------------------FN_DYNVARS_010_10-------------------------#'
|
||||
SET @@character_set_client = abc;
|
||||
ERROR 42000: Unknown character set: 'abc'
|
||||
|
@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
|
||||
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
|
||||
SELECT @total_charset;
|
||||
@total_charset
|
||||
36
|
||||
39
|
||||
'#--------------------FN_DYNVARS_011_10-------------------------#'
|
||||
SET @@character_set_connection = abc;
|
||||
ERROR 42000: Unknown character set: 'abc'
|
||||
|
@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
|
||||
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
|
||||
SELECT @total_charset;
|
||||
@total_charset
|
||||
36
|
||||
39
|
||||
'#--------------------FN_DYNVARS_012_10-------------------------#'
|
||||
SET @@character_set_database = "grek";
|
||||
ERROR 42000: Unknown character set: 'grek'
|
||||
|
@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100'
|
||||
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
|
||||
SELECT @total_charset;
|
||||
@total_charset
|
||||
36
|
||||
39
|
||||
'#--------------------FN_DYNVARS_008_10-------------------------#'
|
||||
SET @@character_set_filesystem = abc;
|
||||
ERROR 42000: Unknown character set: 'abc'
|
||||
|
Binary file not shown.
@ -27,6 +27,9 @@
|
||||
--source include/have_sjis.inc
|
||||
--source include/have_utf8.inc
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--source include/load_sysvars.inc
|
||||
###################################################
|
||||
@ -163,9 +166,15 @@ SET @@character_set_client = armscii8;
|
||||
SELECT @@character_set_client;
|
||||
SET @@character_set_client = utf8;
|
||||
SELECT @@character_set_client;
|
||||
SET @@character_set_client = utf8mb4;
|
||||
SELECT @@character_set_client;
|
||||
|
||||
--error ER_WRONG_VALUE_FOR_VAR
|
||||
SET @@character_set_client = ucs2;
|
||||
--error ER_WRONG_VALUE_FOR_VAR
|
||||
SET @@character_set_client = utf16;
|
||||
--error ER_WRONG_VALUE_FOR_VAR
|
||||
SET @@character_set_client = utf32;
|
||||
|
||||
SET @@character_set_client = cp866;
|
||||
SELECT @@character_set_client;
|
||||
|
@ -27,6 +27,9 @@
|
||||
--source include/have_sjis.inc
|
||||
--source include/have_utf8.inc
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--source include/load_sysvars.inc
|
||||
###################################################
|
||||
|
@ -27,6 +27,9 @@
|
||||
--source include/have_sjis.inc
|
||||
--source include/have_utf8.inc
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--source include/load_sysvars.inc
|
||||
###################################################
|
||||
|
@ -27,6 +27,9 @@
|
||||
--source include/have_sjis.inc
|
||||
--source include/have_utf8.inc
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--source include/load_sysvars.inc
|
||||
|
||||
|
@ -27,6 +27,9 @@
|
||||
--source include/have_sjis.inc
|
||||
--source include/have_utf8.inc
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--source include/load_sysvars.inc
|
||||
################################################
|
||||
|
@ -1,4 +1,7 @@
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--disable_warnings
|
||||
drop table if exists t1;
|
||||
@ -40,6 +43,24 @@ insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
drop table t1;
|
||||
|
||||
show collation like 'utf8mb4_test_ci';
|
||||
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
|
||||
insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
drop table t1;
|
||||
|
||||
show collation like 'utf16_test_ci';
|
||||
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
|
||||
insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
drop table t1;
|
||||
|
||||
show collation like 'utf32_test_ci';
|
||||
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
|
||||
insert into t1 values ('a');
|
||||
select * from t1 where c1='b';
|
||||
drop table t1;
|
||||
|
||||
|
||||
#
|
||||
# Bug#41084 full-text index added to custom UCA collation not working
|
||||
|
@ -1,4 +1,7 @@
|
||||
-- source include/have_ucs2.inc
|
||||
-- source include/have_utf8mb4.inc
|
||||
-- source include/have_utf16.inc
|
||||
-- source include/have_utf32.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
@ -211,3 +214,73 @@ SELECT min(comment),count(*) FROM t1 GROUP BY ucs2_f;
|
||||
DROP TABLE t1;
|
||||
|
||||
# End of 4.1 tests
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
|
||||
--echo # Testing that only utf8mb4 is superset for utf8
|
||||
--echo # No other Unicode character set pairs have superset/subset relations
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1 (
|
||||
utf8 CHAR CHARACTER SET utf8,
|
||||
utf8mb4 CHAR CHARACTER SET utf8mb4,
|
||||
ucs2 CHAR CHARACTER SET ucs2,
|
||||
utf16 CHAR CHARACTER SET utf16,
|
||||
utf32 CHAR CHARACTER SET utf32
|
||||
);
|
||||
INSERT INTO t1 VALUES ('','','','','');
|
||||
|
||||
# utf8 is a subset of utf8mb4 only; it cannot be aggregated with ucs2, utf16 or utf32
|
||||
SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
|
||||
|
||||
|
||||
# utf8mb4 is superset only for utf8
|
||||
SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
|
||||
|
||||
|
||||
# ucs2 is not a superset for the other Unicode character sets
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
|
||||
|
||||
|
||||
# utf16 is not a superset for the other Unicode character sets
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
|
||||
|
||||
|
||||
# utf32 is not a superset for the other Unicode character sets
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
|
||||
--error ER_CANT_AGGREGATE_2COLLATIONS
|
||||
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
|
731
mysql-test/t/ctype_utf16.test
Normal file
731
mysql-test/t/ctype_utf16.test
Normal file
@ -0,0 +1,731 @@
|
||||
-- source include/have_utf16.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
--enable_warnings
|
||||
|
||||
--echo #
|
||||
--echo # Start of 5.5 tests
|
||||
--echo #
|
||||
|
||||
SET NAMES latin1;
|
||||
SET character_set_connection=utf16;
|
||||
select hex('a'), hex('a ');
|
||||
-- source include/endspace.inc
|
||||
|
||||
|
||||
# Check that incomplete utf16 characters in HEX notation
|
||||
# are left-padded with zeros
|
||||
#
|
||||
select hex(_utf16 0x44);
|
||||
select hex(_utf16 0x3344);
|
||||
select hex(_utf16 0x113344);
|
||||
|
||||
|
||||
# Check that 0x20 is only trimmed when it is
|
||||
# a part of real SPACE character, not just a part
|
||||
# of a multibyte sequence.
|
||||
# Note, CYRILLIC LETTER ER is used as an example, which
|
||||
# is stored as 0x0420 in utf16, thus contains 0x20 in the
|
||||
# low byte. The second character is THREE-PER-EM SPACE, U+2004,
|
||||
# which contains 0x20 in the high byte.
|
||||
|
||||
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
|
||||
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
|
||||
SELECT hex(word) FROM t1 ORDER BY word;
|
||||
SELECT hex(word2) FROM t1 ORDER BY word2;
|
||||
DELETE FROM t1;
|
||||
|
||||
#
|
||||
# Check that real spaces are correctly trimmed.
|
||||
#
|
||||
INSERT INTO t1 VALUES (X'042000200020',X'042000200020'), (X'200400200020', X'200400200020');
|
||||
SELECT hex(word) FROM t1 ORDER BY word;
|
||||
SELECT hex(word2) FROM t1 ORDER BY word2;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# Check LPAD/RPAD
|
||||
#
|
||||
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'0421'));
|
||||
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422'));
|
||||
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
|
||||
SELECT hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
|
||||
SELECT hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
|
||||
SELECT hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
|
||||
|
||||
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'0421'));
|
||||
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422'));
|
||||
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
|
||||
SELECT hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
|
||||
SELECT hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
|
||||
SELECT hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
|
||||
|
||||
CREATE TABLE t1 SELECT
|
||||
LPAD(_utf16 X'0420',10,_utf16 X'0421') l,
|
||||
RPAD(_utf16 X'0420',10,_utf16 X'0421') r;
|
||||
SHOW CREATE TABLE t1;
|
||||
select hex(l), hex(r) from t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
create table t1 (f1 char(30));
|
||||
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
|
||||
select lpad(f1, 12, "-o-/") from t1;
|
||||
drop table t1;
|
||||
|
||||
######################################################
|
||||
#
|
||||
# Test of like
|
||||
#
|
||||
|
||||
SET NAMES latin1;
|
||||
SET character_set_connection=utf16;
|
||||
--source include/ctype_like.inc
|
||||
|
||||
SET NAMES utf8;
|
||||
SET character_set_connection=utf16;
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
|
||||
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
|
||||
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
|
||||
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
|
||||
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
|
||||
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
|
||||
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
|
||||
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
|
||||
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16_bin ORDER BY BINARY a;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
|
||||
ENGINE=MyISAM CHARACTER SET utf16;
|
||||
INSERT INTO t1 (word) VALUES ("cat");
|
||||
SELECT * FROM t1 WHERE word LIKE "c%";
|
||||
SELECT * FROM t1 WHERE word LIKE "ca_";
|
||||
SELECT * FROM t1 WHERE word LIKE "cat";
|
||||
SELECT * FROM t1 WHERE word LIKE _utf16 x'00630025'; # "c%"
|
||||
SELECT * FROM t1 WHERE word LIKE _utf16 x'00630061005F'; # "ca_"
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# Check that INSERT() works fine.
|
||||
# This invokes charpos() function.
|
||||
select insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066);
|
||||
select insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066);
|
||||
|
||||
########################################################
|
||||
#
|
||||
# Bug 1264
|
||||
#
|
||||
# Description:
|
||||
#
|
||||
# When using a ucs2 table in MySQL,
|
||||
# either with ucs2_general_ci or ucs2_bin collation,
|
||||
# words are returned in an incorrect order when using ORDER BY
|
||||
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
|
||||
# the longest word *first* instead of last. I.E. The word "aardvark"
|
||||
# is in the results before the word "a".
|
||||
#
|
||||
# If there is no index for the column, the problem does not occur.
|
||||
#
|
||||
# Interestingly, if there is no second column, the words are returned
|
||||
# in the correct order.
|
||||
#
|
||||
# According to EXPLAIN, it looks like when the output includes columns that
|
||||
# are not part of the index sorted on, it does a filesort, which fails.
|
||||
# Using a straight index yields correct results.
|
||||
|
||||
SET NAMES latin1;
|
||||
|
||||
#
|
||||
# Two fields, index
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
word VARCHAR(64),
|
||||
bar INT(11) default 0,
|
||||
PRIMARY KEY (word))
|
||||
ENGINE=MyISAM
|
||||
CHARSET utf16
|
||||
COLLATE utf16_general_ci ;
|
||||
|
||||
INSERT INTO t1 (word) VALUES ("aar");
|
||||
INSERT INTO t1 (word) VALUES ("a");
|
||||
INSERT INTO t1 (word) VALUES ("aardvar");
|
||||
INSERT INTO t1 (word) VALUES ("aardvark");
|
||||
INSERT INTO t1 (word) VALUES ("aardvara");
|
||||
INSERT INTO t1 (word) VALUES ("aardvarz");
|
||||
EXPLAIN SELECT * FROM t1 ORDER BY word;
|
||||
SELECT * FROM t1 ORDER BY word;
|
||||
EXPLAIN SELECT word FROM t1 ORDER BY word;
|
||||
SELECT word FROM t1 ORDER by word;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# One field, index
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
word VARCHAR(64) ,
|
||||
PRIMARY KEY (word))
|
||||
ENGINE=MyISAM
|
||||
CHARSET utf16
|
||||
COLLATE utf16_general_ci;
|
||||
|
||||
INSERT INTO t1 (word) VALUES ("aar");
|
||||
INSERT INTO t1 (word) VALUES ("a");
|
||||
INSERT INTO t1 (word) VALUES ("aardvar");
|
||||
INSERT INTO t1 (word) VALUES ("aardvark");
|
||||
INSERT INTO t1 (word) VALUES ("aardvara");
|
||||
INSERT INTO t1 (word) VALUES ("aardvarz");
|
||||
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
|
||||
SELECT * FROM t1 ORDER BY word;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# Two fields, no index
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
word TEXT,
|
||||
bar INT(11) AUTO_INCREMENT,
|
||||
PRIMARY KEY (bar))
|
||||
ENGINE=MyISAM
|
||||
CHARSET utf16
|
||||
COLLATE utf16_general_ci ;
|
||||
INSERT INTO t1 (word) VALUES ("aar");
|
||||
INSERT INTO t1 (word) VALUES ("a" );
|
||||
INSERT INTO t1 (word) VALUES ("aardvar");
|
||||
INSERT INTO t1 (word) VALUES ("aardvark");
|
||||
INSERT INTO t1 (word) VALUES ("aardvara");
|
||||
INSERT INTO t1 (word) VALUES ("aardvarz");
|
||||
EXPLAIN SELECT * FROM t1 ORDER BY word;
|
||||
SELECT * FROM t1 ORDER BY word;
|
||||
EXPLAIN SELECT word FROM t1 ORDER BY word;
|
||||
SELECT word FROM t1 ORDER BY word;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# END OF Bug 1264 test
|
||||
#
|
||||
########################################################
|
||||
|
||||
|
||||
#
|
||||
# Check alignment for from-binary-conversion with CAST and CONVERT
|
||||
#
|
||||
SELECT hex(cast(0xAA as char character set utf16));
|
||||
SELECT hex(convert(0xAA using utf16));
|
||||
|
||||
#
|
||||
# Check alignment for string types
|
||||
#
|
||||
CREATE TABLE t1 (a char(10) character set utf16);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a varchar(10) character set utf16);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a text character set utf16);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a mediumtext character set utf16);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a longtext character set utf16);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
##
|
||||
## Bug #5024 Server crashes with queries on fields
|
||||
## with certain charset/collation settings
|
||||
##
|
||||
##
|
||||
#create table t1 (s1 char character set utf16 collate utf16_czech_ci);
|
||||
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
|
||||
#select s1 from t1 where s1 > 'a' order by s1;
|
||||
#drop table t1;
|
||||
#
|
||||
|
||||
#
|
||||
# Bug #5081 : UCS2 fields are filled with '0x2020'
|
||||
# after extending field length
|
||||
#
|
||||
create table t1(a char(1)) default charset utf16;
|
||||
insert into t1 values ('a'),('b'),('c');
|
||||
alter table t1 modify a char(5);
|
||||
select a, hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Check preparing a statement from a UTF16 string
|
||||
#
|
||||
set @ivar= 1234;
|
||||
set @str1 = 'select ?';
|
||||
set @str2 = convert(@str1 using utf16);
|
||||
prepare stmt1 from @str2;
|
||||
execute stmt1 using @ivar;
|
||||
|
||||
#
|
||||
# Check that utf16 works with ENUM and SET type
|
||||
#
|
||||
set names utf8;
|
||||
create table t1 (a enum('x','y','z') character set utf16);
|
||||
show create table t1;
|
||||
insert into t1 values ('x');
|
||||
insert into t1 values ('y');
|
||||
insert into t1 values ('z');
|
||||
select a, hex(a) from t1 order by a;
|
||||
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16;
|
||||
show create table t1;
|
||||
insert into t1 values ('D');
|
||||
insert into t1 values ('E ');
|
||||
insert into t1 values ('ä');
|
||||
insert into t1 values ('ö');
|
||||
insert into t1 values ('ü');
|
||||
select a, hex(a) from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf16);
|
||||
show create table t1;
|
||||
insert into t1 values ('x');
|
||||
insert into t1 values ('y');
|
||||
insert into t1 values ('z');
|
||||
insert into t1 values ('x,y');
|
||||
insert into t1 values ('x,y,z,ä,ö,ü');
|
||||
select a, hex(a) from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
|
||||
#
|
||||
create table t1(a enum('a','b','c')) default character set utf16;
|
||||
insert into t1 values('a'),('b'),('c');
|
||||
alter table t1 add b char(1);
|
||||
show warnings;
|
||||
select * from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
SET NAMES latin1;
|
||||
SET collation_connection='utf16_general_ci';
|
||||
-- source include/ctype_filesort.inc
|
||||
-- source include/ctype_like_escape.inc
|
||||
SET NAMES latin1;
|
||||
SET collation_connection='utf16_bin';
|
||||
-- source include/ctype_filesort.inc
|
||||
-- source include/ctype_like_escape.inc
|
||||
|
||||
#
|
||||
# Bug#10344 Some string functions fail for UCS2
|
||||
#
|
||||
select hex(substr(_utf16 0x00e400e50068,1));
|
||||
select hex(substr(_utf16 0x00e400e50068,2));
|
||||
select hex(substr(_utf16 0x00e400e50068,3));
|
||||
select hex(substr(_utf16 0x00e400e50068,-1));
|
||||
select hex(substr(_utf16 0x00e400e50068,-2));
|
||||
select hex(substr(_utf16 0x00e400e50068,-3));
|
||||
select hex(substr(_utf16 0x00e400e5D800DC00,1));
|
||||
select hex(substr(_utf16 0x00e400e5D800DC00,2));
|
||||
select hex(substr(_utf16 0x00e400e5D800DC00,3));
|
||||
select hex(substr(_utf16 0x00e400e5D800DC00,-1));
|
||||
select hex(substr(_utf16 0x00e400e5D800DC00,-2));
|
||||
select hex(substr(_utf16 0x00e400e5D800DC00,-3));
|
||||
|
||||
SET NAMES latin1;
|
||||
|
||||
##
|
||||
## Bug#8235
|
||||
##
|
||||
## This bug also helped to find another problem that
|
||||
## INSERT of a UCS2 string containing a negative number
|
||||
## into an unsigned int column didn't produce warnings.
|
||||
## This test covers both problems.
|
||||
##
|
||||
##SET collation_connection='ucs2_swedish_ci';
|
||||
##CREATE TABLE t1 (Field1 int(10) default '0');
|
||||
### no warnings, negative numbers are allowed
|
||||
##INSERT INTO t1 VALUES ('-1');
|
||||
##SELECT * FROM t1;
|
||||
##DROP TABLE t1;
|
||||
##CREATE TABLE t1 (Field1 int(10) unsigned default '0');
|
||||
### this should generate a "Data truncated" warning
|
||||
##INSERT INTO t1 VALUES ('-1');
|
||||
##DROP TABLE t1;
|
||||
##SET NAMES latin1;
|
||||
|
||||
###
|
||||
### Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
|
||||
###
|
||||
##--disable_warnings
|
||||
##create table t1(f1 varchar(5) CHARACTER SET utf16 COLLATE utf16_bin NOT NULL) engine=InnoDB;
|
||||
##--enable_warnings
|
||||
##insert into t1 values('a');
|
||||
##create index t1f1 on t1(f1);
|
||||
##select f1 from t1 where f1 like 'a%';
|
||||
##drop table t1;
|
||||
|
||||
#
|
||||
# Bug#9442 Set parameter make query fail if column character set is UCS2
|
||||
#
|
||||
create table t1 (utext varchar(20) character set utf16);
|
||||
insert into t1 values ("lily");
|
||||
insert into t1 values ("river");
|
||||
prepare stmt from 'select utext from t1 where utext like ?';
|
||||
set @param1='%%';
|
||||
execute stmt using @param1;
|
||||
execute stmt using @param1;
|
||||
select utext from t1 where utext like '%%';
|
||||
drop table t1;
|
||||
deallocate prepare stmt;
|
||||
|
||||
#
|
||||
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
|
||||
#
|
||||
create table t1 (
|
||||
a char(10) character set utf16 not null,
|
||||
index a (a)
|
||||
) engine=myisam;
|
||||
insert into t1 values (repeat(0x201f, 10));
|
||||
insert into t1 values (repeat(0x2020, 10));
|
||||
insert into t1 values (repeat(0x2021, 10));
|
||||
# make sure "index read" is used
|
||||
explain select hex(a) from t1 order by a;
|
||||
select hex(a) from t1 order by a;
|
||||
alter table t1 drop index a;
|
||||
select hex(a) from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
##
|
||||
## Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
|
||||
## over a 'ucs2' field uses a temporary table
|
||||
##
|
||||
##CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
|
||||
##INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
|
||||
##SELECT id, MIN(s) FROM t1 GROUP BY id;
|
||||
##DROP TABLE t1;
|
||||
|
||||
###
|
||||
### Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
|
||||
###
|
||||
##
|
||||
##--disable_warnings
|
||||
##drop table if exists bug20536;
|
||||
##--enable_warnings
|
||||
##
|
||||
##set names latin1;
|
||||
##create table bug20536 (id bigint not null auto_increment primary key, name
|
||||
##varchar(255) character set ucs2 not null);
|
||||
##insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
|
||||
##select md5(name) from bug20536;
|
||||
##select sha1(name) from bug20536;
|
||||
##select make_set(3, name, upper(name)) from bug20536;
|
||||
##select export_set(5, name, upper(name)) from bug20536;
|
||||
##select export_set(5, name, upper(name), ",", 5) from bug20536;
|
||||
|
||||
#
|
||||
# Bug #20108: corrupted default enum value for a ucs2 field
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
status enum('active','passive') character set utf16 collate utf16_general_ci
|
||||
NOT NULL default 'passive'
|
||||
);
|
||||
SHOW CREATE TABLE t1;
|
||||
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
|
||||
SHOW CREATE TABLE t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
##CREATE TABLE t2 (
|
||||
## status enum('active','passive') collate ucs2_turkish_ci
|
||||
## NOT NULL default 'passive'
|
||||
##);
|
||||
##SHOW CREATE TABLE t2;
|
||||
##ALTER TABLE t2 ADD a int NOT NULL AFTER status;
|
||||
##DROP TABLE t2;
|
||||
|
||||
|
||||
--echo End of 4.1 tests
|
||||
|
||||
#
|
||||
# Conversion from a UTF16 string to a decimal column
|
||||
#
|
||||
CREATE TABLE t1 (a varchar(64) character set utf16, b decimal(10,3));
|
||||
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
|
||||
update t1 set b=a;
|
||||
SELECT *, hex(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Bug#9442 Set parameter make query fail if column character set is UCS2
|
||||
#
|
||||
create table t1 (utext varchar(20) character set utf16);
|
||||
insert into t1 values ("lily");
|
||||
insert into t1 values ("river");
|
||||
prepare stmt from 'select utext from t1 where utext like ?';
|
||||
set @param1='%%';
|
||||
execute stmt using @param1;
|
||||
execute stmt using @param1;
|
||||
select utext from t1 where utext like '%%';
|
||||
drop table t1;
|
||||
deallocate prepare stmt;
|
||||
|
||||
#
|
||||
# Bug#22638 SOUNDEX broken for international characters
|
||||
#
|
||||
set names latin1;
|
||||
set character_set_connection=utf16;
|
||||
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
|
||||
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
|
||||
select 'mood' sounds like 'mud';
|
||||
# Cyrillic A, BE, VE
|
||||
select hex(soundex(_utf16 0x041004110412));
|
||||
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered a letter
|
||||
select hex(soundex(_utf16 0x00BF00C0));
|
||||
set names latin1;
|
||||
|
||||
#
|
||||
# Bug #14290: character_maximum_length for text fields
|
||||
#
|
||||
create table t1(a blob, b text charset utf16);
|
||||
select data_type, character_octet_length, character_maximum_length
|
||||
from information_schema.columns where table_name='t1';
|
||||
drop table t1;
|
||||
|
||||
|
||||
set names latin1;
|
||||
set collation_connection=utf16_general_ci;
|
||||
#
|
||||
# Testing cs->coll->instr()
|
||||
#
|
||||
select position('bb' in 'abba');
|
||||
|
||||
#
|
||||
# Testing cs->coll->hash_sort()
|
||||
#
|
||||
create table t1 (a varchar(10) character set utf16) engine=heap;
|
||||
insert into t1 values ('a'),('A'),('b'),('B');
|
||||
select * from t1 where a='a' order by binary a;
|
||||
select hex(min(binary a)),count(*) from t1 group by a;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->numchars()
|
||||
#
|
||||
select char_length('abcd'), octet_length('abcd');
|
||||
select char_length(_utf16 0xD800DC00), octet_length(_utf16 0xD800DC00);
|
||||
select char_length(_utf16 0xD87FDFFF), octet_length(_utf16 0xD87FDFFF);
|
||||
|
||||
#
|
||||
# Testing cs->cset->charpos()
|
||||
#
|
||||
select left('abcd',2);
|
||||
select hex(left(_utf16 0xD800DC00D87FDFFF, 1));
|
||||
select hex(right(_utf16 0xD800DC00D87FDFFF, 1));
|
||||
|
||||
#
|
||||
# Testing cs->cset->well_formed_length()
|
||||
#
|
||||
create table t1 (a varchar(10) character set utf16);
|
||||
# Bad sequences
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf16 0xD800);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf16 0xDC00);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf16 0xD800D800);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf16 0xD800E800);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf16 0xD8000800);
|
||||
# Good sequences
|
||||
insert into t1 values (_utf16 0xD800DC00);
|
||||
insert into t1 values (_utf16 0xD800DCFF);
|
||||
insert into t1 values (_utf16 0xDBFFDC00);
|
||||
insert into t1 values (_utf16 0xDBFFDCFF);
|
||||
select hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#32393 Character sets: illegal characters in utf16 columns
|
||||
#
|
||||
# Tests that cs->cset->wc_mb() doesn't accept surrogate parts
|
||||
#
|
||||
# via alter
|
||||
#
|
||||
create table t1 (s1 varchar(50) character set ucs2);
|
||||
insert into t1 values (0xdf84);
|
||||
alter table t1 modify column s1 varchar(50) character set utf16;
|
||||
select hex(s1) from t1;
|
||||
drop table t1;
|
||||
#
|
||||
# via update
|
||||
#
|
||||
create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16);
|
||||
insert into t1 (s1) values (0xdf84);
|
||||
update t1 set s2 = s1;
|
||||
select hex(s2) from t1;
|
||||
drop table t1;
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Testing cs->cset->lengthsp()
|
||||
#
|
||||
create table t1 (a char(10)) character set utf16;
|
||||
insert into t1 values ('a ');
|
||||
select hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->caseup() and cs->cset->casedn()
|
||||
#
|
||||
select upper('abcd'), lower('ABCD');
|
||||
|
||||
#
|
||||
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf16
|
||||
# Testing cs->cset->snprintf()
|
||||
#
|
||||
#create table t1 (a date);
|
||||
#insert into t1 values ('2007-09-16');
|
||||
#select * from t1;
|
||||
#drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->l10tostr
|
||||
# !!! Not used in the code
|
||||
|
||||
#
|
||||
# Testing cs->cset->ll10tostr
|
||||
#
|
||||
create table t1 (a varchar(10) character set utf16);
|
||||
insert into t1 values (123456);
|
||||
select a, hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
|
||||
# Testing cs->cset->fill
|
||||
# SOUNDEX fills strings with DIGIT ZERO up to four characters
|
||||
select hex(soundex('a'));
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntol
|
||||
# !!! Not used in the code
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntoul
|
||||
#
|
||||
create table t1 (a enum ('a','b','c')) character set utf16;
|
||||
insert into t1 values ('1');
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntoll and cs->cset->strntoull
|
||||
#
|
||||
set names latin1;
|
||||
select hex(conv(convert('123' using utf16), -10, 16));
|
||||
select hex(conv(convert('123' using utf16), 10, 16));
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntod
|
||||
#
|
||||
set names latin1;
|
||||
set character_set_connection=utf16;
|
||||
select 1.1 + '1.2';
|
||||
select 1.1 + '1.2xxx';
|
||||
|
||||
# Testing strntoll10_utf16
|
||||
# Testing cs->cset->strtoll10
|
||||
select left('aaa','1');
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntoull10rnd
|
||||
#
|
||||
create table t1 (a int);
|
||||
insert into t1 values ('-1234.1e2');
|
||||
insert into t1 values ('-1234.1e2xxxx');
|
||||
insert into t1 values ('-1234.1e2 ');
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->scan
|
||||
#
|
||||
create table t1 (a int);
|
||||
insert into t1 values ('1 ');
|
||||
insert into t1 values ('1 x');
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing auto-conversion to TEXT
|
||||
#
|
||||
create table t1 (a varchar(17000) character set utf16);
|
||||
show create table t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing that maximum possible key length is 1000 bytes
|
||||
#
|
||||
create table t1 (a varchar(250) character set utf16 primary key);
|
||||
show create table t1;
|
||||
drop table t1;
|
||||
--error ER_TOO_LONG_KEY
|
||||
create table t1 (a varchar(334) character set utf16 primary key);
|
||||
|
||||
#
|
||||
# Conversion to utf8
|
||||
#
|
||||
create table t1 (a char(1) character set utf16);
|
||||
insert into t1 values (0xD800DC00),(0xD800DCFF),(0xDB7FDC00),(0xDB7FDCFF);
|
||||
insert into t1 values (0x00C0), (0x00FF),(0xE000), (0xFFFF);
|
||||
select hex(a), hex(@a:=convert(a using utf8mb4)), hex(convert(@a using utf16)) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Test basic regex functionality
|
||||
#
|
||||
set collation_connection=utf16_general_ci;
|
||||
--source include/ctype_regex.inc
|
||||
set names latin1;
|
||||
|
||||
#
|
||||
# Test how character set works with date/time
|
||||
#
|
||||
SET collation_connection=utf16_general_ci;
|
||||
--source include/ctype_datetime.inc
|
||||
SET NAMES latin1;
|
||||
|
||||
#
|
||||
# Bug#33073 Character sets: ordering fails with utf32
|
||||
#
|
||||
SET collation_connection=utf16_general_ci;
|
||||
CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
|
||||
SHOW CREATE TABLE t1;
|
||||
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
|
||||
SELECT * FROM t1 ORDER BY s1;
|
||||
SET max_sort_length=4;
|
||||
SELECT * FROM t1 ORDER BY s1;
|
||||
DROP TABLE t1;
|
||||
SET max_sort_length=DEFAULT;
|
||||
SET NAMES latin1;
|
||||
|
||||
|
||||
#
|
||||
## TODO: add tests for all engines
|
||||
#
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
290
mysql-test/t/ctype_utf16_uca.test
Normal file
290
mysql-test/t/ctype_utf16_uca.test
Normal file
@ -0,0 +1,290 @@
|
||||
-- source include/have_utf16.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
--enable_warnings
|
||||
|
||||
--echo #
|
||||
--echo # Start of 5.5 tests
|
||||
--echo #
|
||||
|
||||
set names utf8;
|
||||
set collation_connection=utf16_unicode_ci;
|
||||
select hex('a'), hex('a ');
|
||||
-- source include/endspace.inc
|
||||
|
||||
#
|
||||
# Bug #6787 LIKE not working properly with _ and utf8 data
|
||||
#
|
||||
select 'c' like '\_' as want0;
|
||||
|
||||
#
|
||||
# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
|
||||
#
|
||||
CREATE TABLE t (
|
||||
c char(20) NOT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARACTER SET utf16 COLLATE utf16_unicode_ci;
|
||||
INSERT INTO t VALUES ('a'),('ab'),('aba');
|
||||
ALTER TABLE t ADD INDEX (c);
|
||||
SELECT c FROM t WHERE c LIKE 'a%';
|
||||
DROP TABLE t;
|
||||
|
||||
|
||||
create table t1 (c1 char(10) character set utf16 collate utf16_bin);
|
||||
|
||||
#
|
||||
# Basic Latin
|
||||
#
|
||||
insert into t1 values ('A'),('a');
|
||||
insert into t1 values ('B'),('b');
|
||||
insert into t1 values ('C'),('c');
|
||||
insert into t1 values ('D'),('d');
|
||||
insert into t1 values ('E'),('e');
|
||||
insert into t1 values ('F'),('f');
|
||||
insert into t1 values ('G'),('g');
|
||||
insert into t1 values ('H'),('h');
|
||||
insert into t1 values ('I'),('i');
|
||||
insert into t1 values ('J'),('j');
|
||||
insert into t1 values ('K'),('k');
|
||||
insert into t1 values ('L'),('l');
|
||||
insert into t1 values ('M'),('m');
|
||||
insert into t1 values ('N'),('n');
|
||||
insert into t1 values ('O'),('o');
|
||||
insert into t1 values ('P'),('p');
|
||||
insert into t1 values ('Q'),('q');
|
||||
insert into t1 values ('R'),('r');
|
||||
insert into t1 values ('S'),('s');
|
||||
insert into t1 values ('T'),('t');
|
||||
insert into t1 values ('U'),('u');
|
||||
insert into t1 values ('V'),('v');
|
||||
insert into t1 values ('W'),('w');
|
||||
insert into t1 values ('X'),('x');
|
||||
insert into t1 values ('Y'),('y');
|
||||
insert into t1 values ('Z'),('z');
|
||||
|
||||
#
|
||||
# Latin1 supplement
|
||||
#
|
||||
insert into t1 values (0x00e0),(0x00c0);
|
||||
insert into t1 values (0x00e1),(0x00c1);
|
||||
insert into t1 values (0x00e2),(0x00c2);
|
||||
insert into t1 values (0x00e3),(0x00c3);
|
||||
insert into t1 values (0x00e4),(0x00c4);
|
||||
insert into t1 values (0x00e5),(0x00c5);
|
||||
insert into t1 values (0x00e6),(0x00c6);
|
||||
insert into t1 values (0x00e7),(0x00c7);
|
||||
insert into t1 values (0x00e8),(0x00c8);
|
||||
insert into t1 values (0x00e9),(0x00c9);
|
||||
insert into t1 values (0x00ea),(0x00ca);
|
||||
insert into t1 values (0x00eb),(0x00cb);
|
||||
insert into t1 values (0x00ec),(0x00cc);
|
||||
insert into t1 values (0x00ed),(0x00cd);
|
||||
insert into t1 values (0x00ee),(0x00ce);
|
||||
insert into t1 values (0x00ef),(0x00cf);
|
||||
|
||||
insert into t1 values (0x00f0),(0x00d0);
|
||||
insert into t1 values (0x00f1),(0x00d1);
|
||||
insert into t1 values (0x00f2),(0x00d2);
|
||||
insert into t1 values (0x00f3),(0x00d3);
|
||||
insert into t1 values (0x00f4),(0x00d4);
|
||||
insert into t1 values (0x00f5),(0x00d5);
|
||||
insert into t1 values (0x00f6),(0x00d6);
|
||||
insert into t1 values (0x00f7),(0x00d7);
|
||||
insert into t1 values (0x00f8),(0x00d8);
|
||||
insert into t1 values (0x00f9),(0x00d9);
|
||||
insert into t1 values (0x00fa),(0x00da);
|
||||
insert into t1 values (0x00fb),(0x00db);
|
||||
insert into t1 values (0x00fc),(0x00dc);
|
||||
insert into t1 values (0x00fd),(0x00dd);
|
||||
insert into t1 values (0x00fe),(0x00de);
|
||||
insert into t1 values (0x00ff),(0x00df);
|
||||
|
||||
#
|
||||
# Latin extended-A, 0100-017F
|
||||
#
|
||||
insert into t1 values (0x0100),(0x0101),(0x0102),(0x0103);
|
||||
insert into t1 values (0x0104),(0x0105),(0x0106),(0x0107);
|
||||
insert into t1 values (0x0108),(0x0109),(0x010a),(0x010b);
|
||||
insert into t1 values (0x010c),(0x010d),(0x010e),(0x010f);
|
||||
insert into t1 values (0x0110),(0x0111),(0x0112),(0x0113);
|
||||
insert into t1 values (0x0114),(0x0115),(0x0116),(0x0117);
|
||||
insert into t1 values (0x0118),(0x0119),(0x011a),(0x011b);
|
||||
insert into t1 values (0x011c),(0x011d),(0x011e),(0x011f);
|
||||
insert into t1 values (0x0120),(0x0121),(0x0122),(0x0123);
|
||||
insert into t1 values (0x0124),(0x0125),(0x0126),(0x0127);
|
||||
insert into t1 values (0x0128),(0x0129),(0x012a),(0x012b);
|
||||
insert into t1 values (0x012c),(0x012d),(0x012e),(0x012f);
|
||||
insert into t1 values (0x0130),(0x0131),(0x0132),(0x0133);
|
||||
insert into t1 values (0x0134),(0x0135),(0x0136),(0x0137);
|
||||
insert into t1 values (0x0138),(0x0139),(0x013a),(0x013b);
|
||||
insert into t1 values (0x013c),(0x013d),(0x013e),(0x013f);
|
||||
insert into t1 values (0x0140),(0x0141),(0x0142),(0x0143);
|
||||
insert into t1 values (0x0144),(0x0145),(0x0146),(0x0147);
|
||||
insert into t1 values (0x0148),(0x0149),(0x014a),(0x014b);
|
||||
insert into t1 values (0x014c),(0x014d),(0x014e),(0x014f);
|
||||
insert into t1 values (0x0150),(0x0151),(0x0152),(0x0153);
|
||||
insert into t1 values (0x0154),(0x0155),(0x0156),(0x0157);
|
||||
insert into t1 values (0x0158),(0x0159),(0x015a),(0x015b);
|
||||
insert into t1 values (0x015c),(0x015d),(0x015e),(0x015f);
|
||||
insert into t1 values (0x0160),(0x0161),(0x0162),(0x0163);
|
||||
insert into t1 values (0x0164),(0x0165),(0x0166),(0x0167);
|
||||
insert into t1 values (0x0168),(0x0169),(0x016a),(0x016b);
|
||||
insert into t1 values (0x016c),(0x016d),(0x016e),(0x016f);
|
||||
insert into t1 values (0x0170),(0x0171),(0x0172),(0x0173);
|
||||
insert into t1 values (0x0174),(0x0175),(0x0176),(0x0177);
|
||||
insert into t1 values (0x0178),(0x0179),(0x017a),(0x017b);
|
||||
insert into t1 values (0x017c),(0x017d),(0x017e),(0x017f);
|
||||
|
||||
#
|
||||
# Latin extended-B, 0180-024F
|
||||
#
|
||||
insert into t1 values (0x0180),(0x0181),(0x0182),(0x0183);
|
||||
insert into t1 values (0x0184),(0x0185),(0x0186),(0x0187);
|
||||
insert into t1 values (0x0188),(0x0189),(0x018a),(0x018b);
|
||||
insert into t1 values (0x018c),(0x018d),(0x018e),(0x018f);
|
||||
insert into t1 values (0x0190),(0x0191),(0x0192),(0x0193);
|
||||
insert into t1 values (0x0194),(0x0195),(0x0196),(0x0197);
|
||||
insert into t1 values (0x0198),(0x0199),(0x019a),(0x019b);
|
||||
insert into t1 values (0x019c),(0x019d),(0x019e),(0x019f);
|
||||
insert into t1 values (0x01a0),(0x01a1),(0x01a2),(0x01a3);
|
||||
insert into t1 values (0x01a4),(0x01a5),(0x01a6),(0x01a7);
|
||||
insert into t1 values (0x01a8),(0x01a9),(0x01aa),(0x01ab);
|
||||
insert into t1 values (0x01ac),(0x01ad),(0x01ae),(0x01af);
|
||||
insert into t1 values (0x01b0),(0x01b1),(0x01b2),(0x01b3);
|
||||
insert into t1 values (0x01b4),(0x01b5),(0x01b6),(0x01b7);
|
||||
insert into t1 values (0x01b8),(0x01b9),(0x01ba),(0x01bb);
|
||||
insert into t1 values (0x01bc),(0x01bd),(0x01be),(0x01bf);
|
||||
insert into t1 values (0x01c0),(0x01c1),(0x01c2),(0x01c3);
|
||||
insert into t1 values (0x01c4),(0x01c5),(0x01c6),(0x01c7);
|
||||
insert into t1 values (0x01c8),(0x01c9),(0x01ca),(0x01cb);
|
||||
insert into t1 values (0x01cc),(0x01cd),(0x01ce),(0x01cf);
|
||||
insert into t1 values (0x01d0),(0x01d1),(0x01d2),(0x01d3);
|
||||
insert into t1 values (0x01d4),(0x01d5),(0x01d6),(0x01d7);
|
||||
insert into t1 values (0x01d8),(0x01d9),(0x01da),(0x01db);
|
||||
insert into t1 values (0x01dc),(0x01dd),(0x01de),(0x01df);
|
||||
insert into t1 values (0x01e0),(0x01e1),(0x01e2),(0x01e3);
|
||||
insert into t1 values (0x01e4),(0x01e5),(0x01e6),(0x01e7);
|
||||
insert into t1 values (0x01e8),(0x01e9),(0x01ea),(0x01eb);
|
||||
insert into t1 values (0x01ec),(0x01ed),(0x01ee),(0x01ef);
|
||||
insert into t1 values (0x01f0),(0x01f1),(0x01f2),(0x01f3);
|
||||
insert into t1 values (0x01f4),(0x01f5),(0x01f6),(0x01f7);
|
||||
insert into t1 values (0x01f8),(0x01f9),(0x01fa),(0x01fb);
|
||||
insert into t1 values (0x01fc),(0x01fd),(0x01fe),(0x01ff);
|
||||
|
||||
|
||||
insert into t1 values ('AA'),('Aa'),('aa'),('aA');
|
||||
insert into t1 values ('CH'),('Ch'),('ch'),('cH');
|
||||
insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
|
||||
insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
|
||||
insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
|
||||
insert into t1 values ('LL'),('Ll'),('ll'),('lL');
|
||||
insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
|
||||
insert into t1 values ('OE'),('Oe'),('oe'),('oE');
|
||||
insert into t1 values ('SS'),('Ss'),('ss'),('sS');
|
||||
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
|
||||
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_icelandic_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_latvian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_romanian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovenian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_polish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_estonian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_swedish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_turkish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_czech_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_danish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_lithuanian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovak_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish2_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_roman_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_esperanto_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_hungarian_ci;
|
||||
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#5324
|
||||
#
|
||||
SET NAMES utf8;
|
||||
|
||||
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_general_ci NOT NULL, INDEX (c));
|
||||
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
|
||||
#Check one row
|
||||
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_general_ci;
|
||||
INSERT INTO t1 VALUES (0x039C03C903B4);
|
||||
#Check two rows
|
||||
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
|
||||
COLLATE utf16_general_ci ORDER BY c;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
|
||||
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
|
||||
#Check one row
|
||||
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
|
||||
INSERT INTO t1 VALUES (0x039C03C903B4);
|
||||
#Check two rows
|
||||
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
|
||||
COLLATE utf16_unicode_ci ORDER BY c;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
|
||||
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
|
||||
#Check one row
|
||||
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
|
||||
INSERT INTO t1 VALUES (0x039C03C903B4);
|
||||
#Check two rows
|
||||
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
|
||||
COLLATE utf16_unicode_ci ORDER BY c;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
SET NAMES utf8;
|
||||
SET @test_character_set='utf16';
|
||||
SET @test_collation='utf16_swedish_ci';
|
||||
-- source include/ctype_common.inc
|
||||
|
||||
|
||||
SET collation_connection='utf16_unicode_ci';
|
||||
-- source include/ctype_filesort.inc
|
||||
-- source include/ctype_like_escape.inc
|
||||
|
||||
--echo End of 4.1 tests
|
||||
|
||||
#
|
||||
# Check UPPER/LOWER changing length
|
||||
#
|
||||
# Result shorter than argument
|
||||
CREATE TABLE t1 (id int, a varchar(30) character set utf16);
|
||||
INSERT INTO t1 VALUES (1, 0x01310069), (2, 0x01310131);
|
||||
INSERT INTO t1 VALUES (3, 0x00690069), (4, 0x01300049);
|
||||
INSERT INTO t1 VALUES (5, 0x01300130), (6, 0x00490049);
|
||||
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
|
||||
FROM t1 ORDER BY id;
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf16 collate utf16_turkish_ci;
|
||||
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
|
||||
FROM t1 ORDER BY id;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Bug #27079 Crash while grouping empty ucs2 strings
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
c1 text character set utf16 collate utf16_polish_ci NOT NULL
|
||||
) ENGINE=MyISAM;
|
||||
insert into t1 values (''),('a');
|
||||
SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
|
||||
DROP TABLE IF EXISTS t1;
|
||||
|
||||
|
||||
#
|
||||
# Test basic regex functionality
|
||||
#
|
||||
set collation_connection=utf16_unicode_ci;
|
||||
--source include/ctype_regex.inc
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
784
mysql-test/t/ctype_utf32.test
Normal file
784
mysql-test/t/ctype_utf32.test
Normal file
@ -0,0 +1,784 @@
|
||||
-- source include/have_utf32.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
--enable_warnings
|
||||
|
||||
--echo #
|
||||
--echo # Start of 5.5 tests
|
||||
--echo #
|
||||
|
||||
SET NAMES latin1;
|
||||
SET character_set_connection=utf32;
|
||||
select hex('a'), hex('a ');
|
||||
-- source include/endspace.inc
|
||||
|
||||
#
|
||||
# Check that incomplete utf32 characters in HEX notation
|
||||
# are left-padded with zeros
|
||||
#
|
||||
select hex(_utf32 0x44);
|
||||
select hex(_utf32 0x3344);
|
||||
select hex(_utf32 0x103344);
|
||||
|
||||
select hex(_utf32 X'44');
|
||||
select hex(_utf32 X'3344');
|
||||
select hex(_utf32 X'103344');
|
||||
|
||||
|
||||
#
|
||||
# Check that 0x20 is only trimmed when it is
|
||||
# a part of real SPACE character, not just a part
|
||||
# of a multibyte sequence.
|
||||
# Note, CYRILLIC LETTER ER is used as an example, which
|
||||
# is stored as 0x0420 in UCS2, thus contains 0x20 in the
|
||||
# low byte. The second character is THREE-PER-M, U+2004,
|
||||
# which contains 0x20 in the high byte.
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32;
|
||||
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
|
||||
SELECT hex(word) FROM t1 ORDER BY word;
|
||||
SELECT hex(word2) FROM t1 ORDER BY word2;
|
||||
DELETE FROM t1;
|
||||
|
||||
#
|
||||
# Check that real spaces are correctly trimmed.
|
||||
#
|
||||
|
||||
INSERT INTO t1 VALUES
|
||||
(X'000004200000002000000020',X'000004200000002000000020'),
|
||||
(X'000020040000002000000020',X'000020040000002000000020');
|
||||
SELECT hex(word) FROM t1 ORDER BY word;
|
||||
SELECT hex(word2) FROM t1 ORDER BY word2;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Check LPAD/RPAD
|
||||
#
|
||||
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421'));
|
||||
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
|
||||
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
|
||||
SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
|
||||
|
||||
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421'));
|
||||
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
|
||||
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
|
||||
SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
|
||||
|
||||
CREATE TABLE t1 SELECT
|
||||
LPAD(_utf32 X'0420',10,_utf32 X'0421') l,
|
||||
RPAD(_utf32 X'0420',10,_utf32 X'0421') r;
|
||||
SHOW CREATE TABLE t1;
|
||||
select hex(l), hex(r) from t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
create table t1 (f1 char(30));
|
||||
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
|
||||
select lpad(f1, 12, "-o-/") from t1;
|
||||
drop table t1;
|
||||
|
||||
######################################################
|
||||
#
|
||||
# Test of like
|
||||
#
|
||||
|
||||
SET NAMES latin1;
|
||||
SET character_set_connection=utf32;
|
||||
--source include/ctype_like.inc
|
||||
|
||||
SET NAMES utf8;
|
||||
SET character_set_connection=utf32;
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
|
||||
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
|
||||
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
|
||||
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
|
||||
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
|
||||
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
|
||||
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
|
||||
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
|
||||
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
|
||||
ENGINE=MyISAM CHARACTER SET utf32;
|
||||
INSERT INTO t1 (word) VALUES ("cat");
|
||||
SELECT * FROM t1 WHERE word LIKE "c%";
|
||||
SELECT * FROM t1 WHERE word LIKE "ca_";
|
||||
SELECT * FROM t1 WHERE word LIKE "cat";
|
||||
SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%"
|
||||
SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_"
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# Check that INSERT() works fine.
|
||||
# This invokes charpos() function.
|
||||
select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066);
|
||||
select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066);
|
||||
|
||||
#######################################################
|
||||
|
||||
#
|
||||
# Bug 1264
|
||||
#
|
||||
# Description:
|
||||
#
|
||||
# When using a ucs2 table in MySQL,
|
||||
# either with ucs2_general_ci or ucs2_bin collation,
|
||||
# words are returned in an incorrect order when using ORDER BY
|
||||
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
|
||||
# the longest word *first* instead of last. I.E. The word "aardvark"
|
||||
# is in the results before the word "a".
|
||||
#
|
||||
# If there is no index for the column, the problem does not occur.
|
||||
#
|
||||
# Interestingly, if there is no second column, the words are returned
|
||||
# in the correct order.
|
||||
#
|
||||
# According to EXPLAIN, it looks like when the output includes columns that
|
||||
# are not part of the index sorted on, it does a filesort, which fails.
|
||||
# Using a straight index yields correct results.
|
||||
|
||||
SET NAMES latin1;
|
||||
|
||||
#
|
||||
# Two fields, index
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
word VARCHAR(64),
|
||||
bar INT(11) default 0,
|
||||
PRIMARY KEY (word))
|
||||
ENGINE=MyISAM
|
||||
CHARSET utf32
|
||||
COLLATE utf32_general_ci ;
|
||||
|
||||
INSERT INTO t1 (word) VALUES ("aar");
|
||||
INSERT INTO t1 (word) VALUES ("a");
|
||||
INSERT INTO t1 (word) VALUES ("aardvar");
|
||||
INSERT INTO t1 (word) VALUES ("aardvark");
|
||||
INSERT INTO t1 (word) VALUES ("aardvara");
|
||||
INSERT INTO t1 (word) VALUES ("aardvarz");
|
||||
EXPLAIN SELECT * FROM t1 ORDER BY word;
|
||||
SELECT * FROM t1 ORDER BY word;
|
||||
EXPLAIN SELECT word FROM t1 ORDER BY word;
|
||||
SELECT word FROM t1 ORDER by word;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# One field, index
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
word VARCHAR(64) ,
|
||||
PRIMARY KEY (word))
|
||||
ENGINE=MyISAM
|
||||
CHARSET utf32
|
||||
COLLATE utf32_general_ci;
|
||||
|
||||
INSERT INTO t1 (word) VALUES ("aar");
|
||||
INSERT INTO t1 (word) VALUES ("a");
|
||||
INSERT INTO t1 (word) VALUES ("aardvar");
|
||||
INSERT INTO t1 (word) VALUES ("aardvark");
|
||||
INSERT INTO t1 (word) VALUES ("aardvara");
|
||||
INSERT INTO t1 (word) VALUES ("aardvarz");
|
||||
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
|
||||
SELECT * FROM t1 ORDER BY word;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
#
|
||||
# Two fields, no index
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
word TEXT,
|
||||
bar INT(11) AUTO_INCREMENT,
|
||||
PRIMARY KEY (bar))
|
||||
ENGINE=MyISAM
|
||||
CHARSET utf32
|
||||
COLLATE utf32_general_ci ;
|
||||
INSERT INTO t1 (word) VALUES ("aar");
|
||||
INSERT INTO t1 (word) VALUES ("a" );
|
||||
INSERT INTO t1 (word) VALUES ("aardvar");
|
||||
INSERT INTO t1 (word) VALUES ("aardvark");
|
||||
INSERT INTO t1 (word) VALUES ("aardvara");
|
||||
INSERT INTO t1 (word) VALUES ("aardvarz");
|
||||
EXPLAIN SELECT * FROM t1 ORDER BY word;
|
||||
SELECT * FROM t1 ORDER BY word;
|
||||
EXPLAIN SELECT word FROM t1 ORDER BY word;
|
||||
SELECT word FROM t1 ORDER BY word;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# END OF Bug 1264 test
|
||||
#
|
||||
########################################################
|
||||
|
||||
|
||||
#
|
||||
# Check alignment for from-binary-conversion with CAST and CONVERT
|
||||
#
|
||||
SELECT hex(cast(0xAA as char character set utf32));
|
||||
SELECT hex(convert(0xAA using utf32));
|
||||
|
||||
#
|
||||
# Check alignment for string types
|
||||
#
|
||||
CREATE TABLE t1 (a char(10) character set utf32);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a varchar(10) character set utf32);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a text character set utf32);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a mediumtext character set utf32);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a longtext character set utf32);
|
||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
##
|
||||
## Bug #5024 Server crashes with queries on fields
|
||||
## with certain charset/collation settings
|
||||
##
|
||||
#
|
||||
#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`);
|
||||
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
|
||||
#select s1 from t1 where s1 > 'a' order by s1;
|
||||
#drop table t1;
|
||||
|
||||
#
|
||||
# Bug #5081 : UCS2 fields are filled with '0x2020'
|
||||
# after extending field length
|
||||
#
|
||||
create table t1(a char(1)) default charset utf32;
|
||||
insert into t1 values ('a'),('b'),('c');
|
||||
alter table t1 modify a char(5);
|
||||
select a, hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Check prepare statement from an UTF32 string
|
||||
#
|
||||
set @ivar= 1234;
|
||||
set @str1 = 'select ?';
|
||||
set @str2 = convert(@str1 using utf32);
|
||||
prepare stmt1 from @str2;
|
||||
execute stmt1 using @ivar;
|
||||
|
||||
#
|
||||
# Check that utf32 works with ENUM and SET type
|
||||
#
|
||||
set names utf8;
|
||||
create table t1 (a enum('x','y','z') character set utf32);
|
||||
show create table t1;
|
||||
insert into t1 values ('x');
|
||||
insert into t1 values ('y');
|
||||
insert into t1 values ('z');
|
||||
select a, hex(a) from t1 order by a;
|
||||
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32;
|
||||
show create table t1;
|
||||
insert into t1 values ('D');
|
||||
insert into t1 values ('E ');
|
||||
insert into t1 values ('ä');
|
||||
insert into t1 values ('ö');
|
||||
insert into t1 values ('ü');
|
||||
select a, hex(a) from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32);
|
||||
show create table t1;
|
||||
insert into t1 values ('x');
|
||||
insert into t1 values ('y');
|
||||
insert into t1 values ('z');
|
||||
insert into t1 values ('x,y');
|
||||
insert into t1 values ('x,y,z,ä,ö,ü');
|
||||
select a, hex(a) from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
|
||||
#
|
||||
create table t1(a enum('a','b','c')) default character set utf32;
|
||||
insert into t1 values('a'),('b'),('c');
|
||||
alter table t1 add b char(1);
|
||||
show warnings;
|
||||
select * from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
SET NAMES latin1;
|
||||
SET collation_connection='utf32_general_ci';
|
||||
-- source include/ctype_filesort.inc
|
||||
-- source include/ctype_like_escape.inc
|
||||
SET NAMES latin1;
|
||||
SET collation_connection='utf32_bin';
|
||||
-- source include/ctype_filesort.inc
|
||||
-- source include/ctype_like_escape.inc
|
||||
|
||||
#
|
||||
# Bug#10344 Some string functions fail for UCS2
|
||||
#
|
||||
select hex(substr(_utf32 0x000000e4000000e500000068,1));
|
||||
select hex(substr(_utf32 0x000000e4000000e500000068,2));
|
||||
select hex(substr(_utf32 0x000000e4000000e500000068,3));
|
||||
select hex(substr(_utf32 0x000000e4000000e500000068,-1));
|
||||
select hex(substr(_utf32 0x000000e4000000e500000068,-2));
|
||||
select hex(substr(_utf32 0x000000e4000000e500000068,-3));
|
||||
|
||||
#SET NAMES latin1;
|
||||
#
|
||||
# Bug#8235
|
||||
#
|
||||
# This bug also helped to find another problem that
|
||||
# INSERT of a UCS2 string containing a negative number
|
||||
# into an unsigned int column didn't produce warnings.
|
||||
# This test covers both problems.
|
||||
#
|
||||
#SET collation_connection='ucs2_swedish_ci';
|
||||
#CREATE TABLE t1 (Field1 int(10) default '0');
|
||||
## no warnings, negative numbers are allowed
|
||||
#INSERT INTO t1 VALUES ('-1');
|
||||
#SELECT * FROM t1;
|
||||
#DROP TABLE t1;
|
||||
#CREATE TABLE t1 (Field1 int(10) unsigned default '0');
|
||||
## this should generate a "Data truncated" warning
|
||||
#INSERT INTO t1 VALUES ('-1');
|
||||
#DROP TABLE t1;
|
||||
#SET NAMES latin1;
|
||||
|
||||
#
|
||||
##
|
||||
## Bug#18691 Converting number to UNICODE string returns invalid result
|
||||
##
|
||||
#SELECT CONVERT(103, CHAR(50) UNICODE);
|
||||
#SELECT CONVERT(103.0, CHAR(50) UNICODE);
|
||||
#SELECT CONVERT(-103, CHAR(50) UNICODE);
|
||||
#SELECT CONVERT(-103.0, CHAR(50) UNICODE);
|
||||
|
||||
#
|
||||
# Bug#9557 MyISAM utf8 table crash
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
a varchar(250) NOT NULL default '',
|
||||
KEY a (a)
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci;
|
||||
insert into t1 values (0x803d);
|
||||
insert into t1 values (0x005b);
|
||||
select hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
##
|
||||
## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
|
||||
##
|
||||
#--disable_warnings
|
||||
#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB;
|
||||
#--enable_warnings
|
||||
#insert into t1 values('a');
|
||||
#create index t1f1 on t1(f1);
|
||||
#select f1 from t1 where f1 like 'a%';
|
||||
#drop table t1;
|
||||
|
||||
#
|
||||
# Bug#9442 Set parameter make query fail if column character set is UCS2
|
||||
#
|
||||
create table t1 (utext varchar(20) character set utf32);
|
||||
insert into t1 values ("lily");
|
||||
insert into t1 values ("river");
|
||||
prepare stmt from 'select utext from t1 where utext like ?';
|
||||
set @param1='%%';
|
||||
execute stmt using @param1;
|
||||
execute stmt using @param1;
|
||||
select utext from t1 where utext like '%%';
|
||||
drop table t1;
|
||||
deallocate prepare stmt;
|
||||
|
||||
#
|
||||
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
|
||||
#
|
||||
create table t1 (
|
||||
a char(10) character set utf32 not null,
|
||||
index a (a)
|
||||
) engine=myisam;
|
||||
insert into t1 values (repeat(0x0000201f, 10));
|
||||
insert into t1 values (repeat(0x00002020, 10));
|
||||
insert into t1 values (repeat(0x00002021, 10));
|
||||
# make sure "index read" is used
|
||||
explain select hex(a) from t1 order by a;
|
||||
select hex(a) from t1 order by a;
|
||||
alter table t1 drop index a;
|
||||
select hex(a) from t1 order by a;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
|
||||
# over a 'ucs2' field uses a temporary table
|
||||
#
|
||||
#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
|
||||
#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
|
||||
#SELECT id, MIN(s) FROM t1 GROUP BY id;
|
||||
#DROP TABLE t1;
|
||||
|
||||
##
|
||||
## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
|
||||
##
|
||||
#
|
||||
#--disable_warnings
|
||||
#drop table if exists bug20536;
|
||||
#--enable_warnings
|
||||
#
|
||||
#set names latin1;
|
||||
#create table bug20536 (id bigint not null auto_increment primary key, name
|
||||
#varchar(255) character set ucs2 not null);
|
||||
#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
|
||||
#select md5(name) from bug20536;
|
||||
#select sha1(name) from bug20536;
|
||||
#select make_set(3, name, upper(name)) from bug20536;
|
||||
#select export_set(5, name, upper(name)) from bug20536;
|
||||
#select export_set(5, name, upper(name), ",", 5) from bug20536;
|
||||
|
||||
#
|
||||
# Bug #20108: corrupted default enum value for a ucs2 field
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (
|
||||
status enum('active','passive') character set utf32 collate utf32_general_ci
|
||||
NOT NULL default 'passive'
|
||||
);
|
||||
SHOW CREATE TABLE t1;
|
||||
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
|
||||
SHOW CREATE TABLE t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
#CREATE TABLE t2 (
|
||||
# status enum('active','passive') collate ucs2_turkish_ci
|
||||
# NOT NULL default 'passive'
|
||||
#);
|
||||
#SHOW CREATE TABLE t2;
|
||||
#ALTER TABLE t2 ADD a int NOT NULL AFTER status;
|
||||
#DROP TABLE t2;
|
||||
|
||||
|
||||
## Some broken functions: add these tests just to document current behavior.
|
||||
#
|
||||
## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would
|
||||
## not be backwards compatible in all cases, so it's best to leave it alone
|
||||
#select password(name) from bug20536;
|
||||
#select old_password(name) from bug20536;
|
||||
#
|
||||
## Disable test case as encrypt relies on 'crypt' function.
|
||||
## "decrypt" is normally tested in func_crypt.test which has a
|
||||
## "have_crypt.inc" test
|
||||
#--disable_parsing
|
||||
## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it
|
||||
## doesn't return good results for strings with embedded 0 bytes. It won't be
|
||||
## fixed unless we choose to re-implement the crypt() function ourselves to take
|
||||
## an extra size_t string_length argument.
|
||||
#select encrypt(name, 'SALT') from bug20536;
|
||||
#--enable_parsing
|
||||
#
|
||||
## QUOTE doesn't work with UCS2 data. It would require a total rewrite
|
||||
## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is
|
||||
## supported fully as a client character set.
|
||||
#select quote(name) from bug20536;
|
||||
#
|
||||
#drop table bug20536;
|
||||
#
|
||||
--echo End of 4.1 tests
|
||||
|
||||
|
||||
#
|
||||
# Conversion from an UTF32 string to a decimal column
|
||||
#
|
||||
CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3));
|
||||
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
|
||||
update t1 set b=a;
|
||||
SELECT *, hex(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Bug#9442 Set parameter make query fail if column character set is UCS2
|
||||
#
|
||||
create table t1 (utext varchar(20) character set utf32);
|
||||
insert into t1 values ("lily");
|
||||
insert into t1 values ("river");
|
||||
prepare stmt from 'select utext from t1 where utext like ?';
|
||||
set @param1='%%';
|
||||
execute stmt using @param1;
|
||||
execute stmt using @param1;
|
||||
select utext from t1 where utext like '%%';
|
||||
drop table t1;
|
||||
deallocate prepare stmt;
|
||||
|
||||
#
|
||||
# Bug#22638 SOUNDEX broken for international characters
|
||||
#
|
||||
set names latin1;
|
||||
set character_set_connection=utf32;
|
||||
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
|
||||
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
|
||||
select 'mood' sounds like 'mud';
|
||||
# Cyrillic A, BE, VE
|
||||
select hex(soundex(_utf32 0x000004100000041100000412));
|
||||
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
|
||||
select hex(soundex(_utf32 0x000000BF000000C0));
|
||||
set names latin1;
|
||||
|
||||
#
|
||||
# Bug #14290: character_maximum_length for text fields
|
||||
#
|
||||
create table t1(a blob, b text charset utf32);
|
||||
select data_type, character_octet_length, character_maximum_length
|
||||
from information_schema.columns where table_name='t1';
|
||||
drop table t1;
|
||||
|
||||
|
||||
set names latin1;
|
||||
set collation_connection=utf32_general_ci;
|
||||
#
|
||||
# Testing cs->coll->instr()
|
||||
#
|
||||
select position('bb' in 'abba');
|
||||
|
||||
#
|
||||
# Testing cs->coll->hash_sort()
|
||||
#
|
||||
create table t1 (a varchar(10) character set utf32) engine=heap;
|
||||
insert into t1 values ('a'),('A'),('b'),('B');
|
||||
select * from t1 where a='a' order by binary a;
|
||||
select hex(min(binary a)),count(*) from t1 group by a;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->numchars()
|
||||
#
|
||||
select char_length('abcd'), octet_length('abcd');
|
||||
|
||||
#
|
||||
# Testing cs->cset->charpos()
|
||||
#
|
||||
select left('abcd',2);
|
||||
|
||||
#
|
||||
# Testing cs->cset->well_formed_length()
|
||||
#
|
||||
create table t1 (a varchar(10) character set utf32);
|
||||
insert into t1 values (_utf32 0x0010FFFF);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf32 0x00110000);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf32 0x00110101);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf32 0x01000101);
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
insert into t1 values (_utf32 0x11000101);
|
||||
select hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns
|
||||
#
|
||||
create table t1 (utf32 varchar(2) character set utf32);
|
||||
--echo Wrong character with pad
|
||||
insert into t1 values (0x110000);
|
||||
--echo Wrong chsaracter without pad
|
||||
insert into t1 values (0x00110000);
|
||||
--echo Wrong character with pad followed by another wrong character
|
||||
insert into t1 values (0x11000000110000);
|
||||
--echo Good character with pad followed by bad character
|
||||
insert into t1 values (0x10000000110000);
|
||||
--echo Good character without pad followed by bad character
|
||||
insert into t1 values (0x0010000000110000);
|
||||
--echo Wrong character with the second byte higher than 0x10
|
||||
insert into t1 values (0x00800037);
|
||||
--echo Wrong character with pad with the second byte higher than 0x10
|
||||
insert into t1 values (0x00800037);
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#32394 Character sets: crash if comparison with 0xfffd
|
||||
#
|
||||
select _utf32'a' collate utf32_general_ci = 0xfffd;
|
||||
select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61));
|
||||
create table t1 (s1 varchar(5) character set utf32);
|
||||
insert into t1 values (0xfffd);
|
||||
select case when s1 = 0xfffd then 1 else 0 end from t1;
|
||||
select hex(s1) from t1 where s1 = 0xfffd;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->lengthsp()
|
||||
#
|
||||
create table t1 (a char(10)) character set utf32;
|
||||
insert into t1 values ('a ');
|
||||
select hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->caseup() and cs->cset->casedn()
|
||||
#
|
||||
select upper('abcd'), lower('ABCD');
|
||||
|
||||
#
|
||||
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32
|
||||
# Testing cs->cset->snprintf()
|
||||
#
|
||||
#create table t1 (a date);
|
||||
#insert into t1 values ('2007-09-16');
|
||||
#select * from t1;
|
||||
#drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->l10tostr
|
||||
# !!! Not used in the code
|
||||
|
||||
#
|
||||
# Testing cs->cset->ll10tostr
|
||||
#
|
||||
create table t1 (a varchar(10) character set utf32);
|
||||
insert into t1 values (123456);
|
||||
select a, hex(a) from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->fill
|
||||
# SOUNDEX fills strings with DIGIT ZERO up to four characters
|
||||
select hex(soundex('a'));
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntol
|
||||
# !!! Not used in the code
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntoul
|
||||
#
|
||||
create table t1 (a enum ('a','b','c')) character set utf32;
|
||||
insert into t1 values ('1');
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntoll and cs->cset->strntoull
|
||||
#
|
||||
set names latin1;
|
||||
select hex(conv(convert('123' using utf32), -10, 16));
|
||||
select hex(conv(convert('123' using utf32), 10, 16));
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntod
|
||||
#
|
||||
set names latin1;
|
||||
set character_set_connection=utf32;
|
||||
select 1.1 + '1.2';
|
||||
select 1.1 + '1.2xxx';
|
||||
|
||||
# Testing strntoll10_utf32
|
||||
# Testing cs->cset->strtoll10
|
||||
select left('aaa','1');
|
||||
|
||||
#
|
||||
# Testing cs->cset->strntoull10rnd
|
||||
#
|
||||
create table t1 (a int);
|
||||
insert into t1 values ('-1234.1e2');
|
||||
insert into t1 values ('-1234.1e2xxxx');
|
||||
insert into t1 values ('-1234.1e2 ');
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing cs->cset->scan
|
||||
#
|
||||
create table t1 (a int);
|
||||
insert into t1 values ('1 ');
|
||||
insert into t1 values ('1 x');
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing auto-conversion to TEXT
|
||||
#
|
||||
create table t1 (a varchar(17000) character set utf32);
|
||||
show create table t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Testing that maximum possible key length is 1332 bytes
|
||||
#
|
||||
create table t1 (a varchar(250) character set utf32 primary key);
|
||||
show create table t1;
|
||||
drop table t1;
|
||||
--error ER_TOO_LONG_KEY
|
||||
create table t1 (a varchar(334) character set utf32 primary key);
|
||||
|
||||
#
|
||||
# Testing mi_check with long key values
|
||||
#
|
||||
create table t1 (a varchar(333) character set utf32, key(a));
|
||||
insert into t1 values (repeat('a',333)), (repeat('b',333));
|
||||
flush tables;
|
||||
check table t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Test how character set works with date/time
|
||||
#
|
||||
SET collation_connection=utf32_general_ci;
|
||||
--source include/ctype_datetime.inc
|
||||
SET NAMES latin1;
|
||||
|
||||
#
|
||||
# Test basic regex functionality
|
||||
#
|
||||
set collation_connection=utf32_general_ci;
|
||||
--source include/ctype_regex.inc
|
||||
set names latin1;
|
||||
|
||||
|
||||
# TODO: add tests for all engines
|
||||
|
||||
#
|
||||
# Bug #36418 Character sets: crash if char(256 using utf32)
|
||||
#
|
||||
select hex(char(0x01 using utf32));
|
||||
select hex(char(0x0102 using utf32));
|
||||
select hex(char(0x010203 using utf32));
|
||||
select hex(char(0x01020304 using utf32));
|
||||
create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32);
|
||||
create index i on t1 (s1);
|
||||
insert into t1 values (char(256 using utf32), char(256 using utf32));
|
||||
select hex(s1), hex(s2) from t1;
|
||||
drop table t1;
|
||||
|
||||
|
||||
#
|
||||
# Bug#33073 Character sets: ordering fails with utf32
|
||||
#
|
||||
SET collation_connection=utf32_general_ci;
|
||||
CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
|
||||
SHOW CREATE TABLE t1;
|
||||
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
|
||||
SELECT * FROM t1 ORDER BY s1;
|
||||
SET max_sort_length=4;
|
||||
SELECT * FROM t1 ORDER BY s1;
|
||||
DROP TABLE t1;
|
||||
SET max_sort_length=DEFAULT;
|
||||
SET NAMES latin1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
291
mysql-test/t/ctype_utf32_uca.test
Normal file
291
mysql-test/t/ctype_utf32_uca.test
Normal file
@ -0,0 +1,291 @@
|
||||
-- source include/have_ucs2.inc
|
||||
-- source include/have_utf32.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
--enable_warnings
|
||||
|
||||
--echo #
|
||||
--echo # Start of 5.5 tests
|
||||
--echo #
|
||||
|
||||
set names utf8;
|
||||
set collation_connection=utf32_unicode_ci;
|
||||
select hex('a'), hex('a ');
|
||||
-- source include/endspace.inc
|
||||
|
||||
#
|
||||
# Bug #6787 LIKE not working properly with _ and utf8 data
|
||||
#
|
||||
select 'c' like '\_' as want0;
|
||||
|
||||
#
|
||||
# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
|
||||
#
|
||||
CREATE TABLE t (
|
||||
c char(20) NOT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARACTER SET utf32 COLLATE=utf32_unicode_ci;
|
||||
INSERT INTO t VALUES ('a'),('ab'),('aba');
|
||||
ALTER TABLE t ADD INDEX (c);
|
||||
SELECT c FROM t WHERE c LIKE 'a%';
|
||||
DROP TABLE t;
|
||||
|
||||
|
||||
create table t1 (c1 char(10) character set utf32 collate utf32_bin);
|
||||
|
||||
#
|
||||
# Basic Latin
|
||||
#
|
||||
insert into t1 values ('A'),('a');
|
||||
insert into t1 values ('B'),('b');
|
||||
insert into t1 values ('C'),('c');
|
||||
insert into t1 values ('D'),('d');
|
||||
insert into t1 values ('E'),('e');
|
||||
insert into t1 values ('F'),('f');
|
||||
insert into t1 values ('G'),('g');
|
||||
insert into t1 values ('H'),('h');
|
||||
insert into t1 values ('I'),('i');
|
||||
insert into t1 values ('J'),('j');
|
||||
insert into t1 values ('K'),('k');
|
||||
insert into t1 values ('L'),('l');
|
||||
insert into t1 values ('M'),('m');
|
||||
insert into t1 values ('N'),('n');
|
||||
insert into t1 values ('O'),('o');
|
||||
insert into t1 values ('P'),('p');
|
||||
insert into t1 values ('Q'),('q');
|
||||
insert into t1 values ('R'),('r');
|
||||
insert into t1 values ('S'),('s');
|
||||
insert into t1 values ('T'),('t');
|
||||
insert into t1 values ('U'),('u');
|
||||
insert into t1 values ('V'),('v');
|
||||
insert into t1 values ('W'),('w');
|
||||
insert into t1 values ('X'),('x');
|
||||
insert into t1 values ('Y'),('y');
|
||||
insert into t1 values ('Z'),('z');
|
||||
|
||||
#
|
||||
# Latin-1 Supplement
|
||||
#
|
||||
insert into t1 values (_ucs2 0x00e0),(_ucs2 0x00c0);
|
||||
insert into t1 values (_ucs2 0x00e1),(_ucs2 0x00c1);
|
||||
insert into t1 values (_ucs2 0x00e2),(_ucs2 0x00c2);
|
||||
insert into t1 values (_ucs2 0x00e3),(_ucs2 0x00c3);
|
||||
insert into t1 values (_ucs2 0x00e4),(_ucs2 0x00c4);
|
||||
insert into t1 values (_ucs2 0x00e5),(_ucs2 0x00c5);
|
||||
insert into t1 values (_ucs2 0x00e6),(_ucs2 0x00c6);
|
||||
insert into t1 values (_ucs2 0x00e7),(_ucs2 0x00c7);
|
||||
insert into t1 values (_ucs2 0x00e8),(_ucs2 0x00c8);
|
||||
insert into t1 values (_ucs2 0x00e9),(_ucs2 0x00c9);
|
||||
insert into t1 values (_ucs2 0x00ea),(_ucs2 0x00ca);
|
||||
insert into t1 values (_ucs2 0x00eb),(_ucs2 0x00cb);
|
||||
insert into t1 values (_ucs2 0x00ec),(_ucs2 0x00cc);
|
||||
insert into t1 values (_ucs2 0x00ed),(_ucs2 0x00cd);
|
||||
insert into t1 values (_ucs2 0x00ee),(_ucs2 0x00ce);
|
||||
insert into t1 values (_ucs2 0x00ef),(_ucs2 0x00cf);
|
||||
|
||||
insert into t1 values (_ucs2 0x00f0),(_ucs2 0x00d0);
|
||||
insert into t1 values (_ucs2 0x00f1),(_ucs2 0x00d1);
|
||||
insert into t1 values (_ucs2 0x00f2),(_ucs2 0x00d2);
|
||||
insert into t1 values (_ucs2 0x00f3),(_ucs2 0x00d3);
|
||||
insert into t1 values (_ucs2 0x00f4),(_ucs2 0x00d4);
|
||||
insert into t1 values (_ucs2 0x00f5),(_ucs2 0x00d5);
|
||||
insert into t1 values (_ucs2 0x00f6),(_ucs2 0x00d6);
|
||||
insert into t1 values (_ucs2 0x00f7),(_ucs2 0x00d7);
|
||||
insert into t1 values (_ucs2 0x00f8),(_ucs2 0x00d8);
|
||||
insert into t1 values (_ucs2 0x00f9),(_ucs2 0x00d9);
|
||||
insert into t1 values (_ucs2 0x00fa),(_ucs2 0x00da);
|
||||
insert into t1 values (_ucs2 0x00fb),(_ucs2 0x00db);
|
||||
insert into t1 values (_ucs2 0x00fc),(_ucs2 0x00dc);
|
||||
insert into t1 values (_ucs2 0x00fd),(_ucs2 0x00dd);
|
||||
insert into t1 values (_ucs2 0x00fe),(_ucs2 0x00de);
|
||||
insert into t1 values (_ucs2 0x00ff),(_ucs2 0x00df);
|
||||
|
||||
#
|
||||
# Latin extended-A, 0100-017F
|
||||
#
|
||||
insert into t1 values (_ucs2 0x0100),(_ucs2 0x0101),(_ucs2 0x0102),(_ucs2 0x0103);
|
||||
insert into t1 values (_ucs2 0x0104),(_ucs2 0x0105),(_ucs2 0x0106),(_ucs2 0x0107);
|
||||
insert into t1 values (_ucs2 0x0108),(_ucs2 0x0109),(_ucs2 0x010a),(_ucs2 0x010b);
|
||||
insert into t1 values (_ucs2 0x010c),(_ucs2 0x010d),(_ucs2 0x010e),(_ucs2 0x010f);
|
||||
insert into t1 values (_ucs2 0x0110),(_ucs2 0x0111),(_ucs2 0x0112),(_ucs2 0x0113);
|
||||
insert into t1 values (_ucs2 0x0114),(_ucs2 0x0115),(_ucs2 0x0116),(_ucs2 0x0117);
|
||||
insert into t1 values (_ucs2 0x0118),(_ucs2 0x0119),(_ucs2 0x011a),(_ucs2 0x011b);
|
||||
insert into t1 values (_ucs2 0x011c),(_ucs2 0x011d),(_ucs2 0x011e),(_ucs2 0x011f);
|
||||
insert into t1 values (_ucs2 0x0120),(_ucs2 0x0121),(_ucs2 0x0122),(_ucs2 0x0123);
|
||||
insert into t1 values (_ucs2 0x0124),(_ucs2 0x0125),(_ucs2 0x0126),(_ucs2 0x0127);
|
||||
insert into t1 values (_ucs2 0x0128),(_ucs2 0x0129),(_ucs2 0x012a),(_ucs2 0x012b);
|
||||
insert into t1 values (_ucs2 0x012c),(_ucs2 0x012d),(_ucs2 0x012e),(_ucs2 0x012f);
|
||||
insert into t1 values (_ucs2 0x0130),(_ucs2 0x0131),(_ucs2 0x0132),(_ucs2 0x0133);
|
||||
insert into t1 values (_ucs2 0x0134),(_ucs2 0x0135),(_ucs2 0x0136),(_ucs2 0x0137);
|
||||
insert into t1 values (_ucs2 0x0138),(_ucs2 0x0139),(_ucs2 0x013a),(_ucs2 0x013b);
|
||||
insert into t1 values (_ucs2 0x013c),(_ucs2 0x013d),(_ucs2 0x013e),(_ucs2 0x013f);
|
||||
insert into t1 values (_ucs2 0x0140),(_ucs2 0x0141),(_ucs2 0x0142),(_ucs2 0x0143);
|
||||
insert into t1 values (_ucs2 0x0144),(_ucs2 0x0145),(_ucs2 0x0146),(_ucs2 0x0147);
|
||||
insert into t1 values (_ucs2 0x0148),(_ucs2 0x0149),(_ucs2 0x014a),(_ucs2 0x014b);
|
||||
insert into t1 values (_ucs2 0x014c),(_ucs2 0x014d),(_ucs2 0x014e),(_ucs2 0x014f);
|
||||
insert into t1 values (_ucs2 0x0150),(_ucs2 0x0151),(_ucs2 0x0152),(_ucs2 0x0153);
|
||||
insert into t1 values (_ucs2 0x0154),(_ucs2 0x0155),(_ucs2 0x0156),(_ucs2 0x0157);
|
||||
insert into t1 values (_ucs2 0x0158),(_ucs2 0x0159),(_ucs2 0x015a),(_ucs2 0x015b);
|
||||
insert into t1 values (_ucs2 0x015c),(_ucs2 0x015d),(_ucs2 0x015e),(_ucs2 0x015f);
|
||||
insert into t1 values (_ucs2 0x0160),(_ucs2 0x0161),(_ucs2 0x0162),(_ucs2 0x0163);
|
||||
insert into t1 values (_ucs2 0x0164),(_ucs2 0x0165),(_ucs2 0x0166),(_ucs2 0x0167);
|
||||
insert into t1 values (_ucs2 0x0168),(_ucs2 0x0169),(_ucs2 0x016a),(_ucs2 0x016b);
|
||||
insert into t1 values (_ucs2 0x016c),(_ucs2 0x016d),(_ucs2 0x016e),(_ucs2 0x016f);
|
||||
insert into t1 values (_ucs2 0x0170),(_ucs2 0x0171),(_ucs2 0x0172),(_ucs2 0x0173);
|
||||
insert into t1 values (_ucs2 0x0174),(_ucs2 0x0175),(_ucs2 0x0176),(_ucs2 0x0177);
|
||||
insert into t1 values (_ucs2 0x0178),(_ucs2 0x0179),(_ucs2 0x017a),(_ucs2 0x017b);
|
||||
insert into t1 values (_ucs2 0x017c),(_ucs2 0x017d),(_ucs2 0x017e),(_ucs2 0x017f);
|
||||
|
||||
#
|
||||
# Latin extended-B, 0180-024F
|
||||
#
|
||||
insert into t1 values (_ucs2 0x0180),(_ucs2 0x0181),(_ucs2 0x0182),(_ucs2 0x0183);
|
||||
insert into t1 values (_ucs2 0x0184),(_ucs2 0x0185),(_ucs2 0x0186),(_ucs2 0x0187);
|
||||
insert into t1 values (_ucs2 0x0188),(_ucs2 0x0189),(_ucs2 0x018a),(_ucs2 0x018b);
|
||||
insert into t1 values (_ucs2 0x018c),(_ucs2 0x018d),(_ucs2 0x018e),(_ucs2 0x018f);
|
||||
insert into t1 values (_ucs2 0x0190),(_ucs2 0x0191),(_ucs2 0x0192),(_ucs2 0x0193);
|
||||
insert into t1 values (_ucs2 0x0194),(_ucs2 0x0195),(_ucs2 0x0196),(_ucs2 0x0197);
|
||||
insert into t1 values (_ucs2 0x0198),(_ucs2 0x0199),(_ucs2 0x019a),(_ucs2 0x019b);
|
||||
insert into t1 values (_ucs2 0x019c),(_ucs2 0x019d),(_ucs2 0x019e),(_ucs2 0x019f);
|
||||
insert into t1 values (_ucs2 0x01a0),(_ucs2 0x01a1),(_ucs2 0x01a2),(_ucs2 0x01a3);
|
||||
insert into t1 values (_ucs2 0x01a4),(_ucs2 0x01a5),(_ucs2 0x01a6),(_ucs2 0x01a7);
|
||||
insert into t1 values (_ucs2 0x01a8),(_ucs2 0x01a9),(_ucs2 0x01aa),(_ucs2 0x01ab);
|
||||
insert into t1 values (_ucs2 0x01ac),(_ucs2 0x01ad),(_ucs2 0x01ae),(_ucs2 0x01af);
|
||||
insert into t1 values (_ucs2 0x01b0),(_ucs2 0x01b1),(_ucs2 0x01b2),(_ucs2 0x01b3);
|
||||
insert into t1 values (_ucs2 0x01b4),(_ucs2 0x01b5),(_ucs2 0x01b6),(_ucs2 0x01b7);
|
||||
insert into t1 values (_ucs2 0x01b8),(_ucs2 0x01b9),(_ucs2 0x01ba),(_ucs2 0x01bb);
|
||||
insert into t1 values (_ucs2 0x01bc),(_ucs2 0x01bd),(_ucs2 0x01be),(_ucs2 0x01bf);
|
||||
insert into t1 values (_ucs2 0x01c0),(_ucs2 0x01c1),(_ucs2 0x01c2),(_ucs2 0x01c3);
|
||||
insert into t1 values (_ucs2 0x01c4),(_ucs2 0x01c5),(_ucs2 0x01c6),(_ucs2 0x01c7);
|
||||
insert into t1 values (_ucs2 0x01c8),(_ucs2 0x01c9),(_ucs2 0x01ca),(_ucs2 0x01cb);
|
||||
insert into t1 values (_ucs2 0x01cc),(_ucs2 0x01cd),(_ucs2 0x01ce),(_ucs2 0x01cf);
|
||||
insert into t1 values (_ucs2 0x01d0),(_ucs2 0x01d1),(_ucs2 0x01d2),(_ucs2 0x01d3);
|
||||
insert into t1 values (_ucs2 0x01d4),(_ucs2 0x01d5),(_ucs2 0x01d6),(_ucs2 0x01d7);
|
||||
insert into t1 values (_ucs2 0x01d8),(_ucs2 0x01d9),(_ucs2 0x01da),(_ucs2 0x01db);
|
||||
insert into t1 values (_ucs2 0x01dc),(_ucs2 0x01dd),(_ucs2 0x01de),(_ucs2 0x01df);
|
||||
insert into t1 values (_ucs2 0x01e0),(_ucs2 0x01e1),(_ucs2 0x01e2),(_ucs2 0x01e3);
|
||||
insert into t1 values (_ucs2 0x01e4),(_ucs2 0x01e5),(_ucs2 0x01e6),(_ucs2 0x01e7);
|
||||
insert into t1 values (_ucs2 0x01e8),(_ucs2 0x01e9),(_ucs2 0x01ea),(_ucs2 0x01eb);
|
||||
insert into t1 values (_ucs2 0x01ec),(_ucs2 0x01ed),(_ucs2 0x01ee),(_ucs2 0x01ef);
|
||||
insert into t1 values (_ucs2 0x01f0),(_ucs2 0x01f1),(_ucs2 0x01f2),(_ucs2 0x01f3);
|
||||
insert into t1 values (_ucs2 0x01f4),(_ucs2 0x01f5),(_ucs2 0x01f6),(_ucs2 0x01f7);
|
||||
insert into t1 values (_ucs2 0x01f8),(_ucs2 0x01f9),(_ucs2 0x01fa),(_ucs2 0x01fb);
|
||||
insert into t1 values (_ucs2 0x01fc),(_ucs2 0x01fd),(_ucs2 0x01fe),(_ucs2 0x01ff);
|
||||
|
||||
|
||||
insert into t1 values ('AA'),('Aa'),('aa'),('aA');
|
||||
insert into t1 values ('CH'),('Ch'),('ch'),('cH');
|
||||
insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
|
||||
insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
|
||||
insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
|
||||
insert into t1 values ('LL'),('Ll'),('ll'),('lL');
|
||||
insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
|
||||
insert into t1 values ('OE'),('Oe'),('oe'),('oE');
|
||||
insert into t1 values ('SS'),('Ss'),('ss'),('sS');
|
||||
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
|
||||
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_icelandic_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_latvian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_romanian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovenian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_polish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_estonian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_swedish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_turkish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_czech_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_danish_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_lithuanian_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovak_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish2_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_roman_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_esperanto_ci;
|
||||
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_hungarian_ci;
|
||||
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug#5324
|
||||
#
|
||||
SET NAMES utf8;
|
||||
#test1
|
||||
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_general_ci NOT NULL, INDEX (c));
|
||||
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
|
||||
#Check one row
|
||||
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_general_ci;
|
||||
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
|
||||
#Check two rows
|
||||
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
|
||||
COLLATE utf32_general_ci ORDER BY c;
|
||||
DROP TABLE t1;
|
||||
#test2
|
||||
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
|
||||
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
|
||||
#Check one row
|
||||
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_unicode_ci;
|
||||
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
|
||||
#Check two rows
|
||||
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
|
||||
COLLATE utf32_unicode_ci ORDER BY c;
|
||||
DROP TABLE t1;
|
||||
#test 3
|
||||
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
|
||||
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
|
||||
#Check one row
|
||||
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) COLLATE utf32_unicode_ci;
|
||||
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
|
||||
#Check two rows
|
||||
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32)
|
||||
COLLATE utf32_unicode_ci ORDER BY c;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
SET NAMES utf8;
|
||||
SET @test_character_set='utf32';
|
||||
SET @test_collation='utf32_swedish_ci';
|
||||
-- source include/ctype_common.inc
|
||||
|
||||
|
||||
SET collation_connection='utf32_unicode_ci';
|
||||
-- source include/ctype_filesort.inc
|
||||
-- source include/ctype_like_escape.inc
|
||||
|
||||
--echo End of 4.1 tests
|
||||
|
||||
#
|
||||
# Check UPPER/LOWER changing length
|
||||
#
|
||||
# Result shorter than argument
|
||||
CREATE TABLE t1 (id int, a varchar(30) character set utf32);
|
||||
INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131);
|
||||
INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049);
|
||||
INSERT INTO t1 VALUES (5, _ucs2 0x01300130), (6, _ucs2 0x00490049);
|
||||
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
|
||||
FROM t1 ORDER BY id;
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf32 collate utf32_turkish_ci;
|
||||
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
|
||||
FROM t1 ORDER BY id;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Bug #27079 Crash while grouping empty ucs2 strings
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
c1 text character set utf32 collate utf32_polish_ci NOT NULL
|
||||
) ENGINE=MyISAM;
|
||||
insert into t1 values (''),('a');
|
||||
SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
|
||||
DROP TABLE IF EXISTS t1;
|
||||
|
||||
|
||||
#
|
||||
# Test basic regex functionality
|
||||
#
|
||||
set collation_connection=utf32_unicode_ci;
|
||||
--source include/ctype_regex.inc
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
@ -1440,6 +1440,17 @@ DROP TABLE t1;
|
||||
|
||||
|
||||
--echo Start of 5.4 tests
|
||||
|
||||
#
|
||||
# WL#1213: utf8mb3 is an alias for utf8
|
||||
#
|
||||
SET NAMES utf8mb3;
|
||||
SHOW VARIABLES LIKE 'character_set_results%';
|
||||
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
|
||||
SHOW CREATE TABLE t1;
|
||||
DROP TABLE t1;
|
||||
SELECT _utf8mb3'test';
|
||||
|
||||
#
|
||||
# Bug#26180: Can't add columns to tables created with utf8 text indexes
|
||||
#
|
||||
|
1670
mysql-test/t/ctype_utf8mb4.test
Normal file
1670
mysql-test/t/ctype_utf8mb4.test
Normal file
File diff suppressed because it is too large
Load Diff
@ -45,6 +45,53 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_utf32
|
||||
extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_latvian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_romanian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_polish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_estonian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_spanish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_swedish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_turkish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_czech_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_danish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_slovak_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_roman_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_persian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci;
|
||||
#endif /* HAVE_CHARSET_utf32 */
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_utf16
|
||||
extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_latvian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_romanian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_polish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_estonian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_spanish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_swedish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_turkish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_czech_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_danish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_slovak_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_roman_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_persian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci;
|
||||
#endif /* HAVE_CHARSET_utf16 */
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_utf8
|
||||
extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
|
||||
@ -70,6 +117,28 @@ extern CHARSET_INFO my_charset_utf8_general_cs;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CHARSET_utf8mb4
|
||||
extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci;
|
||||
extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci;
|
||||
#endif /* HAVE_CHARSET_utf8mb4 */
|
||||
|
||||
#endif /* HAVE_UCA_COLLATIONS */
|
||||
|
||||
my_bool init_compiled_charsets(myf flags __attribute__((unused)))
|
||||
@ -191,7 +260,91 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
|
||||
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
|
||||
#endif
|
||||
#endif
|
||||
#endif /* HAVE_CHARSET_utf8 */
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_utf8mb4
|
||||
add_compiled_collation(&my_charset_utf8mb4_general_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_bin);
|
||||
#ifdef HAVE_UCA_COLLATIONS
|
||||
add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
|
||||
#endif /* HAVE_UCA_COLLATIONS */
|
||||
#endif /* HAVE_CHARSET_utf8mb4 */
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_utf16
|
||||
add_compiled_collation(&my_charset_utf16_general_ci);
|
||||
add_compiled_collation(&my_charset_utf16_bin);
|
||||
#ifdef HAVE_UCA_COLLATIONS
|
||||
add_compiled_collation(&my_charset_utf16_unicode_ci);
|
||||
add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_latvian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_romanian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_slovenian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_polish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_estonian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_spanish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_swedish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_turkish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_czech_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_danish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_lithuanian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_slovak_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_spanish2_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_roman_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_persian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_esperanto_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
|
||||
#endif /* HAVE_UCA_COLLATIONS */
|
||||
#endif /* HAVE_CHARSET_utf16 */
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_utf32
|
||||
add_compiled_collation(&my_charset_utf32_general_ci);
|
||||
add_compiled_collation(&my_charset_utf32_bin);
|
||||
#ifdef HAVE_UCA_COLLATIONS
|
||||
add_compiled_collation(&my_charset_utf32_unicode_ci);
|
||||
add_compiled_collation(&my_charset_utf32_icelandic_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_latvian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_romanian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_slovenian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_polish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_estonian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_spanish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_swedish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_turkish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_czech_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_danish_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_lithuanian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_slovak_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_spanish2_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_roman_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_persian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_esperanto_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
|
||||
add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
|
||||
#endif /* HAVE_UCA_COLLATIONS */
|
||||
#endif /* HAVE_CHARSET_utf32 */
|
||||
|
||||
/* Copy compiled charsets */
|
||||
for (cs=compiled_charsets; cs->name; cs++)
|
||||
|
@ -252,13 +252,35 @@ static int add_collation(CHARSET_INFO *cs)
|
||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
||||
#endif
|
||||
}
|
||||
else if (!strcmp(cs->csname, "utf8"))
|
||||
else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
|
||||
{
|
||||
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
|
||||
copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
|
||||
newcs->ctype= my_charset_utf8_unicode_ci.ctype;
|
||||
if (init_state_maps(newcs))
|
||||
return MY_XML_ERROR;
|
||||
#endif
|
||||
}
|
||||
else if (!strcmp(cs->csname, "utf8mb4"))
|
||||
{
|
||||
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
|
||||
copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
|
||||
newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
|
||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
|
||||
#endif
|
||||
}
|
||||
else if (!strcmp(cs->csname, "utf16"))
|
||||
{
|
||||
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
|
||||
copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
|
||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
||||
#endif
|
||||
}
|
||||
else if (!strcmp(cs->csname, "utf32"))
|
||||
{
|
||||
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
|
||||
copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
|
||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
@ -433,17 +455,35 @@ static void init_available_charsets(void)
|
||||
}
|
||||
|
||||
|
||||
uint get_collation_number(const char *name)
|
||||
static const char*
|
||||
get_collation_name_alias(const char *name, char *buf, size_t bufsize)
|
||||
{
|
||||
my_pthread_once(&charsets_initialized, init_available_charsets);
|
||||
return get_collation_number_internal(name);
|
||||
if (!strncasecmp(name, "utf8mb3_", 8))
|
||||
{
|
||||
my_snprintf(buf, bufsize, "utf8_%s", name + 8);
|
||||
return buf;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
uint get_charset_number(const char *charset_name, uint cs_flags)
|
||||
/*
  Look up a collation number by collation name.

  Makes sure init_available_charsets has run (through my_pthread_once),
  then tries the name exactly as given. A zero result from
  get_collation_number_internal is treated as "not found"; in that case
  the name is passed to get_collation_name_alias and, if an alias is
  produced, the lookup is repeated with the alias.

  name     Collation name to resolve.

  Returns the non-zero number from get_collation_number_internal, or 0
  when neither the name nor its alias resolves.
*/
uint get_collation_number(const char *name)
|
||||
{
|
||||
uint id;
|
||||
/* Scratch buffer that receives the rewritten alias name, if any. */
char alias[64];
|
||||
my_pthread_once(&charsets_initialized, init_available_charsets);
|
||||
if ((id= get_collation_number_internal(name)))
|
||||
return id;
|
||||
/* Direct lookup failed: retry once with the alias, if there is one. */
if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
|
||||
return get_collation_number_internal(name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static uint
|
||||
get_charset_number_internal(const char *charset_name, uint cs_flags)
|
||||
{
|
||||
CHARSET_INFO **cs;
|
||||
my_pthread_once(&charsets_initialized, init_available_charsets);
|
||||
|
||||
for (cs= all_charsets;
|
||||
cs < all_charsets + array_elements(all_charsets);
|
||||
@ -457,6 +497,27 @@ uint get_charset_number(const char *charset_name, uint cs_flags)
|
||||
}
|
||||
|
||||
|
||||
/*
  Map an alternative character set name to the name it stands for.

  The only alias handled here is "utf8mb3", which maps to "utf8". The
  comparison goes through my_strcasecmp with my_charset_latin1
  (presumably case-insensitive, judging by the helper's name -- confirm
  against its definition).

  name     Character set name to check.

  Returns the string literal "utf8" when name matches "utf8mb3",
  otherwise NULL.
*/
static const char*
|
||||
get_charset_name_alias(const char *name)
|
||||
{
|
||||
if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))
|
||||
return "utf8";
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
  Look up a character set number by character set name.

  Makes sure init_available_charsets has run (through my_pthread_once),
  then tries the name exactly as given. A zero result from
  get_charset_number_internal is treated as "not found"; in that case
  the name is passed to get_charset_name_alias and, if an alias exists,
  the lookup is repeated with the alias and the same cs_flags.

  charset_name   Character set name to resolve.
  cs_flags       Passed unchanged to get_charset_number_internal.

  Returns the non-zero number from get_charset_number_internal, or 0
  when neither the name nor its alias resolves.
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
|
||||
{
|
||||
uint id;
|
||||
my_pthread_once(&charsets_initialized, init_available_charsets);
|
||||
if ((id= get_charset_number_internal(charset_name, cs_flags)))
|
||||
return id;
|
||||
/* Direct lookup failed: retry once with the alias, if there is one. */
if ((charset_name= get_charset_name_alias(charset_name)))
|
||||
return get_charset_number_internal(charset_name, cs_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
const char *get_charset_name(uint charset_number)
|
||||
{
|
||||
CHARSET_INFO *cs;
|
||||
|
@ -1836,7 +1836,9 @@ int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
|
||||
ASSERT_COLUMN_MARKED_FOR_WRITE;
|
||||
char buff[MAX_DATE_STRING_REP_LENGTH];
|
||||
uint length= (uint) my_TIME_to_str(ltime, buff);
|
||||
return store(buff, length, &my_charset_bin);
|
||||
return store(buff, length,
|
||||
(charset()->state & MY_CS_NONASCII) ?
|
||||
&my_charset_latin1 : &my_charset_bin);
|
||||
}
|
||||
|
||||
|
||||
|
11
sql/item.cc
11
sql/item.cc
@ -854,7 +854,7 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
|
||||
cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen;
|
||||
return cnvitem;
|
||||
}
|
||||
return NULL;
|
||||
return Item::safe_charset_converter(tocs);
|
||||
}
|
||||
|
||||
|
||||
@ -1436,7 +1436,12 @@ left_is_superset(DTCollation *left, DTCollation *right)
|
||||
if (left->collation->state & MY_CS_UNICODE &&
|
||||
(left->derivation < right->derivation ||
|
||||
(left->derivation == right->derivation &&
|
||||
!(right->collation->state & MY_CS_UNICODE))))
|
||||
(!(right->collation->state & MY_CS_UNICODE) ||
|
||||
/* The code below makes 4-byte utf8 a superset over 3-byte utf8 */
|
||||
(left->collation->state & MY_CS_UNICODE_SUPPLEMENT &&
|
||||
!(right->collation->state & MY_CS_UNICODE_SUPPLEMENT) &&
|
||||
left->collation->mbmaxlen > right->collation->mbmaxlen &&
|
||||
left->collation->mbminlen == right->collation->mbminlen)))))
|
||||
return TRUE;
|
||||
/* Allow convert from ASCII */
|
||||
if (right->repertoire == MY_REPERTOIRE_ASCII &&
|
||||
@ -1695,7 +1700,7 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
|
||||
{
|
||||
Item* conv;
|
||||
uint32 dummy_offset;
|
||||
if (!String::needs_conversion(0, (*arg)->collation.collation,
|
||||
if (!String::needs_conversion(1, (*arg)->collation.collation,
|
||||
coll.collation,
|
||||
&dummy_offset))
|
||||
continue;
|
||||
|
@ -2371,17 +2371,27 @@ String *Item_func_char::val_str(String *str)
|
||||
int32 num=(int32) args[i]->val_int();
|
||||
if (!args[i]->null_value)
|
||||
{
|
||||
char char_num= (char) num;
|
||||
if (num&0xFF000000L) {
|
||||
str->append((char)(num>>24));
|
||||
goto b2;
|
||||
} else if (num&0xFF0000L) {
|
||||
b2: str->append((char)(num>>16));
|
||||
goto b1;
|
||||
} else if (num&0xFF00L) {
|
||||
b1: str->append((char)(num>>8));
|
||||
char tmp[4];
|
||||
if (num & 0xFF000000L)
|
||||
{
|
||||
mi_int4store(tmp, num);
|
||||
str->append(tmp, 4, &my_charset_bin);
|
||||
}
|
||||
else if (num & 0xFF0000L)
|
||||
{
|
||||
mi_int3store(tmp, num);
|
||||
str->append(tmp, 3, &my_charset_bin);
|
||||
}
|
||||
else if (num & 0xFF00L)
|
||||
{
|
||||
mi_int2store(tmp, num);
|
||||
str->append(tmp, 2, &my_charset_bin);
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp[0]= (char) num;
|
||||
str->append(tmp, 1, &my_charset_bin);
|
||||
}
|
||||
str->append(&char_num, 1);
|
||||
}
|
||||
}
|
||||
str->realloc(str->length()); // Add end 0 (for Purify)
|
||||
@ -2769,7 +2779,8 @@ String *Item_func_conv_charset::val_str(String *str)
|
||||
void Item_func_conv_charset::fix_length_and_dec()
|
||||
{
|
||||
collation.set(conv_charset, DERIVATION_IMPLICIT);
|
||||
max_length = args[0]->max_length*conv_charset->mbmaxlen;
|
||||
max_length = args[0]->max_length / args[0]->collation.collation->mbmaxlen *
|
||||
conv_charset->mbmaxlen;
|
||||
}
|
||||
|
||||
void Item_func_conv_charset::print(String *str, enum_query_type query_type)
|
||||
|
@ -412,11 +412,25 @@ bool String::append(const char *s)
|
||||
|
||||
bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
|
||||
{
|
||||
uint32 dummy_offset;
|
||||
uint32 offset;
|
||||
|
||||
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
|
||||
if (needs_conversion(arg_length, cs, str_charset, &offset))
|
||||
{
|
||||
uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
|
||||
uint32 add_length;
|
||||
if ((cs == &my_charset_bin) && offset)
|
||||
{
|
||||
DBUG_ASSERT(str_charset->mbminlen > offset);
|
||||
offset= str_charset->mbminlen - offset; // How many characters to pad
|
||||
add_length= arg_length + offset;
|
||||
if (realloc(str_length + add_length))
|
||||
return TRUE;
|
||||
bzero((char*) Ptr + str_length, offset);
|
||||
memcpy(Ptr + str_length + offset, s, arg_length);
|
||||
str_length+= add_length;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
|
||||
uint dummy_errors;
|
||||
if (realloc(str_length + add_length))
|
||||
return TRUE;
|
||||
@ -966,6 +980,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
|
||||
uint pad_length= to_cs->mbminlen - from_offset;
|
||||
bzero(to, pad_length);
|
||||
memmove(to + pad_length, from, from_offset);
|
||||
/*
|
||||
In some cases left zero-padding can create an incorrect character.
|
||||
For example:
|
||||
INSERT INTO t1 (utf32_column) VALUES (0x110000);
|
||||
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
|
||||
The valid characters range is limited to 0x00000000..0x0010FFFF.
|
||||
|
||||
Make sure we didn't pad to an incorrect character.
|
||||
*/
|
||||
if (to_cs->cset->well_formed_len(to_cs,
|
||||
to, to + to_cs->mbminlen, 1,
|
||||
&well_formed_error) !=
|
||||
to_cs->mbminlen)
|
||||
{
|
||||
*from_end_pos= *well_formed_error_pos= from;
|
||||
*cannot_convert_error_pos= NULL;
|
||||
return 0;
|
||||
}
|
||||
nchars--;
|
||||
from+= from_offset;
|
||||
from_length-= from_offset;
|
||||
|
@ -2776,7 +2776,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
|
||||
sql_field->interval_list);
|
||||
List_iterator<String> int_it(sql_field->interval_list);
|
||||
String conv, *tmp;
|
||||
char comma_buf[2];
|
||||
char comma_buf[4]; /* 4 bytes for utf32 */
|
||||
int comma_length= cs->cset->wc_mb(cs, ',', (uchar*) comma_buf,
|
||||
(uchar*) comma_buf +
|
||||
sizeof(comma_buf));
|
||||
|
@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs,
|
||||
|
||||
/* BINARY collations handlers for MB charsets */
|
||||
|
||||
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
my_bool t_is_prefix)
|
||||
int
|
||||
my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
my_bool t_is_prefix)
|
||||
{
|
||||
size_t len=min(slen,tlen);
|
||||
int cmp= memcmp(s,t,len);
|
||||
@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
0 if strings are equal
|
||||
*/
|
||||
|
||||
static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference)
|
||||
int
|
||||
my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference)
|
||||
{
|
||||
const uchar *end;
|
||||
size_t length;
|
||||
@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const char *s, const char *t)
|
||||
int
|
||||
my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const char *s, const char *t)
|
||||
{
|
||||
return strcmp(s,t);
|
||||
}
|
||||
|
||||
static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
|
||||
|
||||
void
|
||||
my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
|
||||
{
|
||||
const uchar *pos = key;
|
||||
|
||||
@ -787,10 +792,11 @@ fill_max_and_min:
|
||||
}
|
||||
|
||||
|
||||
static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
|
||||
const char *str,const char *str_end,
|
||||
const char *wildstr,const char *wildend,
|
||||
int escape, int w_one, int w_many)
|
||||
int
|
||||
my_wildcmp_mb_bin(CHARSET_INFO *cs,
|
||||
const char *str,const char *str_end,
|
||||
const char *wildstr,const char *wildend,
|
||||
int escape, int w_one, int w_many)
|
||||
{
|
||||
int result= -1; /* Not found, using wildcards */
|
||||
|
||||
|
1986
strings/ctype-uca.c
1986
strings/ctype-uca.c
File diff suppressed because it is too large
Load Diff
3916
strings/ctype-ucs2.c
3916
strings/ctype-ucs2.c
File diff suppressed because it is too large
Load Diff
1054
strings/ctype-utf8.c
1054
strings/ctype-utf8.c
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user