Backporting WL#1213

config/ac-macros/character_sets.m4:
  - Adding configure definitions for utf8mb4, utf16, utf32
include/config-win.h:
  - Enabling utf8mb4, utf16, utf32 in Windows build
include/m_ctype.h:
  - Adding new flags
  - Adding new shared functions prototypes
mysql-test/include/ctype_datetime.inc:
  - Adding test to check that datetime functions
    work with "real" multibyte character sets.
mysql-test/include/ctype_like.inc:
  - Adding LIKE tests
mysql-test/include/have_utf16.inc:
  New file
mysql-test/include/have_utf32.inc:
  New file
mysql-test/include/have_utf8mb4.inc:
  New file
mysql-test/r/ctype_ldml.result:
  - Adding tests for utf8mb4, utf16, utf32
mysql-test/r/ctype_many.result:
  - Adding tests to check superset/subset relations
    between all Unicode character sets.
mysql-test/r/ctype_utf16.result:
  New file
mysql-test/r/ctype_utf16_uca.result:
  New file
mysql-test/r/ctype_utf32.result:
  New file
mysql-test/r/ctype_utf32_uca.result:
  New file
mysql-test/r/ctype_utf8.result:
  - Adding tests for utf8mn3 alias
mysql-test/r/ctype_utf8mb4.result:
  - Adding tests for utf8mb4
mysql-test/r/have_utf16.require:
  New file
mysql-test/r/have_utf32.require:
  New file
mysql-test/r/have_utf8mb4.require:
  New file
mysql-test/std_data/Index.xml:
  - Adding tests for loadable utf8m4, utf16, utf32 collations
mysql-test/suite/sys_vars/r/character_set_client_basic.result:
  - Adding tests for utf16, utf32.
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_connection_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_database_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_results_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/t/character_set_client_basic.test:
  - Adding tests for new character sets
mysql-test/suite/sys_vars/t/character_set_connection_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_database_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_results_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/t/ctype_ldml.test:
  - Adding tests for dynamic utf8mb4, utf16, utf32 collations
mysql-test/t/ctype_many.test:
  - Adding tests to check superset/subset relations
    between all Unicode character sets
mysql-test/t/ctype_utf16.test:
  New file
mysql-test/t/ctype_utf16_uca.test:
  New file
mysql-test/t/ctype_utf32.test:
  New file
mysql-test/t/ctype_utf32_uca.test:
  New file
mysql-test/t/ctype_utf8.test:
  - Adding tests for utf8mb4 alias
mysql-test/t/ctype_utf8mb4.test:
  New file
mysys/charset-def.c:
  - Adding initialization of utf8mb4, utf16, utf32 built-int collations
mysys/charset.c:
  - Adding initialization of utf8mb4, utf16, utf32 dynamic collations
sql/field.cc:
  - Fixing "truncated" error with datetime functions:
    Force conversion in case of non-ascii character sets.
sql/item.cc:
  - Adding superset/subset relation check for utf8mb4/utf8
sql/item_strfunc.cc:
  - Fixing a problem with CHAR(x USING utf32)
sql/sql_string.cc:
  - Fixing problems with zero padding for UTF32
sql/sql_table.cc:
  - Fixing buffer size, to make utf32 comma fit.
strings/ctype-mb.c:
  - Making handlers for multi-byte binary collations public
strings/ctype-uca.c:
  - Adding definitions for utf8mb4, utf16, utf32 UCA collations
strings/ctype-ucs2.c:
  - Adding functions which are shared between ucs2, utf16, utf32
  - Ading utf16 implementation
  - Adding utf32 implementation
strings/ctype-utf8.c:
  - Adding functions shared between utf8 and utf8mb4
  - Adding implementation of utf8mb4
This commit is contained in:
Alexander Barkov 2010-02-24 13:15:34 +04:00
parent d2af6c43c0
commit 8994fad85d
49 changed files with 19441 additions and 1208 deletions

View File

@ -13,11 +13,11 @@ define(CHARSETS_AVAILABLE1,armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257)
define(CHARSETS_AVAILABLE2,cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8)
define(CHARSETS_AVAILABLE3,greek hebrew hp8 keybcs2 koi8r koi8u)
define(CHARSETS_AVAILABLE4,latin1 latin2 latin5 latin7 macce macroman)
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8)
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32)
DEFAULT_CHARSET=latin1
CHARSETS_AVAILABLE="CHARSETS_AVAILABLE0 CHARSETS_AVAILABLE1 CHARSETS_AVAILABLE2 CHARSETS_AVAILABLE3 CHARSETS_AVAILABLE4 CHARSETS_AVAILABLE5"
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8"
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32"
AC_DIVERT_POP
@ -50,7 +50,7 @@ AC_ARG_WITH(extra-charsets,
AC_MSG_CHECKING("character sets")
CHARSETS="$default_charset latin1 utf8"
CHARSETS="$default_charset latin1 utf8mb4 utf8"
if test "$extra_charsets" = no; then
CHARSETS="$CHARSETS"
@ -195,8 +195,23 @@ do
AC_DEFINE([USE_MB], [1], [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf8mb4)
AC_DEFINE(HAVE_CHARSET_utf8mb4, 1, [Define to enable utf8mb4])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf8)
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable ut8])
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable utf8])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf16)
AC_DEFINE(HAVE_CHARSET_utf16, 1, [Define to enable utf16])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf32)
AC_DEFINE(HAVE_CHARSET_utf32, 1, [Define to enable utf32])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
@ -381,6 +396,48 @@ case $default_charset in
fi
default_charset_collations="$UTFC"
;;
utf8mb4)
default_charset_default_collation="utf8mb4_general_ci"
define(UTFC1, utf8mb4_general_ci utf8mb4_bin)
define(UTFC2, utf8mb4_czech_ci utf8mb4_danish_ci)
define(UTFC3, utf8mb4_esperanto_ci utf8mb4_estonian_ci utf8mb4_hungarian_ci)
define(UTFC4, utf8mb4_icelandic_ci utf8mb4_latvian_ci utf8mb4_lithuanian_ci)
define(UTFC5, utf8mb4_persian_ci utf8mb4_polish_ci utf8mb4_romanian_ci)
define(UTFC6, utf8mb4_sinhala_ci utf8mb4_slovak_ci utf8mb4_slovenian_ci)
define(UTFC7, utf8mb4_spanish2_ci utf8mb4_spanish_ci)
define(UTFC8, utf8mb4_swedish_ci utf8mb4_turkish_ci)
define(UTFC9, utf8mb4_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
utf16)
default_charset_default_collation="utf16_general_ci"
define(UTFC1, utf16_general_ci utf16_bin)
define(UTFC2, utf16_czech_ci utf16_danish_ci)
define(UTFC3, utf16_esperanto_ci utf16_estonian_ci utf16_hungarian_ci)
define(UTFC4, utf16_icelandic_ci utf16_latvian_ci utf16_lithuanian_ci)
define(UTFC5, utf16_persian_ci utf16_polish_ci utf16_romanian_ci)
define(UTFC6, utf16_sinhala_ci utf16_slovak_ci utf16_slovenian_ci)
define(UTFC7, utf16_spanish2_ci utf16_spanish_ci)
define(UTFC8, utf16_swedish_ci utf16_turkish_ci)
define(UTFC9, utf16_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
utf32)
default_charset_default_collation="utf32_general_ci"
define(UTFC1, utf32_general_ci utf32_bin)
define(UTFC2, utf32_czech_ci utf32_danish_ci)
define(UTFC3, utf32_esperanto_ci utf32_estonian_ci utf32_hungarian_ci)
define(UTFC4, utf32_icelandic_ci utf32_latvian_ci utf32_lithuanian_ci)
define(UTFC5, utf32_persian_ci utf32_polish_ci utf32_romanian_ci)
define(UTFC6, utf32_sinhala_ci utf32_slovak_ci utf32_slovenian_ci)
define(UTFC7, utf32_spanish2_ci utf32_spanish_ci)
define(UTFC8, utf32_swedish_ci utf32_turkish_ci)
define(UTFC9, utf32_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
*)
AC_MSG_ERROR([Charset $cs not available. (Available are: $CHARSETS_AVAILABLE).
See the Installation chapter in the Reference Manual.])

View File

@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d)
#define HAVE_CHARSET_ucs2 1
#define HAVE_CHARSET_ujis 1
#define HAVE_CHARSET_utf8 1
#define HAVE_CHARSET_utf8mb4 1
#define HAVE_CHARSET_utf16 1
#define HAVE_CHARSET_utf32 1
#define HAVE_UCA_COLLATIONS 1
#define HAVE_BOOL 1

View File

@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_BINSORT 16 /* if binary sort order */
#define MY_CS_PRIMARY 32 /* if primary collation */
#define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_UNICODE 128 /* is a charset is BMP Unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CS_HIDDEN 2048 /* don't display in SHOW */
#define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
#define MY_CHARSET_UNDEFINED 0
/* Character repertoire flags */
@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
#define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
typedef struct my_uni_idx_st
{
uint16 from;
@ -304,10 +304,14 @@ typedef struct charset_info_st
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
extern CHARSET_INFO my_charset_big5_chinese_ci;
extern CHARSET_INFO my_charset_big5_bin;
extern CHARSET_INFO my_charset_cp932_japanese_ci;
extern CHARSET_INFO my_charset_cp932_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
extern CHARSET_INFO my_charset_eucjpms_bin;
extern CHARSET_INFO my_charset_euckr_korean_ci;
@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci;
extern CHARSET_INFO my_charset_gb2312_bin;
extern CHARSET_INFO my_charset_gbk_chinese_ci;
extern CHARSET_INFO my_charset_gbk_bin;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
extern CHARSET_INFO my_charset_latin1_german2_ci;
extern CHARSET_INFO my_charset_latin1_bin;
extern CHARSET_INFO my_charset_latin2_czech_ci;
@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin;
extern CHARSET_INFO my_charset_ucs2_unicode_ci;
extern CHARSET_INFO my_charset_ujis_japanese_ci;
extern CHARSET_INFO my_charset_ujis_bin;
extern CHARSET_INFO my_charset_utf16_bin;
extern CHARSET_INFO my_charset_utf16_general_ci;
extern CHARSET_INFO my_charset_utf16_unicode_ci;
extern CHARSET_INFO my_charset_utf32_bin;
extern CHARSET_INFO my_charset_utf32_general_ci;
extern CHARSET_INFO my_charset_utf32_unicode_ci;
extern CHARSET_INFO my_charset_utf8_general_ci;
extern CHARSET_INFO my_charset_utf8_unicode_ci;
extern CHARSET_INFO my_charset_utf8_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
extern CHARSET_INFO my_charset_utf8mb4_bin;
extern CHARSET_INFO my_charset_utf8mb4_general_ci;
extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
#define MY_UTF8MB3 "utf8"
#define MY_UTF8MB4 "utf8mb4"
/* declarations for simple charsets */
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
@ -430,6 +444,19 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf16(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf32(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,
@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *,
const char *s, size_t s_length,
my_match_t *match, uint nmatch);
int my_strnncoll_mb_bin(CHARSET_INFO * cs,
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix);
int my_strnncollsp_mb_bin(CHARSET_INFO *cs,
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference);
int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many);
int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t);
void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2);
size_t my_strnxfrm_unicode(CHARSET_INFO *,
uchar *dst, size_t dstlen,
const uchar *src, size_t srclen);
int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr, const char *wildend,

View File

@ -0,0 +1,11 @@
#
# Bug#32390 Character sets: casting utf32 to/from date doesn't work
#
CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0;
SET timestamp=1216359724;
INSERT INTO t1 VALUES (current_date);
INSERT INTO t1 VALUES (current_time);
INSERT INTO t1 VALUES (current_timestamp);
SELECT s1, hex(s1) FROM t1;
DROP TABLE t1;
SET timestamp=0;

View File

@ -0,0 +1,50 @@
select @@collation_connection;
#
# Create a table with a nullable varchar(10) column
# using currect character_set_connection.
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
explain select * from t1 where a like concat('abc','%');
select * from t1 where a like "abc%";
select * from t1 where a like concat("abc","%");
select * from t1 where a like "ABC%";
select * from t1 where a like "test%";
select * from t1 where a like "te_t";
select * from t1 where a like "%a%";
select * from t1 where a like "%abcd%";
select * from t1 where a like "%abc\d%";
drop table t1;
#
# Bug #2619 ucs2 LIKE comparison fails in some cases
#
select 'AA' like 'AA';
select 'AA' like 'A%A';
select 'AA' like 'A%%A';
select 'AA' like 'AA%';
select 'AA' like '%AA%';
select 'AA' like '%A';
select 'AA' like '%AA';
select 'AA' like 'A%A%';
select 'AA' like '_%_%';
select 'AA' like '%A%A';
select 'AAA'like 'A%A%A';
select 'AZ' like 'AZ';
select 'AZ' like 'A%Z';
select 'AZ' like 'A%%Z';
select 'AZ' like 'AZ%';
select 'AZ' like '%AZ%';
select 'AZ' like '%Z';
select 'AZ' like '%AZ';
select 'AZ' like 'A%Z%';
select 'AZ' like '_%_%';
select 'AZ' like '%A%Z';
select 'AZ' like 'A_';
select 'AZ' like '_Z';
select 'AMZ'like 'A%M%Z';

View File

@ -0,0 +1,4 @@
-- require r/have_utf16.require
disable_query_log;
show collation like 'utf16_general_ci';
enable_query_log;

View File

@ -0,0 +1,4 @@
-- require r/have_utf32.require
disable_query_log;
show collation like 'utf32_general_ci';
enable_query_log;

View File

@ -0,0 +1,7 @@
--require r/have_utf8mb4.require
--disable_query_log
SHOW COLLATION LIKE 'utf8mb4_general_ci';
--enable_query_log

View File

@ -53,6 +53,33 @@ select * from t1 where c1='b';
c1
a
drop table t1;
show collation like 'utf8mb4_test_ci';
Collation Charset Id Default Compiled Sortlen
utf8mb4_test_ci utf8mb4 326 8
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
c1
a
drop table t1;
show collation like 'utf16_test_ci';
Collation Charset Id Default Compiled Sortlen
utf16_test_ci utf16 327 8
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
c1
a
drop table t1;
show collation like 'utf32_test_ci';
Collation Charset Id Default Compiled Sortlen
utf32_test_ci utf32 391 8
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
c1
a
drop table t1;
CREATE TABLE t1 (
col1 varchar(100) character set utf8 collate utf8_test_ci
);
@ -373,16 +400,22 @@ select "foo" = "foo " collate latin1_test;
The following tests check that two-byte collation IDs work
select * from information_schema.collations where id>256 order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf8_phone_ci utf8 352 8
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
ucs2_vn_ci ucs2 359 8
utf32_test_ci utf32 391 8
utf8_maxuserid_ci utf8 2047 8
show collation like '%test%';
Collation Charset Id Default Compiled Sortlen
latin1_test latin1 99 Yes 1
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf32_test_ci utf32 391 8
show collation like 'ucs2_vn_ci';
Collation Charset Id Default Compiled Sortlen
ucs2_vn_ci ucs2 359 8

View File

@ -1683,3 +1683,59 @@ ARMENIAN CAPIT DA 2
ARMENIAN CAPIT ECH 2
ARMENIAN CAPIT ZA 2
DROP TABLE t1;
#
# WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
# Testing that only utf8mb4 is superset for utf8
# No other Unicode character set pairs have superset/subset relations
#
CREATE TABLE t1 (
utf8 CHAR CHARACTER SET utf8,
utf8mb4 CHAR CHARACTER SET utf8mb4,
ucs2 CHAR CHARACTER SET ucs2,
utf16 CHAR CHARACTER SET utf16,
utf32 CHAR CHARACTER SET utf32
);
INSERT INTO t1 VALUES ('','','','','');
SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
CHARSET(CONCAT(utf8, utf8mb4))
utf8mb4
SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
CHARSET(CONCAT(utf8mb4, utf8))
utf8mb4
SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
DROP TABLE t1;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1899,6 +1899,20 @@ CONVERT(a, CHAR) CONVERT(b, CHAR)
DROP TABLE t1;
End of 5.0 tests
Start of 5.4 tests
SET NAMES utf8mb3;
SHOW VARIABLES LIKE 'character_set_results%';
Variable_name Value
character_set_results utf8
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(1) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
SELECT _utf8mb3'test';
test
test
CREATE TABLE t1 (
clipid INT NOT NULL,
Tape TINYTEXT,

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Collation Charset Id Default Compiled Sortlen
utf16_general_ci utf16 54 Yes Yes 1

View File

@ -0,0 +1,2 @@
Collation Charset Id Default Compiled Sortlen
utf32_general_ci utf32 60 Yes Yes 1

View File

@ -0,0 +1,2 @@
Collation Charset Id Default Compiled Sortlen
utf8mb4_general_ci utf8mb4 45 Yes Yes 1

View File

@ -33,6 +33,36 @@
</collation>
</charset>
<charset name="utf8mb4">
<collation name="utf8mb4_test_ci" id="326">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="utf16">
<collation name="utf16_test_ci" id="327">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="utf32">
<collation name="utf32_test_ci" id="391">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="ucs2">
<collation name="ucs2_test_ci" id="358">
<rules>

View File

@ -162,8 +162,16 @@ SET @@character_set_client = utf8;
SELECT @@character_set_client;
@@character_set_client
utf8
SET @@character_set_client = utf8mb4;
SELECT @@character_set_client;
@@character_set_client
utf8mb4
SET @@character_set_client = ucs2;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'ucs2'
SET @@character_set_client = utf16;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf16'
SET @@character_set_client = utf32;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf32'
SET @@character_set_client = cp866;
SELECT @@character_set_client;
@@character_set_client
@ -422,7 +430,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_010_10-------------------------#'
SET @@character_set_client = abc;
ERROR 42000: Unknown character set: 'abc'

View File

@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_011_10-------------------------#'
SET @@character_set_connection = abc;
ERROR 42000: Unknown character set: 'abc'

View File

@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_012_10-------------------------#'
SET @@character_set_database = "grek";
ERROR 42000: Unknown character set: 'grek'

View File

@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_008_10-------------------------#'
SET @@character_set_filesystem = abc;
ERROR 42000: Unknown character set: 'abc'

View File

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################
@ -163,9 +166,15 @@ SET @@character_set_client = armscii8;
SELECT @@character_set_client;
SET @@character_set_client = utf8;
SELECT @@character_set_client;
SET @@character_set_client = utf8mb4;
SELECT @@character_set_client;
--error ER_WRONG_VALUE_FOR_VAR
SET @@character_set_client = ucs2;
--error ER_WRONG_VALUE_FOR_VAR
SET @@character_set_client = utf16;
--error ER_WRONG_VALUE_FOR_VAR
SET @@character_set_client = utf32;
SET @@character_set_client = cp866;
SELECT @@character_set_client;

View File

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################

View File

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################

View File

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc

View File

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
################################################

View File

@ -1,4 +1,7 @@
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--disable_warnings
drop table if exists t1;
@ -40,6 +43,24 @@ insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
show collation like 'utf8mb4_test_ci';
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
show collation like 'utf16_test_ci';
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
show collation like 'utf32_test_ci';
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
#
# Bug#41084 full-text index added to custom UCA collation not working

View File

@ -1,4 +1,7 @@
-- source include/have_ucs2.inc
-- source include/have_utf8mb4.inc
-- source include/have_utf16.inc
-- source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
@ -211,3 +214,73 @@ SELECT min(comment),count(*) FROM t1 GROUP BY ucs2_f;
DROP TABLE t1;
# End of 4.1 tests
--echo #
--echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
--echo # Testing that only utf8mb4 is superset for utf8
--echo # No other Unicode character set pairs have superset/subset relations
--echo #
CREATE TABLE t1 (
utf8 CHAR CHARACTER SET utf8,
utf8mb4 CHAR CHARACTER SET utf8mb4,
ucs2 CHAR CHARACTER SET ucs2,
utf16 CHAR CHARACTER SET utf16,
utf32 CHAR CHARACTER SET utf32
);
INSERT INTO t1 VALUES ('','','','','');
# utf8mb4 is superset only for utf8
SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
# utf8mb4 is superset only for utf8
SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
# ucs2 is not a superset for the other Unicode character sets
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
# utf16 is not a superset for the other Unicode character sets
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
# utf32 is not a superset for the other Unicode character sets
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
DROP TABLE t1;

View File

@ -0,0 +1,731 @@
-- source include/have_utf16.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
SET NAMES latin1;
SET character_set_connection=utf16;
select hex('a'), hex('a ');
-- source include/endspace.inc
# Check that incomplete utf16 characters in HEX notation
# are left-padded with zeros
#
select hex(_utf16 0x44);
select hex(_utf16 0x3344);
select hex(_utf16 0x113344);
# Check that 0x20 is only trimmed when it is
# a part of real SPACE character, not just a part
# of a multibyte sequence.
# Note, CYRILLIC LETTER ER is used as an example, which
# is stored as 0x0420 in utf16, thus contains 0x20 in the
# low byte. The second character is THREE-PER-M, U+2004,
# which contains 0x20 in the high byte.
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DELETE FROM t1;
#
# Check that real spaces are correctly trimmed.
#
INSERT INTO t1 VALUES (X'042000200020',X'042000200020'), (X'200400200020', X'200400200020');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DROP TABLE t1;
#
# Check LPAD/RPAD
#
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'0421'));
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422'));
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
SELECT hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
SELECT hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
SELECT hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'0421'));
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422'));
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
SELECT hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
SELECT hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
SELECT hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
CREATE TABLE t1 SELECT
LPAD(_utf16 X'0420',10,_utf16 X'0421') l,
RPAD(_utf16 X'0420',10,_utf16 X'0421') r;
SHOW CREATE TABLE t1;
select hex(l), hex(r) from t1;
DROP TABLE t1;
create table t1 (f1 char(30));
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
select lpad(f1, 12, "-o-/") from t1;
drop table t1;
######################################################
#
# Test of like
#
SET NAMES latin1;
SET character_set_connection=utf16;
--source include/ctype_like.inc
SET NAMES utf8;
SET character_set_connection=utf16;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16_bin ORDER BY BINARY a;
DROP TABLE t1;
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
ENGINE=MyISAM CHARACTER SET utf16;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
SELECT * FROM t1 WHERE word LIKE "ca_";
SELECT * FROM t1 WHERE word LIKE "cat";
SELECT * FROM t1 WHERE word LIKE _utf16 x'00630025'; # "c%"
SELECT * FROM t1 WHERE word LIKE _utf16 x'00630061005F'; # "ca_"
DROP TABLE t1;
#
# Check that INSERT() works fine.
# This invokes charpos() function.
select insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066);
select insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066);
########################################################
#
# Bug 1264
#
# Description:
#
# When using a ucs2 table in MySQL,
# either with ucs2_general_ci or ucs2_bin collation,
# words are returned in an incorrect order when using ORDER BY
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
# the longest word *first* instead of last. I.E. The word "aardvark"
# is in the results before the word "a".
#
# If there is no index for the column, the problem does not occur.
#
# Interestingly, if there is no second column, the words are returned
# in the correct order.
#
# According to EXPLAIN, it looks like when the output includes columns that
# are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results.
SET NAMES latin1;
#
# Two fields, index
#
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) default 0,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16
COLLATE utf16_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER by word;
DROP TABLE t1;
#
# One field, index
#
CREATE TABLE t1 (
word VARCHAR(64) ,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16
COLLATE utf16_general_ci;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
SELECT * FROM t1 ORDER BY word;
DROP TABLE t1;
#
# Two fields, no index
#
CREATE TABLE t1 (
word TEXT,
bar INT(11) AUTO_INCREMENT,
PRIMARY KEY (bar))
ENGINE=MyISAM
CHARSET utf16
COLLATE utf16_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a" );
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER BY word;
DROP TABLE t1;
#
# END OF Bug 1264 test
#
########################################################
#
# Check alignment for from-binary-conversion with CAST and CONVERT
#
SELECT hex(cast(0xAA as char character set utf16));
SELECT hex(convert(0xAA using utf16));
#
# Check alignment for string types
#
CREATE TABLE t1 (a char(10) character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a varchar(10) character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a text character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a mediumtext character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a longtext character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
##
## Bug #5024 Server crashes with queries on fields
## with certain charset/collation settings
##
##
#create table t1 (s1 char character set utf16 collate utf16_czech_ci);
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
#select s1 from t1 where s1 > 'a' order by s1;
#drop table t1;
#
#
# Bug #5081 : UCS2 fields are filled with '0x2020'
# after extending field length
#
create table t1(a char(1)) default charset utf16;
insert into t1 values ('a'),('b'),('c');
alter table t1 modify a char(5);
select a, hex(a) from t1;
drop table t1;
#
# Check prepare statement from an UTF16 string
#
set @ivar= 1234;
set @str1 = 'select ?';
set @str2 = convert(@str1 using utf16);
prepare stmt1 from @str2;
execute stmt1 using @ivar;
#
# Check that utf16 works with ENUM and SET type
#
set names utf8;
create table t1 (a enum('x','y','z') character set utf16);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
select a, hex(a) from t1 order by a;
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16;
show create table t1;
insert into t1 values ('D');
insert into t1 values ('E ');
insert into t1 values ('ä');
insert into t1 values ('ö');
insert into t1 values ('ü');
select a, hex(a) from t1 order by a;
drop table t1;
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf16);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
insert into t1 values ('x,y');
insert into t1 values ('x,y,z,ä,ö,ü');
select a, hex(a) from t1 order by a;
drop table t1;
#
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
#
create table t1(a enum('a','b','c')) default character set utf16;
insert into t1 values('a'),('b'),('c');
alter table t1 add b char(1);
show warnings;
select * from t1 order by a;
drop table t1;
SET NAMES latin1;
SET collation_connection='utf16_general_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
SET NAMES latin1;
SET collation_connection='utf16_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
#
# Bug#10344 Some string functions fail for UCS2
#
select hex(substr(_utf16 0x00e400e50068,1));
select hex(substr(_utf16 0x00e400e50068,2));
select hex(substr(_utf16 0x00e400e50068,3));
select hex(substr(_utf16 0x00e400e50068,-1));
select hex(substr(_utf16 0x00e400e50068,-2));
select hex(substr(_utf16 0x00e400e50068,-3));
select hex(substr(_utf16 0x00e400e5D800DC00,1));
select hex(substr(_utf16 0x00e400e5D800DC00,2));
select hex(substr(_utf16 0x00e400e5D800DC00,3));
select hex(substr(_utf16 0x00e400e5D800DC00,-1));
select hex(substr(_utf16 0x00e400e5D800DC00,-2));
select hex(substr(_utf16 0x00e400e5D800DC00,-3));
SET NAMES latin1;
##
## Bug#8235
##
## This bug also helped to find another problem that
## INSERT of a UCS2 string containing a negative number
## into a unsigned int column didn't produce warnings.
## This test covers both problems.
##
##SET collation_connection='ucs2_swedish_ci';
##CREATE TABLE t1 (Field1 int(10) default '0');
### no warnings, negative numbers are allowed
##INSERT INTO t1 VALUES ('-1');
##SELECT * FROM t1;
##DROP TABLE t1;
##CREATE TABLE t1 (Field1 int(10) unsigned default '0');
### this should generate a "Data truncated" warning
##INSERT INTO t1 VALUES ('-1');
##DROP TABLE t1;
##SET NAMES latin1;
###
### Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
###
##--disable_warnings
##create table t1(f1 varchar(5) CHARACTER SET utf16 COLLATE utf16_bin NOT NULL) engine=InnoDB;
##--enable_warnings
##insert into t1 values('a');
##create index t1f1 on t1(f1);
##select f1 from t1 where f1 like 'a%';
##drop table t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf16);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
#
create table t1 (
a char(10) character set utf16 not null,
index a (a)
) engine=myisam;
insert into t1 values (repeat(0x201f, 10));
insert into t1 values (repeat(0x2020, 10));
insert into t1 values (repeat(0x2021, 10));
# make sure "index read" is used
explain select hex(a) from t1 order by a;
select hex(a) from t1 order by a;
alter table t1 drop index a;
select hex(a) from t1 order by a;
drop table t1;
##
## Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
## over a 'ucs2' field uses a temporary table
##
##CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
##INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
##SELECT id, MIN(s) FROM t1 GROUP BY id;
##DROP TABLE t1;
###
### Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
###
##
##--disable_warnings
##drop table if exists bug20536;
##--enable_warnings
##
##set names latin1;
##create table bug20536 (id bigint not null auto_increment primary key, name
##varchar(255) character set ucs2 not null);
##insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
##select md5(name) from bug20536;
##select sha1(name) from bug20536;
##select make_set(3, name, upper(name)) from bug20536;
##select export_set(5, name, upper(name)) from bug20536;
##select export_set(5, name, upper(name), ",", 5) from bug20536;
#
# Bug #20108: corrupted default enum value for a ucs2 field
#
CREATE TABLE t1 (
status enum('active','passive') character set utf16 collate utf16_general_ci
NOT NULL default 'passive'
);
SHOW CREATE TABLE t1;
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
SHOW CREATE TABLE t1;
DROP TABLE t1;
##CREATE TABLE t2 (
## status enum('active','passive') collate ucs2_turkish_ci
## NOT NULL default 'passive'
##);
##SHOW CREATE TABLE t2;
##ALTER TABLE t2 ADD a int NOT NULL AFTER status;
##DROP TABLE t2;
--echo End of 4.1 tests
#
# Conversion from an UTF16 string to a decimal column
#
CREATE TABLE t1 (a varchar(64) character set utf16, b decimal(10,3));
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
update t1 set b=a;
SELECT *, hex(a) FROM t1;
DROP TABLE t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf16);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22638 SOUNDEX broken for international characters
#
set names latin1;
set character_set_connection=utf16;
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
select 'mood' sounds like 'mud';
# Cyrillic A, BE, VE
select hex(soundex(_utf16 0x041004110412));
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
select hex(soundex(_utf16 0x00BF00C0));
set names latin1;
#
# Bug #14290: character_maximum_length for text fields
#
create table t1(a blob, b text charset utf16);
select data_type, character_octet_length, character_maximum_length
from information_schema.columns where table_name='t1';
drop table t1;
set names latin1;
set collation_connection=utf16_general_ci;
#
# Testing cs->coll->instr()
#
select position('bb' in 'abba');
#
# Testing cs->coll->hash_sort()
#
create table t1 (a varchar(10) character set utf16) engine=heap;
insert into t1 values ('a'),('A'),('b'),('B');
select * from t1 where a='a' order by binary a;
select hex(min(binary a)),count(*) from t1 group by a;
drop table t1;
#
# Testing cs->cset->numchars()
#
select char_length('abcd'), octet_length('abcd');
select char_length(_utf16 0xD800DC00), octet_length(_utf16 0xD800DC00);
select char_length(_utf16 0xD87FDFFF), octet_length(_utf16 0xD87FDFFF);
#
# Testing cs->cset->charpos()
#
select left('abcd',2);
select hex(left(_utf16 0xD800DC00D87FDFFF, 1));
select hex(right(_utf16 0xD800DC00D87FDFFF, 1));
#
# Testing cs->cset->well_formed_length()
#
create table t1 (a varchar(10) character set utf16);
# Bad sequences
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD800);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xDC00);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD800D800);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD800E800);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD8000800);
# Good sequences
insert into t1 values (_utf16 0xD800DC00);
insert into t1 values (_utf16 0xD800DCFF);
insert into t1 values (_utf16 0xDBFFDC00);
insert into t1 values (_utf16 0xDBFFDCFF);
select hex(a) from t1;
drop table t1;
#
# Bug#32393 Character sets: illegal characters in utf16 columns
#
# Tests that cs->cset->wc_mb() doesn't accept surrogate parts
#
# via alter
#
create table t1 (s1 varchar(50) character set ucs2);
insert into t1 values (0xdf84);
alter table t1 modify column s1 varchar(50) character set utf16;
select hex(s1) from t1;
drop table t1;
#
# via update
#
create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16);
insert into t1 (s1) values (0xdf84);
update t1 set s2 = s1;
select hex(s2) from t1;
drop table t1;
#
# Testing cs->cset->lengthsp()
#
create table t1 (a char(10)) character set utf16;
insert into t1 values ('a ');
select hex(a) from t1;
drop table t1;
#
# Testing cs->cset->caseup() and cs->cset->casedn()
#
select upper('abcd'), lower('ABCD');
#
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf16
# Testing cs->cset->snprintf()
#
#create table t1 (a date);
#insert into t1 values ('2007-09-16');
#select * from t1;
#drop table t1;
#
# Testing cs->cset->l10tostr
# !!! Not used in the code
#
# Testing cs->cset->ll10tostr
#
create table t1 (a varchar(10) character set utf16);
insert into t1 values (123456);
select a, hex(a) from t1;
drop table t1;
# Testing cs->cset->fill
# SOUNDEX fills strings with DIGIT ZERO up to four characters
select hex(soundex('a'));
#
# Testing cs->cset->strntol
# !!! Not used in the code
#
# Testing cs->cset->strntoul
#
create table t1 (a enum ('a','b','c')) character set utf16;
insert into t1 values ('1');
select * from t1;
drop table t1;
#
# Testing cs->cset->strntoll and cs->cset->strntoull
#
set names latin1;
select hex(conv(convert('123' using utf16), -10, 16));
select hex(conv(convert('123' using utf16), 10, 16));
#
# Testing cs->cset->strntod
#
set names latin1;
set character_set_connection=utf16;
select 1.1 + '1.2';
select 1.1 + '1.2xxx';
# Testing strntoll10_utf16
# Testing cs->cset->strtoll10
select left('aaa','1');
#
# Testing cs->cset->strntoull10rnd
#
create table t1 (a int);
insert into t1 values ('-1234.1e2');
insert into t1 values ('-1234.1e2xxxx');
insert into t1 values ('-1234.1e2 ');
select * from t1;
drop table t1;
#
# Testing cs->cset->scan
#
create table t1 (a int);
insert into t1 values ('1 ');
insert into t1 values ('1 x');
select * from t1;
drop table t1;
#
# Testing auto-conversion to TEXT
#
create table t1 (a varchar(17000) character set utf16);
show create table t1;
drop table t1;
#
# Testing that maximim possible key length is 1000 bytes
#
create table t1 (a varchar(250) character set utf16 primary key);
show create table t1;
drop table t1;
--error ER_TOO_LONG_KEY
create table t1 (a varchar(334) character set utf16 primary key);
#
# Conversion to utf8
#
create table t1 (a char(1) character set utf16);
insert into t1 values (0xD800DC00),(0xD800DCFF),(0xDB7FDC00),(0xDB7FDCFF);
insert into t1 values (0x00C0), (0x00FF),(0xE000), (0xFFFF);
select hex(a), hex(@a:=convert(a using utf8mb4)), hex(convert(@a using utf16)) from t1;
drop table t1;
#
# Test basic regex functionality
#
set collation_connection=utf16_general_ci;
--source include/ctype_regex.inc
set names latin1;
#
# Test how character set works with date/time
#
SET collation_connection=utf16_general_ci;
--source include/ctype_datetime.inc
SET NAMES latin1;
#
# Bug#33073 Character sets: ordering fails with utf32
#
SET collation_connection=utf16_general_ci;
CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
SELECT * FROM t1 ORDER BY s1;
SET max_sort_length=4;
SELECT * FROM t1 ORDER BY s1;
DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
#
## TODO: add tests for all engines
#
--echo #
--echo # End of 5.5 tests
--echo #

View File

@ -0,0 +1,290 @@
-- source include/have_utf16.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
set names utf8;
set collation_connection=utf16_unicode_ci;
select hex('a'), hex('a ');
-- source include/endspace.inc
#
# Bug #6787 LIKE not working properly with _ and utf8 data
#
select 'c' like '\_' as want0;
#
# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
#
CREATE TABLE t (
c char(20) NOT NULL
) ENGINE=MyISAM DEFAULT CHARACTER SET utf16 COLLATE utf16_unicode_ci;
INSERT INTO t VALUES ('a'),('ab'),('aba');
ALTER TABLE t ADD INDEX (c);
SELECT c FROM t WHERE c LIKE 'a%';
DROP TABLE t;
create table t1 (c1 char(10) character set utf16 collate utf16_bin);
#
# Basic Latin
#
insert into t1 values ('A'),('a');
insert into t1 values ('B'),('b');
insert into t1 values ('C'),('c');
insert into t1 values ('D'),('d');
insert into t1 values ('E'),('e');
insert into t1 values ('F'),('f');
insert into t1 values ('G'),('g');
insert into t1 values ('H'),('h');
insert into t1 values ('I'),('i');
insert into t1 values ('J'),('j');
insert into t1 values ('K'),('k');
insert into t1 values ('L'),('l');
insert into t1 values ('M'),('m');
insert into t1 values ('N'),('n');
insert into t1 values ('O'),('o');
insert into t1 values ('P'),('p');
insert into t1 values ('Q'),('q');
insert into t1 values ('R'),('r');
insert into t1 values ('S'),('s');
insert into t1 values ('T'),('t');
insert into t1 values ('U'),('u');
insert into t1 values ('V'),('v');
insert into t1 values ('W'),('w');
insert into t1 values ('X'),('x');
insert into t1 values ('Y'),('y');
insert into t1 values ('Z'),('z');
#
# Latin1 suppliment
#
insert into t1 values (0x00e0),(0x00c0);
insert into t1 values (0x00e1),(0x00c1);
insert into t1 values (0x00e2),(0x00c2);
insert into t1 values (0x00e3),(0x00c3);
insert into t1 values (0x00e4),(0x00c4);
insert into t1 values (0x00e5),(0x00c5);
insert into t1 values (0x00e6),(0x00c6);
insert into t1 values (0x00e7),(0x00c7);
insert into t1 values (0x00e8),(0x00c8);
insert into t1 values (0x00e9),(0x00c9);
insert into t1 values (0x00ea),(0x00ca);
insert into t1 values (0x00eb),(0x00cb);
insert into t1 values (0x00ec),(0x00cc);
insert into t1 values (0x00ed),(0x00cd);
insert into t1 values (0x00ee),(0x00ce);
insert into t1 values (0x00ef),(0x00cf);
insert into t1 values (0x00f0),(0x00d0);
insert into t1 values (0x00f1),(0x00d1);
insert into t1 values (0x00f2),(0x00d2);
insert into t1 values (0x00f3),(0x00d3);
insert into t1 values (0x00f4),(0x00d4);
insert into t1 values (0x00f5),(0x00d5);
insert into t1 values (0x00f6),(0x00d6);
insert into t1 values (0x00f7),(0x00d7);
insert into t1 values (0x00f8),(0x00d8);
insert into t1 values (0x00f9),(0x00d9);
insert into t1 values (0x00fa),(0x00da);
insert into t1 values (0x00fb),(0x00db);
insert into t1 values (0x00fc),(0x00dc);
insert into t1 values (0x00fd),(0x00dd);
insert into t1 values (0x00fe),(0x00de);
insert into t1 values (0x00ff),(0x00df);
#
# Latin extended-A, 0100-017F
#
insert into t1 values (0x0100),(0x0101),(0x0102),(0x0103);
insert into t1 values (0x0104),(0x0105),(0x0106),(0x0107);
insert into t1 values (0x0108),(0x0109),(0x010a),(0x010b);
insert into t1 values (0x010c),(0x010d),(0x010e),(0x010f);
insert into t1 values (0x0110),(0x0111),(0x0112),(0x0113);
insert into t1 values (0x0114),(0x0115),(0x0116),(0x0117);
insert into t1 values (0x0118),(0x0119),(0x011a),(0x011b);
insert into t1 values (0x011c),(0x011d),(0x011e),(0x011f);
insert into t1 values (0x0120),(0x0121),(0x0122),(0x0123);
insert into t1 values (0x0124),(0x0125),(0x0126),(0x0127);
insert into t1 values (0x0128),(0x0129),(0x012a),(0x012b);
insert into t1 values (0x012c),(0x012d),(0x012e),(0x012f);
insert into t1 values (0x0130),(0x0131),(0x0132),(0x0133);
insert into t1 values (0x0134),(0x0135),(0x0136),(0x0137);
insert into t1 values (0x0138),(0x0139),(0x013a),(0x013b);
insert into t1 values (0x013c),(0x013d),(0x013e),(0x013f);
insert into t1 values (0x0140),(0x0141),(0x0142),(0x0143);
insert into t1 values (0x0144),(0x0145),(0x0146),(0x0147);
insert into t1 values (0x0148),(0x0149),(0x014a),(0x014b);
insert into t1 values (0x014c),(0x014d),(0x014e),(0x014f);
insert into t1 values (0x0150),(0x0151),(0x0152),(0x0153);
insert into t1 values (0x0154),(0x0155),(0x0156),(0x0157);
insert into t1 values (0x0158),(0x0159),(0x015a),(0x015b);
insert into t1 values (0x015c),(0x015d),(0x015e),(0x015f);
insert into t1 values (0x0160),(0x0161),(0x0162),(0x0163);
insert into t1 values (0x0164),(0x0165),(0x0166),(0x0167);
insert into t1 values (0x0168),(0x0169),(0x016a),(0x016b);
insert into t1 values (0x016c),(0x016d),(0x016e),(0x016f);
insert into t1 values (0x0170),(0x0171),(0x0172),(0x0173);
insert into t1 values (0x0174),(0x0175),(0x0176),(0x0177);
insert into t1 values (0x0178),(0x0179),(0x017a),(0x017b);
insert into t1 values (0x017c),(0x017d),(0x017e),(0x017f);
#
# Latin extended-B, 0180-024F
#
insert into t1 values (0x0180),(0x0181),(0x0182),(0x0183);
insert into t1 values (0x0184),(0x0185),(0x0186),(0x0187);
insert into t1 values (0x0188),(0x0189),(0x018a),(0x018b);
insert into t1 values (0x018c),(0x018d),(0x018e),(0x018f);
insert into t1 values (0x0190),(0x0191),(0x0192),(0x0193);
insert into t1 values (0x0194),(0x0195),(0x0196),(0x0197);
insert into t1 values (0x0198),(0x0199),(0x019a),(0x019b);
insert into t1 values (0x019c),(0x019d),(0x019e),(0x019f);
insert into t1 values (0x01a0),(0x01a1),(0x01a2),(0x01a3);
insert into t1 values (0x01a4),(0x01a5),(0x01a6),(0x01a7);
insert into t1 values (0x01a8),(0x01a9),(0x01aa),(0x01ab);
insert into t1 values (0x01ac),(0x01ad),(0x01ae),(0x01af);
insert into t1 values (0x01b0),(0x01b1),(0x01b2),(0x01b3);
insert into t1 values (0x01b4),(0x01b5),(0x01b6),(0x01b7);
insert into t1 values (0x01b8),(0x01b9),(0x01ba),(0x01bb);
insert into t1 values (0x01bc),(0x01bd),(0x01be),(0x01bf);
insert into t1 values (0x01c0),(0x01c1),(0x01c2),(0x01c3);
insert into t1 values (0x01c4),(0x01c5),(0x01c6),(0x01c7);
insert into t1 values (0x01c8),(0x01c9),(0x01ca),(0x01cb);
insert into t1 values (0x01cc),(0x01cd),(0x01ce),(0x01cf);
insert into t1 values (0x01d0),(0x01d1),(0x01d2),(0x01d3);
insert into t1 values (0x01d4),(0x01d5),(0x01d6),(0x01d7);
insert into t1 values (0x01d8),(0x01d9),(0x01da),(0x01db);
insert into t1 values (0x01dc),(0x01dd),(0x01de),(0x01df);
insert into t1 values (0x01e0),(0x01e1),(0x01e2),(0x01e3);
insert into t1 values (0x01e4),(0x01e5),(0x01e6),(0x01e7);
insert into t1 values (0x01e8),(0x01e9),(0x01ea),(0x01eb);
insert into t1 values (0x01ec),(0x01ed),(0x01ee),(0x01ef);
insert into t1 values (0x01f0),(0x01f1),(0x01f2),(0x01f3);
insert into t1 values (0x01f4),(0x01f5),(0x01f6),(0x01f7);
insert into t1 values (0x01f8),(0x01f9),(0x01fa),(0x01fb);
insert into t1 values (0x01fc),(0x01fd),(0x01fe),(0x01ff);
insert into t1 values ('AA'),('Aa'),('aa'),('aA');
insert into t1 values ('CH'),('Ch'),('ch'),('cH');
insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
insert into t1 values ('LL'),('Ll'),('ll'),('lL');
insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
insert into t1 values ('OE'),('Oe'),('oe'),('oE');
insert into t1 values ('SS'),('Ss'),('ss'),('sS');
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_icelandic_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_latvian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_romanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovenian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_polish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_estonian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_swedish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_turkish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_czech_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_danish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_lithuanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovak_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish2_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_roman_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_esperanto_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_hungarian_ci;
drop table t1;
#
# Bug#5324
#
SET NAMES utf8;
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_general_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_general_ci;
INSERT INTO t1 VALUES (0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
COLLATE utf16_general_ci ORDER BY c;
DROP TABLE t1;
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
INSERT INTO t1 VALUES (0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
COLLATE utf16_unicode_ci ORDER BY c;
DROP TABLE t1;
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
#Check one row row
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
INSERT INTO t1 VALUES (0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
COLLATE utf16_unicode_ci ORDER BY c;
DROP TABLE t1;
SET NAMES utf8;
SET @test_character_set='utf16';
SET @test_collation='utf16_swedish_ci';
-- source include/ctype_common.inc
SET collation_connection='utf16_unicode_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
--echo End of 4.1 tests
#
# Check UPPER/LOWER changing length
#
# Result shorter than argument
CREATE TABLE t1 (id int, a varchar(30) character set utf16);
INSERT INTO t1 VALUES (1, 0x01310069), (2, 0x01310131);
INSERT INTO t1 VALUES (3, 0x00690069), (4, 0x01300049);
INSERT INTO t1 VALUES (5, 0x01300130), (6, 0x00490049);
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf16 collate utf16_turkish_ci;
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
DROP TABLE t1;
#
# Bug #27079 Crash while grouping empty ucs2 strings
#
CREATE TABLE t1 (
c1 text character set utf16 collate utf16_polish_ci NOT NULL
) ENGINE=MyISAM;
insert into t1 values (''),('a');
SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
DROP TABLE IF EXISTS t1;
#
# Test basic regex functionality
#
set collation_connection=utf16_unicode_ci;
--source include/ctype_regex.inc
--echo #
--echo # End of 5.5 tests
--echo #

View File

@ -0,0 +1,784 @@
-- source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
SET NAMES latin1;
SET character_set_connection=utf32;
select hex('a'), hex('a ');
-- source include/endspace.inc
#
# Check that incomplete utf32 characters in HEX notation
# are left-padded with zeros
#
select hex(_utf32 0x44);
select hex(_utf32 0x3344);
select hex(_utf32 0x103344);
select hex(_utf32 X'44');
select hex(_utf32 X'3344');
select hex(_utf32 X'103344');
#
# Check that 0x20 is only trimmed when it is
# a part of real SPACE character, not just a part
# of a multibyte sequence.
# Note, CYRILLIC LETTER ER is used as an example, which
# is stored as 0x0420 in UCS2, thus contains 0x20 in the
# low byte. The second character is THREE-PER-M, U+2004,
# which contains 0x20 in the high byte.
#
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32;
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DELETE FROM t1;
#
# Check that real spaces are correctly trimmed.
#
INSERT INTO t1 VALUES
(X'000004200000002000000020',X'000004200000002000000020'),
(X'000020040000002000000020',X'000020040000002000000020');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DROP TABLE t1;
#
# Check LPAD/RPAD
#
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421'));
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421'));
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
CREATE TABLE t1 SELECT
LPAD(_utf32 X'0420',10,_utf32 X'0421') l,
RPAD(_utf32 X'0420',10,_utf32 X'0421') r;
SHOW CREATE TABLE t1;
select hex(l), hex(r) from t1;
DROP TABLE t1;
create table t1 (f1 char(30));
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
select lpad(f1, 12, "-o-/") from t1;
drop table t1;
######################################################
#
# Test of like
#
SET NAMES latin1;
SET character_set_connection=utf32;
--source include/ctype_like.inc
SET NAMES utf8;
SET character_set_connection=utf32;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a;
DROP TABLE t1;
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
ENGINE=MyISAM CHARACTER SET utf32;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
SELECT * FROM t1 WHERE word LIKE "ca_";
SELECT * FROM t1 WHERE word LIKE "cat";
SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%"
SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_"
DROP TABLE t1;
#
# Check that INSERT() works fine.
# This invokes charpos() function.
select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066);
select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066);
#######################################################
#
# Bug 1264
#
# Description:
#
# When using a ucs2 table in MySQL,
# either with ucs2_general_ci or ucs2_bin collation,
# words are returned in an incorrect order when using ORDER BY
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
# the longest word *first* instead of last. I.E. The word "aardvark"
# is in the results before the word "a".
#
# If there is no index for the column, the problem does not occur.
#
# Interestingly, if there is no second column, the words are returned
# in the correct order.
#
# According to EXPLAIN, it looks like when the output includes columns that
# are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results.
SET NAMES latin1;
#
# Two fields, index
#
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) default 0,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf32
COLLATE utf32_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER by word;
DROP TABLE t1;
#
# One field, index
#
CREATE TABLE t1 (
word VARCHAR(64) ,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf32
COLLATE utf32_general_ci;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
SELECT * FROM t1 ORDER BY word;
DROP TABLE t1;
#
# Two fields, no index
#
CREATE TABLE t1 (
word TEXT,
bar INT(11) AUTO_INCREMENT,
PRIMARY KEY (bar))
ENGINE=MyISAM
CHARSET utf32
COLLATE utf32_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a" );
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER BY word;
DROP TABLE t1;
#
# END OF Bug 1264 test
#
########################################################
#
# Check alignment for from-binary-conversion with CAST and CONVERT
#
SELECT hex(cast(0xAA as char character set utf32));
SELECT hex(convert(0xAA using utf32));
#
# Check alignment for string types
#
CREATE TABLE t1 (a char(10) character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a varchar(10) character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a text character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a mediumtext character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a longtext character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
##
## Bug #5024 Server crashes with queries on fields
## with certain charset/collation settings
##
#
#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`);
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
#select s1 from t1 where s1 > 'a' order by s1;
#drop table t1;
#
# Bug #5081 : UCS2 fields are filled with '0x2020'
# after extending field length
#
create table t1(a char(1)) default charset utf32;
insert into t1 values ('a'),('b'),('c');
alter table t1 modify a char(5);
select a, hex(a) from t1;
drop table t1;
#
# Check prepare statement from an UTF32 string
#
set @ivar= 1234;
set @str1 = 'select ?';
set @str2 = convert(@str1 using utf32);
prepare stmt1 from @str2;
execute stmt1 using @ivar;
#
# Check that utf32 works with ENUM and SET type
#
set names utf8;
create table t1 (a enum('x','y','z') character set utf32);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
select a, hex(a) from t1 order by a;
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32;
show create table t1;
insert into t1 values ('D');
insert into t1 values ('E ');
insert into t1 values ('ä');
insert into t1 values ('ö');
insert into t1 values ('ü');
select a, hex(a) from t1 order by a;
drop table t1;
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
insert into t1 values ('x,y');
insert into t1 values ('x,y,z,ä,ö,ü');
select a, hex(a) from t1 order by a;
drop table t1;
#
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
#
create table t1(a enum('a','b','c')) default character set utf32;
insert into t1 values('a'),('b'),('c');
alter table t1 add b char(1);
show warnings;
select * from t1 order by a;
drop table t1;
SET NAMES latin1;
SET collation_connection='utf32_general_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
SET NAMES latin1;
SET collation_connection='utf32_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
#
# Bug#10344 Some string functions fail for UCS2
#
select hex(substr(_utf32 0x000000e4000000e500000068,1));
select hex(substr(_utf32 0x000000e4000000e500000068,2));
select hex(substr(_utf32 0x000000e4000000e500000068,3));
select hex(substr(_utf32 0x000000e4000000e500000068,-1));
select hex(substr(_utf32 0x000000e4000000e500000068,-2));
select hex(substr(_utf32 0x000000e4000000e500000068,-3));
#SET NAMES latin1;
#
# Bug#8235
#
# This bug also helped to find another problem that
# INSERT of a UCS2 string containing a negative number
# into a unsigned int column didn't produce warnings.
# This test covers both problems.
#
#SET collation_connection='ucs2_swedish_ci';
#CREATE TABLE t1 (Field1 int(10) default '0');
## no warnings, negative numbers are allowed
#INSERT INTO t1 VALUES ('-1');
#SELECT * FROM t1;
#DROP TABLE t1;
#CREATE TABLE t1 (Field1 int(10) unsigned default '0');
## this should generate a "Data truncated" warning
#INSERT INTO t1 VALUES ('-1');
#DROP TABLE t1;
#SET NAMES latin1;
#
##
## Bug#18691 Converting number to UNICODE string returns invalid result
##
#SELECT CONVERT(103, CHAR(50) UNICODE);
#SELECT CONVERT(103.0, CHAR(50) UNICODE);
#SELECT CONVERT(-103, CHAR(50) UNICODE);
#SELECT CONVERT(-103.0, CHAR(50) UNICODE);
#
# Bug#9557 MyISAM utf8 table crash
#
CREATE TABLE t1 (
a varchar(250) NOT NULL default '',
KEY a (a)
) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci;
insert into t1 values (0x803d);
insert into t1 values (0x005b);
select hex(a) from t1;
drop table t1;
##
## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
##
#--disable_warnings
#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB;
#--enable_warnings
#insert into t1 values('a');
#create index t1f1 on t1(f1);
#select f1 from t1 where f1 like 'a%';
#drop table t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf32);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
#
create table t1 (
a char(10) character set utf32 not null,
index a (a)
) engine=myisam;
insert into t1 values (repeat(0x0000201f, 10));
insert into t1 values (repeat(0x00002020, 10));
insert into t1 values (repeat(0x00002021, 10));
# make sure "index read" is used
explain select hex(a) from t1 order by a;
select hex(a) from t1 order by a;
alter table t1 drop index a;
select hex(a) from t1 order by a;
drop table t1;
#
# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
# over a 'ucs2' field uses a temporary table
#
#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
#SELECT id, MIN(s) FROM t1 GROUP BY id;
#DROP TABLE t1;
##
## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
##
#
#--disable_warnings
#drop table if exists bug20536;
#--enable_warnings
#
#set names latin1;
#create table bug20536 (id bigint not null auto_increment primary key, name
#varchar(255) character set ucs2 not null);
#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
#select md5(name) from bug20536;
#select sha1(name) from bug20536;
#select make_set(3, name, upper(name)) from bug20536;
#select export_set(5, name, upper(name)) from bug20536;
#select export_set(5, name, upper(name), ",", 5) from bug20536;
#
# Bug #20108: corrupted default enum value for a ucs2 field
#
CREATE TABLE t1 (
status enum('active','passive') character set utf32 collate utf32_general_ci
NOT NULL default 'passive'
);
SHOW CREATE TABLE t1;
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
SHOW CREATE TABLE t1;
DROP TABLE t1;
#CREATE TABLE t2 (
# status enum('active','passive') collate ucs2_turkish_ci
# NOT NULL default 'passive'
#);
#SHOW CREATE TABLE t2;
#ALTER TABLE t2 ADD a int NOT NULL AFTER status;
#DROP TABLE t2;
## Some broken functions: add these tests just to document current behavior.
#
## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would
## not be backwards compatible in all cases, so it's best to leave it alone
#select password(name) from bug20536;
#select old_password(name) from bug20536;
#
## Disable test case as encrypt relies on 'crypt' function.
## "decrypt" is noramlly tested in func_crypt.test which have a
## "have_crypt.inc" test
#--disable_parsing
## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it
## doesn't return good results for strings with embedded 0 bytes. It won't be
## fixed unless we choose to re-implement the crypt() function ourselves to take
## an extra size_t string_length argument.
#select encrypt(name, 'SALT') from bug20536;
#--enable_parsing
#
## QUOTE doesn't work with UCS2 data. It would require a total rewrite
## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is
## supported fully as a client character set.
#select quote(name) from bug20536;
#
#drop table bug20536;
#
--echo End of 4.1 tests
#
# Conversion from an UTF32 string to a decimal column
#
CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3));
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
update t1 set b=a;
SELECT *, hex(a) FROM t1;
DROP TABLE t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf32);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22638 SOUNDEX broken for international characters
#
set names latin1;
set character_set_connection=utf32;
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
select 'mood' sounds like 'mud';
# Cyrillic A, BE, VE
select hex(soundex(_utf32 0x000004100000041100000412));
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
select hex(soundex(_utf32 0x000000BF000000C0));
set names latin1;
#
# Bug #14290: character_maximum_length for text fields
#
create table t1(a blob, b text charset utf32);
select data_type, character_octet_length, character_maximum_length
from information_schema.columns where table_name='t1';
drop table t1;
set names latin1;
set collation_connection=utf32_general_ci;
#
# Testing cs->coll->instr()
#
select position('bb' in 'abba');
#
# Testing cs->coll->hash_sort()
#
create table t1 (a varchar(10) character set utf32) engine=heap;
insert into t1 values ('a'),('A'),('b'),('B');
select * from t1 where a='a' order by binary a;
select hex(min(binary a)),count(*) from t1 group by a;
drop table t1;
#
# Testing cs->cset->numchars()
#
select char_length('abcd'), octet_length('abcd');
#
# Testing cs->cset->charpos()
#
select left('abcd',2);
#
# Testing cs->cset->well_formed_length()
#
create table t1 (a varchar(10) character set utf32);
insert into t1 values (_utf32 0x0010FFFF);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x00110000);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x00110101);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x01000101);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x11000101);
select hex(a) from t1;
drop table t1;
#
# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns
#
create table t1 (utf32 varchar(2) character set utf32);
--echo Wrong character with pad
insert into t1 values (0x110000);
--echo Wrong chsaracter without pad
insert into t1 values (0x00110000);
--echo Wrong character with pad followed by another wrong character
insert into t1 values (0x11000000110000);
--echo Good character with pad followed by bad character
insert into t1 values (0x10000000110000);
--echo Good character without pad followed by bad character
insert into t1 values (0x0010000000110000);
--echo Wrong character with the second byte higher than 0x10
insert into t1 values (0x00800037);
--echo Wrong character with pad with the second byte higher than 0x10
insert into t1 values (0x00800037);
drop table t1;
#
# Bug#32394 Character sets: crash if comparison with 0xfffd
#
select _utf32'a' collate utf32_general_ci = 0xfffd;
select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61));
create table t1 (s1 varchar(5) character set utf32);
insert into t1 values (0xfffd);
select case when s1 = 0xfffd then 1 else 0 end from t1;
select hex(s1) from t1 where s1 = 0xfffd;
drop table t1;
#
# Testing cs->cset->lengthsp()
#
create table t1 (a char(10)) character set utf32;
insert into t1 values ('a ');
select hex(a) from t1;
drop table t1;
#
# Testing cs->cset->caseup() and cs->cset->casedn()
#
select upper('abcd'), lower('ABCD');
#
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32
# Testing cs->cset->snprintf()
#
#create table t1 (a date);
#insert into t1 values ('2007-09-16');
#select * from t1;
#drop table t1;
#
# Testing cs->cset->l10tostr
# !!! Not used in the code
#
# Testing cs->cset->ll10tostr
#
create table t1 (a varchar(10) character set utf32);
insert into t1 values (123456);
select a, hex(a) from t1;
drop table t1;
#
# Testing cs->cset->fill
# SOUNDEX fills strings with DIGIT ZERO up to four characters
select hex(soundex('a'));
#
# Testing cs->cset->strntol
# !!! Not used in the code
#
# Testing cs->cset->strntoul
#
create table t1 (a enum ('a','b','c')) character set utf32;
insert into t1 values ('1');
select * from t1;
drop table t1;
#
# Testing cs->cset->strntoll and cs->cset->strntoull
#
set names latin1;
select hex(conv(convert('123' using utf32), -10, 16));
select hex(conv(convert('123' using utf32), 10, 16));
#
# Testing cs->cset->strntod
#
set names latin1;
set character_set_connection=utf32;
select 1.1 + '1.2';
select 1.1 + '1.2xxx';
# Testing strntoll10_utf32
# Testing cs->cset->strtoll10
select left('aaa','1');
#
# Testing cs->cset->strntoull10rnd
#
create table t1 (a int);
insert into t1 values ('-1234.1e2');
insert into t1 values ('-1234.1e2xxxx');
insert into t1 values ('-1234.1e2 ');
select * from t1;
drop table t1;
#
# Testing cs->cset->scan
#
create table t1 (a int);
insert into t1 values ('1 ');
insert into t1 values ('1 x');
select * from t1;
drop table t1;
#
# Testing auto-conversion to TEXT
#
create table t1 (a varchar(17000) character set utf32);
show create table t1;
drop table t1;
#
# Testing that maximim possible key length is 1332 bytes
#
create table t1 (a varchar(250) character set utf32 primary key);
show create table t1;
drop table t1;
--error ER_TOO_LONG_KEY
create table t1 (a varchar(334) character set utf32 primary key);
#
# Testing mi_check with long key values
#
create table t1 (a varchar(333) character set utf32, key(a));
insert into t1 values (repeat('a',333)), (repeat('b',333));
flush tables;
check table t1;
drop table t1;
#
# Test how character set works with date/time
#
SET collation_connection=utf32_general_ci;
--source include/ctype_datetime.inc
SET NAMES latin1;
#
# Test basic regex functionality
#
set collation_connection=utf32_general_ci;
--source include/ctype_regex.inc
set names latin1;
# TODO: add tests for all engines
#
# Bug #36418 Character sets: crash if char(256 using utf32)
#
select hex(char(0x01 using utf32));
select hex(char(0x0102 using utf32));
select hex(char(0x010203 using utf32));
select hex(char(0x01020304 using utf32));
create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32);
create index i on t1 (s1);
insert into t1 values (char(256 using utf32), char(256 using utf32));
select hex(s1), hex(s2) from t1;
drop table t1;
#
# Bug#33073 Character sets: ordering fails with utf32
#
SET collation_connection=utf32_general_ci;
CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
SELECT * FROM t1 ORDER BY s1;
SET max_sort_length=4;
SELECT * FROM t1 ORDER BY s1;
DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
--echo #
--echo # End of 5.5 tests
--echo #

View File

@ -0,0 +1,291 @@
-- source include/have_ucs2.inc
-- source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
set names utf8;
set collation_connection=utf32_unicode_ci;
select hex('a'), hex('a ');
-- source include/endspace.inc
#
# Bug #6787 LIKE not working properly with _ and utf8 data
#
select 'c' like '\_' as want0;
#
# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
#
CREATE TABLE t (
c char(20) NOT NULL
) ENGINE=MyISAM DEFAULT CHARACTER SET utf32 COLLATE=utf32_unicode_ci;
INSERT INTO t VALUES ('a'),('ab'),('aba');
ALTER TABLE t ADD INDEX (c);
SELECT c FROM t WHERE c LIKE 'a%';
DROP TABLE t;
create table t1 (c1 char(10) character set utf32 collate utf32_bin);
#
# Basic Latin
#
insert into t1 values ('A'),('a');
insert into t1 values ('B'),('b');
insert into t1 values ('C'),('c');
insert into t1 values ('D'),('d');
insert into t1 values ('E'),('e');
insert into t1 values ('F'),('f');
insert into t1 values ('G'),('g');
insert into t1 values ('H'),('h');
insert into t1 values ('I'),('i');
insert into t1 values ('J'),('j');
insert into t1 values ('K'),('k');
insert into t1 values ('L'),('l');
insert into t1 values ('M'),('m');
insert into t1 values ('N'),('n');
insert into t1 values ('O'),('o');
insert into t1 values ('P'),('p');
insert into t1 values ('Q'),('q');
insert into t1 values ('R'),('r');
insert into t1 values ('S'),('s');
insert into t1 values ('T'),('t');
insert into t1 values ('U'),('u');
insert into t1 values ('V'),('v');
insert into t1 values ('W'),('w');
insert into t1 values ('X'),('x');
insert into t1 values ('Y'),('y');
insert into t1 values ('Z'),('z');
#
# Latin1 suppliment
#
insert into t1 values (_ucs2 0x00e0),(_ucs2 0x00c0);
insert into t1 values (_ucs2 0x00e1),(_ucs2 0x00c1);
insert into t1 values (_ucs2 0x00e2),(_ucs2 0x00c2);
insert into t1 values (_ucs2 0x00e3),(_ucs2 0x00c3);
insert into t1 values (_ucs2 0x00e4),(_ucs2 0x00c4);
insert into t1 values (_ucs2 0x00e5),(_ucs2 0x00c5);
insert into t1 values (_ucs2 0x00e6),(_ucs2 0x00c6);
insert into t1 values (_ucs2 0x00e7),(_ucs2 0x00c7);
insert into t1 values (_ucs2 0x00e8),(_ucs2 0x00c8);
insert into t1 values (_ucs2 0x00e9),(_ucs2 0x00c9);
insert into t1 values (_ucs2 0x00ea),(_ucs2 0x00ca);
insert into t1 values (_ucs2 0x00eb),(_ucs2 0x00cb);
insert into t1 values (_ucs2 0x00ec),(_ucs2 0x00cc);
insert into t1 values (_ucs2 0x00ed),(_ucs2 0x00cd);
insert into t1 values (_ucs2 0x00ee),(_ucs2 0x00ce);
insert into t1 values (_ucs2 0x00ef),(_ucs2 0x00cf);
insert into t1 values (_ucs2 0x00f0),(_ucs2 0x00d0);
insert into t1 values (_ucs2 0x00f1),(_ucs2 0x00d1);
insert into t1 values (_ucs2 0x00f2),(_ucs2 0x00d2);
insert into t1 values (_ucs2 0x00f3),(_ucs2 0x00d3);
insert into t1 values (_ucs2 0x00f4),(_ucs2 0x00d4);
insert into t1 values (_ucs2 0x00f5),(_ucs2 0x00d5);
insert into t1 values (_ucs2 0x00f6),(_ucs2 0x00d6);
insert into t1 values (_ucs2 0x00f7),(_ucs2 0x00d7);
insert into t1 values (_ucs2 0x00f8),(_ucs2 0x00d8);
insert into t1 values (_ucs2 0x00f9),(_ucs2 0x00d9);
insert into t1 values (_ucs2 0x00fa),(_ucs2 0x00da);
insert into t1 values (_ucs2 0x00fb),(_ucs2 0x00db);
insert into t1 values (_ucs2 0x00fc),(_ucs2 0x00dc);
insert into t1 values (_ucs2 0x00fd),(_ucs2 0x00dd);
insert into t1 values (_ucs2 0x00fe),(_ucs2 0x00de);
insert into t1 values (_ucs2 0x00ff),(_ucs2 0x00df);
#
# Latin extended-A, 0100-017F
#
insert into t1 values (_ucs2 0x0100),(_ucs2 0x0101),(_ucs2 0x0102),(_ucs2 0x0103);
insert into t1 values (_ucs2 0x0104),(_ucs2 0x0105),(_ucs2 0x0106),(_ucs2 0x0107);
insert into t1 values (_ucs2 0x0108),(_ucs2 0x0109),(_ucs2 0x010a),(_ucs2 0x010b);
insert into t1 values (_ucs2 0x010c),(_ucs2 0x010d),(_ucs2 0x010e),(_ucs2 0x010f);
insert into t1 values (_ucs2 0x0110),(_ucs2 0x0111),(_ucs2 0x0112),(_ucs2 0x0113);
insert into t1 values (_ucs2 0x0114),(_ucs2 0x0115),(_ucs2 0x0116),(_ucs2 0x0117);
insert into t1 values (_ucs2 0x0118),(_ucs2 0x0119),(_ucs2 0x011a),(_ucs2 0x011b);
insert into t1 values (_ucs2 0x011c),(_ucs2 0x011d),(_ucs2 0x011e),(_ucs2 0x011f);
insert into t1 values (_ucs2 0x0120),(_ucs2 0x0121),(_ucs2 0x0122),(_ucs2 0x0123);
insert into t1 values (_ucs2 0x0124),(_ucs2 0x0125),(_ucs2 0x0126),(_ucs2 0x0127);
insert into t1 values (_ucs2 0x0128),(_ucs2 0x0129),(_ucs2 0x012a),(_ucs2 0x012b);
insert into t1 values (_ucs2 0x012c),(_ucs2 0x012d),(_ucs2 0x012e),(_ucs2 0x012f);
insert into t1 values (_ucs2 0x0130),(_ucs2 0x0131),(_ucs2 0x0132),(_ucs2 0x0133);
insert into t1 values (_ucs2 0x0134),(_ucs2 0x0135),(_ucs2 0x0136),(_ucs2 0x0137);
insert into t1 values (_ucs2 0x0138),(_ucs2 0x0139),(_ucs2 0x013a),(_ucs2 0x013b);
insert into t1 values (_ucs2 0x013c),(_ucs2 0x013d),(_ucs2 0x013e),(_ucs2 0x013f);
insert into t1 values (_ucs2 0x0140),(_ucs2 0x0141),(_ucs2 0x0142),(_ucs2 0x0143);
insert into t1 values (_ucs2 0x0144),(_ucs2 0x0145),(_ucs2 0x0146),(_ucs2 0x0147);
insert into t1 values (_ucs2 0x0148),(_ucs2 0x0149),(_ucs2 0x014a),(_ucs2 0x014b);
insert into t1 values (_ucs2 0x014c),(_ucs2 0x014d),(_ucs2 0x014e),(_ucs2 0x014f);
insert into t1 values (_ucs2 0x0150),(_ucs2 0x0151),(_ucs2 0x0152),(_ucs2 0x0153);
insert into t1 values (_ucs2 0x0154),(_ucs2 0x0155),(_ucs2 0x0156),(_ucs2 0x0157);
insert into t1 values (_ucs2 0x0158),(_ucs2 0x0159),(_ucs2 0x015a),(_ucs2 0x015b);
insert into t1 values (_ucs2 0x015c),(_ucs2 0x015d),(_ucs2 0x015e),(_ucs2 0x015f);
insert into t1 values (_ucs2 0x0160),(_ucs2 0x0161),(_ucs2 0x0162),(_ucs2 0x0163);
insert into t1 values (_ucs2 0x0164),(_ucs2 0x0165),(_ucs2 0x0166),(_ucs2 0x0167);
insert into t1 values (_ucs2 0x0168),(_ucs2 0x0169),(_ucs2 0x016a),(_ucs2 0x016b);
insert into t1 values (_ucs2 0x016c),(_ucs2 0x016d),(_ucs2 0x016e),(_ucs2 0x016f);
insert into t1 values (_ucs2 0x0170),(_ucs2 0x0171),(_ucs2 0x0172),(_ucs2 0x0173);
insert into t1 values (_ucs2 0x0174),(_ucs2 0x0175),(_ucs2 0x0176),(_ucs2 0x0177);
insert into t1 values (_ucs2 0x0178),(_ucs2 0x0179),(_ucs2 0x017a),(_ucs2 0x017b);
insert into t1 values (_ucs2 0x017c),(_ucs2 0x017d),(_ucs2 0x017e),(_ucs2 0x017f);
#
# Latin extended-B, 0180-024F
#
insert into t1 values (_ucs2 0x0180),(_ucs2 0x0181),(_ucs2 0x0182),(_ucs2 0x0183);
insert into t1 values (_ucs2 0x0184),(_ucs2 0x0185),(_ucs2 0x0186),(_ucs2 0x0187);
insert into t1 values (_ucs2 0x0188),(_ucs2 0x0189),(_ucs2 0x018a),(_ucs2 0x018b);
insert into t1 values (_ucs2 0x018c),(_ucs2 0x018d),(_ucs2 0x018e),(_ucs2 0x018f);
insert into t1 values (_ucs2 0x0190),(_ucs2 0x0191),(_ucs2 0x0192),(_ucs2 0x0193);
insert into t1 values (_ucs2 0x0194),(_ucs2 0x0195),(_ucs2 0x0196),(_ucs2 0x0197);
insert into t1 values (_ucs2 0x0198),(_ucs2 0x0199),(_ucs2 0x019a),(_ucs2 0x019b);
insert into t1 values (_ucs2 0x019c),(_ucs2 0x019d),(_ucs2 0x019e),(_ucs2 0x019f);
insert into t1 values (_ucs2 0x01a0),(_ucs2 0x01a1),(_ucs2 0x01a2),(_ucs2 0x01a3);
insert into t1 values (_ucs2 0x01a4),(_ucs2 0x01a5),(_ucs2 0x01a6),(_ucs2 0x01a7);
insert into t1 values (_ucs2 0x01a8),(_ucs2 0x01a9),(_ucs2 0x01aa),(_ucs2 0x01ab);
insert into t1 values (_ucs2 0x01ac),(_ucs2 0x01ad),(_ucs2 0x01ae),(_ucs2 0x01af);
insert into t1 values (_ucs2 0x01b0),(_ucs2 0x01b1),(_ucs2 0x01b2),(_ucs2 0x01b3);
insert into t1 values (_ucs2 0x01b4),(_ucs2 0x01b5),(_ucs2 0x01b6),(_ucs2 0x01b7);
insert into t1 values (_ucs2 0x01b8),(_ucs2 0x01b9),(_ucs2 0x01ba),(_ucs2 0x01bb);
insert into t1 values (_ucs2 0x01bc),(_ucs2 0x01bd),(_ucs2 0x01be),(_ucs2 0x01bf);
insert into t1 values (_ucs2 0x01c0),(_ucs2 0x01c1),(_ucs2 0x01c2),(_ucs2 0x01c3);
insert into t1 values (_ucs2 0x01c4),(_ucs2 0x01c5),(_ucs2 0x01c6),(_ucs2 0x01c7);
insert into t1 values (_ucs2 0x01c8),(_ucs2 0x01c9),(_ucs2 0x01ca),(_ucs2 0x01cb);
insert into t1 values (_ucs2 0x01cc),(_ucs2 0x01cd),(_ucs2 0x01ce),(_ucs2 0x01cf);
insert into t1 values (_ucs2 0x01d0),(_ucs2 0x01d1),(_ucs2 0x01d2),(_ucs2 0x01d3);
insert into t1 values (_ucs2 0x01d4),(_ucs2 0x01d5),(_ucs2 0x01d6),(_ucs2 0x01d7);
insert into t1 values (_ucs2 0x01d8),(_ucs2 0x01d9),(_ucs2 0x01da),(_ucs2 0x01db);
insert into t1 values (_ucs2 0x01dc),(_ucs2 0x01dd),(_ucs2 0x01de),(_ucs2 0x01df);
insert into t1 values (_ucs2 0x01e0),(_ucs2 0x01e1),(_ucs2 0x01e2),(_ucs2 0x01e3);
insert into t1 values (_ucs2 0x01e4),(_ucs2 0x01e5),(_ucs2 0x01e6),(_ucs2 0x01e7);
insert into t1 values (_ucs2 0x01e8),(_ucs2 0x01e9),(_ucs2 0x01ea),(_ucs2 0x01eb);
insert into t1 values (_ucs2 0x01ec),(_ucs2 0x01ed),(_ucs2 0x01ee),(_ucs2 0x01ef);
insert into t1 values (_ucs2 0x01f0),(_ucs2 0x01f1),(_ucs2 0x01f2),(_ucs2 0x01f3);
insert into t1 values (_ucs2 0x01f4),(_ucs2 0x01f5),(_ucs2 0x01f6),(_ucs2 0x01f7);
insert into t1 values (_ucs2 0x01f8),(_ucs2 0x01f9),(_ucs2 0x01fa),(_ucs2 0x01fb);
insert into t1 values (_ucs2 0x01fc),(_ucs2 0x01fd),(_ucs2 0x01fe),(_ucs2 0x01ff);
insert into t1 values ('AA'),('Aa'),('aa'),('aA');
insert into t1 values ('CH'),('Ch'),('ch'),('cH');
insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
insert into t1 values ('LL'),('Ll'),('ll'),('lL');
insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
insert into t1 values ('OE'),('Oe'),('oe'),('oE');
insert into t1 values ('SS'),('Ss'),('ss'),('sS');
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_icelandic_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_latvian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_romanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovenian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_polish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_estonian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_swedish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_turkish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_czech_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_danish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_lithuanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovak_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish2_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_roman_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_esperanto_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_hungarian_ci;
drop table t1;
#
# Bug#5324
#
SET NAMES utf8;
#test1
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_general_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_general_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
COLLATE utf32_general_ci ORDER BY c;
DROP TABLE t1;
#test2
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_unicode_ci;
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
COLLATE utf32_unicode_ci ORDER BY c;
DROP TABLE t1;
#test 3
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row row
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) COLLATE utf32_unicode_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
#Check two rows
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32)
COLLATE utf32_unicode_ci ORDER BY c;
DROP TABLE t1;
SET NAMES utf8;
SET @test_character_set='utf32';
SET @test_collation='utf32_swedish_ci';
-- source include/ctype_common.inc
SET collation_connection='utf32_unicode_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
--echo End of 4.1 tests
#
# Check UPPER/LOWER changing length
#
# Result shorter than argument
CREATE TABLE t1 (id int, a varchar(30) character set utf32);
INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131);
INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049);
INSERT INTO t1 VALUES (5, _ucs2 0x01300130), (6, _ucs2 0x00490049);
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf32 collate utf32_turkish_ci;
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
DROP TABLE t1;
#
# Bug #27079 Crash while grouping empty ucs2 strings
#
CREATE TABLE t1 (
c1 text character set utf32 collate utf32_polish_ci NOT NULL
) ENGINE=MyISAM;
insert into t1 values (''),('a');
SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
DROP TABLE IF EXISTS t1;
#
# Test basic regex functionality
#
set collation_connection=utf32_unicode_ci;
--source include/ctype_regex.inc
--echo #
--echo # End of 5.5 tests
--echo #

View File

@ -1440,6 +1440,17 @@ DROP TABLE t1;
--echo Start of 5.4 tests
#
# WL#1213: utf8mb3 is an alias for utf8
#
SET NAMES utf8mb3;
SHOW VARIABLES LIKE 'character_set_results%';
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
SHOW CREATE TABLE t1;
DROP TABLE t1;
SELECT _utf8mb3'test';
#
# Bug#26180: Can't add columns to tables created with utf8 text indexes
#

File diff suppressed because it is too large Load Diff

View File

@ -45,6 +45,53 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
#endif
#ifdef HAVE_CHARSET_utf32
extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf32_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf32_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf32_polish_uca_ci;
extern CHARSET_INFO my_charset_utf32_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf32_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf32_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf32_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf32_czech_uca_ci;
extern CHARSET_INFO my_charset_utf32_danish_uca_ci;
extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf32_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf32_roman_uca_ci;
extern CHARSET_INFO my_charset_utf32_persian_uca_ci;
extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci;
#endif /* HAVE_CHARSET_utf32 */
#ifdef HAVE_CHARSET_utf16
extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf16_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf16_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf16_polish_uca_ci;
extern CHARSET_INFO my_charset_utf16_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf16_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf16_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf16_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf16_czech_uca_ci;
extern CHARSET_INFO my_charset_utf16_danish_uca_ci;
extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf16_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf16_roman_uca_ci;
extern CHARSET_INFO my_charset_utf16_persian_uca_ci;
extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci;
#endif /* HAVE_CHARSET_utf16 */
#ifdef HAVE_CHARSET_utf8
extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
@ -70,6 +117,28 @@ extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
#endif
#ifdef HAVE_CHARSET_utf8mb4
extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci;
#endif /* HAVE_CHARSET_utf8mb4 */
#endif /* HAVE_UCA_COLLATIONS */
my_bool init_compiled_charsets(myf flags __attribute__((unused)))
@ -191,7 +260,91 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
#endif
#endif
#endif /* HAVE_CHARSET_utf8 */
#ifdef HAVE_CHARSET_utf8mb4
add_compiled_collation(&my_charset_utf8mb4_general_ci);
add_compiled_collation(&my_charset_utf8mb4_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf8mb4 */
#ifdef HAVE_CHARSET_utf16
add_compiled_collation(&my_charset_utf16_general_ci);
add_compiled_collation(&my_charset_utf16_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf16_unicode_ci);
add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf16_latvian_uca_ci);
add_compiled_collation(&my_charset_utf16_romanian_uca_ci);
add_compiled_collation(&my_charset_utf16_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf16_polish_uca_ci);
add_compiled_collation(&my_charset_utf16_estonian_uca_ci);
add_compiled_collation(&my_charset_utf16_spanish_uca_ci);
add_compiled_collation(&my_charset_utf16_swedish_uca_ci);
add_compiled_collation(&my_charset_utf16_turkish_uca_ci);
add_compiled_collation(&my_charset_utf16_czech_uca_ci);
add_compiled_collation(&my_charset_utf16_danish_uca_ci);
add_compiled_collation(&my_charset_utf16_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf16_slovak_uca_ci);
add_compiled_collation(&my_charset_utf16_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf16_roman_uca_ci);
add_compiled_collation(&my_charset_utf16_persian_uca_ci);
add_compiled_collation(&my_charset_utf16_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
#endif /* HAVE_UCA_COLLATIOINS */
#endif /* HAVE_CHARSET_utf16 */
#ifdef HAVE_CHARSET_utf32
add_compiled_collation(&my_charset_utf32_general_ci);
add_compiled_collation(&my_charset_utf32_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf32_unicode_ci);
add_compiled_collation(&my_charset_utf32_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf32_latvian_uca_ci);
add_compiled_collation(&my_charset_utf32_romanian_uca_ci);
add_compiled_collation(&my_charset_utf32_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf32_polish_uca_ci);
add_compiled_collation(&my_charset_utf32_estonian_uca_ci);
add_compiled_collation(&my_charset_utf32_spanish_uca_ci);
add_compiled_collation(&my_charset_utf32_swedish_uca_ci);
add_compiled_collation(&my_charset_utf32_turkish_uca_ci);
add_compiled_collation(&my_charset_utf32_czech_uca_ci);
add_compiled_collation(&my_charset_utf32_danish_uca_ci);
add_compiled_collation(&my_charset_utf32_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf32_slovak_uca_ci);
add_compiled_collation(&my_charset_utf32_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf32_roman_uca_ci);
add_compiled_collation(&my_charset_utf32_persian_uca_ci);
add_compiled_collation(&my_charset_utf32_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf32 */
/* Copy compiled charsets */
for (cs=compiled_charsets; cs->name; cs++)

View File

@ -252,13 +252,35 @@ static int add_collation(CHARSET_INFO *cs)
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
else if (!strcmp(cs->csname, "utf8"))
else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
{
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
newcs->ctype= my_charset_utf8_unicode_ci.ctype;
if (init_state_maps(newcs))
return MY_XML_ERROR;
#endif
}
else if (!strcmp(cs->csname, "utf8mb4"))
{
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
}
else if (!strcmp(cs->csname, "utf16"))
{
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
else if (!strcmp(cs->csname, "utf32"))
{
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
else
@ -433,17 +455,35 @@ static void init_available_charsets(void)
}
uint get_collation_number(const char *name)
static const char*
get_collation_name_alias(const char *name, char *buf, size_t bufsize)
{
my_pthread_once(&charsets_initialized, init_available_charsets);
return get_collation_number_internal(name);
if (!strncasecmp(name, "utf8mb3_", 8))
{
my_snprintf(buf, bufsize, "utf8_%s", name + 8);
return buf;
}
return NULL;
}
uint get_charset_number(const char *charset_name, uint cs_flags)
uint get_collation_number(const char *name)
{
uint id;
char alias[64];
my_pthread_once(&charsets_initialized, init_available_charsets);
if ((id= get_collation_number_internal(name)))
return id;
if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
return get_collation_number_internal(name);
return 0;
}
static uint
get_charset_number_internal(const char *charset_name, uint cs_flags)
{
CHARSET_INFO **cs;
my_pthread_once(&charsets_initialized, init_available_charsets);
for (cs= all_charsets;
cs < all_charsets + array_elements(all_charsets);
@ -457,6 +497,27 @@ uint get_charset_number(const char *charset_name, uint cs_flags)
}
static const char*
get_charset_name_alias(const char *name)
{
if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))
return "utf8";
return NULL;
}
uint get_charset_number(const char *charset_name, uint cs_flags)
{
uint id;
my_pthread_once(&charsets_initialized, init_available_charsets);
if ((id= get_charset_number_internal(charset_name, cs_flags)))
return id;
if ((charset_name= get_charset_name_alias(charset_name)))
return get_charset_number_internal(charset_name, cs_flags);
return 0;
}
const char *get_charset_name(uint charset_number)
{
CHARSET_INFO *cs;

View File

@ -1836,7 +1836,9 @@ int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
ASSERT_COLUMN_MARKED_FOR_WRITE;
char buff[MAX_DATE_STRING_REP_LENGTH];
uint length= (uint) my_TIME_to_str(ltime, buff);
return store(buff, length, &my_charset_bin);
return store(buff, length,
(charset()->state & MY_CS_NONASCII) ?
&my_charset_latin1 : &my_charset_bin);
}

View File

@ -854,7 +854,7 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen;
return cnvitem;
}
return NULL;
return Item::safe_charset_converter(tocs);
}
@ -1436,7 +1436,12 @@ left_is_superset(DTCollation *left, DTCollation *right)
if (left->collation->state & MY_CS_UNICODE &&
(left->derivation < right->derivation ||
(left->derivation == right->derivation &&
!(right->collation->state & MY_CS_UNICODE))))
(!(right->collation->state & MY_CS_UNICODE) ||
/* The code below makes 4-byte utf8 a superset over 3-byte utf8 */
(left->collation->state & MY_CS_UNICODE_SUPPLEMENT &&
!(right->collation->state & MY_CS_UNICODE_SUPPLEMENT) &&
left->collation->mbmaxlen > right->collation->mbmaxlen &&
left->collation->mbminlen == right->collation->mbminlen)))))
return TRUE;
/* Allow convert from ASCII */
if (right->repertoire == MY_REPERTOIRE_ASCII &&
@ -1695,7 +1700,7 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
{
Item* conv;
uint32 dummy_offset;
if (!String::needs_conversion(0, (*arg)->collation.collation,
if (!String::needs_conversion(1, (*arg)->collation.collation,
coll.collation,
&dummy_offset))
continue;

View File

@ -2371,17 +2371,27 @@ String *Item_func_char::val_str(String *str)
int32 num=(int32) args[i]->val_int();
if (!args[i]->null_value)
{
char char_num= (char) num;
if (num&0xFF000000L) {
str->append((char)(num>>24));
goto b2;
} else if (num&0xFF0000L) {
b2: str->append((char)(num>>16));
goto b1;
} else if (num&0xFF00L) {
b1: str->append((char)(num>>8));
char tmp[4];
if (num & 0xFF000000L)
{
mi_int4store(tmp, num);
str->append(tmp, 4, &my_charset_bin);
}
else if (num & 0xFF0000L)
{
mi_int3store(tmp, num);
str->append(tmp, 3, &my_charset_bin);
}
else if (num & 0xFF00L)
{
mi_int2store(tmp, num);
str->append(tmp, 2, &my_charset_bin);
}
else
{
tmp[0]= (char) num;
str->append(tmp, 1, &my_charset_bin);
}
str->append(&char_num, 1);
}
}
str->realloc(str->length()); // Add end 0 (for Purify)
@ -2769,7 +2779,8 @@ String *Item_func_conv_charset::val_str(String *str)
void Item_func_conv_charset::fix_length_and_dec()
{
collation.set(conv_charset, DERIVATION_IMPLICIT);
max_length = args[0]->max_length*conv_charset->mbmaxlen;
max_length = args[0]->max_length / args[0]->collation.collation->mbmaxlen *
conv_charset->mbmaxlen;
}
void Item_func_conv_charset::print(String *str, enum_query_type query_type)

View File

@ -412,11 +412,25 @@ bool String::append(const char *s)
bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
{
uint32 dummy_offset;
uint32 offset;
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
if (needs_conversion(arg_length, cs, str_charset, &offset))
{
uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
uint32 add_length;
if ((cs == &my_charset_bin) && offset)
{
DBUG_ASSERT(str_charset->mbminlen > offset);
offset= str_charset->mbminlen - offset; // How many characters to pad
add_length= arg_length + offset;
if (realloc(str_length + add_length))
return TRUE;
bzero((char*) Ptr + str_length, offset);
memcpy(Ptr + str_length + offset, s, arg_length);
str_length+= add_length;
return FALSE;
}
add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
uint dummy_errors;
if (realloc(str_length + add_length))
return TRUE;
@ -966,6 +980,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
uint pad_length= to_cs->mbminlen - from_offset;
bzero(to, pad_length);
memmove(to + pad_length, from, from_offset);
/*
In some cases left zero-padding can create an incorrect character.
For example:
INSERT INTO t1 (utf32_column) VALUES (0x110000);
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
The valid characters range is limited to 0x00000000..0x0010FFFF.
Make sure we didn't pad to an incorrect character.
*/
if (to_cs->cset->well_formed_len(to_cs,
to, to + to_cs->mbminlen, 1,
&well_formed_error) !=
to_cs->mbminlen)
{
*from_end_pos= *well_formed_error_pos= from;
*cannot_convert_error_pos= NULL;
return 0;
}
nchars--;
from+= from_offset;
from_length-= from_offset;

View File

@ -2776,7 +2776,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
sql_field->interval_list);
List_iterator<String> int_it(sql_field->interval_list);
String conv, *tmp;
char comma_buf[2];
char comma_buf[4]; /* 4 bytes for utf32 */
int comma_length= cs->cset->wc_mb(cs, ',', (uchar*) comma_buf,
(uchar*) comma_buf +
sizeof(comma_buf));

View File

@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs,
/* BINARY collations handlers for MB charsets */
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix)
int
my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
size_t len=min(slen,tlen);
int cmp= memcmp(s,t,len);
@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
0 if strings are equal
*/
static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
int
my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
const uchar *end;
size_t length;
@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
}
static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t)
int
my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t)
{
return strcmp(s,t);
}
static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
void
my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
const uchar *pos = key;
@ -787,10 +792,11 @@ fill_max_and_min:
}
static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
int
my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
int result= -1; /* Not found, using wildcards */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff