diff --git a/include/my_sys.h b/include/my_sys.h index 4f7e75a836e..7218f91cccd 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -879,6 +879,8 @@ extern CHARSET_INFO *get_charset(uint cs_number, myf flags); extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags); extern CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint cs_flags, myf my_flags); +extern CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO + *original_cs); extern void free_charsets(void); extern char *get_charsets_dir(char *buf); extern my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2); diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index 023267c227c..28e3b50e371 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -811,6 +811,12 @@ quote(name) ???????? ???????????????? drop table bug20536; +CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci); +INSERT INTO t1 VALUES('abcd'); +SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE); +a +abcd +DROP TABLE t1; End of 4.1 tests CREATE TABLE t1 (a varchar(64) character set ucs2, b decimal(10,3)); INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0); diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index bca3a9c3a96..c498f9b9a8d 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -547,6 +547,14 @@ select quote(name) from bug20536; drop table bug20536; +# +# BUG#31159 - fulltext search on ucs2 column crashes server +# +CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci); +INSERT INTO t1 VALUES('abcd'); +SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE); +DROP TABLE t1; + --echo End of 4.1 tests # diff --git a/mysys/charset.c b/mysys/charset.c index 4c3f2d0ab71..d1cca150308 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -810,3 +810,43 @@ ulong escape_quotes_for_mysql(CHARSET_INFO *charset_info, *to= 0; return overflow ? (ulong)~0 : (ulong) (to - to_start); } + + +/** + @brief Find compatible character set with ctype. + + @param[in] original_cs Original character set + + @note + 128 my_charset_ucs2_general_uca ->192 my_charset_utf8_general_uca_ci + 129 my_charset_ucs2_icelandic_uca_ci ->193 my_charset_utf8_icelandic_uca_ci + 130 my_charset_ucs2_latvian_uca_ci ->194 my_charset_utf8_latvian_uca_ci + 131 my_charset_ucs2_romanian_uca_ci ->195 my_charset_utf8_romanian_uca_ci + 132 my_charset_ucs2_slovenian_uca_ci ->196 my_charset_utf8_slovenian_uca_ci + 133 my_charset_ucs2_polish_uca_ci ->197 my_charset_utf8_polish_uca_ci + 134 my_charset_ucs2_estonian_uca_ci ->198 my_charset_utf8_estonian_uca_ci + 135 my_charset_ucs2_spanish_uca_ci ->199 my_charset_utf8_spanish_uca_ci + 136 my_charset_ucs2_swedish_uca_ci ->200 my_charset_utf8_swedish_uca_ci + 137 my_charset_ucs2_turkish_uca_ci ->201 my_charset_utf8_turkish_uca_ci + 138 my_charset_ucs2_czech_uca_ci ->202 my_charset_utf8_czech_uca_ci + 139 my_charset_ucs2_danish_uca_ci ->203 my_charset_utf8_danish_uca_ci + 140 my_charset_ucs2_lithuanian_uca_ci->204 my_charset_utf8_lithuanian_uca_ci + 141 my_charset_ucs2_slovak_uca_ci ->205 my_charset_utf8_slovak_uca_ci + 142 my_charset_ucs2_spanish2_uca_ci ->206 my_charset_utf8_spanish2_uca_ci + 143 my_charset_ucs2_roman_uca_ci ->207 my_charset_utf8_roman_uca_ci + 144 my_charset_ucs2_persian_uca_ci ->208 my_charset_utf8_persian_uca_ci + + @return Compatible character set or NULL. +*/ + +CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO *original_cs) +{ + CHARSET_INFO *compatible_cs= 0; + DBUG_ENTER("get_compatible_charset_with_ctype"); + if (!strcmp(original_cs->csname, "ucs2") && + (compatible_cs= get_charset(original_cs->number + 64, MYF(0))) && + (!compatible_cs->ctype || + strcmp(original_cs->name + 4, compatible_cs->name + 4))) + compatible_cs= 0; + DBUG_RETURN(compatible_cs); +} diff --git a/sql/item_func.cc b/sql/item_func.cc index e4d370bbdf2..619736b99ff 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -4939,13 +4939,44 @@ bool Item_func_match::fix_fields(THD *thd, Item **ref) my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH"); return TRUE; } - table=((Item_field *)item)->field->table; + /* + With prepared statements Item_func_match::fix_fields is called twice. + When it is called first time we have original item tree here and add + conversion layer for character sets that do not have ctype array a few + lines below. When it is called second time, we already have conversion + layer in item tree. + */ + table= (item->type() == Item::FIELD_ITEM) ? + ((Item_field *)item)->field->table : + ((Item_field *)((Item_func_conv *)item)->key_item())->field->table; if (!(table->file->table_flags() & HA_CAN_FULLTEXT)) { my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0)); return 1; } table->fulltext_searched=1; + /* A workaround for ucs2 character set */ + if (!args[1]->collation.collation->ctype) + { + CHARSET_INFO *compatible_cs= + get_compatible_charset_with_ctype(args[1]->collation.collation); + bool rc= 1; + if (compatible_cs) + { + Item_string *conv_item= new Item_string("", 0, compatible_cs, + DERIVATION_EXPLICIT); + item= args[0]; + args[0]= conv_item; + rc= agg_item_charsets(cmp_collation, func_name(), args, arg_count, + MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_DISALLOW_NONE, 1); + args[0]= item; + } + else + my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH"); + return rc; + } return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1, 0); }