From 5d1ccce58ac10b2abff245734cd8e3f5ed2e9f67 Mon Sep 17 00:00:00 2001 From: "svoj@mysql.com/june.mysql.com" <> Date: Wed, 24 Oct 2007 16:09:30 +0500 Subject: [PATCH] BUG#31159 - fulltext search on ucs2 column crashes server ucs2 doesn't provide required by fulltext ctype array. Crash happens because fulltext attempts to use unitialized ctype array. Fixed by converting ucs2 fields to compatible utf8 analogue. --- include/my_sys.h | 2 ++ mysql-test/r/ctype_ucs.result | 6 ++++++ mysql-test/t/ctype_ucs.test | 8 +++++++ mysys/charset.c | 40 +++++++++++++++++++++++++++++++++++ sql/item_func.cc | 33 ++++++++++++++++++++++++++++- 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/include/my_sys.h b/include/my_sys.h index 759531fa649..4a0586b9f2d 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -784,6 +784,8 @@ extern CHARSET_INFO *get_charset(uint cs_number, myf flags); extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags); extern CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint cs_flags, myf my_flags); +extern CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO + *original_cs); extern void free_charsets(void); extern char *get_charsets_dir(char *buf); extern my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2); diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index bf827209795..fef13b19ae8 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -803,4 +803,10 @@ quote(name) ???????? ???????????????? drop table bug20536; +CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci); +INSERT INTO t1 VALUES('abcd'); +SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE); +a +abcd +DROP TABLE t1; End of 4.1 tests diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index 10559d33eb3..57f741597e0 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -535,4 +535,12 @@ select quote(name) from bug20536; drop table bug20536; +# +# BUG#31159 - fulltext search on ucs2 column crashes server +# +CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci); +INSERT INTO t1 VALUES('abcd'); +SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE); +DROP TABLE t1; + --echo End of 4.1 tests diff --git a/mysys/charset.c b/mysys/charset.c index 6f2d4d3c347..f0ac61ceed5 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -673,3 +673,43 @@ CHARSET_INFO *fs_character_set() return fs_cset_cache; } #endif + + +/** + @brief Find compatible character set with ctype. + + @param[in] original_cs Original character set + + @note + 128 my_charset_ucs2_general_uca ->192 my_charset_utf8_general_uca_ci + 129 my_charset_ucs2_icelandic_uca_ci ->193 my_charset_utf8_icelandic_uca_ci + 130 my_charset_ucs2_latvian_uca_ci ->194 my_charset_utf8_latvian_uca_ci + 131 my_charset_ucs2_romanian_uca_ci ->195 my_charset_utf8_romanian_uca_ci + 132 my_charset_ucs2_slovenian_uca_ci ->196 my_charset_utf8_slovenian_uca_ci + 133 my_charset_ucs2_polish_uca_ci ->197 my_charset_utf8_polish_uca_ci + 134 my_charset_ucs2_estonian_uca_ci ->198 my_charset_utf8_estonian_uca_ci + 135 my_charset_ucs2_spanish_uca_ci ->199 my_charset_utf8_spanish_uca_ci + 136 my_charset_ucs2_swedish_uca_ci ->200 my_charset_utf8_swedish_uca_ci + 137 my_charset_ucs2_turkish_uca_ci ->201 my_charset_utf8_turkish_uca_ci + 138 my_charset_ucs2_czech_uca_ci ->202 my_charset_utf8_czech_uca_ci + 139 my_charset_ucs2_danish_uca_ci ->203 my_charset_utf8_danish_uca_ci + 140 my_charset_ucs2_lithuanian_uca_ci->204 my_charset_utf8_lithuanian_uca_ci + 141 my_charset_ucs2_slovak_uca_ci ->205 my_charset_utf8_slovak_uca_ci + 142 my_charset_ucs2_spanish2_uca_ci ->206 my_charset_utf8_spanish2_uca_ci + 143 my_charset_ucs2_roman_uca_ci ->207 my_charset_utf8_roman_uca_ci + 144 my_charset_ucs2_persian_uca_ci ->208 my_charset_utf8_persian_uca_ci + + @return Compatible character set or NULL. +*/ + +CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO *original_cs) +{ + CHARSET_INFO *compatible_cs= 0; + DBUG_ENTER("get_compatible_charset_with_ctype"); + if (!strcmp(original_cs->csname, "ucs2") && + (compatible_cs= get_charset(original_cs->number + 64, MYF(0))) && + (!compatible_cs->ctype || + strcmp(original_cs->name + 4, compatible_cs->name + 4))) + compatible_cs= 0; + DBUG_RETURN(compatible_cs); +} diff --git a/sql/item_func.cc b/sql/item_func.cc index f71297515d6..6bfb920d7c8 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -3135,13 +3135,44 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref) my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH"); return 1; } - table=((Item_field *)item)->field->table; + /* + With prepared statements Item_func_match::fix_fields is called twice. + When it is called first time we have original item tree here and add + conversion layer for character sets that do not have ctype array a few + lines below. When it is called second time, we already have conversion + layer in item tree. + */ + table= (item->type() == Item::FIELD_ITEM) ? + ((Item_field *)item)->field->table : + ((Item_field *)((Item_func_conv *)item)->key_item())->field->table; if (!(table->file->table_flags() & HA_CAN_FULLTEXT)) { my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0)); return 1; } table->fulltext_searched=1; + /* A workaround for ucs2 character set */ + if (!args[1]->collation.collation->ctype) + { + CHARSET_INFO *compatible_cs= + get_compatible_charset_with_ctype(args[1]->collation.collation); + bool rc= 1; + if (compatible_cs) + { + Item_string *conv_item= new Item_string("", 0, compatible_cs, + DERIVATION_EXPLICIT); + item= args[0]; + args[0]= conv_item; + rc= agg_item_charsets(cmp_collation, func_name(), args, arg_count, + MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_DISALLOW_NONE); + args[0]= item; + } + else + my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH"); + return rc; + } return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1); }