Bug#11753363 (bug#44793) CHARACTER SETS: CASE CLAUSE, UCS2 OR UTF32, FAILURE
Problem: in case of string CASE/WHEN arguments with different character sets, Item_func_case::find_item() called comparator cmp_items[x] on mixed character set Items, so an 8-bit value could be erroneously treated as a utf16/utf32 value, which led to a crash on DBUG_ASSERT() because of a wrong value length. This was wrong, as the string comparator expects arguments in the same character set. Fix: modify Item_func_case's argument list after calling agg_arg_charsets_for_comparison() - put the Items in the "agg" array back to "args", because some of the Items in the "agg" array might have been changed to character set converters: - to Item_func_conv_charset for non-constant items - to Item_string for constant items In other words, perform the same substitution which is done in all other string comparison or string result operations: Replace CASE latin1_item WHEN utf16_item THEN ... END with CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END Replace CASE utf16_item WHEN latin1_item THEN ... END with CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END @ mysql-test/r/ctype_utf16.result @ mysql-test/r/ctype_utf32.result @ mysql-test/t/ctype_utf16.test @ mysql-test/t/ctype_utf32.test Adding tests @ sql/item_cmpfunc.cc Put "agg" back to "args". @ sql/sql_string.cc Backporting a fix for String::set_or_copy_aligned() from 5.6, for better test coverage: "SELECT _utf16 0x61" should expand the string to 0x0061 rather than to 0x00000061. This fix was made in 5.6 under terms of "WL#4616 Implement UTF16-LE".
This commit is contained in:
parent
48126a574c
commit
8a83d30436
@ -30,13 +30,13 @@ binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a'
|
|||||||
1 1 1
|
1 1 1
|
||||||
select hex(_utf16 0x44);
|
select hex(_utf16 0x44);
|
||||||
hex(_utf16 0x44)
|
hex(_utf16 0x44)
|
||||||
00000044
|
0044
|
||||||
select hex(_utf16 0x3344);
|
select hex(_utf16 0x3344);
|
||||||
hex(_utf16 0x3344)
|
hex(_utf16 0x3344)
|
||||||
3344
|
3344
|
||||||
select hex(_utf16 0x113344);
|
select hex(_utf16 0x113344);
|
||||||
hex(_utf16 0x113344)
|
hex(_utf16 0x113344)
|
||||||
000000113344
|
00113344
|
||||||
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
|
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
|
||||||
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
|
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
|
||||||
SELECT hex(word) FROM t1 ORDER BY word;
|
SELECT hex(word) FROM t1 ORDER BY word;
|
||||||
@ -434,10 +434,10 @@ aardvarz
|
|||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
SELECT hex(cast(0xAA as char character set utf16));
|
SELECT hex(cast(0xAA as char character set utf16));
|
||||||
hex(cast(0xAA as char character set utf16))
|
hex(cast(0xAA as char character set utf16))
|
||||||
000000AA
|
00AA
|
||||||
SELECT hex(convert(0xAA using utf16));
|
SELECT hex(convert(0xAA using utf16));
|
||||||
hex(convert(0xAA using utf16))
|
hex(convert(0xAA using utf16))
|
||||||
000000AA
|
00AA
|
||||||
CREATE TABLE t1 (a char(10) character set utf16);
|
CREATE TABLE t1 (a char(10) character set utf16);
|
||||||
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
|
||||||
SELECT HEX(a) FROM t1;
|
SELECT HEX(a) FROM t1;
|
||||||
@ -1102,5 +1102,20 @@ t2 CREATE TABLE `t2` (
|
|||||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||||
DROP TABLE t1, t2;
|
DROP TABLE t1, t2;
|
||||||
#
|
#
|
||||||
|
# Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
|
||||||
|
#
|
||||||
|
SELECT CASE _latin1'a' WHEN _utf16'a' THEN 'A' END;
|
||||||
|
CASE _latin1'a' WHEN _utf16'a' THEN 'A' END
|
||||||
|
A
|
||||||
|
SELECT CASE _utf16'a' WHEN _latin1'a' THEN 'A' END;
|
||||||
|
CASE _utf16'a' WHEN _latin1'a' THEN 'A' END
|
||||||
|
A
|
||||||
|
CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf16);
|
||||||
|
INSERT INTO t1 VALUES ('a');
|
||||||
|
SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
|
||||||
|
CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END
|
||||||
|
b
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
# End of 5.5 tests
|
# End of 5.5 tests
|
||||||
#
|
#
|
||||||
|
@ -1152,5 +1152,20 @@ d
|
|||||||
f
|
f
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
|
# Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
|
||||||
|
#
|
||||||
|
SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
|
||||||
|
CASE _latin1'a' WHEN _utf32'a' THEN 'A' END
|
||||||
|
A
|
||||||
|
SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
|
||||||
|
CASE _utf32'a' WHEN _latin1'a' THEN 'A' END
|
||||||
|
A
|
||||||
|
CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
|
||||||
|
INSERT INTO t1 VALUES ('a');
|
||||||
|
SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
|
||||||
|
CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END
|
||||||
|
b
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
# End of 5.5 tests
|
# End of 5.5 tests
|
||||||
#
|
#
|
||||||
|
@ -745,6 +745,15 @@ CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
|
|||||||
SHOW CREATE TABLE t2;
|
SHOW CREATE TABLE t2;
|
||||||
DROP TABLE t1, t2;
|
DROP TABLE t1, t2;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
|
||||||
|
--echo #
|
||||||
|
SELECT CASE _latin1'a' WHEN _utf16'a' THEN 'A' END;
|
||||||
|
SELECT CASE _utf16'a' WHEN _latin1'a' THEN 'A' END;
|
||||||
|
CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf16);
|
||||||
|
INSERT INTO t1 VALUES ('a');
|
||||||
|
SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
#
|
#
|
||||||
## TODO: add tests for all engines
|
## TODO: add tests for all engines
|
||||||
|
@ -830,6 +830,16 @@ INSERT INTO t1 VALUES ('d'),('f');
|
|||||||
SELECT * FROM t1 WHERE b BETWEEN 'a' AND 'z';
|
SELECT * FROM t1 WHERE b BETWEEN 'a' AND 'z';
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
|
||||||
|
--echo #
|
||||||
|
SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
|
||||||
|
SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
|
||||||
|
CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
|
||||||
|
INSERT INTO t1 VALUES ('a');
|
||||||
|
SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 5.5 tests
|
--echo # End of 5.5 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
@ -3054,20 +3054,59 @@ void Item_func_case::fix_length_and_dec()
|
|||||||
agg[0]= args[first_expr_num];
|
agg[0]= args[first_expr_num];
|
||||||
left_result_type= agg[0]->result_type();
|
left_result_type= agg[0]->result_type();
|
||||||
|
|
||||||
|
/*
|
||||||
|
As the first expression and WHEN expressions
|
||||||
|
are intermixed in the args[] array with THEN and ELSE items,
|
||||||
|
extract the first expression and all WHEN expressions into
|
||||||
|
a temporary array, to process them easier.
|
||||||
|
*/
|
||||||
for (nagg= 0; nagg < ncases/2 ; nagg++)
|
for (nagg= 0; nagg < ncases/2 ; nagg++)
|
||||||
agg[nagg+1]= args[nagg*2];
|
agg[nagg+1]= args[nagg*2];
|
||||||
nagg++;
|
nagg++;
|
||||||
if (!(found_types= collect_cmp_types(agg, nagg)))
|
if (!(found_types= collect_cmp_types(agg, nagg)))
|
||||||
return;
|
return;
|
||||||
|
if (found_types & (1 << STRING_RESULT))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
If we'll do string comparison, we also need to aggregate
|
||||||
|
character set and collation for first/WHEN items and
|
||||||
|
install converters for some of them to cmp_collation when necessary.
|
||||||
|
This is done because cmp_item comparators cannot compare
|
||||||
|
strings in two different character sets.
|
||||||
|
Some examples when we install converters:
|
||||||
|
|
||||||
|
1. Converter installed for the first expression:
|
||||||
|
|
||||||
|
CASE latin1_item WHEN utf16_item THEN ... END
|
||||||
|
|
||||||
|
is replaced with:
|
||||||
|
|
||||||
|
CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END
|
||||||
|
|
||||||
|
2. Converter installed for the left WHEN item:
|
||||||
|
|
||||||
|
CASE utf16_item WHEN latin1_item THEN ... END
|
||||||
|
|
||||||
|
is replaced with:
|
||||||
|
|
||||||
|
CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END
|
||||||
|
*/
|
||||||
|
if (agg_arg_charsets_for_comparison(cmp_collation, agg, nagg))
|
||||||
|
return;
|
||||||
|
/*
|
||||||
|
Now copy first expression and all WHEN expressions back to args[]
|
||||||
|
array, because some of the items might have been changed to converters
|
||||||
|
(e.g. Item_func_conv_charset, or Item_string for constants).
|
||||||
|
*/
|
||||||
|
args[first_expr_num]= agg[0];
|
||||||
|
for (nagg= 0; nagg < ncases / 2; nagg++)
|
||||||
|
args[nagg * 2]= agg[nagg + 1];
|
||||||
|
}
|
||||||
for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
|
for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
|
||||||
{
|
{
|
||||||
if (found_types & (1 << i) && !cmp_items[i])
|
if (found_types & (1 << i) && !cmp_items[i])
|
||||||
{
|
{
|
||||||
DBUG_ASSERT((Item_result)i != ROW_RESULT);
|
DBUG_ASSERT((Item_result)i != ROW_RESULT);
|
||||||
if ((Item_result)i == STRING_RESULT &&
|
|
||||||
agg_arg_charsets_for_comparison(cmp_collation, agg, nagg))
|
|
||||||
return;
|
|
||||||
if (!(cmp_items[i]=
|
if (!(cmp_items[i]=
|
||||||
cmp_item::get_comparator((Item_result)i,
|
cmp_item::get_comparator((Item_result)i,
|
||||||
cmp_collation.collation)))
|
cmp_collation.collation)))
|
||||||
|
@ -252,8 +252,8 @@ bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
|
|||||||
CHARSET_INFO *cs)
|
CHARSET_INFO *cs)
|
||||||
{
|
{
|
||||||
/* How many bytes are in incomplete character */
|
/* How many bytes are in incomplete character */
|
||||||
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
|
offset= cs->mbminlen - offset; /* How many zeros we should prepend */
|
||||||
DBUG_ASSERT(offset && offset != cs->mbmaxlen);
|
DBUG_ASSERT(offset && offset != cs->mbminlen);
|
||||||
|
|
||||||
uint32 aligned_length= arg_length + offset;
|
uint32 aligned_length= arg_length + offset;
|
||||||
if (alloc(aligned_length))
|
if (alloc(aligned_length))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user