Bug#11753363 (bug#44793) CHARACTER SETS: CASE CLAUSE, UCS2 OR UTF32, FAILURE

Problem: in case of string CASE/WHEN arguments with different character sets, Item_func_case::find_item() called comparator cmp_items[x] on mixed character set Items, so an 8-bit value could be erroneously treated as a utf16/utf32 value, which led to a crash on DBUG_ASSERT() because of the wrong value length. This was wrong, as a string comparator expects arguments in the same character set. Fix: modify Item_func_case's argument list after calling agg_arg_charsets_for_comparison() - put the Items in the "agg" array back to "args", because some of the Items in the "agg" array might have been changed to character set converters: - to Item_func_conv_charset for non-constant items - to Item_string for constant items In other words, perform the same substitution which is done in all other string comparison or string result operations: Replace CASE latin1_item WHEN utf16_item THEN ... END with CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END Replace CASE utf16_item WHEN latin1_item THEN ... END with CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END @ mysql-test/r/ctype_utf16.result @ mysql-test/r/ctype_utf32.result @ mysql-test/t/ctype_utf16.test @ mysql-test/t/ctype_utf32.test Adding tests @ sql/item_cmpfunc.cc Put "agg" back to "args". @ sql/sql_string.cc Backporting a fix for String::set_or_copy_aligned() from 5.6, for better test coverage: "SELECT _utf16 0x61" should expand the string to 0x0061 rather than to 0x00000061. This fix was made in 5.6 under terms of "WL#4616 Implement UTF16-LE".
2011-03-01 15:09:37 +03:00 · 2011-03-01 15:09:37 +03:00 · 8a83d30436
commit 8a83d30436
parent 48126a574c
6 changed files with 97 additions and 9 deletions
--- a/mysql-test/r/ctype_utf16.result
+++ b/mysql-test/r/ctype_utf16.result
@ -30,13 +30,13 @@ binary 'a  a' > 'a'	binary 'a  \0' > 'a'	binary 'a\0' > 'a'
 1	1	1
 select hex(_utf16 0x44);
 hex(_utf16 0x44)
-00000044
+0044
 select hex(_utf16 0x3344);
 hex(_utf16 0x3344)
 3344
 select hex(_utf16 0x113344);
 hex(_utf16 0x113344)
-000000113344
+00113344
 CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
 INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
 SELECT hex(word) FROM t1 ORDER BY word;
@ -434,10 +434,10 @@ aardvarz
 DROP TABLE t1;
 SELECT hex(cast(0xAA as char character set utf16));
 hex(cast(0xAA as char character set utf16))
-000000AA
+00AA
 SELECT hex(convert(0xAA using utf16));
 hex(convert(0xAA using utf16))
-000000AA
+00AA
 CREATE TABLE t1 (a char(10) character set utf16);
 INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
 SELECT HEX(a) FROM t1;
@ -1102,5 +1102,20 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 DROP TABLE t1, t2;
 #
 # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
 #
 SELECT CASE _latin1'a' WHEN _utf16'a' THEN 'A' END;
 CASE _latin1'a' WHEN _utf16'a' THEN 'A' END
 A
 SELECT CASE _utf16'a' WHEN _latin1'a' THEN 'A' END;
 CASE _utf16'a' WHEN _latin1'a' THEN 'A' END
 A
 CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf16);
 INSERT INTO t1 VALUES ('a');
 SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
 CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END
 b
 DROP TABLE t1;
 #
 # End of 5.5 tests
 #
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@ -1152,5 +1152,20 @@ d
 f
 DROP TABLE t1;
 #
 # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
 #
 SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
 CASE _latin1'a' WHEN _utf32'a' THEN 'A' END
 A
 SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
 CASE _utf32'a' WHEN _latin1'a' THEN 'A' END
 A
 CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
 INSERT INTO t1 VALUES ('a');
 SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
 CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END
 b
 DROP TABLE t1;
 #
 # End of 5.5 tests
 #
--- a/mysql-test/t/ctype_utf16.test
+++ b/mysql-test/t/ctype_utf16.test
@ -745,6 +745,15 @@ CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
 SHOW CREATE TABLE t2;
 DROP TABLE t1, t2;
 --echo #
 --echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
 --echo #
 SELECT CASE _latin1'a' WHEN _utf16'a' THEN 'A' END;
 SELECT CASE _utf16'a' WHEN _latin1'a' THEN 'A' END;
 CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf16);
 INSERT INTO t1 VALUES ('a');
 SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
 DROP TABLE t1;
 #
 ## TODO: add tests for all engines
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@ -830,6 +830,16 @@ INSERT INTO t1 VALUES ('d'),('f');
 SELECT * FROM t1 WHERE b BETWEEN 'a' AND 'z';
 DROP TABLE t1;
 --echo #
 --echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
 --echo #
 SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
 SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
 CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
 INSERT INTO t1 VALUES ('a');
 SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
 DROP TABLE t1;
 --echo #
 --echo # End of 5.5 tests
 --echo #
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@ -3054,20 +3054,59 @@ void Item_func_case::fix_length_and_dec()
    agg[0]= args[first_expr_num];
    left_result_type= agg[0]->result_type();
    /*
      As the first expression and WHEN expressions
      are intermixed in args[] array with THEN and ELSE items,
      extract the first expression and all WHEN expressions into 
      a temporary array, to process them easier.
    */
    for (nagg= 0; nagg < ncases/2 ; nagg++)
      agg[nagg+1]= args[nagg*2];
    nagg++;
    if (!(found_types= collect_cmp_types(agg, nagg)))
      return;
    if (found_types & (1 << STRING_RESULT))
    {
      /*
        If we'll do string comparison, we also need to aggregate
        character set and collation for first/WHEN items and
        install converters for some of them to cmp_collation when necessary.
        This is done because cmp_item comparators cannot compare
        strings in two different character sets.
        Some examples when we install converters:
        1. Converter installed for the first expression:
           CASE         latin1_item              WHEN utf16_item THEN ... END
        is replaced to:
           CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END
        2. Converter installed for the left WHEN item:
          CASE utf16_item WHEN         latin1_item              THEN ... END
        is replaced to:
           CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END
      */
      if (agg_arg_charsets_for_comparison(cmp_collation, agg, nagg))
        return;
      /*
        Now copy first expression and all WHEN expressions back to args[]
        array, because some of the items might have been changed to converters
        (e.g. Item_func_conv_charset, or Item_string for constants).
      */
      args[first_expr_num]= agg[0];
      for (nagg= 0; nagg < ncases / 2; nagg++)
        args[nagg * 2]= agg[nagg + 1];
    }
    for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
    {
      if (found_types & (1 << i) && !cmp_items[i])
      {
        DBUG_ASSERT((Item_result)i != ROW_RESULT);
        if ((Item_result)i == STRING_RESULT &&
            agg_arg_charsets_for_comparison(cmp_collation, agg, nagg))
          return;
        if (!(cmp_items[i]=
            cmp_item::get_comparator((Item_result)i,
                                     cmp_collation.collation)))
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@ -252,8 +252,8 @@ bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
 			  CHARSET_INFO *cs)
 {
  /* How many bytes are in incomplete character */
-  offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
+  offset= cs->mbminlen - offset; /* How many zeros we should prepend */
-  DBUG_ASSERT(offset && offset != cs->mbmaxlen);
+  DBUG_ASSERT(offset && offset != cs->mbminlen);
  uint32 aligned_length= arg_length + offset;
  if (alloc(aligned_length))