MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx

Synchronizing sources in: my_wildcmp_uca_impl(), which handles utf8_unicode_ci, and my_wildcmp_unicode_impl(), which handles utf8_general_ci. The latter already had a fix for a similar MySQL bug in utf8_general_ci: Bug#11754 (SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0). That fix is now propagated to utf8_unicode_ci.
2018-10-15 13:22:18 +04:00 · 2018-10-15 13:22:18 +04:00 · 34f8a4071e
commit 34f8a4071e
parent ae3fe14c17
4 changed files with 119 additions and 82 deletions
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@ -13571,5 +13571,26 @@ Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'oe') and (`test`.`t1`.`a` = 'oe'))
 DROP TABLE t1;
 #
+# MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
+#
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+name
+radio! test
+ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+name
+radio! test
+DROP TABLE t1;
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
+name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
+FROM t1;
+c1	c2
+1	1
+DROP TABLE t1;
+#
 # End of MariaDB-10.0 tests
 #
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@ -617,6 +617,24 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' AND a='oe' COLLATE utf8_german2_c
 EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci AND a='oe';
 DROP TABLE t1;

+--echo #
+--echo # MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
+--echo #
+
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+DROP TABLE t1;
+
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
+       name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
+FROM t1;
+DROP TABLE t1;
+
 --echo #
 --echo # End of MariaDB-10.0 tests
 --echo #
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@ -21092,7 +21092,7 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
      }

      wildstr+= scan;
-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape && wildstr < wildend)
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                         (const uchar*)wildend)) <= 0)
@ -21113,16 +21113,14 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
      else
      {
        if (my_uca_charcmp(cs,s_wc,w_wc))
-          return 1;
+          return 1;                               /* No match */
      }
      if (wildstr == wildend)
        return (str != str_end);                  /* Match if both are at end */
    }

-    
    if (w_wc == (my_wc_t) w_many)
    {                                             /* Found w_many */
-    
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
@ -21157,13 +21155,17 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                       (const uchar*)wildend)) <= 0)
        return 1;
+      wildstr+= scan;

      if (w_wc ==  (my_wc_t) escape)
      {
-        wildstr+= scan;
+        if (wildstr < wildend)
+        {
          if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                           (const uchar*)wildend)) <= 0)
            return 1;
+          wildstr+= scan;
+        }
      }

      while (1)
@ -21182,19 +21184,19 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
        if (str == str_end)
          return -1;

+        str+= scan;
        result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend,
-                                    escape, w_one, w_many, recurse_level+1);
-        
+                                    escape, w_one, w_many,
+                                    recurse_level + 1);
        if (result <= 0)
          return result;
-        
-        str+= scan;
      }
    }
  }
  return (str != str_end ? 1 : 0);
 }

+
 int my_wildcmp_uca(CHARSET_INFO *cs,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
  int result= -1;                             /* Not found, using wildcards */
  my_wc_t s_wc, w_wc;
  int scan;
-  int (*mb_wc)(CHARSET_INFO *, my_wc_t *,
-               const uchar *, const uchar *);
-  mb_wc= cs->cset->mb_wc;
+  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;

  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
@ -4454,10 +4452,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
        return (str != str_end);                  /* Match if both are at end */
    }

-    
    if (w_wc == (my_wc_t) w_many)
    {                                             /* Found w_many */
-    
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {