From bcba75df423314a49da7436281eba1403e020897 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 9 Mar 2007 12:18:28 -0500 Subject: [PATCH 1/7] Bug #25543 Replication of wrong values if using rand() in stored procedure When rand() is called multiple times inside a stored procedure, the server does not binlog the correct random seed values. This patch corrects the problem by resetting rand_used= 0 in THD::cleanup_after_query() allowing the system to save the random seeds if needed for each command in a stored procedure body. However, rand_used is not reset if executing in a stored function or trigger because these operations are binlogged by call and thus only the calling statement need detect the call to rand() made by its substatements. These substatements must not set rand_used to 0 because it would remove the detection of rand() by the calling statement. mysql-test/r/rpl_misc_functions.result: Bug #25543 Replication of wrong values if using rand() in stored procedure The result file was modified to include the correct processing of the new additions to the test. The results from execution are written to files on both the master and the slave. The files are compared to ensure the values from rand() generated on the master are correctly generated on the slave. mysql-test/t/rpl_misc_functions.test: Bug #25543 Replication of wrong values if using rand() in stored procedure The test was modified to include a test of a stored procedure that calls the rand() function multiple times. The results from execution are written to files on both the master and the slave. The files are compared to ensure the values from rand() generated on the master are correctly generated on the slave. sql/sql_class.cc: Bug #25543 Replication of wrong values if using rand() in stored procedure The code was modified to reset rand_used so that detection of calls to rand() will save random seeds if needed by the slave. 
--- mysql-test/r/rpl_misc_functions.result | 27 +++++++++- mysql-test/t/rpl_misc_functions.test | 68 +++++++++++++++++++++++++- sql/sql_class.cc | 12 +++++ 3 files changed, 103 insertions(+), 4 deletions(-) diff --git a/mysql-test/r/rpl_misc_functions.result b/mysql-test/r/rpl_misc_functions.result index c11663b8ac8..526414cec9c 100644 --- a/mysql-test/r/rpl_misc_functions.result +++ b/mysql-test/r/rpl_misc_functions.result @@ -18,6 +18,29 @@ create table t2 like t1; load data local infile 'MYSQLTEST_VARDIR/master-data/test/rpl_misc_functions.outfile' into table t2; select * from t1, t2 where (t1.id=t2.id) and not(t1.i=t2.i and t1.r1=t2.r1 and t1.r2=t2.r2 and t1.p=t2.p); id i r1 r2 p id i r1 r2 p -stop slave; -drop table t1; drop table t1; +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (col_a double default NULL); +CREATE PROCEDURE test_replication_sp1() +BEGIN +INSERT INTO t1 VALUES (rand()), (rand()); +INSERT INTO t1 VALUES (rand()); +END| +CREATE PROCEDURE test_replication_sp2() +BEGIN +CALL test_replication_sp1(); +CALL test_replication_sp1(); +END| +CREATE FUNCTION test_replication_sf() RETURNS DOUBLE DETERMINISTIC +BEGIN +RETURN (rand() + rand()); +END| +CALL test_replication_sp1(); +CALL test_replication_sp2(); +INSERT INTO t1 VALUES (test_replication_sf()); +INSERT INTO t1 VALUES (test_replication_sf()); +INSERT INTO t1 VALUES (test_replication_sf()); +DROP PROCEDURE IF EXISTS test_replication_sp1; +DROP PROCEDURE IF EXISTS test_replication_sp2; +DROP FUNCTION IF EXISTS test_replication_sf; +DROP TABLE IF EXISTS t1; diff --git a/mysql-test/t/rpl_misc_functions.test b/mysql-test/t/rpl_misc_functions.test index 6e0bda90503..43ce3afc8ad 100644 --- a/mysql-test/t/rpl_misc_functions.test +++ b/mysql-test/t/rpl_misc_functions.test @@ -28,10 +28,74 @@ create table t2 like t1; eval load data local infile '$MYSQLTEST_VARDIR/master-data/test/rpl_misc_functions.outfile' into table t2; # compare them with the replica; the SELECT below should return no row select * from t1, 
t2 where (t1.id=t2.id) and not(t1.i=t2.i and t1.r1=t2.r1 and t1.r2=t2.r2 and t1.p=t2.p); -stop slave; -drop table t1; connection master; drop table t1; # End of 4.1 tests + +# +# BUG#25543 test calling rand() multiple times on the master in +# a stored procedure. +# + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings + +CREATE TABLE t1 (col_a double default NULL); + +DELIMITER |; + +# Use a SP that calls rand() multiple times +CREATE PROCEDURE test_replication_sp1() +BEGIN + INSERT INTO t1 VALUES (rand()), (rand()); + INSERT INTO t1 VALUES (rand()); +END| + +# Use a SP that calls another SP to call rand() multiple times +CREATE PROCEDURE test_replication_sp2() +BEGIN + CALL test_replication_sp1(); + CALL test_replication_sp1(); +END| + +# Use a SF that calls rand() multiple times +CREATE FUNCTION test_replication_sf() RETURNS DOUBLE DETERMINISTIC +BEGIN + RETURN (rand() + rand()); +END| + +DELIMITER ;| + +# Exercise the functions and procedures then compare the results on +# the master to those on the slave. +CALL test_replication_sp1(); +CALL test_replication_sp2(); +INSERT INTO t1 VALUES (test_replication_sf()); +INSERT INTO t1 VALUES (test_replication_sf()); +INSERT INTO t1 VALUES (test_replication_sf()); + +# Record the results of the query on the master +--exec $MYSQL --port=$MASTER_MYPORT test -e "SELECT * FROM test.t1" > $MYSQLTEST_VARDIR/tmp/rpl_rand_master.sql + +--sync_slave_with_master + +# Record the results of the query on the slave +--exec $MYSQL --port=$SLAVE_MYPORT test -e "SELECT * FROM test.t1" > $MYSQLTEST_VARDIR/tmp/rpl_rand_slave.sql + +# Compare the results from the master to the slave. 
+--exec diff $MYSQLTEST_VARDIR/tmp/rpl_rand_master.sql $MYSQLTEST_VARDIR/tmp/rpl_rand_slave.sql + +# Cleanup +--disable_warnings +DROP PROCEDURE IF EXISTS test_replication_sp1; +DROP PROCEDURE IF EXISTS test_replication_sp2; +DROP FUNCTION IF EXISTS test_replication_sf; +DROP TABLE IF EXISTS t1; +--enable_warnings + +# If all is good, when can cleanup our dump files. +--system rm $MYSQLTEST_VARDIR/tmp/rpl_rand_master.sql +--system rm $MYSQLTEST_VARDIR/tmp/rpl_rand_slave.sql diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 3c91e3e8fb7..adb7323c463 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -575,6 +575,18 @@ void THD::cleanup_after_query() clear_next_insert_id= 0; next_insert_id= 0; } + /* + Reset rand_used so that detection of calls to rand() will save random + seeds if needed by the slave. + + Do not reset rand_used if inside a stored function or trigger because + only the call to these operations is logged. Thus only the calling + statement needs to detect rand() calls made by its substatements. These + substatements must not set rand_used to 0 because it would remove the + detection of rand() by the calling statement. + */ + if (!in_sub_stmt) + rand_used= 0; /* Free Items that were created during this execution */ free_items(); /* Reset where. */ From 685d21b72f201a2eb16718e73c76e62ee708458d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 22 Mar 2007 15:07:32 +0100 Subject: [PATCH 2/7] - renaming TMP_TABLE to NON_TRANSACTIONAL_TMP_TABLE because this is what it actually means (Monty approved the renaming) - correcting description of transaction_alloc command-line options (our manual is correct) - fix for a failure of rpl_trigger. 
mysql-test/t/rpl_misc_functions.test: test was cleaning up only on slave, but it's also needed on master, otherwise it influences rpl_trigger.test sql/lock.cc: clearer name sql/mysqld.cc: I checked the code that those two variables are not about binlogging but about the size of the transaction's memroot which is used to create savepoint structures and to store list of tables to be invalidated (for NDB). The manual has a correct description, no need to fix it. sql/sql_base.cc: clearer name sql/sql_derived.cc: clearer name sql/sql_select.cc: clearer name sql/table.h: clearer name: TMP_TABLE is used for non-transactional tables. --- mysql-test/t/rpl_misc_functions.test | 2 ++ sql/lock.cc | 8 ++++---- sql/mysqld.cc | 4 ++-- sql/sql_base.cc | 2 +- sql/sql_derived.cc | 2 +- sql/sql_select.cc | 2 +- sql/table.h | 3 ++- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/mysql-test/t/rpl_misc_functions.test b/mysql-test/t/rpl_misc_functions.test index 43ce3afc8ad..f00beff583a 100644 --- a/mysql-test/t/rpl_misc_functions.test +++ b/mysql-test/t/rpl_misc_functions.test @@ -89,12 +89,14 @@ INSERT INTO t1 VALUES (test_replication_sf()); --exec diff $MYSQLTEST_VARDIR/tmp/rpl_rand_master.sql $MYSQLTEST_VARDIR/tmp/rpl_rand_slave.sql # Cleanup +connection master; --disable_warnings DROP PROCEDURE IF EXISTS test_replication_sp1; DROP PROCEDURE IF EXISTS test_replication_sp2; DROP FUNCTION IF EXISTS test_replication_sf; DROP TABLE IF EXISTS t1; --enable_warnings +--sync_slave_with_master # If all is good, when can cleanup our dump files. --system rm $MYSQLTEST_VARDIR/tmp/rpl_rand_master.sql diff --git a/sql/lock.cc b/sql/lock.cc index bf1512b754c..233d12d9cc4 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -544,7 +544,7 @@ TABLE_LIST *mysql_lock_have_duplicate(THD *thd, TABLE_LIST *needle, goto end; /* A temporary table does not have locks. 
*/ - if (table->s->tmp_table == TMP_TABLE) + if (table->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE) goto end; /* Get command lock or LOCK TABLES lock. Maybe empty for INSERT DELAYED. */ @@ -569,7 +569,7 @@ TABLE_LIST *mysql_lock_have_duplicate(THD *thd, TABLE_LIST *needle, if (haystack->placeholder()) continue; table2= haystack->table; - if (table2->s->tmp_table == TMP_TABLE) + if (table2->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE) continue; /* All tables in list must be in lock. */ @@ -655,7 +655,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, *write_lock_used=0; for (i=tables=lock_count=0 ; i < count ; i++) { - if (table_ptr[i]->s->tmp_table != TMP_TABLE) + if (table_ptr[i]->s->tmp_table != NON_TRANSACTIONAL_TMP_TABLE) { tables+=table_ptr[i]->file->lock_count(); lock_count++; @@ -697,7 +697,7 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, TABLE *table; enum thr_lock_type lock_type; - if ((table=table_ptr[i])->s->tmp_table == TMP_TABLE) + if ((table=table_ptr[i])->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE) continue; lock_type= table->reginfo.lock_type; if (lock_type >= TL_WRITE_ALLOW_WRITE) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 9a7928b214f..0237ed144e8 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -6120,12 +6120,12 @@ The minimum value for this variable is 4096.", (gptr*) &max_system_variables.tmp_table_size, 0, GET_ULL, REQUIRED_ARG, 32*1024*1024L, 1024, MAX_MEM_TABLE_SIZE, 0, 1, 0}, {"transaction_alloc_block_size", OPT_TRANS_ALLOC_BLOCK_SIZE, - "Allocation block size for transactions to be stored in binary log", + "Allocation block size for various transaction-related structures", (gptr*) &global_system_variables.trans_alloc_block_size, (gptr*) &max_system_variables.trans_alloc_block_size, 0, GET_ULONG, REQUIRED_ARG, QUERY_ALLOC_BLOCK_SIZE, 1024, ~0L, 0, 1024, 0}, {"transaction_prealloc_size", OPT_TRANS_PREALLOC_SIZE, - "Persistent buffer for transactions to be stored in binary 
log", + "Persistent buffer for various transaction-related structures", (gptr*) &global_system_variables.trans_prealloc_size, (gptr*) &max_system_variables.trans_prealloc_size, 0, GET_ULONG, REQUIRED_ARG, TRANS_ALLOC_PREALLOC_SIZE, 1024, ~0L, 0, 1024, 0}, diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 77bb1d9642b..e8cb3ae675d 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -2950,7 +2950,7 @@ TABLE *open_temporary_table(THD *thd, const char *path, const char *db, share= tmp_table->s; tmp_table->reginfo.lock_type=TL_WRITE; // Simulate locked share->tmp_table= (tmp_table->file->has_transactions() ? - TRANSACTIONAL_TMP_TABLE : TMP_TABLE); + TRANSACTIONAL_TMP_TABLE : NON_TRANSACTIONAL_TMP_TABLE); share->table_cache_key= (char*) (tmp_table+1); share->db= share->table_cache_key; share->key_length= (uint) (strmov(((char*) (share->table_name= diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index cd46f3bcc0e..84622398f6f 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -179,7 +179,7 @@ exit: orig_table_list->table_name= (char*) table->s->table_name; orig_table_list->table_name_length= strlen((char*)table->s->table_name); table->derived_select_number= first_select->select_number; - table->s->tmp_table= TMP_TABLE; + table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE; #ifndef NO_EMBEDDED_ACCESS_CHECKS if (orig_table_list->referencing_view) table->grant= orig_table_list->grant; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 9fe92d63da3..433aef68e25 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9165,7 +9165,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List &fields, table->s->table_name= table->s->path= tmpname; table->s->db= ""; table->s->blob_ptr_size= mi_portable_sizeof_char_ptr; - table->s->tmp_table= TMP_TABLE; + table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE; table->s->db_low_byte_first=1; // True for HEAP and MyISAM table->s->table_charset= param->table_charset; table->s->keys_for_keyread.init(); diff --git 
a/sql/table.h b/sql/table.h index e2bd5ba0a7d..5fc73b22d2d 100644 --- a/sql/table.h +++ b/sql/table.h @@ -55,7 +55,8 @@ typedef struct st_grant_info ulong orig_want_privilege; } GRANT_INFO; -enum tmp_table_type {NO_TMP_TABLE=0, TMP_TABLE=1, TRANSACTIONAL_TMP_TABLE=2, +enum tmp_table_type {NO_TMP_TABLE=0, + NON_TRANSACTIONAL_TMP_TABLE=1, TRANSACTIONAL_TMP_TABLE=2, SYSTEM_TMP_TABLE=3}; enum frm_type_enum From 916245f9c41c3f6170e384ed6d9241ed79ca781e Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 27 Mar 2007 12:20:20 +0500 Subject: [PATCH 3/7] Bug#27079 Crash while grouping empty ucs2 strings Problem: GROUP BY on empty ucs2 strings crashed server. Reason: sometimes mi_unique_hash() is executed with ptr=null and length=0, which means "empty string". The branch of code handling UCS2 character set was not safe against ptr=null and fell into an endless loop even if length=0 because of pointer arithmetic overflow. Fix: adding special check for length=0 to avoid pointer arithmetic overflow. mysql-test/r/ctype_uca.result: Adding test case mysql-test/t/ctype_uca.test: Adding test case strings/ctype-uca.c: Fix my_uca_scanner_init_ucs2 to be safe against strings with length=0 and ptr=0. 
--- mysql-test/r/ctype_uca.result | 9 +++++++++ mysql-test/t/ctype_uca.test | 10 ++++++++++ strings/ctype-uca.c | 34 +++++++++++++++++++++++++++------- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 3e286c77c00..1fd1493bf1e 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -2654,3 +2654,12 @@ ii 2 ii 2 İİ 4 İİ 4 ii 2 İİ 4 II 2 ıı 4 II 2 DROP TABLE t1; +CREATE TABLE t1 ( +c1 text character set ucs2 collate ucs2_polish_ci NOT NULL +) ENGINE=MyISAM; +insert into t1 values (''),('a'); +SELECT COUNT(*), c1 FROM t1 GROUP BY c1; +COUNT(*) c1 +1 +1 a +DROP TABLE IF EXISTS t1; diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index 3e49b9de883..64349bc40a6 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -475,3 +475,13 @@ ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf8 collate utf8_turkish_ci; SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu FROM t1 ORDER BY id; DROP TABLE t1; + +# +# Bug #27079 Crash while grouping empty ucs2 strings +# +CREATE TABLE t1 ( + c1 text character set ucs2 collate ucs2_polish_ci NOT NULL +) ENGINE=MyISAM; +insert into t1 values (''),('a'); +SELECT COUNT(*), c1 FROM t1 GROUP BY c1; +DROP TABLE IF EXISTS t1; diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 1292d7f5ede..3aad36f858c 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -6744,7 +6744,7 @@ typedef struct my_uca_scanner_handler_st int (*next)(my_uca_scanner *scanner); } my_uca_scanner_handler; -static uint16 nochar[]= {0}; +static uint16 nochar[]= {0,0}; #ifdef HAVE_CHARSET_ucs2 @@ -6769,13 +6769,33 @@ static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner, CHARSET_INFO *cs __attribute__((unused)), const uchar *str, uint length) { - /* Note, no needs to initialize scanner->wbeg */ - scanner->sbeg= str; - scanner->send= str + length - 2; scanner->wbeg= 
nochar; - scanner->uca_length= cs->sort_order; - scanner->uca_weight= cs->sort_order_big; - scanner->contractions= cs->contractions; + if (length) + { + scanner->sbeg= str; + scanner->send= str + length - 2; + scanner->uca_length= cs->sort_order; + scanner->uca_weight= cs->sort_order_big; + scanner->contractions= cs->contractions; + } + else + { + /* + Sometimes this function is called with + str=NULL and length=0, which should be + considered as an empty string. + + The above initialization is unsafe for such cases, + because scanner->send is initialized to (NULL-2), which is 0xFFFFFFFE. + Then we fall into an endless loop in my_uca_scanner_next_ucs2(). + + Do special initialization for the case when length=0. + Initialize scanner->sbeg to an address greater than scanner->send. + Next call of my_uca_scanner_next_ucs2() will correctly return with -1. + */ + scanner->sbeg= (uchar*) &nochar[1]; + scanner->send= (uchar*) &nochar[0]; + } } From 27b333b75c989ed820b8b10612c96cfe9f887822 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 27 Mar 2007 13:30:43 +0500 Subject: [PATCH 4/7] Bug#25946 Namespace not include for xsi usage within --xml output with null/nil values Fix: adding namespace reference into "mysql --xml" output, to make it work similarly to "mysqldump --xml". client/mysql.cc: Adding namespace reference. 
mysql-test/r/client_xml.result: Fixing test results --- client/mysql.cc | 3 ++- mysql-test/r/client_xml.result | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/client/mysql.cc b/client/mysql.cc index 2faa2f31a50..510420fdf3d 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -2529,7 +2529,8 @@ print_table_data_xml(MYSQL_RES *result) tee_fputs("\n\n", PAGER); + tee_fputs("\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">", + PAGER); fields = mysql_fetch_fields(result); while ((cur = mysql_fetch_row(result))) diff --git a/mysql-test/r/client_xml.result b/mysql-test/r/client_xml.result index 7395b2433e8..6a148954fcd 100644 --- a/mysql-test/r/client_xml.result +++ b/mysql-test/r/client_xml.result @@ -7,7 +7,7 @@ insert into t1 values (1, 2, 'a&b ab'); +" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 1 2 @@ -34,7 +34,7 @@ insert into t1 values (1, 2, 'a&b ab'); +" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 1 @@ -42,7 +42,7 @@ insert into t1 values (1, 2, 'a&b ab'); +" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 1 @@ -50,7 +50,7 @@ insert into t1 values (1, 2, 'a&b ab'); +" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 0 @@ -58,7 +58,7 @@ insert into t1 values (1, 2, 'a&b ab'); +" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> 1 @@ -66,7 +66,7 @@ insert into t1 values (1, 2, 'a&b ab'); +" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> From b1c23f112f00a7e9ab05c69dbf314cea2466ba98 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 27 Mar 2007 15:06:41 +0500 Subject: [PATCH 5/7] Bug#22378 Make error, strings/ctype-utf8.c, uni_plane undeclared - Fixing utf8_general_cs according to recent changes. - Compiling utf8_general_cs in pentium-debug-max configuration to avoid these problems in the future. 
BUILD/compile-pentium-debug-max: Enable compiling of experimental collations in compile-pentium-debug-max config/ac-macros/character_sets.m4: Adding hidden flag --with-experimental-collations, not seen in "configure --help". strings/ctype-utf8.c: Compilation failure changes: catching up with previous character set changes: - uni_plane is no longer a global variable - adding new parameter into my_strnncollsp_utf8_cs - adding my_strnxfrmlen_utf8 into MY_COLLATION_HANDLER for utf8_general_cs --- BUILD/compile-pentium-debug-max | 2 +- config/ac-macros/character_sets.m4 | 13 +++++++++++++ strings/ctype-utf8.c | 7 ++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/BUILD/compile-pentium-debug-max b/BUILD/compile-pentium-debug-max index 7a11ad24c44..a69513ac6bb 100755 --- a/BUILD/compile-pentium-debug-max +++ b/BUILD/compile-pentium-debug-max @@ -6,6 +6,6 @@ path=`dirname $0` extra_flags="$pentium_cflags $debug_cflags $max_cflags" c_warnings="$c_warnings $debug_extra_warnings" cxx_warnings="$cxx_warnings $debug_extra_warnings" -extra_configs="$pentium_configs $debug_configs $max_configs" +extra_configs="$pentium_configs $debug_configs $max_configs --with-experimental-collations" . 
"$path/FINISH.sh" diff --git a/config/ac-macros/character_sets.m4 b/config/ac-macros/character_sets.m4 index 1ab6e7dd780..8c3e8ca73b7 100644 --- a/config/ac-macros/character_sets.m4 +++ b/config/ac-macros/character_sets.m4 @@ -429,3 +429,16 @@ then else AC_MSG_RESULT(no) fi + + +# Shall we build experimental collations +AC_ARG_WITH(experimental-collations, + [], + [with_exp_coll=$withval], + [with_exp_coll=no] +) + +if test "$with_exp_coll" = "yes" +then + AC_DEFINE([HAVE_UTF8_GENERAL_CS], [1], [certain Japanese customer]) +fi diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 0e28ff7e342..387ce16a43d 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2764,6 +2764,7 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, const uchar *te=t+tlen; int save_diff = 0; int diff; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; while ( s < se && t < te ) { @@ -2800,13 +2801,16 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, const uchar *s, uint slen, - const uchar *t, uint tlen) + const uchar *t, uint tlen, + my_bool diff_if_only_endspace_difference + __attribute__((unused))) { int s_res,t_res; my_wc_t s_wc,t_wc; const uchar *se= s+slen; const uchar *te= t+tlen; int save_diff = 0; + MY_UNICASE_INFO **uni_plane= cs->caseinfo; while ( s < se && t < te ) { @@ -2875,6 +2879,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler = my_strnncoll_utf8_cs, my_strnncollsp_utf8_cs, my_strnxfrm_utf8, + my_strnxfrmlen_utf8, my_like_range_simple, my_wildcmp_mb, my_strcasecmp_utf8, From b5cc4fa61d615800ab13d428fae0296dbef82b81 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 28 Mar 2007 18:57:30 +0500 Subject: [PATCH 6/7] Bug#22638 SOUNDEX broken for international characters Problem: SOUNDEX returned an invalid string for international characters in multi-byte character sets. 
For example: for a Chinese/Japanese 3-byte long character _utf8 0xE99885 it took only the very first byte 0xE9, put it into the output string and then appended with three DIGIT ZERO characters, so the result was 0xE9303030 - which is an invalid utf8 string. Fix: make SOUNDEX() multi-byte aware and - put only complete characters into result, thus return only valid strings. This patch also makes SOUNDEX() compatible with UCS2. mysql-test/r/ctype_ucs.result: Adding tests mysql-test/r/ctype_utf8.result: Adding tests mysql-test/t/ctype_ucs.test: Adding tests mysql-test/t/ctype_utf8.test: Adding tests sql/item_strfunc.cc: Making soundex multi-byte aware. --- mysql-test/r/ctype_ucs.result | 18 +++++ mysql-test/r/ctype_utf8.result | 12 +++ mysql-test/t/ctype_ucs.test | 14 ++++ mysql-test/t/ctype_utf8.test | 8 ++ sql/item_strfunc.cc | 133 ++++++++++++++++++++++++++------- 5 files changed, 157 insertions(+), 28 deletions(-) diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index e32c1e8aae0..960953b3c5e 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -839,6 +839,24 @@ lily river drop table t1; deallocate prepare stmt; +set names latin1; +set character_set_connection=ucs2; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +soundex('') soundex('he') soundex('hello all folks') soundex('#3556 in bugdb') + H000 H4142 I51231 +select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +hex(soundex('')) hex(soundex('he')) hex(soundex('hello all folks')) hex(soundex('#3556 in bugdb')) + 0048003000300030 00480034003100340032 004900350031003200330031 +select 'mood' sounds like 'mud'; +'mood' sounds like 'mud' +1 +select hex(soundex(_ucs2 0x041004110412)); +hex(soundex(_ucs2 0x041004110412)) +0410003000300030 +select hex(soundex(_ucs2 0x00BF00C0)); +hex(soundex(_ucs2 0x00BF00C0)) +00C0003000300030 +set names latin1; create table t1(a blob, b 
text charset utf8, c text charset ucs2); select data_type, character_octet_length, character_maximum_length from information_schema.columns where table_name='t1'; diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index be1e1742ba6..1c6bc0e05b6 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -854,6 +854,18 @@ select * from t1 where soundex(a) = soundex('test'); id a 1 Test drop table t1; +select soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB); +soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB) +阅000 +select hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)); +hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)) +E99885303030 +select soundex(_utf8 0xD091D092D093); +soundex(_utf8 0xD091D092D093) +Б000 +select hex(soundex(_utf8 0xD091D092D093)); +hex(soundex(_utf8 0xD091D092D093)) +D091303030 SET collation_connection='utf8_general_ci'; create table t1 select repeat('a',4000) a; delete from t1; diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index 5a3720dc431..c3320159c41 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -572,6 +572,20 @@ select utext from t1 where utext like '%%'; drop table t1; deallocate prepare stmt; +# +# Bug#22638 SOUNDEX broken for international characters +# +set names latin1; +set character_set_connection=ucs2; +select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb'); +select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb')); +select 'mood' sounds like 'mud'; +# Cyrillic A, BE, VE +select hex(soundex(_ucs2 0x041004110412)); +# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter +select hex(soundex(_ucs2 0x00BF00C0)); +set names latin1; + # # Bug #14290: character_maximum_length for text fields # diff --git a/mysql-test/t/ctype_utf8.test 
b/mysql-test/t/ctype_utf8.test index 04b7ec78842..79b73fc7880 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -702,6 +702,14 @@ select * from t1 where soundex(a) = soundex('TEST'); select * from t1 where soundex(a) = soundex('test'); drop table t1; +# +# Bug#22638 SOUNDEX broken for international characters +# +select soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB); +select hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)); +select soundex(_utf8 0xD091D092D093); +select hex(soundex(_utf8 0xD091D092D093)); + SET collation_connection='utf8_general_ci'; -- source include/ctype_filesort.inc diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 6b1921e5bc8..03887629519 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1805,7 +1805,8 @@ void Item_func_soundex::fix_length_and_dec() { collation.set(args[0]->collation); max_length=args[0]->max_length; - set_if_bigger(max_length,4); + set_if_bigger(max_length, 4 * collation.collation->mbminlen); + tmp_value.set_charset(collation.collation); } @@ -1815,14 +1816,15 @@ void Item_func_soundex::fix_length_and_dec() else return 0 */ -static char soundex_toupper(char ch) +static int soundex_toupper(int ch) { return (ch >= 'a' && ch <= 'z') ? ch - 'a' + 'A' : ch; } -static char get_scode(char *ptr) + +static char get_scode(int wc) { - uchar ch= soundex_toupper(*ptr); + int ch= soundex_toupper(wc); if (ch < 'A' || ch > 'Z') { // Thread extended alfa (country spec) @@ -1832,46 +1834,121 @@ static char get_scode(char *ptr) } +static bool my_uni_isalpha(int wc) +{ + /* + Return true for all Basic Latin letters: a..z A..Z. + Return true for all Unicode characters with code higher than U+00C0: + - characters between 'z' and U+00C0 are controls and punctuations. + - "U+00C0 LATIN CAPITAL LETTER A WITH GRAVE" is the first letter after 'z'. 
+ */ + return (wc >= 'a' && wc <= 'z') || + (wc >= 'A' && wc <= 'Z') || + (wc >= 0xC0); +} + + String *Item_func_soundex::val_str(String *str) { DBUG_ASSERT(fixed == 1); String *res =args[0]->val_str(str); char last_ch,ch; CHARSET_INFO *cs= collation.collation; + my_wc_t wc; + uint nchars; + int rc; - if ((null_value=args[0]->null_value)) + if ((null_value= args[0]->null_value)) return 0; /* purecov: inspected */ - if (tmp_value.alloc(max(res->length(),4))) + if (tmp_value.alloc(max(res->length(), 4 * cs->mbminlen))) return str; /* purecov: inspected */ char *to= (char *) tmp_value.ptr(); - char *from= (char *) res->ptr(), *end=from+res->length(); - tmp_value.set_charset(cs); + char *to_end= to + tmp_value.alloced_length(); + char *from= (char *) res->ptr(), *end= from + res->length(); - while (from != end && !my_isalpha(cs,*from)) // Skip pre-space - from++; /* purecov: inspected */ - if (from == end) - return &my_empty_string; // No alpha characters. - *to++ = soundex_toupper(*from); // Copy first letter - last_ch = get_scode(from); // code of the first letter - // for the first 'double-letter check. 
- // Loop on input letters until - // end of input (null) or output - // letter code count = 3 - for (from++ ; from < end ; from++) + for ( ; ; ) /* Skip pre-space */ { - if (!my_isalpha(cs,*from)) - continue; - ch=get_scode(from); + if ((rc= cs->cset->mb_wc(cs, &wc, (uchar*) from, (uchar*) end)) <= 0) + return &my_empty_string; /* EOL or invalid byte sequence */ + + if (rc == 1 && cs->ctype) + { + /* Single byte letter found */ + if (my_isalpha(cs, *from)) + { + last_ch= get_scode(*from); // Code of the first letter + *to++= soundex_toupper(*from++); // Copy first letter + break; + } + from++; + } + else + { + from+= rc; + if (my_uni_isalpha(wc)) + { + /* Multibyte letter found */ + wc= soundex_toupper(wc); + last_ch= get_scode(wc); // Code of the first letter + if ((rc= cs->cset->wc_mb(cs, wc, (uchar*) to, (uchar*) to_end)) <= 0) + { + /* Extra safety - should not really happen */ + DBUG_ASSERT(false); + return &my_empty_string; + } + to+= rc; + break; + } + } + } + + /* + last_ch is now set to the first 'double-letter' check. 
+ loop on input letters until end of input + */ + for (nchars= 1 ; ; ) + { + if ((rc= cs->cset->mb_wc(cs, &wc, (uchar*) from, (uchar*) end)) <= 0) + break; /* EOL or invalid byte sequence */ + + if (rc == 1 && cs->ctype) + { + if (!my_isalpha(cs, *from++)) + continue; + } + else + { + from+= rc; + if (!my_uni_isalpha(wc)) + continue; + } + + ch= get_scode(wc); if ((ch != '0') && (ch != last_ch)) // if not skipped or double { - *to++ = ch; // letter, copy to output - last_ch = ch; // save code of last input letter - } // for next double-letter check + // letter, copy to output + if ((rc= cs->cset->wc_mb(cs, (my_wc_t) ch, + (uchar*) to, (uchar*) to_end)) <= 0) + { + // Extra safety - should not really happen + DBUG_ASSERT(false); + break; + } + to+= rc; + nchars++; + last_ch= ch; // save code of last input letter + } // for next double-letter check } - for (end=(char*) tmp_value.ptr()+4 ; to < end ; to++) - *to = '0'; - *to=0; // end string + + /* Pad up to 4 characters with DIGIT ZERO, if the string is shorter */ + if (nchars < 4) + { + uint nbytes= (4 - nchars) * cs->mbminlen; + cs->cset->fill(cs, to, nbytes, '0'); + to+= nbytes; + } + tmp_value.length((uint) (to-tmp_value.ptr())); return &tmp_value; } From ee0475bf5d5b9256d599aa6b9dcc641d4857d301 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 29 Mar 2007 10:32:38 +0500 Subject: [PATCH 7/7] Code layout fix for bug N 27079 Thanks to Gluh for suggestion. 
--- strings/ctype-uca.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 3aad36f858c..1263882846d 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -6777,25 +6777,24 @@ static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner, scanner->uca_length= cs->sort_order; scanner->uca_weight= cs->sort_order_big; scanner->contractions= cs->contractions; + return; } - else - { - /* - Sometimes this function is called with - str=NULL and length=0, which should be - considered as an empty string. + + /* + Sometimes this function is called with + str=NULL and length=0, which should be + considered as an empty string. + + The above initialization is unsafe for such cases, + because scanner->send is initialized to (NULL-2), which is 0xFFFFFFFE. + Then we fall into an endless loop in my_uca_scanner_next_ucs2(). - The above initialization is unsafe for such cases, - because scanner->send is initialized to (NULL-2), which is 0xFFFFFFFE. - Then we fall into an endless loop in my_uca_scanner_next_ucs2(). - - Do special initialization for the case when length=0. - Initialize scanner->sbeg to an address greater than scanner->send. - Next call of my_uca_scanner_next_ucs2() will correctly return with -1. - */ - scanner->sbeg= (uchar*) &nochar[1]; - scanner->send= (uchar*) &nochar[0]; - } + Do special initialization for the case when length=0. + Initialize scanner->sbeg to an address greater than scanner->send. + Next call of my_uca_scanner_next_ucs2() will correctly return with -1. + */ + scanner->sbeg= (uchar*) &nochar[1]; + scanner->send= (uchar*) &nochar[0]; }