diff --git a/mysql-test/r/statistics.result b/mysql-test/r/statistics.result index 9291b945193..20469c01a2e 100644 --- a/mysql-test/r/statistics.result +++ b/mysql-test/r/statistics.result @@ -1511,4 +1511,43 @@ test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF set histogram_size=default; set histogram_type=default; drop table t1; +# +# Bug mdev-4369: histogram for a column with many distinct values +# +CREATE TABLE t1 (id int); +CREATE TABLE t2 (id int); +INSERT INTO t1 (id) VALUES (1), (1), (1),(1); +INSERT INTO t1 (id) SELECT id FROM t1; +INSERT INTO t1 SELECT id+1 FROM t1; +INSERT INTO t1 SELECT id+2 FROM t1; +INSERT INTO t1 SELECT id+4 FROM t1; +INSERT INTO t1 SELECT id+8 FROM t1; +INSERT INTO t1 SELECT id+16 FROM t1; +INSERT INTO t1 SELECT id+32 FROM t1; +INSERT INTO t1 SELECT id+64 FROM t1; +INSERT INTO t1 SELECT id+128 FROM t1; +INSERT INTO t1 SELECT id+256 FROM t1; +INSERT INTO t1 SELECT id+512 FROM t1; +INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); +SELECT COUNT(*) FROM t2; +COUNT(*) +8192 +SELECT COUNT(DISTINCT id) FROM t2; +COUNT(DISTINCT id) +1024 +set @@tmp_table_size=1024*16; +set @@max_heap_table_size=1024*16; +set histogram_size=63; +analyze table t2 persistent for all; +Table Op Msg_type Msg_text +test.t2 analyze status OK +select db_name, table_name, column_name, +min_value, max_value, +nulls_ratio, avg_frequency, +hist_size, hist_type, HEX(histogram) +FROM mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram) +test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 03070B0F13171B1F23272B2F33373B3F43474B4F53575B5F63676B6F73777B7F83878B8F93979B9FA3A7ABAFB3B7BBBFC3C7CBCFD3D7DBDFE3E7EBEFF3F7FB +set histogram_size=default; +drop table t1, t2; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/t/statistics.test b/mysql-test/t/statistics.test index 4d11e11f033..b2a052fd3e8 100644 --- a/mysql-test/t/statistics.test +++ b/mysql-test/t/statistics.test @@ -638,5 +638,48 @@ set histogram_type=default; drop table t1; +--echo # +--echo # Bug mdev-4369: histogram for a column with many distinct values +--echo # + + +CREATE TABLE t1 (id int); +CREATE TABLE t2 (id int); + +INSERT INTO t1 (id) VALUES (1), (1), (1),(1); +INSERT INTO t1 (id) SELECT id FROM t1; +INSERT INTO t1 SELECT id+1 FROM t1; +INSERT INTO t1 SELECT id+2 FROM t1; +INSERT INTO t1 SELECT id+4 FROM t1; +INSERT INTO t1 SELECT id+8 FROM t1; +INSERT INTO t1 SELECT id+16 FROM t1; +INSERT INTO t1 SELECT id+32 FROM t1; +INSERT INTO t1 SELECT id+64 FROM t1; +INSERT INTO t1 SELECT id+128 FROM t1; +INSERT INTO t1 SELECT id+256 FROM t1; +INSERT INTO t1 SELECT id+512 FROM t1; + +INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); + +SELECT COUNT(*) FROM t2; +SELECT COUNT(DISTINCT id) FROM t2; + +set @@tmp_table_size=1024*16; +set @@max_heap_table_size=1024*16; + +set histogram_size=63; + +analyze table t2 persistent for all; + +select db_name, table_name, column_name, + min_value, max_value, + nulls_ratio, avg_frequency, + hist_size, hist_type, HEX(histogram) + FROM mysql.column_stats; + +set histogram_size=default; + +drop table t1, t2; + set use_stat_tables=@save_use_stat_tables; diff --git a/sql/sql_class.h b/sql/sql_class.h index 599f59c7c27..286fbc4f81b 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3997,6 +3997,7 @@ class Unique :public Sql_alloc uint size; uint full_size; uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ + bool with_counters; bool merge(TABLE *table, uchar *buff, bool without_last_merge); diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 6e476b34e0d..37c3a93ee08 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1348,7 +1348,7 @@ public: tree_key_length= field->pack_length(); tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, - tree_key_length, max_heap_table_size); + tree_key_length, max_heap_table_size, 1); } virtual ~Count_distinct_field() @@ -1435,7 +1435,7 @@ public: tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, (void*) &tree_key_length, - tree_key_length, max_heap_table_size); + tree_key_length, max_heap_table_size, 1); } bool add() diff --git a/sql/uniques.cc b/sql/uniques.cc index 9fa06311ece..0c1c34d495b 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, full_size= size; if (min_dupl_count_arg) full_size+= sizeof(element_count); + with_counters= test(min_dupl_count_arg); my_b_clear(&file); init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); @@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2) C_MODE_END +inline +element_count get_counter_from_merged_element(void *ptr, uint ofs) +{ + element_count cnt; + memcpy((uchar *) &cnt, (uchar *) ptr + ofs, sizeof(element_count)); + return cnt; +} + + +inline +void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt) +{ + memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count)); +} + + /* DESCRIPTION @@ -457,6 +474,8 @@ C_MODE_END file file with all trees dumped. Trees in the file must contain sorted unique values. Cache must be initialized in read mode. + with counters take into account counters for equal merged + elements RETURN VALUE 0 ok <> 0 error @@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, uint key_length, BUFFPEK *begin, BUFFPEK *end, tree_walk_action walk_action, void *walk_action_arg, qsort_cmp2 compare, void *compare_arg, - IO_CACHE *file) + IO_CACHE *file, bool with_counters) { BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; QUEUE queue; @@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, uint bytes_read; /* to hold return value of read_to_buffer */ BUFFPEK *top; int res= 1; + uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); + element_count cnt; /* Invariant: queue must contain top element from each tree, until a tree is not completely walked through. @@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, /* new top has been obtained; if old top is unique, apply the action */ if (compare(compare_arg, old_key, top->key)) { - if (walk_action(old_key, 1, walk_action_arg)) + cnt= with_counters ? + get_counter_from_merged_element(old_key, cnt_ofs) : 1; + if (walk_action(old_key, cnt, walk_action_arg)) goto end; } + else if (with_counters) + { + cnt= get_counter_from_merged_element(top->key, cnt_ofs); + cnt+= get_counter_from_merged_element(old_key, cnt_ofs); + put_counter_into_merged_element(top->key, cnt_ofs, cnt); + } } /* Applying walk_action to the tail of the last tree: this is safe because @@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, { do { - if (walk_action(top->key, 1, walk_action_arg)) + + cnt= with_counters ? + get_counter_from_merged_element(top->key, cnt_ofs) : 1; + if (walk_action(top->key, cnt, walk_action_arg)) goto end; top->key+= key_length; } @@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) (BUFFPEK *) file_ptrs.buffer, (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements, action, walk_action_arg, - tree.compare, tree.custom_arg, &file); + tree.compare, tree.custom_arg, &file, with_counters); } my_free(merge_buffer); return res;