WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up
Item_sum_count_distinct, and deploy Unique for use with COUNT(DISTINCT) if there is no blob column in the list of DISTINCT arguments.
This commit is contained in:
parent
98e83555a5
commit
6a2ef5577c
@ -116,7 +116,7 @@ count(distinct n)
|
|||||||
5000
|
5000
|
||||||
show status like 'Created_tmp_disk_tables';
|
show status like 'Created_tmp_disk_tables';
|
||||||
Variable_name Value
|
Variable_name Value
|
||||||
Created_tmp_disk_tables 1
|
Created_tmp_disk_tables 0
|
||||||
drop table t1;
|
drop table t1;
|
||||||
create table t1 (s text);
|
create table t1 (s text);
|
||||||
flush status;
|
flush status;
|
||||||
|
@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
|
|||||||
MAX(id)
|
MAX(id)
|
||||||
NULL
|
NULL
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
CREATE TABLE t1 (a VARCHAR(400));
|
||||||
|
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
|
||||||
|
("B"), ("b"), ("b "), ("b ");
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
2
|
||||||
|
DROP TABLE t1;
|
||||||
|
@ -98,60 +98,60 @@ DROP TABLE t1;
|
|||||||
CREATE TABLE t1 (id INTEGER);
|
CREATE TABLE t1 (id INTEGER);
|
||||||
CREATE TABLE t2 (id INTEGER);
|
CREATE TABLE t2 (id INTEGER);
|
||||||
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
||||||
INSERT INTO t2 (id) SELECT id FROM t1;
|
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
|
||||||
/* 8 */
|
/* 8 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||||
/* 12 */
|
/* 12 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||||
/* 16 */
|
/* 16 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||||
/* 20 */
|
/* 20 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||||
/* 24 */
|
/* 24 */
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+1 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+1 FROM t1;
|
INSERT INTO t1 SELECT id+2 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+4 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+8 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+2 FROM t1;
|
INSERT INTO t1 SELECT id+16 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+32 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+64 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+4 FROM t1;
|
INSERT INTO t1 SELECT id+128 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+256 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+512 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+8 FROM t1;
|
SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
AVG(DISTINCT id)
|
||||||
DELETE FROM t2;
|
513.5000
|
||||||
INSERT INTO t2 (id) SELECT id+16 FROM t1;
|
508.0000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
509.0000
|
||||||
DELETE FROM t2;
|
510.0000
|
||||||
INSERT INTO t2 (id) SELECT id+32 FROM t1;
|
511.0000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
512.0000
|
||||||
DELETE FROM t2;
|
513.0000
|
||||||
INSERT INTO t2 (id) SELECT id+64 FROM t1;
|
514.0000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
515.0000
|
||||||
DELETE FROM t2;
|
516.0000
|
||||||
INSERT INTO t2 (id) SELECT id+128 FROM t1;
|
517.0000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
511.5000
|
||||||
DELETE FROM t2;
|
512.5000
|
||||||
INSERT INTO t2 (id) SELECT id+256 FROM t1;
|
SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
SUM(DISTINCT id)/COUNT(DISTINCT id)
|
||||||
DELETE FROM t2;
|
513.50000
|
||||||
INSERT INTO t2 (id) SELECT id+512 FROM t1;
|
508.00000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
509.00000
|
||||||
DELETE FROM t2;
|
510.00000
|
||||||
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
|
511.00000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
512.00000
|
||||||
DELETE FROM t2;
|
513.00000
|
||||||
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
|
514.00000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
515.00000
|
||||||
DELETE FROM t2;
|
516.00000
|
||||||
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
|
517.00000
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
511.50000
|
||||||
DELETE FROM t2;
|
512.50000
|
||||||
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
|
INSERT INTO t1 SELECT id+1024 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+2048 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+4096 FROM t1;
|
||||||
|
INSERT INTO t1 SELECT id+8192 FROM t1;
|
||||||
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
||||||
SELECT SUM(DISTINCT id) sm FROM t1;
|
SELECT SUM(DISTINCT id) sm FROM t1;
|
||||||
sm
|
sm
|
||||||
|
@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
|
|||||||
(1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
|
(1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
|
||||||
SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
|
SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Test that the new VARCHAR works correctly with COUNT(DISTINCT)
|
||||||
|
#
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a VARCHAR(400));
|
||||||
|
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
|
||||||
|
("B"), ("b"), ("b "), ("b ");
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
|
|||||||
CREATE TABLE t2 (id INTEGER);
|
CREATE TABLE t2 (id INTEGER);
|
||||||
|
|
||||||
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
||||||
INSERT INTO t2 (id) SELECT id FROM t1;
|
INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */
|
INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */
|
INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */
|
INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */
|
INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
|
||||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */
|
INSERT INTO t1 SELECT id+1 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+2 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+1 FROM t1;
|
INSERT INTO t1 SELECT id+4 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+8 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+16 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+2 FROM t1;
|
INSERT INTO t1 SELECT id+32 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+64 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+128 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+4 FROM t1;
|
INSERT INTO t1 SELECT id+256 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+512 FROM t1;
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+8 FROM t1;
|
# Just test that AVG(DISTINCT) is there
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
|
||||||
DELETE FROM t2;
|
SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
|
||||||
INSERT INTO t2 (id) SELECT id+16 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+1024 FROM t1;
|
||||||
DELETE FROM t2;
|
INSERT INTO t1 SELECT id+2048 FROM t1;
|
||||||
INSERT INTO t2 (id) SELECT id+32 FROM t1;
|
INSERT INTO t1 SELECT id+4096 FROM t1;
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
INSERT INTO t1 SELECT id+8192 FROM t1;
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+64 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+128 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+256 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+512 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
|
|
||||||
INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
DELETE FROM t2;
|
|
||||||
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
|
|
||||||
#INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
#DELETE FROM t2;
|
|
||||||
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
|
|
||||||
#INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
#DELETE FROM t2;
|
|
||||||
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
|
|
||||||
#INSERT INTO t1 SELECT id FROM t2;
|
|
||||||
#DELETE FROM t2;
|
|
||||||
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
||||||
|
|
||||||
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
|
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
|
||||||
|
290
sql/item_sum.cc
290
sql/item_sum.cc
@ -719,6 +719,18 @@ String *Item_sum_distinct::val_str(String *str)
|
|||||||
|
|
||||||
/* Item_sum_avg_distinct */
|
/* Item_sum_avg_distinct */
|
||||||
|
|
||||||
|
void
|
||||||
|
Item_sum_avg_distinct::fix_length_and_dec()
|
||||||
|
{
|
||||||
|
Item_sum_distinct::fix_length_and_dec();
|
||||||
|
/*
|
||||||
|
AVG() will divide val by count. We need to reserve digits
|
||||||
|
after decimal point as the result can be fractional.
|
||||||
|
*/
|
||||||
|
decimals+= 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
Item_sum_avg_distinct::calculate_val_and_count()
|
Item_sum_avg_distinct::calculate_val_and_count()
|
||||||
{
|
{
|
||||||
@ -2115,12 +2127,8 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)
|
|||||||
|
|
||||||
int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
|
int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
|
||||||
{
|
{
|
||||||
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
|
Field *f= (Field*) arg;
|
||||||
CHARSET_INFO *cs=item->key_charset;
|
return f->cmp(key1, key2);
|
||||||
uint len=item->key_length;
|
|
||||||
return cs->coll->strnncollsp(cs,
|
|
||||||
(const uchar*) key1, len,
|
|
||||||
(const uchar*) key2, len, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2149,54 +2157,42 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
helper function for walking the tree when we dump it to MyISAM -
|
|
||||||
tree_walk will call it for each leaf
|
|
||||||
*/
|
|
||||||
|
|
||||||
int dump_leaf(byte* key, uint32 count __attribute__((unused)),
|
C_MODE_START
|
||||||
Item_sum_count_distinct* item)
|
|
||||||
|
static int count_distinct_walk(void *elem, unsigned int count, void *arg)
|
||||||
{
|
{
|
||||||
byte* buf = item->table->record[0];
|
(*((ulonglong*)arg))++;
|
||||||
int error;
|
|
||||||
/*
|
|
||||||
The first item->rec_offset bytes are taken care of with
|
|
||||||
restore_record(table,default_values) in setup()
|
|
||||||
*/
|
|
||||||
memcpy(buf + item->rec_offset, key, item->tree->size_of_element);
|
|
||||||
if ((error = item->table->file->write_row(buf)))
|
|
||||||
{
|
|
||||||
if (error != HA_ERR_FOUND_DUPP_KEY &&
|
|
||||||
error != HA_ERR_FOUND_DUPP_UNIQUE)
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
C_MODE_END
|
||||||
|
|
||||||
|
|
||||||
void Item_sum_count_distinct::cleanup()
|
void Item_sum_count_distinct::cleanup()
|
||||||
{
|
{
|
||||||
DBUG_ENTER("Item_sum_count_distinct::cleanup");
|
DBUG_ENTER("Item_sum_count_distinct::cleanup");
|
||||||
Item_sum_int::cleanup();
|
Item_sum_int::cleanup();
|
||||||
/*
|
|
||||||
Free table and tree if they belong to this item (if item have not pointer
|
/* Free objects only if we own them. */
|
||||||
to original item from which was made copy => it own its objects )
|
|
||||||
*/
|
|
||||||
if (!original)
|
if (!original)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
We need to delete the table and the tree in cleanup() as
|
||||||
|
they were allocated in the runtime memroot. Using the runtime
|
||||||
|
memroot reduces memory footprint for PS/SP and simplifies setup().
|
||||||
|
*/
|
||||||
|
delete tree;
|
||||||
|
tree= 0;
|
||||||
if (table)
|
if (table)
|
||||||
{
|
{
|
||||||
free_tmp_table(current_thd, table);
|
free_tmp_table(table->in_use, table);
|
||||||
table= 0;
|
table= 0;
|
||||||
}
|
}
|
||||||
delete tmp_table_param;
|
delete tmp_table_param;
|
||||||
tmp_table_param= 0;
|
tmp_table_param= 0;
|
||||||
if (use_tree)
|
|
||||||
{
|
|
||||||
delete_tree(tree);
|
|
||||||
use_tree= 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
always_null= FALSE;
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2207,8 +2203,15 @@ void Item_sum_count_distinct::make_unique()
|
|||||||
{
|
{
|
||||||
table=0;
|
table=0;
|
||||||
original= 0;
|
original= 0;
|
||||||
use_tree= 0; // to prevent delete_tree call on uninitialized tree
|
tree= 0;
|
||||||
tree= &tree_base;
|
tmp_table_param= 0;
|
||||||
|
always_null= FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Item_sum_count_distinct::~Item_sum_count_distinct()
|
||||||
|
{
|
||||||
|
cleanup();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2216,9 +2219,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
|
|||||||
{
|
{
|
||||||
List<Item> list;
|
List<Item> list;
|
||||||
SELECT_LEX *select_lex= thd->lex->current_select;
|
SELECT_LEX *select_lex= thd->lex->current_select;
|
||||||
if (select_lex->linkage == GLOBAL_OPTIONS_TYPE)
|
|
||||||
return 1;
|
/*
|
||||||
|
Setup can be called twice for ROLLUP items. This is a bug.
|
||||||
|
Please add DBUG_ASSERT(tree == 0) here when it's fixed.
|
||||||
|
*/
|
||||||
|
if (tree || table || tmp_table_param)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
if (!(tmp_table_param= new TMP_TABLE_PARAM))
|
if (!(tmp_table_param= new TMP_TABLE_PARAM))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@ -2238,11 +2246,7 @@ bool Item_sum_count_distinct::setup(THD *thd)
|
|||||||
if (always_null)
|
if (always_null)
|
||||||
return 0;
|
return 0;
|
||||||
count_field_types(tmp_table_param,list,0);
|
count_field_types(tmp_table_param,list,0);
|
||||||
if (table)
|
DBUG_ASSERT(table == 0);
|
||||||
{
|
|
||||||
free_tmp_table(thd, table);
|
|
||||||
tmp_table_param->cleanup();
|
|
||||||
}
|
|
||||||
if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
|
if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
|
||||||
0,
|
0,
|
||||||
select_lex->options | thd->options,
|
select_lex->options | thd->options,
|
||||||
@ -2251,123 +2255,77 @@ bool Item_sum_count_distinct::setup(THD *thd)
|
|||||||
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
|
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
|
||||||
table->no_rows=1;
|
table->no_rows=1;
|
||||||
|
|
||||||
|
|
||||||
// no blobs, otherwise it would be MyISAM
|
|
||||||
if (table->s->db_type == DB_TYPE_HEAP)
|
if (table->s->db_type == DB_TYPE_HEAP)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
No blobs, otherwise it would have been MyISAM: set up a compare
|
||||||
|
function and its arguments to use with Unique.
|
||||||
|
*/
|
||||||
qsort_cmp2 compare_key;
|
qsort_cmp2 compare_key;
|
||||||
void* cmp_arg;
|
void* cmp_arg;
|
||||||
|
Field **field= table->field;
|
||||||
|
Field **field_end= field + table->s->fields;
|
||||||
|
bool all_binary= TRUE;
|
||||||
|
|
||||||
// to make things easier for dump_leaf if we ever have to dump to MyISAM
|
for (tree_key_length= 0; field < field_end; ++field)
|
||||||
restore_record(table,s->default_values);
|
|
||||||
|
|
||||||
if (table->s->fields == 1)
|
|
||||||
{
|
{
|
||||||
/*
|
Field *f= *field;
|
||||||
If we have only one field, which is the most common use of
|
enum enum_field_types type= f->type();
|
||||||
count(distinct), it is much faster to use a simpler key
|
tree_key_length+= f->pack_length();
|
||||||
compare method that can take advantage of not having to worry
|
if (!f->binary() && (type == MYSQL_TYPE_STRING ||
|
||||||
about other fields
|
type == MYSQL_TYPE_VAR_STRING ||
|
||||||
*/
|
type == MYSQL_TYPE_VARCHAR))
|
||||||
Field* field = table->field[0];
|
{
|
||||||
switch (field->type()) {
|
all_binary= FALSE;
|
||||||
case MYSQL_TYPE_STRING:
|
break;
|
||||||
case MYSQL_TYPE_VAR_STRING:
|
|
||||||
if (field->binary())
|
|
||||||
{
|
|
||||||
compare_key = (qsort_cmp2)simple_raw_key_cmp;
|
|
||||||
cmp_arg = (void*) &key_length;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
If we have a string, we must take care of charsets and case
|
|
||||||
sensitivity
|
|
||||||
*/
|
|
||||||
compare_key = (qsort_cmp2)simple_str_key_cmp;
|
|
||||||
cmp_arg = (void*) this;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
/*
|
|
||||||
Since at this point we cannot have blobs anything else can
|
|
||||||
be compared with memcmp
|
|
||||||
*/
|
|
||||||
compare_key = (qsort_cmp2)simple_raw_key_cmp;
|
|
||||||
cmp_arg = (void*) &key_length;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
key_charset = field->charset();
|
|
||||||
key_length = field->pack_length();
|
|
||||||
rec_offset = 1;
|
|
||||||
}
|
}
|
||||||
else // too bad, cannot cheat - there is more than one field
|
if (all_binary)
|
||||||
{
|
{
|
||||||
bool all_binary = 1;
|
cmp_arg= (void*) &tree_key_length;
|
||||||
Field** field, **field_end;
|
compare_key= (qsort_cmp2) simple_raw_key_cmp;
|
||||||
field_end = (field = table->field) + table->s->fields;
|
}
|
||||||
uint32 *lengths;
|
else
|
||||||
if (!(field_lengths=
|
{
|
||||||
(uint32*) thd->alloc(sizeof(uint32) * table->s->fields)))
|
if (table->s->fields == 1)
|
||||||
return 1;
|
|
||||||
|
|
||||||
for (key_length = 0, lengths=field_lengths; field < field_end; ++field)
|
|
||||||
{
|
{
|
||||||
uint32 length= (*field)->pack_length();
|
/*
|
||||||
key_length += length;
|
If we have only one field, which is the most common use of
|
||||||
*lengths++ = length;
|
count(distinct), it is much faster to use a simpler key
|
||||||
if (!(*field)->binary())
|
compare method that can take advantage of not having to worry
|
||||||
all_binary = 0; // Can't break loop here
|
about other fields.
|
||||||
}
|
*/
|
||||||
rec_offset= table->s->reclength - key_length;
|
compare_key= (qsort_cmp2) simple_str_key_cmp;
|
||||||
if (all_binary)
|
cmp_arg= (void*) table->field[0];
|
||||||
{
|
/* tree_key_length has been set already */
|
||||||
compare_key = (qsort_cmp2)simple_raw_key_cmp;
|
|
||||||
cmp_arg = (void*) &key_length;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
compare_key = (qsort_cmp2) composite_key_cmp ;
|
uint32 *length;
|
||||||
cmp_arg = (void*) this;
|
compare_key= (qsort_cmp2) composite_key_cmp;
|
||||||
|
cmp_arg= (void*) this;
|
||||||
|
field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
|
||||||
|
for (tree_key_length= 0, length= field_lengths, field= table->field;
|
||||||
|
field < field_end; ++field, ++length)
|
||||||
|
{
|
||||||
|
*length= (*field)->pack_length();
|
||||||
|
tree_key_length+= *length;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DBUG_ASSERT(tree == 0);
|
||||||
if (use_tree)
|
tree= new Unique(compare_key, cmp_arg, tree_key_length,
|
||||||
delete_tree(tree);
|
thd->variables.max_heap_table_size);
|
||||||
init_tree(tree, min(thd->variables.max_heap_table_size,
|
|
||||||
thd->variables.sortbuff_size/16), 0,
|
|
||||||
key_length, compare_key, 0, NULL, cmp_arg);
|
|
||||||
use_tree = 1;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The only time key_length could be 0 is if someone does
|
The only time tree_key_length could be 0 is if someone does
|
||||||
count(distinct) on a char(0) field - stupid thing to do,
|
count(distinct) on a char(0) field - stupid thing to do,
|
||||||
but this has to be handled - otherwise someone can crash
|
but this has to be handled - otherwise someone can crash
|
||||||
the server with a DoS attack
|
the server with a DoS attack
|
||||||
*/
|
*/
|
||||||
max_elements_in_tree = ((key_length) ?
|
if (! tree)
|
||||||
thd->variables.max_heap_table_size/key_length : 1);
|
return TRUE;
|
||||||
|
|
||||||
}
|
}
|
||||||
if (original)
|
return FALSE;
|
||||||
{
|
|
||||||
original->table= table;
|
|
||||||
original->use_tree= use_tree;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int Item_sum_count_distinct::tree_to_myisam()
|
|
||||||
{
|
|
||||||
if (create_myisam_from_heap(current_thd, table, tmp_table_param,
|
|
||||||
HA_ERR_RECORD_FILE_FULL, 1) ||
|
|
||||||
tree_walk(tree, (tree_walk_action)&dump_leaf, (void*)this,
|
|
||||||
left_root_right))
|
|
||||||
return 1;
|
|
||||||
delete_tree(tree);
|
|
||||||
use_tree = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2379,8 +2337,9 @@ Item *Item_sum_count_distinct::copy_or_same(THD* thd)
|
|||||||
|
|
||||||
void Item_sum_count_distinct::clear()
|
void Item_sum_count_distinct::clear()
|
||||||
{
|
{
|
||||||
if (use_tree)
|
/* tree and table can both be null only if always_null is set */
|
||||||
reset_tree(tree);
|
if (tree)
|
||||||
|
tree->reset();
|
||||||
else if (table)
|
else if (table)
|
||||||
{
|
{
|
||||||
table->file->extra(HA_EXTRA_NO_CACHE);
|
table->file->extra(HA_EXTRA_NO_CACHE);
|
||||||
@ -2401,32 +2360,21 @@ bool Item_sum_count_distinct::add()
|
|||||||
if ((*field)->is_real_null(0))
|
if ((*field)->is_real_null(0))
|
||||||
return 0; // Don't count NULL
|
return 0; // Don't count NULL
|
||||||
|
|
||||||
if (use_tree)
|
if (tree)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
If the tree got too big, convert to MyISAM, otherwise insert into the
|
The first few bytes of record (at least one) are just markers
|
||||||
tree.
|
for deleted and NULLs. We want to skip them since they will
|
||||||
|
bloat the tree without providing any valuable info. Besides,
|
||||||
|
tree_key_length used to initialize the tree doesn't include space for them.
|
||||||
*/
|
*/
|
||||||
if (tree->elements_in_tree > max_elements_in_tree)
|
return tree->unique_add(table->record[0] + table->s->null_bytes);
|
||||||
{
|
|
||||||
if (tree_to_myisam())
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
else if (!tree_insert(tree, table->record[0] + rec_offset, 0,
|
|
||||||
tree->custom_arg))
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
else if ((error=table->file->write_row(table->record[0])))
|
if ((error= table->file->write_row(table->record[0])) &&
|
||||||
{
|
error != HA_ERR_FOUND_DUPP_KEY &&
|
||||||
if (error != HA_ERR_FOUND_DUPP_KEY &&
|
error != HA_ERR_FOUND_DUPP_UNIQUE)
|
||||||
error != HA_ERR_FOUND_DUPP_UNIQUE)
|
return TRUE;
|
||||||
{
|
return FALSE;
|
||||||
if (create_myisam_from_heap(current_thd, table, tmp_table_param, error,
|
|
||||||
1))
|
|
||||||
return 1; // Not a table_is_full error
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2435,8 +2383,16 @@ longlong Item_sum_count_distinct::val_int()
|
|||||||
DBUG_ASSERT(fixed == 1);
|
DBUG_ASSERT(fixed == 1);
|
||||||
if (!table) // Empty query
|
if (!table) // Empty query
|
||||||
return LL(0);
|
return LL(0);
|
||||||
if (use_tree)
|
if (tree)
|
||||||
return tree->elements_in_tree;
|
{
|
||||||
|
ulonglong count;
|
||||||
|
|
||||||
|
if (tree->elements == 0)
|
||||||
|
return (longlong) tree->elements_in_tree(); // everything fits in memory
|
||||||
|
count= 0;
|
||||||
|
tree->walk(count_distinct_walk, (void*) &count);
|
||||||
|
return (longlong) count;
|
||||||
|
}
|
||||||
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
|
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
|
||||||
return table->file->records;
|
return table->file->records;
|
||||||
}
|
}
|
||||||
|
@ -239,6 +239,7 @@ private:
|
|||||||
public:
|
public:
|
||||||
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
|
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
|
||||||
|
|
||||||
|
void fix_length_and_dec();
|
||||||
virtual void calculate_val_and_count();
|
virtual void calculate_val_and_count();
|
||||||
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
|
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
|
||||||
const char *func_name() const { return "avg_distinct"; }
|
const char *func_name() const { return "avg_distinct"; }
|
||||||
@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
|
|||||||
class Item_sum_count_distinct :public Item_sum_int
|
class Item_sum_count_distinct :public Item_sum_int
|
||||||
{
|
{
|
||||||
TABLE *table;
|
TABLE *table;
|
||||||
table_map used_table_cache;
|
|
||||||
uint32 *field_lengths;
|
uint32 *field_lengths;
|
||||||
TMP_TABLE_PARAM *tmp_table_param;
|
TMP_TABLE_PARAM *tmp_table_param;
|
||||||
TREE tree_base;
|
|
||||||
TREE *tree;
|
|
||||||
/*
|
|
||||||
Following is 0 normal object and pointer to original one for copy
|
|
||||||
(to correctly free resources)
|
|
||||||
*/
|
|
||||||
Item_sum_count_distinct *original;
|
|
||||||
|
|
||||||
uint key_length;
|
|
||||||
CHARSET_INFO *key_charset;
|
|
||||||
|
|
||||||
/*
|
|
||||||
Calculated based on max_heap_table_size. If reached,
|
|
||||||
walk the tree and dump it into MyISAM table
|
|
||||||
*/
|
|
||||||
uint max_elements_in_tree;
|
|
||||||
|
|
||||||
/*
|
|
||||||
The first few bytes of record ( at least one)
|
|
||||||
are just markers for deleted and NULLs. We want to skip them since
|
|
||||||
they will just bloat the tree without providing any valuable info
|
|
||||||
*/
|
|
||||||
int rec_offset;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
If there are no blobs, we can use a tree, which
|
If there are no blobs, we can use a tree, which
|
||||||
is faster than heap table. In that case, we still use the table
|
is faster than heap table. In that case, we still use the table
|
||||||
to help get things set up, but we insert nothing in it
|
to help get things set up, but we insert nothing in it
|
||||||
*/
|
*/
|
||||||
bool use_tree;
|
Unique *tree;
|
||||||
|
/*
|
||||||
|
The following is 0 for a normal object and a pointer to the original one for a copy
|
||||||
|
(to correctly free resources)
|
||||||
|
*/
|
||||||
|
Item_sum_count_distinct *original;
|
||||||
|
uint tree_key_length;
|
||||||
|
|
||||||
|
|
||||||
bool always_null; // Set to 1 if the result is always NULL
|
bool always_null; // Set to 1 if the result is always NULL
|
||||||
|
|
||||||
int tree_to_myisam();
|
|
||||||
|
|
||||||
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
|
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
|
||||||
friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
|
friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
|
||||||
friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
|
|
||||||
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
|
|
||||||
Item_sum_count_distinct* item);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Item_sum_count_distinct(List<Item> &list)
|
Item_sum_count_distinct(List<Item> &list)
|
||||||
:Item_sum_int(list), table(0), used_table_cache(~(table_map) 0),
|
:Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
|
||||||
tmp_table_param(0), tree(&tree_base), original(0), use_tree(0),
|
tree(0), original(0), always_null(FALSE)
|
||||||
always_null(0)
|
|
||||||
{ quick_group= 0; }
|
{ quick_group= 0; }
|
||||||
Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
|
Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
|
||||||
:Item_sum_int(thd, item), table(item->table),
|
:Item_sum_int(thd, item), table(item->table),
|
||||||
used_table_cache(item->used_table_cache),
|
|
||||||
field_lengths(item->field_lengths),
|
field_lengths(item->field_lengths),
|
||||||
tmp_table_param(item->tmp_table_param),
|
tmp_table_param(item->tmp_table_param),
|
||||||
tree(item->tree), original(item), key_length(item->key_length),
|
tree(item->tree), original(item), tree_key_length(item->tree_key_length),
|
||||||
max_elements_in_tree(item->max_elements_in_tree),
|
|
||||||
rec_offset(item->rec_offset), use_tree(item->use_tree),
|
|
||||||
always_null(item->always_null)
|
always_null(item->always_null)
|
||||||
{}
|
{}
|
||||||
|
~Item_sum_count_distinct();
|
||||||
|
|
||||||
void cleanup();
|
void cleanup();
|
||||||
|
|
||||||
table_map used_tables() const { return used_table_cache; }
|
|
||||||
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
|
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
|
||||||
void clear();
|
void clear();
|
||||||
bool add();
|
bool add();
|
||||||
|
@ -1831,6 +1831,7 @@ public:
|
|||||||
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
|
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
|
||||||
uint size_arg, ulong max_in_memory_size_arg);
|
uint size_arg, ulong max_in_memory_size_arg);
|
||||||
~Unique();
|
~Unique();
|
||||||
|
ulong elements_in_tree() { return tree.elements_in_tree; }
|
||||||
inline bool unique_add(void *ptr)
|
inline bool unique_add(void *ptr)
|
||||||
{
|
{
|
||||||
DBUG_ENTER("unique_add");
|
DBUG_ENTER("unique_add");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user