Implementation of SUM(DISTINCT), tests cases
sql/filesort.cc: Snippet of filesort() code moved to function reuse_freed_buff() - change buffpek pointers to use buff from freed BUFFPEK Used in filesort() and merge_walk(). sql/item_sum.cc: Implementation of Item_sum_sum_distinct - SUM(DISTINCT) item, which uses Unique to resolve duplicates sql/item_sum.h: New sum Item added - Item_sum_sum_distinct - for SUM(DISTINCT) function sql/sql_class.h: added walk() and reset() methods to Unique, used in Item_sum_sum_distinct. sql/sql_sort.h: declaration for reuse_freed_buff() to be able to use it in uniques.cc sql/sql_yacc.yy: parser extended to handle MIN(DISTICNT), MAX(DISTINCT), SUM(DISTINCT) sql/uniques.cc: Implementation for Unique::reset(), Unique::walk() as well as for merge_walk() algorithm.
This commit is contained in:
parent
844d9b766a
commit
58a52a2a84
203
mysql-test/r/sum_distinct.result
Normal file
203
mysql-test/r/sum_distinct.result
Normal file
@ -0,0 +1,203 @@
|
||||
DROP TABLE IF EXISTS t1;
|
||||
CREATE TABLE t1 (
|
||||
id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
|
||||
gender CHAR(1),
|
||||
name VARCHAR(20)
|
||||
);
|
||||
SELECT SUM(DISTINCT LENGTH(name)) s1 FROM t1;
|
||||
s1
|
||||
NULL
|
||||
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
|
||||
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
|
||||
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
|
||||
SELECT SUM(DISTINCT LENGTH(name)) s1 FROM t1;
|
||||
s1
|
||||
NULL
|
||||
INSERT INTO t1 (gender, name) VALUES ('F', 'Helen'), ('F', 'Anastasia'),
|
||||
('F', 'Katherine'), ('F', 'Margo'), ('F', 'Magdalene'), ('F', 'Mary');
|
||||
CREATE TABLE t2 SELECT name FROM t1;
|
||||
SELECT (SELECT SUM(DISTINCT LENGTH(name)) FROM t1) FROM t2;
|
||||
(SELECT SUM(DISTINCT LENGTH(name)) FROM t1)
|
||||
18
|
||||
18
|
||||
18
|
||||
18
|
||||
18
|
||||
18
|
||||
18
|
||||
18
|
||||
18
|
||||
DROP TABLE t2;
|
||||
INSERT INTO t1 (gender, name) VALUES ('F', 'Eva'), ('F', 'Sofia'),
|
||||
('F', 'Sara'), ('F', 'Golda'), ('F', 'Toba'), ('F', 'Victory'),
|
||||
('F', 'Faina'), ('F', 'Miriam'), ('F', 'Beki'), ('F', 'America'),
|
||||
('F', 'Susan'), ('F', 'Glory'), ('F', 'Priscilla'), ('F', 'Rosmary'),
|
||||
('F', 'Rose'), ('F', 'Margareth'), ('F', 'Elizabeth'), ('F', 'Meredith'),
|
||||
('F', 'Julie'), ('F', 'Xenia'), ('F', 'Zena'), ('F', 'Olga'),
|
||||
('F', 'Brunhilda'), ('F', 'Nataly'), ('F', 'Lara'), ('F', 'Svetlana'),
|
||||
('F', 'Grethem'), ('F', 'Irene');
|
||||
SELECT
|
||||
SUM(DISTINCT LENGTH(name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(NAME, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(name, 1, 4))) s3
|
||||
FROM t1;
|
||||
s1 s2 s3
|
||||
42 0 7
|
||||
SELECT
|
||||
SUM(DISTINCT LENGTH(g1.name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
|
||||
FROM t1 g1, t1 g2, t1 g3;
|
||||
s1 s2 s3
|
||||
42 0 7
|
||||
SELECT
|
||||
SUM(DISTINCT LENGTH(g1.name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
|
||||
FROM t1 g1, t1 g2, t1 g3 GROUP BY LENGTH(SUBSTRING(g3.name, 5, 10));
|
||||
s1 s2 s3
|
||||
42 0 NULL
|
||||
42 0 7
|
||||
42 0 4
|
||||
42 0 4
|
||||
42 0 4
|
||||
42 0 4
|
||||
42 0 4
|
||||
SELECT SQL_BUFFER_RESULT
|
||||
SUM(DISTINCT LENGTH(name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(NAME, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(name, 1, 4))) s3
|
||||
FROM t1;
|
||||
s1 s2 s3
|
||||
42 0 7
|
||||
SELECT SQL_BUFFER_RESULT
|
||||
SUM(DISTINCT LENGTH(g1.name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
|
||||
FROM t1 g1, t1 g2, t1 g3 GROUP BY LENGTH(SUBSTRING(g3.name, 5, 10));
|
||||
s1 s2 s3
|
||||
42 0 NULL
|
||||
42 0 7
|
||||
42 0 4
|
||||
42 0 4
|
||||
42 0 4
|
||||
42 0 4
|
||||
42 0 4
|
||||
SET @l=1;
|
||||
UPDATE t1 SET name=CONCAT(name, @l:=@l+1);
|
||||
SELECT SUM(DISTINCT RIGHT(name, 1)) FROM t1;
|
||||
SUM(DISTINCT RIGHT(name, 1))
|
||||
45
|
||||
SELECT SUM(DISTINCT id) FROM t1;
|
||||
SUM(DISTINCT id)
|
||||
703
|
||||
SELECT SUM(DISTINCT id % 11) FROM t1;
|
||||
SUM(DISTINCT id % 11)
|
||||
55
|
||||
DROP TABLE t1;
|
||||
CREATE TABLE t1 (id INTEGER);
|
||||
CREATE TABLE t2 (id INTEGER);
|
||||
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
||||
INSERT INTO t2 (id) SELECT id FROM t1;
|
||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
||||
/* 8 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
||||
/* 12 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
||||
/* 16 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
||||
/* 20 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2;
|
||||
/* 24 */
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+1 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+2 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+4 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+8 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+16 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+32 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+64 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+128 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+256 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+512 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
||||
SELECT SUM(DISTINCT id) sm FROM t1;
|
||||
sm
|
||||
134225920
|
||||
SELECT SUM(DISTINCT id) sm FROM t2;
|
||||
sm
|
||||
134225920
|
||||
SELECT SUM(DISTINCT id) sm FROM t1 group by id % 13;
|
||||
sm
|
||||
10327590
|
||||
10328851
|
||||
10330112
|
||||
10331373
|
||||
10332634
|
||||
10317510
|
||||
10318770
|
||||
10320030
|
||||
10321290
|
||||
10322550
|
||||
10323810
|
||||
10325070
|
||||
10326330
|
||||
SET max_heap_table_size=16384;
|
||||
SHOW variables LIKE 'max_heap_table_size';
|
||||
Variable_name Value
|
||||
max_heap_table_size 16384
|
||||
SELECT SUM(DISTINCT id) sm FROM t1;
|
||||
sm
|
||||
134225920
|
||||
SELECT SUM(DISTINCT id) sm FROM t2;
|
||||
sm
|
||||
134225920
|
||||
SELECT SUM(DISTINCT id) sm FROM t1 GROUP BY id % 13;
|
||||
sm
|
||||
10327590
|
||||
10328851
|
||||
10330112
|
||||
10331373
|
||||
10332634
|
||||
10317510
|
||||
10318770
|
||||
10320030
|
||||
10321290
|
||||
10322550
|
||||
10323810
|
||||
10325070
|
||||
10326330
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
188
mysql-test/t/sum_distinct.test
Normal file
188
mysql-test/t/sum_distinct.test
Normal file
@ -0,0 +1,188 @@
|
||||
#
|
||||
# Various tests for SUM(DISTINCT ...)
|
||||
#
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
--enable_warnings
|
||||
|
||||
CREATE TABLE t1 (
|
||||
id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
|
||||
gender CHAR(1),
|
||||
name VARCHAR(20)
|
||||
);
|
||||
|
||||
# According to ANSI SQL, SUM(DISTINCT ...) should return NULL for empty
|
||||
# record set
|
||||
|
||||
SELECT SUM(DISTINCT LENGTH(name)) s1 FROM t1;
|
||||
|
||||
# According to ANSI SQL, SUM(DISTINCT ...) should return NULL for records sets
|
||||
# entirely consisting of NULLs
|
||||
|
||||
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
|
||||
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
|
||||
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
|
||||
|
||||
SELECT SUM(DISTINCT LENGTH(name)) s1 FROM t1;
|
||||
|
||||
|
||||
# Filling table with t1
|
||||
|
||||
INSERT INTO t1 (gender, name) VALUES ('F', 'Helen'), ('F', 'Anastasia'),
|
||||
('F', 'Katherine'), ('F', 'Margo'), ('F', 'Magdalene'), ('F', 'Mary');
|
||||
|
||||
CREATE TABLE t2 SELECT name FROM t1;
|
||||
|
||||
SELECT (SELECT SUM(DISTINCT LENGTH(name)) FROM t1) FROM t2;
|
||||
|
||||
DROP TABLE t2;
|
||||
|
||||
INSERT INTO t1 (gender, name) VALUES ('F', 'Eva'), ('F', 'Sofia'),
|
||||
('F', 'Sara'), ('F', 'Golda'), ('F', 'Toba'), ('F', 'Victory'),
|
||||
('F', 'Faina'), ('F', 'Miriam'), ('F', 'Beki'), ('F', 'America'),
|
||||
('F', 'Susan'), ('F', 'Glory'), ('F', 'Priscilla'), ('F', 'Rosmary'),
|
||||
('F', 'Rose'), ('F', 'Margareth'), ('F', 'Elizabeth'), ('F', 'Meredith'),
|
||||
('F', 'Julie'), ('F', 'Xenia'), ('F', 'Zena'), ('F', 'Olga'),
|
||||
('F', 'Brunhilda'), ('F', 'Nataly'), ('F', 'Lara'), ('F', 'Svetlana'),
|
||||
('F', 'Grethem'), ('F', 'Irene');
|
||||
|
||||
SELECT
|
||||
SUM(DISTINCT LENGTH(name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(NAME, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(name, 1, 4))) s3
|
||||
FROM t1;
|
||||
|
||||
SELECT
|
||||
SUM(DISTINCT LENGTH(g1.name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
|
||||
FROM t1 g1, t1 g2, t1 g3;
|
||||
|
||||
SELECT
|
||||
SUM(DISTINCT LENGTH(g1.name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
|
||||
FROM t1 g1, t1 g2, t1 g3 GROUP BY LENGTH(SUBSTRING(g3.name, 5, 10));
|
||||
|
||||
# here we explicitly request summing through temporary table (so
|
||||
# Item_sum_sum_distinct::copy_or_same() is called)
|
||||
|
||||
SELECT SQL_BUFFER_RESULT
|
||||
SUM(DISTINCT LENGTH(name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(NAME, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(name, 1, 4))) s3
|
||||
FROM t1;
|
||||
|
||||
SELECT SQL_BUFFER_RESULT
|
||||
SUM(DISTINCT LENGTH(g1.name)) s1,
|
||||
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
|
||||
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
|
||||
FROM t1 g1, t1 g2, t1 g3 GROUP BY LENGTH(SUBSTRING(g3.name, 5, 10));
|
||||
|
||||
# this test demonstrates that strings are automatically converted to numbers
|
||||
# before summing
|
||||
|
||||
SET @l=1;
|
||||
UPDATE t1 SET name=CONCAT(name, @l:=@l+1);
|
||||
|
||||
SELECT SUM(DISTINCT RIGHT(name, 1)) FROM t1;
|
||||
|
||||
# this is a test case for ordinary t1
|
||||
|
||||
SELECT SUM(DISTINCT id) FROM t1;
|
||||
SELECT SUM(DISTINCT id % 11) FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Test the case when distinct values doesn't fit in memory and
|
||||
# filesort is used (see uniques.cc:merge_walk)
|
||||
#
|
||||
|
||||
CREATE TABLE t1 (id INTEGER);
|
||||
CREATE TABLE t2 (id INTEGER);
|
||||
|
||||
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
||||
INSERT INTO t2 (id) SELECT id FROM t1;
|
||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */
|
||||
INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+1 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+2 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+4 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+8 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+16 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+32 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+64 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+128 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+256 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+512 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
|
||||
INSERT INTO t1 SELECT id FROM t2;
|
||||
DELETE FROM t2;
|
||||
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
|
||||
#INSERT INTO t1 SELECT id FROM t2;
|
||||
#DELETE FROM t2;
|
||||
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
|
||||
#INSERT INTO t1 SELECT id FROM t2;
|
||||
#DELETE FROM t2;
|
||||
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
|
||||
#INSERT INTO t1 SELECT id FROM t2;
|
||||
#DELETE FROM t2;
|
||||
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
||||
|
||||
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
|
||||
|
||||
SELECT SUM(DISTINCT id) sm FROM t1;
|
||||
SELECT SUM(DISTINCT id) sm FROM t2;
|
||||
SELECT SUM(DISTINCT id) sm FROM t1 group by id % 13;
|
||||
|
||||
# this limit for max_heap_table_size is set to force testing the case, when
|
||||
# all distinct sum values can not fit in memory and must be stored in a
|
||||
# temporary table
|
||||
|
||||
SET max_heap_table_size=16384;
|
||||
|
||||
# to check that max_heap_table_size was actually set (hard limit for minimum
|
||||
# max_heap_table_size is set in mysqld.cc):
|
||||
|
||||
SHOW variables LIKE 'max_heap_table_size';
|
||||
|
||||
SELECT SUM(DISTINCT id) sm FROM t1;
|
||||
SELECT SUM(DISTINCT id) sm FROM t2;
|
||||
SELECT SUM(DISTINCT id) sm FROM t1 GROUP BY id % 13;
|
||||
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
@ -756,6 +756,39 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
|
||||
} /* read_to_buffer */
|
||||
|
||||
|
||||
/*
|
||||
Put all room used by freed buffer to use in adjacent buffer. Note, that
|
||||
we can't simply distribute memory evenly between all buffers, because
|
||||
new areas must not overlap with old ones.
|
||||
SYNOPSYS
|
||||
reuse_freed_buff()
|
||||
queue IN list of non-empty buffers, without freed buffer
|
||||
reuse IN empty buffer
|
||||
key_length IN key length
|
||||
*/
|
||||
|
||||
void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
|
||||
{
|
||||
uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
|
||||
for (uint i= 0; i < queue->elements; ++i)
|
||||
{
|
||||
BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
|
||||
if (bp->base + bp->max_keys * key_length == reuse->base)
|
||||
{
|
||||
bp->max_keys+= reuse->max_keys;
|
||||
return;
|
||||
}
|
||||
else if (bp->base == reuse_end)
|
||||
{
|
||||
bp->base= reuse->base;
|
||||
bp->max_keys+= reuse->max_keys;
|
||||
return;
|
||||
}
|
||||
}
|
||||
DBUG_ASSERT(0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Merge buffers to one buffer
|
||||
*/
|
||||
@ -881,29 +914,8 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
|
||||
if (!(error= (int) read_to_buffer(from_file,buffpek,
|
||||
rec_length)))
|
||||
{
|
||||
uchar *base= buffpek->base;
|
||||
ulong max_keys= buffpek->max_keys;
|
||||
|
||||
VOID(queue_remove(&queue,0));
|
||||
|
||||
/* Put room used by buffer to use in other buffer */
|
||||
for (refpek= (BUFFPEK**) &queue_top(&queue);
|
||||
refpek <= (BUFFPEK**) &queue_end(&queue);
|
||||
refpek++)
|
||||
{
|
||||
buffpek= *refpek;
|
||||
if (buffpek->base+buffpek->max_keys*rec_length == base)
|
||||
{
|
||||
buffpek->max_keys+= max_keys;
|
||||
break;
|
||||
}
|
||||
else if (base+max_keys*rec_length == buffpek->base)
|
||||
{
|
||||
buffpek->base= base;
|
||||
buffpek->max_keys+= max_keys;
|
||||
break;
|
||||
}
|
||||
}
|
||||
reuse_freed_buff(&queue, buffpek, rec_length);
|
||||
break; /* One buffer have been removed */
|
||||
}
|
||||
else if (error == -1)
|
||||
|
121
sql/item_sum.cc
121
sql/item_sum.cc
@ -262,6 +262,122 @@ double Item_sum_sum::val()
|
||||
}
|
||||
|
||||
|
||||
/* Item_sum_sum_distinct */
|
||||
|
||||
Item_sum_sum_distinct::Item_sum_sum_distinct(Item *item)
|
||||
:Item_sum_num(item), sum(0.0), tree(0)
|
||||
{
|
||||
/*
|
||||
quick_group is an optimizer hint, which means that GROUP BY can be
|
||||
handled with help of index on grouped columns.
|
||||
By setting quick_group to zero we force creation of temporary table
|
||||
to perform GROUP BY.
|
||||
*/
|
||||
quick_group= 0;
|
||||
}
|
||||
|
||||
|
||||
Item_sum_sum_distinct::Item_sum_sum_distinct(THD *thd,
|
||||
Item_sum_sum_distinct &original)
|
||||
:Item_sum_num(thd, original), sum(0.0), tree(0)
|
||||
{
|
||||
quick_group= 0;
|
||||
}
|
||||
|
||||
|
||||
Item_sum_sum_distinct::~Item_sum_sum_distinct()
|
||||
{
|
||||
delete tree;
|
||||
}
|
||||
|
||||
|
||||
Item *
|
||||
Item_sum_sum_distinct::copy_or_same(THD *thd)
|
||||
{
|
||||
return new (&thd->mem_root) Item_sum_sum_distinct(thd, *this);
|
||||
}
|
||||
|
||||
C_MODE_START
|
||||
|
||||
static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2)
|
||||
{
|
||||
return memcmp(key1, key2, *(uint *) arg);
|
||||
}
|
||||
|
||||
C_MODE_END
|
||||
|
||||
bool Item_sum_sum_distinct::setup(THD *thd)
|
||||
{
|
||||
SELECT_LEX *select_lex= thd->lex->current_select;
|
||||
/* what does it mean??? */
|
||||
if (select_lex->linkage == GLOBAL_OPTIONS_TYPE)
|
||||
return 1;
|
||||
|
||||
DBUG_ASSERT(tree == 0); /* setup can not be called twice */
|
||||
|
||||
/*
|
||||
Uniques handles all unique elements in a tree until they can't fit in.
|
||||
Then thee tree is dumped to the temporary file.
|
||||
See class Unique for details.
|
||||
*/
|
||||
null_value= maybe_null= 1;
|
||||
/*
|
||||
TODO: if underlying item result fits in 4 bytes we can take advantage
|
||||
of it and have tree of long/ulong. It gives 10% performance boost
|
||||
*/
|
||||
static uint key_length= sizeof(double);
|
||||
tree= new Unique(simple_raw_key_cmp, &key_length, key_length,
|
||||
thd->variables.max_heap_table_size);
|
||||
return tree == 0;
|
||||
}
|
||||
|
||||
void Item_sum_sum_distinct::clear()
|
||||
{
|
||||
DBUG_ASSERT(tree); /* we always have a tree */
|
||||
null_value= 1;
|
||||
tree->reset();
|
||||
}
|
||||
|
||||
bool Item_sum_sum_distinct::add()
|
||||
{
|
||||
/* args[0]->val() may reset args[0]->null_value */
|
||||
double val= args[0]->val();
|
||||
if (!args[0]->null_value)
|
||||
{
|
||||
DBUG_ASSERT(tree);
|
||||
null_value= 0;
|
||||
if (val)
|
||||
return tree->unique_add(&val);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
C_MODE_START
|
||||
|
||||
static int sum_sum_distinct(void *element, element_count num_of_dups,
|
||||
void *item_sum_sum_distinct)
|
||||
{
|
||||
((Item_sum_sum_distinct *)
|
||||
(item_sum_sum_distinct))->add(* (double *) element);
|
||||
return 0;
|
||||
}
|
||||
|
||||
C_MODE_END
|
||||
|
||||
double Item_sum_sum_distinct::val()
|
||||
{
|
||||
/*
|
||||
We don't have a tree only if 'setup()' hasn't been called;
|
||||
this is the case of sql_select.cc:return_zero_rows.
|
||||
*/
|
||||
sum= 0.0;
|
||||
if (tree)
|
||||
tree->walk(sum_sum_distinct, (void *) this);
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* end of Item_sum_sum_distinct */
|
||||
|
||||
Item *Item_sum_count::copy_or_same(THD* thd)
|
||||
{
|
||||
return new (&thd->mem_root) Item_sum_count(thd, *this);
|
||||
@ -1036,11 +1152,6 @@ String *Item_variance_field::val_str(String *str)
|
||||
|
||||
#include "sql_select.h"
|
||||
|
||||
int simple_raw_key_cmp(void* arg, byte* key1, byte* key2)
|
||||
{
|
||||
return memcmp(key1, key2, *(uint*) arg);
|
||||
}
|
||||
|
||||
int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
|
||||
{
|
||||
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
|
||||
|
@ -27,9 +27,9 @@ class Item_sum :public Item_result_field
|
||||
{
|
||||
public:
|
||||
enum Sumfunctype
|
||||
{ COUNT_FUNC,COUNT_DISTINCT_FUNC,SUM_FUNC,AVG_FUNC,MIN_FUNC,
|
||||
MAX_FUNC, UNIQUE_USERS_FUNC,STD_FUNC,VARIANCE_FUNC,SUM_BIT_FUNC,
|
||||
UDF_SUM_FUNC, GROUP_CONCAT_FUNC
|
||||
{ COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
|
||||
MIN_FUNC, MAX_FUNC, UNIQUE_USERS_FUNC, STD_FUNC, VARIANCE_FUNC,
|
||||
SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC
|
||||
};
|
||||
|
||||
Item **args,*tmp_args[2];
|
||||
@ -138,6 +138,39 @@ class Item_sum_sum :public Item_sum_num
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Item_sum_sum_distinct - SELECT SUM(DISTINCT expr) FROM ...
|
||||
support. See also: MySQL manual, chapter 'Adding New Functions To MySQL'
|
||||
and comments in item_sum.cc.
|
||||
*/
|
||||
|
||||
class Unique;
|
||||
|
||||
class Item_sum_sum_distinct :public Item_sum_num
|
||||
{
|
||||
double sum;
|
||||
Unique *tree;
|
||||
private:
|
||||
Item_sum_sum_distinct(THD *thd, Item_sum_sum_distinct &item);
|
||||
public:
|
||||
Item_sum_sum_distinct(Item *item_par);
|
||||
~Item_sum_sum_distinct();
|
||||
|
||||
bool setup(THD *thd);
|
||||
void clear();
|
||||
bool add();
|
||||
double val();
|
||||
|
||||
inline void add(double val) { sum+= val; }
|
||||
enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
|
||||
void reset_field() {} // not used
|
||||
void update_field() {} // not used
|
||||
const char *func_name() const { return "sum_distinct"; }
|
||||
Item *copy_or_same(THD* thd);
|
||||
virtual void no_rows_in_result() {}
|
||||
};
|
||||
|
||||
|
||||
class Item_sum_count :public Item_sum_int
|
||||
{
|
||||
longlong count;
|
||||
|
@ -1207,8 +1207,13 @@ class user_var_entry
|
||||
DTCollation collation;
|
||||
};
|
||||
|
||||
|
||||
/* Class for unique (removing of duplicates) */
|
||||
/*
|
||||
Unique -- class for unique (removing of duplicates).
|
||||
Puts all values to the TREE. If the tree becomes too big,
|
||||
it's dumped to the file. User can request sorted values, or
|
||||
just iterate through them. In the last case tree merging is performed in
|
||||
memory simultaneously with iteration, so it should be ~2-3x faster.
|
||||
*/
|
||||
|
||||
class Unique :public Sql_alloc
|
||||
{
|
||||
@ -1222,10 +1227,10 @@ class Unique :public Sql_alloc
|
||||
|
||||
public:
|
||||
ulong elements;
|
||||
Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
|
||||
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
|
||||
uint size_arg, ulong max_in_memory_size_arg);
|
||||
~Unique();
|
||||
inline bool unique_add(gptr ptr)
|
||||
inline bool unique_add(void *ptr)
|
||||
{
|
||||
if (tree.elements_in_tree > max_elements && flush())
|
||||
return 1;
|
||||
@ -1234,6 +1239,9 @@ public:
|
||||
|
||||
bool get(TABLE *table);
|
||||
|
||||
void reset();
|
||||
bool walk(tree_walk_action action, void *walk_action_arg);
|
||||
|
||||
friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
|
||||
friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
|
||||
};
|
||||
|
@ -78,3 +78,4 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file,
|
||||
IO_CACHE *to_file, uchar *sort_buffer,
|
||||
BUFFPEK *lastbuff,BUFFPEK *Fb,
|
||||
BUFFPEK *Tb,int flag);
|
||||
void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length);
|
||||
|
@ -4055,14 +4055,25 @@ sum_expr:
|
||||
{ $$= new Item_sum_unique_users($3,atoi($5.str),atoi($7.str),$9); }
|
||||
| MIN_SYM '(' in_sum_expr ')'
|
||||
{ $$=new Item_sum_min($3); }
|
||||
/*
|
||||
According to ANSI SQL, DISTINCT is allowed and has
|
||||
no sence inside MIN and MAX grouping functions; so MIN|MAX(DISTINCT ...)
|
||||
is processed like an ordinary MIN | MAX()
|
||||
*/
|
||||
| MIN_SYM '(' DISTINCT in_sum_expr ')'
|
||||
{ $$=new Item_sum_min($4); }
|
||||
| MAX_SYM '(' in_sum_expr ')'
|
||||
{ $$=new Item_sum_max($3); }
|
||||
| MAX_SYM '(' DISTINCT in_sum_expr ')'
|
||||
{ $$=new Item_sum_max($4); }
|
||||
| STD_SYM '(' in_sum_expr ')'
|
||||
{ $$=new Item_sum_std($3); }
|
||||
| VARIANCE_SYM '(' in_sum_expr ')'
|
||||
{ $$=new Item_sum_variance($3); }
|
||||
| SUM_SYM '(' in_sum_expr ')'
|
||||
{ $$=new Item_sum_sum($3); }
|
||||
| SUM_SYM '(' DISTINCT in_sum_expr ')'
|
||||
{ $$=new Item_sum_sum_distinct($4); }
|
||||
| GROUP_CONCAT_SYM '(' opt_distinct expr_list opt_gorder_clause
|
||||
opt_gconcat_separator ')'
|
||||
{
|
||||
|
234
sql/uniques.cc
234
sql/uniques.cc
@ -84,6 +84,7 @@ bool Unique::flush()
|
||||
elements+= tree.elements_in_tree;
|
||||
file_ptr.count=tree.elements_in_tree;
|
||||
file_ptr.file_pos=my_b_tell(&file);
|
||||
|
||||
if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
|
||||
(void*) this, left_root_right) ||
|
||||
insert_dynamic(&file_ptrs, (gptr) &file_ptr))
|
||||
@ -93,6 +94,237 @@ bool Unique::flush()
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Clear the tree and the file.
|
||||
You must call reset() if you want to reuse Unique after walk().
|
||||
*/
|
||||
|
||||
void
|
||||
Unique::reset()
|
||||
{
|
||||
reset_tree(&tree);
|
||||
/*
|
||||
If elements != 0, some trees were stored in the file (see how
|
||||
flush() works). Note, that we can not count on my_b_tell(&file) == 0
|
||||
here, because it can return 0 right after walk(), and walk() does not
|
||||
reset any Unique member.
|
||||
*/
|
||||
if (elements)
|
||||
{
|
||||
reset_dynamic(&file_ptrs);
|
||||
reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
|
||||
}
|
||||
elements= 0;
|
||||
}
|
||||
|
||||
/*
|
||||
The comparison function, passed to queue_init() in merge_walk() must
|
||||
use comparison function of Uniques::tree, but compare members of struct
|
||||
BUFFPEK.
|
||||
*/
|
||||
|
||||
struct BUFFPEK_COMPARE_CONTEXT
|
||||
{
|
||||
qsort_cmp2 key_compare;
|
||||
void *key_compare_arg;
|
||||
};
|
||||
|
||||
C_MODE_START
|
||||
|
||||
static int buffpek_compare(void *arg, byte *key_ptr1, byte *key_ptr2)
|
||||
{
|
||||
BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg;
|
||||
return ctx->key_compare(ctx->key_compare_arg,
|
||||
*((byte **) key_ptr1), *((byte **)key_ptr2));
|
||||
}
|
||||
|
||||
C_MODE_END
|
||||
|
||||
|
||||
/*
|
||||
DESCRIPTION
|
||||
Function is very similar to merge_buffers, but instead of writing sorted
|
||||
unique keys to the output file, it invokes walk_action for each key.
|
||||
This saves I/O if you need to pass through all unique keys only once.
|
||||
SYNOPSIS
|
||||
merge_walk()
|
||||
All params are 'IN' (but see comment for begin, end):
|
||||
merge_buffer buffer to perform cached piece-by-piece loading
|
||||
of trees; initially the buffer is empty
|
||||
merge_buffer_size size of merge_buffer. Must be aligned with
|
||||
key_length
|
||||
key_length size of tree element; key_length * (end - begin)
|
||||
must be less or equal than merge_buffer_size.
|
||||
begin pointer to BUFFPEK struct for the first tree.
|
||||
end pointer to BUFFPEK struct for the last tree;
|
||||
end > begin and [begin, end) form a consecutive
|
||||
range. BUFFPEKs structs in that range are used and
|
||||
overwritten in merge_walk().
|
||||
walk_action element visitor. Action is called for each unique
|
||||
key.
|
||||
walk_action_arg argument to walk action. Passed to it on each call.
|
||||
compare elements comparison function
|
||||
compare_arg comparison function argument
|
||||
file file with all trees dumped. Trees in the file
|
||||
must contain sorted unique values. Cache must be
|
||||
initialized in read mode.
|
||||
RETURN VALUE
|
||||
0 ok
|
||||
<> 0 error
|
||||
*/
|
||||
|
||||
static bool merge_walk(uchar *merge_buffer, uint merge_buffer_size,
|
||||
uint key_length, BUFFPEK *begin, BUFFPEK *end,
|
||||
tree_walk_action walk_action, void *walk_action_arg,
|
||||
qsort_cmp2 compare, void *compare_arg,
|
||||
IO_CACHE *file)
|
||||
{
|
||||
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
|
||||
QUEUE queue;
|
||||
if (end <= begin ||
|
||||
merge_buffer_size < key_length * (end - begin + 1) ||
|
||||
init_queue(&queue, end - begin, offsetof(BUFFPEK, key), 0,
|
||||
buffpek_compare, &compare_context))
|
||||
return 1;
|
||||
/* we need space for one key when a piece of merge buffer is re-read */
|
||||
merge_buffer_size-= key_length;
|
||||
uchar *save_key_buff= merge_buffer + merge_buffer_size;
|
||||
uint max_key_count_per_piece= merge_buffer_size/(end-begin)/key_length;
|
||||
/* if piece_size is aligned reuse_freed_buffer will always hit */
|
||||
uint piece_size= max_key_count_per_piece * key_length;
|
||||
uint bytes_read; /* to hold return value of read_to_buffer */
|
||||
BUFFPEK *top;
|
||||
int res= 1;
|
||||
/*
|
||||
Invariant: queue must contain top element from each tree, until a tree
|
||||
is not completely walked through.
|
||||
Here we're forcing the invariant, inserting one element from each tree
|
||||
to the queue.
|
||||
*/
|
||||
for (top= begin; top != end; ++top)
|
||||
{
|
||||
top->base= merge_buffer + (top - begin) * piece_size;
|
||||
top->max_keys= max_key_count_per_piece;
|
||||
bytes_read= read_to_buffer(file, top, key_length);
|
||||
if (bytes_read == (uint) (-1))
|
||||
goto end;
|
||||
DBUG_ASSERT(bytes_read);
|
||||
queue_insert(&queue, (byte *) top);
|
||||
}
|
||||
top= (BUFFPEK *) queue_top(&queue);
|
||||
while (queue.elements > 1)
|
||||
{
|
||||
/*
|
||||
Every iteration one element is removed from the queue, and one is
|
||||
inserted by the rules of the invariant. If two adjacent elements on
|
||||
the top of the queue are not equal, biggest one is unique, because all
|
||||
elements in each tree are unique. Action is applied only to unique
|
||||
elements.
|
||||
*/
|
||||
void *old_key= top->key;
|
||||
/*
|
||||
read next key from the cache or from the file and push it to the
|
||||
queue; this gives new top.
|
||||
*/
|
||||
top->key+= key_length;
|
||||
if (--top->mem_count)
|
||||
queue_replaced(&queue);
|
||||
else /* next piece should be read */
|
||||
{
|
||||
/* save old_key not to overwrite it in read_to_buffer */
|
||||
memcpy(save_key_buff, old_key, key_length);
|
||||
old_key= save_key_buff;
|
||||
bytes_read= read_to_buffer(file, top, key_length);
|
||||
if (bytes_read == (uint) (-1))
|
||||
goto end;
|
||||
else if (bytes_read > 0) /* top->key, top->mem_count are reset */
|
||||
queue_replaced(&queue); /* in read_to_buffer */
|
||||
else
|
||||
{
|
||||
/*
|
||||
Tree for old 'top' element is empty: remove it from the queue and
|
||||
give all its memory to the nearest tree.
|
||||
*/
|
||||
queue_remove(&queue, 0);
|
||||
reuse_freed_buff(&queue, top, key_length);
|
||||
}
|
||||
}
|
||||
top= (BUFFPEK *) queue_top(&queue);
|
||||
/* new top has been obtained; if old top is unique, apply the action */
|
||||
if (compare(compare_arg, old_key, top->key))
|
||||
{
|
||||
if (walk_action(old_key, 1, walk_action_arg))
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
/*
|
||||
Applying walk_action to the tail of the last tree: this is safe because
|
||||
either we had only one tree in the beginning, either we work with the
|
||||
last tree in the queue.
|
||||
*/
|
||||
do
|
||||
{
|
||||
do
|
||||
{
|
||||
if (walk_action(top->key, 1, walk_action_arg))
|
||||
goto end;
|
||||
top->key+= key_length;
|
||||
}
|
||||
while (--top->mem_count);
|
||||
bytes_read= read_to_buffer(file, top, key_length);
|
||||
if (bytes_read == (uint) (-1))
|
||||
goto end;
|
||||
}
|
||||
while (bytes_read);
|
||||
res= 0;
|
||||
end:
|
||||
delete_queue(&queue);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
DESCRIPTION
|
||||
Walks consecutively through all unique elements:
|
||||
if all elements are in memory, then it simply invokes 'tree_walk', else
|
||||
all flushed trees are loaded to memory piece-by-piece, pieces are
|
||||
sorted, and action is called for each unique value.
|
||||
Note: so as merging resets file_ptrs state, this method can change
|
||||
internal Unique state to undefined: if you want to reuse Unique after
|
||||
walk() you must call reset() first!
|
||||
SYNOPSIS
|
||||
Unique:walk()
|
||||
All params are 'IN':
|
||||
action function-visitor, typed in include/my_tree.h
|
||||
function is called for each unique element
|
||||
arg argument for visitor, which is passed to it on each call
|
||||
RETURN VALUE
|
||||
0 OK
|
||||
<> 0 error
|
||||
*/
|
||||
|
||||
bool Unique::walk(tree_walk_action action, void *walk_action_arg)
|
||||
{
|
||||
if (elements == 0) /* the whole tree is in memory */
|
||||
return tree_walk(&tree, action, walk_action_arg, left_root_right);
|
||||
|
||||
/* flush current tree to the file to have some memory for merge buffer */
|
||||
if (flush())
|
||||
return 1;
|
||||
if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0))
|
||||
return 1;
|
||||
uchar *merge_buffer= (uchar *) my_malloc(max_in_memory_size, MYF(0));
|
||||
if (merge_buffer == 0)
|
||||
return 1;
|
||||
int res= merge_walk(merge_buffer, max_in_memory_size, size,
|
||||
(BUFFPEK *) file_ptrs.buffer,
|
||||
(BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
|
||||
action, walk_action_arg,
|
||||
tree.compare, tree.custom_arg, &file);
|
||||
x_free(merge_buffer);
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
Modify the TABLE element so that when one calls init_records()
|
||||
the rows will be read in priority order.
|
||||
@ -114,7 +346,7 @@ bool Unique::get(TABLE *table)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
/* Not enough memory; Save the result to file */
|
||||
/* Not enough memory; Save the result to file && free memory used by tree */
|
||||
if (flush())
|
||||
return 1;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user