WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up Item_sum_count_distinct, and deploy Unique for use with COUNT(DISTINCT) if there is no blob column in the list of DISTINCT arguments.
2005-03-15 03:46:19 +03:00 · 2005-03-15 03:46:19 +03:00 · 6a2ef5577c
commit 6a2ef5577c
parent 98e83555a5
8 changed files with 231 additions and 314 deletions
--- a/mysql-test/r/count_distinct2.result
+++ b/mysql-test/r/count_distinct2.result
@ -116,7 +116,7 @@ count(distinct n)
 5000
 show status like 'Created_tmp_disk_tables';
 Variable_name	Value
-Created_tmp_disk_tables	1
+Created_tmp_disk_tables	0
 drop table t1;
 create table t1 (s text);
 flush status;
--- a/mysql-test/r/func_group.result
+++ b/mysql-test/r/func_group.result
@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
 MAX(id)
 NULL
 DROP TABLE t1;
 CREATE TABLE t1 (a VARCHAR(400));
 INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a   "),
 ("B"), ("b"), ("b "), ("b   ");
 SELECT COUNT(DISTINCT a) FROM t1;
 COUNT(DISTINCT a)
 2
 DROP TABLE t1;
--- a/mysql-test/r/sum_distinct.result
+++ b/mysql-test/r/sum_distinct.result
@ -98,60 +98,60 @@ DROP TABLE t1;
 CREATE TABLE t1 (id INTEGER);
 CREATE TABLE t2 (id INTEGER);
 INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
-INSERT INTO t2 (id) SELECT id FROM t1;
+INSERT INTO t1 (id) SELECT id FROM t1;
 INSERT INTO t1 (id) SELECT id FROM t2;
 /* 8 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 12 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 16 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 20 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 24 */
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+1 FROM t1;
-INSERT INTO t2 (id) SELECT id+1 FROM t1;
+INSERT INTO t1 SELECT id+2 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+4 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+8 FROM t1;
-INSERT INTO t2 (id) SELECT id+2 FROM t1;
+INSERT INTO t1 SELECT id+16 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+32 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+64 FROM t1;
-INSERT INTO t2 (id) SELECT id+4 FROM t1;
+INSERT INTO t1 SELECT id+128 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+256 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+512 FROM t1;
-INSERT INTO t2 (id) SELECT id+8 FROM t1;
+SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
-INSERT INTO t1 SELECT id FROM t2;
+AVG(DISTINCT id)
-DELETE FROM t2;
+513.5000
-INSERT INTO t2 (id) SELECT id+16 FROM t1;
+508.0000
-INSERT INTO t1 SELECT id FROM t2;
+509.0000
-DELETE FROM t2;
+510.0000
-INSERT INTO t2 (id) SELECT id+32 FROM t1;
+511.0000
-INSERT INTO t1 SELECT id FROM t2;
+512.0000
-DELETE FROM t2;
+513.0000
-INSERT INTO t2 (id) SELECT id+64 FROM t1;
+514.0000
-INSERT INTO t1 SELECT id FROM t2;
+515.0000
-DELETE FROM t2;
+516.0000
-INSERT INTO t2 (id) SELECT id+128 FROM t1;
+517.0000
-INSERT INTO t1 SELECT id FROM t2;
+511.5000
-DELETE FROM t2;
+512.5000
-INSERT INTO t2 (id) SELECT id+256 FROM t1;
+SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
-INSERT INTO t1 SELECT id FROM t2;
+SUM(DISTINCT id)/COUNT(DISTINCT id)
-DELETE FROM t2;
+513.50000
-INSERT INTO t2 (id) SELECT id+512 FROM t1;
+508.00000
-INSERT INTO t1 SELECT id FROM t2;
+509.00000
-DELETE FROM t2;
+510.00000
-INSERT INTO t2 (id) SELECT id+1024 FROM t1;
+511.00000
-INSERT INTO t1 SELECT id FROM t2;
+512.00000
-DELETE FROM t2;
+513.00000
-INSERT INTO t2 (id) SELECT id+2048 FROM t1;
+514.00000
-INSERT INTO t1 SELECT id FROM t2;
+515.00000
-DELETE FROM t2;
+516.00000
-INSERT INTO t2 (id) SELECT id+4096 FROM t1;
+517.00000
-INSERT INTO t1 SELECT id FROM t2;
+511.50000
-DELETE FROM t2;
+512.50000
-INSERT INTO t2 (id) SELECT id+8192 FROM t1;
+INSERT INTO t1 SELECT id+1024 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+2048 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+4096 FROM t1;
 INSERT INTO t1 SELECT id+8192 FROM t1;
 INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
 SELECT SUM(DISTINCT id) sm FROM t1;
 sm
--- a/mysql-test/t/func_group.test
+++ b/mysql-test/t/func_group.test
@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
  (1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
 SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
 DROP TABLE t1;
 #
 # Test that new VARCHAR correctly works with COUNT(DISTINCT)
 #
 CREATE TABLE t1 (a VARCHAR(400));
 INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a   "),
                          ("B"), ("b"), ("b "), ("b   ");
 SELECT COUNT(DISTINCT a) FROM t1;
 DROP TABLE t1;
--- a/mysql-test/t/sum_distinct.test
+++ b/mysql-test/t/sum_distinct.test
@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
 CREATE TABLE t2 (id INTEGER);
 INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
-INSERT INTO t2 (id) SELECT id FROM t1;
+INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */
+INSERT INTO t1 SELECT id+1 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+2 FROM t1;
-INSERT INTO t2 (id) SELECT id+1 FROM t1;
+INSERT INTO t1 SELECT id+4 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+8 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+16 FROM t1;
-INSERT INTO t2 (id) SELECT id+2 FROM t1;
+INSERT INTO t1 SELECT id+32 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+64 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+128 FROM t1;
-INSERT INTO t2 (id) SELECT id+4 FROM t1;
+INSERT INTO t1 SELECT id+256 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+512 FROM t1;
-DELETE FROM t2;
+
-INSERT INTO t2 (id) SELECT id+8 FROM t1;
+# Just test that AVG(DISTINCT) is there
-INSERT INTO t1 SELECT id FROM t2;
+SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
-DELETE FROM t2;
+SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
-INSERT INTO t2 (id) SELECT id+16 FROM t1;
+
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+1024 FROM t1;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+2048 FROM t1;
-INSERT INTO t2 (id) SELECT id+32 FROM t1;
+INSERT INTO t1 SELECT id+4096 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
+INSERT INTO t1 SELECT id+8192 FROM t1;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+64 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+128 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+256 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+512 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+1024 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+2048 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+4096 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 INSERT INTO t2 (id) SELECT id+8192 FROM t1;
 INSERT INTO t1 SELECT id FROM t2;
 DELETE FROM t2;
 #INSERT INTO t2 (id) SELECT id+16384 FROM t1;
 #INSERT INTO t1 SELECT id FROM t2;
 #DELETE FROM t2;
 #INSERT INTO t2 (id) SELECT id+32768 FROM t1;
 #INSERT INTO t1 SELECT id FROM t2;
 #DELETE FROM t2;
 #INSERT INTO t2 (id) SELECT id+65536 FROM t1;
 #INSERT INTO t1 SELECT id FROM t2;
 #DELETE FROM t2;
 INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
 # SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@ -719,6 +719,18 @@ String *Item_sum_distinct::val_str(String *str)
 /* Item_sum_avg_distinct */
 void
 Item_sum_avg_distinct::fix_length_and_dec()
 {
  Item_sum_distinct::fix_length_and_dec();
  /*
    AVG() will divide val by count. We need to reserve digits
    after decimal point as the result can be fractional.
  */
  decimals+= 4;
 }
 void
 Item_sum_avg_distinct::calculate_val_and_count()
 {
@ -2115,12 +2127,8 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)
 int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
 {
-  Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
+  Field *f= (Field*) arg;
-  CHARSET_INFO *cs=item->key_charset;
+  return f->cmp(key1, key2);
  uint len=item->key_length;
  return cs->coll->strnncollsp(cs, 
 			       (const uchar*) key1, len, 
 			       (const uchar*) key2, len, 0);
 }
 /*
@ -2149,54 +2157,42 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
  return 0;
 }
 /*
  helper function for walking the tree when we dump it to MyISAM -
  tree_walk will call it for each leaf
 */
-int dump_leaf(byte* key, uint32 count __attribute__((unused)),
+C_MODE_START
-		     Item_sum_count_distinct* item)
+
 static int count_distinct_walk(void *elem, unsigned int count, void *arg)
 {
-  byte* buf = item->table->record[0];
+  (*((ulonglong*)arg))++;
  int error;
  /*
    The first item->rec_offset bytes are taken care of with
    restore_record(table,default_values) in setup()
  */
  memcpy(buf + item->rec_offset, key, item->tree->size_of_element);
  if ((error = item->table->file->write_row(buf)))
  {
    if (error != HA_ERR_FOUND_DUPP_KEY &&
 	error != HA_ERR_FOUND_DUPP_UNIQUE)
      return 1;
  }
  return 0;
 }
 C_MODE_END
 void Item_sum_count_distinct::cleanup()
 {
  DBUG_ENTER("Item_sum_count_distinct::cleanup");
  Item_sum_int::cleanup();
-  /*
+
-    Free table and tree if they belong to this item (if item have not pointer
+  /* Free objects only if we own them. */
    to original item from which was made copy => it own its objects )
  */
  if (!original)
  {
    /*
      We need to delete the table and the tree in cleanup() as
      they were allocated in the runtime memroot. Using the runtime
      memroot reduces memory footprint for PS/SP and simplifies setup().
    */
    delete tree;
    tree= 0;
    if (table)
    {
-      free_tmp_table(current_thd, table);
+      free_tmp_table(table->in_use, table);
      table= 0;
    }
    delete tmp_table_param;
    tmp_table_param= 0;
    if (use_tree)
    {
      delete_tree(tree);
      use_tree= 0;
    }
  }
  always_null= FALSE;
  DBUG_VOID_RETURN;
 }
@ -2207,8 +2203,15 @@ void Item_sum_count_distinct::make_unique()
 {
  table=0;
  original= 0;
-  use_tree= 0; // to prevent delete_tree call on uninitialized tree
+  tree= 0;
-  tree= &tree_base;
+  tmp_table_param= 0;
  always_null= FALSE;
 }
 Item_sum_count_distinct::~Item_sum_count_distinct()
 {
  cleanup();
 }
@ -2216,9 +2219,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
 {
  List<Item> list;
  SELECT_LEX *select_lex= thd->lex->current_select;
-  if (select_lex->linkage == GLOBAL_OPTIONS_TYPE)
+
-    return 1;
+  /*
-    
+    Setup can be called twice for ROLLUP items. This is a bug.
    Please add DBUG_ASSERT(tree == 0) here when it's fixed.
  */
  if (tree || table || tmp_table_param)
    return FALSE;
  if (!(tmp_table_param= new TMP_TABLE_PARAM))
    return 1;
@ -2238,11 +2246,7 @@ bool Item_sum_count_distinct::setup(THD *thd)
  if (always_null)
    return 0;
  count_field_types(tmp_table_param,list,0);
-  if (table)
+  DBUG_ASSERT(table == 0);
  {
    free_tmp_table(thd, table);
    tmp_table_param->cleanup();
  }
  if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
 				0,
 				select_lex->options | thd->options,
@ -2251,123 +2255,77 @@ bool Item_sum_count_distinct::setup(THD *thd)
  table->file->extra(HA_EXTRA_NO_ROWS);		// Don't update rows
  table->no_rows=1;
  // no blobs, otherwise it would be MyISAM
  if (table->s->db_type == DB_TYPE_HEAP)
  {
    /*
      No blobs, otherwise it would have been MyISAM: set up a compare
      function and its arguments to use with Unique.
    */
    qsort_cmp2 compare_key;
    void* cmp_arg;
    Field **field= table->field;
    Field **field_end= field + table->s->fields;
    bool all_binary= TRUE;
-    // to make things easier for dump_leaf if we ever have to dump to MyISAM
+    for (tree_key_length= 0; field < field_end; ++field)
    restore_record(table,s->default_values);
    if (table->s->fields == 1)
    {
-      /*
+      Field *f= *field;
-	If we have only one field, which is the most common use of
+      enum enum_field_types type= f->type();
-	count(distinct), it is much faster to use a simpler key
+      tree_key_length+= f->pack_length();
-	compare method that can take advantage of not having to worry
+      if (!f->binary() && (type == MYSQL_TYPE_STRING ||
-	about other fields
+                           type == MYSQL_TYPE_VAR_STRING ||
-      */
+                           type == MYSQL_TYPE_VARCHAR))
-      Field* field = table->field[0];
+      {
-      switch (field->type()) {
+        all_binary= FALSE;
-      case MYSQL_TYPE_STRING:
+        break;
      case MYSQL_TYPE_VAR_STRING:
 	if (field->binary())
 	{
 	  compare_key = (qsort_cmp2)simple_raw_key_cmp;
 	  cmp_arg = (void*) &key_length;
 	}
 	else
 	{
 	  /*
 	    If we have a string, we must take care of charsets and case
 	    sensitivity
 	  */
 	  compare_key = (qsort_cmp2)simple_str_key_cmp;
 	  cmp_arg = (void*) this;
 	}
 	break;
      default:
 	/*
 	  Since at this point we cannot have blobs anything else can
 	  be compared with memcmp
 	*/
 	compare_key = (qsort_cmp2)simple_raw_key_cmp;
 	cmp_arg = (void*) &key_length;
 	break;
      }
      key_charset = field->charset();
      key_length  = field->pack_length();
      rec_offset  = 1;
    }
-    else // too bad, cannot cheat - there is more than one field
+    if (all_binary)
    {
-      bool all_binary = 1;
+      cmp_arg= (void*) &tree_key_length;
-      Field** field, **field_end;
+      compare_key= (qsort_cmp2) simple_raw_key_cmp;
-      field_end = (field = table->field) + table->s->fields;
+    }
-      uint32 *lengths;
+    else
-      if (!(field_lengths= 
+    {
-	    (uint32*) thd->alloc(sizeof(uint32) * table->s->fields)))
+      if (table->s->fields == 1)
 	return 1;
      for (key_length = 0, lengths=field_lengths; field < field_end; ++field)
      {
-	uint32 length= (*field)->pack_length();
+        /*
-	key_length += length;
+          If we have only one field, which is the most common use of
-	*lengths++ = length;
+          count(distinct), it is much faster to use a simpler key
-	if (!(*field)->binary())
+          compare method that can take advantage of not having to worry
-	  all_binary = 0;			// Can't break loop here
+          about other fields.
-      }
+        */
-      rec_offset= table->s->reclength - key_length;
+        compare_key= (qsort_cmp2) simple_str_key_cmp;
-      if (all_binary)
+        cmp_arg= (void*) table->field[0];
-      {
+        /* tree_key_length has been set already */
 	compare_key = (qsort_cmp2)simple_raw_key_cmp;
 	cmp_arg = (void*) &key_length;
      }
      else
      {
-	compare_key = (qsort_cmp2) composite_key_cmp ;
+        uint32 *length;
-	cmp_arg = (void*) this;
+        compare_key= (qsort_cmp2) composite_key_cmp;
        cmp_arg= (void*) this;
        field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
        for (tree_key_length= 0, length= field_lengths, field= table->field;
             field < field_end; ++field, ++length)
        {
          *length= (*field)->pack_length();
          tree_key_length+= *length;
        }
      }
    }
-
+    DBUG_ASSERT(tree == 0);
-    if (use_tree)
+    tree= new Unique(compare_key, cmp_arg, tree_key_length,
-      delete_tree(tree);
+                     thd->variables.max_heap_table_size);
    init_tree(tree, min(thd->variables.max_heap_table_size,
 			thd->variables.sortbuff_size/16), 0,
 	      key_length, compare_key, 0, NULL, cmp_arg);
    use_tree = 1;
    /*
-      The only time key_length could be 0 is if someone does
+      The only time tree_key_length could be 0 is if someone does
      count(distinct) on a char(0) field - stupid thing to do,
      but this has to be handled - otherwise someone can crash
      the server with a DoS attack
    */
-    max_elements_in_tree = ((key_length) ? 
+    if (! tree)
-			    thd->variables.max_heap_table_size/key_length : 1);
+      return TRUE;
  }
-  if (original)
+  return FALSE;
  {
    original->table= table;
    original->use_tree= use_tree;
  }
  return 0;
 }
 int Item_sum_count_distinct::tree_to_myisam()
 {
  if (create_myisam_from_heap(current_thd, table, tmp_table_param,
 			      HA_ERR_RECORD_FILE_FULL, 1) ||
      tree_walk(tree, (tree_walk_action)&dump_leaf, (void*)this,
 		left_root_right))
    return 1;
  delete_tree(tree);
  use_tree = 0;
  return 0;
 }
@ -2379,8 +2337,9 @@ Item *Item_sum_count_distinct::copy_or_same(THD* thd)
 void Item_sum_count_distinct::clear()
 {
-  if (use_tree)
+  /* tree and table can be both null only if always_null */
-    reset_tree(tree);
+  if (tree)
    tree->reset();
  else if (table)
  {
    table->file->extra(HA_EXTRA_NO_CACHE);
@ -2401,32 +2360,21 @@ bool Item_sum_count_distinct::add()
    if ((*field)->is_real_null(0))
      return 0;					// Don't count NULL
-  if (use_tree)
+  if (tree)
  {
    /*
-      If the tree got too big, convert to MyISAM, otherwise insert into the
+      The first few bytes of record (at least one) are just markers
-      tree.
+      for deleted and NULLs. We want to skip them since they will
      bloat the tree without providing any valuable info. Besides,
      key_length used to initialize the tree didn't include space for them.
    */
-    if (tree->elements_in_tree > max_elements_in_tree)
+    return tree->unique_add(table->record[0] + table->s->null_bytes);
    {
      if (tree_to_myisam())
 	return 1;
    }
    else if (!tree_insert(tree, table->record[0] + rec_offset, 0,
 			  tree->custom_arg))
      return 1;
  }
-  else if ((error=table->file->write_row(table->record[0])))
+  if ((error= table->file->write_row(table->record[0])) &&
-  {
+      error != HA_ERR_FOUND_DUPP_KEY &&
-    if (error != HA_ERR_FOUND_DUPP_KEY &&
+      error != HA_ERR_FOUND_DUPP_UNIQUE)
-	error != HA_ERR_FOUND_DUPP_UNIQUE)
+    return TRUE;
-    {
+  return FALSE;
      if (create_myisam_from_heap(current_thd, table, tmp_table_param, error,
 				  1))
 	return 1;				// Not a table_is_full error
    }
  }
  return 0;
 }
@ -2435,8 +2383,16 @@ longlong Item_sum_count_distinct::val_int()
  DBUG_ASSERT(fixed == 1);
  if (!table)					// Empty query
    return LL(0);
-  if (use_tree)
+  if (tree)
-    return tree->elements_in_tree;
+  {
    ulonglong count;
    if (tree->elements == 0)
      return (longlong) tree->elements_in_tree(); // everything fits in memory
    count= 0;
    tree->walk(count_distinct_walk, (void*) &count);
    return (longlong) count;
  }
  table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
  return table->file->records;
 }
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@ -239,6 +239,7 @@ private:
 public:
  Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
  void fix_length_and_dec();
  virtual void calculate_val_and_count();
  enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
  const char *func_name() const { return "avg_distinct"; }
@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
 class Item_sum_count_distinct :public Item_sum_int
 {
  TABLE *table;
  table_map used_table_cache;
  uint32 *field_lengths;
  TMP_TABLE_PARAM *tmp_table_param;
  TREE tree_base;
  TREE *tree;
  /*
    Following is 0 normal object and pointer to original one for copy 
    (to correctly free resources)
  */
  Item_sum_count_distinct *original;
  uint key_length;
  CHARSET_INFO *key_charset;
  /*
    Calculated based on max_heap_table_size. If reached,
    walk the tree and dump it into MyISAM table
  */
  uint max_elements_in_tree;
  /*
    The first few bytes of record ( at least one)
    are just markers for deleted and NULLs. We want to skip them since
    they will just bloat the tree without providing any valuable info
  */
  int rec_offset;
  /*
    If there are no blobs, we can use a tree, which
    is faster than heap table. In that case, we still use the table
    to help get things set up, but we insert nothing in it
  */
-  bool use_tree;
+  Unique *tree;
  /*
    Following is 0 normal object and pointer to original one for copy 
    (to correctly free resources)
  */
  Item_sum_count_distinct *original;
  uint tree_key_length;
  bool always_null;		// Set to 1 if the result is always NULL
  int tree_to_myisam();
  friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
  friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
  friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
  friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
 		       Item_sum_count_distinct* item);
-  public:
+public:
  Item_sum_count_distinct(List<Item> &list)
-    :Item_sum_int(list), table(0), used_table_cache(~(table_map) 0),
+    :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
-     tmp_table_param(0), tree(&tree_base), original(0), use_tree(0),
+     tree(0), original(0), always_null(FALSE)
     always_null(0)
  { quick_group= 0; }
  Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
    :Item_sum_int(thd, item), table(item->table),
     used_table_cache(item->used_table_cache),
     field_lengths(item->field_lengths),
     tmp_table_param(item->tmp_table_param),
-     tree(item->tree), original(item), key_length(item->key_length),
+     tree(item->tree), original(item), tree_key_length(item->tree_key_length),
     max_elements_in_tree(item->max_elements_in_tree),
     rec_offset(item->rec_offset), use_tree(item->use_tree),
     always_null(item->always_null)
  {}
  ~Item_sum_count_distinct();
  void cleanup();
  table_map used_tables() const { return used_table_cache; }
  enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
  void clear();
  bool add();
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@ -1831,6 +1831,7 @@ public:
  Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
 	 uint size_arg, ulong max_in_memory_size_arg);
  ~Unique();
  ulong elements_in_tree() { return tree.elements_in_tree; }
  inline bool unique_add(void *ptr)
  {
    DBUG_ENTER("unique_add");