5.5 merge

2014-03-26 22:25:38 +01:00 · 2014-03-26 22:25:38 +01:00 · 10740939eb
commit 10740939eb
parent a91c59c2af 44002a34e6
600 changed files with 129062 additions and 121039 deletions
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@ -7943,6 +7943,7 @@ void handle_error(struct st_command *command,

  DBUG_ENTER("handle_error");

+  command->used_replace= 1;
  if (command->require_file)
  {
    /*
--- a/dbug/dbug.c
+++ b/dbug/dbug.c
@ -1642,6 +1642,7 @@ void _db_end_()

  cs->stack= &init_settings;
  FreeState(cs, 0);
+  pthread_mutex_destroy(&THR_LOCK_dbug);
  init_done= 0;
 }

--- a/include/heap.h
+++ b/include/heap.h
@ -102,8 +102,8 @@ typedef struct st_heap_block
  HP_PTRS *root;                        /* Top-level block */ 
  struct st_level_info level_info[HP_MAX_LEVELS+1];
  uint levels;                          /* number of used levels */
-  uint records_in_block;		/* Records in one heap-block */
  uint recbuffer;			/* Length of one saved record */
+  ulong records_in_block;		/* Records in one heap-block */
  ulong last_allocated; /* number of records there is allocated space for */
 } HP_BLOCK;

@ -134,14 +134,15 @@ typedef struct st_heap_share
 {
  HP_BLOCK block;
  HP_KEYDEF  *keydef;
-  ulong min_records,max_records;	/* Params to open */
  ulonglong data_length,index_length,max_table_size;
+  ulonglong auto_increment;
+  ulong min_records,max_records;	/* Params to open */
+  ulong records;			/* records */
+  ulong blength;			/* records rounded up to 2^n */
+  ulong deleted;			/* Deleted records in database */
  uint key_stat_version;                /* version to indicate insert/delete */
  uint key_version;                     /* Updated on key change */
  uint file_version;                    /* Update on clear */
-  uint records;				/* records */
-  uint blength;				/* records rounded up to 2^n */
-  uint deleted;				/* Deleted records in database */
  uint reclength;			/* Length of one record */
  uint changed;
  uint keys,max_key_length;
@ -157,7 +158,6 @@ typedef struct st_heap_share
  LIST open_list;
  uint auto_key;
  uint auto_key_type;			/* real type of the auto key segment */
-  ulonglong auto_increment;
 } HP_SHARE;

 struct st_hp_hash_info;
@ -188,12 +188,12 @@ typedef struct st_heap_info
 typedef struct st_heap_create_info
 {
  HP_KEYDEF *keydef;
-  ulong max_records;
-  ulong min_records;
  uint auto_key;                        /* keynr [1 - maxkey] for auto key */
  uint auto_key_type;
  uint keys;
  uint reclength;
+  ulong max_records;
+  ulong min_records;
  ulonglong max_table_size;
  ulonglong auto_increment;
  my_bool with_auto_increment;
--- a/libmysql/CMakeLists.txt
+++ b/libmysql/CMakeLists.txt
@ -306,7 +306,45 @@ IF(CMAKE_SYSTEM_NAME MATCHES "Linux")

    # DBD::mysql requires this
    is_prefix
-  )  
+  )
+
+ 
+  # And even more so on Debian
+  SET(CLIENT_API_5_5_EXTRA
+    # libmyodbc. Argh!
+    alloc_dynamic
+    alloc_root
+    delete_dynamic
+    dynstr_append
+    dynstr_append_mem
+    dynstr_append_os_quoted
+    dynstr_free
+    dynstr_realloc
+    free_root
+    get_dynamic
+    init_dynamic_array2
+    init_dynamic_string
+    int2str
+    list_add
+    list_delete
+    my_end
+    my_free
+    my_malloc
+    my_memdup
+    my_realloc
+    my_strdup
+    set_dynamic
+    strdup_root
+    strend
+    strfill
+    strmake
+    strmake_root
+    strxmov
+
+    # pam_mysql.so
+    make_scrambled_password
+    make_scrambled_password_323
+  )

  # Linker script to version symbols in Fedora- and Debian- compatible way, MDEV-5529
  SET(VERSION_SCRIPT_TEMPLATE ${CMAKE_CURRENT_SOURCE_DIR}/libmysql_versions.ld.in)
@ -324,7 +362,7 @@ IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
  ENDFOREACH()
      
  SET (CLIENT_API_5_5_LIST)
-  FOREACH (f ${CLIENT_API_FUNCTIONS_5_5})
+  FOREACH (f ${CLIENT_API_FUNCTIONS_5_5} ${CLIENT_API_5_5_EXTRA})
    SET(CLIENT_API_5_5_LIST "${CLIENT_API_5_5_LIST}\t${f};\n")
  ENDFOREACH()

@ -385,7 +423,9 @@ IF(UNIX)
 ENDIF()

 IF(NOT DISABLE_SHARED)
-  MERGE_LIBRARIES(libmysql SHARED ${LIBS} EXPORTS ${CLIENT_API_FUNCTIONS} ${CLIENT_API_5_1_EXTRA} COMPONENT SharedLibraries)
+  MERGE_LIBRARIES(libmysql SHARED ${LIBS}
+    EXPORTS ${CLIENT_API_FUNCTIONS} ${CLIENT_API_5_1_EXTRA} ${CLIENT_API_5_5_EXTRA}
+    COMPONENT SharedLibraries)
  IF(UNIX)
    # libtool compatability
    IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" OR APPLE)
--- a/mysql-test/include/have_unix_socket.inc
+++ b/mysql-test/include/have_unix_socket.inc
@ -0,0 +1,13 @@
+--source include/not_embedded.inc
+
+if (!$AUTH_SOCKET_SO) {
+  skip No unix_socket plugin;
+}
+
+if (!$USER) {
+  skip USER variable is undefined;
+}
+
+if (`SELECT count(*) <> 0 FROM mysql.user WHERE user = '$USER'`) {
+  skip \$USER=$USER which exists in mysql.user;
+}
--- a/mysql-test/r/ctype_binary.result
+++ b/mysql-test/r/ctype_binary.result
@ -740,7 +740,7 @@ create table t1 as select concat(uncompressed_length('')) as c1;
 show create table t1;
 Table	Create Table
 t1	CREATE TABLE `t1` (
-  `c1` varbinary(10) NOT NULL DEFAULT ''
+  `c1` varbinary(10) DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 drop table t1;
 create table t1 as select concat(connection_id()) as c1;
--- a/mysql-test/r/ctype_cp1251.result
+++ b/mysql-test/r/ctype_cp1251.result
@ -1132,7 +1132,7 @@ create table t1 as select concat(uncompressed_length('')) as c1;
 show create table t1;
 Table	Create Table
 t1	CREATE TABLE `t1` (
-  `c1` varchar(10) CHARACTER SET cp1251 NOT NULL DEFAULT ''
+  `c1` varchar(10) CHARACTER SET cp1251 DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 drop table t1;
 create table t1 as select concat(connection_id()) as c1;
--- a/mysql-test/r/ctype_latin1.result
+++ b/mysql-test/r/ctype_latin1.result
@ -1212,7 +1212,7 @@ create table t1 as select concat(uncompressed_length('')) as c1;
 show create table t1;
 Table	Create Table
 t1	CREATE TABLE `t1` (
-  `c1` varchar(10) NOT NULL DEFAULT ''
+  `c1` varchar(10) DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 drop table t1;
 create table t1 as select concat(connection_id()) as c1;
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@ -2212,7 +2212,7 @@ create table t1 as select concat(uncompressed_length('')) as c1;
 show create table t1;
 Table	Create Table
 t1	CREATE TABLE `t1` (
-  `c1` varchar(10) CHARACTER SET ucs2 NOT NULL DEFAULT ''
+  `c1` varchar(10) CHARACTER SET ucs2 DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 drop table t1;
 create table t1 as select concat(connection_id()) as c1;
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@ -3089,7 +3089,7 @@ create table t1 as select concat(uncompressed_length('')) as c1;
 show create table t1;
 Table	Create Table
 t1	CREATE TABLE `t1` (
-  `c1` varchar(10) CHARACTER SET utf8 NOT NULL DEFAULT ''
+  `c1` varchar(10) CHARACTER SET utf8 DEFAULT NULL
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 drop table t1;
 create table t1 as select concat(connection_id()) as c1;
--- a/mysql-test/r/derived_view.result
+++ b/mysql-test/r/derived_view.result
@ -2366,6 +2366,46 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 3	DEPENDENT SUBQUERY	pi	ref	gallery_id	gallery_id	4	test.gal.id	4	Using temporary; Using filesort
 drop table galleries, pictures;
 #
+# MDEV-5740: Assertion 
+#`!derived->first_select()->exclude_from_table_unique_test ||
+#derived->outer_select()-> exclude_from_table_unique_test'
+#failed on 2nd execution of PS with derived_merge
+#
+set @save_optimizer_switch5740=@@optimizer_switch;
+SET optimizer_switch = 'derived_merge=on';
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (b INT);
+INSERT INTO t2 VALUES (3),(4);
+PREPARE stmt FROM '
+  INSERT INTO t1 SELECT * FROM t2 UNION SELECT * FROM (SELECT * FROM t1) AS sq  
+';
+EXECUTE stmt;
+select * from t1;
+a
+1
+2
+3
+4
+1
+2
+EXECUTE stmt;
+select * from t1;
+a
+1
+2
+3
+4
+1
+2
+3
+4
+1
+2
+deallocate prepare stmt;
+drop table t1,t2;
+set optimizer_switch=@save_optimizer_switch5740;
+#
 # end of 5.3 tests
 #
 set optimizer_switch=@exit_optimizer_switch;
--- a/mysql-test/r/failed_auth_unixsocket.result
+++ b/mysql-test/r/failed_auth_unixsocket.result
@ -0,0 +1,12 @@
+update mysql.user set plugin='unix_socket';
+flush privileges;
+connect(localhost,USER,,test,MASTER_PORT,MASTER_SOCKET);
+ERROR HY000: Plugin 'unix_socket' is not loaded
+ERROR HY000: Plugin 'unix_socket' is not loaded
+install plugin unix_socket soname 'auth_socket.so';
+connect(localhost,USER,,test,MASTER_PORT,MASTER_SOCKET);
+ERROR 28000: Access denied for user 'USER'@'localhost'
+ERROR 28000: Access denied for user 'USER'@'localhost'
+update mysql.user set plugin='';
+flush privileges;
+uninstall plugin unix_socket;
--- a/mysql-test/r/func_compress.result
+++ b/mysql-test/r/func_compress.result
@ -127,3 +127,23 @@ NULL	825307441
 EXPLAIN EXTENDED SELECT * FROM (SELECT UNCOMPRESSED_LENGTH(c1) FROM t1) AS s;
 DROP TABLE t1;
 End of 5.0 tests
+#
+# Start of 5.3 tests
+#
+#
+# MDEV-5783 Assertion `0' failed in make_sortkey(SORTPARAM*, uchar*, uchar*) on ORDER BY HEX(UNCOMPRESSED_LENGTH(pk))
+#
+CREATE TABLE t1 (pk INT PRIMARY KEY);
+INSERT INTO t1 VALUES (1),(2);
+SELECT UNCOMPRESSED_LENGTH(pk) FROM t1;
+UNCOMPRESSED_LENGTH(pk)
+NULL
+NULL
+Warnings:
+Warning	1259	ZLIB: Input data corrupted
+Warning	1259	ZLIB: Input data corrupted
+SELECT * FROM t1 ORDER BY HEX(UNCOMPRESSED_LENGTH(pk));
+DROP TABLE t1;
+#
+# End of 5.3 tests
+#
--- a/mysql-test/r/func_misc.result
+++ b/mysql-test/r/func_misc.result
@ -272,6 +272,22 @@ SELECT NAME_CONST('a', -(1)) OR 1;
 NAME_CONST('a', -(1)) OR 1
 1
 #
+#MDEV-5446: Assertion `!table || (!table->read_set ||
+#bitmap_is_set(table->read_set, field_index))' fails on
+#EXPLAIN EXTENDED with VALUES function
+#
+CREATE TABLE t1 (a INT, b INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (1,10);
+CREATE VIEW v1 AS SELECT * FROM t1;
+EXPLAIN EXTENDED SELECT VALUES(b) FROM v1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	system	NULL	NULL	NULL	NULL	1	100.00	
+Warnings:
+Note	1003	select values(10) AS `VALUES(b)` from dual
+drop view v1;
+drop table t1;
+End of 5.3 tests
+# 
 # Bug #52165: Assertion failed: file .\dtoa.c, line 465
 # 
 CREATE TABLE t1 (a SET('a'), b INT);
--- a/mysql-test/r/locked_temporary-5955.result
+++ b/mysql-test/r/locked_temporary-5955.result
@ -0,0 +1,2 @@
+CREATE TEMPORARY TABLE tmp (i INT) ENGINE=InnoDB;
+LOCK TABLES tmp AS p WRITE;
--- a/mysql-test/r/partition.result
+++ b/mysql-test/r/partition.result
@ -2535,45 +2535,6 @@ i
 4
 DROP TABLE t1;
 #
-# MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703)
-#
-create table t1 (
-a int not null,
-b int not null,
-pk int not null,
-primary key (pk),
-key(a),
-key(b)
-) partition by hash(pk) partitions 10;
-insert into t1 values (1,2,4);
-insert into t1 values (1,0,17);
-insert into t1 values (1,2,25);
-insert into t1 values (10,20,122);
-insert into t1 values (10,20,123);
-create table t2 (a int);
-insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C;
-insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a,
-10+A.a + 10*B.a + 100*C.a  + 1000*D.a, 
-2000 + A.a + 10*B.a + 100*C.a + 1000*D.a
-from t2 A, t2 B, t2 C ,t2 D;
-explain select * from t1 where a=1 and b=2 and  pk between 1 and 999999 ;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ref	PRIMARY,a,b	b	4	const	982	Using where
-create temporary table t3 as
-select * from t1 where a=1 and b=2 and  pk between 1 and 999 ;
-select count(*) from t3;
-count(*)
-802
-drop table t3;
-create temporary table t3 as
-select * from t1 ignore index(a,b)  where a=1 and b=2 and  pk between 1 and 999 ;
-select count(*) from t3;
-count(*)
-802
-drop table t3;
-drop table t1,t2;
-#
 # MDEV-5555: Incorrect index_merge on BTREE indices
 #
 CREATE TABLE t1 (
--- a/mysql-test/r/partition_innodb.result
+++ b/mysql-test/r/partition_innodb.result
@ -654,4 +654,43 @@ col1	col2	col3
 1	2	2013-03-11 16:33:04
 1	2	2013-03-11 16:33:24
 DROP TABLE t1;
+#
+# MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703)
+#
+create table t1 (
+a int not null,
+b int not null,
+pk int not null,
+primary key (pk),
+key(a),
+key(b)
+) engine=innodb partition by hash(pk) partitions 10;
+insert into t1 values (1,2,4);
+insert into t1 values (1,0,17);
+insert into t1 values (1,2,25);
+insert into t1 values (10,20,122);
+insert into t1 values (10,20,123);
+create table t2 (a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C;
+insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a,
+10+A.a + 10*B.a + 100*C.a  + 1000*D.a, 
+2000 + A.a + 10*B.a + 100*C.a + 1000*D.a
+from t2 A, t2 B, t2 C ,t2 D;
+explain select * from t1 where a=1 and b=2 and  pk between 1 and 999999 ;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index_merge	PRIMARY,a,b	b,a	4,4	NULL	#	Using intersect(b,a); Using where; Using index
+create temporary table t3 as
+select * from t1 where a=1 and b=2 and  pk between 1 and 999 ;
+select count(*) from t3;
+count(*)
+802
+drop table t3;
+create temporary table t3 as
+select * from t1 ignore index(a,b)  where a=1 and b=2 and  pk between 1 and 999 ;
+select count(*) from t3;
+count(*)
+802
+drop table t3;
+drop table t1,t2;
 set global default_storage_engine=default;
--- a/mysql-test/r/partition_order.result
+++ b/mysql-test/r/partition_order.result
@ -734,8 +734,8 @@ a	b
 7	1
 35	2
 3	3
-2	4
 30	4
+2	4
 4	5
 6	6
 select * from t1 force index (b) where b < 10 ORDER BY b;
@ -744,16 +744,16 @@ a	b
 7	1
 35	2
 3	3
-2	4
 30	4
+2	4
 4	5
 6	6
 select * from t1 force index (b) where b < 10 ORDER BY b DESC;
 a	b
 6	6
 4	5
-30	4
 2	4
+30	4
 3	3
 35	2
 7	1
--- a/mysql-test/r/subselect4.result
+++ b/mysql-test/r/subselect4.result
@ -2331,6 +2331,28 @@ id	a2	a3	id	a2	a3
 DROP VIEW v2;
 DROP TABLE t1,t2;
 #
+# MDEV-5686: degenerate disjunct in NOT IN subquery
+#
+CREATE TABLE t1 (a int, b int, c varchar(3)) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (1,1,'CAN'),(2,2,'AUS');
+CREATE TABLE t2 (f int) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (3);
+EXPLAIN EXTENDED
+SELECT * FROM t2 
+WHERE f NOT IN (SELECT b FROM t1
+WHERE 0 OR (c IN ('USA') OR c NOT IN ('USA')) AND a = b);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	PRIMARY	t2	system	NULL	NULL	NULL	NULL	1	100.00	
+2	DEPENDENT SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	2	100.00	Using where
+Warnings:
+Note	1003	select 3 AS `f` from dual where (not(<expr_cache><3>(<in_optimizer>(3,<exists>(select `test`.`t1`.`b` from `test`.`t1` where (((`test`.`t1`.`c` = 'USA') or (`test`.`t1`.`c` <> 'USA')) and trigcond(((<cache>(3) = `test`.`t1`.`b`) or isnull(`test`.`t1`.`b`))) and (`test`.`t1`.`b` = `test`.`t1`.`a`)) having trigcond(<is_not_null_test>(`test`.`t1`.`b`)))))))
+SELECT * FROM t2 
+WHERE f NOT IN (SELECT b FROM t1
+WHERE 0 OR (c IN ('USA') OR c NOT IN ('USA')) AND a = b);
+f
+3
+DROP TABLE t1,t2;
+#
 # MDEV-3899  Valgrind warnings (blocks are definitely lost) in filesort on IN subquery with SUM and DISTINCT
 #
 CREATE TABLE t1 (a INT) ENGINE=MyISAM;
--- a/mysql-test/r/subselect_mat.result
+++ b/mysql-test/r/subselect_mat.result
@ -2055,6 +2055,20 @@ EXECUTE stmt;
 a
 DROP TABLE t1, t2;
 DROP VIEW v2;
+#
+# MDEV-5811: Server crashes in best_access_path with materialization+semijoin and big_tables=ON
+#
+SET @tmp_mdev5811= @@big_tables;
+SET big_tables = ON;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (b INT);
+INSERT INTO t2 VALUES (3),(4);
+SELECT * FROM t1 AS t1_1, t1 AS t1_2 
+WHERE ( t1_1.a, t1_2.a ) IN ( SELECT MAX(b), MIN(b) FROM t2 );
+a	a
+DROP TABLE t1,t2;
+SET big_tables=@tmp_mdev5811;
 # End of 5.3 tests
 #
 # MDEV-5056: Wrong result (extra rows) with materialization+semijoin, IN subqueries
--- a/mysql-test/r/subselect_sj_mat.result
+++ b/mysql-test/r/subselect_sj_mat.result
@ -2095,6 +2095,20 @@ EXECUTE stmt;
 a
 DROP TABLE t1, t2;
 DROP VIEW v2;
+#
+# MDEV-5811: Server crashes in best_access_path with materialization+semijoin and big_tables=ON
+#
+SET @tmp_mdev5811= @@big_tables;
+SET big_tables = ON;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (b INT);
+INSERT INTO t2 VALUES (3),(4);
+SELECT * FROM t1 AS t1_1, t1 AS t1_2 
+WHERE ( t1_1.a, t1_2.a ) IN ( SELECT MAX(b), MIN(b) FROM t2 );
+a	a
+DROP TABLE t1,t2;
+SET big_tables=@tmp_mdev5811;
 # End of 5.3 tests
 #
 # MDEV-5056: Wrong result (extra rows) with materialization+semijoin, IN subqueries
--- a/mysql-test/r/view.result
+++ b/mysql-test/r/view.result
@ -5000,6 +5000,22 @@ v1_field1
 deallocate prepare my_stmt;
 DROP VIEW v1,v2;
 DROP TABLE t1,t2,t3,t4;
+#
+#MDEV-5717: Server crash with insert statement containing DEFAULT into
+#view
+#
+CREATE TABLE t1 (
+`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
+`test` tinyint(3) unsigned NOT NULL DEFAULT '0',
+PRIMARY KEY (`id`)
+);
+CREATE VIEW v1 AS (select  t1.id AS id,  t1.test AS test from t1);
+INSERT INTO v1 SET test = DEFAULT;
+select * from v1;
+id	test
+1	0
+drop view v1;
+drop table t1;
 # -----------------------------------------------------------------
 # -- End of 5.3 tests.
 # -----------------------------------------------------------------
--- a/mysql-test/suite/heap/heap.result
+++ b/mysql-test/suite/heap/heap.result
@ -758,6 +758,14 @@ SELECT * from t1;
 id	color	ts
 7	GREEN	2
 DROP TABLE t1;
+CREATE TABLE t1 (id INT);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+SET @@max_heap_table_size = 1024*1024*1024*20;
+CREATE TEMPORARY TABLE tmp ENGINE=MEMORY
+SELECT id FROM t1;
+DROP TEMPORARY TABLE tmp;
+drop table t1;
 #
 # BUG#11825482: Broken key length calculation for btree index
 #
--- a/mysql-test/suite/heap/heap.test
+++ b/mysql-test/suite/heap/heap.test
@ -510,6 +510,22 @@ DELETE FROM t1 WHERE ts = 1 AND color = 'GREEN';
 SELECT * from t1;
 DROP TABLE t1;

+
+#
+# MDEV-5905 Creating tmp. memory table kills the server
+#
+
+CREATE TABLE t1 (id INT);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t1 VALUES (2);
+
+SET @@max_heap_table_size = 1024*1024*1024*20;
+
+CREATE TEMPORARY TABLE tmp ENGINE=MEMORY
+  SELECT id FROM t1;
+DROP TEMPORARY TABLE tmp;
+drop table t1;
+
 --echo #
 --echo # BUG#11825482: Broken key length calculation for btree index
 --echo #
--- a/mysql-test/suite/plugins/t/server_audit.test
+++ b/mysql-test/suite/plugins/t/server_audit.test
@ -14,6 +14,7 @@ set global server_audit_logging=on;
 connect (con1,localhost,root,,mysql);
 connection default;
 disconnect con1;
+--sleep 2
 --replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT
 --error ER_ACCESS_DENIED_ERROR
 connect (con1,localhost,no_such_user,,mysql);
--- a/mysql-test/suite/plugins/t/unix_socket.test
+++ b/mysql-test/suite/plugins/t/unix_socket.test
@ -1,20 +1,4 @@
--source include/not_embedded.inc
-# If we run this as root, $USER gets authenticated as the `root' user, and we
-# get .result differences from CURRENT_USER().
--source include/not_as_root.inc
-
-# The previous check verifies that the user does not have root permissions. 
-# However in some cases tests are run under a user named 'root',
-# even although this user does not have real root permissions. 
-# This test should be skipped in this case, since it does not expect
-# that there are records in mysql.user where user=<username>
-if ($USER=="root") {
-  skip Cannot be run by user named 'root' even if it does not have all privileges;
-}
-
-if (!$AUTH_SOCKET_SO) {
-  skip No auth_socket plugin;
-}
+--source include/have_unix_socket.inc

 if (!$USER) {
  skip USER variable is undefined;
--- a/mysql-test/suite/rpl/r/rpl_000011.result
+++ b/mysql-test/suite/rpl/r/rpl_000011.result
@ -2,7 +2,13 @@ include/master-slave.inc
 [connection master]
 create table t1 (n int);
 insert into t1 values(1);
+show global status like 'com_insert';
+Variable_name	Value
+Com_insert	1
 stop slave;
+show global status like 'com_insert';
+Variable_name	Value
+Com_insert	1
 include/wait_for_slave_to_stop.inc
 start slave;
 include/wait_for_slave_to_start.inc
--- a/mysql-test/suite/rpl/t/rpl_000011-slave.opt
+++ b/mysql-test/suite/rpl/t/rpl_000011-slave.opt
@ -0,0 +1 @@
+--verbose=1
--- a/mysql-test/suite/rpl/t/rpl_000011.test
+++ b/mysql-test/suite/rpl/t/rpl_000011.test
@ -1,9 +1,17 @@
+#
+# Test very simple slave replication (to ensure it works at all).
+# In addition, also test:
+# MDEV-5829 STOP SLAVE resets global status variables
+#
+
 source include/master-slave.inc;

 create table t1 (n int);
 insert into t1 values(1);
 sync_slave_with_master; 
+show global status like 'com_insert';
 stop slave;
+show global status like 'com_insert';
 --source include/wait_for_slave_to_stop.inc
 start slave;
 --source include/wait_for_slave_to_start.inc
--- a/mysql-test/suite/rpl/t/rpl_bug37426.test
+++ b/mysql-test/suite/rpl/t/rpl_bug37426.test
@ -3,8 +3,8 @@
 #   RBR breaks for CHAR() UTF8 fields > 85 chars
 #############################################################

-source include/master-slave.inc;
 source include/have_binlog_format_row.inc;
+source include/master-slave.inc;

 connection master;
 CREATE TABLE char128_utf8 (i1 INT NOT NULL, c CHAR(128) CHARACTER SET utf8 NOT NULL, i2 INT NOT NULL);
--- a/mysql-test/suite/rpl/t/rpl_connection.test
+++ b/mysql-test/suite/rpl/t/rpl_connection.test
@ -1,6 +1,6 @@
 --source include/not_embedded.inc
--source include/master-slave.inc
 --source include/have_binlog_format_mixed.inc
+--source include/master-slave.inc

 #
 # BUG#13427949: CHANGE MASTER TO USER='' (EMPTY USER) CAUSES ERRORS ON VALGRING 
--- a/mysql-test/suite/rpl/t/rpl_known_bugs_detection.test
+++ b/mysql-test/suite/rpl/t/rpl_known_bugs_detection.test
@ -8,13 +8,14 @@ call mtr.add_suppression("Unsafe statement written to the binary log using state
 source include/have_debug.inc;
 # because of pretend_version_50034_in_binlog the test can't run with checksum
 source include/have_binlog_checksum_off.inc;
-source include/master-slave.inc;
-
-call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.");

 # Currently only statement-based-specific bugs are here
 -- source include/have_binlog_format_statement.inc

+source include/master-slave.inc;
+
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.");
+
 #
 # This is to test that slave properly detects if
 # master may suffer from:
--- a/mysql-test/suite/rpl/t/rpl_mix_found_rows.test
+++ b/mysql-test/suite/rpl/t/rpl_mix_found_rows.test
@ -1,5 +1,5 @@
-source include/master-slave.inc;
 source include/have_binlog_format_mixed.inc;
+source include/master-slave.inc;

 # It is not possible to replicate FOUND_ROWS() using statement-based
 # replication, but there is a workaround that stores the result of
--- a/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test
+++ b/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test
@ -1,9 +1,9 @@
 #
 # Bug#11760927: 53375: RBR + NO PK => HIGH LOAD ON SLAVE (TABLE SCAN/CPU) => SLAVE FAILURE
 #
--source include/master-slave.inc
 --source include/have_binlog_format_row.inc
 --source include/have_debug.inc
+--source include/master-slave.inc

 # SETUP
 # - setup log_warnings and debug 
--- a/mysql-test/suite/rpl/t/rpl_row_unsafe_funcs.test
+++ b/mysql-test/suite/rpl/t/rpl_row_unsafe_funcs.test
@ -1,5 +1,5 @@
-source include/master-slave.inc;
 source include/have_binlog_format_mixed.inc;
+source include/master-slave.inc;

 #
 # Bug #30244: row_count/found_rows does not replicate well
--- a/mysql-test/suite/rpl/t/rpl_row_utf32.test
+++ b/mysql-test/suite/rpl/t/rpl_row_utf32.test
@ -1,6 +1,6 @@
-- source include/master-slave.inc
 -- source include/have_binlog_format_row.inc
 -- source include/have_utf32.inc
+-- source include/master-slave.inc

 #
 # BUG#51787 Assertion `(n % 4) == 0' on slave upon INSERT into a table with UTF32
--- a/mysql-test/suite/rpl/t/rpl_stm_sql_mode.test
+++ b/mysql-test/suite/rpl/t/rpl_stm_sql_mode.test
@ -1,5 +1,5 @@
-- source include/master-slave.inc
 -- source include/have_binlog_format_statement.inc
+-- source include/master-slave.inc

 #
 # Bug #51055    Replication failure on duplicate key + traditional SQL mode
--- a/mysql-test/suite/rpl/t/rpl_stm_stop_middle_group.test
+++ b/mysql-test/suite/rpl/t/rpl_stm_stop_middle_group.test
@ -2,9 +2,9 @@
 # Please, check extra/rpl_tests/rpl_stop_middle_group.test.
 ###################################################################################
 -- source include/have_debug.inc
-- source include/master-slave.inc
 -- source include/have_innodb.inc
 -- source include/have_binlog_format_statement.inc
+-- source include/master-slave.inc

 SET @@session.binlog_direct_non_transactional_updates= FALSE;
 -- source extra/rpl_tests/rpl_stop_middle_group.test
--- a/mysql-test/suite/rpl/t/rpl_tmp_table_and_DDL.test
+++ b/mysql-test/suite/rpl/t/rpl_tmp_table_and_DDL.test
@ -5,8 +5,8 @@
 # does not exist' base on myisam engine.
 #

-source include/master-slave.inc;
 source include/have_binlog_format_row.inc;
+source include/master-slave.inc;

 LET $ENGINE_TYPE= MyISAM;
 source extra/rpl_tests/rpl_tmp_table_and_DDL.test;
--- a/mysql-test/t/derived_view.test
+++ b/mysql-test/t/derived_view.test
@ -1703,6 +1703,33 @@ ORDER BY gallery_name ASC

 drop table galleries, pictures;

+--echo #
+--echo # MDEV-5740: Assertion 
+--echo #`!derived->first_select()->exclude_from_table_unique_test ||
+--echo #derived->outer_select()-> exclude_from_table_unique_test'
+--echo #failed on 2nd execution of PS with derived_merge
+--echo #
+
+set @save_optimizer_switch5740=@@optimizer_switch;
+SET optimizer_switch = 'derived_merge=on';
+
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (b INT);
+INSERT INTO t2 VALUES (3),(4);
+
+PREPARE stmt FROM '
+  INSERT INTO t1 SELECT * FROM t2 UNION SELECT * FROM (SELECT * FROM t1) AS sq  
+';
+EXECUTE stmt;
+select * from t1;
+EXECUTE stmt;
+select * from t1;
+deallocate prepare stmt;
+
+drop table t1,t2;
+set optimizer_switch=@save_optimizer_switch5740;
+
 --echo #
 --echo # end of 5.3 tests
 --echo #
--- a/mysql-test/t/failed_auth_unixsocket.test
+++ b/mysql-test/t/failed_auth_unixsocket.test
@ -0,0 +1,30 @@
+--source include/have_unix_socket.inc
+
+#
+# MDEV-3909 remote user enumeration
+# unix_socket tests
+#
+update mysql.user set plugin='unix_socket';
+flush privileges;
+
+--replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT $USER USER
+--error ER_PLUGIN_IS_NOT_LOADED
+connect (fail,localhost,$USER);
+
+--error ER_PLUGIN_IS_NOT_LOADED
+change_user $USER;
+
+eval install plugin unix_socket soname '$AUTH_SOCKET_SO';
+
+--replace_result $MASTER_MYSOCK MASTER_SOCKET $MASTER_MYPORT MASTER_PORT $USER USER
+--error ER_ACCESS_DENIED_NO_PASSWORD_ERROR
+connect (fail,localhost,$USER);
+
+--replace_result $USER USER
+--error ER_ACCESS_DENIED_NO_PASSWORD_ERROR
+change_user $USER;
+
+update mysql.user set plugin='';
+flush privileges;
+uninstall plugin unix_socket;
+
--- a/mysql-test/t/func_compress.test
+++ b/mysql-test/t/func_compress.test
@ -115,3 +115,24 @@ DROP TABLE t1;
 set @@global.max_allowed_packet=default;
 --enable_result_log
 --enable_query_log
+
+
+--echo #
+--echo # Start of 5.3 tests
+--echo #
+
+--echo #
+--echo # MDEV-5783 Assertion `0' failed in make_sortkey(SORTPARAM*, uchar*, uchar*) on ORDER BY HEX(UNCOMPRESSED_LENGTH(pk))
+--echo #
+CREATE TABLE t1 (pk INT PRIMARY KEY);
+INSERT INTO t1 VALUES (1),(2);
+SELECT UNCOMPRESSED_LENGTH(pk) FROM t1;
+# ORDER is not strict, so disable results
+--disable_result_log
+SELECT * FROM t1 ORDER BY HEX(UNCOMPRESSED_LENGTH(pk));
+--enable_result_log
+DROP TABLE t1;
+
+--echo #
+--echo # End of 5.3 tests
+--echo #
--- a/mysql-test/t/func_misc.test
+++ b/mysql-test/t/func_misc.test
@ -308,6 +308,22 @@ SELECT NAME_CONST('a', -(1 AND 2)) AND 1;
 SELECT NAME_CONST('a', -(1)) OR 1;

 --echo #
+--echo #MDEV-5446: Assertion `!table || (!table->read_set ||
+--echo #bitmap_is_set(table->read_set, field_index))' fails on
+--echo #EXPLAIN EXTENDED with VALUES function
+--echo #
+CREATE TABLE t1 (a INT, b INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (1,10);
+CREATE VIEW v1 AS SELECT * FROM t1;
+
+EXPLAIN EXTENDED SELECT VALUES(b) FROM v1;
+
+drop view v1;
+drop table t1;
+
+--echo End of 5.3 tests
+
+--echo # 
 --echo # Bug #52165: Assertion failed: file .\dtoa.c, line 465
 --echo # 

@ -556,4 +572,3 @@ select release_lock(repeat('a', 193));
 --echo #
 --echo # End of 5.5 tests
 --echo #
-
--- a/mysql-test/t/locked_temporary-5955.test
+++ b/mysql-test/t/locked_temporary-5955.test
@ -0,0 +1,10 @@
+#
+# MDEV-5955 Server crashes in handler::ha_external_lock or assertion `m_lock_type == 2' fails in handler::ha_close on disconnect with a locked temporary table
+#
+
+--source include/have_innodb.inc
+--connect (con1,localhost,root,,)
+CREATE TEMPORARY TABLE tmp (i INT) ENGINE=InnoDB;
+LOCK TABLES tmp AS p WRITE;
+--disconnect con1
+
--- a/mysql-test/t/partition.test
+++ b/mysql-test/t/partition.test
@ -2530,52 +2530,6 @@ ALTER TABLE t1 ADD PARTITION PARTITIONS 2;
 SELECT * from t1 order by i;
 DROP TABLE t1;

--echo #
--echo # MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703)
--echo #
-create table t1 (
-  a int not null,
-  b int not null,
-  pk int not null,
-  primary key (pk),
-  key(a),
-  key(b)
-) partition by hash(pk) partitions 10;
-
-insert into t1 values (1,2,4); # both
-insert into t1 values (1,0,17);  # left
-insert into t1 values (1,2,25);   # both
-
-insert into t1 values (10,20,122); 
-insert into t1 values (10,20,123);
-
-# Now, fill in some data so that the optimizer choses index_merge
-create table t2 (a int);
-insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-
-insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C;
-
-insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a,
-                       10+A.a + 10*B.a + 100*C.a  + 1000*D.a, 
-                       2000 + A.a + 10*B.a + 100*C.a + 1000*D.a
-                       from t2 A, t2 B, t2 C ,t2 D;
-
-# This should show index_merge, using intersect
-explain select * from t1 where a=1 and b=2 and  pk between 1 and 999999 ;
-# 794 rows in output
-create temporary table t3 as
-select * from t1 where a=1 and b=2 and  pk between 1 and 999 ;
-select count(*) from t3;
-drop table t3;
-
-# 802 rows in output
-create temporary table t3 as
-select * from t1 ignore index(a,b)  where a=1 and b=2 and  pk between 1 and 999 ;
-select count(*) from t3;
-drop table t3;
-
-drop table t1,t2;
-
 --echo #
 --echo # MDEV-5555: Incorrect index_merge on BTREE indices
 --echo #
--- a/mysql-test/t/partition_innodb.test
+++ b/mysql-test/t/partition_innodb.test
@ -729,5 +729,52 @@ GROUP BY 1, 2, 3;

 DROP TABLE t1;

+--echo #
+--echo # MDEV-5177: ha_partition and innodb index intersection produce fewer rows (MySQL Bug#70703)
+--echo #
+create table t1 (
+  a int not null,
+  b int not null,
+  pk int not null,
+  primary key (pk),
+  key(a),
+  key(b)
+) engine=innodb partition by hash(pk) partitions 10;
+
+insert into t1 values (1,2,4); # both
+insert into t1 values (1,0,17);  # left
+insert into t1 values (1,2,25);   # both
+
+insert into t1 values (10,20,122); 
+insert into t1 values (10,20,123);
+
+# Now, fill in some data so that the optimizer chooses index_merge
+create table t2 (a int);
+insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+insert into t1 select 1,2, 200 + A.a + 10*B.a + 100*C.a from t2 A, t2 B, t2 C;
+
+insert into t1 select 10+A.a + 10*B.a + 100*C.a + 1000*D.a,
+                       10+A.a + 10*B.a + 100*C.a  + 1000*D.a, 
+                       2000 + A.a + 10*B.a + 100*C.a + 1000*D.a
+                       from t2 A, t2 B, t2 C ,t2 D;
+
+# This should show index_merge, using intersect
+--replace_column 9 #
+explain select * from t1 where a=1 and b=2 and  pk between 1 and 999999 ;
+# 794 rows in output
+create temporary table t3 as
+select * from t1 where a=1 and b=2 and  pk between 1 and 999 ;
+select count(*) from t3;
+drop table t3;
+
+# 802 rows in output
+create temporary table t3 as
+select * from t1 ignore index(a,b)  where a=1 and b=2 and  pk between 1 and 999 ;
+select count(*) from t3;
+drop table t3;
+
+drop table t1,t2;
+

 set global default_storage_engine=default;
--- a/mysql-test/t/subselect4.test
+++ b/mysql-test/t/subselect4.test
@ -1885,6 +1885,27 @@ ORDER BY v2.id;
 DROP VIEW v2;
 DROP TABLE t1,t2;

+--echo #
+--echo # MDEV-5686: degenerate disjunct in NOT IN subquery
+--echo #
+
+CREATE TABLE t1 (a int, b int, c varchar(3)) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (1,1,'CAN'),(2,2,'AUS');
+
+CREATE TABLE t2 (f int) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (3);
+
+EXPLAIN EXTENDED
+SELECT * FROM t2 
+  WHERE f NOT IN (SELECT b FROM t1
+                    WHERE 0 OR (c IN ('USA') OR c NOT IN ('USA')) AND a = b);
+
+SELECT * FROM t2 
+  WHERE f NOT IN (SELECT b FROM t1
+                    WHERE 0 OR (c IN ('USA') OR c NOT IN ('USA')) AND a = b);
+
+DROP TABLE t1,t2;
+
 --echo #
 --echo # MDEV-3899  Valgrind warnings (blocks are definitely lost) in filesort on IN subquery with SUM and DISTINCT
 --echo #
--- a/mysql-test/t/subselect_sj_mat.test
+++ b/mysql-test/t/subselect_sj_mat.test
@ -1745,6 +1745,24 @@ EXECUTE stmt;
 DROP TABLE t1, t2;
 DROP VIEW v2;

+--echo #
+--echo # MDEV-5811: Server crashes in best_access_path with materialization+semijoin and big_tables=ON
+--echo #
+SET @tmp_mdev5811= @@big_tables;
+SET big_tables = ON;
+
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2);
+
+CREATE TABLE t2 (b INT);
+INSERT INTO t2 VALUES (3),(4);
+
+SELECT * FROM t1 AS t1_1, t1 AS t1_2 
+  WHERE ( t1_1.a, t1_2.a ) IN ( SELECT MAX(b), MIN(b) FROM t2 );
+
+DROP TABLE t1,t2;
+SET big_tables=@tmp_mdev5811;
+
 --echo # End of 5.3 tests


--- a/mysql-test/t/view.test
+++ b/mysql-test/t/view.test
@ -4918,6 +4918,25 @@ deallocate prepare my_stmt;
 DROP VIEW v1,v2;
 DROP TABLE t1,t2,t3,t4;

+--echo #
+--echo #MDEV-5717: Server crash with insert statement containing DEFAULT into
+--echo #view
+--echo #
+CREATE TABLE t1 (
+  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
+  `test` tinyint(3) unsigned NOT NULL DEFAULT '0',
+  PRIMARY KEY (`id`)
+);
+
+CREATE VIEW v1 AS (select  t1.id AS id,  t1.test AS test from t1);
+
+INSERT INTO v1 SET test = DEFAULT;
+
+select * from v1;
+
+drop view v1;
+drop table t1;
+
 --echo # -----------------------------------------------------------------
 --echo # -- End of 5.3 tests.
 --echo # -----------------------------------------------------------------
--- a/plugin/server_audit/server_audit.c
+++ b/plugin/server_audit/server_audit.c
@ -84,6 +84,9 @@ static void closelog() {}
 #include <typelib.h>
 #include <mysql/plugin.h>
 #include <mysql/plugin_audit.h>
+#ifndef RTLD_DEFAULT
+#define RTLD_DEFAULT NULL
+#endif

 #undef my_init_dynamic_array_ci
 #define init_dynamic_array2 loc_init_dynamic_array2
@ -110,6 +113,20 @@ static void closelog() {}
 #define pop_dynamic loc_pop_dynamic
 #define delete_dynamic loc_delete_dynamic
 void *loc_alloc_dynamic(DYNAMIC_ARRAY *array);
+#ifdef my_strnncoll
+#undef my_strnncoll
+#define my_strnncoll(s, a, b, c, d) (my_strnncoll_binary((s), (a), (b), (c), (d), 0))
+#endif
+
+/*
+  Binary (byte-wise) string comparison; the charset argument is ignored.
+
+  The first min(slen, tlen) bytes are compared with memcmp(). If they
+  differ, that result is returned. Otherwise the result is a length
+  difference: slen - tlen normally, or min(slen, tlen) - tlen when
+  t_is_prefix is set (i.e. 0 when s is at least as long as t).
+*/
+static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
+    const uchar *s, size_t slen,
+    const uchar *t, size_t tlen,
+    my_bool t_is_prefix)
+{
+  size_t len= slen < tlen ? slen : tlen;
+  int cmp= memcmp(s,t,len);
+  return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
+}

 #include "../../mysys/array.c"
 #include "../../mysys/hash.c"
@ -172,6 +189,10 @@ static char default_file_name[DEFAULT_FILENAME_LEN+1]= "server_audit.log";

 static void update_file_path(MYSQL_THD thd, struct st_mysql_sys_var *var,
                             void *var_ptr, const void *save);
+static void update_file_rotate_size(MYSQL_THD thd, struct st_mysql_sys_var *var,
+                                    void *var_ptr, const void *save);
+static void update_file_rotations(MYSQL_THD thd, struct st_mysql_sys_var *var,
+                                  void *var_ptr, const void *save);
 static void update_incl_users(MYSQL_THD thd, struct st_mysql_sys_var *var,
                              void *var_ptr, const void *save);
 static void update_excl_users(MYSQL_THD thd, struct st_mysql_sys_var *var,
@ -230,11 +251,11 @@ static MYSQL_SYSVAR_STR(file_path, file_path, PLUGIN_VAR_RQCMDARG,
       "Path to the log file.", NULL, update_file_path, default_file_name);
 static MYSQL_SYSVAR_ULONGLONG(file_rotate_size, file_rotate_size,
       PLUGIN_VAR_RQCMDARG, "Maximum size of the log to start the rotation.",
-       NULL, NULL,
+       NULL, update_file_rotate_size,
       1000000, 100, ((long long) 0x7FFFFFFFFFFFFFFFLL), 1);
 static MYSQL_SYSVAR_UINT(file_rotations, rotations,
       PLUGIN_VAR_RQCMDARG, "Number of rotations before log is removed.",
-       NULL, NULL, 9, 0, 999, 1);
+       NULL, update_file_rotations, 9, 0, 999, 1);
 static MYSQL_SYSVAR_BOOL(file_rotate_now, rotate, PLUGIN_VAR_OPCMDARG,
       "Force log rotation now.", NULL, rotate_log, FALSE);
 static MYSQL_SYSVAR_BOOL(logging, logging,
@ -253,7 +274,13 @@ static const char *syslog_facility_names[]=
 {
  "LOG_USER", "LOG_MAIL", "LOG_DAEMON", "LOG_AUTH",
  "LOG_SYSLOG", "LOG_LPR", "LOG_NEWS", "LOG_UUCP",
-  "LOG_CRON", "LOG_AUTHPRIV", "LOG_FTP",
+  "LOG_CRON",
+#ifdef LOG_AUTHPRIV
+ "LOG_AUTHPRIV",
+#endif
+#ifdef LOG_FTP
+ "LOG_FTP",
+#endif
  "LOG_LOCAL0", "LOG_LOCAL1", "LOG_LOCAL2", "LOG_LOCAL3",
  "LOG_LOCAL4", "LOG_LOCAL5", "LOG_LOCAL6", "LOG_LOCAL7",
  0
@ -262,7 +289,13 @@ static unsigned int syslog_facility_codes[]=
 {
  LOG_USER, LOG_MAIL, LOG_DAEMON, LOG_AUTH,
  LOG_SYSLOG, LOG_LPR, LOG_NEWS, LOG_UUCP,
-  LOG_CRON, LOG_AUTHPRIV, LOG_FTP,
+  LOG_CRON,
+#ifdef LOG_AUTHPRIV
+ LOG_AUTHPRIV,
+#endif
+#ifdef LOG_FTP
+  LOG_FTP,
+#endif
  LOG_LOCAL0, LOG_LOCAL1, LOG_LOCAL2, LOG_LOCAL3,
  LOG_LOCAL4, LOG_LOCAL5, LOG_LOCAL6, LOG_LOCAL7,
 };
@ -1332,6 +1365,7 @@ exit_func:
    switch (after_action) {
    case AA_FREE_CONNECTION:
      my_hash_delete(&connection_hash, (uchar *) cn);
+      cn= 0;
      break;
    case AA_CHANGE_USER:
    {
@ -1434,11 +1468,11 @@ static int server_audit_init(void *p __attribute__((unused)))
  serv_ver= server_version;
 #endif /*_WIN32*/

-  my_hash_init_ptr= dlsym(NULL, "_my_hash_init");
+  my_hash_init_ptr= dlsym(RTLD_DEFAULT, "_my_hash_init");
  if (!my_hash_init_ptr)
  {
    maria_above_5= 1;
-    my_hash_init_ptr= dlsym(NULL, "my_hash_init2");
+    my_hash_init_ptr= dlsym(RTLD_DEFAULT, "my_hash_init2");
  }

  if (!serv_ver || !my_hash_init_ptr)
@ -1496,15 +1530,17 @@ static int server_audit_init(void *p __attribute__((unused)))
  /* so we warn users if both Query Cashe and TABLE events enabled.      */
  if (!started_mysql && FILTER(EVENT_TABLE))
  {
-    ulonglong *qc_size= (ulonglong *) dlsym(NULL, "query_cache_size");
+    ulonglong *qc_size= (ulonglong *) dlsym(RTLD_DEFAULT, "query_cache_size");
    if (qc_size == NULL || *qc_size != 0)
    {
      struct loc_system_variables *g_sys_var=
-        (struct loc_system_variables *) dlsym(NULL, "global_system_variables");
+        (struct loc_system_variables *) dlsym(RTLD_DEFAULT,
+                                          "global_system_variables");
      if (g_sys_var && g_sys_var->query_cache_type != 0)
      {
        error_header();
-        fprintf(stderr, "Query cache is enabled with the TABLE events. Some table reads can be veiled.");
+        fprintf(stderr, "Query cache is enabled with the TABLE events."
+                        " Some table reads can be veiled.");
      }
    }
  }
@ -1680,6 +1716,41 @@ exit_func:
 }


+/*
+  Update callback for the file_rotations system variable.
+
+  Stores the new value taken from 'save' in 'rotations' and reports the
+  change on stderr. When logging is enabled and the output type is
+  OUTPUT_FILE, the value is also copied into logfile->rotations while
+  holding lock_operations.
+*/
+static void update_file_rotations(MYSQL_THD thd  __attribute__((unused)),
+              struct st_mysql_sys_var *var  __attribute__((unused)),
+              void *var_ptr  __attribute__((unused)), const void *save)
+{
+  rotations= *(unsigned int *) save;
+  error_header();
+  /* The value is read as unsigned int, so print it with %u, not %d. */
+  fprintf(stderr, "Log file rotations was changed to '%u'.\n", rotations);
+
+  /* Nothing more to update unless file logging is currently active. */
+  if (!logging || output_type != OUTPUT_FILE)
+    return;
+
+  flogger_mutex_lock(&lock_operations);
+  logfile->rotations= rotations;
+  flogger_mutex_unlock(&lock_operations);
+}
+
+
+/*
+  Update callback for the file_rotate_size system variable.
+
+  Stores the new value taken from 'save' in 'file_rotate_size' and reports
+  the change on stderr. When logging is enabled and the output type is
+  OUTPUT_FILE, the value is also copied into logfile->size_limit while
+  holding lock_operations.
+*/
+static void update_file_rotate_size(MYSQL_THD thd  __attribute__((unused)),
+              struct st_mysql_sys_var *var  __attribute__((unused)),
+              void *var_ptr  __attribute__((unused)), const void *save)
+{
+  file_rotate_size= *(unsigned long long *) save;
+  error_header();
+  /* The value is read as unsigned long long, so print it with %llu. */
+  fprintf(stderr, "Log file rotate size was changed to '%llu'.\n",
+          file_rotate_size);
+
+  /* Nothing more to update unless file logging is currently active. */
+  if (!logging || output_type != OUTPUT_FILE)
+    return;
+
+  flogger_mutex_lock(&lock_operations);
+  logfile->size_limit= file_rotate_size;
+  flogger_mutex_unlock(&lock_operations);
+}
+
+
+
 static void update_incl_users(MYSQL_THD thd,
              struct st_mysql_sys_var *var  __attribute__((unused)),
              void *var_ptr  __attribute__((unused)), const void *save)
@ -1821,6 +1892,7 @@ static void update_mode(MYSQL_THD thd  __attribute__((unused)),
  flogger_mutex_unlock(&lock_operations);
 }

+
 static void update_syslog_ident(MYSQL_THD thd  __attribute__((unused)),
              struct st_mysql_sys_var *var  __attribute__((unused)),
              void *var_ptr  __attribute__((unused)), const void *save)
@ -1828,8 +1900,15 @@ static void update_syslog_ident(MYSQL_THD thd  __attribute__((unused)),
  strncpy(syslog_ident_buffer, *(const char **) save,
          sizeof(syslog_ident_buffer));
  syslog_ident= syslog_ident_buffer;
+  error_header();
+  fprintf(stderr, "SYSYLOG ident was changed to '%s'\n", syslog_ident);
  flogger_mutex_lock(&lock_operations);
  mark_always_logged(thd);
+  if (logging && output_type == OUTPUT_SYSLOG)
+  {
+    stop_logging();
+    start_logging();
+  }
  flogger_mutex_unlock(&lock_operations);
 }

--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@ -89,7 +89,8 @@ static handler *partition_create_handler(handlerton *hton,
 static uint partition_flags();
 static uint alter_table_flags(uint flags);

-extern "C" int cmp_key_then_part_id(void *key_p, uchar *ref1, uchar *ref2);
+extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2);
+extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);

 /*
  If frm_error() is called then we will use this to to find out what file
@ -5094,7 +5095,10 @@ bool ha_partition::init_record_priority_queue()
    uint alloc_len;
    uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);
    /* Allocate record buffer for each used partition. */
-    alloc_len= used_parts * (m_rec_length + PARTITION_BYTES_IN_POS);
+    m_priority_queue_rec_len= m_rec_length + PARTITION_BYTES_IN_POS;
+    if (!m_using_extended_keys)
+       m_priority_queue_rec_len += m_file[0]->ref_length;
+    alloc_len= used_parts * m_priority_queue_rec_len;
    /* Allocate a key for temporary use when setting up the scan. */
    alloc_len+= table_share->max_key_length;

@ -5116,12 +5120,24 @@ bool ha_partition::init_record_priority_queue()
    {
      DBUG_PRINT("info", ("init rec-buf for part %u", i));
      int2store(ptr, i);
-      ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+      ptr+= m_priority_queue_rec_len;
    }
    m_start_key.key= (const uchar*)ptr;
+    
    /* Initialize priority queue, initialized to reading forward. */
-    if (init_queue(&m_queue, used_parts, 0,
-                   0, cmp_key_then_part_id, (void*)m_curr_key_info, 0, 0))
+    int (*cmp_func)(void *, uchar *, uchar *);
+    void *cmp_arg;
+    if (!m_using_extended_keys)
+    {
+      cmp_func= cmp_key_rowid_part_id;
+      cmp_arg=  (void*)this;
+    }
+    else
+    {
+      cmp_func= cmp_key_part_id;
+      cmp_arg= (void*)m_curr_key_info;
+    }
+    if (init_queue(&m_queue, used_parts, 0, 0, cmp_func, cmp_arg, 0, 0))
    {
      my_free(m_ordered_rec_buffer);
      m_ordered_rec_buffer= NULL;
@ -5188,9 +5204,13 @@ int ha_partition::index_init(uint inx, bool sorted)
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
+    m_using_extended_keys= TRUE;
  }
  else
+  {
    m_curr_key_info[1]= NULL;
+    m_using_extended_keys= FALSE;
+  }

  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
@ -5331,36 +5351,12 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key,
 }


-/*
-  @brief
-  Provide ordering by (key_value, partition_id). 
-  
-  @detail
-  Ordering by partition id is required so that key scans on key=const
-  return rows in rowid order (this is required for some variants of 
-  index_merge to work).  
-  
-  In ha_partition, rowid is a (partition_id, underlying_table_rowid). 
-  handle_ordered_index_scan must return rows ordered by (key, rowid).
-
-  If two rows have the same key value and come from different partitions, 
-  it is sufficient to return them in the order of their partition_id.
-*/
-
-extern "C" int cmp_key_then_part_id(void *key_p, uchar *ref1, uchar *ref2)
+/* Compare two part_no partition numbers */
+static int cmp_part_ids(uchar *ref1, uchar *ref2)
 {
-  my_ptrdiff_t diff1, diff2;
-  int res;
-
-  if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS, 
-                        ref2 + PARTITION_BYTES_IN_POS)))
-  {
-    return res;
-  }
-  
  /* The following was taken from ha_partition::cmp_ref */
-  diff1= ref2[1] - ref1[1];
-  diff2= ref2[0] - ref1[0];
+  my_ptrdiff_t diff1= ref2[1] - ref1[1];
+  my_ptrdiff_t diff2= ref2[0] - ref1[0];
  if (!diff1 && !diff2)
    return 0;

@ -5377,6 +5373,45 @@ extern "C" int cmp_key_then_part_id(void *key_p, uchar *ref1, uchar *ref2)
 }


+/*
+  @brief
+    Provide ordering by (key_value, part_no).
+
+  @detail
+    Priority queue comparator. key_p is handed to key_rec_cmp() together
+    with the record part of both elements, which starts
+    PARTITION_BYTES_IN_POS bytes into each element. If the key values
+    compare equal, the elements are ordered with cmp_part_ids().
+*/
+
+extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2)
+{
+  int res;
+  if ((res= key_rec_cmp(key_p, ref1 + PARTITION_BYTES_IN_POS, 
+                        ref2 + PARTITION_BYTES_IN_POS)))
+  {
+    return res;
+  }
+  return cmp_part_ids(ref1, ref2);
+}
+
+/*
+  @brief
+    Provide ordering by (key_value, underlying_table_rowid, part_no).
+
+  @detail
+    ptr is the ha_partition object. Elements are compared first by key
+    value (key_rec_cmp() on the record part of each element), then by the
+    bytes stored at offset PARTITION_BYTES_IN_POS + m_rec_length, using
+    m_file[0]->cmp_ref(), and finally with cmp_part_ids().
+*/
+extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2)
+{
+  ha_partition *file= (ha_partition*)ptr;
+  int res;
+
+  /* 1. Key value */
+  if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
+                        ref2 + PARTITION_BYTES_IN_POS)))
+  {
+    return res;
+  }
+  /* 2. The data stored right after the record copy */
+  if ((res= file->m_file[0]->cmp_ref(ref1 + PARTITION_BYTES_IN_POS + file->m_rec_length,
+                                     ref2 + PARTITION_BYTES_IN_POS + file->m_rec_length)))
+  {
+    return res;
+  }
+  /* 3. Tie-break with cmp_part_ids() */
+  return cmp_part_ids(ref1, ref2);
+}
+
+
 /**
  Common routine for a number of index_read variants

@ -6077,7 +6112,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
       i < m_part_spec.start_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
-    part_rec_buf_ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+    part_rec_buf_ptr+= m_priority_queue_rec_len;
  }
  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
                      m_part_spec.start_part, i));
@ -6126,6 +6161,11 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
    if (!error)
    {
      found= TRUE;
+      if (!m_using_extended_keys)
+      {
+        file->position(rec_buf_ptr);
+        memcpy(rec_buf_ptr + m_rec_length, file->ref, file->ref_length);
+      }
      /*
        Initialize queue without order first, simply insert
      */
@ -6142,7 +6182,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
      m_key_not_found= true;
      saved_error= error;
    }
-    part_rec_buf_ptr+= m_rec_length + PARTITION_BYTES_IN_POS;
+    part_rec_buf_ptr+= m_priority_queue_rec_len;
  }
  if (found)
  {
@ -6151,7 +6191,7 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
      after that read the first entry and copy it to the buffer to return in.
    */
    queue_set_max_at_top(&m_queue, reverse_order);
-    queue_set_cmp_arg(&m_queue, (void*)m_curr_key_info);
+    queue_set_cmp_arg(&m_queue, m_using_extended_keys? m_curr_key_info : (void*)this);
    m_queue.elements= j - queue_first_element(&m_queue);
    queue_fix(&m_queue);
    return_top_record(buf);
@ -6226,7 +6266,7 @@ int ha_partition::handle_ordered_index_scan_key_not_found()
      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
        DBUG_RETURN(error);
    }
-    part_buf+= m_rec_length + PARTITION_BYTES_IN_POS;
+    part_buf += m_priority_queue_rec_len;
  }
  DBUG_ASSERT(curr_rec_buf);
  bitmap_clear_all(&m_key_not_found_partitions);
@ -6310,6 +6350,7 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);
+
  if (error)
  {
    if (error == HA_ERR_END_OF_FILE)
@ -6327,6 +6368,13 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
    }
    DBUG_RETURN(error);
  }
+
+  if (!m_using_extended_keys)
+  {
+    file->position(rec_buf);
+    memcpy(rec_buf + m_rec_length, file->ref, file->ref_length);
+  }
+
  queue_replace_top(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
@ -8512,19 +8560,29 @@ uint ha_partition::min_record_length(uint options) const

 int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
 {
-  uint part_id;
+  int cmp;
  my_ptrdiff_t diff1, diff2;
-  handler *file;
  DBUG_ENTER("ha_partition::cmp_ref");

+  cmp = m_file[0]->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
+			   (ref2 + PARTITION_BYTES_IN_POS));
+  if (cmp)
+    DBUG_RETURN(cmp);
+
  if ((ref1[0] == ref2[0]) && (ref1[1] == ref2[1]))
  {
-    part_id= uint2korr(ref1);
-    file= m_file[part_id];
-    DBUG_ASSERT(part_id < m_tot_parts);
-    DBUG_RETURN(file->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
-			      (ref2 + PARTITION_BYTES_IN_POS)));
+   /* This means that the references are the same and are in the same partition. */
+    DBUG_RETURN(0);
  }
+
+  /*
+    In Innodb we compare with either primary key value or global DB_ROW_ID so
+    it is not possible that the two references are equal and are in different
+    partitions, but in myisam it is possible since we are comparing offsets.
+    Remove this assert if DB_ROW_ID is changed to be per partition.
+  */
+  DBUG_ASSERT(!m_innodb);
+
  diff1= ref2[1] - ref1[1];
  diff2= ref2[0] - ref1[0];
  if (diff1 > 0)
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@ -117,6 +117,8 @@ public:
 };


+extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
+
 class ha_partition :public handler
 {
 private:
@ -157,6 +159,22 @@ private:
  uchar *m_rec0;                        // table->record[0]
  const uchar *m_err_rec;               // record which gave error
  QUEUE m_queue;                        // Prio queue used by sorted read
+
+  /*
+    Length of an element in m_ordered_rec_buffer. The elements are composed of
+
+      [part_no] [table->record copy] [underlying_table_rowid]
+    
+    underlying_table_rowid is only stored when the table has no extended keys.
+  */
+  uint m_priority_queue_rec_len;
+
+  /*
+    If true, then sorting records by key value also sorts them by their
+    underlying_table_rowid.
+  */
+  bool m_using_extended_keys;
+
  /*
    Since the partition handler is a handler on top of other handlers, it
    is necessary to keep information about what the underlying handler
@ -1264,6 +1282,9 @@ public:
      DBUG_ASSERT(h == m_file[i]->ht);
    return h;
  }
+
+
+  friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
 };

 #endif /* HA_PARTITION_INCLUDED */
--- a/sql/item.cc
+++ b/sql/item.cc
@ -8497,6 +8497,8 @@ bool Item_insert_value::fix_fields(THD *thd, Item **items)
    {
      tmp_field->init(field_arg->field->table);
      set_field(tmp_field);
+      // the index is important when read bits are set
+      tmp_field->field_index= field_arg->field->field_index;
    }
  }
  return FALSE;
--- a/sql/item.h
+++ b/sql/item.h
@ -4009,7 +4009,7 @@ public:

  bool walk(Item_processor processor, bool walk_subquery, uchar *args)
  {
-    return arg->walk(processor, walk_subquery, args) ||
+    return (arg && arg->walk(processor, walk_subquery, args)) ||
      (this->*processor)(args);
  }

--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@ -1084,7 +1084,7 @@ class Item_func_uncompressed_length : public Item_int_func
 public:
  Item_func_uncompressed_length(Item *a):Item_int_func(a){}
  const char *func_name() const{return "uncompressed_length";}
-  void fix_length_and_dec() { max_length=10; }
+  void fix_length_and_dec() { max_length=10; maybe_null= true; }
  longlong val_int();
 };

--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@ -2715,9 +2715,7 @@ void unlink_thd(THD *thd)
  thd_cleanup(thd);
  dec_connection_count(thd);

-  mysql_mutex_lock(&LOCK_status);
-  add_to_status(&global_status_var, &thd->status_var);
-  mysql_mutex_unlock(&LOCK_status);
+  thd->add_status_to_global();

  mysql_mutex_lock(&LOCK_thread_count);
  thd->unlink();
--- a/sql/slave.cc
+++ b/sql/slave.cc
@ -4165,6 +4165,7 @@ err:
  if (mi->using_gtid != Master_info::USE_GTID_NO)
    flush_master_info(mi, TRUE, TRUE);
  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
+  thd->add_status_to_global();
  mysql_mutex_lock(&mi->run_lock);

 err_during_init:
@ -4664,6 +4665,7 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME,
  if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
    flush_relay_log_info(rli);
  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
+  thd->add_status_to_global();
  mysql_mutex_lock(&rli->run_lock);
 err_during_init:
  /* We need data_lock, at least to wake up any waiting master_pos_wait() */
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@ -12044,7 +12044,12 @@ bool acl_authenticate(THD *thd, uint connect_errors,
    auth_plugin_name= &mpvio.acl_user->plugin;
    res= do_auth_once(thd, auth_plugin_name, &mpvio);
  }
-
+  if (mpvio.make_it_fail)
+  {
+    mpvio.status= MPVIO_EXT::FAILURE;
+    res= CR_ERROR;
+  }
+ 
  Security_context *sctx= thd->security_ctx;
  const ACL_USER *acl_user= mpvio.acl_user;

--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@ -1347,6 +1347,17 @@ retry:
  DBUG_PRINT("info", ("real table: %s.%s", d_name, t_name));
  for (TABLE_LIST *tl= table_list;;)
  {
+    if (tl &&
+        tl->select_lex && tl->select_lex->master_unit() &&
+        tl->select_lex->master_unit()->executed)
+    {
+      /*
+        There is no point in checking tables of already executed parts
+        of the query
+      */
+      tl= tl->next_global;
+      continue;
+    }
    /*
      Table is unique if it is present only once in the global list
      of tables and once in the list of table locks.
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@ -1487,9 +1487,7 @@ void THD::init_for_queries()

 void THD::change_user(void)
 {
-  mysql_mutex_lock(&LOCK_status);
-  add_to_status(&global_status_var, &status_var);
-  mysql_mutex_unlock(&LOCK_status);
+  add_status_to_global();

  cleanup();
  reset_killed();
@ -1520,6 +1518,7 @@ void THD::cleanup(void)
 #endif

  mysql_ha_cleanup(this);
+  locked_tables_list.unlock_locked_tables(this);

  close_temporary_tables(this);

@ -1527,8 +1526,6 @@ void THD::cleanup(void)
  trans_rollback(this);
  xid_cache_delete(&transaction.xid_state);

-  locked_tables_list.unlock_locked_tables(this);
-
  DBUG_ASSERT(open_tables == NULL);
  /*
    If the thread was in the middle of an ongoing transaction (rolled
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@ -754,6 +754,11 @@ typedef struct system_status_var
 #define last_system_status_var questions
 #define last_cleared_system_status_var memory_used

+void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var);
+
+void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
+                        STATUS_VAR *dec_var);
+
 void mark_transaction_to_rollback(THD *thd, bool all);


@ -3616,6 +3621,13 @@ public:
  /* Wake this thread up from wait_for_wakeup_ready(). */
  void signal_wakeup_ready();

+  /*
+    Pass this thread's status_var to add_to_status() with global_status_var
+    as the target, holding LOCK_status for the duration of the call.
+  */
+  void add_status_to_global()
+  {
+    mysql_mutex_lock(&LOCK_status);
+    add_to_status(&global_status_var, &status_var);
+    mysql_mutex_unlock(&LOCK_status);
+  }
+
  wait_for_commit *wait_for_commit_ptr;
  int wait_for_prior_commit()
  {
@ -4816,10 +4828,6 @@ public:
 */
 #define CF_SKIP_QUESTIONS       (1U << 1)

-void add_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var);
-
-void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var,
-                        STATUS_VAR *dec_var);
 void mark_transaction_to_rollback(THD *thd, bool all);

 /* Inline functions */
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@ -68,8 +68,10 @@ mysql_handle_derived(LEX *lex, uint phases)
 {
  bool res= FALSE;
  THD *thd= lex->thd;
+  DBUG_ENTER("mysql_handle_derived");
+  DBUG_PRINT("enter", ("phases: 0x%x", phases));
  if (!lex->derived_tables)
-    return FALSE;
+    DBUG_RETURN(FALSE);

  lex->thd->derived_tables_processing= TRUE;

@ -127,7 +129,7 @@ mysql_handle_derived(LEX *lex, uint phases)
    }
  }
  lex->thd->derived_tables_processing= FALSE;
-  return res;
+  DBUG_RETURN(res);
 }

 /*
@ -166,8 +168,10 @@ mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases)
  THD *thd= lex->thd;
  uint8 allowed_phases= (derived->is_merged_derived() ? DT_PHASES_MERGE :
                         DT_PHASES_MATERIALIZE);
+  DBUG_ENTER("mysql_handle_single_derived");
+  DBUG_PRINT("enter", ("phases: 0x%x  allowed: 0x%x", phases, allowed_phases));
  if (!lex->derived_tables)
-    return FALSE;
+    DBUG_RETURN(FALSE);

  lex->thd->derived_tables_processing= TRUE;

@ -189,7 +193,7 @@ mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases)
      break;
  }
  lex->thd->derived_tables_processing= FALSE;
-  return res;
+  DBUG_RETURN(res);
 }


@ -354,16 +358,17 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
  uint tablenr;
  SELECT_LEX *parent_lex= derived->select_lex;
  Query_arena *arena, backup;
+  DBUG_ENTER("mysql_derived_merge");

  if (derived->merged)
-    return FALSE;
+    DBUG_RETURN(FALSE);

  if (dt_select->uncacheable & UNCACHEABLE_RAND)
  {
    /* There is random function => fall back to materialization. */
    derived->change_refs_to_fields();
    derived->set_materialized_derived();
-    return FALSE;
+    DBUG_RETURN(FALSE);
  }

 if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
@ -467,7 +472,7 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
 exit_merge:
  if (arena)
    thd->restore_active_arena(arena, &backup);
-  return res;
+  DBUG_RETURN(res);
 }


@ -492,14 +497,15 @@ exit_merge:

 bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
+  DBUG_ENTER("mysql_derived_merge_for_insert");
  if (derived->merged_for_insert)
-    return FALSE;
+    DBUG_RETURN(FALSE);
  if (derived->is_materialized_derived())
-    return mysql_derived_prepare(thd, lex, derived);
+    DBUG_RETURN(mysql_derived_prepare(thd, lex, derived));
  if (!derived->is_multitable())
  {
    if (!derived->single_table_updatable())
-      return derived->create_field_translation(thd);
+      DBUG_RETURN(derived->create_field_translation(thd));
    if (derived->merge_underlying_list)
    {
      derived->table= derived->merge_underlying_list->table;
@ -507,7 +513,7 @@ bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived)
      derived->merged_for_insert= TRUE;
    }
  }  
-  return FALSE;
+  DBUG_RETURN(FALSE);
 }


@ -761,9 +767,10 @@ bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived)
  SELECT_LEX *save_current_select= lex->current_select;

  bool res= FALSE;
+  DBUG_ENTER("mysql_derived_optimize");

  if (unit->optimized)
-    return FALSE;
+    DBUG_RETURN(FALSE);
  lex->current_select= first_select;

  if (unit->is_union())
@ -803,7 +810,7 @@ bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived)
  }
 err:
  lex->current_select= save_current_select;
-  return res;
+  DBUG_RETURN(res);
 }


@ -825,11 +832,12 @@ err:

 bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
+  DBUG_ENTER("mysql_derived_create");
  TABLE *table= derived->table;
  SELECT_LEX_UNIT *unit= derived->get_unit();

  if (table->created)
-    return FALSE;
+    DBUG_RETURN(FALSE);
  select_union *result= (select_union*)unit->result;
  if (table->s->db_type() == TMP_ENGINE_HTON)
  {
@ -839,13 +847,13 @@ bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)
                                  &result->tmp_table_param.recinfo,
                                  (unit->first_select()->options |
                                   thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS)))
-      return(TRUE);
+      DBUG_RETURN(TRUE);
  }
  if (open_tmp_table(table))
-    return TRUE;
+    DBUG_RETURN(TRUE);
  table->file->extra(HA_EXTRA_WRITE_CACHE);
  table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
-  return FALSE;
+  DBUG_RETURN(FALSE);
 }


@ -874,11 +882,12 @@ bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)

 bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
+  DBUG_ENTER("mysql_derived_fill");
  SELECT_LEX_UNIT *unit= derived->get_unit();
  bool res= FALSE;

  if (unit->executed && !unit->uncacheable && !unit->describe)
-    return FALSE;
+    DBUG_RETURN(FALSE);
  /*check that table creation passed without problems. */
  DBUG_ASSERT(derived->table && derived->table->created);
  SELECT_LEX *first_select= unit->first_select();
@ -920,7 +929,7 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
    unit->cleanup();
  lex->current_select= save_current_select;

-  return res;
+  DBUG_RETURN(res);
 }


@ -943,6 +952,7 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)

 bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived)
 {
+  DBUG_ENTER("mysql_derived_reinit");
  st_select_lex_unit *unit= derived->get_unit();

  if (derived->table)
@ -952,6 +962,6 @@ bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived)
  /* for derived tables & PS (which can't be reset by Item_subquery) */
  unit->reinit_exec_mechanism();
  unit->set_thd(thd);
-  return FALSE;
+  DBUG_RETURN(FALSE);
 }

--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@ -330,11 +330,12 @@ public:
  friend class error_list;
  friend class error_list_iterator;

+#ifndef DBUG_OFF
  /*
    Debugging help: return N-th element in the list, or NULL if the list has
    less than N elements.
  */
-  inline void *nth_element(int n)
+  void *elem(int n)
  {
    list_node *node= first;
    void *data= NULL;
@ -350,6 +351,8 @@ public:
    }
    return data;
  }
+#endif
+
 #ifdef LIST_EXTRA_DEBUG
  /*
    Check list invariants and print results into trace. Invariants are:
@ -528,7 +531,9 @@ public:
    }
    empty();
  }
-  inline T *nth_element(int n) { return (T*)base_list::nth_element(n); }
+#ifndef DBUG_OFF
+  T *elem(int n) { return (T*)base_list::elem(n); }
+#endif
 };


--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -3821,6 +3821,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
  { 
    conds->update_used_tables();
    conds= remove_eq_conds(join->thd, conds, &join->cond_value);
+    if (conds && conds->type() == Item::COND_ITEM &&
+        ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
+      join->cond_equal= &((Item_cond_and*) conds)->cond_equal;
    join->select_lex->where= conds;
    if (join->cond_value == Item::COND_FALSE)
    {
@ -14261,7 +14264,10 @@ optimize_cond(JOIN *join, COND *conds,
      Remove all and-levels where CONST item != CONST item
    */
    DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
-    conds= remove_eq_conds(thd, conds, cond_value) ;
+    conds= remove_eq_conds(thd, conds, cond_value);
+    if (conds && conds->type() == Item::COND_ITEM &&
+        ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
+      join->cond_equal= &((Item_cond_and*) conds)->cond_equal;
    DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
  }
  DBUG_RETURN(conds);
@ -16148,10 +16154,24 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
    keyinfo->name= (char*) "distinct_key";
    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
    keyinfo->is_statistics_from_stat_tables= FALSE;
-    keyinfo->rec_per_key=0;
    keyinfo->read_stats= NULL;
    keyinfo->collected_stats= NULL;

+    /*
+      Needed by non-merged semi-joins: SJ-Materialized table must have a valid 
+      rec_per_key array, because it participates in join optimization. Since
+      the table has no data, the only statistics we can provide are "unknown",
+      i.e. zero values.
+
+      (For table record count, we calculate and set JOIN_TAB::found_records,
+       see get_delayed_table_estimates()).
+    */
+    size_t rpk_size= keyinfo->user_defined_key_parts * sizeof(keyinfo->rec_per_key[0]);
+    if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root, 
+                                                    rpk_size)))
+      goto err;
+    bzero(keyinfo->rec_per_key, rpk_size);
+
    /*
      Create an extra field to hold NULL bits so that unique indexes on
      blobs can distinguish NULL from 0. This extra field is not needed
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@ -383,6 +383,15 @@ void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
 }
 /* purecov: end */

+/*
+  Debugging help: force List<...>::elem function not to be removed as unused.
+*/
+Item* (List<Item>:: *dbug_list_item_elem_ptr)(int)= &List<Item>::elem;
+Item_equal* (List<Item_equal>:: *dbug_list_item_equal_elem_ptr)(int)=
+  &List<Item_equal>::elem;
+TABLE_LIST* (List<TABLE_LIST>:: *dbug_list_table_list_elem_ptr)(int) =
+  &List<TABLE_LIST>::elem;
+
 #endif

 typedef struct st_debug_lock
--- a/sql/table.h
+++ b/sql/table.h
@ -2170,9 +2170,11 @@ struct TABLE_LIST
  }
  inline void set_merged_derived()
  {
+    DBUG_ENTER("set_merged_derived");
    derived_type= ((derived_type & DTYPE_MASK) |
                   DTYPE_TABLE | DTYPE_MERGE);
    set_check_merged();
+    DBUG_VOID_RETURN;
  }
  inline bool is_materialized_derived()
  {
@ -2180,9 +2182,11 @@ struct TABLE_LIST
  }
  void set_materialized_derived()
  {
+    DBUG_ENTER("set_materialized_derived");
    derived_type= ((derived_type & DTYPE_MASK) |
                   DTYPE_TABLE | DTYPE_MATERIALIZE);
    set_check_materialized();
+    DBUG_VOID_RETURN;
  }
  inline bool is_multitable()
  {
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@ -215,7 +215,7 @@ void ha_heap::update_key_stats()
      else
      {
        ha_rows hash_buckets= file->s->keydef[i].hash_buckets;
-        uint no_records= hash_buckets ? (uint) (file->s->records/hash_buckets) : 2;
+        ha_rows no_records= hash_buckets ? (file->s->records/hash_buckets) : 2;
        if (no_records < 2)
          no_records= 2;
        key->rec_per_key[key->user_defined_key_parts-1]= no_records;
@ -244,6 +244,7 @@ int ha_heap::write_row(uchar * buf)
       We can perform this safely since only one writer at the time is
       allowed on the table.
    */
+    records_changed= 0;
    file->s->key_stat_version++;
  }
  return res;
@ -260,6 +261,7 @@ int ha_heap::update_row(const uchar * old_data, uchar * new_data)
       We can perform this safely since only one writer at the time is
       allowed on the table.
    */
+    records_changed= 0;
    file->s->key_stat_version++;
  }
  return res;
@ -276,6 +278,7 @@ int ha_heap::delete_row(const uchar * buf)
       We can perform this safely since only one writer at the time is
       allowed on the table.
    */
+    records_changed= 0;
    file->s->key_stat_version++;
  }
  return res;
@ -726,8 +729,8 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
  if (share->max_rows && share->max_rows < max_rows)
    max_rows= share->max_rows;

-  hp_create_info->max_records= (ulong) max_rows;
-  hp_create_info->min_records= (ulong) share->min_rows;
+  hp_create_info->max_records= (ulong) MY_MIN(max_rows, ULONG_MAX);
+  hp_create_info->min_records= (ulong) MY_MIN(share->min_rows, ULONG_MAX);
  hp_create_info->keys= share->keys;
  hp_create_info->reclength= share->reclength;
  hp_create_info->keydef= keydef;
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@ -31,7 +31,7 @@ class ha_heap: public handler
  HP_SHARE *internal_share;
  key_map btree_keys;
  /* number of records changed since last statistics update */
-  uint    records_changed;
+  ulong   records_changed;
  uint    key_stat_version;
  my_bool internal_table;
 public:
--- a/storage/heap/hp_create.c
+++ b/storage/heap/hp_create.c
@ -248,7 +248,7 @@ static int keys_compare(heap_rb_param *param, uchar *key1, uchar *key2)
 static void init_block(HP_BLOCK *block, uint reclength, ulong min_records,
 		       ulong max_records)
 {
-  uint i,recbuffer,records_in_block;
+  ulong i,recbuffer,records_in_block;

  /*
    If not min_records and max_records are given, optimize for 1000 rows
@ -276,7 +276,7 @@ static void init_block(HP_BLOCK *block, uint reclength, ulong min_records,
    The + 1 is there to ensure that we get at least 1 row per level (for
    the exceptional case of very long rows)
  */
-  if (records_in_block*recbuffer >
+  if ((ulonglong) records_in_block*recbuffer >
      (my_default_record_cache_size-sizeof(HP_PTRS)*HP_MAX_LEVELS))
    records_in_block= (my_default_record_cache_size - sizeof(HP_PTRS) *
                       HP_MAX_LEVELS) / recbuffer + 1;
--- a/storage/heap/hp_delete.c
+++ b/storage/heap/hp_delete.c
@ -68,7 +68,7 @@ int hp_rb_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo,
 		   const uchar *record, uchar *recpos, int flag)
 {
  heap_rb_param custom_arg;
-  uint old_allocated;
+  ulong old_allocated;
  int res;

  if (flag) 
--- a/storage/heap/hp_open.c
+++ b/storage/heap/hp_open.c
@ -30,7 +30,7 @@ HP_INFO *heap_open_from_share(HP_SHARE *share, int mode)
  HP_INFO *info;
  DBUG_ENTER("heap_open_from_share");

-  if (!(info= (HP_INFO*) my_malloc((uint) sizeof(HP_INFO) +
+  if (!(info= (HP_INFO*) my_malloc(sizeof(HP_INFO) +
 				  2 * share->max_key_length,
                                   MYF(MY_ZEROFILL +
                                       (share->internal ?
@ -49,7 +49,7 @@ HP_INFO *heap_open_from_share(HP_SHARE *share, int mode)
 #ifndef DBUG_OFF
  info->opt_flag= READ_CHECK_USED;		/* Check when changing */
 #endif
-  DBUG_PRINT("exit",("heap: 0x%lx  reclength: %d  records_in_block: %d",
+  DBUG_PRINT("exit",("heap: 0x%lx  reclength: %d  records_in_block: %lu",
 		     (long) info, share->reclength,
                     share->block.records_in_block));
  DBUG_RETURN(info);
--- a/storage/heap/hp_write.c
+++ b/storage/heap/hp_write.c
@ -400,7 +400,7 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo,
 static HASH_INFO *hp_find_free_hash(HP_SHARE *info,
 				     HP_BLOCK *block, ulong records)
 {
-  uint block_pos;
+  ulong block_pos;
  size_t length;

  if (records < block->last_allocated)
--- a/storage/tokudb/CMakeLists.txt
+++ b/storage/tokudb/CMakeLists.txt
@ -14,7 +14,7 @@ IF(NOT TOKUDB_OK OR WITHOUT_TOKUDB OR WITHOUT_TOKUDB_STORAGE_ENGINE)
 ENDIF()

 ############################################
-SET(ENV{TOKUDB_VERSION} "7.1.0")
+SET(TOKUDB_VERSION "7.1.5")
 SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-10.0/README-TOKUDB\nusr/share/doc/mariadb-server-10.0/README.md" PARENT_SCOPE)
 SET(USE_BDB OFF CACHE BOOL "")
 SET(USE_VALGRIND OFF CACHE BOOL "")
@ -31,14 +31,13 @@ MARK_AS_ADVANCED(USE_VALGRIND)
 MARK_AS_ADVANCED(XZ_SOURCE_DIR)
 ############################################

-IF(DEFINED ENV{TOKUDB_VERSION})
-    SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION})
-    ADD_DEFINITIONS("-DTOKUDB_VERSION=\"${TOKUDB_VERSION}\"")
+IF(NOT DEFINED TOKUDB_VERSION)
+    IF(DEFINED ENV{TOKUDB_VERSION})
+        SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION})
+    ENDIF()
 ENDIF()
-
-IF(DEFINED ENV{TOKUDB_PATCHES})
-    SET(TOKUDB_PATCHES $ENV{TOKUDB_PATCHES})
-    ADD_DEFINITIONS("-DTOKUDB_PATCHES=${TOKUDB_PATCHES}")
+IF(DEFINED TOKUDB_VERSION)
+    ADD_DEFINITIONS("-DTOKUDB_VERSION=\"${TOKUDB_VERSION}\"")
 ENDIF()

 ADD_SUBDIRECTORY(ft-index)
--- a/storage/tokudb/README.md
+++ b/storage/tokudb/README.md
@ -24,26 +24,34 @@ working MySQL or MariaDB with Tokutek patches, and with the TokuDB storage
 engine, called `make.mysql.bash`.  This script will download copies of the
 needed source code from github and build everything.

-To build MySQL with TokuDB 7.0.4:
+To build MySQL with TokuDB 7.1.0:
 ```sh
-scripts/make.mysql.bash --mysqlbuild=mysql-5.5.30-tokudb-7.0.4-linux-x86_64
+scripts/make.mysql.bash --mysqlbuild=mysql-5.5.30-tokudb-7.1.0-linux-x86_64
 ```

-To build MariaDB with TokuDB 7.0.4:
+To build MariaDB with TokuDB 7.1.0:
 ```sh
-scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.30-tokudb-7.0.4-linux-x86_64
+scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.30-tokudb-7.1.0-linux-x86_64
 ```

 Before you start, make sure you have a C++11-compatible compiler (GCC >=
 4.7 is recommended), as well as CMake >=2.8.8, and the libraries and
-header files for valgrind,zlib, and Berkeley DB.  On Centos, `yum install
-valgrind-devel zlib-devel libdb-devel`, on Ubuntu, `apt-get install
-valgrind zlib1g-dev libdb-dev`.
+header files for valgrind, zlib, and Berkeley DB.
+
+On CentOS, `yum install valgrind-devel zlib-devel libdb-devel`
+
+On Ubuntu, `apt-get install valgrind zlib1g-dev libdb-dev`

 You can set the compiler by passing `--cc` and `--cxx` to the script, to
 select one that's new enough.  The default is `scripts/make.mysql.bash
 --cc=gcc47 --cxx=g++47`, which may not exist on your system.

+To build a debug MySQL with TokuDB using the head of the Tokutek github
+repositories, run this:
+```sh
+scripts/make.mysql.debug.env.bash
+```
+

 Contributing
 ------------
--- a/storage/tokudb/ft-index/CMakeLists.txt
+++ b/storage/tokudb/ft-index/CMakeLists.txt
@ -18,10 +18,12 @@ include(TokuMergeLibs)
 set(LIBTOKUPORTABILITY "tokuportability" CACHE STRING "Name of libtokuportability.so")
 set(LIBTOKUDB "tokufractaltree" CACHE STRING "Name of libtokufractaltree.so")

-if(USE_VALGRIND)
-include_directories(
-  ${VALGRIND_INCLUDE_DIR}
-  )
+set(INSTALL_LIBDIR "lib" CACHE STRING "where to install libs")
+
+if (USE_VALGRIND AND NOT VALGRIND_INCLUDE_DIR MATCHES NOTFOUND)
+  include_directories(
+    ${VALGRIND_INCLUDE_DIR}
+    )
 endif()
 include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}/include
--- a/storage/tokudb/ft-index/README.md
+++ b/storage/tokudb/ft-index/README.md
@ -2,15 +2,18 @@ TokuKV
 ======

 TokuKV is a high-performance, transactional key-value store, used in the
-TokuDB storage engine for MySQL and MariaDB.
+TokuDB storage engine for MySQL and MariaDB and in TokuMX, the
+high-performance MongoDB distribution.

 TokuKV is provided as a shared library with an interface similar to
 Berkeley DB.

 To build the full MySQL product, see the instructions for
-[ft-engine][ft-engine].  This document covers TokuKV only.
+[Tokutek/ft-engine][ft-engine].  To build TokuMX, see the instructions
+for [Tokutek/mongo][mongo].  This document covers TokuKV only.

-[ft-engine]: http://github.com/Tokutek/ft-engine
+[ft-engine]: https://github.com/Tokutek/ft-engine
+[mongo]: https://github.com/Tokutek/mongo


 Building
@ -18,9 +21,8 @@ Building

 TokuKV is built using CMake >= 2.8.9.  Out-of-source builds are
 recommended.  You need a C++11 compiler, though only GCC >= 4.7 and
-Apple's Clang are tested.  You also need zlib and valgrind development
-packages (`yum install valgrind-devel zlib-devel` or `apt-get install
-valgrind zlib1g-dev`).
+Apple's Clang are tested.  You also need zlib development packages
+(`yum install zlib-devel` or `apt-get install zlib1g-dev`).

 You will also need the source code for jemalloc, checked out in
 `third_party/`.
@ -35,6 +37,7 @@ CC=gcc47 CXX=g++47 cmake \
    -D CMAKE_BUILD_TYPE=Debug \
    -D USE_BDB=OFF \
    -D BUILD_TESTING=OFF \
+    -D USE_VALGRIND=OFF \
    -D CMAKE_INSTALL_PREFIX=../prefix/ \
    ..
 cmake --build . --target install
@ -102,13 +105,20 @@ Contributing

 Please report bugs in TokuKV here on github.

-We have two publicly accessible mailing lists:
+We have two publicly accessible mailing lists for TokuDB:

 - tokudb-user@googlegroups.com is for general and support related
   questions about the use of TokuDB.
 - tokudb-dev@googlegroups.com is for discussion of the development of
   TokuDB.

+and two for TokuMX:
+
+ - tokumx-user@googlegroups.com is for general and support related
+   questions about the use of TokuMX.
+ - tokumx-dev@googlegroups.com is for discussion of the development of
+   TokuMX.
+
 We are also available on IRC on freenode.net, in the #tokutek channel.


--- a/storage/tokudb/ft-index/buildheader/CMakeLists.txt
+++ b/storage/tokudb/ft-index/buildheader/CMakeLists.txt
@ -19,9 +19,11 @@ if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
    FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
    DESTINATION include
    RENAME tokudb.h
+    COMPONENT tokukv_headers
    )
  install(
    FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
    DESTINATION include
+    COMPONENT tokukv_headers
    )
 endif ()
--- a/storage/tokudb/ft-index/buildheader/make_tdb.cc
+++ b/storage/tokudb/ft-index/buildheader/make_tdb.cc
@ -207,6 +207,7 @@ enum {
        TOKUDB_BAD_CHECKSUM            = -100015,
        TOKUDB_HUGE_PAGES_ENABLED      = -100016,
        TOKUDB_OUT_OF_RANGE            = -100017,
+        TOKUDB_INTERRUPTED             = -100018,
        DONTUSE_I_JUST_PUT_THIS_HERE_SO_I_COULD_HAVE_A_COMMA_AFTER_EACH_ITEM
 };

@ -359,6 +360,7 @@ static void print_defines (void) {
    dodefine(TOKUDB_BAD_CHECKSUM);
    dodefine(TOKUDB_HUGE_PAGES_ENABLED);
    dodefine(TOKUDB_OUT_OF_RANGE);
+    dodefine(TOKUDB_INTERRUPTED);

    /* LOADER flags */
    printf("/* LOADER flags */\n");
@ -449,7 +451,7 @@ static void print_db_env_struct (void) {
                             "int (*set_lk_max_memory)                    (DB_ENV *env, uint64_t max)",
                             "int (*get_lk_max_memory)                    (DB_ENV *env, uint64_t *max)",
                             "void (*set_update)                          (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
-                             "int (*set_lock_timeout)                     (DB_ENV *env, uint64_t lock_wait_time_msec)",
+                             "int (*set_lock_timeout)                     (DB_ENV *env, uint64_t default_lock_wait_time_msec, uint64_t (*get_lock_wait_time_cb)(uint64_t default_lock_wait_time))",
                             "int (*get_lock_timeout)                     (DB_ENV *env, uint64_t *lock_wait_time_msec)",
                             "int (*set_lock_timeout_callback)            (DB_ENV *env, lock_timeout_callback callback)",
                             "int (*txn_xa_recover)                       (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
@ -459,8 +461,9 @@ static void print_db_env_struct (void) {
                             "void (*change_fsync_log_period)             (DB_ENV*, uint32_t)",
                             "int (*iterate_live_transactions)            (DB_ENV *env, iterate_transactions_callback callback, void *extra)",
                             "int (*iterate_pending_lock_requests)        (DB_ENV *env, iterate_requests_callback callback, void *extra)",
-                             "void (*set_loader_memory_size)(DB_ENV *env, uint64_t loader_memory_size)",
+                             "void (*set_loader_memory_size)(DB_ENV *env, uint64_t (*get_loader_memory_size_callback)(void))",
                             "uint64_t (*get_loader_memory_size)(DB_ENV *env)",
+                             "void (*set_killed_callback)(DB_ENV *env, uint64_t default_killed_time_msec, uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec), int (*killed_callback)(void))",
                             NULL};

        sort_and_dump_fields("db_env", true, extra);
@ -529,7 +532,7 @@ static void print_db_struct (void) {
 			 "int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, uint32_t) /* change row/dictionary descriptor for a db.  Available only while db is open */",
 			 "int (*getf_set)(DB*, DB_TXN*, uint32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
 			 "int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
-			 "int (*hot_optimize)(DB*, DBT*, DBT*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra)",
+			 "int (*hot_optimize)(DB*, DBT*, DBT*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, uint64_t* loops_run)",
 			 "int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
 			 "int (*change_pagesize)(DB*,uint32_t)",
 			 "int (*change_readpagesize)(DB*,uint32_t)",
@ -538,6 +541,9 @@ static void print_db_struct (void) {
 			 "int (*change_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
 			 "int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*)",
 			 "int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
+			 "int (*change_fanout)(DB *db, uint32_t fanout)",
+			 "int (*get_fanout)(DB *db, uint32_t *fanout)",
+			 "int (*set_fanout)(DB *db, uint32_t fanout)",
 			 "int (*set_indexer)(DB*, DB_INDEXER*)",
 			 "void (*get_indexer)(DB*, DB_INDEXER**)",
 			 "int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)",
@ -546,6 +552,7 @@ static void print_db_struct (void) {
 			 "int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
 			 "int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
                         "const char *(*get_dname)(DB *db)",
+                         "int (*get_last_key)(DB *db, YDB_CALLBACK_FUNCTION func, void* extra)",
 			 NULL};
    sort_and_dump_fields("db", true, extra);
 }
@ -602,6 +609,7 @@ static void print_dbc_struct (void) {
 	"int (*c_getf_set_range)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
 	"int (*c_getf_set_range_reverse)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
 	"int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)",
+    "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*), void *)",
 	"void (*c_remove_restriction)(DBC*)",
 	NULL};
    sort_and_dump_fields("dbc", false, extra);
@ -729,10 +737,10 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
    printf("   CHARSTR,        // interpret as char * \n");
    printf("   UNIXTIME,       // interpret as time_t \n");
    printf("   TOKUTIME,       // interpret as tokutime_t \n");
-    printf("   PARCOUNT       // interpret as PARTITIONED_COUNTER\n");
+    printf("   PARCOUNT,       // interpret as PARTITIONED_COUNTER\n");
+    printf("   DOUBLE          // interpret as double\n");
    printf("} toku_engine_status_display_type; \n");

-
    printf("typedef enum {\n");
    printf("   TOKU_ENGINE_STATUS             = (1ULL<<0),  // Include when asking for engine status\n");
    printf("   TOKU_GLOBAL_STATUS = (1ULL<<1),  // Include when asking for information_schema.global_status\n");
@ -821,6 +829,7 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
    printf("int toku_set_trace_file (const char *fname) %s;\n", VISIBLE);
    printf("int toku_close_trace_file (void) %s;\n", VISIBLE);
    printf("void db_env_set_direct_io (bool direct_io_on) %s;\n", VISIBLE);
+    printf("void db_env_set_compress_buffers_before_eviction (bool compress_buffers) %s;\n", VISIBLE);
    printf("void db_env_set_func_fsync (int (*)(int)) %s;\n", VISIBLE);
    printf("void db_env_set_func_free (void (*)(void*)) %s;\n", VISIBLE);
    printf("void db_env_set_func_malloc (void *(*)(size_t)) %s;\n", VISIBLE);
--- a/storage/tokudb/ft-index/cmake_modules/TokuSetupCTest.cmake
+++ b/storage/tokudb/ft-index/cmake_modules/TokuSetupCTest.cmake
@ -85,8 +85,8 @@ include(CTest)
 set(TOKUDB_DATA "${TokuDB_SOURCE_DIR}/../tokudb.data" CACHE FILEPATH "Path to data files for tests")

 if (BUILD_TESTING OR BUILD_FT_TESTS OR BUILD_SRC_TESTS)
-  set(WARNED_ABOUT_DATA 1) # disable the warning below
-  if (NOT EXISTS "${TOKUDB_DATA}/" AND NOT WARNED_ABOUT_DATA)
+  set(WARNED_ABOUT_DATA 0)
+  if (NOT EXISTS "${TOKUDB_DATA}/" AND NOT WARNED_ABOUT_DATA AND CMAKE_PROJECT_NAME STREQUAL TokuDB)
    message(WARNING "Test data files are missing from ${TOKUDB_DATA}, which will cause some tests to fail.  Please put them there or modify TOKUDB_DATA to avoid this.")
    set(WARNED_ABOUT_DATA 1)
  endif ()
--- a/storage/tokudb/ft-index/cmake_modules/TokuSetupCompiler.cmake
+++ b/storage/tokudb/ft-index/cmake_modules/TokuSetupCompiler.cmake
@ -88,6 +88,7 @@ set_cflags_if_supported(
  -Wno-error=missing-format-attribute
  -Wno-error=address-of-array-temporary
  -Wno-error=tautological-constant-out-of-range-compare
+  -Wno-ignored-attributes
  -fno-rtti
  -fno-exceptions
  )
--- a/storage/tokudb/ft-index/cmake_modules/TokuThirdParty.cmake
+++ b/storage/tokudb/ft-index/cmake_modules/TokuThirdParty.cmake
@ -1,5 +1,40 @@
 include(ExternalProject)

+if (CMAKE_PROJECT_NAME STREQUAL TokuDB)
+    ## add jemalloc with an external project
+    set(JEMALLOC_SOURCE_DIR "${TokuDB_SOURCE_DIR}/third_party/jemalloc" CACHE FILEPATH "Where to find jemalloc sources.")
+    if (NOT EXISTS "${JEMALLOC_SOURCE_DIR}/configure")
+        message(FATAL_ERROR "Can't find jemalloc sources.  Please check them out to ${JEMALLOC_SOURCE_DIR} or modify JEMALLOC_SOURCE_DIR.")
+    endif ()
+    set(jemalloc_configure_opts "CC=${CMAKE_C_COMPILER}" "--with-jemalloc-prefix=" "--with-private-namespace=tokudb_jemalloc_internal_" "--enable-cc-silence")
+    option(JEMALLOC_DEBUG "Build jemalloc with --enable-debug." OFF)
+    if (JEMALLOC_DEBUG)
+        list(APPEND jemalloc_configure_opts --enable-debug)
+    endif ()
+    ExternalProject_Add(build_jemalloc
+        PREFIX jemalloc
+        SOURCE_DIR "${JEMALLOC_SOURCE_DIR}"
+        CONFIGURE_COMMAND
+            "${JEMALLOC_SOURCE_DIR}/configure" ${jemalloc_configure_opts}
+            "--prefix=${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc"
+    )
+
+    add_library(jemalloc STATIC IMPORTED GLOBAL)
+    set_target_properties(jemalloc PROPERTIES IMPORTED_LOCATION
+        "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc/lib/libjemalloc_pic.a")
+    add_dependencies(jemalloc build_jemalloc)
+    add_library(jemalloc_nopic STATIC IMPORTED GLOBAL)
+    set_target_properties(jemalloc_nopic PROPERTIES IMPORTED_LOCATION
+        "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc/lib/libjemalloc.a")
+    add_dependencies(jemalloc_nopic build_jemalloc)
+
+    # detect when we are being built as a subproject
+    if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
+        install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/jemalloc/lib" DESTINATION .
+          COMPONENT tokukv_libs_extra)
+    endif ()
+endif ()
+
 ## add lzma with an external project
 set(xz_configure_opts --with-pic --enable-static)
 if (APPLE)
--- a/storage/tokudb/ft-index/examples/CMakeLists.txt
+++ b/storage/tokudb/ft-index/examples/CMakeLists.txt
@ -10,5 +10,7 @@ if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
      README.examples
    DESTINATION
      examples
+    COMPONENT
+      tokukv_examples
    )
 endif ()
--- a/storage/tokudb/ft-index/ft/CMakeLists.txt
+++ b/storage/tokudb/ft-index/ft/CMakeLists.txt
@ -8,7 +8,7 @@ set_source_files_properties(
  PROPERTIES GENERATED TRUE)

 add_executable(logformat logformat.cc)
-target_link_libraries(logformat ${LIBTOKUPORTABILITY})
+target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static)

 add_custom_command(
  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc"
--- a/storage/tokudb/ft-index/ft/bndata.cc
+++ b/storage/tokudb/ft-index/ft/bndata.cc
@ -105,7 +105,7 @@ void bn_data::init_zero() {

 void bn_data::initialize_empty() {
    toku_mempool_zero(&m_buffer_mempool);
-    m_buffer.create();
+    m_buffer.create_no_array();
 }

 void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size) {
@ -230,11 +230,11 @@ static int move_it (const KLPAIR &klpair, const uint32_t idx, struct omt_compres
 // Compress things, and grow the mempool if needed.
 void bn_data::omt_compress_kvspace(size_t added_size, void **maybe_free) {
    uint32_t total_size_needed = toku_mempool_get_used_space(&m_buffer_mempool) + added_size;
-    if (total_size_needed+total_size_needed >= m_buffer_mempool.size) {
-        m_buffer_mempool.size = total_size_needed+total_size_needed;
-    }
+    // Set the new mempool size to be twice the space we actually need.
+    // On top of the 25% that is padded within toku_mempool_construct (which we
+    // should consider getting rid of), that should be good enough.
    struct mempool new_kvspace;
-    toku_mempool_construct(&new_kvspace, m_buffer_mempool.size);
+    toku_mempool_construct(&new_kvspace, 2*total_size_needed);
    uint32_t numvals = omt_size();
    KLPAIR *XMALLOC_N(numvals, newvals);
    struct omt_compressor_state oc = { &new_kvspace, newvals };
--- a/storage/tokudb/ft-index/ft/bndata.h
+++ b/storage/tokudb/ft-index/ft/bndata.h
@ -116,7 +116,7 @@ struct klpair_struct {

 typedef struct klpair_struct *KLPAIR;

-static LEAFENTRY get_le_from_klpair(KLPAIR klpair){
+static inline LEAFENTRY get_le_from_klpair(KLPAIR klpair){
    uint32_t keylen = klpair->keylen;
    LEAFENTRY le = (LEAFENTRY)(klpair->key_le + keylen);
    return le;
@ -250,5 +250,7 @@ private:

    klpair_omt_t m_buffer;                     // pointers to individual leaf entries
    struct mempool m_buffer_mempool;  // storage for all leaf entries
+
+    friend class bndata_bugfix_test;
 };

--- a/storage/tokudb/ft-index/ft/cachetable-internal.h
+++ b/storage/tokudb/ft-index/ft/cachetable-internal.h
@ -458,7 +458,7 @@ private:
 //
 class checkpointer {
 public:
-    void init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files);
+    int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files);
    void destroy();
    void set_checkpoint_period(uint32_t new_period);
    uint32_t get_checkpoint_period();
@ -480,6 +480,8 @@ private:
    cachefile_list *m_cf_list;
    pair_list *m_list;
    evictor *m_ev;
+    bool m_checkpointer_cron_init;
+    bool m_checkpointer_init;
    
    // variable used by the checkpoint thread to know
    // when all work induced by cloning on client threads is done
@ -512,7 +514,7 @@ const int EVICTION_PERIOD = 1;
 //
 class evictor {
 public:
-    void init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
+    int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
    void destroy();
    void add_pair_attr(PAIR_ATTR attr);
    void remove_pair_attr(PAIR_ATTR attr);    
@ -522,7 +524,7 @@ public:
    uint64_t reserve_memory(double fraction, uint64_t upper_bound);
    void release_reserved_memory(uint64_t reserved_memory);
    void run_eviction_thread();
-    void do_partial_eviction(PAIR p, bool pair_mutex_held);
+    void do_partial_eviction(PAIR p);
    void evict_pair(PAIR p, bool checkpoint_pending);
    void wait_for_cache_pressure_to_subside();
    void signal_eviction_thread();
@ -597,6 +599,10 @@ private:

    // this variable is ONLY used for testing purposes
    uint64_t m_num_eviction_thread_runs;
+
+    bool m_ev_thread_init;
+    bool m_evictor_init;
+
    friend class evictor_test_helpers;
    friend class evictor_unit_test;
 };
@ -608,7 +614,7 @@ private:
 //
 class cleaner {
 public:
-    void init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct);
+    int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct);
    void destroy(void);
    uint32_t get_iterations(void);
    void set_iterations(uint32_t new_iterations);
@ -625,6 +631,8 @@ private:
                                  // minimum period of 1s so if you want
                                  // more frequent cleaner runs you must
                                  // use this)
+    bool m_cleaner_cron_init;
+    bool m_cleaner_init;
 };

 ///////////////////////////////////////////////////////////////////////////////
--- a/storage/tokudb/ft-index/ft/cachetable.cc
+++ b/storage/tokudb/ft-index/ft/cachetable.cc
@ -106,6 +106,7 @@ PATENT RIGHTS GRANT:
 #include <portability/toku_time.h>
 #include <util/rwlock.h>
 #include <util/status.h>
+#include <util/context.h>

 ///////////////////////////////////////////////////////////////////////////////////
 // Engine status
@ -291,7 +292,10 @@ uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct) {
 // reserve 25% as "unreservable".  The loader cannot have it.
 #define unreservable_memory(size) ((size)/4)

-void toku_cachetable_create(CACHETABLE *result, long size_limit, LSN UU(initial_lsn), TOKULOGGER logger) {
+int toku_cachetable_create(CACHETABLE *ct_result, long size_limit, LSN UU(initial_lsn), TOKULOGGER logger) {
+    int result = 0;
+    int r;
+
    if (size_limit == 0) {
        size_limit = 128*1024*1024;
    }
@ -301,16 +305,46 @@ void toku_cachetable_create(CACHETABLE *result, long size_limit, LSN UU(initial_
    ct->cf_list.init();

    int num_processors = toku_os_get_number_active_processors();
-    ct->client_kibbutz = toku_kibbutz_create(num_processors);
-    ct->ct_kibbutz = toku_kibbutz_create(2*num_processors);
    int checkpointing_nworkers = (num_processors/4) ? num_processors/4 : 1;
-    ct->checkpointing_kibbutz = toku_kibbutz_create(checkpointing_nworkers);
+    r = toku_kibbutz_create(num_processors, &ct->client_kibbutz);
+    if (r != 0) {
+        result = r;
+        goto cleanup;
+    }
+    r = toku_kibbutz_create(2*num_processors, &ct->ct_kibbutz);
+    if (r != 0) {
+        result = r;
+        goto cleanup;
+    }
+    r = toku_kibbutz_create(checkpointing_nworkers, &ct->checkpointing_kibbutz);
+    if (r != 0) {
+        result = r;
+        goto cleanup;
+    }
    // must be done after creating ct_kibbutz
-    ct->ev.init(size_limit, &ct->list, &ct->cf_list, ct->ct_kibbutz, EVICTION_PERIOD);
-    ct->cp.init(&ct->list, logger, &ct->ev, &ct->cf_list);
-    ct->cl.init(1, &ct->list, ct); // by default, start with one iteration
+    r = ct->ev.init(size_limit, &ct->list, &ct->cf_list, ct->ct_kibbutz, EVICTION_PERIOD);
+    if (r != 0) {
+        result = r;
+        goto cleanup;
+    }
+    r = ct->cp.init(&ct->list, logger, &ct->ev, &ct->cf_list);
+    if (r != 0) {
+        result = r;
+        goto cleanup;
+    }
+    r = ct->cl.init(1, &ct->list, ct); // by default, start with one iteration
+    if (r != 0) {
+        result = r;
+        goto cleanup;
+    }
    ct->env_dir = toku_xstrdup(".");
-    *result = ct;
+cleanup:
+    if (result == 0) {
+        *ct_result = ct;
+    } else {
+        toku_cachetable_close(&ct);
+    }
+    return result;
 }

 // Returns a pointer to the checkpoint contained within
@ -618,39 +652,6 @@ static void cachetable_free_pair(PAIR p) {
    ctpair_destroy(p);
 }

-// Maybe remove a pair from the cachetable and free it, depending on whether
-// or not there are any threads interested in the pair.  The flush callback
-// is called with write_me and keep_me both false, and the pair is destroyed.
-// The sole purpose of this function is to remove the node, so the write_me 
-// argument to the flush callback is false, and the flush callback won't do
-// anything except destroy the node.
-//
-// on input, pair_list's write lock is held and PAIR's mutex is held
-// on exit, only the pair_list's write lock is still held
-//
-static void cachetable_maybe_remove_and_free_pair (
-    pair_list* pl, 
-    evictor* ev, 
-    PAIR p
-    ) 
-{
-    // this ensures that a clone running in the background first completes
-    if (p->value_rwlock.users() == 0 && p->refcount == 0) {
-        // assumption is that if we are about to remove the pair
-        // that no one has grabbed the disk_nb_mutex,
-        // and that there is no cloned_value_data, because
-        // no one is writing a cloned value out.
-        assert(nb_mutex_users(&p->disk_nb_mutex) == 0);
-        assert(p->cloned_value_data == NULL);
-        cachetable_remove_pair(pl, ev, p);
-        pair_unlock(p);
-        cachetable_free_pair(p);
-    }
-    else {
-        pair_unlock(p);
-    }
-}
-
 // assumes value_rwlock and disk_nb_mutex held on entry
 // responsibility of this function is to only write a locked PAIR to disk
 // and NOTHING else. We do not manipulate the state of the PAIR
@ -774,7 +775,7 @@ static void cachetable_evicter(void* extra) {
 static void cachetable_partial_eviction(void* extra) {
    PAIR p = (PAIR)extra;
    CACHEFILE cf = p->cachefile;
-    p->ev->do_partial_eviction(p, false);
+    p->ev->do_partial_eviction(p);
    bjm_remove_background_job(cf->bjm);
 }

@ -1483,6 +1484,8 @@ static bool try_pin_pair(
    bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs);
    
    if (partial_fetch_required) {    
+        toku::context pf_ctx(CTX_PARTIAL_FETCH);
+
        if (ct->ev.should_client_thread_sleep() && !already_slept) {
            pair_lock(p);
            unpin_pair(p, (lock_type == PL_READ));
@ -1634,6 +1637,8 @@ beginning:
        }
    }
    else {
+        toku::context fetch_ctx(CTX_FULL_FETCH);
+
        ct->list.pair_unlock_by_fullhash(fullhash);
        // we only want to sleep once per call to get_and_pin. If we have already
        // slept and there is still cache pressure, then we might as 
@ -2038,10 +2043,7 @@ maybe_pin_pair(
    if (retval == TOKUDB_TRY_AGAIN) {
        unpin_pair(p, (lock_type == PL_READ));
    }    
-    else {
-        // just a sanity check
-        assert(retval == 0);
-    }
+    pair_touch(p);
    pair_unlock(p);
    return retval;
 }
@ -2071,6 +2073,8 @@ try_again:
    ct->list.pair_lock_by_fullhash(fullhash);
    PAIR p = ct->list.find_pair(cf, key, fullhash);
    if (p == NULL) {
+        toku::context fetch_ctx(CTX_FULL_FETCH);
+
        // Not found
        ct->list.pair_unlock_by_fullhash(fullhash);
        ct->list.write_list_lock();
@ -2146,6 +2150,8 @@ try_again:
        // still check for partial fetch
        bool partial_fetch_required = pf_req_callback(p->value_data,read_extraargs);
        if (partial_fetch_required) {
+            toku::context fetch_ctx(CTX_PARTIAL_FETCH);
+
            run_unlockers(unlockers);

            // we are now getting an expensive write lock, because we
@ -2428,10 +2434,10 @@ static void remove_pair_for_close(PAIR p, CACHETABLE ct, bool completely) {
    assert(p->dirty == CACHETABLE_CLEAN);
    assert(p->refcount == 0);
    if (completely) {
-        // TODO: maybe break up this function
-        // so that write lock does not need to be held for entire
-        // free
-        cachetable_maybe_remove_and_free_pair(&ct->list, &ct->ev, p);
+        cachetable_remove_pair(&ct->list, &ct->ev, p);
+        pair_unlock(p);
+        // TODO: Eventually, we should not hold the write list lock during free
+        cachetable_free_pair(p);
    }
    else {
        // if we are not evicting completely,
@ -2587,9 +2593,12 @@ void toku_cachetable_close (CACHETABLE *ctp) {
    ct->list.destroy();
    ct->cf_list.destroy();
    
-    toku_kibbutz_destroy(ct->client_kibbutz);
-    toku_kibbutz_destroy(ct->ct_kibbutz);
-    toku_kibbutz_destroy(ct->checkpointing_kibbutz);
+    if (ct->client_kibbutz)
+        toku_kibbutz_destroy(ct->client_kibbutz);
+    if (ct->ct_kibbutz)
+        toku_kibbutz_destroy(ct->ct_kibbutz);
+    if (ct->checkpointing_kibbutz)
+        toku_kibbutz_destroy(ct->checkpointing_kibbutz);
    toku_free(ct->env_dir);
    toku_free(ct);
    *ctp = 0;
@ -3074,20 +3083,29 @@ int toku_cleaner_thread (void *cleaner_v) {
 //
 ENSURE_POD(cleaner);

-void cleaner::init(uint32_t _cleaner_iterations, pair_list* _pl, CACHETABLE _ct) {
+int cleaner::init(uint32_t _cleaner_iterations, pair_list* _pl, CACHETABLE _ct) {
    // default is no cleaner, for now
-    toku_minicron_setup(&m_cleaner_cron, 0, toku_cleaner_thread, this); 
+    m_cleaner_cron_init = false;
+    int r = toku_minicron_setup(&m_cleaner_cron, 0, toku_cleaner_thread, this);
+    if (r == 0) {
+        m_cleaner_cron_init = true;
+    }
    TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_cleaner_iterations, sizeof m_cleaner_iterations);
    m_cleaner_iterations = _cleaner_iterations;
    m_pl = _pl;
    m_ct = _ct;
+    m_cleaner_init = true;
+    return r;
 }

 // this function is allowed to be called multiple times
 void cleaner::destroy(void) {
-    if (!toku_minicron_has_been_shutdown(&m_cleaner_cron)) {
+    if (!m_cleaner_init) {
+        return;
+    }
+    if (m_cleaner_cron_init && !toku_minicron_has_been_shutdown(&m_cleaner_cron)) {
        // for test code only, production code uses toku_cachetable_minicron_shutdown()
-        int  r = toku_minicron_shutdown(&m_cleaner_cron);
+        int r = toku_minicron_shutdown(&m_cleaner_cron);
        assert(r==0);
    }
 }
@ -3122,6 +3140,8 @@ void cleaner::set_period(uint32_t new_period) {
 // start).  At this point, we can safely unlock the cachetable, do the
 // work (callback), and unlock/release our claim to the cachefile.
 int cleaner::run_cleaner(void) {
+    toku::context cleaner_ctx(CTX_CLEANER);
+
    int r;
    uint32_t num_iterations = this->get_iterations();
    for (uint32_t i = 0; i < num_iterations; ++i) {
@ -3662,7 +3682,7 @@ static void *eviction_thread(void *evictor_v) {
 // Starts the eviction thread, assigns external object references,
 // and initializes all counters and condition variables.
 //
-void evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period) {
+int evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period) {
    TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_ev_thread_is_running, sizeof m_ev_thread_is_running);
    TOKU_VALGRIND_HG_DISABLE_CHECKING(&m_size_evicting, sizeof m_size_evicting);

@ -3716,8 +3736,13 @@ void evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, K
    // start the background thread    
    m_run_thread = true;
    m_num_eviction_thread_runs = 0;
+    m_ev_thread_init = false;
    r = toku_pthread_create(&m_ev_thread, NULL, eviction_thread, this); 
-    assert_zero(r);
+    if (r == 0) {
+        m_ev_thread_init = true;
+    }
+    m_evictor_init = true;
+    return r;
 }

 //
@ -3725,7 +3750,10 @@ void evictor::init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, K
 //
 // NOTE: This should only be called if there are no evictions in progress.
 //
-void evictor::destroy() {    
+void evictor::destroy() { 
+    if (!m_evictor_init) {
+        return;
+    }
    assert(m_size_evicting == 0);
    //
    // commented out of Ming, because we could not finish
@ -3734,16 +3762,16 @@ void evictor::destroy() {
    //assert(m_size_current == 0);

    // Stop the eviction thread.
-    toku_mutex_lock(&m_ev_thread_lock);
-    m_run_thread = false;
-    this->signal_eviction_thread();
-    toku_mutex_unlock(&m_ev_thread_lock);
-
-    void *ret;
-    int r = toku_pthread_join(m_ev_thread, &ret); 
-    assert_zero(r);
-    assert(!m_ev_thread_is_running);
-
+    if (m_ev_thread_init) {
+        toku_mutex_lock(&m_ev_thread_lock);
+        m_run_thread = false;
+        this->signal_eviction_thread();
+        toku_mutex_unlock(&m_ev_thread_lock);
+        void *ret;
+        int r = toku_pthread_join(m_ev_thread, &ret); 
+        assert_zero(r);
+        assert(!m_ev_thread_is_running);
+    }
    destroy_partitioned_counter(m_size_nonleaf);
    m_size_nonleaf = NULL;
    destroy_partitioned_counter(m_size_leaf);
@ -4007,6 +4035,8 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
    m_pl->read_list_unlock();
    ret_val = true;
    if (curr_in_clock->count > 0) {
+        toku::context pe_ctx(CTX_PARTIAL_EVICTION);
+
        uint32_t curr_size = curr_in_clock->attr.size;
        // if the size of this PAIR is greater than the average size of PAIRs
        // in the cachetable, then decrement it, otherwise, decrement
@ -4052,10 +4082,10 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
            write_extraargs
            );
        if (cost == PE_CHEAP) {
-            curr_in_clock->size_evicting_estimate = 0;
-            this->do_partial_eviction(curr_in_clock, true);
-            bjm_remove_background_job(cf->bjm);
            pair_unlock(curr_in_clock);
+            curr_in_clock->size_evicting_estimate = 0;
+            this->do_partial_eviction(curr_in_clock);
+            bjm_remove_background_job(cf->bjm);
        }
        else if (cost == PE_EXPENSIVE) {
            // only bother running an expensive partial eviction
@ -4083,6 +4113,8 @@ bool evictor::run_eviction_on_pair(PAIR curr_in_clock) {
        }        
    }
    else {
+        toku::context pe_ctx(CTX_FULL_EVICTION);
+
        // responsibility of try_evict_pair to eventually remove background job
        // pair's mutex is still grabbed here
        this->try_evict_pair(curr_in_clock);
@ -4094,26 +4126,48 @@ exit:
    return ret_val;
 }

+struct pair_unpin_with_new_attr_extra {
+    pair_unpin_with_new_attr_extra(evictor *e, PAIR p) :
+        ev(e), pair(p) {
+    }
+    evictor *ev;
+    PAIR pair;
+};
+
+static void pair_unpin_with_new_attr(PAIR_ATTR new_attr, void *extra) {
+    struct pair_unpin_with_new_attr_extra *info =
+        reinterpret_cast<struct pair_unpin_with_new_attr_extra *>(extra);
+    PAIR p = info->pair;
+    evictor *ev = info->ev;
+
+    // change the attr in the evictor, then update the value in the pair
+    ev->change_pair_attr(p->attr, new_attr);
+    p->attr = new_attr;
+
+    // unpin
+    pair_lock(p);
+    p->value_rwlock.write_unlock();
+    pair_unlock(p);
+}
+
 //
-// on entry and exit, pair's mutex is held if pair_mutex_held is true
+// on entry and exit, pair's mutex is not held
 // on exit, PAIR is unpinned
 //
-void evictor::do_partial_eviction(PAIR p, bool pair_mutex_held) {
-    PAIR_ATTR new_attr;
+void evictor::do_partial_eviction(PAIR p) {
+    // Copy the old attr
    PAIR_ATTR old_attr = p->attr;
-    
-    p->pe_callback(p->value_data, old_attr, &new_attr, p->write_extraargs);
+    long long size_evicting_estimate = p->size_evicting_estimate;

-    this->change_pair_attr(old_attr, new_attr);
-    p->attr = new_attr;
-    this->decrease_size_evicting(p->size_evicting_estimate);
-    if (!pair_mutex_held) {
-        pair_lock(p);
-    }
-    p->value_rwlock.write_unlock();
-    if (!pair_mutex_held) {
-        pair_unlock(p);
-    }
+    struct pair_unpin_with_new_attr_extra extra(this, p);
+    p->pe_callback(p->value_data, old_attr, p->write_extraargs,
+                   // passed as the finalize continuation, which allows the
+                   // pe_callback to unpin the node before doing expensive cleanup
+                   pair_unpin_with_new_attr, &extra);
+
+    // now that the pe_callback (and its pair_unpin_with_new_attr continuation)
+    // have finished, we can safely decrease size_evicting
+    this->decrease_size_evicting(size_evicting_estimate);
 }

 //
@ -4188,8 +4242,25 @@ void evictor::evict_pair(PAIR p, bool for_checkpoint) {
    nb_mutex_unlock(&p->disk_nb_mutex);
    // at this point, we have the pair list's write list lock
    // and we have the pair's mutex (p->mutex) held
-    cachetable_maybe_remove_and_free_pair(m_pl, this, p);
+    
+    // this ensures that a clone running in the background first completes
+    bool removed = false;
+    if (p->value_rwlock.users() == 0 && p->refcount == 0) {
+        // assumption is that if we are about to remove the pair
+        // that no one has grabbed the disk_nb_mutex,
+        // and that there is no cloned_value_data, because
+        // no one is writing a cloned value out.
+        assert(nb_mutex_users(&p->disk_nb_mutex) == 0);
+        assert(p->cloned_value_data == NULL);
+        cachetable_remove_pair(m_pl, this, p);
+        removed = true;
+    }
+    pair_unlock(p);
    m_pl->write_list_unlock();
+    // do not want to hold the write list lock while freeing a pair
+    if (removed) {
+        cachetable_free_pair(p);
+    }
 }

 //
@ -4348,7 +4419,7 @@ ENSURE_POD(checkpointer);
 //
 // Sets the cachetable reference in this checkpointer class, this is temporary.
 //
-void checkpointer::init(pair_list *_pl, 
+int checkpointer::init(pair_list *_pl, 
                        TOKULOGGER _logger,
                        evictor *_ev,
                        cachefile_list *files) {
@ -4359,11 +4430,20 @@ void checkpointer::init(pair_list *_pl,
    bjm_init(&m_checkpoint_clones_bjm);
    
    // Default is no checkpointing.
-    toku_minicron_setup(&m_checkpointer_cron, 0, checkpoint_thread, this);
+    m_checkpointer_cron_init = false;
+    int r = toku_minicron_setup(&m_checkpointer_cron, 0, checkpoint_thread, this);
+    if (r == 0) {
+        m_checkpointer_cron_init = true;
+    }
+    m_checkpointer_init = true;
+    return r;
 }

 void checkpointer::destroy() {
-    if (!this->has_been_shutdown()) {
+    if (!m_checkpointer_init) {
+        return;
+    }
+    if (m_checkpointer_cron_init && !this->has_been_shutdown()) {
        // for test code only, production code uses toku_cachetable_minicron_shutdown()
        int r = this->shutdown();
        assert(r == 0);
--- a/storage/tokudb/ft-index/ft/cachetable.h
+++ b/storage/tokudb/ft-index/ft/cachetable.h
@ -122,7 +122,7 @@ uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct);
 // create and initialize a cache table
 // size_limit is the upper limit on the total size of the values in the table
 // pass 0 if you want the default
-void toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER);
+int toku_cachetable_create(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER);

 // Create a new cachetable.
 // Effects: a new cachetable is created and initialized.
@ -223,11 +223,15 @@ typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *ftnode_pv, void*

 // The cachetable calls the partial eviction callback to try to partially evict pieces
 // of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free
-// nothing, or may choose to free as much as possible.
-// old_attr is the PAIR_ATTR of the PAIR when the callback is called. 
-// new_attr is set to the new PAIR_ATTR after the callback executes partial eviction
-// Requires a write lock to be held on the PAIR in the cachetable while this function is called
-typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, PAIR_ATTR* new_attr, void *write_extraargs);
+// nothing, or may choose to free as much as possible. When the partial eviction callback is finished,
+// it must call finalize with the new PAIR_ATTR and the given finalize_extra. After this point, the
+// write lock will be released on the PAIR and it is no longer safe to operate on any of the passed arguments.
+// This is useful for doing expensive cleanup work outside of the PAIR's write lock (such as destroying objects, etc)
+//
+// on entry, requires a write lock to be held on the PAIR in the cachetable while this function is called
+// on exit, the finalize continuation is called
+typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
+                                                    void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);

 // The cachetable calls this function to determine if get_and_pin call requires a partial fetch. If this function returns true, 
 // then the cachetable will subsequently call CACHETABLE_PARTIAL_FETCH_CALLBACK to perform
--- a/storage/tokudb/ft-index/ft/checkpoint.cc
+++ b/storage/tokudb/ft-index/ft/checkpoint.cc
@ -136,6 +136,7 @@ PATENT RIGHTS GRANT:
 #include "checkpoint.h"
 #include <portability/toku_atomic.h>
 #include <util/status.h>
+#include <util/frwlock.h>

 ///////////////////////////////////////////////////////////////////////////////////
 // Engine status
@ -187,7 +188,8 @@ toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {

 static LSN last_completed_checkpoint_lsn;

-static toku_pthread_rwlock_t checkpoint_safe_lock;
+static toku_mutex_t checkpoint_safe_mutex;
+static toku::frwlock checkpoint_safe_lock;
 static toku_pthread_rwlock_t multi_operation_lock;
 static toku_pthread_rwlock_t low_priority_multi_operation_lock;

@ -237,28 +239,33 @@ multi_operation_checkpoint_unlock(void) {

 static void
 checkpoint_safe_lock_init(void) {
-    toku_pthread_rwlock_init(&checkpoint_safe_lock, NULL); 
+    toku_mutex_init(&checkpoint_safe_mutex, NULL);
+    checkpoint_safe_lock.init(&checkpoint_safe_mutex);
    locked_cs = false;
 }

 static void
 checkpoint_safe_lock_destroy(void) {
-    toku_pthread_rwlock_destroy(&checkpoint_safe_lock); 
+    checkpoint_safe_lock.deinit();
+    toku_mutex_destroy(&checkpoint_safe_mutex);
 }

 static void 
 checkpoint_safe_checkpoint_lock(void) {
-    toku_pthread_rwlock_wrlock(&checkpoint_safe_lock);   
+    toku_mutex_lock(&checkpoint_safe_mutex);
+    checkpoint_safe_lock.write_lock(false);
+    toku_mutex_unlock(&checkpoint_safe_mutex);
    locked_cs = true;
 }

 static void 
 checkpoint_safe_checkpoint_unlock(void) {
    locked_cs = false;
-    toku_pthread_rwlock_wrunlock(&checkpoint_safe_lock); 
+    toku_mutex_lock(&checkpoint_safe_mutex);
+    checkpoint_safe_lock.write_unlock();
+    toku_mutex_unlock(&checkpoint_safe_mutex);
 }

-
 // toku_xxx_client_(un)lock() functions are only called from client code,
 // never from checkpoint code, and use the "reader" interface to the lock functions.

@ -286,18 +293,20 @@ void
 toku_checkpoint_safe_client_lock(void) {
    if (locked_cs)
        (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
-    toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);  
+    toku_mutex_lock(&checkpoint_safe_mutex);
+    checkpoint_safe_lock.read_lock();
+    toku_mutex_unlock(&checkpoint_safe_mutex);
    toku_multi_operation_client_lock();
 }

 void 
 toku_checkpoint_safe_client_unlock(void) {
-    toku_pthread_rwlock_rdunlock(&checkpoint_safe_lock); 
+    toku_mutex_lock(&checkpoint_safe_mutex);
+    checkpoint_safe_lock.read_unlock();
+    toku_mutex_unlock(&checkpoint_safe_mutex);
    toku_multi_operation_client_unlock();
 }

-
-
 // Initialize the checkpoint mechanism, must be called before any client operations.
 void
 toku_checkpoint_init(void) {
--- a/storage/tokudb/ft-index/ft/compress.cc
+++ b/storage/tokudb/ft-index/ft/compress.cc
@ -89,6 +89,8 @@ PATENT RIGHTS GRANT:
 #ident "$Id$"

 #include <toku_portability.h>
+#include <util/scoped_malloc.h>
+
 #include <zlib.h>
 #include <lzma.h>

@ -241,10 +243,10 @@ void toku_decompress (Bytef       *dest,   uLongf destLen,
    }
    case TOKU_QUICKLZ_METHOD:
        if (sourceLen>1) {
-            qlz_state_decompress *XCALLOC(qsd);
+            toku::scoped_calloc state_buf(sizeof(qlz_state_decompress));
+            qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get());
            uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd);
            assert(actual_destlen == destLen);
-            toku_free(qsd);
        } else {
            // length 1 means there is no data, so do nothing.
            assert(destLen==0);
--- a/storage/tokudb/ft-index/ft/fifo.cc
+++ b/storage/tokudb/ft-index/ft/fifo.cc
@ -135,6 +135,11 @@ int toku_fifo_create(FIFO *ptr) {
    return 0;
 }

+void toku_fifo_resize(FIFO fifo, size_t new_size) {
+    XREALLOC_N(new_size, fifo->memory);
+    fifo->memory_size = new_size;
+}
+
 void toku_fifo_free(FIFO *ptr) {
    FIFO fifo = *ptr;
    if (fifo->memory) toku_free(fifo->memory);
@ -162,16 +167,10 @@ int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *d
                          + xids_get_size(xids)
                          - sizeof(XIDS_S); //Prevent double counting
    int need_space_total = fifo->memory_used+need_space_here;
-    if (fifo->memory == NULL) {
-        fifo->memory_size = next_power_of_two(need_space_total);
-        XMALLOC_N(fifo->memory_size, fifo->memory);
-    }
-    if (need_space_total > fifo->memory_size) {
-        // Out of memory at the end.
+    if (fifo->memory == NULL || need_space_total > fifo->memory_size) {
+        // resize the fifo to the next power of 2 greater than the needed space
        int next_2 = next_power_of_two(need_space_total);
-        // resize the fifo
-        XREALLOC_N(next_2, fifo->memory);
-        fifo->memory_size = next_2;
+        toku_fifo_resize(fifo, next_2);
    }
    struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used);
    fifo_entry_set_msg_type(entry, type);
--- a/storage/tokudb/ft-index/ft/fifo.h
+++ b/storage/tokudb/ft-index/ft/fifo.h
@ -136,6 +136,8 @@ typedef struct fifo *FIFO;

 int toku_fifo_create(FIFO *);

+void toku_fifo_resize(FIFO fifo, size_t new_size);
+
 void toku_fifo_free(FIFO *);

 int toku_fifo_n_entries(FIFO);
--- a/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc
+++ b/storage/tokudb/ft-index/ft/ft-cachetable-wrappers.cc
@ -94,7 +94,8 @@ PATENT RIGHTS GRANT:
 #include <fttypes.h>
 #include <ft-flusher.h>
 #include <ft-internal.h>
-#include "ft.h"
+#include <ft.h>
+#include <util/context.h>

 static void
 ftnode_get_key_and_fullhash(
@ -252,6 +253,8 @@ toku_pin_ftnode_batched(
            bfe->child_to_read
            );
        if (needs_ancestors_messages) {
+            toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
+
            toku_unpin_ftnode_read_only(brt->ft, node);
            int rr = toku_cachetable_get_and_pin_nonblocking_batched(
                    brt->ft->cf,
--- a/storage/tokudb/ft-index/ft/ft-flusher.cc
+++ b/storage/tokudb/ft-index/ft/ft-flusher.cc
@ -97,6 +97,7 @@ PATENT RIGHTS GRANT:
 #include <toku_assert.h>
 #include <portability/toku_atomic.h>
 #include <util/status.h>
+#include <util/context.h>

 /* Status is intended for display to humans to help understand system behavior.
 * It does not need to be perfectly thread-safe.
@ -544,11 +545,13 @@ ct_flusher_advice_init(struct flusher_advice *fa, struct flush_status_update_ext
 // a leaf node that is not entirely in memory. If so, then
 // we cannot be sure if the node is reactive.
 //
-static bool may_node_be_reactive(FTNODE node)
+static bool may_node_be_reactive(FT ft, FTNODE node)
 {
-    if (node->height == 0) return true;
+    if (node->height == 0) {
+        return true;
+    }
    else {
-        return (get_nonleaf_reactivity(node) != RE_STABLE);
+        return (get_nonleaf_reactivity(node, ft->h->fanout) != RE_STABLE);
    }
 }

@ -1541,11 +1544,7 @@ ft_merge_child(
    }
 }

-static void ft_flush_some_child(
-    FT ft,
-    FTNODE parent,
-    struct flusher_advice *fa
-    )
+void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
 // Effect: This function does the following:
 //   - Pick a child of parent (the heaviest child),
 //   - flush from parent to child,
@ -1559,7 +1558,7 @@ static void ft_flush_some_child(
    NONLEAF_CHILDINFO bnc = NULL;
    paranoid_invariant(parent->height>0);
    toku_assert_entire_node_in_memory(parent);
-    TXNID oldest_referenced_xid = parent->oldest_referenced_xid_known;
+    TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;

    // pick the child we want to flush to
    int childnum = fa->pick_child(ft, parent, fa->extra);
@ -1589,7 +1588,7 @@ static void ft_flush_some_child(
    // Let's do a quick check to see if the child may be reactive
    // If the child cannot be reactive, then we can safely unlock
    // the parent before finishing reading in the entire child node.
-    bool may_child_be_reactive = may_node_be_reactive(child);
+    bool may_child_be_reactive = may_node_be_reactive(ft, child);

    paranoid_invariant(child->thisnodename.b!=0);
    //VERIFY_NODE(brt, child);
@ -1631,7 +1630,7 @@ static void ft_flush_some_child(
    // we wont be splitting/merging child
    // and we have already replaced the bnc
    // for the root with a fresh one
-    enum reactivity child_re = get_node_reactivity(child, ft->h->nodesize);
+    enum reactivity child_re = get_node_reactivity(ft, child);
    if (parent && child_re == RE_STABLE) {
        toku_unpin_ftnode_off_client_thread(ft, parent);
        parent = NULL;
@ -1652,7 +1651,7 @@ static void ft_flush_some_child(
            ft,
            bnc,
            child,
-            oldest_referenced_xid
+            parent_oldest_referenced_xid_known
            );
        destroy_nonleaf_childinfo(bnc);
    }
@ -1661,7 +1660,7 @@ static void ft_flush_some_child(
    // let's get the reactivity of the child again,
    // it is possible that the flush got rid of some values
    // and now the parent is no longer reactive
-    child_re = get_node_reactivity(child, ft->h->nodesize);
+    child_re = get_node_reactivity(ft, child);
    // if the parent has been unpinned above, then
    // this is our only option, even if the child is not stable
    // if the child is not stable, we'll handle it the next
@ -1676,10 +1675,10 @@ static void ft_flush_some_child(
            parent = NULL;
        }
        //
-        // it is the responsibility of ft_flush_some_child to unpin child
+        // it is the responsibility of toku_ft_flush_some_child to unpin child
        //
        if (child->height > 0 && fa->should_recursively_flush(child, fa->extra)) {
-            ft_flush_some_child(ft, child, fa);
+            toku_ft_flush_some_child(ft, child, fa);
        }
        else {
            toku_unpin_ftnode_off_client_thread(ft, child);
@ -1706,13 +1705,6 @@ static void ft_flush_some_child(
    }
 }

-void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) {
-    // Vanilla flush_some_child flushes from parent to child without
-    // providing a meaningful oldest_referenced_xid. No simple garbage
-    // collection is performed.
-    return ft_flush_some_child(ft, parent, fa);
-}
-
 static void
 update_cleaner_status(
    FTNODE node,
@ -1854,7 +1846,7 @@ struct flusher_extra {
    FT h;
    FTNODE node;
    NONLEAF_CHILDINFO bnc;
-    TXNID oldest_referenced_xid;
+    TXNID parent_oldest_referenced_xid_known;
 };

 //
@ -1864,6 +1856,7 @@ struct flusher_extra {
 //
 static void flush_node_fun(void *fe_v)
 {
+    toku::context flush_ctx(CTX_FLUSH);
    struct flusher_extra* fe = (struct flusher_extra *) fe_v;
    // The node that has been placed on the background
    // thread may not be fully in memory. Some message
@ -1892,16 +1885,16 @@ static void flush_node_fun(void *fe_v)
            fe->h,
            fe->bnc,
            fe->node,
-            fe->oldest_referenced_xid
+            fe->parent_oldest_referenced_xid_known
            );
        destroy_nonleaf_childinfo(fe->bnc);

        // after the flush has completed, now check to see if the node needs flushing
-        // If so, call ft_flush_some_child on the node (because this flush intends to
+        // If so, call toku_ft_flush_some_child on the node (because this flush intends to
        // pass a meaningful oldest referenced xid for simple garbage collection), and it is the
        // responsibility of the flush to unlock the node. otherwise, we unlock it here.
        if (fe->node->height > 0 && toku_ft_nonleaf_is_gorged(fe->node, fe->h->h->nodesize)) {
-            ft_flush_some_child(fe->h, fe->node, &fa);
+            toku_ft_flush_some_child(fe->h, fe->node, &fa);
        }
        else {
            toku_unpin_ftnode_off_client_thread(fe->h,fe->node);
@ -1912,7 +1905,7 @@ static void flush_node_fun(void *fe_v)
        // bnc, which means we are tasked with flushing some
        // buffer in the node.
        // It is the responsibility of flush some child to unlock the node
-        ft_flush_some_child(fe->h, fe->node, &fa);
+        toku_ft_flush_some_child(fe->h, fe->node, &fa);
    }
    remove_background_job_from_cf(fe->h->cf);
    toku_free(fe);
@ -1923,13 +1916,13 @@ place_node_and_bnc_on_background_thread(
    FT h,
    FTNODE node,
    NONLEAF_CHILDINFO bnc,
-    TXNID oldest_referenced_xid)
+    TXNID parent_oldest_referenced_xid_known)
 {
    struct flusher_extra *XMALLOC(fe);
    fe->h = h;
    fe->node = node;
    fe->bnc = bnc;
-    fe->oldest_referenced_xid = oldest_referenced_xid;
+    fe->parent_oldest_referenced_xid_known = parent_oldest_referenced_xid_known;
    cachefile_kibbutz_enq(h->cf, flush_node_fun, fe);
 }

@ -1948,7 +1941,8 @@ place_node_and_bnc_on_background_thread(
 //
 void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
 {
-    TXNID oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
+    toku::context flush_ctx(CTX_FLUSH);
+    TXNID parent_oldest_referenced_xid_known = parent->oldest_referenced_xid_known;
    //
    // first let's see if we can detach buffer on client thread
    // and pick the child we want to flush to
@ -1965,13 +1959,13 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
        // In this case, we could not lock the child, so just place the parent on the background thread
        // In the callback, we will use toku_ft_flush_some_child, which checks to
        // see if we should blow away the old basement nodes.
-        place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
+        place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
    }
    else {
        //
        // successfully locked child
        //
-        bool may_child_be_reactive = may_node_be_reactive(child);
+        bool may_child_be_reactive = may_node_be_reactive(h, child);
        if (!may_child_be_reactive) {
            // We're going to unpin the parent, so before we do, we must
            // check to see if we need to blow away the basement nodes to
@ -1994,7 +1988,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
            // so, because we know for sure the child is not
            // reactive, we can unpin the parent
            //
-            place_node_and_bnc_on_background_thread(h, child, bnc, oldest_referenced_xid_known);
+            place_node_and_bnc_on_background_thread(h, child, bnc, parent_oldest_referenced_xid_known);
            toku_unpin_ftnode(h, parent);
        }
        else {
@ -2004,7 +1998,7 @@ void toku_ft_flush_node_on_background_thread(FT h, FTNODE parent)
            toku_unpin_ftnode(h, child);
            // Again, we'll have the parent on the background thread, so
            // we don't need to destroy the basement nodes yet.
-            place_node_and_bnc_on_background_thread(h, parent, NULL, oldest_referenced_xid_known);
+            place_node_and_bnc_on_background_thread(h, parent, NULL, parent_oldest_referenced_xid_known);
        }
    }
 }
--- a/storage/tokudb/ft-index/ft/ft-flusher.h
+++ b/storage/tokudb/ft-index/ft/ft-flusher.h
@ -232,6 +232,6 @@ void toku_ft_hot_get_status(FT_HOT_STATUS);
 int
 toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
                      int (*progress_callback)(void *extra, float progress),
-                      void *progress_extra);
+                      void *progress_extra, uint64_t* loops_run);

 #endif // End of header guardian.
--- a/storage/tokudb/ft-index/ft/ft-hot-flusher.cc
+++ b/storage/tokudb/ft-index/ft/ft-hot-flusher.cc
@ -96,6 +96,7 @@ PATENT RIGHTS GRANT:
 #include <ft.h>
 #include <portability/toku_atomic.h>
 #include <util/status.h>
+#include <util/context.h>

 // Member Description:
 // 1. highest_pivot_key - this is the key that corresponds to the 
@ -299,8 +300,10 @@ hot_flusher_destroy(struct hot_flusher_extra *flusher)
 int
 toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
                      int (*progress_callback)(void *extra, float progress),
-                      void *progress_extra)
+                      void *progress_extra, uint64_t* loops_run)
 {
+    toku::context flush_ctx(CTX_FLUSH);
+
    int r = 0;
    struct hot_flusher_extra flusher;
    struct flusher_advice advice;
@ -403,6 +406,7 @@ toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
        // Loop until the max key has been updated to positive
        // infinity.
    } while (!flusher.rightmost_leaf_seen);
+    *loops_run = loop_count;

    // Cleanup.
    hot_flusher_destroy(&flusher);
--- a/storage/tokudb/ft-index/ft/ft-internal.h
+++ b/storage/tokudb/ft-index/ft/ft-internal.h
@ -117,15 +117,10 @@ PATENT RIGHTS GRANT:
 #include <util/omt.h>
 #include "bndata.h"

-#ifndef FT_FANOUT
-#define FT_FANOUT 16
-#endif
-enum { TREE_FANOUT = FT_FANOUT };
 enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
-enum { FT_CMD_OVERHEAD = (2 + sizeof(MSN))     // the type plus freshness plus MSN
-};
-
-enum { FT_DEFAULT_NODE_SIZE = 1 << 22 };
+enum { FT_CMD_OVERHEAD = (2 + sizeof(MSN)) };   // the type plus freshness plus MSN
+enum { FT_DEFAULT_FANOUT = 16 };
+enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 };
 enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 };

 //
@ -234,16 +229,14 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc);
 long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc);
 void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, DESCRIPTOR desc, ft_compare_func cmp);
 void toku_bnc_empty(NONLEAF_CHILDINFO bnc);
-void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID oldest_referenced_xid);
+void toku_bnc_flush_to_child(FT h, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known);
 bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull));
 bool toku_ft_nonleaf_is_gorged(FTNODE node, uint32_t nodesize);

-
-enum reactivity get_nonleaf_reactivity (FTNODE node);
-enum reactivity get_node_reactivity (FTNODE node, uint32_t nodesize);
+enum reactivity get_nonleaf_reactivity(FTNODE node, unsigned int fanout);
+enum reactivity get_node_reactivity(FT ft, FTNODE node);
 uint32_t get_leaf_num_entries(FTNODE node);

-
 // data of an available partition of a leaf ftnode
 struct ftnode_leaf_basement_node {
    bn_data data_buffer;
@ -336,7 +329,7 @@ struct ftnode {
    int    height; /* height is always >= 0.  0 for leaf, >0 for nonleaf. */
    int    dirty;
    uint32_t fullhash;
-    int n_children; //for internal nodes, if n_children==TREE_FANOUT+1 then the tree needs to be rebalanced.
+    int n_children; //for internal nodes, if n_children==fanout+1 then the tree needs to be rebalanced.
                    // for leaf nodes, represents number of basement nodes
    unsigned int    totalchildkeylens;
    DBT *childkeys;   /* Pivot keys.  Child 0's keys are <= childkeys[0].  Child 1's keys are <= childkeys[1].
@ -509,6 +502,7 @@ struct ft_header {
    unsigned int nodesize; 
    unsigned int basementnodesize;
    enum toku_compression_method compression_method;
+    unsigned int fanout;

    // Current Minimum MSN to be used when upgrading pre-MSN BRT's.
    // This is decremented from our current MIN_MSN so as not to clash
@ -590,6 +584,7 @@ struct ft_options {
    unsigned int nodesize;
    unsigned int basementnodesize;
    enum toku_compression_method compression_method;
+    unsigned int fanout;
    unsigned int flags;
    ft_compare_func compare_fun;
    ft_update_func update_fun;
@ -632,7 +627,7 @@ int toku_serialize_ftnode_to(int fd, BLOCKNUM, FTNODE node, FTNODE_DISK_DATA* nd
 int toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
                                    FT h, bool for_checkpoint);
 void toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized);
-int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, uint32_t fullhash, ROLLBACK_LOG_NODE *logp, FT h);
+int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE *logp, FT h);
 int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe);
 int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, struct ftnode_fetch_extra *bfe);
 int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, uint32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe);
@ -735,7 +730,8 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data);
 void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone);
 int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs);
 void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs);
-int toku_ftnode_pe_callback (void *ftnode_pv, PAIR_ATTR old_attr, PAIR_ATTR* new_attr, void *extraargs);
+int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs,
+                            void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
 bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs);
 int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep);
 int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs);
@ -787,6 +783,8 @@ struct ft_cursor {
    int out_of_range_error;
    int direction;
    TOKUTXN ttxn;
+    FT_CHECK_INTERRUPT_CALLBACK interrupt_cb;
+    void *interrupt_cb_extra;
 };

 //
@ -1029,7 +1027,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type,
 void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);

 // toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
-void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info);
+void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, txn_gc_info *gc_info);

 void
 toku_get_node_for_verify(
@ -1067,6 +1065,10 @@ typedef enum {
    LE_MAX_PROVISIONAL_XR,
    LE_EXPANDED,
    LE_MAX_MEMSIZE,
+    LE_APPLY_GC_BYTES_IN,
+    LE_APPLY_GC_BYTES_OUT,
+    LE_NORMAL_GC_BYTES_IN,
+    LE_NORMAL_GC_BYTES_OUT,
    LE_STATUS_NUM_ROWS
 } le_status_entry;

@ -1101,6 +1103,9 @@ typedef enum {
    FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT,// number of bytes of nonleaf nodes flushed to disk for checkpoint
    FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT,// number of uncompressed bytes of nonleaf nodes flushed to disk for checkpoint
    FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT,// time spent flushing nonleaf nodes to disk for checkpoint
+    FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO,      // effective compression ratio for leaf bytes flushed to disk
+    FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO,   // effective compression ratio for nonleaf bytes flushed to disk
+    FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO,   // effective compression ratio for all bytes flushed to disk
    FT_PARTIAL_EVICTIONS_NONLEAF,              // number of nonleaf node partial evictions
    FT_PARTIAL_EVICTIONS_NONLEAF_BYTES,        // number of bytes in nonleaf node partial evictions
    FT_PARTIAL_EVICTIONS_LEAF,                 // number of leaf node partial evictions
@ -1196,8 +1201,7 @@ toku_ft_bn_apply_cmd_once (
    const FT_MSG cmd,
    uint32_t idx,
    LEAFENTRY le,
-    TXNID oldest_referenced_xid,
-    GC_INFO gc_info,
+    txn_gc_info *gc_info,
    uint64_t *workdonep,
    STAT64INFO stats_to_update
    );
@ -1209,8 +1213,7 @@ toku_ft_bn_apply_cmd (
    DESCRIPTOR desc,
    BASEMENTNODE bn,
    FT_MSG cmd,
-    TXNID oldest_referenced_xid,
-    GC_INFO gc_info,
+    txn_gc_info *gc_info,
    uint64_t *workdone,
    STAT64INFO stats_to_update
    );
@ -1223,7 +1226,7 @@ toku_ft_leaf_apply_cmd (
    FTNODE node,
    int target_childnum,
    FT_MSG cmd,
-    GC_INFO gc_info,
+    txn_gc_info *gc_info,
    uint64_t *workdone,
    STAT64INFO stats_to_update
    );
@ -1237,7 +1240,7 @@ toku_ft_node_put_cmd (
    int target_childnum,
    FT_MSG cmd,
    bool is_fresh,
-    GC_INFO gc_info,
+    txn_gc_info *gc_info,
    size_t flow_deltas[],
    STAT64INFO stats_to_update
    );
--- a/storage/tokudb/ft-index/ft/ft-ops.cc
+++ b/storage/tokudb/ft-index/ft/ft-ops.cc
--- a/storage/tokudb/ft-index/ft/ft-ops.h
+++ b/storage/tokudb/ft-index/ft/ft-ops.h
@ -114,6 +114,8 @@ PATENT RIGHTS GRANT:
 // When lock_only is true, the callback only does optional lock tree locking.
 typedef int(*FT_GET_CALLBACK_FUNCTION)(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only);

+typedef bool(*FT_CHECK_INTERRUPT_CALLBACK)(void* extra);
+
 int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));

 // effect: changes the descriptor for the ft of the given handle.
@ -135,6 +137,8 @@ void toku_ft_handle_set_basementnodesize(FT_HANDLE, unsigned int basementnodesiz
 void toku_ft_handle_get_basementnodesize(FT_HANDLE, unsigned int *basementnodesize);
 void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_method);
 void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *);
+void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout);
+void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout);

 void toku_ft_set_bt_compare(FT_HANDLE, ft_compare_func);
 ft_compare_func toku_ft_get_bt_compare (FT_HANDLE brt);
@ -239,9 +243,12 @@ void toku_ft_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn);
 // Effect: Delete a key from a brt if the oplsn is newer than the brt lsn.  This function is called during recovery.
 void toku_ft_maybe_delete (FT_HANDLE brt, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);

-void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, TXNID oldest_referenced_xid, GC_INFO gc_info);
-void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xid, GC_INFO gc_info);
-void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, TXNID oldest_referenced_xids, GC_INFO gc_info);
+TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
+TXN_MANAGER toku_ft_get_txn_manager(FT_HANDLE ft_h);
+
+void toku_ft_send_insert(FT_HANDLE brt, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
+void toku_ft_send_delete(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);
+void toku_ft_send_commit_any(FT_HANDLE brt, DBT *key, XIDS xids, txn_gc_info *gc_info);

 int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string)  __attribute__ ((warn_unused_result));

@ -258,6 +265,7 @@ void toku_ft_cursor_set_leaf_mode(FT_CURSOR);
 // the cursor during one query.
 void toku_ft_cursor_set_temporary(FT_CURSOR);
 void toku_ft_cursor_remove_restriction(FT_CURSOR);
+void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR ftcursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra);
 int toku_ft_cursor_is_leaf_mode(FT_CURSOR);
 void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int);

@ -346,5 +354,8 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen);

 extern bool garbage_collection_debug;

+// This is a poor place to put global options like these.
 void toku_ft_set_direct_io(bool direct_io_on);
+void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
+
 #endif
--- a/storage/tokudb/ft-index/ft/ft-serialize.cc
+++ b/storage/tokudb/ft-index/ft/ft-serialize.cc
@ -404,6 +404,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
            .nodesize = nodesize,
            .basementnodesize = basementnodesize,
            .compression_method = compression_method,
+            .fanout = FT_DEFAULT_FANOUT, // fanout is not serialized, must be set at startup
            .highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade,
            .max_msn_in_ft = max_msn_in_ft,
            .time_of_last_optimize_begin = time_of_last_optimize_begin,
@ -461,6 +462,7 @@ serialize_ft_min_size (uint32_t version) {
    size_t size = 0;

    switch(version) {
+    case FT_LAYOUT_VERSION_25:
    case FT_LAYOUT_VERSION_24:
    case FT_LAYOUT_VERSION_23:
    case FT_LAYOUT_VERSION_22:
--- a/Show More
+++ b/Show More