Merge branch '10.0' of github.com:MariaDB/server into 10.0
commit 23af6f5942
CREDITS (1 changed line)
@@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016)
 Acronis http://acronis.com (2016)
 Nexedi https://www.nexedi.com (2016)
 Automattic https://automattic.com (2014 - 2016)
+Tencent Game DBA http://tencentdba.com/about (2016)
 Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016)
 Virtuozzo https://virtuozzo.com (2016)
VERSION (2 changed lines)
@@ -1,3 +1,3 @@
 MYSQL_VERSION_MAJOR=10
 MYSQL_VERSION_MINOR=0
-MYSQL_VERSION_PATCH=27
+MYSQL_VERSION_PATCH=28
@@ -220,6 +220,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES
 "perl(mtr_io.pl)"
 "perl(mtr_match)"
 "perl(mtr_misc.pl)"
+"perl(mtr_gcov.pl)"
+"perl(mtr_gprof.pl)"
+"perl(mtr_process.pl)"
 "perl(mtr_report)"
 "perl(mtr_results)"
 "perl(mtr_unique)")
@@ -882,8 +882,7 @@ typedef long long my_ptrdiff_t;
   and related routines are refactored.
 */

-#define my_offsetof(TYPE, MEMBER) \
-  ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10))
+#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10)

 #define NullS (char *) 0

@@ -1,5 +1,5 @@
 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2010, 2013, Monty Program Ab.
+   Copyright (c) 2010, 2016, Monty Program Ab.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -271,7 +271,7 @@ extern my_bool my_use_symdir;
 extern ulong my_default_record_cache_size;
 extern my_bool my_disable_locking, my_disable_async_io,
                my_disable_flush_key_blocks, my_disable_symlinks;
-extern my_bool my_disable_sync;
+extern my_bool my_disable_sync, my_disable_copystat_in_redel;
 extern char wild_many,wild_one,wild_prefix;
 extern const char *charsets_dir;
 extern my_bool timed_mutexes;
@@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix';

 # Use '/' instead of '\' in the error message. On windows platform, dir is
 # formed with '\'.
---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/
+--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /247/39/ /File exists/Directory not empty/
 --error 1010
 DROP DATABASE testing_1;
 let $wait_binlog_event= DROP TABLE IF EXIST;
@@ -341,6 +341,7 @@ while ($1)
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;

 # to test the bug, the following must use "sort_union":
 --replace_column 9 REF
@@ -260,12 +260,8 @@ sub show {

   # On Windows, rely on cdb to be there...
   if (IS_WINDOWS)
-  {
-    # Starting cdb is unsafe when used with --parallel > 1 option
-    if ( $parallel < 2 )
   {
     _cdb($core_name);
-  }
     return;
   }

@@ -60,8 +60,6 @@ use My::Test;
 use My::Find;
 use My::Suite;

-require "mtr_misc.pl";
-
 # locate plugin suites, depending on whether it's a build tree or installed
 my @plugin_suitedirs;
 my $plugin_suitedir_regex;
@@ -1122,7 +1120,7 @@ sub get_tags_from_file($$) {
   $file_to_tags{$file}= $tags;
   $file_to_master_opts{$file}= $master_opts;
   $file_to_slave_opts{$file}= $slave_opts;
-  $file_combinations{$file}= [ uniq(@combinations) ];
+  $file_combinations{$file}= [ ::uniq(@combinations) ];
   $file_in_overlay{$file} = 1 if $in_overlay;
   return @{$tags};
 }
@@ -34,7 +34,6 @@ use mtr_match;
 use My::Platform;
 use POSIX qw[ _exit ];
 use IO::Handle qw[ flush ];
-require "mtr_io.pl";
 use mtr_results;

 my $tot_real_time= 0;
@@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) {
   my $timer_str= "";
   if ( $timer and -f "$::opt_vardir/log/timer" )
   {
-    $timer_str= mtr_fromfile("$::opt_vardir/log/timer");
+    $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer");
     $tinfo->{timer}= $timer_str;
     resfile_test_info('duration', $timer_str) if $::opt_resfile;
   }
@@ -102,11 +102,11 @@ use mtr_results;
 use IO::Socket::INET;
 use IO::Select;

-require "lib/mtr_process.pl";
-require "lib/mtr_io.pl";
-require "lib/mtr_gcov.pl";
-require "lib/mtr_gprof.pl";
-require "lib/mtr_misc.pl";
+require "mtr_process.pl";
+require "mtr_io.pl";
+require "mtr_gcov.pl";
+require "mtr_gprof.pl";
+require "mtr_misc.pl";

 $SIG{INT}= sub { mtr_error("Got ^C signal"); };
 $SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); };
@@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation
 Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation
 Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation
 Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation
+Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation
 Google USA Sponsoring encryption, parallel replication and GTID
 Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc
 Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction
@@ -1658,6 +1658,9 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061))
 SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
 CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061))
 1
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+c
+FFFF0000
 #
 # End of 5.5 tests
 #
@@ -286,3 +286,19 @@ F 28 28
 F 29 29
 F 30 30
 DROP TABLE t0,t1,t2;
+#
+# MDEV-MariaDB daemon leaks memory with specific query
+#
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+`language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+translation_resources serialized_c
NULL cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+drop table t1,t2;
@@ -311,6 +311,9 @@ set @d=@d*2;
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
 explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
 id select_type table type possible_keys key key_len ref rows Extra
 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
@@ -1146,6 +1146,9 @@ set @d=@d*2;
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
 explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
 id select_type table type possible_keys key key_len ref rows Extra
 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
@@ -3832,6 +3832,23 @@ test.m1 repair error Corrupt
 # Clean-up.
 drop tables m1, t1, t4;
 drop view t3;
+#
+# MDEV-10424 - Assertion `ticket == __null' failed in
+# MDL_request::set_type
+#
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+Table Op Msg_type Msg_text
+test.tmerge analyze note The storage engine for the table doesn't support analyze
+test.t1 analyze status Table is already up to date
+EXECUTE stmt;
+Table Op Msg_type Msg_text
+test.tmerge analyze note The storage engine for the table doesn't support analyze
+test.t1 analyze status Table is already up to date
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
 End of 5.5 tests
 #
 # Additional coverage for refactoring which is made as part
@@ -4076,4 +4076,35 @@ id value
 deallocate prepare stmt;
 SET SESSION sql_mode = @save_sql_mode;
 DROP TABLE t1,t2;
-# End of 10.0 tests
+#
+# MDEV-8833: Crash of server on prepared statement with
+# conversion to semi-join
+#
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+# End of 5.5 tests
@@ -14,6 +14,25 @@ this
 0
 4294967295
 drop table t1;
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+coalesce(a,b) coalesce(b,a)
+1 2
+18446744073709551615 16777215
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+Table Create Table
+t2 CREATE TABLE `t2` (
+  `a` bigint(20) unsigned DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select * from t2;
+a
+1
+18446744073709551615
+2
+16777215
+drop table t1, t2;
 #
 # Start of 10.0 tests
 #
@@ -6,7 +6,8 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` (
 `IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 DROP TABLE table_54044;
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+CREATE TABLE tmp ENGINE = INNODB
+AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
 SHOW CREATE TABLE tmp;
 Table Create Table
 tmp CREATE TABLE `tmp` (
mysql-test/suite/innodb/r/system_tables.result (new file, 8 lines)
@@ -0,0 +1,8 @@
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+starttime
+2008-08-12 02:43:00
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
@@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB
 SHOW CREATE TABLE table_54044;
 DROP TABLE table_54044;

-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+# This 'create table' should pass since it uses a Field_string of size 0.
+
+CREATE TABLE tmp ENGINE = INNODB
+AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
 SHOW CREATE TABLE tmp;
 DROP TABLE tmp;

@@ -23,4 +26,3 @@ FLUSH TABLES;
 --error 1005
 CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1;
 DROP TABLE t1;
-
mysql-test/suite/innodb/t/system_tables.test (new file, 12 lines)
@@ -0,0 +1,12 @@
+--source include/have_innodb.inc
+
+#
+# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash
+#
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+--source include/restart_mysqld.inc
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
@@ -1,121 +0,0 @@
-"General cleanup"
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-drop table if exists t1;
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-create table t1 (
-id INT PRIMARY KEY,
-b CHAR(100) DEFAULT 'initial value')
-ENGINE=MyISAM;
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-update performance_schema.setup_instruments SET enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-set @dump_all=FALSE;
-"Verifying file aggregate consistency"
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-"Verifying waits aggregate consistency (instance)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT)
-"Verifying waits aggregate consistency (thread)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT)
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-drop table test.t1;
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
@@ -1,197 +0,0 @@
-# Tests for PERFORMANCE_SCHEMA
-# Verify that statistics aggregated by different criteria are consistent.
-
---source include/not_embedded.inc
---source include/have_perfschema.inc
-
---echo "General cleanup"
-
-# MDEV-7187 - test fails sporadically in buildbot
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Cleanup statistics
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-
-# Start recording data
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-
-
-create table t1 (
-id INT PRIMARY KEY,
-b CHAR(100) DEFAULT 'initial value')
-ENGINE=MyISAM;
-
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-
-# Stop recording data, so the select below don't add noise.
-update performance_schema.setup_instruments SET enabled = 'NO';
-# Disable all consumers, for long standing waits
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Helper to debug
-set @dump_all=FALSE;
-
-# Note that in general:
-# - COUNT/SUM/MAX(file_summary_by_event_name) >=
-#   COUNT/SUM/MAX(file_summary_by_instance).
-# - MIN(file_summary_by_event_name) <=
-#   MIN(file_summary_by_instance).
-# There will be equality only when file instances are not removed,
-# aka when a file is not deleted from the file system,
-# because doing so removes a row in file_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-#   COUNT/SUM/MAX(events_waits_summary_by_instance)
-# - MIN(events_waits_summary_global_by_event_name) <=
-#   MIN(events_waits_summary_by_instance)
-# There will be equality only when an instrument instance
-# is not removed, which is next to impossible to predictably guarantee
-# in the server.
-# For example, a MyISAM table removed from the table cache
-# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock.
-# Another example, a thread terminating will cause a mysql_mutex_destroy
-# on sql/LOCK_delete
-# Both cause a row to be deleted from events_waits_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-#   COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name)
-# - MIN(events_waits_summary_global_by_event_name) <=
-#   MIN(events_waits_summary_by_thread_by_event_name)
-# There will be equality only when no thread is removed,
-# that is if no thread disconnects, or no sub thread (for example insert
-# delayed) ever completes.
-# A thread completing will cause rows in
-# events_waits_summary_by_thread_by_event_name to be removed.
-
---echo "Verifying file aggregate consistency"
-
-# Since the code generating the load in this test does:
-# - create table
-# - insert
-# - does not cause temporary tables to be used
-# we can test for equality here for file aggregates.
-
-# If any of these queries returns data, the test failed.
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (instance)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (thread)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-
-
-# Cleanup
-
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-
-drop table test.t1;
-
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
@@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
 server_audit_file_rotate_size 1000000
 server_audit_file_rotations 9
 server_audit_incl_users
-server_audit_loc_info
 server_audit_logging OFF
 server_audit_mode 0
 server_audit_output_type file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
 server_audit_file_rotate_size 1000000
 server_audit_file_rotations 9
 server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
 server_audit_logging ON
 server_audit_mode 0
 server_audit_output_type file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
 server_audit_file_rotate_size 1000000
 server_audit_file_rotations 9
 server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
 server_audit_logging ON
 server_audit_mode 1
 server_audit_output_type file
@@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
 server_audit_file_rotate_size 1000000
 server_audit_file_rotations 9
 server_audit_incl_users
-server_audit_loc_info
 server_audit_logging OFF
 server_audit_mode 0
 server_audit_output_type file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
 server_audit_file_rotate_size 1000000
 server_audit_file_rotations 9
 server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
 server_audit_logging ON
 server_audit_mode 0
 server_audit_output_type file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
 server_audit_file_rotate_size 1000000
 server_audit_file_rotations 9
 server_audit_incl_users odin, root, dva, tri
-server_audit_loc_info
 server_audit_logging ON
 server_audit_mode 1
 server_audit_output_type file
@@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1);
 select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt';
 create table mysqltest1.t2 (n int);
 create table mysqltest1.t3 (n int);
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty"
 --error 1010
 drop database mysqltest1;
 use mysqltest1;
@@ -30,7 +30,7 @@ while ($1)
 }
 --enable_query_log

---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty"
 --error 1010
 drop database mysqltest1;
 use mysqltest1;
@@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061));
 SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061));
 SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));

+#
+# potential signedness issue
+#
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+
 --echo #
 --echo # End of 5.5 tests
 --echo #
@@ -230,3 +230,16 @@ eval EXPLAIN $query;
 eval $query;

 DROP TABLE t0,t1,t2;
+
+--echo #
+--echo # MDEV-MariaDB daemon leaks memory with specific query
+--echo #
+
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+`language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+drop table t1,t2;
@@ -2880,6 +2880,19 @@ drop tables m1, t1, t4;
 drop view t3;


+--echo #
+--echo # MDEV-10424 - Assertion `ticket == __null' failed in
+--echo # MDL_request::set_type
+--echo #
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+EXECUTE stmt;
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
+
+
 --echo End of 5.5 tests


@@ -3653,5 +3653,32 @@ deallocate prepare stmt;
 SET SESSION sql_mode = @save_sql_mode;
 DROP TABLE t1,t2;

---echo # End of 10.0 tests
+--echo #
+--echo # MDEV-8833: Crash of server on prepared statement with
+--echo # conversion to semi-join
+--echo #
+
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+EXECUTE stmt;
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+
+
+--echo # End of 5.5 tests

@@ -16,6 +16,13 @@ drop table t1;

 # End of 4.1 tests

+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+select * from t2;
+drop table t1, t2;

 --echo #
 --echo # Start of 10.0 tests
@@ -1,5 +1,5 @@
-/*
-   Copyright (c) 2000, 2010, Oracle and/or its affiliates
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates
+   Copyright (c) 2009, 2016, MariaDB

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name,
   DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu",
                    org_name,tmp_name,MyFlags));

-  if (my_copystat(org_name,tmp_name,MyFlags) < 0)
+  if (!my_disable_copystat_in_redel &&
+      my_copystat(org_name,tmp_name,MyFlags) < 0)
     goto end;
   if (MyFlags & MY_REDEL_MAKE_BACKUP)
   {
@@ -98,3 +98,4 @@ my_bool my_disable_sync=0;
 my_bool my_disable_async_io=0;
 my_bool my_disable_flush_key_blocks=0;
 my_bool my_disable_symlinks=0;
+my_bool my_disable_copystat_in_redel=0;
@@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit,
 char locinfo_ini_value[sizeof(struct connection_info)+4];

 static MYSQL_THDVAR_STR(loc_info,
-                        PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC,
-                        "Auxiliary info.", NULL, NULL,
-                        locinfo_ini_value);
+                        PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC,
+                        "Internal info", NULL, NULL, locinfo_ini_value);

 static const char *syslog_facility_names[]=
 {
@@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= {
   {"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"},
   {"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"},
   {"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"},
+  {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"},

   /* Sponsors of important features */
   {"Google", "USA", "Sponsoring encryption, parallel replication and GTID"},
@@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
   //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP
   MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR,
   //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24
-  MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG,
+  MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG,
   //MYSQL_TYPE_DATE MYSQL_TYPE_TIME
   MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR,
   //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR
sql/item.cc (23 changed lines)
@@ -2743,9 +2743,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
   if (context)
   {
     Name_resolution_context *ctx= new Name_resolution_context();
-    ctx->outer_context= NULL; // We don't build a complete name resolver
-    ctx->table_list= NULL; // We rely on first_name_resolution_table instead
+    if (context->select_lex == new_parent)
+    {
+      /*
+        This field was pushed in then pulled out
+        (for example left part of IN)
+      */
+      ctx->outer_context= context->outer_context;
+    }
+    else if (context->outer_context)
+    {
+      /* just pull to the upper context */
+      ctx->outer_context= context->outer_context->outer_context;
+    }
+    else
+    {
+      /* No upper context (merging Derived/VIEW where context chain ends) */
+      ctx->outer_context= NULL;
+    }
+    ctx->table_list= context->first_name_resolution_table;
     ctx->select_lex= new_parent;
+    if (context->select_lex == NULL)
+      ctx->select_lex= NULL;
     ctx->first_name_resolution_table= context->first_name_resolution_table;
     ctx->last_name_resolution_table= context->last_name_resolution_table;
     ctx->error_processor= context->error_processor;
@@ -3011,7 +3011,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
       if (! write_error)
       {
         write_error= 1;
-        sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
+        sql_print_error(ER(ER_ERROR_ON_WRITE), name, tmp_errno);
       }
     }
   }
@@ -3916,6 +3916,7 @@ static int init_common_variables()

   max_system_variables.pseudo_thread_id= (ulong)~0;
   server_start_time= flush_status_time= my_time(0);
+  my_disable_copystat_in_redel= 1;

   global_rpl_filter= new Rpl_filter;
   binlog_filter= new Rpl_filter;
@@ -1,5 +1,5 @@
-/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2010, 2014, SkySQL Ab.
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2012, 2016, MariaDB

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -43,7 +43,7 @@ enum file_opt_type {
 struct File_option
 {
   LEX_STRING name;      /**< Name of the option */
-  int offset;           /**< offset to base address of value */
+  my_ptrdiff_t offset;  /**< offset to base address of value */
   file_opt_type type;   /**< Option type */
 };

@@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig)
   struct tm tm;
 #ifdef HAVE_STACKTRACE
   THD *thd;
-#endif
   /*
     This flag remembers if the query pointer was found invalid.
     We will try and print the query at the end of the signal handler, in case
     we're wrong.
   */
   bool print_invalid_query_pointer= false;
+#endif

   if (segfaulted)
   {
@@ -265,6 +265,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
                         "\"mlockall\" bugs.\n");
   }

+#ifdef HAVE_STACKTRACE
   if (print_invalid_query_pointer)
   {
     my_safe_printf_stderr(
@@ -274,6 +275,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
     my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length()));
     my_safe_printf_stderr("\n\n");
   }
+#endif

 #ifdef HAVE_WRITE_CORE
   if (test_flags & TEST_CORE_ON_SIGNAL)
@@ -455,7 +455,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
       }
       thd->prepare_derived_at_open= FALSE;

-      table->next_global= save_next_global;
+      /*
+        MERGE engine may adjust table->next_global chain, thus we have to
+        append save_next_global after merge children.
+      */
+      if (save_next_global)
+      {
+        TABLE_LIST *table_list_iterator= table;
+        while (table_list_iterator->next_global)
+          table_list_iterator= table_list_iterator->next_global;
+        table_list_iterator->next_global= save_next_global;
+        save_next_global->prev_global= &table_list_iterator->next_global;
+      }
+
       table->next_local= save_next_local;
       thd->open_options&= ~extra_open_options;

|
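Note: the hunk above re-attaches save_next_global at the tail of the (possibly MERGE-extended) table list and repairs the prev_global back-pointer. As a rough stand-alone illustration of that linked-list pattern only — Node, append_chain and the field names below are invented for this sketch, not MariaDB types:

#include <cassert>

struct Node {
  Node *next= nullptr;
  Node **prev= nullptr;   // back-pointer: address of the 'next' that points at us
};

// Walk to the tail of 'list', hang 'chain' off it and fix the back-pointer,
// mirroring how save_next_global is re-appended after the MERGE children.
static void append_chain(Node *list, Node *chain)
{
  Node *it= list;
  while (it->next)
    it= it->next;
  it->next= chain;
  chain->prev= &it->next;
}

int main()
{
  Node a, b, c;
  a.next= &b; b.prev= &a.next;
  append_chain(&a, &c);
  assert(b.next == &c && c.prev == &b.next);
  return 0;
}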
@@ -9223,6 +9223,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list,
 */
 lex->reset_n_backup_query_tables_list(&query_tables_list_backup);
 thd->reset_n_backup_open_tables_state(backup);
+thd->lex->sql_command= SQLCOM_SELECT;
 
 if (open_and_lock_tables(thd, table_list, FALSE,
 MYSQL_OPEN_IGNORE_FLUSH |
@@ -5371,9 +5371,11 @@ int THD::decide_logging_format(TABLE_LIST *tables)
 {
 static const char *prelocked_mode_name[] = {
 "NON_PRELOCKED",
+"LOCK_TABLES",
 "PRELOCKED",
 "PRELOCKED_UNDER_LOCK_TABLES",
 };
+compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last);
 DBUG_PRINT("debug", ("prelocked_mode: %s",
 prelocked_mode_name[locked_tables_mode]));
 }
@@ -1182,7 +1182,8 @@ enum enum_locked_tables_mode
 LTM_NONE= 0,
 LTM_LOCK_TABLES,
 LTM_PRELOCKED,
-LTM_PRELOCKED_UNDER_LOCK_TABLES
+LTM_PRELOCKED_UNDER_LOCK_TABLES,
+LTM_always_last
 };
 
 
|
|||||||
save_copy_field_end= copy_field_end= NULL;
|
save_copy_field_end= copy_field_end= NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void free_copy_field_data()
|
||||||
|
{
|
||||||
|
for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++)
|
||||||
|
ptr->tmp.free();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class select_union :public select_result_interceptor
|
class select_union :public select_result_interceptor
|
||||||
|
@@ -2810,6 +2810,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
 }
 
 
+static size_t var_storage_size(int flags)
+{
+switch (flags & PLUGIN_VAR_TYPEMASK) {
+case PLUGIN_VAR_BOOL: return sizeof(my_bool);
+case PLUGIN_VAR_INT: return sizeof(int);
+case PLUGIN_VAR_LONG: return sizeof(long);
+case PLUGIN_VAR_ENUM: return sizeof(long);
+case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong);
+case PLUGIN_VAR_SET: return sizeof(ulonglong);
+case PLUGIN_VAR_STR: return sizeof(char*);
+case PLUGIN_VAR_DOUBLE: return sizeof(double);
+default: DBUG_ASSERT(0); return 0;
+}
+}
+
+
 /*
 returns a bookmark for thd-local variables, creating if neccessary.
 returns null for non thd-local variables.
@@ -2818,39 +2834,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
 static st_bookmark *register_var(const char *plugin, const char *name,
 int flags)
 {
-uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size;
+uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size;
 st_bookmark *result;
 char *varname, *p;
 
-if (!(flags & PLUGIN_VAR_THDLOCAL))
-return NULL;
-
-switch (flags & PLUGIN_VAR_TYPEMASK) {
-case PLUGIN_VAR_BOOL:
-size= sizeof(my_bool);
-break;
-case PLUGIN_VAR_INT:
-size= sizeof(int);
-break;
-case PLUGIN_VAR_LONG:
-case PLUGIN_VAR_ENUM:
-size= sizeof(long);
-break;
-case PLUGIN_VAR_LONGLONG:
-case PLUGIN_VAR_SET:
-size= sizeof(ulonglong);
-break;
-case PLUGIN_VAR_STR:
-size= sizeof(char*);
-break;
-case PLUGIN_VAR_DOUBLE:
-size= sizeof(double);
-break;
-default:
-DBUG_ASSERT(0);
-return NULL;
-};
-
+DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL);
+size= var_storage_size(flags);
 varname= ((char*) my_alloca(length));
 strxmov(varname + 1, plugin, "_", name, NullS);
 for (p= varname + 1; *p; p++)
@@ -3046,25 +3036,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock)
 */
 for (idx= 0; idx < bookmark_hash.records; idx++)
 {
-sys_var_pluginvar *pi;
-sys_var *var;
 st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx);
 
 if (v->version <= thd->variables.dynamic_variables_version)
 continue; /* already in thd->variables */
 
-if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
-!(pi= var->cast_pluginvar()) ||
-v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags))
-continue;
-
 /* Here we do anything special that may be required of the data types */
 
-if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
-pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC)
+if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
+v->key[0] & BOOKMARK_MEMALLOC)
 {
-int offset= ((thdvar_str_t *)(pi->plugin_var))->offset;
-char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset);
+char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset);
 if (*pp)
 *pp= my_strdup(*pp, MYF(MY_WME|MY_FAE));
 }
@@ -3325,6 +3307,48 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var)
 return false;
 }
 
+static const void *var_def_ptr(st_mysql_sys_var *pv)
+{
+switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
+case PLUGIN_VAR_INT:
+return &((sysvar_uint_t*) pv)->def_val;
+case PLUGIN_VAR_LONG:
+return &((sysvar_ulong_t*) pv)->def_val;
+case PLUGIN_VAR_LONGLONG:
+return &((sysvar_ulonglong_t*) pv)->def_val;
+case PLUGIN_VAR_ENUM:
+return &((sysvar_enum_t*) pv)->def_val;
+case PLUGIN_VAR_SET:
+return &((sysvar_set_t*) pv)->def_val;
+case PLUGIN_VAR_BOOL:
+return &((sysvar_bool_t*) pv)->def_val;
+case PLUGIN_VAR_STR:
+return &((sysvar_str_t*) pv)->def_val;
+case PLUGIN_VAR_DOUBLE:
+return &((sysvar_double_t*) pv)->def_val;
+case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_uint_t*) pv)->def_val;
+case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_ulong_t*) pv)->def_val;
+case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_ulonglong_t*) pv)->def_val;
+case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_enum_t*) pv)->def_val;
+case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_set_t*) pv)->def_val;
+case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_bool_t*) pv)->def_val;
+case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_str_t*) pv)->def_val;
+case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
+return &((thdvar_double_t*) pv)->def_val;
+default:
+DBUG_ASSERT(0);
+return NULL;
+}
+}
+
+
 bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
 {
 DBUG_ASSERT(!is_readonly());
@@ -3334,60 +3358,7 @@ bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
 const void *src= &var->save_result;
 
 if (!var->value)
-{
-switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
-case PLUGIN_VAR_INT:
-src= &((sysvar_uint_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_LONG:
-src= &((sysvar_ulong_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_LONGLONG:
-src= &((sysvar_ulonglong_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_ENUM:
-src= &((sysvar_enum_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_SET:
-src= &((sysvar_set_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_BOOL:
-src= &((sysvar_bool_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_STR:
-src= &((sysvar_str_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_DOUBLE:
-src= &((sysvar_double_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_uint_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_ulong_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_ulonglong_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_enum_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_set_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_bool_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_str_t*) plugin_var)->def_val;
-break;
-case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
-src= &((thdvar_double_t*) plugin_var)->def_val;
-break;
-default:
-DBUG_ASSERT(0);
-}
-}
+src= var_def_ptr(plugin_var);
 
 plugin_var->update(thd, plugin_var, tgt, src);
 return false;
@@ -3743,7 +3714,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
 *(int*)(opt + 1)= offset= v->offset;
 
 if (opt->flags & PLUGIN_VAR_NOCMDOPT)
+{
+char *val= global_system_variables.dynamic_variables_ptr + offset;
+if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) &&
+(opt->flags & PLUGIN_VAR_MEMALLOC))
+{
+char *def_val= *(char**)var_def_ptr(opt);
+*(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL;
+}
+else
+memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags));
 continue;
+}
 
 optname= (char*) memdup_root(mem_root, v->key + 1,
 (optnamelen= v->name_len) + 1);
@@ -3951,10 +3933,11 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
 *str->value= strdup_root(mem_root, *str->value);
 }
 
+var= find_bookmark(plugin_name.str, o->name, o->flags);
 if (o->flags & PLUGIN_VAR_NOSYSVAR)
 continue;
 tmp_backup[tmp->nbackups++].save(&o->name);
-if ((var= find_bookmark(plugin_name.str, o->name, o->flags)))
+if (var)
 v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o, tmp);
 else
 {
@@ -9006,7 +9006,24 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
 */
 if (!tmp_join || tmp_join != this)
 tmp_table_param.cleanup();
+else
+{
+/*
+Free data buffered in copy_fields, but keep data pointed by copy_field
+around for next iteration (possibly stored in save_copy_fields).
+
+It would be logically simpler to not clear copy_field
+below, but as we have loops that runs over copy_field to
+copy_field_end that should not be done anymore, it's simpler to
+just clear the pointers.
+
+Another option would be to just clear copy_field_end and not run
+the loops if this is not set or to have tmp_table_param.cleanup()
+to run cleanup on save_copy_field if copy_field is not set.
+*/
+tmp_table_param.free_copy_field_data();
 tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+}
 first_record= sort_and_group=0;
 send_records= (ha_rows) 0;
 
|
|||||||
/**
|
/**
|
||||||
Free resources of given join.
|
Free resources of given join.
|
||||||
|
|
||||||
@param fill true if we should free all resources, call with full==1
|
@param full true if we should free all resources, call with full==1
|
||||||
should be last, before it this function can be called with
|
should be last, before it this function can be called with
|
||||||
full==0
|
full==0
|
||||||
|
|
||||||
@ -11806,7 +11823,7 @@ void JOIN::cleanup(bool full)
|
|||||||
/*
|
/*
|
||||||
If we have tmp_join and 'this' JOIN is not tmp_join and
|
If we have tmp_join and 'this' JOIN is not tmp_join and
|
||||||
tmp_table_param.copy_field's of them are equal then we have to remove
|
tmp_table_param.copy_field's of them are equal then we have to remove
|
||||||
pointer to tmp_table_param.copy_field from tmp_join, because it qill
|
pointer to tmp_table_param.copy_field from tmp_join, because it will
|
||||||
be removed in tmp_table_param.cleanup().
|
be removed in tmp_table_param.cleanup().
|
||||||
*/
|
*/
|
||||||
if (tmp_join &&
|
if (tmp_join &&
|
||||||
@ -15710,6 +15727,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
|
|||||||
case Item::VARBIN_ITEM:
|
case Item::VARBIN_ITEM:
|
||||||
case Item::CACHE_ITEM:
|
case Item::CACHE_ITEM:
|
||||||
case Item::EXPR_CACHE_ITEM:
|
case Item::EXPR_CACHE_ITEM:
|
||||||
|
case Item::PARAM_ITEM:
|
||||||
if (make_copy_field)
|
if (make_copy_field)
|
||||||
{
|
{
|
||||||
DBUG_ASSERT(((Item_result_field*)item)->result_field);
|
DBUG_ASSERT(((Item_result_field*)item)->result_field);
|
||||||
|
@@ -876,6 +876,8 @@ void tdc_release_share(TABLE_SHARE *share)
 }
 if (--share->tdc.ref_count)
 {
+if (!share->is_view)
+mysql_cond_broadcast(&share->tdc.COND_release);
 mysql_mutex_unlock(&share->tdc.LOCK_table_share);
 mysql_mutex_unlock(&LOCK_unused_shares);
 DBUG_VOID_RETURN;
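Note: the hunk above broadcasts tdc.COND_release whenever a reference to a non-view share is dropped, so a thread waiting for the share to become unused can re-check its condition. A rough stand-alone sketch of that pattern with invented names (std::condition_variable standing in for mysql_cond_*):

#include <mutex>
#include <condition_variable>

struct Share                       // invented stand-in, not TABLE_SHARE
{
  std::mutex lock;
  std::condition_variable cond_release;
  unsigned ref_count= 1;
};

void release(Share &s)
{
  std::lock_guard<std::mutex> guard(s.lock);
  if (--s.ref_count)               // still referenced: wake anyone waiting
    s.cond_release.notify_all();   // so they can re-check their condition
}

void wait_until_last_user(Share &s)
{
  std::unique_lock<std::mutex> guard(s.lock);
  s.cond_release.wait(guard, [&] { return s.ref_count <= 1; });
}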
@@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
 /** variable to record innodb_fts_internal_tbl_name for information
 schema table INNODB_FTS_INSERTED etc. */
 UNIV_INTERN char* fts_internal_tbl_name = NULL;
+UNIV_INTERN char* fts_internal_tbl_name2 = NULL;
 
 /** InnoDB default stopword list:
 There are different versions of stopwords, the stop words listed
@@ -6570,6 +6571,36 @@ fts_check_corrupt_index(
 return(0);
 }
 
+/* Get parent table name if it's a fts aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+const char* aux_table_name,
+ulint aux_table_len)
+{
+fts_aux_table_t aux_table;
+char* parent_table_name = NULL;
+
+if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
+dict_table_t* parent_table;
+
+parent_table = dict_table_open_on_id(
+aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
+
+if (parent_table != NULL) {
+parent_table_name = mem_strdupl(
+parent_table->name,
+strlen(parent_table->name));
+
+dict_table_close(parent_table, TRUE, FALSE);
+}
+}
+
+return(parent_table_name);
+}
+
 /** Check the validity of the parent table.
 @param[in] aux_table auxiliary table
 @return true if it is a valid table or false if it is not */
@@ -15010,7 +15010,12 @@ innodb_internal_table_update(
 my_free(old);
 }
 
-fts_internal_tbl_name = *(char**) var_ptr;
+fts_internal_tbl_name2 = *(char**) var_ptr;
+if (fts_internal_tbl_name2 == NULL) {
+fts_internal_tbl_name = const_cast<char*>("default");
+} else {
+fts_internal_tbl_name = fts_internal_tbl_name2;
+}
 }
 
 /****************************************************************//**
@@ -16793,7 +16798,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
 "Whether to disable OS system file cache for sort I/O",
 NULL, NULL, FALSE);
 
-static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
+static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
 PLUGIN_VAR_NOCMDARG,
 "FTS internal auxiliary table to be checked",
 innodb_internal_table_validate,
@@ -209,7 +209,10 @@ innobase_need_rebuild(
 const Alter_inplace_info* ha_alter_info,
 const TABLE* altered_table)
 {
-if (ha_alter_info->handler_flags
+Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
+ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
+
+if (alter_inplace_flags
 == Alter_inplace_info::CHANGE_CREATE_OPTION
 && !(ha_alter_info->create_info->used_fields
 & (HA_CREATE_USED_ROW_FORMAT
@@ -3933,7 +3936,7 @@ err_exit:
 }
 
 if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
-|| (ha_alter_info->handler_flags
+|| ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
 == Alter_inplace_info::CHANGE_CREATE_OPTION
 && !innobase_need_rebuild(ha_alter_info, table))) {
 
@@ -4107,7 +4110,7 @@ ok_exit:
 DBUG_RETURN(false);
 }
 
-if (ha_alter_info->handler_flags
+if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
 == Alter_inplace_info::CHANGE_CREATE_OPTION
 && !innobase_need_rebuild(ha_alter_info, table)) {
 goto ok_exit;
@@ -3981,6 +3981,8 @@ i_s_fts_config_fill(
 DBUG_RETURN(0);
 }
 
+DEBUG_SYNC_C("i_s_fts_config_fille_check");
+
 fields = table->field;
 
 /* Prevent DDL to drop fts aux tables. */
@@ -375,6 +375,7 @@ extern bool fts_need_sync;
 /** Variable specifying the table that has Fulltext index to display its
 content through information schema table */
 extern char* fts_internal_tbl_name;
+extern char* fts_internal_tbl_name2;
 
 #define fts_que_graph_free(graph) \
 do { \
@@ -823,6 +824,15 @@ void
 fts_drop_orphaned_tables(void);
 /*==========================*/
 
+/* Get parent table name if it's a fts aux table
+@param[in] aux_table_name aux table name
+@param[in] aux_table_len aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+const char* aux_table_name,
+ulint aux_table_len);
+
 /******************************************************************//**
 Since we do a horizontal split on the index table, we need to drop
 all the split tables.
@@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
 
 #define INNODB_VERSION_MAJOR 5
 #define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 32
+#define INNODB_VERSION_BUGFIX 33
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
@@ -613,7 +613,7 @@ row_log_table_delete(
 &old_pk_extra_size);
 ut_ad(old_pk_extra_size < 0x100);
 
-mrec_size = 4 + old_pk_size;
+mrec_size = 6 + old_pk_size;
 
 /* Log enough prefix of the BLOB unless both the
 old and new table are in COMPACT or REDUNDANT format,
@@ -643,8 +643,8 @@ row_log_table_delete(
 *b++ = static_cast<byte>(old_pk_extra_size);
 
 /* Log the size of external prefix we saved */
-mach_write_to_2(b, ext_size);
-b += 2;
+mach_write_to_4(b, ext_size);
+b += 4;
 
 rec_convert_dtuple_to_temp(
 b + old_pk_extra_size, new_index,
@@ -2268,14 +2268,14 @@ row_log_table_apply_op(
 break;
 
 case ROW_T_DELETE:
-/* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
-if (mrec + 4 >= mrec_end) {
+/* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
+if (mrec + 6 >= mrec_end) {
 return(NULL);
 }
 
 extra_size = *mrec++;
-ext_size = mach_read_from_2(mrec);
-mrec += 2;
+ext_size = mach_read_from_4(mrec);
+mrec += 4;
 ut_ad(mrec < mrec_end);
 
 /* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
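Note: the three hunks above widen the logged BLOB-prefix size from a 2-byte to a 4-byte field, so the writer (mach_write_to_4), the reader (mach_read_from_4) and the fixed-size estimate (4 -> 6 bytes) all change together. A self-contained sketch of the same idea, assuming nothing about InnoDB beyond what the diff shows; write_be32/read_be32 are local helpers, not InnoDB functions:

#include <cassert>
#include <cstdint>

// Local big-endian helpers only; not InnoDB's mach_write_to_4/mach_read_from_4.
static void write_be32(unsigned char *b, uint32_t v)
{
  b[0]= (unsigned char)(v >> 24); b[1]= (unsigned char)(v >> 16);
  b[2]= (unsigned char)(v >> 8);  b[3]= (unsigned char) v;
}
static uint32_t read_be32(const unsigned char *b)
{
  return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
         ((uint32_t)b[2] << 8)  |  (uint32_t)b[3];
}

int main()
{
  unsigned char rec[1 + 4];        // 1 byte extra_size + 4 byte ext_size
  rec[0]= 1;
  write_be32(rec + 1, 70000);      // a value that no longer fits in 2 bytes
  assert(read_be32(rec + 1) == 70000);
  return 0;
}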
@@ -2715,6 +2715,10 @@ loop:
 return(n_tables + n_tables_dropped);
 }
 
+DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
+os_thread_sleep(5000000);
+);
+
 table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
 DICT_ERR_IGNORE_NONE);
 
@@ -2725,6 +2729,16 @@ loop:
 goto already_dropped;
 }
 
+if (!table->to_be_dropped) {
+/* There is a scenario: the old table is dropped
+just after it's added into drop list, and new
+table with the same name is created, then we try
+to drop the new table in background. */
+dict_table_close(table, FALSE, FALSE);
+
+goto already_dropped;
+}
+
 ut_a(!table->can_be_evicted);
 
 dict_table_close(table, FALSE, FALSE);
@@ -3992,6 +4006,13 @@ row_drop_table_for_mysql(
 }
 }
 
+
+DBUG_EXECUTE_IF("row_drop_table_add_to_background",
+row_add_table_to_background_drop_list(table->name);
+err = DB_SUCCESS;
+goto funct_exit;
+);
+
 /* TODO: could we replace the counter n_foreign_key_checks_running
 with lock checks on the table? Acquire here an exclusive lock on the
 table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
|
|||||||
row_mysql_lock_data_dictionary(trx);
|
row_mysql_lock_data_dictionary(trx);
|
||||||
|
|
||||||
while ((table_name = dict_get_first_table_name_in_db(name))) {
|
while ((table_name = dict_get_first_table_name_in_db(name))) {
|
||||||
|
/* Drop parent table if it is a fts aux table, to
|
||||||
|
avoid accessing dropped fts aux tables in information
|
||||||
|
scheam when parent table still exists.
|
||||||
|
Note: Drop parent table will drop fts aux tables. */
|
||||||
|
char* parent_table_name;
|
||||||
|
parent_table_name = fts_get_parent_table_name(
|
||||||
|
table_name, strlen(table_name));
|
||||||
|
|
||||||
|
if (parent_table_name != NULL) {
|
||||||
|
mem_free(table_name);
|
||||||
|
table_name = parent_table_name;
|
||||||
|
}
|
||||||
|
|
||||||
ut_a(memcmp(table_name, name, namelen) == 0);
|
ut_a(memcmp(table_name, name, namelen) == 0);
|
||||||
|
|
||||||
table = dict_table_open_on_name(
|
table = dict_table_open_on_name(
|
||||||
|
@@ -205,7 +205,7 @@ maria_declare_plugin(perfschema)
 0x0001,
 pfs_status_vars,
 NULL,
-"5.6.32",
+"5.6.33",
 MariaDB_PLUGIN_MATURITY_STABLE
 }
 maria_declare_plugin_end;
@@ -1,4 +1,4 @@
-SET(TOKUDB_VERSION 5.6.31-77.0)
+SET(TOKUDB_VERSION 5.6.32-78.1)
 # PerconaFT only supports x86-64 and cmake-2.8.9+
 IF(CMAKE_VERSION VERSION_LESS "2.8.9")
 MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
@@ -367,8 +367,8 @@ static void print_db_env_struct (void) {
 "int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
 "int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
 "int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
-"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
-"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
+"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */",
+"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */",
 "int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
 "int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
 "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
@@ -103,6 +103,7 @@ set_cflags_if_supported(
 -Wno-pointer-bool-conversion
 -fno-rtti
 -fno-exceptions
+-Wno-error=nonnull-compare
 )
 ## set_cflags_if_supported_named("-Weffc++" -Weffcpp)
 
@@ -55,8 +55,8 @@ set(FT_SOURCES
 msg_buffer
 node
 pivotkeys
+serialize/rbtree_mhs
 serialize/block_allocator
-serialize/block_allocator_strategy
 serialize/block_table
 serialize/compress
 serialize/ft_node-serialize
@@ -496,7 +496,7 @@ handle_split_of_child(
 
 // We never set the rightmost blocknum to be the root.
 // Instead, we wait for the root to split and let promotion initialize the rightmost
-// blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
+// blocknum to be the first non-root leaf node on the right extreme to receive an insert.
 BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
 invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
 if (childa->blocknum.b == rightmost_blocknum.b) {
@@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
 // It is possible after reading in the entire child,
 // that we now know that the child is not reactive
 // if so, we can unpin parent right now
-// we wont be splitting/merging child
+// we won't be splitting/merging child
 // and we have already replaced the bnc
 // for the root with a fresh one
 enum reactivity child_re = toku_ftnode_get_reactivity(ft, child);
@@ -598,15 +598,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) {
 }
 }
 
-void toku_ftnode_clone_callback(
-void* value_data,
+void toku_ftnode_clone_callback(void *value_data,
 void **cloned_value_data,
 long *clone_size,
 PAIR_ATTR *new_attr,
 bool for_checkpoint,
-void* write_extraargs
-)
-{
+void *write_extraargs) {
 FTNODE node = static_cast<FTNODE>(value_data);
 toku_ftnode_assert_fully_in_memory(node);
 FT ft = static_cast<FT>(write_extraargs);
@@ -618,13 +615,16 @@ void toku_ftnode_clone_callback(
 toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize);
 }
 
-cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known;
-cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
+cloned_node->oldest_referenced_xid_known =
+node->oldest_referenced_xid_known;
+cloned_node->max_msn_applied_to_node_on_disk =
+node->max_msn_applied_to_node_on_disk;
 cloned_node->flags = node->flags;
 cloned_node->blocknum = node->blocknum;
 cloned_node->layout_version = node->layout_version;
 cloned_node->layout_version_original = node->layout_version_original;
-cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk;
+cloned_node->layout_version_read_from_disk =
+node->layout_version_read_from_disk;
 cloned_node->build_id = node->build_id;
 cloned_node->height = node->height;
 cloned_node->dirty = node->dirty;
@@ -649,16 +649,18 @@ void toku_ftnode_clone_callback(
 // set new pair attr if necessary
 if (node->height == 0) {
 *new_attr = make_ftnode_pair_attr(node);
+for (int i = 0; i < node->n_children; i++) {
+BLB(node, i)->logical_rows_delta = 0;
+BLB(cloned_node, i)->logical_rows_delta = 0;
 }
-else {
+} else {
 new_attr->is_valid = false;
 }
 *clone_size = ftnode_memory_size(cloned_node);
 *cloned_value_data = cloned_node;
 }
 
-void toku_ftnode_flush_callback(
-CACHEFILE UU(cachefile),
+void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
 int fd,
 BLOCKNUM blocknum,
 void *ftnode_v,
|
|||||||
bool write_me,
|
bool write_me,
|
||||||
bool keep_me,
|
bool keep_me,
|
||||||
bool for_checkpoint,
|
bool for_checkpoint,
|
||||||
bool is_clone
|
bool is_clone) {
|
||||||
)
|
|
||||||
{
|
|
||||||
FT ft = (FT)extraargs;
|
FT ft = (FT)extraargs;
|
||||||
FTNODE ftnode = (FTNODE)ftnode_v;
|
FTNODE ftnode = (FTNODE)ftnode_v;
|
||||||
FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
|
FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
|
||||||
@ -680,7 +680,8 @@ void toku_ftnode_flush_callback(
|
|||||||
if (write_me) {
|
if (write_me) {
|
||||||
toku_ftnode_assert_fully_in_memory(ftnode);
|
toku_ftnode_assert_fully_in_memory(ftnode);
|
||||||
if (height > 0 && !is_clone) {
|
if (height > 0 && !is_clone) {
|
||||||
// cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback()
|
// cloned nodes already had their stale messages moved, see
|
||||||
|
// toku_ftnode_clone_callback()
|
||||||
toku_move_ftnode_messages_to_stale(ft, ftnode);
|
toku_move_ftnode_messages_to_stale(ft, ftnode);
|
||||||
} else if (height == 0) {
|
} else if (height == 0) {
|
||||||
toku_ftnode_leaf_run_gc(ft, ftnode);
|
toku_ftnode_leaf_run_gc(ft, ftnode);
|
||||||
@ -688,7 +689,8 @@ void toku_ftnode_flush_callback(
|
|||||||
toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint);
|
toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
|
int r = toku_serialize_ftnode_to(
|
||||||
|
fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION;
|
ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION;
|
||||||
}
|
}
|
||||||
@ -703,20 +705,22 @@ void toku_ftnode_flush_callback(
|
|||||||
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
|
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
|
||||||
}
|
}
|
||||||
toku_free(*disk_data);
|
toku_free(*disk_data);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
if (ftnode->height == 0) {
|
if (ftnode->height == 0) {
|
||||||
for (int i = 0; i < ftnode->n_children; i++) {
|
for (int i = 0; i < ftnode->n_children; i++) {
|
||||||
if (BP_STATE(ftnode, i) == PT_AVAIL) {
|
if (BP_STATE(ftnode, i) == PT_AVAIL) {
|
||||||
BASEMENTNODE bn = BLB(ftnode, i);
|
BASEMENTNODE bn = BLB(ftnode, i);
|
||||||
toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
|
toku_ft_decrease_stats(&ft->in_memory_stats,
|
||||||
|
bn->stat64_delta);
|
||||||
|
if (!ftnode->dirty)
|
||||||
|
toku_ft_adjust_logical_row_count(
|
||||||
|
ft, -bn->logical_rows_delta);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
toku_ftnode_free(&ftnode);
|
toku_ftnode_free(&ftnode);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
*new_size = make_ftnode_pair_attr(ftnode);
|
*new_size = make_ftnode_pair_attr(ftnode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -845,8 +849,11 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr
 }
 
 // callback for partially evicting a node
-int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
-void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) {
+int toku_ftnode_pe_callback(void *ftnode_pv,
+PAIR_ATTR old_attr,
+void *write_extraargs,
+void (*finalize)(PAIR_ATTR new_attr, void *extra),
+void *finalize_extra) {
 FTNODE node = (FTNODE)ftnode_pv;
 FT ft = (FT)write_extraargs;
 int num_partial_evictions = 0;
@@ -866,7 +873,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
 }
 // Don't partially evict nodes whose partitions can't be read back
 // from disk individually
-if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
+if (node->layout_version_read_from_disk <
+FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
 goto exit;
 }
 //
@@ -878,46 +886,47 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
 if (BP_SHOULD_EVICT(node, i)) {
 NONLEAF_CHILDINFO bnc = BNC(node, i);
 if (ft_compress_buffers_before_eviction &&
-// We may not serialize and compress a partition in memory if its
-// in memory layout version is different than what's on disk (and
-// therefore requires upgrade).
+// We may not serialize and compress a partition in
+// memory if its in memory layout version is different
+// than what's on disk (and therefore requires upgrade).
 //
-// Auto-upgrade code assumes that if a node's layout version read
-// from disk is not current, it MUST require upgrade. Breaking
-// this rule would cause upgrade code to upgrade this partition
-// again after we serialize it as the current version, which is bad.
-node->layout_version == node->layout_version_read_from_disk) {
+// Auto-upgrade code assumes that if a node's layout
+// version read from disk is not current, it MUST
+// require upgrade.
+// Breaking this rule would cause upgrade code to
+// upgrade this partition again after we serialize it as
+// the current version, which is bad.
+node->layout_version ==
+node->layout_version_read_from_disk) {
 toku_ft_bnc_move_messages_to_stale(ft, bnc);
 compress_internal_node_partition(
 node,
 i,
 // Always compress with quicklz
-TOKU_QUICKLZ_METHOD
-);
+TOKU_QUICKLZ_METHOD);
 } else {
 // We're not compressing buffers before eviction. Simply
-// detach the buffer and set the child's state to on-disk.
+// detach the buffer and set the child's state to
+// on-disk.
 set_BNULL(node, i);
 BP_STATE(node, i) = PT_ON_DISK;
 }
 buffers_to_destroy[num_buffers_to_destroy++] = bnc;
 num_partial_evictions++;
-}
-else {
+} else {
 BP_SWEEP_CLOCK(node, i);
 }
-}
-else {
+} else {
 continue;
 }
 }
-}
+} else {
 //
 // partial eviction strategy for basement nodes:
 // if the bn is compressed, evict it
-// else: check if it requires eviction, if it does, evict it, if not, sweep the clock count
+// else: check if it requires eviction, if it does, evict it, if not,
+// sweep the clock count
 //
-else {
 for (int i = 0; i < node->n_children; i++) {
 // Get rid of compressed stuff no matter what.
 if (BP_STATE(node, i) == PT_COMPRESSED) {
@@ -927,24 +936,23 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
 set_BNULL(node, i);
 BP_STATE(node, i) = PT_ON_DISK;
 num_partial_evictions++;
-}
-else if (BP_STATE(node,i) == PT_AVAIL) {
+} else if (BP_STATE(node, i) == PT_AVAIL) {
 if (BP_SHOULD_EVICT(node, i)) {
 BASEMENTNODE bn = BLB(node, i);
 basements_to_destroy[num_basements_to_destroy++] = bn;
-toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+toku_ft_decrease_stats(&ft->in_memory_stats,
+bn->stat64_delta);
+toku_ft_adjust_logical_row_count(ft,
+-bn->logical_rows_delta);
 set_BNULL(node, i);
 BP_STATE(node, i) = PT_ON_DISK;
 num_partial_evictions++;
-}
-else {
+} else {
 BP_SWEEP_CLOCK(node, i);
 }
-}
-else if (BP_STATE(node,i) == PT_ON_DISK) {
+} else if (BP_STATE(node, i) == PT_ON_DISK) {
 continue;
-}
-else {
+} else {
 abort();
 }
 }
@@ -2378,8 +2386,12 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) {
 toku_ft_root_put_msg(ft_h->ft, msg, &gc_info);
 }
 
-void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
-TOKUTXN txn, bool oplsn_valid, LSN oplsn,
+void toku_ft_maybe_update(FT_HANDLE ft_h,
+const DBT *key,
+const DBT *update_function_extra,
+TOKUTXN txn,
+bool oplsn_valid,
+LSN oplsn,
 bool do_logging) {
 TXNID_PAIR xid = toku_txn_get_txnid(txn);
 if (txn) {
@@ -2395,19 +2407,30 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func
 BYTESTRING keybs = {.len = key->size, .data = (char *)key->data};
 BYTESTRING extrabs = {.len = update_function_extra->size,
 .data = (char *)update_function_extra->data};
-toku_log_enq_update(logger, NULL, 0, txn,
+toku_log_enq_update(logger,
+NULL,
+0,
+txn,
 toku_cachefile_filenum(ft_h->ft->cf),
-xid, keybs, extrabs);
+xid,
+keybs,
+extrabs);
 }
 
 LSN treelsn;
-if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
+if (oplsn_valid &&
+oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
 // do nothing
 } else {
-XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
-ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
+XIDS message_xids =
+txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
+ft_msg msg(
+key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
 ft_send_update_msg(ft_h, msg, txn);
 }
+// updates get converted to insert messages, which should do a -1 on the
+// logical row count when the messages are permanently applied
+toku_ft_adjust_logical_row_count(ft_h->ft, 1);
 }
 
 void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra,
@@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
 return rre->_cancelled =
 rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
 }
-int toku_ft_recount_rows(
-FT_HANDLE ft,
-int (*progress_callback)(
-uint64_t count,
+int toku_ft_recount_rows(FT_HANDLE ft,
+int (*progress_callback)(uint64_t count,
 uint64_t deleted,
 void* progress_extra),
 void* progress_extra) {
-
 int ret = 0;
-recount_rows_extra_t rre = {
-progress_callback,
-progress_extra,
-0,
-false
-};
-
+recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};
 ft_cursor c;
 ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
-if (ret) return ret;
+if (ret)
+return ret;
 
-toku_ft_cursor_set_check_interrupt_cb(
-&c,
-recount_rows_interrupt,
-&rre);
-
+toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);
 ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
 while (FT_LIKELY(ret == 0)) {
@@ -108,6 +98,7 @@ int toku_ft_recount_rows(
 if (rre._cancelled == false) {
 // update ft count
 toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
+ft->ft->h->dirty = 1;
 ret = 0;
 }
 
@@ -903,6 +903,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
 // must be returned in toku_ft_stat64.
 if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
 toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
+if (ft->in_memory_logical_rows == (uint64_t)-1) {
+toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1);
+}
 }
 }
 
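Note: the hunk above keeps the in-memory logical row counter from landing exactly on the (uint64_t)-1 "unknown" sentinel after an atomic adjustment. A stand-alone sketch of the same guard using std::atomic and invented names:

#include <atomic>
#include <cassert>
#include <cstdint>

static std::atomic<uint64_t> logical_rows{10};    // invented stand-in counter

void adjust_logical_rows(int64_t delta)
{
  if (delta != 0 && logical_rows.load() != UINT64_MAX)
  {
    uint64_t now= logical_rows.fetch_add((uint64_t) delta) + (uint64_t) delta;
    if (now == UINT64_MAX)          // accidentally hit the "unknown" sentinel
      logical_rows.fetch_add(1);    // nudge it off again, as the hunk does
  }
}

int main()
{
  adjust_logical_rows(-11);         // 10 - 11 wraps to UINT64_MAX ...
  assert(logical_rows.load() == 0); // ... and is pushed off the sentinel
  return 0;
}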
@@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
 
 void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
 
-// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.)
+// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
 uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
 
 int toku_ft_loader_finish_extractor(FTLOADER bl);
@@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) {
 
 uint64_t
 toku_ft_loader_get_rowset_budget_for_testing (void)
-// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613).
+// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613).
 {
 return 16ULL*size_factor*1024ULL;
 }
@@ -373,22 +373,20 @@ find_bounds_within_message_tree(
    }
}

-/**
- * For each message in the ancestor's buffer (determined by childnum) that
- * is key-wise between lower_bound_exclusive and upper_bound_inclusive,
- * apply the message to the basement node.  We treat the bounds as minus
- * or plus infinity respectively if they are NULL.  Do not mark the node
- * as dirty (preserve previous state of 'dirty' bit).
- */
+// For each message in the ancestor's buffer (determined by childnum) that
+// is key-wise between lower_bound_exclusive and upper_bound_inclusive,
+// apply the message to the basement node.  We treat the bounds as minus
+// or plus infinity respectively if they are NULL.  Do not mark the node
+// as dirty (preserve previous state of 'dirty' bit).
static void bnc_apply_messages_to_basement_node(
    FT_HANDLE t,  // used for comparison function
    BASEMENTNODE bn,  // where to apply messages
    FTNODE ancestor,  // the ancestor node where we can find messages to apply
    int childnum,  // which child buffer of ancestor contains messages we want
-    const pivot_bounds &bounds,  // contains pivot key bounds of this basement node
+    const pivot_bounds &
+        bounds,  // contains pivot key bounds of this basement node
    txn_gc_info *gc_info,
    bool *msgs_applied) {

    int r;
    NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);

@@ -400,8 +398,7 @@ static void bnc_apply_messages_to_basement_node(

    uint32_t stale_lbi, stale_ube;
    if (!bn->stale_ancestor_messages_applied) {
-        find_bounds_within_message_tree(
-            t->ft->cmp,
+        find_bounds_within_message_tree(t->ft->cmp,
            bnc->stale_message_tree,
            &bnc->msg_buffer,
            bounds,
@@ -412,8 +409,7 @@ static void bnc_apply_messages_to_basement_node(
        stale_ube = 0;
    }
    uint32_t fresh_lbi, fresh_ube;
-    find_bounds_within_message_tree(
-        t->ft->cmp,
+    find_bounds_within_message_tree(t->ft->cmp,
        bnc->fresh_message_tree,
        &bnc->msg_buffer,
        bounds,
@@ -432,34 +428,42 @@ static void bnc_apply_messages_to_basement_node(
        // We have messages in multiple trees, so we grab all
        // the relevant messages' offsets and sort them by MSN, then apply
        // them in MSN order.
-        const int buffer_size = ((stale_ube - stale_lbi) +
-                                 (fresh_ube - fresh_lbi) +
-                                 bnc->broadcast_list.size());
+        const int buffer_size =
+            ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) +
+             bnc->broadcast_list.size());
        toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
        int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
-        struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
+        struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets,
+                                                          .i = 0};

        // Populate offsets array with offsets to stale messages
-        r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra);
+        r = bnc->stale_message_tree
+                .iterate_on_range<struct store_msg_buffer_offset_extra,
+                                  store_msg_buffer_offset>(
+                    stale_lbi, stale_ube, &sfo_extra);
        assert_zero(r);

        // Then store fresh offsets, and mark them to be moved to stale later.
-        r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra);
+        r = bnc->fresh_message_tree
+                .iterate_and_mark_range<struct store_msg_buffer_offset_extra,
+                                        store_msg_buffer_offset>(
+                    fresh_lbi, fresh_ube, &sfo_extra);
        assert_zero(r);

        // Store offsets of all broadcast messages.
-        r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra);
+        r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra,
+                                        store_msg_buffer_offset>(&sfo_extra);
        assert_zero(r);
        invariant(sfo_extra.i == buffer_size);

        // Sort by MSN.
-        toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer);
+        toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::
+            mergesort_r(offsets, buffer_size, bnc->msg_buffer);

        // Apply the messages in MSN order.
        for (int i = 0; i < buffer_size; ++i) {
            *msgs_applied = true;
-            do_bn_apply_msg(
-                t,
+            do_bn_apply_msg(t,
                bn,
                &bnc->msg_buffer,
                offsets[i],
@@ -469,7 +473,8 @@ static void bnc_apply_messages_to_basement_node(
                &logical_rows_delta);
        }
    } else if (stale_lbi == stale_ube) {
-        // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
+        // No stale messages to apply, we just apply fresh messages, and mark
+        // them to be moved to stale later.
        struct iterate_do_bn_apply_msg_extra iter_extra = {
            .t = t,
            .bn = bn,
@@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node(
            .gc_info = gc_info,
            .workdone = &workdone_this_ancestor,
            .stats_to_update = &stats_delta,
-            .logical_rows_delta = &logical_rows_delta
-        };
-        if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
-        r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
+            .logical_rows_delta = &logical_rows_delta};
+        if (fresh_ube - fresh_lbi > 0)
+            *msgs_applied = true;
+        r = bnc->fresh_message_tree
+                .iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra,
+                                        iterate_do_bn_apply_msg>(
+                    fresh_lbi, fresh_ube, &iter_extra);
        assert_zero(r);
    } else {
        invariant(fresh_lbi == fresh_ube);
        // No fresh messages to apply, we just apply stale messages.

-        if (stale_ube - stale_lbi > 0) *msgs_applied = true;
+        if (stale_ube - stale_lbi > 0)
+            *msgs_applied = true;
        struct iterate_do_bn_apply_msg_extra iter_extra = {
            .t = t,
            .bn = bn,
@@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node(
            .gc_info = gc_info,
            .workdone = &workdone_this_ancestor,
            .stats_to_update = &stats_delta,
-            .logical_rows_delta = &logical_rows_delta
-        };
+            .logical_rows_delta = &logical_rows_delta};

-        r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
+        r = bnc->stale_message_tree
+                .iterate_on_range<struct iterate_do_bn_apply_msg_extra,
+                                  iterate_do_bn_apply_msg>(
+                    stale_lbi, stale_ube, &iter_extra);
        assert_zero(r);
    }
    //
    // update stats
    //
    if (workdone_this_ancestor > 0) {
-        (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor);
+        (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum),
+                                      workdone_this_ancestor);
    }
    if (stats_delta.numbytes || stats_delta.numrows) {
        toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
    }
    toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
+    bn->logical_rows_delta += logical_rows_delta;
}

static void
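The merged-buffers branch above boils down to: collect message offsets from the stale, fresh, and broadcast buffers, sort them by MSN, then apply them oldest-first. Below is a minimal standalone sketch of that ordering idea, using illustrative types rather than the PerconaFT ones (Msg and apply_in_msn_order are invented for this example and are not part of the commit).

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-ins: the real code stores offsets into a shared message
// buffer and calls do_bn_apply_msg() for each one.
struct Msg {
    uint64_t msn;  // message sequence number; larger means newer
    int payload;
};

// Collect candidate messages from several buffers, then apply them in MSN
// order so a newer message is never clobbered by an older one.
static void apply_in_msn_order(const std::vector<std::vector<Msg>> &buffers,
                               std::vector<int> *basement) {
    std::vector<Msg> merged;
    for (const std::vector<Msg> &buf : buffers) {
        merged.insert(merged.end(), buf.begin(), buf.end());
    }
    std::sort(merged.begin(), merged.end(),
              [](const Msg &a, const Msg &b) { return a.msn < b.msn; });
    for (const Msg &m : merged) {
        basement->push_back(m.payload);  // stand-in for applying the message
    }
}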
@@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node {
    MSN max_msn_applied;  // max message sequence number applied
    bool stale_ancestor_messages_applied;
    STAT64INFO_S stat64_delta;  // change in stat64 counters since basement was last written to disk
+    int64_t logical_rows_delta;
};
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;

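The new logical_rows_delta field gives each basement node its own running row-count delta, alongside the tree-wide adjustment made through toku_ft_adjust_logical_row_count() in the earlier hunk. A rough sketch of that double bookkeeping, with hypothetical stand-in types (FtSketch and BasementSketch are not from the source):

#include <cstdint>

// Hypothetical stand-ins for FT and basement-node state; only the bookkeeping
// pattern is taken from the hunks above.
struct FtSketch {
    int64_t logical_row_count = 0;
};
struct BasementSketch {
    int64_t logical_rows_delta = 0;
};

static void account_for_applied_messages(FtSketch *ft, BasementSketch *bn,
                                         int64_t logical_rows_delta) {
    ft->logical_row_count += logical_rows_delta;   // tree-wide running count
    bn->logical_rows_delta += logical_rows_delta;  // per-basement delta
}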
@@ -46,253 +46,123 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "portability/toku_stdlib.h"

#include "ft/serialize/block_allocator.h"
-#include "ft/serialize/block_allocator_strategy.h"
+#include "ft/serialize/rbtree_mhs.h"

#if TOKU_DEBUG_PARANOID
-#define VALIDATE() validate()
+#define VALIDATE() Validate()
#else
#define VALIDATE()
#endif

-static FILE *ba_trace_file = nullptr;
-
-void block_allocator::maybe_initialize_trace(void) {
-    const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH");
-    if (ba_trace_path != nullptr) {
-        ba_trace_file = toku_os_fopen(ba_trace_path, "w");
-        if (ba_trace_file == nullptr) {
-            fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), "
-                            "but it could not be opened for writing (errno %d)\n",
-                            ba_trace_path, get_maybe_error_errno());
-        } else {
-            fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path);
-        }
-    }
-}
-
-void block_allocator::maybe_close_trace() {
-    if (ba_trace_file != nullptr) {
-        int r = toku_os_fclose(ba_trace_file);
-        if (r != 0) {
-            fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n",
-                            r, get_maybe_error_errno());
-        } else {
-            fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n");
-        }
-    }
-}
-
-void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) {
-    // the alignment must be at least 512 and aligned with 512 to work with direct I/O
-    assert(alignment >= 512 && (alignment % 512) == 0);
+void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
+                                    uint64_t alignment) {
+    // the alignment must be at least 512 and aligned with 512 to work with
+    // direct I/O
+    invariant(alignment >= 512 && (alignment % 512) == 0);

    _reserve_at_beginning = reserve_at_beginning;
    _alignment = alignment;
    _n_blocks = 0;
-    _blocks_array_size = 1;
-    XMALLOC_N(_blocks_array_size, _blocks_array);
    _n_bytes_in_use = reserve_at_beginning;
-    _strategy = BA_STRATEGY_FIRST_FIT;
-    memset(&_trace_lock, 0, sizeof(toku_mutex_t));
-    toku_mutex_init(&_trace_lock, nullptr);
+    _tree = new MhsRbTree::Tree(alignment);
+}

+void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
+    CreateInternal(reserve_at_beginning, alignment);
+    _tree->Insert({reserve_at_beginning, MAX_BYTE});
    VALIDATE();
}

-void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) {
-    _create_internal(reserve_at_beginning, alignment);
-    _trace_create();
+void BlockAllocator::Destroy() {
+    delete _tree;
}

-void block_allocator::destroy() {
-    toku_free(_blocks_array);
-    _trace_destroy();
-    toku_mutex_destroy(&_trace_lock);
-}
-
-void block_allocator::set_strategy(enum allocation_strategy strategy) {
-    _strategy = strategy;
-}
-
-void block_allocator::grow_blocks_array_by(uint64_t n_to_add) {
-    if (_n_blocks + n_to_add > _blocks_array_size) {
-        uint64_t new_size = _n_blocks + n_to_add;
-        uint64_t at_least = _blocks_array_size * 2;
-        if (at_least > new_size) {
-            new_size = at_least;
-        }
-        _blocks_array_size = new_size;
-        XREALLOC_N(_blocks_array_size, _blocks_array);
-    }
-}
-
-void block_allocator::grow_blocks_array() {
-    grow_blocks_array_by(1);
-}
-
-void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
-                                             struct blockpair *pairs, uint64_t n_blocks) {
-    _create_internal(reserve_at_beginning, alignment);
-
+void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
+                                          uint64_t alignment,
+                                          struct BlockPair *translation_pairs,
+                                          uint64_t n_blocks) {
+    CreateInternal(reserve_at_beginning, alignment);
    _n_blocks = n_blocks;
-    grow_blocks_array_by(_n_blocks);
-    memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair));
-    std::sort(_blocks_array, _blocks_array + _n_blocks);
-    for (uint64_t i = 0; i < _n_blocks; i++) {
-        // Allocator does not support size 0 blocks. See block_allocator_free_block.
-        invariant(_blocks_array[i].size > 0);
-        invariant(_blocks_array[i].offset >= _reserve_at_beginning);
-        invariant(_blocks_array[i].offset % _alignment == 0);
-
-        _n_bytes_in_use += _blocks_array[i].size;
-    }
+
+    struct BlockPair *XMALLOC_N(n_blocks, pairs);
+    memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
+    std::sort(pairs, pairs + n_blocks);
+
+    if (pairs[0]._offset > reserve_at_beginning) {
+        _tree->Insert(
+            {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
+    }
+    for (uint64_t i = 0; i < _n_blocks; i++) {
+        // Allocator does not support size 0 blocks. See
+        // block_allocator_free_block.
+        invariant(pairs[i]._size > 0);
+        invariant(pairs[i]._offset >= _reserve_at_beginning);
+        invariant(pairs[i]._offset % _alignment == 0);
+
+        _n_bytes_in_use += pairs[i]._size;
+
+        MhsRbTree::OUUInt64 free_size(MAX_BYTE);
+        MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
+        if (i < n_blocks - 1) {
+            MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
+            invariant(next_offset >= free_offset);
+            free_size = next_offset - free_offset;
+            if (free_size == 0)
+                continue;
+        }
+        _tree->Insert({free_offset, free_size});
+    }
+    toku_free(pairs);
    VALIDATE();
-
-    _trace_create_from_blockpairs();
}
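CreateFromBlockPairs() now seeds the free-space tree with the gaps between the sorted used blocks plus an effectively infinite tail segment. The following sketch recomputes those gaps with plain std containers; it only illustrates the gap arithmetic and is not the MhsRbTree implementation (UsedBlock and free_segments are invented names).

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

struct UsedBlock {
    uint64_t offset;
    uint64_t size;
};

// Compute the free segments implied by a set of used blocks: a gap before the
// first block (after the reserve), the gaps between consecutive blocks, and an
// effectively unbounded tail after the last one.
static std::map<uint64_t, uint64_t> free_segments(std::vector<UsedBlock> used,
                                                  uint64_t reserve) {
    std::map<uint64_t, uint64_t> free_map;  // offset -> size
    std::sort(used.begin(), used.end(),
              [](const UsedBlock &a, const UsedBlock &b) {
                  return a.offset < b.offset;
              });
    uint64_t cursor = reserve;
    for (const UsedBlock &b : used) {
        if (b.offset > cursor) {
            free_map[cursor] = b.offset - cursor;  // gap before this block
        }
        cursor = b.offset + b.size;
    }
    free_map[cursor] = UINT64_MAX - cursor;  // "infinite" tail segment
    return free_map;
}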

// Effect: align a value by rounding up.
-static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
+static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
    return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
}

-struct block_allocator::blockpair *
-block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) {
-    switch (_strategy) {
-    case BA_STRATEGY_FIRST_FIT:
-        return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment);
-    case BA_STRATEGY_BEST_FIT:
-        return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment);
-    case BA_STRATEGY_HEAT_ZONE:
-        return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat);
-    case BA_STRATEGY_PADDED_FIT:
-        return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment);
-    default:
-        abort();
-    }
-}
-
-// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512).
-void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) {
-    struct blockpair *bp;
-
+// Effect: Allocate a block. The resulting block must be aligned on the
+// ba->alignment (which to make direct_io happy must be a positive multiple of
+// 512).
+void BlockAllocator::AllocBlock(uint64_t size,
+                                uint64_t *offset) {
    // Allocator does not support size 0 blocks. See block_allocator_free_block.
    invariant(size > 0);

-    grow_blocks_array();
    _n_bytes_in_use += size;
+    *offset = _tree->Remove(size);

-    uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment);
-
-    if (_n_blocks == 0) {
-        // First and only block
-        assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use
-        _blocks_array[0].offset = align(_reserve_at_beginning, _alignment);
-        _blocks_array[0].size = size;
-        *offset = _blocks_array[0].offset;
-        goto done;
-    } else if (end_of_reserve + size <= _blocks_array[0].offset ) {
-        // Check to see if the space immediately after the reserve is big enough to hold the new block.
-        bp = &_blocks_array[0];
-        memmove(bp + 1, bp, _n_blocks * sizeof(*bp));
-        bp[0].offset = end_of_reserve;
-        bp[0].size = size;
-        *offset = end_of_reserve;
-        goto done;
-    }
-
-    bp = choose_block_to_alloc_after(size, heat);
-    if (bp != nullptr) {
-        // our allocation strategy chose the space after `bp' to fit the new block
-        uint64_t answer_offset = align(bp->offset + bp->size, _alignment);
-        uint64_t blocknum = bp - _blocks_array;
-        invariant(&_blocks_array[blocknum] == bp);
-        invariant(blocknum < _n_blocks);
-        memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp));
-        bp[1].offset = answer_offset;
-        bp[1].size = size;
-        *offset = answer_offset;
-    } else {
-        // It didn't fit anywhere, so fit it on the end.
-        assert(_n_blocks < _blocks_array_size);
-        bp = &_blocks_array[_n_blocks];
-        uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment);
-        bp->offset = answer_offset;
-        bp->size = size;
-        *offset = answer_offset;
-    }
-
-done:
    _n_blocks++;
    VALIDATE();
-
-    _trace_alloc(size, heat, *offset);
}

-// Find the index in the blocks array that has a particular offset.  Requires that the block exist.
-// Use binary search so it runs fast.
-int64_t block_allocator::find_block(uint64_t offset) {
-    VALIDATE();
-    if (_n_blocks == 1) {
-        assert(_blocks_array[0].offset == offset);
-        return 0;
-    }
-
-    uint64_t lo = 0;
-    uint64_t hi = _n_blocks;
-    while (1) {
-        assert(lo < hi); // otherwise no such block exists.
-        uint64_t mid = (lo + hi) / 2;
-        uint64_t thisoff = _blocks_array[mid].offset;
-        if (thisoff < offset) {
-            lo = mid + 1;
-        } else if (thisoff > offset) {
-            hi = mid;
-        } else {
-            return mid;
-        }
-    }
-}
-
-// To support 0-sized blocks, we need to include size as an input to this function.
+// To support 0-sized blocks, we need to include size as an input to this
+// function.
// All 0-sized blocks at the same offset can be considered identical, but
// a 0-sized block can share offset with a non-zero sized block.
-// The non-zero sized block is not exchangable with a zero sized block (or vice versa),
-// so inserting 0-sized blocks can cause corruption here.
-void block_allocator::free_block(uint64_t offset) {
+// The non-zero sized block is not exchangable with a zero sized block (or vice
+// versa), so inserting 0-sized blocks can cause corruption here.
+void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
    VALIDATE();
-    int64_t bn = find_block(offset);
-    assert(bn >= 0); // we require that there is a block with that offset.
-    _n_bytes_in_use -= _blocks_array[bn].size;
-    memmove(&_blocks_array[bn], &_blocks_array[bn + 1],
-            (_n_blocks - bn - 1) * sizeof(struct blockpair));
+    _n_bytes_in_use -= size;
+    _tree->Insert({offset, size});
    _n_blocks--;
    VALIDATE();
-
-    _trace_free(offset);
}

-uint64_t block_allocator::block_size(uint64_t offset) {
-    int64_t bn = find_block(offset);
-    assert(bn >=0); // we require that there is a block with that offset.
-    return _blocks_array[bn].size;
+uint64_t BlockAllocator::AllocatedLimit() const {
+    MhsRbTree::Node *max_node = _tree->MaxNode();
+    return rbn_offset(max_node).ToInt();
}

-uint64_t block_allocator::allocated_limit() const {
-    if (_n_blocks == 0) {
-        return _reserve_at_beginning;
-    } else {
-        struct blockpair *last = &_blocks_array[_n_blocks - 1];
-        return last->offset + last->size;
-    }
-}
-
-// Effect: Consider the blocks in sorted order.  The reserved block at the beginning is number 0.  The next one is number 1 and so forth.
+// Effect: Consider the blocks in sorted order.  The reserved block at the
+// beginning is number 0.  The next one is number 1 and so forth.
// Return the offset and size of the block with that number.
// Return 0 if there is a block that big, return nonzero if b is too big.
-int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) {
+int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
+                                          uint64_t *offset,
+                                          uint64_t *size) {
+    MhsRbTree::Node *x, *y;
    if (b == 0) {
        *offset = 0;
        *size = _reserve_at_beginning;
@@ -300,161 +170,90 @@ int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset,
    } else if (b > _n_blocks) {
        return -1;
    } else {
-        *offset =_blocks_array[b - 1].offset;
-        *size =_blocks_array[b - 1].size;
+        x = _tree->MinNode();
+        for (uint64_t i = 1; i <= b; i++) {
+            y = x;
+            x = _tree->Successor(x);
+        }
+        *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
+        *offset = (rbn_offset(y) + rbn_size(y)).ToInt();
        return 0;
    }
}

+struct VisUnusedExtra {
+    TOKU_DB_FRAGMENTATION _report;
+    uint64_t _align;
+};
+
+static void VisUnusedCollector(void *extra,
+                               MhsRbTree::Node *node,
+                               uint64_t UU(depth)) {
+    struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
+    TOKU_DB_FRAGMENTATION report = v_e->_report;
+    uint64_t alignm = v_e->_align;
+
+    MhsRbTree::OUUInt64 offset = rbn_offset(node);
+    MhsRbTree::OUUInt64 size = rbn_size(node);
+    MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
+    uint64_t free_space = (offset + size - answer_offset).ToInt();
+    if (free_space > 0) {
+        report->unused_bytes += free_space;
+        report->unused_blocks++;
+        if (free_space > report->largest_unused_block) {
+            report->largest_unused_block = free_space;
+        }
+    }
+}
// Requires: report->file_size_bytes is filled in
// Requires: report->data_bytes is filled in
// Requires: report->checkpoint_bytes_additional is filled in
-void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) {
-    assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional);
+void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
+    invariant(_n_bytes_in_use ==
+              report->data_bytes + report->checkpoint_bytes_additional);

    report->unused_bytes = 0;
    report->unused_blocks = 0;
    report->largest_unused_block = 0;
-    if (_n_blocks > 0) {
-        //Deal with space before block 0 and after reserve:
-        {
-            struct blockpair *bp = &_blocks_array[0];
-            assert(bp->offset >= align(_reserve_at_beginning, _alignment));
-            uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment);
-            if (free_space > 0) {
-                report->unused_bytes += free_space;
-                report->unused_blocks++;
-                if (free_space > report->largest_unused_block) {
-                    report->largest_unused_block = free_space;
-                }
-            }
-        }
-
-        //Deal with space between blocks:
-        for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) {
-            // Consider the space after blocknum
-            struct blockpair *bp = &_blocks_array[blocknum];
-            uint64_t this_offset = bp[0].offset;
-            uint64_t this_size = bp[0].size;
-            uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
-            uint64_t next_offset = bp[1].offset;
-            uint64_t free_space = next_offset - end_of_this_block;
-            if (free_space > 0) {
-                report->unused_bytes += free_space;
-                report->unused_blocks++;
-                if (free_space > report->largest_unused_block) {
-                    report->largest_unused_block = free_space;
-                }
-            }
-        }
-
-        //Deal with space after last block
-        {
-            struct blockpair *bp = &_blocks_array[_n_blocks-1];
-            uint64_t this_offset = bp[0].offset;
-            uint64_t this_size = bp[0].size;
-            uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
-            if (end_of_this_block < report->file_size_bytes) {
-                uint64_t free_space = report->file_size_bytes - end_of_this_block;
-                assert(free_space > 0);
-                report->unused_bytes += free_space;
-                report->unused_blocks++;
-                if (free_space > report->largest_unused_block) {
-                    report->largest_unused_block = free_space;
-                }
-            }
-        }
-    } else {
-        // No blocks.  Just the reserve.
-        uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment);
-        if (end_of_this_block < report->file_size_bytes) {
-            uint64_t free_space = report->file_size_bytes - end_of_this_block;
-            assert(free_space > 0);
-            report->unused_bytes += free_space;
-            report->unused_blocks++;
-            if (free_space > report->largest_unused_block) {
-                report->largest_unused_block = free_space;
-            }
-        }
-    }
-}
-
-void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) {
+    struct VisUnusedExtra extra = {report, _alignment};
+    _tree->InOrderVisitor(VisUnusedCollector, &extra);
+}
+
+void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
    report->data_bytes = _n_bytes_in_use;
    report->data_blocks = _n_blocks;
    report->file_size_bytes = 0;
    report->checkpoint_bytes_additional = 0;
-    get_unused_statistics(report);
+    UnusedStatistics(report);
}

-void block_allocator::validate() const {
-    uint64_t n_bytes_in_use = _reserve_at_beginning;
-    for (uint64_t i = 0; i < _n_blocks; i++) {
-        n_bytes_in_use += _blocks_array[i].size;
-        if (i > 0) {
-            assert(_blocks_array[i].offset > _blocks_array[i - 1].offset);
-            assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size );
-        }
-    }
-    assert(n_bytes_in_use == _n_bytes_in_use);
-}
-
-// Tracing
-
-void block_allocator::_trace_create(void) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n",
-                this, _reserve_at_beginning, _alignment);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_create_from_blockpairs(void) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ",
-                this, _reserve_at_beginning, _alignment);
-        for (uint64_t i = 0; i < _n_blocks; i++) {
-            fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ",
-                    _blocks_array[i].offset, _blocks_array[i].size);
-        }
-        fprintf(ba_trace_file, "\n");
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_destroy(void) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_destroy %p\n", this);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-                this, size, heat, offset);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_free(uint64_t offset) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
+struct ValidateExtra {
+    uint64_t _bytes;
+    MhsRbTree::Node *_pre_node;
+};
+static void VisUsedBlocksInOrder(void *extra,
+                                 MhsRbTree::Node *cur_node,
+                                 uint64_t UU(depth)) {
+    struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
+    MhsRbTree::Node *pre_node = v_e->_pre_node;
+    // verify no overlaps
+    if (pre_node) {
+        invariant(rbn_size(pre_node) > 0);
+        invariant(rbn_offset(cur_node) >
+                  rbn_offset(pre_node) + rbn_size(pre_node));
+        MhsRbTree::OUUInt64 used_space =
+            rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
+        v_e->_bytes += used_space.ToInt();
+    } else {
+        v_e->_bytes += rbn_offset(cur_node).ToInt();
+    }
+    v_e->_pre_node = cur_node;
+}
+
+void BlockAllocator::Validate() const {
+    _tree->ValidateBalance();
+    _tree->ValidateMhs();
+    struct ValidateExtra extra = {0, nullptr};
+    _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
+    invariant(extra._bytes == _n_bytes_in_use);
}

@@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "portability/toku_pthread.h"
#include "portability/toku_stdint.h"
#include "portability/toku_stdlib.h"
+#include "ft/serialize/rbtree_mhs.h"

// Block allocator.
//
@@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// The allocation of block numbers is handled elsewhere.
//
// When creating a block allocator we also specify a certain-sized
-// block at the beginning that is preallocated (and cannot be allocated or freed)
+// block at the beginning that is preallocated (and cannot be allocated or
+// freed)
//
// We can allocate blocks of a particular size at a particular location.
-// We can allocate blocks of a particular size at a location chosen by the allocator.
// We can free blocks.
// We can determine the size of a block.
-class block_allocator {
+#define MAX_BYTE 0xffffffffffffffff
+class BlockAllocator {
public:
    static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096;

    // How much must be reserved at the beginning for the block?
-    //  The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
+    //  The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1
+    //  pointer for each root.
    //  So 4096 should be enough.
    static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096;

-    static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0,
+    static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT ==
+                      0,
                  "block allocator header must have proper alignment");

-    static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
+    static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE =
+        BLOCK_ALLOCATOR_HEADER_RESERVE * 2;

-    enum allocation_strategy {
-        BA_STRATEGY_FIRST_FIT = 1,
-        BA_STRATEGY_BEST_FIT,
-        BA_STRATEGY_PADDED_FIT,
-        BA_STRATEGY_HEAT_ZONE
+    struct BlockPair {
+        uint64_t _offset;
+        uint64_t _size;
+        BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+        int operator<(const struct BlockPair &rhs) const {
+            return _offset < rhs._offset;
+        }
+        int operator<(const uint64_t &o) const { return _offset < o; }
    };

-    struct blockpair {
-        uint64_t offset;
-        uint64_t size;
-        blockpair(uint64_t o, uint64_t s) :
-            offset(o), size(s) {
-        }
-        int operator<(const struct blockpair &rhs) const {
-            return offset < rhs.offset;
-        }
-        int operator<(const uint64_t &o) const {
-            return offset < o;
-        }
-    };
-
-    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
-    //  The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
+    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+    // bytes are not put into a block.
+    //  The default allocation strategy is first fit
+    //  (BA_STRATEGY_FIRST_FIT)
    //  All blocks be start on a multiple of ALIGNMENT.
    //  Aborts if we run out of memory.
    //  Parameters
-    //    reserve_at_beginning (IN)        Size of reserved block at beginning.  This size does not have to be aligned.
+    //    reserve_at_beginning (IN)        Size of reserved block at beginning.
+    //    This size does not have to be aligned.
    //    alignment (IN)                   Block alignment.
-    void create(uint64_t reserve_at_beginning, uint64_t alignment);
+    void Create(uint64_t reserve_at_beginning, uint64_t alignment);

-    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
-    //  The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
-    //  The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs'
+    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+    // bytes are not put into a block.
+    //  The allocator is initialized to contain `n_blocks' of BlockPairs,
+    //  taken from `pairs'
    //  All blocks be start on a multiple of ALIGNMENT.
    //  Aborts if we run out of memory.
    //  Parameters
    //    pairs, unowned array of pairs to copy
    //    n_blocks, Size of pairs array
-    //    reserve_at_beginning (IN)        Size of reserved block at beginning.  This size does not have to be aligned.
+    //    reserve_at_beginning (IN)        Size of reserved block at beginning.
+    //    This size does not have to be aligned.
    //    alignment (IN)                   Block alignment.
-    void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
-                                struct blockpair *pairs, uint64_t n_blocks);
+    void CreateFromBlockPairs(uint64_t reserve_at_beginning,
+                              uint64_t alignment,
+                              struct BlockPair *pairs,
+                              uint64_t n_blocks);

    // Effect: Destroy this block allocator
-    void destroy();
+    void Destroy();

-    // Effect: Set the allocation strategy that the allocator should use
-    // Requires: No other threads are operating on this block allocator
-    void set_strategy(enum allocation_strategy strategy);
-
-    // Effect: Allocate a block of the specified size at an address chosen by the allocator.
+    // Effect: Allocate a block of the specified size at an address chosen by
+    // the allocator.
    //  Aborts if anything goes wrong.
    //  The block address will be a multiple of the alignment.
    // Parameters:
-    //  size (IN):  The size of the block.  (The size does not have to be aligned.)
+    //  size (IN):  The size of the block.  (The size does not have to be
+    //  aligned.)
    //  offset (OUT): The location of the block.
-    //  heat (IN):  A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
-    //          Heat values are lexiographically ordered (like integers), but their specific values are arbitrary
-    void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
+    //  block soon (perhaps in the next checkpoint)
+    //          Heat values are lexiographically ordered (like integers),
+    //          but their specific values are arbitrary
+    void AllocBlock(uint64_t size, uint64_t *offset);

    // Effect: Free the block at offset.
    // Requires: There must be a block currently allocated at that offset.
    // Parameters:
    //  offset (IN): The offset of the block.
-    void free_block(uint64_t offset);
+    void FreeBlock(uint64_t offset, uint64_t size);

-    // Effect: Return the size of the block that starts at offset.
-    // Requires: There must be a block currently allocated at that offset.
-    // Parameters:
-    //  offset (IN): The offset of the block.
-    uint64_t block_size(uint64_t offset);
-
-    // Effect: Check to see if the block allocator is OK.  This may take a long time.
+    // Effect: Check to see if the block allocator is OK.  This may take a long
+    // time.
    // Usage Hints: Probably only use this for unit tests.
    // TODO: Private?
-    void validate() const;
+    void Validate() const;

    // Effect: Return the unallocated block address of "infinite" size.
-    //  That is, return the smallest address that is above all the allocated blocks.
-    uint64_t allocated_limit() const;
+    //  That is, return the smallest address that is above all the allocated
+    //  blocks.
+    uint64_t AllocatedLimit() const;

-    // Effect: Consider the blocks in sorted order.  The reserved block at the beginning is number 0.  The next one is number 1 and so forth.
+    // Effect: Consider the blocks in sorted order.  The reserved block at the
+    // beginning is number 0.  The next one is number 1 and so forth.
    // Return the offset and size of the block with that number.
    // Return 0 if there is a block that big, return nonzero if b is too big.
    // Rationale: This is probably useful only for tests.
-    int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size);
+    int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size);

    // Effect:  Fill in report to indicate how the file is used.
    // Requires:
    //  report->file_size_bytes is filled in
    //  report->data_bytes is filled in
    //  report->checkpoint_bytes_additional is filled in
-    void get_unused_statistics(TOKU_DB_FRAGMENTATION report);
+    void UnusedStatistics(TOKU_DB_FRAGMENTATION report);

    // Effect: Fill in report->data_bytes with the number of bytes in use
-    //         Fill in report->data_blocks with the number of blockpairs in use
+    //         Fill in report->data_blocks with the number of BlockPairs in use
    //         Fill in unused statistics using this->get_unused_statistics()
    // Requires:
    //  report->file_size is ignored on return
    //  report->checkpoint_bytes_additional is ignored on return
-    void get_statistics(TOKU_DB_FRAGMENTATION report);
+    void Statistics(TOKU_DB_FRAGMENTATION report);

-    // Block allocator tracing.
-    // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file
-    //   should be written to.
-    // - Trace may be replayed by ba_trace_replay tool in tools/ directory
-    //   eg: "cat mytracefile | ba_trace_replay"
-    static void maybe_initialize_trace();
-    static void maybe_close_trace();
+    virtual ~BlockAllocator(){};

private:
-    void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment);
-    void grow_blocks_array_by(uint64_t n_to_add);
-    void grow_blocks_array();
-    int64_t find_block(uint64_t offset);
-    struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
-
-    // Tracing
-    toku_mutex_t _trace_lock;
-    void _trace_create(void);
-    void _trace_create_from_blockpairs(void);
-    void _trace_destroy(void);
-    void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset);
-    void _trace_free(uint64_t offset);
+    void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment);

    // How much to reserve at the beginning
    uint64_t _reserve_at_beginning;
@@ -203,12 +181,8 @@ private:
    uint64_t _alignment;
    // How many blocks
    uint64_t _n_blocks;
-    // How big is the blocks_array.  Must be >= n_blocks.
-    uint64_t _blocks_array_size;
-    // These blocks are sorted by address.
-    struct blockpair *_blocks_array;
-    // Including the reserve_at_beginning
    uint64_t _n_bytes_in_use;
-    // The allocation strategy are we using
-    enum allocation_strategy _strategy;
+    // These blocks are sorted by address.
+    MhsRbTree::Tree *_tree;
};
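Based only on the signatures in the header above, a plausible round trip through the reworked interface looks like this. It is an illustrative usage sketch, not code from the commit; error handling and the surrounding FT code are omitted.

#include <cstdint>

#include "ft/serialize/block_allocator.h"

static void block_allocator_round_trip(void) {
    BlockAllocator ba;
    ba.Create(BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
              BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT);

    uint64_t offset = 0;
    ba.AllocBlock(4096, &offset);  // the heat parameter is gone
    ba.FreeBlock(offset, 4096);    // the caller now supplies the size
    ba.Validate();                 // expensive; intended for tests
    ba.Destroy();
}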
|
@ -1,224 +0,0 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
|
||||||
#ident "$Id$"
|
|
||||||
/*======
|
|
||||||
This file is part of PerconaFT.
|
|
||||||
|
|
||||||
|
|
||||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|
||||||
|
|
||||||
PerconaFT is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License, version 2,
|
|
||||||
as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
PerconaFT is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
----------------------------------------
|
|
||||||
|
|
||||||
PerconaFT is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License, version 3,
|
|
||||||
as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
PerconaFT is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
======= */
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "portability/toku_assert.h"
|
|
||||||
|
|
||||||
#include "ft/serialize/block_allocator_strategy.h"
|
|
||||||
|
|
||||||
static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
|
|
||||||
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint64_t _roundup_to_power_of_two(uint64_t value) {
|
|
||||||
uint64_t r = 4096;
|
|
||||||
while (r < value) {
|
|
||||||
r *= 2;
|
|
||||||
invariant(r > 0);
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// First fit block allocation
|
|
||||||
static struct block_allocator::blockpair *
|
|
||||||
_first_fit(struct block_allocator::blockpair *blocks_array,
|
|
||||||
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
|
||||||
uint64_t max_padding) {
|
|
||||||
if (n_blocks == 1) {
|
|
||||||
// won't enter loop, can't underflow the direction < 0 case
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct block_allocator::blockpair *bp = &blocks_array[0];
|
|
||||||
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
|
|
||||||
n_spaces_to_check--, bp++) {
|
|
||||||
// Consider the space after bp
|
|
||||||
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
|
|
||||||
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
|
|
||||||
if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
|
|
||||||
invariant(bp - blocks_array < (int64_t) n_blocks);
|
|
||||||
return bp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct block_allocator::blockpair *
|
|
||||||
_first_fit_bw(struct block_allocator::blockpair *blocks_array,
|
|
||||||
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
|
||||||
uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
|
|
||||||
if (n_blocks == 1) {
|
|
||||||
// won't enter loop, can't underflow the direction < 0 case
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct block_allocator::blockpair *bp = &blocks_array[-1];
|
|
||||||
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
|
|
||||||
n_spaces_to_check--, bp--) {
|
|
||||||
// Consider the space after bp
|
|
||||||
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
|
|
||||||
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
|
|
||||||
if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
|
|
||||||
invariant(blocks_array - bp < (int64_t) n_blocks);
|
|
||||||
return bp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct block_allocator::blockpair *
|
|
||||||
block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
|
|
||||||
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
|
||||||
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Best fit block allocation
|
|
||||||
struct block_allocator::blockpair *
|
|
||||||
block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
|
|
||||||
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
|
||||||
struct block_allocator::blockpair *best_bp = nullptr;
|
|
||||||
uint64_t best_hole_size = 0;
|
|
||||||
for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
|
|
||||||
// Consider the space after blocknum
|
|
||||||
struct block_allocator::blockpair *bp = &blocks_array[blocknum];
|
|
||||||
uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
|
|
||||||
uint64_t possible_end_offset = possible_offset + size;
|
|
||||||
if (possible_end_offset <= bp[1].offset) {
|
|
||||||
// It fits here. Is it the best fit?
|
|
||||||
uint64_t hole_size = bp[1].offset - possible_end_offset;
|
|
||||||
if (best_bp == nullptr || hole_size < best_hole_size) {
|
|
||||||
best_hole_size = hole_size;
|
|
||||||
best_bp = bp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return best_bp;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint64_t padded_fit_alignment = 4096;
|
|
||||||
|
|
||||||
// TODO: These compiler specific directives should be abstracted in a portability header
|
|
||||||
// portability/toku_compiler.h?
|
|
||||||
__attribute__((__constructor__))
|
|
||||||
static void determine_padded_fit_alignment_from_env(void) {
|
|
||||||
// TODO: Should be in portability as 'toku_os_getenv()?'
|
|
||||||
const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
|
|
||||||
if (s != nullptr && strlen(s) > 0) {
|
|
||||||
const int64_t alignment = strtoll(s, nullptr, 10);
|
|
||||||
if (alignment <= 0) {
|
|
||||||
fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
|
|
||||||
"but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
|
|
||||||
s, padded_fit_alignment);
|
|
||||||
} else {
|
|
||||||
padded_fit_alignment = _roundup_to_power_of_two(alignment);
|
|
||||||
fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
|
|
||||||
padded_fit_alignment);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// First fit into a block that is oversized by up to max_padding.
|
|
||||||
// The hope is that if we purposefully waste a bit of space at allocation
|
|
||||||
// time we'll be more likely to reuse this block later.
|
|
||||||
struct block_allocator::blockpair *
|
|
||||||
block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
|
|
||||||
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
|
||||||
return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
|
|
||||||
}
|
|
||||||

static double hot_zone_threshold = 0.85;

// TODO: These compiler specific directives should be abstracted in a portability header
// portability/toku_compiler.h?
__attribute__((__constructor__))
static void determine_hot_zone_threshold_from_env(void) {
    // TODO: Should be in portability as 'toku_os_getenv()?'
    const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
    if (s != nullptr && strlen(s) > 0) {
        const double hot_zone = strtod(s, nullptr);
        if (hot_zone < 1 || hot_zone > 99) {
            fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
                            "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
            hot_zone_threshold = 85 / 100;
        } else {
            fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
            hot_zone_threshold = hot_zone / 100;
        }
    }
}

struct block_allocator::blockpair *
block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
                                    uint64_t n_blocks, uint64_t size, uint64_t alignment,
                                    uint64_t heat) {
    if (heat > 0) {
        struct block_allocator::blockpair *bp, *boundary_bp;

        // Hot allocation. Find the beginning of the hot zone.
        boundary_bp = &blocks_array[n_blocks - 1];
        uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
        uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);

        boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
        uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
        uint64_t blocks_outside_zone = boundary_bp - blocks_array;
        invariant(blocks_in_zone + blocks_outside_zone == n_blocks);

        if (blocks_in_zone > 0) {
            // Find the first fit in the hot zone, going forward.
            bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
            if (bp != nullptr) {
                return bp;
            }
        }
        if (blocks_outside_zone > 0) {
            // Find the first fit in the cold zone, going backwards.
            bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
            if (bp != nullptr) {
                return bp;
            }
        }
    } else {
        // Cold allocations are simply first-fit from the beginning.
        return _first_fit(blocks_array, n_blocks, size, alignment, 0);
    }
    return nullptr;
}
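heat_zone splits the offset-sorted block array at a boundary derived from the hot-zone threshold: hot allocations first-fit forward from that boundary, cold allocations first-fit from the start of the file. A rough, self-contained illustration of computing that boundary is below; Extent and hot_zone_boundary are simplified stand-ins, not the real blockpair type or comparator.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Extent { uint64_t offset; uint64_t size; };

    // Index of the first extent at or past threshold * highest_used_offset.
    // The real code does a std::lower_bound over blockpairs; this uses a
    // plain comparison on a sorted vector as a stand-in.
    static size_t hot_zone_boundary(const std::vector<Extent> &used,
                                    double threshold /* e.g. 0.85 */) {
        if (used.empty()) return 0;
        const Extent &last = used.back();
        uint64_t highest = last.offset + last.size;
        uint64_t hot_offset = (uint64_t)(threshold * (double)highest);
        auto it = std::lower_bound(used.begin(), used.end(), hot_offset,
                                   [](const Extent &e, uint64_t off) {
                                       return e.offset < off;
                                   });
        return (size_t)(it - used.begin());
    }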
@ -51,7 +51,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
#include "ft/serialize/rbuf.h"
#include "ft/serialize/wbuf.h"
#include "ft/serialize/block_allocator.h"

#include "util/nb_mutex.h"
#include "util/scoped_malloc.h"
@ -61,16 +60,13 @@ static const BLOCKNUM freelist_null = { -1 };
// value of block_translation_pair.size if blocknum is unused
static const DISKOFF size_is_free = (DISKOFF)-1;

// value of block_translation_pair.u.diskoff if blocknum is used but does not
// yet have a diskblock
static const DISKOFF diskoff_unused = (DISKOFF)-2;

void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); }

void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); }

// TODO: Move lock to FT
void toku_ft_lock(FT ft) {
@ -85,12 +81,15 @@ void toku_ft_unlock(FT ft) {
    bt->_mutex_unlock();
}

// There are two headers: the reserve must fit them both and be suitably
// aligned.
static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
                      BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT ==
                  0,
              "Block allocator's header reserve must be suitibly aligned");
static_assert(
    BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
    "Block allocator's total header reserve must exactly fit two headers");

// does NOT initialize the block allocator: the caller is responsible
@ -99,15 +98,21 @@ void block_table::_create_internal() {
    memset(&_inprogress, 0, sizeof(struct translation));
    memset(&_checkpointed, 0, sizeof(struct translation));
    memset(&_mutex, 0, sizeof(_mutex));
    _bt_block_allocator = new BlockAllocator();
    toku_mutex_init(&_mutex, nullptr);
    nb_mutex_init(&_safe_file_size_lock);
}

// Fill in the checkpointed translation from buffer, and copy checkpointed to
// current.
// The one read from disk is the last known checkpointed one, so we are keeping
// it in
// place and then setting current (which is never stored on disk) for current
// use.
// The translation_buffer has translation only, we create the rest of the
// block_table.
int block_table::create_from_buffer(
    int fd,
    DISKOFF location_on_disk,  // Location of translation_buffer
    DISKOFF size_on_disk,
    unsigned char *translation_buffer) {
@ -115,9 +120,8 @@ int block_table::create_from_buffer(int fd,
    _create_internal();

    // Deserialize the translation and copy it to current
    int r = _translation_deserialize_from_buffer(
        &_checkpointed, location_on_disk, size_on_disk, translation_buffer);
    if (r != 0) {
        return r;
    }
@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd,
    invariant(file_size >= 0);
    _safe_file_size = file_size;

    // Gather the non-empty translations and use them to create the block
    // allocator
    toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b *
                                  sizeof(struct BlockAllocator::BlockPair));
    struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
    uint64_t n_pairs = 0;
    for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) {
        struct block_translation_pair pair = _checkpointed.block_translation[i];
        if (pair.size > 0) {
            invariant(pair.u.diskoff != diskoff_unused);
            pairs[n_pairs++] =
                BlockAllocator::BlockPair(pair.u.diskoff, pair.size);
        }
    }

    _bt_block_allocator->CreateFromBlockPairs(
        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
        BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT,
        pairs,
        n_pairs);

    return 0;
}
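create_from_buffer walks the checkpointed translation, keeps every pair with a positive size, and seeds the allocator with those (offset, size) pairs so it knows which regions of the file are already occupied. A compact stand-alone sketch of that seeding step follows; SimpleAllocator and its methods are hypothetical names, not the BlockAllocator class introduced by this commit.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Pair { int64_t diskoff; int64_t size; };

    // Hypothetical allocator that just remembers used extents sorted by offset.
    class SimpleAllocator {
       public:
        void create_from_pairs(uint64_t reserve, std::vector<Pair> pairs) {
            _used.clear();
            _used.push_back({0, (int64_t)reserve});  // space reserved for the headers
            for (const Pair &p : pairs)
                if (p.size > 0) _used.push_back(p);
            std::sort(_used.begin(), _used.end(),
                      [](const Pair &a, const Pair &b) { return a.diskoff < b.diskoff; });
        }
        int64_t allocated_limit() const {
            const Pair &last = _used.back();
            return last.diskoff + last.size;  // first byte past the last used extent
        }
       private:
        std::vector<Pair> _used;
    };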
@ -155,8 +163,10 @@ void block_table::create() {
    _create_internal();

    _checkpointed.type = TRANSLATION_CHECKPOINTED;
    _checkpointed.smallest_never_used_blocknum =
        make_blocknum(RESERVED_BLOCKNUMS);
    _checkpointed.length_of_array =
        _checkpointed.smallest_never_used_blocknum.b;
    _checkpointed.blocknum_freelist_head = freelist_null;
    XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation);
    for (int64_t i = 0; i < _checkpointed.length_of_array; i++) {
@ -168,8 +178,9 @@ void block_table::create() {
    _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT);

    // Create an empty block allocator.
    _bt_block_allocator->Create(
        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
        BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT);
}

// TODO: Refactor with FT-303
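The static_asserts earlier in this file pin down the on-disk layout that Create() relies on: the header reserve is a multiple of the allocator alignment, and the total reserve holds exactly two headers. A tiny sketch of the same invariants with made-up constant values (the real constants live in the block allocator, not here):

    #include <cstdint>

    // Made-up values, for illustration only.
    constexpr uint64_t kAlignment = 4096;
    constexpr uint64_t kHeaderReserve = 32 * 1024;
    constexpr uint64_t kTotalHeaderReserve = 2 * kHeaderReserve;

    static_assert(kHeaderReserve % kAlignment == 0,
                  "header reserve must be aligned");
    static_assert(kTotalHeaderReserve == 2 * kHeaderReserve,
                  "total reserve holds exactly two headers");

    // Every data block then starts at or after the reserve, on an aligned offset.
    constexpr uint64_t first_data_offset() {
        return (kTotalHeaderReserve + kAlignment - 1) / kAlignment * kAlignment;
    }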
@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) {

void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) {
    toku_mutex_assert_locked(&_mutex);
    uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit();
    // Save a call to toku_os_get_file_size (kernel call) if unlikely to be
    // useful.
    if (new_size_needed < size_needed_before &&
        new_size_needed < _safe_file_size) {
        nb_mutex_lock(&_safe_file_size_lock, &_mutex);

        // Must hold _safe_file_size_lock to change _safe_file_size.
        if (new_size_needed < _safe_file_size) {
            int64_t safe_file_size_before = _safe_file_size;
            // Not safe to use the 'to-be-truncated' portion until truncate is
            // done.
            _safe_file_size = new_size_needed;
            _mutex_unlock();

            uint64_t size_after;
            toku_maybe_truncate_file(
                fd, new_size_needed, safe_file_size_before, &size_after);
            _mutex_lock();

            _safe_file_size = size_after;
@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) {
    _mutex_unlock();
}

void block_table::_copy_translation(struct translation *dst,
                                    struct translation *src,
                                    enum translation_type newtype) {
    // We intend to malloc a fresh block, so the incoming translation should be
    // empty
    invariant_null(dst->block_translation);

    invariant(src->length_of_array >= src->smallest_never_used_blocknum.b);
    invariant(newtype == TRANSLATION_DEBUG ||
              (src->type == TRANSLATION_CURRENT &&
               newtype == TRANSLATION_INPROGRESS) ||
              (src->type == TRANSLATION_CHECKPOINTED &&
               newtype == TRANSLATION_CURRENT));
    dst->type = newtype;
    dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum;
    dst->blocknum_freelist_head = src->blocknum_freelist_head;

    // destination btt is of fixed size. Allocate + memcpy the exact length
    // necessary.
    dst->length_of_array = dst->smallest_never_used_blocknum.b;
    XMALLOC_N(dst->length_of_array, dst->block_translation);
    memcpy(dst->block_translation,
           src->block_translation,
           dst->length_of_array * sizeof(*dst->block_translation));

    // New version of btt is not yet stored on disk.
    dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0;
    dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff =
        diskoff_unused;
}

int64_t block_table::get_blocks_in_use_unlocked() {
@ -241,7 +265,8 @@ int64_t block_table::get_blocks_in_use_unlocked() {
    int64_t num_blocks = 0;
    {
        // Reserved blocknums do not get upgraded; They are part of the header.
        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
             b.b++) {
            if (t->block_translation[b.b].size != size_is_free) {
                num_blocks++;
            }
@ -251,7 +276,8 @@ int64_t block_table::get_blocks_in_use_unlocked() {
}

void block_table::_maybe_optimize_translation(struct translation *t) {
    // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums
    // instead of just
    // on a free list. Doing so requires us to regenerate the free list.
    // This is O(n) work, so do it only if you're already doing that.

@ -260,7 +286,8 @@ void block_table::_maybe_optimize_translation(struct translation *t) {
    // Calculate how large the free suffix is.
    int64_t freed;
    {
        for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS;
             b.b--) {
            if (t->block_translation[b.b - 1].size != size_is_free) {
                break;
            }
@ -270,7 +297,8 @@ void block_table::_maybe_optimize_translation(struct translation *t) {
    if (freed > 0) {
        t->smallest_never_used_blocknum.b = b.b;
        if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) {
            // We're using more memory than necessary to represent this now.
            // Reduce.
            uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
            XREALLOC_N(new_length, t->block_translation);
            t->length_of_array = new_length;
@ -279,9 +307,11 @@ void block_table::_maybe_optimize_translation(struct translation *t) {

        // Regenerate free list.
        t->blocknum_freelist_head.b = freelist_null.b;
        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
             b.b++) {
            if (t->block_translation[b.b].size == size_is_free) {
                t->block_translation[b.b].u.next_free_blocknum =
                    t->blocknum_freelist_head;
                t->blocknum_freelist_head = b;
            }
        }
@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() {
}

void block_table::note_skipped_checkpoint() {
    // Purpose, alert block translation that the checkpoint was skipped, e.x.
    // for a non-dirty header
    _mutex_lock();
    paranoid_invariant_notnull(_inprogress.block_translation);
    _checkpoint_skipped = true;
    _mutex_unlock();
}

// Purpose: free any disk space used by previous checkpoint that isn't in use by
// either
// - current state
// - in-progress checkpoint
// capture inprogress as new checkpointed.
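note_end_checkpoint (next hunk) frees a block from the old checkpointed table only when neither the in-progress nor the current translation still points at the same disk offset; that is exactly what _translation_prevents_freeing tests. A reduced model of the rule, using simplified stand-in structs rather than the real translation types:

    #include <cstdint>
    #include <vector>

    struct TPair { int64_t diskoff; int64_t size; };
    struct Table { std::vector<TPair> rows; };  // indexed by blocknum

    // A table still "owns" the old block if it maps the same blocknum to the
    // same disk offset; in that case we must not free the space yet.
    static bool prevents_freeing(const Table &t, int64_t blocknum, const TPair &old_pair) {
        return blocknum < (int64_t)t.rows.size() &&
               t.rows[blocknum].diskoff == old_pair.diskoff;
    }

    static bool safe_to_free(const Table &current, const Table &inprogress,
                             int64_t blocknum, const TPair &old_pair) {
        return old_pair.size > 0 &&
               !prevents_freeing(inprogress, blocknum, old_pair) &&
               !prevents_freeing(current, blocknum, old_pair);
    }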
@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() {
void block_table::note_end_checkpoint(int fd) {
    // Free unused blocks
    _mutex_lock();
    uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit();
    paranoid_invariant_notnull(_inprogress.block_translation);
    if (_checkpoint_skipped) {
        toku_free(_inprogress.block_translation);
@ -332,16 +364,22 @@ void block_table::note_end_checkpoint(int fd) {
    }

    // Make certain inprogress was allocated space on disk
    invariant(
        _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
    invariant(
        _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff >
        0);

    {
        struct translation *t = &_checkpointed;
        for (int64_t i = 0; i < t->length_of_array; i++) {
            struct block_translation_pair *pair = &t->block_translation[i];
            if (pair->size > 0 &&
                !_translation_prevents_freeing(
                    &_inprogress, make_blocknum(i), pair)) {
                invariant(!_translation_prevents_freeing(
                    &_current, make_blocknum(i), pair));
                _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size);
            }
        }
        toku_free(_checkpointed.block_translation);
@ -359,24 +397,27 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) {
    return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b;
}

void block_table::_verify_valid_blocknum(struct translation *UU(t),
                                         BLOCKNUM UU(b)) {
    invariant(_is_valid_blocknum(t, b));
}

bool block_table::_is_valid_freeable_blocknum(struct translation *t,
                                              BLOCKNUM b) {
    invariant(t->length_of_array >= t->smallest_never_used_blocknum.b);
    return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b;
}

// should be freeable
void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t),
                                                  BLOCKNUM UU(b)) {
    invariant(_is_valid_freeable_blocknum(t, b));
}

// Also used only in ft-serialize-test.
void block_table::block_free(uint64_t offset, uint64_t size) {
    _mutex_lock();
    _bt_block_allocator->FreeBlock(offset, size);
    _mutex_unlock();
}

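block_free (and every FreeBlock call in this commit) now passes the block's size along with its offset, which lets the allocator keep free space as explicit extents instead of inferring sizes from its neighbours. A minimal free-extent map in that spirit; FreeExtentMap is a hypothetical illustration, not the BlockAllocator implementation:

    #include <cstdint>
    #include <iterator>
    #include <map>

    class FreeExtentMap {
       public:
        // Return the freed extent to the map, coalescing with an adjacent
        // extent that ends exactly where this one starts.
        void free_block(uint64_t offset, uint64_t size) {
            auto it = _free.lower_bound(offset);
            if (it != _free.begin()) {
                auto prev = std::prev(it);
                if (prev->first + prev->second == offset) {
                    prev->second += size;
                    return;
                }
            }
            _free[offset] = size;
        }
       private:
        std::map<uint64_t, uint64_t> _free;  // offset -> size
    };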
@ -387,25 +428,34 @@ int64_t block_table::_calculate_size_on_disk(struct translation *t) {
           4;  // 4 for checksum
}

// We cannot free the disk space allocated to this blocknum if it is still in
// use by the given translation table.
bool block_table::_translation_prevents_freeing(
    struct translation *t,
    BLOCKNUM b,
    struct block_translation_pair *old_pair) {
    return t->block_translation && b.b < t->smallest_never_used_blocknum.b &&
           old_pair->u.diskoff == t->block_translation[b.b].u.diskoff;
}

void block_table::_realloc_on_disk_internal(BLOCKNUM b,
                                            DISKOFF size,
                                            DISKOFF *offset,
                                            FT ft,
                                            bool for_checkpoint) {
    toku_mutex_assert_locked(&_mutex);
    ft_set_dirty(ft, for_checkpoint);

    struct translation *t = &_current;
    struct block_translation_pair old_pair = t->block_translation[b.b];
    // Free the old block if it is not still in use by the checkpoint in
    // progress or the previous checkpoint
    bool cannot_free =
        (!for_checkpoint &&
         _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
        _translation_prevents_freeing(&_checkpointed, b, &old_pair);
    if (!cannot_free && old_pair.u.diskoff != diskoff_unused) {
        _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
    }

    uint64_t allocator_offset = diskoff_unused;
@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o
    if (size > 0) {
        // Allocate a new block if the size is greater than 0,
        // if the size is just 0, offset will be set to diskoff_unused
        _bt_block_allocator->AllocBlock(size, &allocator_offset);
    }
    t->block_translation[b.b].u.diskoff = allocator_offset;
    *offset = allocator_offset;

    // Update inprogress btt if appropriate (if called because Pending bit is
    // set).
    if (for_checkpoint) {
        paranoid_invariant(b.b < _inprogress.length_of_array);
        _inprogress.block_translation[b.b] = t->block_translation[b.b];
    }
}

void block_table::_ensure_safe_write_unlocked(int fd,
                                              DISKOFF block_size,
                                              DISKOFF block_offset) {
    // Requires: holding _mutex
    uint64_t size_needed = block_size + block_offset;
    if (size_needed > _safe_file_size) {
@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
        _mutex_unlock();

        int64_t size_after;
        toku_maybe_preallocate_in_file(
            fd, size_needed, _safe_file_size, &size_after);

        _mutex_lock();
        _safe_file_size = size_after;
@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
    }
}

void block_table::realloc_on_disk(BLOCKNUM b,
                                  DISKOFF size,
                                  DISKOFF *offset,
                                  FT ft,
                                  int fd,
                                  bool for_checkpoint) {
    _mutex_lock();
    struct translation *t = &_current;
    _verify_valid_freeable_blocknum(t, b);
    _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint);

    _ensure_safe_write_unlocked(fd, size, *offset);
    _mutex_unlock();
@ -458,9 +517,12 @@ bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) {
    return pair->size == 0 && pair->u.diskoff == diskoff_unused;
}

// Effect: figure out where to put the inprogress btt on disk, allocate space
// for it there.
// The space must be 512-byte aligned (both the starting address and the
// size).
// As a result, the allcoated space may be a little bit bigger (up to the next
// 512-byte boundary) than the actual btt.
void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
    toku_mutex_assert_locked(&_mutex);

@ -473,32 +535,49 @@ void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
    // Allocate a new block
    int64_t size = _calculate_size_on_disk(t);
    uint64_t offset;
    _bt_block_allocator->AllocBlock(size, &offset);
    t->block_translation[b.b].u.diskoff = offset;
    t->block_translation[b.b].size = size;
}

// Effect: Serializes the blocktable to a wbuf (which starts uninitialized)
// A clean shutdown runs checkpoint start so that current and inprogress are
// copies.
// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the
// total length is a multiple of 512 (so we pad with zeros at the end if
// needd)
// The address is guaranteed to be 512-byte aligned, but the size is not
// guaranteed.
// It *is* guaranteed that we can read up to the next 512-byte boundary,
// however
void block_table::serialize_translation_to_wbuf(int fd,
                                                struct wbuf *w,
                                                int64_t *address,
                                                int64_t *size) {
    _mutex_lock();
    struct translation *t = &_inprogress;

    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
    _alloc_inprogress_translation_on_disk_unlocked();  // The allocated block
                                                       // must be 512-byte
                                                       // aligned to make
                                                       // O_DIRECT happy.
    uint64_t size_translation = _calculate_size_on_disk(t);
    uint64_t size_aligned = roundup_to_multiple(512, size_translation);
    invariant((int64_t)size_translation == t->block_translation[b.b].size);
    {
        // Init wbuf
        if (0)
            printf(
                "%s:%d writing translation table of size_translation %" PRIu64
                " at %" PRId64 "\n",
                __FILE__,
                __LINE__,
                size_translation,
                t->block_translation[b.b].u.diskoff);
        char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
        for (uint64_t i = size_translation; i < size_aligned; i++)
            buf[i] = 0;  // fill in the end of the buffer with zeros.
        wbuf_init(w, buf, size_aligned);
    }
    wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
@ -506,7 +585,11 @@ void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
    int64_t i;
    for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
        if (0)
            printf("%s:%d %" PRId64 ",%" PRId64 "\n",
                   __FILE__,
                   __LINE__,
                   t->block_translation[i].u.diskoff,
                   t->block_translation[i].size);
        wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
        wbuf_DISKOFF(w, t->block_translation[i].size);
    }
@ -514,14 +597,17 @@ void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
    wbuf_int(w, checksum);
    *address = t->block_translation[b.b].u.diskoff;
    *size = size_translation;
    invariant((*address) % 512 == 0);

    _ensure_safe_write_unlocked(fd, size_aligned, *address);
    _mutex_unlock();
}

// Perhaps rename: purpose is get disk address of a block, given its blocknum
// (blockid?)
void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
                                                              DISKOFF *offset,
                                                              DISKOFF *size) {
    struct translation *t = &_current;
    _verify_valid_blocknum(t, b);
    if (offset) {
@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF
    }
}

// Perhaps rename: purpose is get disk address of a block, given its blocknum
// (blockid?)
void block_table::translate_blocknum_to_offset_size(BLOCKNUM b,
                                                     DISKOFF *offset,
                                                     DISKOFF *size) {
    _mutex_lock();
    _translate_blocknum_to_offset_size_unlocked(b, offset, size);
    _mutex_unlock();
@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
    if (t->blocknum_freelist_head.b == freelist_null.b) {
        // no previously used blocknums are available
        // use a never used blocknum
        _maybe_expand_translation(
            t);  // Ensure a never used blocknums is available
        result = t->smallest_never_used_blocknum;
        t->smallest_never_used_blocknum.b++;
    } else {  // reuse a previously used blocknum
@ -587,7 +677,8 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) {
    _mutex_unlock();
}

void block_table::_free_blocknum_in_translation(struct translation *t,
                                                BLOCKNUM b) {
    _verify_valid_freeable_blocknum(t, b);
    paranoid_invariant(t->block_translation[b.b].size != size_is_free);

@ -598,7 +689,9 @@ void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM

// Effect: Free a blocknum.
// If the blocknum holds the only reference to a block on disk, free that block
void block_table::_free_blocknum_unlocked(BLOCKNUM *bp,
                                          FT ft,
                                          bool for_checkpoint) {
    toku_mutex_assert_locked(&_mutex);
    BLOCKNUM b = *bp;
    bp->b = 0;  // Remove caller's reference.
@ -607,21 +700,22 @@ void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoi

    _free_blocknum_in_translation(&_current, b);
    if (for_checkpoint) {
        paranoid_invariant(ft->checkpoint_header->type ==
                           FT_CHECKPOINT_INPROGRESS);
        _free_blocknum_in_translation(&_inprogress, b);
    }

    // If the size is 0, no disk block has ever been assigned to this blocknum.
    if (old_pair.size > 0) {
        // Free the old block if it is not still in use by the checkpoint in
        // progress or the previous checkpoint
        bool cannot_free =
            _translation_prevents_freeing(&_inprogress, b, &old_pair) ||
            _translation_prevents_freeing(&_checkpointed, b, &old_pair);
        if (!cannot_free) {
            _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
        }
    } else {
        paranoid_invariant(old_pair.size == 0);
        paranoid_invariant(old_pair.u.diskoff == diskoff_unused);
    }
@ -650,7 +744,8 @@ void block_table::free_unused_blocknums(BLOCKNUM root) {
        }
        BLOCKNUM b = make_blocknum(i);
        if (_current.block_translation[b.b].size == 0) {
            invariant(_current.block_translation[b.b].u.diskoff ==
                      diskoff_unused);
            _free_blocknum_in_translation(&_current, b);
        }
    }
@ -681,7 +776,8 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) {
}

// Verify there are no data blocks except root.
// TODO(leif): This actually takes a lock, but I don't want to fix all the
// callers right now.
void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) {
    paranoid_invariant(_no_data_blocks_except_root(root));
}
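_no_data_blocks_except_root walks every allocated blocknum and checks that only the reserved blocknums and the root are still in use. A stripped-down version of that scan over a plain vector, with assumed stand-in constants:

    #include <cstdint>
    #include <vector>

    static const int64_t kReservedBlocknums = 3;  // assumed count, for illustration
    static const int64_t kSizeIsFree = -1;

    static bool no_data_blocks_except_root(const std::vector<int64_t> &sizes,
                                            int64_t root) {
        for (int64_t b = kReservedBlocknums; b < (int64_t)sizes.size(); b++) {
            if (b != root && sizes[b] != kSizeIsFree) {
                return false;  // found a data block other than the root
            }
        }
        return true;
    }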
@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
    if (t->block_translation) {
        BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
        fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
        fprintf(f,
                " smallest_never_used_blocknum[%" PRId64 "]",
                t->smallest_never_used_blocknum.b);
        fprintf(f,
                " blocknum_free_list_head[%" PRId64 "]",
                t->blocknum_freelist_head.b);
        fprintf(
            f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
        fprintf(f,
                " location_on_disk[%" PRId64 "]\n",
                t->block_translation[b.b].u.diskoff);
        int64_t i;
        for (i = 0; i < t->length_of_array; i++) {
            fprintf(f,
                    " %" PRId64 ": %" PRId64 " %" PRId64 "\n",
                    i,
                    t->block_translation[i].u.diskoff,
                    t->block_translation[i].size);
        }
        fprintf(f, "\n");
    } else {
@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
void block_table::dump_translation_table_pretty(FILE *f) {
    _mutex_lock();
    struct translation *t = &_checkpointed;
    invariant(t->block_translation != nullptr);
    for (int64_t i = 0; i < t->length_of_array; ++i) {
        fprintf(f,
                "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n",
                i,
                t->block_translation[i].u.diskoff,
                t->block_translation[i].size);
    }
    _mutex_unlock();
}
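The dump above prints one row per blocknum: its disk offset and size, using the sentinels that recur throughout this file (size_is_free for an unused blocknum, diskoff_unused for a blocknum that has no disk block yet). A small model of such a table and its dump loop, with stand-in sentinel values rather than the real DISKOFF constants:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Row { int64_t diskoff; int64_t size; };
    static const int64_t kSizeIsFree = -1;     // blocknum not in use
    static const int64_t kDiskoffUnused = -2;  // in use, but no disk block yet

    static void dump(FILE *f, const std::vector<Row> &rows) {
        for (size_t i = 0; i < rows.size(); i++) {
            fprintf(f, "%zu\t%" PRId64 "\t%" PRId64 "\n",
                    i, rows[i].diskoff, rows[i].size);
        }
    }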
@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) {
    struct translation *t = &_current;
    if (b.b < t->length_of_array) {
        struct block_translation_pair *bx = &t->block_translation[b.b];
        printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n",
               b.b,
               bx->u.diskoff,
               bx->size);
    }
    _mutex_unlock();
}
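Both the dump above and translate_blocknum_to_offset_size follow the same pattern: take the table mutex, read the pair for one blocknum, release. A sketch of that locked lookup with std::mutex standing in for toku_mutex and a hypothetical TinyTable type:

    #include <cstdint>
    #include <mutex>
    #include <vector>

    struct Row { int64_t diskoff; int64_t size; };

    class TinyTable {
       public:
        // Returns false if the blocknum is out of range.
        bool lookup(int64_t blocknum, Row *out) {
            std::lock_guard<std::mutex> guard(_mu);
            if (blocknum < 0 || blocknum >= (int64_t)_rows.size()) return false;
            *out = _rows[blocknum];
            return true;
        }
       private:
        std::mutex _mu;
        std::vector<Row> _rows;
    };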
@ -763,18 +877,20 @@ void block_table::destroy(void) {
    toku_free(_inprogress.block_translation);
    toku_free(_checkpointed.block_translation);

    _bt_block_allocator->Destroy();
    delete _bt_block_allocator;
    toku_mutex_destroy(&_mutex);
    nb_mutex_destroy(&_safe_file_size_lock);
}

int block_table::_translation_deserialize_from_buffer(
    struct translation *t,
    DISKOFF location_on_disk,
    uint64_t size_on_disk,
    // out: buffer with serialized translation
    unsigned char *translation_buffer) {
    int r = 0;
    invariant(location_on_disk != 0);
    t->type = TRANSLATION_CHECKPOINTED;

    // check the checksum
@ -782,7 +898,10 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t,
    uint64_t offset = size_on_disk - 4;
    uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset));
    if (x1764 != stored_x1764) {
        fprintf(stderr,
                "Translation table checksum failure: calc=0x%08x read=0x%08x\n",
                x1764,
                stored_x1764);
        r = TOKUDB_BAD_CHECKSUM;
        goto exit;
    }
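_translation_deserialize_from_buffer validates the buffer by recomputing a checksum over everything except the last four bytes and comparing it with the value stored there. TokuFT uses its x1764 checksum; the sketch below substitutes a trivial 32-bit sum purely to show the wire layout, not the real algorithm.

    #include <cstdint>
    #include <cstring>

    // Toy checksum standing in for x1764; only the trailing-4-byte layout matters.
    static uint32_t toy_checksum(const unsigned char *buf, uint64_t len) {
        uint32_t x = 0;
        for (uint64_t i = 0; i < len; i++) x = x * 31 + buf[i];
        return x;
    }

    // Returns true if the last 4 bytes match the checksum of the rest.
    static bool verify_trailing_checksum(const unsigned char *buf, uint64_t size) {
        if (size < 4) return false;
        uint32_t stored;
        memcpy(&stored, buf + size - 4, sizeof(stored));  // assumes matching endianness
        return toy_checksum(buf, size - 4) == stored;
    }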
@ -802,15 +921,20 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t,
        t->block_translation[i].size = rbuf_DISKOFF(&rb);
    }
    invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk);
    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size ==
              (int64_t)size_on_disk);
    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff ==
              location_on_disk);

exit:
    return r;
}

int block_table::iterate(enum translation_type type,
                         BLOCKTABLE_CALLBACK f,
                         void *extra,
                         bool data_only,
                         bool used_only) {
    struct translation *src;

    int r = 0;
@ -841,10 +965,13 @@ int block_table::iterate(enum translation_type type,
        int64_t i;
        for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
            struct block_translation_pair pair = t->block_translation[i];
            if (data_only && i < RESERVED_BLOCKNUMS)
                continue;
            if (used_only && pair.size <= 0)
                continue;
            r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
            if (r != 0)
                break;
        }
        toku_free(t->block_translation);
    }
@ -856,7 +983,10 @@ typedef struct {
    int64_t total_space;
} frag_extra;

static int frag_helper(BLOCKNUM UU(b),
                       int64_t size,
                       int64_t address,
                       void *extra) {
    frag_extra *info = (frag_extra *)extra;

    if (size + address > info->total_space)
@ -865,22 +995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr
    return 0;
}

void block_table::internal_fragmentation(int64_t *total_sizep,
                                         int64_t *used_sizep) {
    frag_extra info = {0, 0};
    int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true);
    invariant_zero(r);

    if (total_sizep)
        *total_sizep = info.total_space;
    if (used_sizep)
        *used_sizep = info.used_space;
}

void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size,
                                                       DISKOFF *offset,
                                                       FT ft) {
    toku_mutex_assert_locked(&_mutex);
    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
    _realloc_on_disk_internal(b, size, offset, ft, false);
}

void block_table::realloc_descriptor_on_disk(DISKOFF size,
                                             DISKOFF *offset,
                                             FT ft,
                                             int fd) {
    _mutex_lock();
    _realloc_descriptor_on_disk_unlocked(size, offset, ft);
    _ensure_safe_write_unlocked(fd, size, *offset);
@ -899,9 +1037,10 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
    // Requires: report->file_size_bytes is already filled in.

    // Count the headers.
    report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
    report->data_blocks = 1;
    report->checkpoint_bytes_additional =
        BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
    report->checkpoint_blocks_additional = 1;

    struct translation *current = &_current;
@ -915,8 +1054,10 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {

    struct translation *checkpointed = &_checkpointed;
    for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
        struct block_translation_pair *pair = &checkpointed->block_translation[i];
        if (pair->size > 0 &&
            !(i < current->length_of_array &&
              current->block_translation[i].size > 0 &&
              current->block_translation[i].u.diskoff == pair->u.diskoff)) {
            report->checkpoint_bytes_additional += pair->size;
@ -927,18 +1068,20 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
    struct translation *inprogress = &_inprogress;
    for (int64_t i = 0; i < inprogress->length_of_array; i++) {
        struct block_translation_pair *pair = &inprogress->block_translation[i];
        if (pair->size > 0 &&
            !(i < current->length_of_array &&
              current->block_translation[i].size > 0 &&
              current->block_translation[i].u.diskoff == pair->u.diskoff) &&
            !(i < checkpointed->length_of_array &&
              checkpointed->block_translation[i].size > 0 &&
              checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) {
            report->checkpoint_bytes_additional += pair->size;
            report->checkpoint_blocks_additional++;
        }
    }

    _bt_block_allocator->UnusedStatistics(report);
}
void block_table::get_info64(struct ftinfo64 *s) {
@ -967,7 +1110,8 @@ void block_table::get_info64(struct ftinfo64 *s) {
    _mutex_unlock();
}

int block_table::iterate_translation_tables(
    uint64_t checkpoint_count,
    int (*iter)(uint64_t checkpoint_count,
                int64_t total_num_rows,
                int64_t blocknum,
@ -978,14 +1122,26 @@ int block_table::iterate_translation_tables(uint64_t checkpoint_count,
    int error = 0;
    _mutex_lock();

    int64_t total_num_rows =
        _current.length_of_array + _checkpointed.length_of_array;
    for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) {
        struct block_translation_pair *block = &_current.block_translation[i];
        error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff,
                     block->size, iter_extra);
    }
    for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) {
        struct block_translation_pair *block =
            &_checkpointed.block_translation[i];
        error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff,
                     block->size, iter_extra);
    }

    _mutex_unlock();
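For reference, a caller-side sketch of the iterator shape that iterate_translation_tables expects. The callback below only tallies and prints rows; it is illustrative and not part of the PerconaFT sources.

#include <cstdint>
#include <cstdio>

// Matches the callback shape passed to iterate_translation_tables above:
// (checkpoint_count, total_num_rows, blocknum, diskoff, size, extra).
static int count_rows_cb(uint64_t checkpoint_count, int64_t total_num_rows,
                         int64_t blocknum, int64_t diskoff, int64_t size,
                         void *extra) {
    int64_t *n_seen = static_cast<int64_t *>(extra);
    (*n_seen)++;
    printf("ckpt=%llu row=%lld/%lld block=%lld offset=%lld size=%lld\n",
           (unsigned long long)checkpoint_count, (long long)*n_seen,
           (long long)total_num_rows, (long long)blocknum,
           (long long)diskoff, (long long)size);
    return 0;  // a non-zero return stops the iteration
}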
@ -62,7 +62,10 @@ enum {
    RESERVED_BLOCKNUMS
};

typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address,
                                   void *extra);

static inline BLOCKNUM make_blocknum(int64_t b) {
    BLOCKNUM result = {.b = b};
@ -80,13 +83,15 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
 *
 * inprogress  Is only filled by copying from current,
 *   and is the only version ever serialized to disk.
 *   (It is serialized to disk on checkpoint and clean shutdown.)
 *   At end of checkpoint it replaces 'checkpointed'.
 *   During a checkpoint, any 'pending' dirty writes will update inprogress.
 *
 * current  Is initialized by copying from checkpointed,
 *   is the only version ever modified while the database is in use,
 *   and is the only version ever copied to inprogress.
 *   It is never stored on disk.
 */
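A self-contained sketch of the lifecycle those comments describe, using stand-in types (a std::map instead of the real translation arrays, and hypothetical begin/end helpers) purely for illustration; this is not the block_table API.

#include <cstdint>
#include <map>
#include <utility>

// Stand-in for a translation table: blocknum -> disk offset.
using translation = std::map<int64_t, int64_t>;

struct translation_set {
    translation current;       // mutated while the database is in use
    translation inprogress;    // the only version ever serialized to disk
    translation checkpointed;  // must stay valid on disk until checkpoint ends
};

void begin_checkpoint(translation_set &ts) {
    ts.inprogress = ts.current;  // inprogress is only ever filled from current
}

void end_checkpoint(translation_set &ts) {
    // after inprogress has been written out, it replaces 'checkpointed'
    ts.checkpointed = std::move(ts.inprogress);
    ts.inprogress.clear();
}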
@ -102,7 +107,10 @@ public:

    void create();

    int create_from_buffer(int fd, DISKOFF location_on_disk,
                           DISKOFF size_on_disk,
                           unsigned char *translation_buffer);

    void destroy();

@ -114,11 +122,21 @@ public:

    // Blocknums
    void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
    void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset,
                         struct ft *ft, int fd, bool for_checkpoint);
    void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
    void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset,
                                           DISKOFF *size);
    void free_unused_blocknums(BLOCKNUM root);
    void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset,
                                    struct ft *ft, int fd);
    void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size);

    // External verification
@ -127,15 +145,22 @@ public:
    void verify_no_free_blocknums();

    // Serialization
    void serialize_translation_to_wbuf(int fd, struct wbuf *w,
                                       int64_t *address, int64_t *size);

    // DEBUG ONLY (ftdump included), tests included
    void blocknum_dump_translation(BLOCKNUM b);
    void dump_translation_table_pretty(FILE *f);
    void dump_translation_table(FILE *f);
    void block_free(uint64_t offset, uint64_t size);

    int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra,
                bool data_only, bool used_only);
    void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep);

    // Requires: blocktable lock is held.
@ -146,7 +171,10 @@ public:

    void get_info64(struct ftinfo64 *);

    int iterate_translation_tables(
        uint64_t,
        int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *),
        void *);

private:
    struct block_translation_pair {
@ -173,7 +201,8 @@ private:
    struct translation {
        enum translation_type type;

        // Number of elements in array (block_translation). always >=
        // smallest_never_used_blocknum
        int64_t length_of_array;
        BLOCKNUM smallest_never_used_blocknum;

@ -181,20 +210,28 @@ private:
        BLOCKNUM blocknum_freelist_head;
        struct block_translation_pair *block_translation;

        // size_on_disk is stored in
        //   block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
        // location_on is stored in
        //   block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
    };

    void _create_internal();
    int _translation_deserialize_from_buffer(
        struct translation *t,     // destination into which to deserialize
        DISKOFF location_on_disk,  // location of translation_buffer
        uint64_t size_on_disk,
        unsigned char *translation_buffer);  // buffer with serialized translation

    void _copy_translation(struct translation *dst, struct translation *src,
                           enum translation_type newtype);
    void _maybe_optimize_translation(struct translation *t);
    void _maybe_expand_translation(struct translation *t);
    bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b,
                                       struct block_translation_pair *old_pair);
    void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b);
    int64_t _calculate_size_on_disk(struct translation *t);
    bool _pair_is_unallocated(struct block_translation_pair *pair);
@ -203,14 +240,26 @@ private:

    // Blocknum management
    void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
    void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft,
                                 bool for_checkpoint);
    void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset,
                                              struct ft *ft);
    void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset,
                                   struct ft *ft, bool for_checkpoint);
    void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset,
                                                     DISKOFF *size);

    // File management
    void _maybe_truncate_file(int fd, uint64_t size_needed_before);
    void _ensure_safe_write_unlocked(int fd, DISKOFF block_size,
                                     DISKOFF block_offset);

    // Verification
    bool _is_valid_blocknum(struct translation *t, BLOCKNUM b);
@ -231,18 +280,22 @@ private:
    struct translation _current;

    // The translation used by the checkpoint currently in progress.
    // If the checkpoint thread allocates a block, it must also update the
    // current translation.
    struct translation _inprogress;

    // The translation for the data that shall remain inviolate on disk until
    // the next checkpoint finishes, after which any blocks used only in this
    // translation can be freed.
    struct translation _checkpointed;

    // The in-memory data structure for block allocation.
    // There is no on-disk data structure for block allocation.
    // Note: This is *allocation* not *translation* - the block allocator is
    // unaware of which blocks are used for which translation, but simply
    // allocates and deallocates blocks.
    BlockAllocator *_bt_block_allocator;
    toku_mutex_t _mutex;
    struct nb_mutex _safe_file_size_lock;
    bool _checkpoint_skipped;
@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) {
    return result;
}

static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma),
                                    BLOCKNUM *blocknum) {
    *blocknum = rbuf_blocknum(rb);
}
@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen,
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    int8_t windowBits = source[1];
    int r = inflateInit2(&strm, windowBits);
    lazy_assert(r == Z_OK);
    strm.next_out = dest;
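The change from char to int8_t matters because the stored windowBits byte may be negative (zlib treats negative windowBits as "raw deflate, no zlib header"), and plain char is unsigned on some platforms. A small standalone zlib sketch of the same pattern; the buffer layout here is illustrative only.

#include <zlib.h>
#include <cassert>
#include <cstdint>

// Initialize an inflate stream from a windowBits value stored in a byte.
// A negative value selects raw deflate (no zlib wrapper); 8..15 selects a
// zlib stream.  Reading through int8_t keeps the sign even where 'char'
// defaults to unsigned.
static void init_inflate_from_byte(z_stream *strm, const unsigned char *src) {
    strm->zalloc = Z_NULL;
    strm->zfree = Z_NULL;
    strm->opaque = Z_NULL;
    int8_t windowBits = static_cast<int8_t>(src[1]);  // may legitimately be negative
    int r = inflateInit2(strm, windowBits);
    assert(r == Z_OK);
}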
@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
        // translation table itself won't fit in main memory.
        ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read,
                                       translation_address_on_disk);
        invariant(readsz >= translation_size_on_disk);
        invariant(readsz <= (ssize_t)size_to_read);
    }
    // Create table and read in data.
    r = ft->blocktable.create_from_buffer(fd,
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t serialize_ft_min_size(uint32_t version) {
|
||||||
serialize_ft_min_size (uint32_t version) {
|
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
|
|
||||||
switch (version) {
|
switch (version) {
|
||||||
@ -445,39 +444,57 @@ serialize_ft_min_size (uint32_t version) {
|
|||||||
case FT_LAYOUT_VERSION_16:
|
case FT_LAYOUT_VERSION_16:
|
||||||
case FT_LAYOUT_VERSION_15:
|
case FT_LAYOUT_VERSION_15:
|
||||||
size += 4; // basement node size
|
size += 4; // basement node size
|
||||||
size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14
|
size += 8; // num_blocks_to_upgrade_14 (previously
|
||||||
|
// num_blocks_to_upgrade, now one int each for upgrade
|
||||||
|
// from 13, 14
|
||||||
size += 8; // time of last verification
|
size += 8; // time of last verification
|
||||||
case FT_LAYOUT_VERSION_14:
|
case FT_LAYOUT_VERSION_14:
|
||||||
size += 8; // TXNID that created
|
size += 8; // TXNID that created
|
||||||
case FT_LAYOUT_VERSION_13:
|
case FT_LAYOUT_VERSION_13:
|
||||||
size += (4 // build_id
|
size += (4 // build_id
|
||||||
+4 // build_id_original
|
+
|
||||||
+8 // time_of_creation
|
4 // build_id_original
|
||||||
+8 // time_of_last_modification
|
+
|
||||||
|
8 // time_of_creation
|
||||||
|
+
|
||||||
|
8 // time_of_last_modification
|
||||||
);
|
);
|
||||||
// fall through
|
// fall through
|
||||||
case FT_LAYOUT_VERSION_12:
|
case FT_LAYOUT_VERSION_12:
|
||||||
size += (+8 // "tokudata"
|
size += (+8 // "tokudata"
|
||||||
+4 // version
|
+
|
||||||
+4 // original_version
|
4 // version
|
||||||
+4 // size
|
+
|
||||||
+8 // byte order verification
|
4 // original_version
|
||||||
+8 // checkpoint_count
|
+
|
||||||
+8 // checkpoint_lsn
|
4 // size
|
||||||
+4 // tree's nodesize
|
+
|
||||||
+8 // translation_size_on_disk
|
8 // byte order verification
|
||||||
+8 // translation_address_on_disk
|
+
|
||||||
+4 // checksum
|
8 // checkpoint_count
|
||||||
+8 // Number of blocks in old version.
|
+
|
||||||
+8 // diskoff
|
8 // checkpoint_lsn
|
||||||
+4 // flags
|
+
|
||||||
|
4 // tree's nodesize
|
||||||
|
+
|
||||||
|
8 // translation_size_on_disk
|
||||||
|
+
|
||||||
|
8 // translation_address_on_disk
|
||||||
|
+
|
||||||
|
4 // checksum
|
||||||
|
+
|
||||||
|
8 // Number of blocks in old version.
|
||||||
|
+
|
||||||
|
8 // diskoff
|
||||||
|
+
|
||||||
|
4 // flags
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -554,7 +571,8 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
|
|||||||
// If too big, it is corrupt. We would probably notice during checksum
|
// If too big, it is corrupt. We would probably notice during checksum
|
||||||
// but may have to do a multi-gigabyte malloc+read to find out.
|
// but may have to do a multi-gigabyte malloc+read to find out.
|
||||||
// If its too small reading rbuf would crash, so verify.
|
// If its too small reading rbuf would crash, so verify.
|
||||||
if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) {
|
if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE ||
|
||||||
|
size < min_header_size) {
|
||||||
r = TOKUDB_DICTIONARY_NO_HEADER;
|
r = TOKUDB_DICTIONARY_NO_HEADER;
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
@ -566,13 +584,14 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
    uint32_t size_to_read = roundup_to_multiple(512, size);
    XMALLOC_N_ALIGNED(512, size_to_read, rb->buf);

    invariant(offset_of_header % 512 == 0);
    n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header);
    if (n != size_to_read) {
        if (n < 0) {
            r = get_error_errno();
        } else {
            r = EINVAL;  // Header might be useless (wrong size) or could be
                         // a disk read error.
        }
        goto exit;
    }
@ -581,21 +600,26 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
    // We have an rbuf that represents the header.
    // Size is within acceptable bounds.

    // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function
    // changed)
    uint32_t calculated_x1764;
    calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4);
    uint32_t stored_x1764;
    stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4));
    if (calculated_x1764 != stored_x1764) {
        r = TOKUDB_BAD_CHECKSUM;  // Header useless
        fprintf(stderr,
                "Header checksum failure: calc=0x%08x read=0x%08x\n",
                calculated_x1764,
                stored_x1764);
        goto exit;
    }

    // Verify byte order
    const void *tmp_byte_order_check;
    lazy_assert((sizeof toku_byte_order_host) == 8);
    rbuf_literal_bytes(rb, &tmp_byte_order_check, 8);  // Must not translate byte order
    int64_t byte_order_stored;
    byte_order_stored = *(int64_t *)tmp_byte_order_check;
    if (byte_order_stored != toku_byte_order_host) {
@ -620,11 +644,7 @@ exit:
// Read ft from file into struct.  Read both headers and use one.
// We want the latest acceptable header whose checkpoint_lsn is no later
// than max_acceptable_lsn.
int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) {
    struct rbuf rb_0;
    struct rbuf rb_1;
    uint64_t checkpoint_count_0 = 0;
@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd,
    int r0, r1, r;

    toku_off_t header_0_off = 0;
    r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0,
                                          &checkpoint_count_0,
                                          &checkpoint_lsn_0, &version_0);
    if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) {
        h0_acceptable = true;
    }

    toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
    r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1,
                                          &checkpoint_count_1,
                                          &checkpoint_lsn_1, &version_1);
    if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) {
        h1_acceptable = true;
    }
@ -655,22 +685,27 @@ toku_deserialize_ft_from(int fd,
        // We were unable to read either header or at least one is too
        // new.  Certain errors are higher priority than others.  Order of
        // these if/else if is important.
        if (r0 == TOKUDB_DICTIONARY_TOO_NEW ||
            r1 == TOKUDB_DICTIONARY_TOO_NEW) {
            r = TOKUDB_DICTIONARY_TOO_NEW;
        } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD ||
                   r1 == TOKUDB_DICTIONARY_TOO_OLD) {
            r = TOKUDB_DICTIONARY_TOO_OLD;
        } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) {
            fprintf(stderr, "Both header checksums failed.\n");
            r = TOKUDB_BAD_CHECKSUM;
        } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER ||
                   r1 == TOKUDB_DICTIONARY_NO_HEADER) {
            r = TOKUDB_DICTIONARY_NO_HEADER;
        } else {
            r = r0 ? r0 : r1;  // Arbitrarily report the error from the
                               // first header, unless it's readable
        }

        // it should not be possible for both headers to be later than the
        // max_acceptable_lsn
        invariant(
            !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
              (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
        invariant(r != 0);
        goto exit;
@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd,
        invariant(version_0 >= version_1);
        rb = &rb_0;
        version = version_0;
    } else {
        invariant(checkpoint_count_1 == checkpoint_count_0 + 1);
        invariant(version_1 >= version_0);
        rb = &rb_1;
@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd,
    } else if (h0_acceptable) {
        if (r1 == TOKUDB_BAD_CHECKSUM) {
            // print something reassuring
            fprintf(stderr,
                    "Header 2 checksum failed, but header 1 ok.  Proceeding.\n");
        }
        rb = &rb_0;
        version = version_0;
    } else if (h1_acceptable) {
        if (r0 == TOKUDB_BAD_CHECKSUM) {
            // print something reassuring
            fprintf(stderr,
                    "Header 1 checksum failed, but header 2 ok.  Proceeding.\n");
        }
        rb = &rb_1;
        version = version_1;
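When both headers are readable and acceptable, the choice reduces to a simple rule on the checkpoint counters. A compact, self-contained sketch of that decision (placeholder struct, not the real rbuf/FT state):

#include <cassert>
#include <cstdint>

// The two on-disk headers carry consecutive checkpoint counts; when both are
// acceptable, the one written by the most recent checkpoint (larger count) wins.
struct header_candidate {
    bool acceptable;            // read OK and checkpoint_lsn <= max_acceptable_lsn
    uint64_t checkpoint_count;
};

static int pick_header(const header_candidate &h0, const header_candidate &h1) {
    if (h0.acceptable && h1.acceptable) {
        assert(h0.checkpoint_count != h1.checkpoint_count);
        return h0.checkpoint_count > h1.checkpoint_count ? 0 : 1;
    }
    if (h0.acceptable) return 0;
    if (h1.acceptable) return 1;
    return -1;  // neither usable; the real code maps this to TOKUDB_* errors
}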
@ -718,15 +756,13 @@ exit:
    return r;
}

size_t toku_serialize_ft_size(FT_HEADER h) {
    size_t size = serialize_ft_min_size(h->layout_version);
    // There is no dynamic data.
    lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
    return size;
}

void toku_serialize_ft_to_wbuf (
    struct wbuf *wbuf,
    FT_HEADER h,
@ -777,28 +813,35 @@ void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) {
    int64_t address_translation;

    // Must serialize translation first, to get address,size for header.
    bt->serialize_translation_to_wbuf(
        fd, &w_translation, &address_translation, &size_translation);
    invariant(size_translation == w_translation.ndone);

    // the number of bytes available in the buffer is 0 mod 512, and those last
    // bytes are all initialized.
    invariant(w_translation.size % 512 == 0);

    struct wbuf w_main;
    size_t size_main = toku_serialize_ft_size(h);
    size_t size_main_aligned = roundup_to_multiple(512, size_main);
    invariant(size_main_aligned <
              BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
    char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf);
    for (size_t i = size_main; i < size_main_aligned; i++)
        mainbuf[i] = 0;  // initialize the end of the buffer with zeros
    wbuf_init(&w_main, mainbuf, size_main);
    toku_serialize_ft_to_wbuf(&w_main, h, address_translation, size_translation);
    lazy_assert(w_main.ndone == size_main);

    // Actually write translation table
    // This write is guaranteed to read good data at the end of the buffer,
    // since the w_translation.buf is padded with zeros to a 512-byte boundary.
    toku_os_full_pwrite(fd, w_translation.buf,
                        roundup_to_multiple(512, size_translation),
                        address_translation);

    // Everything but the header MUST be on disk before header starts.
    // Otherwise we will think the header is good and some blocks might not
@ -808,15 +851,16 @@ void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) {
    // If there is no cachefile we still need to do an fsync.
    if (cf) {
        toku_cachefile_fsync(cf);
    } else {
        toku_file_fsync(fd);
    }

    // Alternate writing header to two locations:
    //   Beginning (0) or BLOCK_ALLOCATOR_HEADER_RESERVE
    toku_off_t main_offset;
    main_offset = (h->checkpoint_count & 0x1)
                      ? 0
                      : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
    toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset);
    toku_free(w_main.buf);
    toku_free(w_translation.buf);
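Because the header slot alternates with the checkpoint count, one of the two fixed locations still holds the previous good header if a crash interrupts the write. A minimal sketch of the offset choice; the reserve constant below is a placeholder, the real value comes from the block allocator.

#include <cstdint>

// Placeholder for BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE.
constexpr int64_t kHeaderReserve = 4096;

// Odd checkpoint counts write at offset 0, even ones at the reserve offset,
// so two consecutive checkpoints never overwrite the same header slot.
static int64_t header_offset_for(uint64_t checkpoint_count) {
    return (checkpoint_count & 0x1) ? 0 : kHeaderReserve;
}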
@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) {
    num_cores = toku_os_get_number_active_processors();
    int r = toku_thread_pool_create(&ft_pool, num_cores);
    lazy_assert_zero(r);
    toku_serialize_in_parallel = false;
}

void toku_ft_serialize_layer_destroy(void) {
    toku_thread_pool_destroy(&ft_pool);
}

enum { FILE_CHANGE_INCREMENT = (16 << 20) };
@ -773,9 +771,13 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
    return 0;
}

int toku_serialize_ftnode_to(int fd,
                             BLOCKNUM blocknum,
                             FTNODE node,
                             FTNODE_DISK_DATA *ndd,
                             bool do_rebalancing,
                             FT ft,
                             bool for_checkpoint) {
    size_t n_to_write;
    size_t n_uncompressed_bytes;
    char *compressed_buf = nullptr;
@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
        toku_unsafe_fetch(&toku_serialize_in_parallel),
        &n_to_write,
        &n_uncompressed_bytes,
        &compressed_buf);
    if (r != 0) {
        return r;
    }

    // If the node has never been written, then write the whole buffer,
    // including the zeros
    invariant(blocknum.b >= 0);
    DISKOFF offset;

    // Dirties the ft
    ft->blocktable.realloc_on_disk(
        blocknum, n_to_write, &offset, ft, fd, for_checkpoint);

    tokutime_t t0 = toku_time_now();
    toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
    tokutime_t t1 = toku_time_now();

    tokutime_t io_time = t1 - t0;
    toku_ft_status_update_flush_reason(
        node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);

    toku_free(compressed_buf);
    node->dirty = 0;  // See #1957. Must set the node to be clean after
                      // serializing it so that it doesn't get written again on
                      // the next checkpoint or eviction.
    return 0;
}
@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
    bn->seqinsert = orig_bn->seqinsert;
    bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
    bn->stat64_delta = orig_bn->stat64_delta;
    bn->logical_rows_delta = orig_bn->logical_rows_delta;
    bn->data_buffer.clone(&orig_bn->data_buffer);
    return bn;
}
@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
    bn->seqinsert = 0;
    bn->stale_ancestor_messages_applied = false;
    bn->stat64_delta = ZEROSTATS;
    bn->logical_rows_delta = 0;
    bn->data_buffer.init_zero();
    return bn;
}
@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
                                            /* out */ int *layout_version_p);

// This function upgrades a version 14 or 13 ftnode to the current
// version.  NOTE: This code assumes the first field of the rbuf has
// already been read from the buffer (namely the layout_version of the
// ftnode.)
static int
@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL
    serialized->blocknum = log->blocknum;
}

int toku_serialize_rollback_log_to(int fd,
                                   ROLLBACK_LOG_NODE log,
                                   SERIALIZED_ROLLBACK_LOG_NODE serialized_log,
                                   bool is_serialized,
                                   FT ft,
                                   bool for_checkpoint) {
    size_t n_to_write;
    char *compressed_buf;
    struct serialized_rollback_log_node serialized_local;
@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
                              serialized_log->n_sub_blocks,
                              serialized_log->sub_block,
                              ft->h->compression_method,
                              &n_to_write,
                              &compressed_buf);

    // Dirties the ft
    DISKOFF offset;
    ft->blocktable.realloc_on_disk(
        blocknum, n_to_write, &offset, ft, fd, for_checkpoint);

    toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
    toku_free(compressed_buf);
    if (!is_serialized) {
        toku_static_serialized_rollback_log_destroy(&serialized_local);
        log->dirty = 0;  // See #1957. Must set the node to be clean after
                         // serializing it so that it doesn't get written again
                         // on the next checkpoint or eviction.
    }
    return 0;
}
@ -2704,7 +2711,7 @@ exit:
}

static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
    // This function exists solely to accommodate future changes in compression.
    int r = 0;
    if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) ||
        (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) ||
|
833  storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc  (new file)
@ -0,0 +1,833 @@
|
|||||||
|
/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#include "ft/serialize/rbtree_mhs.h"
|
||||||
|
#include "portability/toku_assert.h"
|
||||||
|
#include "portability/toku_portability.h"
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace MhsRbTree {
|
||||||
|
|
||||||
|
Tree::Tree() : _root(NULL), _align(1) {}
|
||||||
|
|
||||||
|
Tree::Tree(uint64_t align) : _root(NULL), _align(align) {}
|
||||||
|
|
||||||
|
Tree::~Tree() { Destroy(); }
|
||||||
|
|
||||||
|
void Tree::PreOrder(Node *tree) const {
|
||||||
|
if (tree != NULL) {
|
||||||
|
fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
|
||||||
|
PreOrder(tree->_left);
|
||||||
|
PreOrder(tree->_right);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::PreOrder() { PreOrder(_root); }
|
||||||
|
|
||||||
|
void Tree::InOrder(Node *tree) const {
|
||||||
|
if (tree != NULL) {
|
||||||
|
InOrder(tree->_left);
|
||||||
|
fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
|
||||||
|
InOrder(tree->_right);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// yeah, i only care about in order visitor. -Jun
|
||||||
|
void Tree::InOrderVisitor(Node *tree,
|
||||||
|
void (*f)(void *, Node *, uint64_t),
|
||||||
|
void *extra,
|
||||||
|
uint64_t depth) {
|
||||||
|
if (tree != NULL) {
|
||||||
|
InOrderVisitor(tree->_left, f, extra, depth + 1);
|
||||||
|
f(extra, tree, depth);
|
||||||
|
InOrderVisitor(tree->_right, f, extra, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t),
|
||||||
|
void *extra) {
|
||||||
|
InOrderVisitor(_root, f, extra, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::InOrder() { InOrder(_root); }
|
||||||
|
|
||||||
|
void Tree::PostOrder(Node *tree) const {
|
||||||
|
if (tree != NULL) {
|
||||||
|
PostOrder(tree->_left);
|
||||||
|
PostOrder(tree->_right);
|
||||||
|
fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::PostOrder() { PostOrder(_root); }
|
||||||
|
|
||||||
|
Node *Tree::SearchByOffset(uint64_t offset) {
|
||||||
|
Node *x = _root;
|
||||||
|
while ((x != NULL) && (rbn_offset(x).ToInt() != offset)) {
|
||||||
|
if (offset < rbn_offset(x).ToInt())
|
||||||
|
x = x->_left;
|
||||||
|
else
|
||||||
|
x = x->_right;
|
||||||
|
}
|
||||||
|
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// mostly for testing
|
||||||
|
Node *Tree::SearchFirstFitBySize(uint64_t size) {
|
||||||
|
if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size &&
|
||||||
|
rbn_right_mhs(_root) < size) {
|
||||||
|
return nullptr;
|
||||||
|
} else {
|
||||||
|
return SearchFirstFitBySizeHelper(_root, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) {
|
||||||
|
if (EffectiveSize(x) >= size) {
|
||||||
|
// only possible to go left
|
||||||
|
if (rbn_left_mhs(x) >= size)
|
||||||
|
return SearchFirstFitBySizeHelper(x->_left, size);
|
||||||
|
else
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
if (rbn_left_mhs(x) >= size)
|
||||||
|
return SearchFirstFitBySizeHelper(x->_left, size);
|
||||||
|
|
||||||
|
if (rbn_right_mhs(x) >= size)
|
||||||
|
return SearchFirstFitBySizeHelper(x->_right, size);
|
||||||
|
|
||||||
|
// this is an invalid state
|
||||||
|
Dump();
|
||||||
|
ValidateBalance();
|
||||||
|
ValidateMhs();
|
||||||
|
invariant(0);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node *Tree::MinNode(Node *tree) {
|
||||||
|
if (tree == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
while (tree->_left != NULL)
|
||||||
|
tree = tree->_left;
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node *Tree::MinNode() { return MinNode(_root); }
|
||||||
|
|
||||||
|
Node *Tree::MaxNode(Node *tree) {
|
||||||
|
if (tree == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
while (tree->_right != NULL)
|
||||||
|
tree = tree->_right;
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node *Tree::MaxNode() { return MaxNode(_root); }
|
||||||
|
|
||||||
|
Node *Tree::SuccessorHelper(Node *y, Node *x) {
|
||||||
|
while ((y != NULL) && (x == y->_right)) {
|
||||||
|
x = y;
|
||||||
|
y = y->_parent;
|
||||||
|
}
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
Node *Tree::Successor(Node *x) {
|
||||||
|
if (x->_right != NULL)
|
||||||
|
return MinNode(x->_right);
|
||||||
|
|
||||||
|
Node *y = x->_parent;
|
||||||
|
return SuccessorHelper(y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
Node *Tree::PredecessorHelper(Node *y, Node *x) {
|
||||||
|
while ((y != NULL) && (x == y->_left)) {
|
||||||
|
x = y;
|
||||||
|
y = y->_parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
Node *Tree::Predecessor(Node *x) {
|
||||||
|
if (x->_left != NULL)
|
||||||
|
return MaxNode(x->_left);
|
||||||
|
|
||||||
|
Node *y = x->_parent;
|
||||||
|
return SuccessorHelper(y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* px px
|
||||||
|
* / /
|
||||||
|
* x y
|
||||||
|
* / \ --(left rotation)--> / \ #
|
||||||
|
* lx y x ry
|
||||||
|
* / \ / \
|
||||||
|
* ly ry lx ly
|
||||||
|
* max_hole_size updates are pretty local
|
||||||
|
*/
|
||||||
|
|
||||||
|
void Tree::LeftRotate(Node *&root, Node *x) {
|
||||||
|
Node *y = x->_right;
|
||||||
|
|
||||||
|
x->_right = y->_left;
|
||||||
|
rbn_right_mhs(x) = rbn_left_mhs(y);
|
||||||
|
|
||||||
|
if (y->_left != NULL)
|
||||||
|
y->_left->_parent = x;
|
||||||
|
|
||||||
|
y->_parent = x->_parent;
|
||||||
|
|
||||||
|
if (x->_parent == NULL) {
|
||||||
|
root = y;
|
||||||
|
} else {
|
||||||
|
if (x->_parent->_left == x) {
|
||||||
|
x->_parent->_left = y;
|
||||||
|
} else {
|
||||||
|
x->_parent->_right = y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
y->_left = x;
|
||||||
|
rbn_left_mhs(y) = mhs_of_subtree(x);
|
||||||
|
|
||||||
|
x->_parent = y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* py py
|
||||||
|
* / /
|
||||||
|
* y x
|
||||||
|
* / \ --(right rotate)--> / \ #
|
||||||
|
* x ry lx y
|
||||||
|
* / \ / \ #
|
||||||
|
* lx rx rx ry
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
void Tree::RightRotate(Node *&root, Node *y) {
|
||||||
|
Node *x = y->_left;
|
||||||
|
|
||||||
|
y->_left = x->_right;
|
||||||
|
rbn_left_mhs(y) = rbn_right_mhs(x);
|
||||||
|
|
||||||
|
if (x->_right != NULL)
|
||||||
|
x->_right->_parent = y;
|
||||||
|
|
||||||
|
x->_parent = y->_parent;
|
||||||
|
|
||||||
|
if (y->_parent == NULL) {
|
||||||
|
root = x;
|
||||||
|
} else {
|
||||||
|
if (y == y->_parent->_right)
|
||||||
|
y->_parent->_right = x;
|
||||||
|
else
|
||||||
|
y->_parent->_left = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
x->_right = y;
|
||||||
|
rbn_right_mhs(x) = mhs_of_subtree(y);
|
||||||
|
y->_parent = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// walking from this node up to update the mhs info
|
||||||
|
    // whenever there is a change on left/right mhs or size we should recalculate.
|
||||||
|
    // prerequisite: the children of the node are mhs up-to-date.
|
||||||
|
void Tree::RecalculateMhs(Node *node) {
|
||||||
|
uint64_t *p_node_mhs = 0;
|
||||||
|
Node *parent = node->_parent;
|
||||||
|
|
||||||
|
if (!parent)
|
||||||
|
return;
|
||||||
|
|
||||||
|
uint64_t max_mhs = mhs_of_subtree(node);
|
||||||
|
if (node == parent->_left) {
|
||||||
|
p_node_mhs = &rbn_left_mhs(parent);
|
||||||
|
} else if (node == parent->_right) {
|
||||||
|
p_node_mhs = &rbn_right_mhs(parent);
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (*p_node_mhs != max_mhs) {
|
||||||
|
*p_node_mhs = max_mhs;
|
||||||
|
RecalculateMhs(parent);
|
||||||
|
}
|
||||||
|
}
|
||||||
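RecalculateMhs only keeps walking upward while a parent's cached left/right value actually changes. The helper it relies on is not shown in this hunk; a plausible reading, stated here as an assumption rather than the real PerconaFT definition, is that a subtree's max hole size is the largest of the node's own usable gap and the cached values of its two children:

#include <algorithm>
#include <cstdint>

// Assumed shape of the helper used above (illustrative, not the real code):
// the max hole size of a subtree is the max of the node's own effective free
// size and the cached maxima of its left and right subtrees.
struct MhsNode {
    uint64_t effective_size;  // usable hole at this node
    uint64_t left_mhs;        // cached max hole size of the left subtree
    uint64_t right_mhs;       // cached max hole size of the right subtree
};

static uint64_t mhs_of_subtree(const MhsNode &n) {
    return std::max({n.effective_size, n.left_mhs, n.right_mhs});
}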
|
|
||||||
|
void Tree::IsNewNodeMergable(Node *pred,
|
||||||
|
Node *succ,
|
||||||
|
Node::BlockPair pair,
|
||||||
|
bool *left_merge,
|
||||||
|
bool *right_merge) {
|
||||||
|
if (pred) {
|
||||||
|
OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred);
|
||||||
|
if (end_of_pred < pair._offset)
|
||||||
|
*left_merge = false;
|
||||||
|
else {
|
||||||
|
invariant(end_of_pred == pair._offset);
|
||||||
|
*left_merge = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (succ) {
|
||||||
|
OUUInt64 begin_of_succ = rbn_offset(succ);
|
||||||
|
OUUInt64 end_of_node = pair._offset + pair._size;
|
||||||
|
if (end_of_node < begin_of_succ) {
|
||||||
|
*right_merge = false;
|
||||||
|
} else {
|
||||||
|
invariant(end_of_node == begin_of_succ);
|
||||||
|
*right_merge = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::AbsorbNewNode(Node *pred,
|
||||||
|
Node *succ,
|
||||||
|
Node::BlockPair pair,
|
||||||
|
bool left_merge,
|
||||||
|
bool right_merge,
|
||||||
|
bool is_right_child) {
|
||||||
|
invariant(left_merge || right_merge);
|
||||||
|
if (left_merge && right_merge) {
|
||||||
|
// merge to the succ
|
||||||
|
if (!is_right_child) {
|
||||||
|
rbn_size(succ) += pair._size;
|
||||||
|
rbn_offset(succ) = pair._offset;
|
||||||
|
// merge to the pred
|
||||||
|
rbn_size(pred) += rbn_size(succ);
|
||||||
|
// to keep the invariant of the tree -no overlapping holes
|
||||||
|
rbn_offset(succ) += rbn_size(succ);
|
||||||
|
rbn_size(succ) = 0;
|
||||||
|
RecalculateMhs(succ);
|
||||||
|
RecalculateMhs(pred);
|
||||||
|
// pred dominates succ. this is going to
|
||||||
|
// update the pred labels separately.
|
||||||
|
// remove succ
|
||||||
|
RawRemove(_root, succ);
|
||||||
|
} else {
|
||||||
|
rbn_size(pred) += pair._size;
|
||||||
|
rbn_offset(succ) = rbn_offset(pred);
|
||||||
|
rbn_size(succ) += rbn_size(pred);
|
||||||
|
rbn_offset(pred) += rbn_size(pred);
|
||||||
|
rbn_size(pred) = 0;
|
||||||
|
RecalculateMhs(pred);
|
||||||
|
RecalculateMhs(succ);
|
||||||
|
// now remove pred
|
||||||
|
RawRemove(_root, pred);
|
||||||
|
}
|
||||||
|
} else if (left_merge) {
|
||||||
|
rbn_size(pred) += pair._size;
|
||||||
|
RecalculateMhs(pred);
|
||||||
|
} else if (right_merge) {
|
||||||
|
rbn_offset(succ) -= pair._size;
|
||||||
|
rbn_size(succ) += pair._size;
|
||||||
|
RecalculateMhs(succ);
|
||||||
|
}
|
||||||
|
}
|
||||||
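IsNewNodeMergable and AbsorbNewNode together keep the tree free of adjacent holes. As a usage illustration with made-up offsets (not taken from the code): freeing the range [100, 150) into a tree that already holds the hole [150, 200) is detected as right-mergeable, and AbsorbNewNode moves the existing node's offset down and grows its size to cover [100, 200) instead of inserting a second node; if [50, 100) were also present, both merge flags would be set and one of the two neighbours would be removed after the combined merge.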
|
    // This is the most tedious part, but not complicated:
    // 1. find where to insert the pair
    // 2. if the pred and succ can merge with the pair, merge with them; either
    //    pred or succ can be removed.
    // 3. if only left-mergeable or right-mergeable, just merge
    // 4. non-mergeable case: insert the node and run the fixup.
|
||||||
|
|
||||||
|
int Tree::Insert(Node *&root, Node::BlockPair pair) {
|
||||||
|
Node *x = _root;
|
||||||
|
Node *y = NULL;
|
||||||
|
bool left_merge = false;
|
||||||
|
bool right_merge = false;
|
||||||
|
Node *node = NULL;
|
||||||
|
|
||||||
|
while (x != NULL) {
|
||||||
|
y = x;
|
||||||
|
if (pair._offset < rbn_key(x))
|
||||||
|
x = x->_left;
|
||||||
|
else
|
||||||
|
x = x->_right;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we found where to insert, lets find out the pred and succ for
|
||||||
|
// possible
|
||||||
|
// merges.
|
||||||
|
// node->parent = y;
|
||||||
|
Node *pred, *succ;
|
||||||
|
if (y != NULL) {
|
||||||
|
if (pair._offset < rbn_key(y)) {
|
||||||
|
// as the left child
|
||||||
|
pred = PredecessorHelper(y->_parent, y);
|
||||||
|
succ = y;
|
||||||
|
IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
|
||||||
|
if (left_merge || right_merge) {
|
||||||
|
AbsorbNewNode(
|
||||||
|
pred, succ, pair, left_merge, right_merge, false);
|
||||||
|
} else {
|
||||||
|
// construct the node
|
||||||
|
Node::Pair mhsp {0, 0};
|
||||||
|
node =
|
||||||
|
new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
|
||||||
|
if (!node)
|
||||||
|
return -1;
|
||||||
|
y->_left = node;
|
||||||
|
node->_parent = y;
|
||||||
|
RecalculateMhs(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// as the right child
|
||||||
|
pred = y;
|
||||||
|
succ = SuccessorHelper(y->_parent, y);
|
||||||
|
IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
|
||||||
|
if (left_merge || right_merge) {
|
||||||
|
AbsorbNewNode(
|
||||||
|
pred, succ, pair, left_merge, right_merge, true);
|
||||||
|
} else {
|
||||||
|
// construct the node
|
||||||
|
Node::Pair mhsp {0, 0};
|
||||||
|
node =
|
||||||
|
new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
|
||||||
|
if (!node)
|
||||||
|
return -1;
|
||||||
|
y->_right = node;
|
||||||
|
node->_parent = y;
|
||||||
|
RecalculateMhs(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Node::Pair mhsp {0, 0};
|
||||||
|
node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
|
||||||
|
if (!node)
|
||||||
|
return -1;
|
||||||
|
root = node;
|
||||||
|
}
|
||||||
|
if (!left_merge && !right_merge) {
|
||||||
|
invariant_notnull(node);
|
||||||
|
node->_color = EColor::RED;
|
||||||
|
return InsertFixup(root, node);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int Tree::InsertFixup(Node *&root, Node *node) {
|
||||||
|
Node *parent, *gparent;
|
||||||
|
while ((parent = rbn_parent(node)) && rbn_is_red(parent)) {
|
||||||
|
gparent = rbn_parent(parent);
|
||||||
|
if (parent == gparent->_left) {
|
||||||
|
{
|
||||||
|
Node *uncle = gparent->_right;
|
||||||
|
if (uncle && rbn_is_red(uncle)) {
|
||||||
|
rbn_set_black(uncle);
|
||||||
|
rbn_set_black(parent);
|
||||||
|
rbn_set_red(gparent);
|
||||||
|
node = gparent;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parent->_right == node) {
|
||||||
|
Node *tmp;
|
||||||
|
LeftRotate(root, parent);
|
||||||
|
tmp = parent;
|
||||||
|
parent = node;
|
||||||
|
node = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
rbn_set_black(parent);
|
||||||
|
rbn_set_red(gparent);
|
||||||
|
RightRotate(root, gparent);
|
||||||
|
} else {
|
||||||
|
{
|
||||||
|
Node *uncle = gparent->_left;
|
||||||
|
if (uncle && rbn_is_red(uncle)) {
|
||||||
|
rbn_set_black(uncle);
|
||||||
|
rbn_set_black(parent);
|
||||||
|
rbn_set_red(gparent);
|
||||||
|
node = gparent;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parent->_left == node) {
|
||||||
|
Node *tmp;
|
||||||
|
RightRotate(root, parent);
|
||||||
|
tmp = parent;
|
||||||
|
parent = node;
|
||||||
|
node = tmp;
|
||||||
|
}
|
||||||
|
rbn_set_black(parent);
|
||||||
|
rbn_set_red(gparent);
|
||||||
|
LeftRotate(root, gparent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rbn_set_black(root);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); }
|
||||||
|
|
||||||
|
uint64_t Tree::Remove(size_t size) {
|
||||||
|
Node *node = SearchFirstFitBySize(size);
|
||||||
|
return Remove(_root, node, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::RawRemove(Node *&root, Node *node) {
|
||||||
|
Node *child, *parent;
|
||||||
|
EColor color;
|
||||||
|
|
||||||
|
if ((node->_left != NULL) && (node->_right != NULL)) {
|
||||||
|
Node *replace = node;
|
||||||
|
replace = replace->_right;
|
||||||
|
while (replace->_left != NULL)
|
||||||
|
replace = replace->_left;
|
||||||
|
|
||||||
|
if (rbn_parent(node)) {
|
||||||
|
if (rbn_parent(node)->_left == node)
|
||||||
|
rbn_parent(node)->_left = replace;
|
||||||
|
else
|
||||||
|
rbn_parent(node)->_right = replace;
|
||||||
|
} else {
|
||||||
|
root = replace;
|
||||||
|
}
|
||||||
|
child = replace->_right;
|
||||||
|
parent = rbn_parent(replace);
|
||||||
|
color = rbn_color(replace);
|
||||||
|
|
||||||
|
if (parent == node) {
|
||||||
|
parent = replace;
|
||||||
|
} else {
|
||||||
|
if (child)
|
||||||
|
rbn_parent(child) = parent;
|
||||||
|
|
||||||
|
parent->_left = child;
|
||||||
|
rbn_left_mhs(parent) = rbn_right_mhs(replace);
|
||||||
|
RecalculateMhs(parent);
|
||||||
|
replace->_right = node->_right;
|
||||||
|
rbn_set_parent(node->_right, replace);
|
||||||
|
rbn_right_mhs(replace) = rbn_right_mhs(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
replace->_parent = node->_parent;
|
||||||
|
replace->_color = node->_color;
|
||||||
|
replace->_left = node->_left;
|
||||||
|
rbn_left_mhs(replace) = rbn_left_mhs(node);
|
||||||
|
node->_left->_parent = replace;
|
||||||
|
RecalculateMhs(replace);
|
||||||
|
if (color == EColor::BLACK)
|
||||||
|
RawRemoveFixup(root, child, parent);
|
||||||
|
delete node;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->_left != NULL)
|
||||||
|
child = node->_left;
|
||||||
|
else
|
||||||
|
child = node->_right;
|
||||||
|
|
||||||
|
parent = node->_parent;
|
||||||
|
color = node->_color;
|
||||||
|
|
||||||
|
if (child)
|
||||||
|
child->_parent = parent;
|
||||||
|
|
||||||
|
if (parent) {
|
||||||
|
if (parent->_left == node) {
|
||||||
|
parent->_left = child;
|
||||||
|
rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0;
|
||||||
|
} else {
|
||||||
|
parent->_right = child;
|
||||||
|
rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0;
|
||||||
|
}
|
||||||
|
RecalculateMhs(parent);
|
||||||
|
} else
|
||||||
|
root = child;
|
||||||
|
if (color == EColor::BLACK)
|
||||||
|
RawRemoveFixup(root, child, parent);
|
||||||
|
delete node;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::RawRemove(uint64_t offset) {
|
||||||
|
Node *node = SearchByOffset(offset);
|
||||||
|
RawRemove(_root, node);
|
||||||
|
}
|
||||||
|
static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
|
||||||
|
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
|
||||||
|
}
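// e.g. with ba_alignment == 4096: align(1, 4096) == 4096,
// align(4096, 4096) == 4096 and align(4097, 4096) == 8192; the value is
// rounded up to the next multiple of the alignment.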
|
||||||
|
uint64_t Tree::Remove(Node *&root, Node *node, size_t size) {
|
||||||
|
OUUInt64 n_offset = rbn_offset(node);
|
||||||
|
OUUInt64 n_size = rbn_size(node);
|
||||||
|
OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align));
|
||||||
|
|
||||||
|
invariant((answer_offset + size) <= (n_offset + n_size));
|
||||||
|
if (answer_offset == n_offset) {
|
||||||
|
rbn_offset(node) += size;
|
||||||
|
rbn_size(node) -= size;
|
||||||
|
RecalculateMhs(node);
|
||||||
|
if (rbn_size(node) == 0) {
|
||||||
|
RawRemove(root, node);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if (answer_offset + size == n_offset + n_size) {
|
||||||
|
rbn_size(node) -= size;
|
||||||
|
RecalculateMhs(node);
|
||||||
|
} else {
|
||||||
|
// well, cut in the middle...
|
||||||
|
rbn_size(node) = answer_offset - n_offset;
|
||||||
|
RecalculateMhs(node);
|
||||||
|
Insert(_root,
|
||||||
|
{(answer_offset + size),
|
||||||
|
(n_offset + n_size) - (answer_offset + size)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return answer_offset.ToInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) {
|
||||||
|
Node *other;
|
||||||
|
while ((!node || rbn_is_black(node)) && node != root) {
|
||||||
|
if (parent->_left == node) {
|
||||||
|
other = parent->_right;
|
||||||
|
if (rbn_is_red(other)) {
|
||||||
|
// Case 1: the brother of X, w, is red
|
||||||
|
rbn_set_black(other);
|
||||||
|
rbn_set_red(parent);
|
||||||
|
LeftRotate(root, parent);
|
||||||
|
other = parent->_right;
|
||||||
|
}
|
||||||
|
if ((!other->_left || rbn_is_black(other->_left)) &&
|
||||||
|
(!other->_right || rbn_is_black(other->_right))) {
|
||||||
|
// Case 2: w is black and both of w's children are black
|
||||||
|
rbn_set_red(other);
|
||||||
|
node = parent;
|
||||||
|
parent = rbn_parent(node);
|
||||||
|
} else {
|
||||||
|
if (!other->_right || rbn_is_black(other->_right)) {
|
||||||
|
// Case 3: w is black and left child of w is red but
// right child is black
|
||||||
|
rbn_set_black(other->_left);
|
||||||
|
rbn_set_red(other);
|
||||||
|
RightRotate(root, other);
|
||||||
|
other = parent->_right;
|
||||||
|
}
|
||||||
|
// Case 4: w is black and right child of w is red,
// regardless of left child's color
|
||||||
|
rbn_set_color(other, rbn_color(parent));
|
||||||
|
rbn_set_black(parent);
|
||||||
|
rbn_set_black(other->_right);
|
||||||
|
LeftRotate(root, parent);
|
||||||
|
node = root;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
other = parent->_left;
|
||||||
|
if (rbn_is_red(other)) {
|
||||||
|
// Case 1: w is red
|
||||||
|
rbn_set_black(other);
|
||||||
|
rbn_set_red(parent);
|
||||||
|
RightRotate(root, parent);
|
||||||
|
other = parent->_left;
|
||||||
|
}
|
||||||
|
if ((!other->_left || rbn_is_black(other->_left)) &&
|
||||||
|
(!other->_right || rbn_is_black(other->_right))) {
|
||||||
|
// Case 2: w is black and both children are black
|
||||||
|
rbn_set_red(other);
|
||||||
|
node = parent;
|
||||||
|
parent = rbn_parent(node);
|
||||||
|
} else {
|
||||||
|
if (!other->_left || rbn_is_black(other->_left)) {
|
||||||
|
// Case 3: w is black and right child of w is red but
// left child is black
|
||||||
|
rbn_set_black(other->_right);
|
||||||
|
rbn_set_red(other);
|
||||||
|
LeftRotate(root, other);
|
||||||
|
other = parent->_left;
|
||||||
|
}
|
||||||
|
// Case 4: w is black and left child of w is red,
// regardless of right child's color
|
||||||
|
rbn_set_color(other, rbn_color(parent));
|
||||||
|
rbn_set_black(parent);
|
||||||
|
rbn_set_black(other->_left);
|
||||||
|
RightRotate(root, parent);
|
||||||
|
node = root;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (node)
|
||||||
|
rbn_set_black(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::Destroy(Node *&tree) {
|
||||||
|
if (tree == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (tree->_left != NULL)
|
||||||
|
Destroy(tree->_left);
|
||||||
|
if (tree->_right != NULL)
|
||||||
|
Destroy(tree->_right);
|
||||||
|
|
||||||
|
delete tree;
|
||||||
|
tree = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::Destroy() { Destroy(_root); }
|
||||||
|
|
||||||
|
void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) {
|
||||||
|
if (tree != NULL) {
|
||||||
|
if (dir == EDirection::NONE)
|
||||||
|
fprintf(stderr,
|
||||||
|
"(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64
|
||||||
|
"))(B) is root\n",
|
||||||
|
rbn_offset(tree).ToInt(),
|
||||||
|
rbn_size(tree).ToInt(),
|
||||||
|
rbn_left_mhs(tree),
|
||||||
|
rbn_right_mhs(tree));
|
||||||
|
else
|
||||||
|
fprintf(stderr,
|
||||||
|
"(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64
|
||||||
|
"))(%c) is %" PRIu64 "'s %s\n",
|
||||||
|
rbn_offset(tree).ToInt(),
|
||||||
|
rbn_size(tree).ToInt(),
|
||||||
|
rbn_left_mhs(tree),
|
||||||
|
rbn_right_mhs(tree),
|
||||||
|
rbn_is_red(tree) ? 'R' : 'B',
|
||||||
|
pair._offset.ToInt(),
|
||||||
|
dir == EDirection::RIGHT ? "right child" : "left child");
|
||||||
|
|
||||||
|
Dump(tree->_left, tree->_hole, EDirection::LEFT);
|
||||||
|
Dump(tree->_right, tree->_hole, EDirection::RIGHT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t Tree::EffectiveSize(Node *node) {
|
||||||
|
OUUInt64 offset = rbn_offset(node);
|
||||||
|
OUUInt64 size = rbn_size(node);
|
||||||
|
OUUInt64 end = offset + size;
|
||||||
|
OUUInt64 aligned_offset(align(offset.ToInt(), _align));
|
||||||
|
if (aligned_offset > end) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return (end - aligned_offset).ToInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::Dump() {
|
||||||
|
if (_root != NULL)
|
||||||
|
Dump(_root, _root->_hole, (EDirection)0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vis_bal_f(void *extra, Node *node, uint64_t depth) {
|
||||||
|
uint64_t **p = (uint64_t **)extra;
|
||||||
|
uint64_t min = *p[0];
|
||||||
|
uint64_t max = *p[1];
|
||||||
|
if (node->_left) {
|
||||||
|
Node *left = node->_left;
|
||||||
|
invariant(node == left->_parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->_right) {
|
||||||
|
Node *right = node->_right;
|
||||||
|
invariant(node == right->_parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!node->_left || !node->_right) {
|
||||||
|
if (min > depth) {
|
||||||
|
*p[0] = depth;
|
||||||
|
} else if (max < depth) {
|
||||||
|
*p[1] = depth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::ValidateBalance() {
|
||||||
|
uint64_t min_depth = 0xffffffffffffffff;
|
||||||
|
uint64_t max_depth = 0;
|
||||||
|
if (!_root) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint64_t *p[2] = {&min_depth, &max_depth};
|
||||||
|
InOrderVisitor(vis_bal_f, (void *)p);
|
||||||
|
invariant((min_depth + 1) * 2 >= max_depth + 1);
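// (red-black property: the longest root-to-null path is at most
// twice as long as the shortest one)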
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) {
|
||||||
|
Node::BlockPair **p = (Node::BlockPair **)extra;
|
||||||
|
|
||||||
|
invariant_notnull(*p);
|
||||||
|
invariant((*p)->_offset == node->_hole._offset);
|
||||||
|
|
||||||
|
*p = *p + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate that the input pairs match the sorted (in-order) pairs
|
||||||
|
void Tree::ValidateInOrder(Node::BlockPair *pairs) {
|
||||||
|
InOrderVisitor(vis_cmp_f, &pairs);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t Tree::ValidateMhs(Node *node) {
|
||||||
|
if (!node)
|
||||||
|
return 0;
|
||||||
|
else {
|
||||||
|
uint64_t mhs_left = ValidateMhs(node->_left);
|
||||||
|
uint64_t mhs_right = ValidateMhs(node->_right);
|
||||||
|
if (mhs_left != rbn_left_mhs(node)) {
|
||||||
|
printf("assert failure: mhs_left = %" PRIu64 "\n", mhs_left);
|
||||||
|
Dump(node, node->_hole, (EDirection)0);
|
||||||
|
}
|
||||||
|
invariant(mhs_left == rbn_left_mhs(node));
|
||||||
|
|
||||||
|
if (mhs_right != rbn_right_mhs(node)) {
|
||||||
|
printf("assert failure: mhs_right = %" PRIu64 "\n", mhs_right);
|
||||||
|
Dump(node, node->_hole, (EDirection)0);
|
||||||
|
}
|
||||||
|
invariant(mhs_right == rbn_right_mhs(node));
|
||||||
|
return std::max(EffectiveSize(node), std::max(mhs_left, mhs_right));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tree::ValidateMhs() {
|
||||||
|
if (!_root)
|
||||||
|
return;
|
||||||
|
uint64_t mhs_left = ValidateMhs(_root->_left);
|
||||||
|
uint64_t mhs_right = ValidateMhs(_root->_right);
|
||||||
|
invariant(mhs_left == rbn_left_mhs(_root));
|
||||||
|
invariant(mhs_right == rbn_right_mhs(_root));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace MhsRbTree
|
351
storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
Normal file
351
storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*======
|
||||||
|
This file is part of PerconaFT.
|
||||||
|
|
||||||
|
|
||||||
|
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License, version 2,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
PerconaFT is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License, version 3,
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
PerconaFT is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
======= */
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <db.h>
|
||||||
|
|
||||||
|
#include "portability/toku_pthread.h"
|
||||||
|
#include "portability/toku_stdint.h"
|
||||||
|
#include "portability/toku_stdlib.h"
|
||||||
|
|
||||||
|
// RBTree (red-black tree) with max hole sizes for subtrees.

// This is a tentative data structure to improve the block allocation time
// complexity from linear time to log time. Note that this DS only supports
// first-fit for now. It is actually easier to do it with best-fit (just
// sort by size).

// RBTree is a classic data structure with O(log(n)) insertion, deletion and
// search, and its efficiency is well proven.

// a *hole* is the representation of an available BlockPair for allocation,
// defined as (start_address, size) or (offset, size) interchangeably.

// each node has a *label* to indicate a pair of the max hole sizes for its
// subtree.

// We are implementing an RBTree with max hole sizes for subtrees. It is a red
// black tree that is sorted by the start_address but also labeled with the max
// hole sizes of the subtrees.

//                   [(6,3)] -> [(offset, size)], the hole
//                   [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label
/*                   /     \                                               */
//           [(0, 1)]       [(10, 5)]
//           [{0, 2}]       [{0, 0}]
/*                  \                                                      */
//                   [(3, 2)]
//                   [{0, 0}]
// a request of allocation size=2 goes from the root to [(3,2)].

// the above example shows a simplified RBTree_max_holes.
// it is easy to see the search time is O(log(n)), as we can make a decision
// on each descent until we get to the target.

// the only question is whether we can keep the maintenance cost low, and I
// think it is not a problem because an insertion/deletion only updates the
// max_hole_sizes of the nodes along the path from the root to the node being
// deleted/inserted. The path can be cached and search is O(log(n)) anyway.

// unlike the typical rbtree, Tree has to handle inserts and deletes with more
// care: an allocation that triggers the delete might leave some unused space,
// for which we can simply update the start_addr and size without worrying
// about overlapping. A free might not only mean an insertion but also
// *merging* with the adjacent holes.
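
// A minimal sketch (not the actual helper; names are only assumed from the
// declarations below) of how a first-fit lookup can use the labels to stay
// O(log(n)):
//
//     Node *first_fit(Node *n, uint64_t size) {
//         if (n == nullptr)
//             return nullptr;
//         if (rbn_left_mhs(n) >= size)           // a big-enough hole exists on the
//             return first_fit(n->_left, size);  // left, i.e. at a lower offset
//         if (EffectiveSize(n) >= size)          // this node's own (aligned) hole fits
//             return n;
//         if (rbn_right_mhs(n) >= size)          // otherwise try the right subtree
//             return first_fit(n->_right, size);
//         return nullptr;
//     }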
|
||||||
|
|
||||||
|
namespace MhsRbTree {
|
||||||
|
|
||||||
|
#define offset_t uint64_t
|
||||||
|
enum class EColor { RED, BLACK };
|
||||||
|
enum class EDirection { NONE = 0, LEFT, RIGHT };
|
||||||
|
|
||||||
|
// I am a bit tired of fixing overflow/underflow, just quickly craft some int
// class that has an infinity-like max value and prevents overflow and
// underflow. If you got a file offset larger than MHS_MAX_VAL, it is not
// a problem here. :-/ - JYM
|
||||||
|
class OUUInt64 {
|
||||||
|
public:
|
||||||
|
static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff;
|
||||||
|
OUUInt64() : _value(0) {}
|
||||||
|
OUUInt64(uint64_t s) : _value(s) {}
|
||||||
|
bool operator<(const OUUInt64 &r) const {
|
||||||
|
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||||
|
return _value < r.ToInt();
|
||||||
|
}
|
||||||
|
bool operator>(const OUUInt64 &r) const {
|
||||||
|
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||||
|
return _value > r.ToInt();
|
||||||
|
}
|
||||||
|
bool operator<=(const OUUInt64 &r) const {
|
||||||
|
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||||
|
return _value <= r.ToInt();
|
||||||
|
}
|
||||||
|
bool operator>=(const OUUInt64 &r) const {
|
||||||
|
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||||
|
return _value >= r.ToInt();
|
||||||
|
}
|
||||||
|
OUUInt64 operator+(const OUUInt64 &r) const {
|
||||||
|
if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) {
|
||||||
|
OUUInt64 tmp(MHS_MAX_VAL);
|
||||||
|
return tmp;
|
||||||
|
} else {
|
||||||
|
// detecting overflow
|
||||||
|
invariant((MHS_MAX_VAL - _value) >= r.ToInt());
|
||||||
|
uint64_t plus = _value + r.ToInt();
|
||||||
|
OUUInt64 tmp(plus);
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OUUInt64 operator-(const OUUInt64 &r) const {
|
||||||
|
invariant(r.ToInt() != MHS_MAX_VAL);
|
||||||
|
if (_value == MHS_MAX_VAL) {
|
||||||
|
return *this;
|
||||||
|
} else {
|
||||||
|
invariant(_value >= r.ToInt());
|
||||||
|
uint64_t minus = _value - r.ToInt();
|
||||||
|
OUUInt64 tmp(minus);
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OUUInt64 operator-=(const OUUInt64 &r) {
|
||||||
|
if (_value != MHS_MAX_VAL) {
|
||||||
|
invariant(r.ToInt() != MHS_MAX_VAL);
|
||||||
|
invariant(_value >= r.ToInt());
|
||||||
|
_value -= r.ToInt();
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
OUUInt64 operator+=(const OUUInt64 &r) {
|
||||||
|
if (_value != MHS_MAX_VAL) {
|
||||||
|
if (r.ToInt() == MHS_MAX_VAL) {
|
||||||
|
_value = MHS_MAX_VAL;
|
||||||
|
} else {
|
||||||
|
invariant((MHS_MAX_VAL - _value) >= r.ToInt());
|
||||||
|
this->_value += r.ToInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
bool operator==(const OUUInt64 &r) const {
|
||||||
|
return _value == r.ToInt();
|
||||||
|
}
|
||||||
|
bool operator!=(const OUUInt64 &r) const {
|
||||||
|
return _value != r.ToInt();
|
||||||
|
}
|
||||||
|
OUUInt64 operator=(const OUUInt64 &r) {
|
||||||
|
_value = r.ToInt();
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
uint64_t ToInt() const { return _value; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
uint64_t _value;
|
||||||
|
};
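//
// A small usage sketch of the saturating behaviour (values are hypothetical):
//
//     OUUInt64 inf(OUUInt64::MHS_MAX_VAL);
//     OUUInt64 a(100);
//     invariant((a + inf).ToInt() == OUUInt64::MHS_MAX_VAL);  // "infinity" absorbs additions
//     a += 20;                                                // ordinary values add as usual
//     invariant(a.ToInt() == 120);
//     inf -= 50;                                              // "infinity" is never decreased
//     invariant(inf.ToInt() == OUUInt64::MHS_MAX_VAL);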
|
||||||
|
|
||||||
|
class Node {
|
||||||
|
public:
|
||||||
|
struct BlockPair {
|
||||||
|
OUUInt64 _offset;
|
||||||
|
OUUInt64 _size;
|
||||||
|
|
||||||
|
BlockPair() : _offset(0), _size(0) {}
|
||||||
|
BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
|
||||||
|
|
||||||
|
BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {}
|
||||||
|
int operator<(const struct BlockPair &rhs) const {
|
||||||
|
return _offset < rhs._offset;
|
||||||
|
}
|
||||||
|
int operator<(const uint64_t &o) const { return _offset < o; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Pair {
|
||||||
|
uint64_t _left;
|
||||||
|
uint64_t _right;
|
||||||
|
Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
EColor _color;
|
||||||
|
struct BlockPair _hole;
|
||||||
|
struct Pair _label;
|
||||||
|
Node *_left;
|
||||||
|
Node *_right;
|
||||||
|
Node *_parent;
|
||||||
|
|
||||||
|
Node(EColor c,
|
||||||
|
Node::BlockPair h,
|
||||||
|
struct Pair lb,
|
||||||
|
Node *l,
|
||||||
|
Node *r,
|
||||||
|
Node *p)
|
||||||
|
: _color(c),
|
||||||
|
_hole(h),
|
||||||
|
_label(lb),
|
||||||
|
_left(l),
|
||||||
|
_right(r),
|
||||||
|
_parent(p) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
class Tree {
|
||||||
|
private:
|
||||||
|
Node *_root;
|
||||||
|
uint64_t _align;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Tree();
|
||||||
|
Tree(uint64_t);
|
||||||
|
~Tree();
|
||||||
|
|
||||||
|
void PreOrder();
|
||||||
|
void InOrder();
|
||||||
|
void PostOrder();
|
||||||
|
// immutable operations
|
||||||
|
Node *SearchByOffset(uint64_t addr);
|
||||||
|
Node *SearchFirstFitBySize(uint64_t size);
|
||||||
|
|
||||||
|
Node *MinNode();
|
||||||
|
Node *MaxNode();
|
||||||
|
|
||||||
|
Node *Successor(Node *);
|
||||||
|
Node *Predecessor(Node *);
|
||||||
|
|
||||||
|
// mapped from tree_allocator::free_block
|
||||||
|
int Insert(Node::BlockPair pair);
|
||||||
|
// mapped from tree_allocator::alloc_block
|
||||||
|
uint64_t Remove(size_t size);
|
||||||
|
// mapped from tree_allocator::alloc_block_after
|
||||||
|
|
||||||
|
void RawRemove(uint64_t offset);
|
||||||
|
void Destroy();
|
||||||
|
// print the tree
|
||||||
|
void Dump();
|
||||||
|
// validation
|
||||||
|
// balance
|
||||||
|
void ValidateBalance();
|
||||||
|
void ValidateInOrder(Node::BlockPair *);
|
||||||
|
void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *);
|
||||||
|
void ValidateMhs();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void PreOrder(Node *node) const;
|
||||||
|
void InOrder(Node *node) const;
|
||||||
|
void PostOrder(Node *node) const;
|
||||||
|
Node *SearchByOffset(Node *node, offset_t addr) const;
|
||||||
|
Node *SearchFirstFitBySize(Node *node, size_t size) const;
|
||||||
|
|
||||||
|
Node *MinNode(Node *node);
|
||||||
|
Node *MaxNode(Node *node);
|
||||||
|
|
||||||
|
// rotations to fix up. we will have to update the labels too.
|
||||||
|
void LeftRotate(Node *&root, Node *x);
|
||||||
|
void RightRotate(Node *&root, Node *y);
|
||||||
|
|
||||||
|
int Insert(Node *&root, Node::BlockPair pair);
|
||||||
|
int InsertFixup(Node *&root, Node *node);
|
||||||
|
|
||||||
|
void RawRemove(Node *&root, Node *node);
|
||||||
|
uint64_t Remove(Node *&root, Node *node, size_t size);
|
||||||
|
void RawRemoveFixup(Node *&root, Node *node, Node *parent);
|
||||||
|
|
||||||
|
void Destroy(Node *&tree);
|
||||||
|
void Dump(Node *tree, Node::BlockPair pair, EDirection dir);
|
||||||
|
void RecalculateMhs(Node *node);
|
||||||
|
void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *);
|
||||||
|
void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool);
|
||||||
|
Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size);
|
||||||
|
|
||||||
|
Node *SuccessorHelper(Node *y, Node *x);
|
||||||
|
|
||||||
|
Node *PredecessorHelper(Node *y, Node *x);
|
||||||
|
|
||||||
|
void InOrderVisitor(Node *,
|
||||||
|
void (*f)(void *, Node *, uint64_t),
|
||||||
|
void *,
|
||||||
|
uint64_t);
|
||||||
|
uint64_t ValidateMhs(Node *);
|
||||||
|
|
||||||
|
uint64_t EffectiveSize(Node *);
|
||||||
|
// mixed with some macros.....
|
||||||
|
#define rbn_parent(r) ((r)->_parent)
|
||||||
|
#define rbn_color(r) ((r)->_color)
|
||||||
|
#define rbn_is_red(r) ((r)->_color == EColor::RED)
|
||||||
|
#define rbn_is_black(r) ((r)->_color == EColor::BLACK)
|
||||||
|
#define rbn_set_black(r) \
|
||||||
|
do { \
|
||||||
|
(r)->_color = EColor::BLACK; \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_red(r) \
|
||||||
|
do { \
|
||||||
|
(r)->_color = EColor::RED; \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_parent(r, p) \
|
||||||
|
do { \
|
||||||
|
(r)->_parent = (p); \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_color(r, c) \
|
||||||
|
do { \
|
||||||
|
(r)->_color = (c); \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_offset(r) \
|
||||||
|
do { \
|
||||||
|
(r)->_hole._offset = (c); \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_size(r, c) \
|
||||||
|
do { \
|
||||||
|
(r)->_hole._size = (c); \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_left_mhs(r, c) \
|
||||||
|
do { \
|
||||||
|
(r)->_label._left = (c); \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_set_right_mhs(r, c) \
|
||||||
|
do { \
|
||||||
|
(r)->_label._right = (c); \
|
||||||
|
} while (0)
|
||||||
|
#define rbn_size(r) ((r)->_hole._size)
|
||||||
|
#define rbn_offset(r) ((r)->_hole._offset)
|
||||||
|
#define rbn_key(r) ((r)->_hole._offset)
|
||||||
|
#define rbn_left_mhs(r) ((r)->_label._left)
|
||||||
|
#define rbn_right_mhs(r) ((r)->_label._right)
|
||||||
|
#define mhs_of_subtree(y) \
|
||||||
|
(std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y)))
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace MhsRbTree
|
@ -1,126 +0,0 @@
|
|||||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
||||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
|
||||||
#ident "$Id$"
|
|
||||||
/*======
|
|
||||||
This file is part of PerconaFT.
|
|
||||||
|
|
||||||
|
|
||||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|
||||||
|
|
||||||
PerconaFT is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License, version 2,
|
|
||||||
as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
PerconaFT is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
----------------------------------------
|
|
||||||
|
|
||||||
PerconaFT is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License, version 3,
|
|
||||||
as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
PerconaFT is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
======= */
|
|
||||||
|
|
||||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
|
||||||
|
|
||||||
#include "ft/tests/test.h"
|
|
||||||
|
|
||||||
#include "ft/serialize/block_allocator_strategy.h"
|
|
||||||
|
|
||||||
static const uint64_t alignment = 4096;
|
|
||||||
|
|
||||||
static void test_first_vs_best_fit(void) {
|
|
||||||
struct block_allocator::blockpair pairs[] = {
|
|
||||||
block_allocator::blockpair(1 * alignment, 6 * alignment),
|
|
||||||
// hole between 7x align -> 8x align
|
|
||||||
block_allocator::blockpair(8 * alignment, 4 * alignment),
|
|
||||||
// hole between 12x align -> 16x align
|
|
||||||
block_allocator::blockpair(16 * alignment, 1 * alignment),
|
|
||||||
block_allocator::blockpair(17 * alignment, 2 * alignment),
|
|
||||||
// hole between 19 align -> 21x align
|
|
||||||
block_allocator::blockpair(21 * alignment, 2 * alignment),
|
|
||||||
};
|
|
||||||
const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
|
|
||||||
|
|
||||||
block_allocator::blockpair *bp;
|
|
||||||
|
|
||||||
// first fit
|
|
||||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment);
|
|
||||||
assert(bp == &pairs[0]);
|
|
||||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment);
|
|
||||||
assert(bp == &pairs[0]);
|
|
||||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment);
|
|
||||||
assert(bp == &pairs[1]);
|
|
||||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment);
|
|
||||||
assert(bp == nullptr);
|
|
||||||
|
|
||||||
// best fit
|
|
||||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment);
|
|
||||||
assert(bp == &pairs[0]);
|
|
||||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment);
|
|
||||||
assert(bp == &pairs[3]);
|
|
||||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment);
|
|
||||||
assert(bp == &pairs[1]);
|
|
||||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment);
|
|
||||||
assert(bp == nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_padded_fit(void) {
|
|
||||||
struct block_allocator::blockpair pairs[] = {
|
|
||||||
block_allocator::blockpair(1 * alignment, 1 * alignment),
|
|
||||||
// 4096 byte hole after bp[0]
|
|
||||||
block_allocator::blockpair(3 * alignment, 1 * alignment),
|
|
||||||
// 8192 byte hole after bp[1]
|
|
||||||
block_allocator::blockpair(6 * alignment, 1 * alignment),
|
|
||||||
// 16384 byte hole after bp[2]
|
|
||||||
block_allocator::blockpair(11 * alignment, 1 * alignment),
|
|
||||||
// 32768 byte hole after bp[3]
|
|
||||||
block_allocator::blockpair(17 * alignment, 1 * alignment),
|
|
||||||
// 116kb hole after bp[4]
|
|
||||||
block_allocator::blockpair(113 * alignment, 1 * alignment),
|
|
||||||
// 256kb hole after bp[5]
|
|
||||||
block_allocator::blockpair(371 * alignment, 1 * alignment),
|
|
||||||
};
|
|
||||||
const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
|
|
||||||
|
|
||||||
block_allocator::blockpair *bp;
|
|
||||||
|
|
||||||
// padding for a 100 byte allocation will be < than standard alignment,
|
|
||||||
// so it should fit in the first 4096 byte hole.
|
|
||||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment);
|
|
||||||
assert(bp == &pairs[0]);
|
|
||||||
|
|
||||||
// Even padded, a 12kb alloc will fit in a 16kb hole
|
|
||||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment);
|
|
||||||
assert(bp == &pairs[2]);
|
|
||||||
|
|
||||||
// would normally fit in the 116kb hole but the padding will bring it over
|
|
||||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment);
|
|
||||||
assert(bp == &pairs[5]);
|
|
||||||
|
|
||||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment);
|
|
||||||
assert(bp == &pairs[5]);
|
|
||||||
}
|
|
||||||
|
|
||||||
int test_main(int argc, const char *argv[]) {
|
|
||||||
(void) argc;
|
|
||||||
(void) argv;
|
|
||||||
|
|
||||||
test_first_vs_best_fit();
|
|
||||||
test_padded_fit();
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -38,46 +38,48 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||||||
|
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) {
|
static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) {
|
||||||
ba->validate();
|
ba->Validate();
|
||||||
uint64_t actual_answer;
|
uint64_t actual_answer;
|
||||||
const uint64_t heat = random() % 2;
|
ba->AllocBlock(512 * size, &actual_answer);
|
||||||
ba->alloc_block(512 * size, heat, &actual_answer);
|
ba->Validate();
|
||||||
ba->validate();
|
|
||||||
|
|
||||||
assert(actual_answer%512==0);
|
invariant(actual_answer % 512 == 0);
|
||||||
*answer = actual_answer / 512;
|
*answer = actual_answer / 512;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ba_free(block_allocator *ba, uint64_t offset) {
|
static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) {
|
||||||
ba->validate();
|
ba->Validate();
|
||||||
ba->free_block(offset * 512);
|
ba->FreeBlock(offset * 512, 512 * size);
|
||||||
ba->validate();
|
ba->Validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order,
|
static void ba_check_l(BlockAllocator *ba,
|
||||||
uint64_t expected_offset, uint64_t expected_size) {
|
uint64_t blocknum_in_layout_order,
|
||||||
|
uint64_t expected_offset,
|
||||||
|
uint64_t expected_size) {
|
||||||
uint64_t actual_offset, actual_size;
|
uint64_t actual_offset, actual_size;
|
||||||
int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
|
int r = ba->NthBlockInLayoutOrder(
|
||||||
assert(r==0);
|
blocknum_in_layout_order, &actual_offset, &actual_size);
|
||||||
assert(expected_offset*512 == actual_offset);
|
invariant(r == 0);
|
||||||
assert(expected_size *512 == actual_size);
|
invariant(expected_offset * 512 == actual_offset);
|
||||||
|
invariant(expected_size * 512 == actual_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) {
|
static void ba_check_none(BlockAllocator *ba,
|
||||||
|
uint64_t blocknum_in_layout_order) {
|
||||||
uint64_t actual_offset, actual_size;
|
uint64_t actual_offset, actual_size;
|
||||||
int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
|
int r = ba->NthBlockInLayoutOrder(
|
||||||
assert(r==-1);
|
blocknum_in_layout_order, &actual_offset, &actual_size);
|
||||||
|
invariant(r == -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Simple block allocator test
|
// Simple block allocator test
|
||||||
static void test_ba0(block_allocator::allocation_strategy strategy) {
|
static void test_ba0() {
|
||||||
block_allocator allocator;
|
BlockAllocator allocator;
|
||||||
block_allocator *ba = &allocator;
|
BlockAllocator *ba = &allocator;
|
||||||
ba->create(100*512, 1*512);
|
ba->Create(100 * 512, 1 * 512);
|
||||||
ba->set_strategy(strategy);
|
invariant(ba->AllocatedLimit() == 100 * 512);
|
||||||
assert(ba->allocated_limit()==100*512);
|
|
||||||
|
|
||||||
uint64_t b2, b3, b4, b5, b6, b7;
|
uint64_t b2, b3, b4, b5, b6, b7;
|
||||||
ba_alloc(ba, 100, &b2);
|
ba_alloc(ba, 100, &b2);
|
||||||
@ -86,34 +88,32 @@ static void test_ba0(block_allocator::allocation_strategy strategy) {
|
|||||||
ba_alloc(ba, 100, &b5);
|
ba_alloc(ba, 100, &b5);
|
||||||
ba_alloc(ba, 100, &b6);
|
ba_alloc(ba, 100, &b6);
|
||||||
ba_alloc(ba, 100, &b7);
|
ba_alloc(ba, 100, &b7);
|
||||||
ba_free(ba, b2);
|
ba_free(ba, b2, 100);
|
||||||
ba_alloc(ba, 100, &b2);
|
ba_alloc(ba, 100, &b2);
|
||||||
ba_free(ba, b4);
|
ba_free(ba, b4, 100);
|
||||||
ba_free(ba, b6);
|
ba_free(ba, b6, 100);
|
||||||
uint64_t b8, b9;
|
uint64_t b8, b9;
|
||||||
ba_alloc(ba, 100, &b4);
|
ba_alloc(ba, 100, &b4);
|
||||||
ba_free(ba, b2);
|
ba_free(ba, b2, 100);
|
||||||
ba_alloc(ba, 100, &b6);
|
ba_alloc(ba, 100, &b6);
|
||||||
ba_alloc(ba, 100, &b8);
|
ba_alloc(ba, 100, &b8);
|
||||||
ba_alloc(ba, 100, &b9);
|
ba_alloc(ba, 100, &b9);
|
||||||
ba_free(ba, b6);
|
ba_free(ba, b6, 100);
|
||||||
ba_free(ba, b7);
|
ba_free(ba, b7, 100);
|
||||||
ba_free(ba, b8);
|
ba_free(ba, b8, 100);
|
||||||
ba_alloc(ba, 100, &b6);
|
ba_alloc(ba, 100, &b6);
|
||||||
ba_alloc(ba, 100, &b7);
|
ba_alloc(ba, 100, &b7);
|
||||||
ba_free(ba, b4);
|
ba_free(ba, b4, 100);
|
||||||
ba_alloc(ba, 100, &b4);
|
ba_alloc(ba, 100, &b4);
|
||||||
|
|
||||||
ba->destroy();
|
ba->Destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Manually to get coverage of all the code in the block allocator.
|
// Manually to get coverage of all the code in the block allocator.
|
||||||
static void
|
static void test_ba1(int n_initial) {
|
||||||
test_ba1(block_allocator::allocation_strategy strategy, int n_initial) {
|
BlockAllocator allocator;
|
||||||
block_allocator allocator;
|
BlockAllocator *ba = &allocator;
|
||||||
block_allocator *ba = &allocator;
|
ba->Create(0 * 512, 1 * 512);
|
||||||
ba->create(0*512, 1*512);
|
|
||||||
ba->set_strategy(strategy);
|
|
||||||
|
|
||||||
int n_blocks = 0;
|
int n_blocks = 0;
|
||||||
uint64_t blocks[1000];
|
uint64_t blocks[1000];
|
||||||
@ -127,28 +127,25 @@ test_ba1(block_allocator::allocation_strategy strategy, int n_initial) {
|
|||||||
} else {
|
} else {
|
||||||
if (n_blocks > 0) {
|
if (n_blocks > 0) {
|
||||||
int blocknum = random() % n_blocks;
|
int blocknum = random() % n_blocks;
|
||||||
//printf("F[%d]%ld\n", blocknum, blocks[blocknum]);
|
// printf("F[%d]=%ld\n", blocknum, blocks[blocknum]);
|
||||||
ba_free(ba, blocks[blocknum]);
|
ba_free(ba, blocks[blocknum], 1);
|
||||||
blocks[blocknum] = blocks[n_blocks - 1];
|
blocks[blocknum] = blocks[n_blocks - 1];
|
||||||
n_blocks--;
|
n_blocks--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ba->destroy();
|
ba->Destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check to see if it is first fit or best fit.
|
// Check to see if it is first fit or best fit.
|
||||||
static void
|
static void test_ba2(void) {
|
||||||
test_ba2 (void)
|
BlockAllocator allocator;
|
||||||
{
|
BlockAllocator *ba = &allocator;
|
||||||
block_allocator allocator;
|
|
||||||
block_allocator *ba = &allocator;
|
|
||||||
uint64_t b[6];
|
uint64_t b[6];
|
||||||
enum { BSIZE = 1024 };
|
enum { BSIZE = 1024 };
|
||||||
ba->create(100*512, BSIZE*512);
|
ba->Create(100 * 512, BSIZE * 512);
|
||||||
ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT);
|
invariant(ba->AllocatedLimit() == 100 * 512);
|
||||||
assert(ba->allocated_limit()==100*512);
|
|
||||||
|
|
||||||
ba_check_l(ba, 0, 0, 100);
|
ba_check_l(ba, 0, 0, 100);
|
||||||
ba_check_none(ba, 1);
|
ba_check_none(ba, 1);
|
||||||
@ -183,7 +180,7 @@ test_ba2 (void)
|
|||||||
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
||||||
ba_check_none(ba, 7);
|
ba_check_none(ba, 7);
|
||||||
|
|
||||||
ba_free (ba, 4*BSIZE);
|
ba_free(ba, 4 * BSIZE, 100);
|
||||||
ba_check_l(ba, 0, 0, 100);
|
ba_check_l(ba, 0, 0, 100);
|
||||||
ba_check_l(ba, 1, BSIZE, 100);
|
ba_check_l(ba, 1, BSIZE, 100);
|
||||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||||
@ -194,7 +191,7 @@ test_ba2 (void)
|
|||||||
|
|
||||||
uint64_t b2;
|
uint64_t b2;
|
||||||
ba_alloc(ba, 100, &b2);
|
ba_alloc(ba, 100, &b2);
|
||||||
assert(b2==4*BSIZE);
|
invariant(b2 == 4 * BSIZE);
|
||||||
ba_check_l(ba, 0, 0, 100);
|
ba_check_l(ba, 0, 0, 100);
|
||||||
ba_check_l(ba, 1, BSIZE, 100);
|
ba_check_l(ba, 1, BSIZE, 100);
|
||||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||||
@ -204,8 +201,8 @@ test_ba2 (void)
|
|||||||
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
||||||
ba_check_none(ba, 7);
|
ba_check_none(ba, 7);
|
||||||
|
|
||||||
ba_free (ba, BSIZE);
|
ba_free(ba, BSIZE, 100);
|
||||||
ba_free (ba, 5*BSIZE);
|
ba_free(ba, 5 * BSIZE, 100);
|
||||||
ba_check_l(ba, 0, 0, 100);
|
ba_check_l(ba, 0, 0, 100);
|
||||||
ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100);
|
ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100);
|
||||||
ba_check_l(ba, 2, 4 * BSIZE, 100);
|
ba_check_l(ba, 2, 4 * BSIZE, 100);
|
||||||
@ -213,16 +210,17 @@ test_ba2 (void)
|
|||||||
ba_check_l(ba, 4, 7 * BSIZE, 100);
|
ba_check_l(ba, 4, 7 * BSIZE, 100);
|
||||||
ba_check_none(ba, 5);
|
ba_check_none(ba, 5);
|
||||||
|
|
||||||
// This alloc will allocate the first block after the reserve space in the case of first fit.
|
// This alloc will allocate the first block after the reserve space in the
|
||||||
|
// case of first fit.
|
||||||
uint64_t b3;
|
uint64_t b3;
|
||||||
ba_alloc(ba, 100, &b3);
|
ba_alloc(ba, 100, &b3);
|
||||||
assert(b3== BSIZE); // First fit.
|
invariant(b3 == BSIZE); // First fit.
|
||||||
// if (b3==5*BSIZE) then it is next fit.
|
// if (b3==5*BSIZE) then it is next fit.
|
||||||
|
|
||||||
// Now 5*BSIZE is free
|
// Now 5*BSIZE is free
|
||||||
uint64_t b5;
|
uint64_t b5;
|
||||||
ba_alloc(ba, 100, &b5);
|
ba_alloc(ba, 100, &b5);
|
||||||
assert(b5==5*BSIZE);
|
invariant(b5 == 5 * BSIZE);
|
||||||
ba_check_l(ba, 0, 0, 100);
|
ba_check_l(ba, 0, 0, 100);
|
||||||
ba_check_l(ba, 1, BSIZE, 100);
|
ba_check_l(ba, 1, BSIZE, 100);
|
||||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||||
@ -237,9 +235,9 @@ test_ba2 (void)
|
|||||||
ba_alloc(ba, 100, &b6);
|
ba_alloc(ba, 100, &b6);
|
||||||
ba_alloc(ba, 100, &b7);
|
ba_alloc(ba, 100, &b7);
|
||||||
ba_alloc(ba, 100, &b8);
|
ba_alloc(ba, 100, &b8);
|
||||||
assert(b6==8*BSIZE);
|
invariant(b6 == 8 * BSIZE);
|
||||||
assert(b7==9*BSIZE);
|
invariant(b7 == 9 * BSIZE);
|
||||||
assert(b8==10*BSIZE);
|
invariant(b8 == 10 * BSIZE);
|
||||||
ba_check_l(ba, 0, 0, 100);
|
ba_check_l(ba, 0, 0, 100);
|
||||||
ba_check_l(ba, 1, BSIZE, 100);
|
ba_check_l(ba, 1, BSIZE, 100);
|
||||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||||
@ -252,39 +250,31 @@ test_ba2 (void)
|
|||||||
ba_check_l(ba, 9, 10 * BSIZE, 100);
|
ba_check_l(ba, 9, 10 * BSIZE, 100);
|
||||||
ba_check_none(ba, 10);
|
ba_check_none(ba, 10);
|
||||||
|
|
||||||
ba_free(ba, 9*BSIZE);
|
ba_free(ba, 9 * BSIZE, 100);
|
||||||
ba_free(ba, 7*BSIZE);
|
ba_free(ba, 7 * BSIZE, 100);
|
||||||
uint64_t b9;
|
uint64_t b9;
|
||||||
ba_alloc(ba, 100, &b9);
|
ba_alloc(ba, 100, &b9);
|
||||||
assert(b9==7*BSIZE);
|
invariant(b9 == 7 * BSIZE);
|
||||||
|
|
||||||
ba_free(ba, 5*BSIZE);
|
ba_free(ba, 5 * BSIZE, 100);
|
||||||
ba_free(ba, 2*BSIZE);
|
ba_free(ba, 2 * BSIZE, BSIZE + 100);
|
||||||
uint64_t b10, b11;
|
uint64_t b10, b11;
|
||||||
ba_alloc(ba, 100, &b10);
|
ba_alloc(ba, 100, &b10);
|
||||||
assert(b10==2*BSIZE);
|
invariant(b10 == 2 * BSIZE);
|
||||||
ba_alloc(ba, 100, &b11);
|
ba_alloc(ba, 100, &b11);
|
||||||
assert(b11==3*BSIZE);
|
invariant(b11 == 3 * BSIZE);
|
||||||
ba_alloc(ba, 100, &b11);
|
ba_alloc(ba, 100, &b11);
|
||||||
assert(b11==5*BSIZE);
|
invariant(b11 == 5 * BSIZE);
|
||||||
|
|
||||||
ba->destroy();
|
ba->Destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int test_main(int argc __attribute__((__unused__)),
|
||||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
const char *argv[] __attribute__((__unused__))) {
|
||||||
enum block_allocator::allocation_strategy strategies[] = {
|
test_ba0();
|
||||||
block_allocator::BA_STRATEGY_FIRST_FIT,
|
test_ba1(0);
|
||||||
block_allocator::BA_STRATEGY_BEST_FIT,
|
test_ba1(10);
|
||||||
block_allocator::BA_STRATEGY_PADDED_FIT,
|
test_ba1(20);
|
||||||
block_allocator::BA_STRATEGY_HEAT_ZONE,
|
|
||||||
};
|
|
||||||
for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) {
|
|
||||||
test_ba0(strategies[i]);
|
|
||||||
test_ba1(strategies[i], 0);
|
|
||||||
test_ba1(strategies[i], 10);
|
|
||||||
test_ba1(strategies[i], 20);
|
|
||||||
}
|
|
||||||
test_ba2();
|
test_ba2();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||||||
// #5978 is fixed. Here is what we do. We have four pairs with
|
// #5978 is fixed. Here is what we do. We have four pairs with
|
||||||
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
|
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
|
||||||
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
|
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
|
||||||
// We pin all four with expensive write locks. Then, on backgroud threads,
|
// We pin all four with expensive write locks. Then, on background threads,
|
||||||
// we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and
|
// we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and
|
||||||
// we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this
|
// we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this
|
||||||
// enough times, and we should see a deadlock before the fix, and no deadlock
|
// enough times, and we should see a deadlock before the fix, and no deadlock
|
||||||
|
@ -77,7 +77,7 @@ flush (
|
|||||||
|
|
||||||
//
|
//
|
||||||
// test the following things for simple cloning:
|
// test the following things for simple cloning:
|
||||||
// - verifies that after teh checkpoint ends, the PAIR is properly
|
// - verifies that after the checkpoint ends, the PAIR is properly
|
||||||
// dirty or clean based on the second unpin
|
// dirty or clean based on the second unpin
|
||||||
//
|
//
|
||||||
static void
|
static void
|
||||||
|
@ -38,18 +38,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||||||
|
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
static int
|
static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) {
|
||||||
int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) {
|
|
||||||
int64_t x = *(int64_t *)a->data;
|
int64_t x = *(int64_t *)a->data;
|
||||||
int64_t y = *(int64_t *)b->data;
|
int64_t y = *(int64_t *)b->data;
|
||||||
|
|
||||||
if (x<y) return -1;
|
if (x < y)
|
||||||
if (x>y) return 1;
|
return -1;
|
||||||
|
if (x > y)
|
||||||
|
return 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
||||||
test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|
||||||
int r;
|
int r;
|
||||||
FT_CURSOR XMALLOC(cursor);
|
FT_CURSOR XMALLOC(cursor);
|
||||||
FTNODE dn = NULL;
|
FTNODE dn = NULL;
|
||||||
@ -69,16 +69,17 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||||||
cursor->disable_prefetching = true;
|
cursor->disable_prefetching = true;
|
||||||
bfe.create_for_prefetch(ft_h, cursor);
|
bfe.create_for_prefetch(ft_h, cursor);
|
||||||
FTNODE_DISK_DATA ndd = NULL;
|
FTNODE_DISK_DATA ndd = NULL;
|
||||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
r = toku_deserialize_ftnode_from(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||||
assert(dn->n_children == 3);
|
invariant(r == 0);
|
||||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
invariant(dn->n_children == 3);
|
||||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||||
|
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||||
bfe.destroy();
|
bfe.destroy();
|
||||||
toku_ftnode_free(&dn);
|
toku_ftnode_free(&dn);
|
||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
@ -87,20 +88,22 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||||||
cursor->disable_prefetching = false;
|
cursor->disable_prefetching = false;
|
||||||
|
|
||||||
bfe.create_for_prefetch(ft_h, cursor);
|
bfe.create_for_prefetch(ft_h, cursor);
|
||||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
r = toku_deserialize_ftnode_from(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||||
assert(dn->n_children == 3);
|
invariant(r == 0);
|
||||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
invariant(dn->n_children == 3);
|
||||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||||
assert(BP_STATE(dn,0) == PT_COMPRESSED);
|
toku_ftnode_pe_callback(
|
||||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
|
||||||
|
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||||
|
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||||
bfe.destroy();
|
bfe.destroy();
|
||||||
toku_ftnode_free(&dn);
|
toku_ftnode_free(&dn);
|
||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
@ -109,20 +112,22 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||||||
toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
|
toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
|
||||||
cursor->left_is_neg_infty = false;
|
cursor->left_is_neg_infty = false;
|
||||||
bfe.create_for_prefetch(ft_h, cursor);
|
bfe.create_for_prefetch(ft_h, cursor);
|
||||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
r = toku_deserialize_ftnode_from(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||||
assert(dn->n_children == 3);
|
invariant(r == 0);
|
||||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
invariant(dn->n_children == 3);
|
||||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
toku_ftnode_pe_callback(
|
||||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||||
|
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||||
|
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||||
bfe.destroy();
|
bfe.destroy();
|
||||||
toku_ftnode_free(&dn);
|
toku_ftnode_free(&dn);
|
||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
@ -131,20 +136,22 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t));
     cursor->right_is_pos_infty = false;
     bfe.create_for_prefetch(ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
@ -152,20 +159,22 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     left_key = 100000;
     right_key = 100000;
     bfe.create_for_prefetch(ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     bfe.destroy();
     toku_free(ndd);
     toku_ftnode_free(&dn);
@ -173,20 +182,22 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     left_key = 100;
     right_key = 100;
     bfe.create_for_prefetch(ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_COMPRESSED);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
@ -194,8 +205,7 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     toku_free(cursor);
 }
 
-static void
-test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     int r;
     FT_CURSOR XMALLOC(cursor);
     FTNODE dn = NULL;
@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     ftnode_fetch_extra bfe;
     bfe.create_for_subset_read(
-        ft_h,
-        NULL,
-        &left,
-        &right,
-        false,
-        false,
-        false,
-        false
-        );
+        ft_h, NULL, &left, &right, false, false, false, false);
 
     // fake the childnum to read
     // set disable_prefetching ON
     bfe.child_to_read = 2;
     bfe.disable_prefetching = true;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    // need to call this twice because we had a subset read before, that touched the clock
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    // need to call this twice because we had a subset read before, that touched
+    // the clock
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     // fake the childnum to read
     bfe.child_to_read = 2;
     bfe.disable_prefetching = false;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
 
     // fake the childnum to read
     bfe.child_to_read = 0;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     toku_free(cursor);
 }
 
-static void
-test_prefetching(void) {
+static void test_prefetching(void) {
     // struct ft_handle source_ft;
     struct ftnode sn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
@ -370,34 +385,41 @@ test_prefetching(void) {
     ft_h->cmp.create(int64_key_cmp, nullptr);
     ft->ft = ft_h;
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
     // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+                  (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size == 100);
+        invariant(offset ==
+                  (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
-    assert(r==0);
+    r = toku_serialize_ftnode_to(
+        fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+    invariant(r == 0);
 
     test_prefetch_read(fd, ft, ft_h);
     test_subset_read(fd, ft, ft_h);
 
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     ft_h->cmp.destroy();
     toku_free(ft_h->h);
@ -405,11 +427,12 @@ test_prefetching(void) {
     toku_free(ft);
     toku_free(ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+              const char *argv[] __attribute__((__unused__))) {
     test_prefetching();
 
     return 0;
@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #include "ft/cursor.h"
 
-enum ftnode_verify_type {
-    read_all=1,
-    read_compressed,
-    read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
 
 #ifndef MIN
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #endif
 
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
     char *CAST_FROM_VOIDP(s, a->data);
     char *CAST_FROM_VOIDP(t, b->data);
     return strcmp(s, t);
 }
 
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+                         uint32_t idx,
+                         const char *key,
+                         int keylen,
+                         const char *val,
+                         int vallen) {
     LEAFENTRY r = NULL;
     uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
     void *maybe_free = nullptr;
-    bn->get_space_for_insert(
-        idx,
-        key,
-        keylen,
-        size_needed,
-        &r,
-        &maybe_free
-        );
+    bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
     if (maybe_free) {
         toku_free(maybe_free);
     }
@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char
     memcpy(r->u.clean.val, val, vallen);
 }
 
-static void
-le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val)
-{
+static void le_malloc(bn_data *bn,
+                      uint32_t idx,
+                      const char *key,
+                      const char *val) {
     int keylen = strlen(key) + 1;
     int vallen = strlen(val) + 1;
     le_add_to_bn(bn, idx, key, keylen, val, vallen);
 }
 
-static void
-test1(int fd, FT ft_h, FTNODE *dn) {
+static void test1(int fd, FT ft_h, FTNODE *dn) {
     int r;
     ftnode_fetch_extra bfe_all;
     bfe_all.create_for_full_read(ft_h);
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all);
     bool is_leaf = ((*dn)->height == 0);
-    assert(r==0);
+    invariant(r == 0);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     // should sweep and get compress all
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
         if (!is_leaf) {
-            assert(BP_STATE(*dn,i) == PT_COMPRESSED);
-        }
-        else {
-            assert(BP_STATE(*dn,i) == PT_ON_DISK);
+            invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+        } else {
+            invariant(BP_STATE(*dn, i) == PT_ON_DISK);
         }
     }
     PAIR_ATTR size;
     bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
-    assert(req);
+    invariant(req);
     toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     (*dn)->dirty = 1;
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
@ -152,19 +139,17 @@ test1(int fd, FT ft_h, FTNODE *dn) {
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     toku_free(ndd);
     toku_ftnode_free(dn);
 }
 
 static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) {
     return 0;
 }
 
-static void
-test2(int fd, FT ft_h, FTNODE *dn) {
+static void test2(int fd, FT ft_h, FTNODE *dn) {
     DBT left, right;
     DB dummy_db;
     memset(&dummy_db, 0, sizeof(dummy_db));
@ -175,49 +160,49 @@ test2(int fd, FT ft_h, FTNODE *dn) {
     ftnode_fetch_extra bfe_subset;
     bfe_subset.create_for_subset_read(
         ft_h,
-        ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
+        ft_search_init(
+            &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
         &left,
         &right,
         true,
         true,
         false,
-        false
-        );
+        false);
 
     FTNODE_DISK_DATA ndd = NULL;
-    int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset);
-    assert(r==0);
+    int r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset);
+    invariant(r == 0);
     bool is_leaf = ((*dn)->height == 0);
     // at this point, although both partitions are available, only the
     // second basement node should have had its clock
     // touched
-    assert(BP_STATE(*dn, 0) == PT_AVAIL);
-    assert(BP_STATE(*dn, 1) == PT_AVAIL);
-    assert(BP_SHOULD_EVICT(*dn, 0));
-    assert(!BP_SHOULD_EVICT(*dn, 1));
+    invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+    invariant(BP_SHOULD_EVICT(*dn, 0));
+    invariant(!BP_SHOULD_EVICT(*dn, 1));
     PAIR_ATTR attr;
     memset(&attr, 0, sizeof(attr));
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
-    assert(BP_STATE(*dn, 1) == PT_AVAIL);
-    assert(BP_SHOULD_EVICT(*dn, 1));
+    invariant(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
+    invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+    invariant(BP_SHOULD_EVICT(*dn, 1));
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
+    invariant(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
 
     bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
-    assert(req);
+    invariant(req);
     toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr);
-    assert(BP_STATE(*dn, 0) == PT_AVAIL);
-    assert(BP_STATE(*dn, 1) == PT_AVAIL);
-    assert(BP_SHOULD_EVICT(*dn, 0));
-    assert(!BP_SHOULD_EVICT(*dn, 1));
+    invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+    invariant(BP_SHOULD_EVICT(*dn, 0));
+    invariant(!BP_SHOULD_EVICT(*dn, 1));
 
     toku_free(ndd);
     toku_ftnode_free(dn);
 }
 
-static void
-test3_leaf(int fd, FT ft_h, FTNODE *dn) {
+static void test3_leaf(int fd, FT ft_h, FTNODE *dn) {
     DBT left, right;
     DB dummy_db;
     memset(&dummy_db, 0, sizeof(dummy_db));
@ -228,25 +213,28 @@ test3_leaf(int fd, FT ft_h, FTNODE *dn) {
     bfe_min.create_for_min_read(ft_h);
 
     FTNODE_DISK_DATA ndd = NULL;
-    int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min);
-    assert(r==0);
+    int r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min);
+    invariant(r == 0);
     //
     // make sure we have a leaf
     //
-    assert((*dn)->height == 0);
+    invariant((*dn)->height == 0);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn, i) == PT_ON_DISK);
+        invariant(BP_STATE(*dn, i) == PT_ON_DISK);
     }
     toku_ftnode_free(dn);
     toku_free(ndd);
 }
 
-static void
-test_serialize_nonleaf(void) {
+static void test_serialize_nonleaf(void) {
     // struct ft_handle source_ft;
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
@ -281,9 +269,36 @@ test_serialize_nonleaf(void) {
     toku::comparator cmp;
     cmp.create(string_key_cmp, nullptr);
 
-    toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
-    toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
-    toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
+    toku_bnc_insert_msg(BNC(&sn, 0),
+                        "a",
+                        2,
+                        "aval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_0,
+                        true,
+                        cmp);
+    toku_bnc_insert_msg(BNC(&sn, 0),
+                        "b",
+                        2,
+                        "bval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_123,
+                        false,
+                        cmp);
+    toku_bnc_insert_msg(BNC(&sn, 1),
+                        "x",
+                        2,
+                        "xval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_234,
+                        true,
+                        cmp);
 
     // Cleanup:
     toku_xids_destroy(&xids_0);
|
|||||||
ft->ft = ft_h;
|
ft->ft = ft_h;
|
||||||
|
|
||||||
ft_h->blocktable.create();
|
ft_h->blocktable.create();
|
||||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
{
|
||||||
|
int r_truncate = ftruncate(fd, 0);
|
||||||
|
CKERR(r_truncate);
|
||||||
|
}
|
||||||
// Want to use block #20
|
// Want to use block #20
|
||||||
BLOCKNUM b = make_blocknum(0);
|
BLOCKNUM b = make_blocknum(0);
|
||||||
while (b.b < 20) {
|
while (b.b < 20) {
|
||||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||||
}
|
}
|
||||||
assert(b.b == 20);
|
invariant(b.b == 20);
|
||||||
|
|
||||||
{
|
{
|
||||||
DISKOFF offset;
|
DISKOFF offset;
|
||||||
DISKOFF size;
|
DISKOFF size;
|
||||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
|
||||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
assert(size == 100);
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
invariant(size == 100);
|
||||||
}
|
}
|
||||||
FTNODE_DISK_DATA ndd = NULL;
|
FTNODE_DISK_DATA ndd = NULL;
|
||||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
r = toku_serialize_ftnode_to(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||||
|
invariant(r == 0);
|
||||||
|
|
||||||
test1(fd, ft_h, &dn);
|
test1(fd, ft_h, &dn);
|
||||||
test2(fd, ft_h, &dn);
|
test2(fd, ft_h, &dn);
|
||||||
@ -333,22 +354,26 @@ test_serialize_nonleaf(void) {
|
|||||||
toku_destroy_ftnode_internals(&sn);
|
toku_destroy_ftnode_internals(&sn);
|
||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
|
|
||||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
ft_h->blocktable.block_free(
|
||||||
|
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||||
ft_h->blocktable.destroy();
|
ft_h->blocktable.destroy();
|
||||||
toku_free(ft_h->h);
|
toku_free(ft_h->h);
|
||||||
ft_h->cmp.destroy();
|
ft_h->cmp.destroy();
|
||||||
toku_free(ft_h);
|
toku_free(ft_h);
|
||||||
toku_free(ft);
|
toku_free(ft);
|
||||||
|
|
||||||
r = close(fd); assert(r != -1);
|
r = close(fd);
|
||||||
|
invariant(r != -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void test_serialize_leaf(void) {
|
||||||
test_serialize_leaf(void) {
|
|
||||||
// struct ft_handle source_ft;
|
// struct ft_handle source_ft;
|
||||||
struct ftnode sn, *dn;
|
struct ftnode sn, *dn;
|
||||||
|
|
||||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
int fd = open(TOKU_TEST_FILENAME,
|
||||||
|
O_RDWR | O_CREAT | O_BINARY,
|
||||||
|
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||||
|
invariant(fd >= 0);
|
||||||
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
@ -385,44 +410,52 @@ test_serialize_leaf(void) {
|
|||||||
ft->ft = ft_h;
|
ft->ft = ft_h;
|
||||||
|
|
||||||
ft_h->blocktable.create();
|
ft_h->blocktable.create();
|
||||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
{
|
||||||
|
int r_truncate = ftruncate(fd, 0);
|
||||||
|
CKERR(r_truncate);
|
||||||
|
}
|
||||||
// Want to use block #20
|
// Want to use block #20
|
||||||
BLOCKNUM b = make_blocknum(0);
|
BLOCKNUM b = make_blocknum(0);
|
||||||
while (b.b < 20) {
|
while (b.b < 20) {
|
||||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||||
}
|
}
|
||||||
assert(b.b == 20);
|
invariant(b.b == 20);
|
||||||
|
|
||||||
{
|
{
|
||||||
DISKOFF offset;
|
DISKOFF offset;
|
||||||
DISKOFF size;
|
DISKOFF size;
|
||||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
|
||||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
assert(size == 100);
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
invariant(size == 100);
|
||||||
}
|
}
|
||||||
FTNODE_DISK_DATA ndd = NULL;
|
FTNODE_DISK_DATA ndd = NULL;
|
||||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
r = toku_serialize_ftnode_to(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||||
|
invariant(r == 0);
|
||||||
|
|
||||||
test1(fd, ft_h, &dn);
|
test1(fd, ft_h, &dn);
|
||||||
test3_leaf(fd, ft_h, &dn);
|
test3_leaf(fd, ft_h, &dn);
|
||||||
|
|
||||||
toku_destroy_ftnode_internals(&sn);
|
toku_destroy_ftnode_internals(&sn);
|
||||||
|
|
||||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
ft_h->blocktable.block_free(
|
||||||
|
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||||
ft_h->blocktable.destroy();
|
ft_h->blocktable.destroy();
|
||||||
toku_free(ft_h->h);
|
toku_free(ft_h->h);
|
||||||
toku_free(ft_h);
|
toku_free(ft_h);
|
||||||
toku_free(ft);
|
toku_free(ft);
|
||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
r = close(fd); assert(r != -1);
|
r = close(fd);
|
||||||
|
invariant(r != -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int test_main(int argc __attribute__((__unused__)),
|
||||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
const char *argv[] __attribute__((__unused__))) {
|
||||||
initialize_dummymsn();
|
initialize_dummymsn();
|
||||||
test_serialize_nonleaf();
|
test_serialize_nonleaf();
|
||||||
test_serialize_leaf();
|
test_serialize_leaf();
|
||||||
|
@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include <sys/time.h>
 #include "test.h"
 
-
-
 #ifndef MIN
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #endif
 const double USECS_PER_SEC = 1000000.0;
 
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+                         uint32_t idx,
+                         char *key,
+                         int keylen,
+                         char *val,
+                         int vallen) {
     LEAFENTRY r = NULL;
     uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
     void *maybe_free = nullptr;
-    bn->get_space_for_insert(
-        idx,
-        key,
-        keylen,
-        size_needed,
-        &r,
-        &maybe_free
-        );
+    bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
     if (maybe_free) {
         toku_free(maybe_free);
     }
@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va
     memcpy(r->u.clean.val, val, vallen);
 }
 
-static int
-long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
     const long *CAST_FROM_VOIDP(x, a->data);
     const long *CAST_FROM_VOIDP(y, b->data);
     return (*x > *y) - (*x < *y);
 }
 
-static void
-test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_leaf(int valsize,
+                                int nelts,
+                                double entropy,
+                                int ser_runs,
+                                int deser_runs) {
     // struct ft_handle source_ft;
     struct ftnode *sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
@ -119,17 +117,12 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
             }
             memset(&buf[c], 0, valsize - c);
             le_add_to_bn(
-                BLB_DATA(sn,ck),
-                i,
-                (char *)&k,
-                sizeof k,
-                buf,
-                sizeof buf
-                );
+                BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf);
         }
         if (ck < 7) {
             DBT pivotkey;
-            sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
+            sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)),
+                                    ck);
         }
     }
|
|||||||
ft->ft = ft_h;
|
ft->ft = ft_h;
|
||||||
|
|
||||||
ft_h->blocktable.create();
|
ft_h->blocktable.create();
|
||||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
{
|
||||||
|
int r_truncate = ftruncate(fd, 0);
|
||||||
|
CKERR(r_truncate);
|
||||||
|
}
|
||||||
// Want to use block #20
|
// Want to use block #20
|
||||||
BLOCKNUM b = make_blocknum(0);
|
BLOCKNUM b = make_blocknum(0);
|
||||||
while (b.b < 20) {
|
while (b.b < 20) {
|
||||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||||
}
|
}
|
||||||
assert(b.b == 20);
|
invariant(b.b == 20);
|
||||||
|
|
||||||
{
|
{
|
||||||
DISKOFF offset;
|
DISKOFF offset;
|
||||||
DISKOFF size;
|
DISKOFF size;
|
||||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
|
||||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
assert(size == 100);
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
invariant(size == 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct timeval total_start;
|
struct timeval total_start;
|
||||||
@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||||||
gettimeofday(&t[0], NULL);
|
gettimeofday(&t[0], NULL);
|
||||||
ndd = NULL;
|
ndd = NULL;
|
||||||
sn->dirty = 1;
|
sn->dirty = 1;
|
||||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
|
r = toku_serialize_ftnode_to(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
|
||||||
|
invariant(r == 0);
|
||||||
gettimeofday(&t[1], NULL);
|
gettimeofday(&t[1], NULL);
|
||||||
total_start.tv_sec += t[0].tv_sec;
|
total_start.tv_sec += t[0].tv_sec;
|
||||||
total_start.tv_usec += t[0].tv_usec;
|
total_start.tv_usec += t[0].tv_usec;
|
||||||
@ -186,10 +185,12 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
}
|
}
|
||||||
double dt;
|
double dt;
|
||||||
dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
dt = (total_end.tv_sec - total_start.tv_sec) +
|
||||||
|
((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
||||||
dt *= 1000;
|
dt *= 1000;
|
||||||
dt /= ser_runs;
|
dt /= ser_runs;
|
||||||
printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
|
printf(
|
||||||
|
"serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
|
||||||
|
|
||||||
// reset
|
// reset
|
||||||
total_start.tv_sec = total_start.tv_usec = 0;
|
total_start.tv_sec = total_start.tv_usec = 0;
|
||||||
@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||||||
bfe.create_for_full_read(ft_h);
|
bfe.create_for_full_read(ft_h);
|
||||||
gettimeofday(&t[0], NULL);
|
gettimeofday(&t[0], NULL);
|
||||||
FTNODE_DISK_DATA ndd2 = NULL;
|
FTNODE_DISK_DATA ndd2 = NULL;
|
||||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
|
r = toku_deserialize_ftnode_from(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
|
||||||
|
invariant(r == 0);
|
||||||
gettimeofday(&t[1], NULL);
|
gettimeofday(&t[1], NULL);
|
||||||
|
|
||||||
total_start.tv_sec += t[0].tv_sec;
|
total_start.tv_sec += t[0].tv_sec;
|
||||||
@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||||||
toku_ftnode_free(&dn);
|
toku_ftnode_free(&dn);
|
||||||
toku_free(ndd2);
|
toku_free(ndd2);
|
||||||
}
|
}
|
||||||
dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
dt = (total_end.tv_sec - total_start.tv_sec) +
|
||||||
|
((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
||||||
dt *= 1000;
|
dt *= 1000;
|
||||||
dt /= deser_runs;
|
dt /= deser_runs;
|
||||||
printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
|
printf(
|
||||||
printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
|
"deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
|
||||||
|
printf(
|
||||||
|
"io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
|
||||||
|
"(average of %d runs)\n",
|
||||||
tokutime_to_seconds(bfe.io_time) * 1000,
|
tokutime_to_seconds(bfe.io_time) * 1000,
|
||||||
tokutime_to_seconds(bfe.decompress_time) * 1000,
|
tokutime_to_seconds(bfe.decompress_time) * 1000,
|
||||||
tokutime_to_seconds(bfe.deserialize_time) * 1000,
|
tokutime_to_seconds(bfe.deserialize_time) * 1000,
|
||||||
deser_runs
|
deser_runs);
|
||||||
);
|
|
||||||
|
|
||||||
toku_ftnode_free(&sn);
|
toku_ftnode_free(&sn);
|
||||||
|
|
||||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
ft_h->blocktable.block_free(
|
||||||
|
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||||
ft_h->blocktable.destroy();
|
ft_h->blocktable.destroy();
|
||||||
ft_h->cmp.destroy();
|
ft_h->cmp.destroy();
|
||||||
toku_free(ft_h->h);
|
toku_free(ft_h->h);
|
||||||
toku_free(ft_h);
|
toku_free(ft_h);
|
||||||
toku_free(ft);
|
toku_free(ft);
|
||||||
|
|
||||||
r = close(fd); assert(r != -1);
|
r = close(fd);
|
||||||
|
invariant(r != -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void test_serialize_nonleaf(int valsize,
|
||||||
test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
|
int nelts,
|
||||||
|
double entropy,
|
||||||
|
int ser_runs,
|
||||||
|
int deser_runs) {
|
||||||
// struct ft_handle source_ft;
|
// struct ft_handle source_ft;
|
||||||
struct ftnode sn, *dn;
|
struct ftnode sn, *dn;
|
||||||
|
|
||||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
int fd = open(TOKU_TEST_FILENAME,
|
||||||
|
O_RDWR | O_CREAT | O_BINARY,
|
||||||
|
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||||
|
invariant(fd >= 0);
|
||||||
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
@ -283,7 +296,16 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||||||
}
|
}
|
||||||
memset(&buf[c], 0, valsize - c);
|
memset(&buf[c], 0, valsize - c);
|
||||||
|
|
||||||
toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp);
|
toku_bnc_insert_msg(bnc,
|
||||||
|
&k,
|
||||||
|
sizeof k,
|
||||||
|
buf,
|
||||||
|
valsize,
|
||||||
|
FT_NONE,
|
||||||
|
next_dummymsn(),
|
||||||
|
xids_123,
|
||||||
|
true,
|
||||||
|
cmp);
|
||||||
}
|
}
|
||||||
if (ck < 7) {
|
if (ck < 7) {
|
||||||
DBT pivotkey;
|
DBT pivotkey;
|
||||||
@ -310,57 +332,70 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||||||
ft->ft = ft_h;
|
ft->ft = ft_h;
|
||||||
|
|
||||||
ft_h->blocktable.create();
|
ft_h->blocktable.create();
|
||||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
{
|
||||||
|
int r_truncate = ftruncate(fd, 0);
|
||||||
|
CKERR(r_truncate);
|
||||||
|
}
|
||||||
// Want to use block #20
|
// Want to use block #20
|
||||||
BLOCKNUM b = make_blocknum(0);
|
BLOCKNUM b = make_blocknum(0);
|
||||||
while (b.b < 20) {
|
while (b.b < 20) {
|
||||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||||
}
|
}
|
||||||
assert(b.b == 20);
|
invariant(b.b == 20);
|
||||||
|
|
||||||
{
|
{
|
||||||
DISKOFF offset;
|
DISKOFF offset;
|
||||||
DISKOFF size;
|
DISKOFF size;
|
||||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
|
||||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
invariant(offset ==
|
||||||
assert(size == 100);
|
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||||
|
invariant(size == 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct timeval t[2];
|
struct timeval t[2];
|
||||||
gettimeofday(&t[0], NULL);
|
gettimeofday(&t[0], NULL);
|
||||||
FTNODE_DISK_DATA ndd = NULL;
|
FTNODE_DISK_DATA ndd = NULL;
|
||||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
r = toku_serialize_ftnode_to(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||||
|
invariant(r == 0);
|
||||||
gettimeofday(&t[1], NULL);
|
gettimeofday(&t[1], NULL);
|
||||||
double dt;
|
double dt;
|
||||||
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
dt = (t[1].tv_sec - t[0].tv_sec) +
|
||||||
|
((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
||||||
dt *= 1000;
|
dt *= 1000;
|
||||||
printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
|
printf(
|
||||||
|
"serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
|
||||||
|
|
||||||
ftnode_fetch_extra bfe;
|
ftnode_fetch_extra bfe;
|
||||||
bfe.create_for_full_read(ft_h);
|
bfe.create_for_full_read(ft_h);
|
||||||
gettimeofday(&t[0], NULL);
|
gettimeofday(&t[0], NULL);
|
||||||
FTNODE_DISK_DATA ndd2 = NULL;
|
FTNODE_DISK_DATA ndd2 = NULL;
|
||||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
|
r = toku_deserialize_ftnode_from(
|
||||||
assert(r==0);
|
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
|
||||||
|
invariant(r == 0);
|
||||||
gettimeofday(&t[1], NULL);
|
gettimeofday(&t[1], NULL);
|
||||||
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
dt = (t[1].tv_sec - t[0].tv_sec) +
|
||||||
|
((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
||||||
dt *= 1000;
|
dt *= 1000;
|
||||||
printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
|
printf(
|
||||||
printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
|
"deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
|
||||||
|
printf(
|
||||||
|
"io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
|
||||||
|
"(IGNORED RUNS=%d)\n",
|
||||||
tokutime_to_seconds(bfe.io_time) * 1000,
|
tokutime_to_seconds(bfe.io_time) * 1000,
|
||||||
tokutime_to_seconds(bfe.decompress_time) * 1000,
|
tokutime_to_seconds(bfe.decompress_time) * 1000,
|
||||||
tokutime_to_seconds(bfe.deserialize_time) * 1000,
|
tokutime_to_seconds(bfe.deserialize_time) * 1000,
|
||||||
deser_runs
|
deser_runs);
|
||||||
);
|
|
||||||
|
|
||||||
toku_ftnode_free(&dn);
|
toku_ftnode_free(&dn);
|
||||||
toku_destroy_ftnode_internals(&sn);
|
toku_destroy_ftnode_internals(&sn);
|
||||||
|
|
||||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
ft_h->blocktable.block_free(
|
||||||
|
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||||
ft_h->blocktable.destroy();
|
ft_h->blocktable.destroy();
|
||||||
toku_free(ft_h->h);
|
toku_free(ft_h->h);
|
||||||
ft_h->cmp.destroy();
|
ft_h->cmp.destroy();
|
||||||
@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||||||
toku_free(ndd);
|
toku_free(ndd);
|
||||||
toku_free(ndd2);
|
toku_free(ndd2);
|
||||||
|
|
||||||
r = close(fd); assert(r != -1);
|
r = close(fd);
|
||||||
|
invariant(r != -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int test_main(int argc __attribute__((__unused__)),
|
||||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
const char *argv[] __attribute__((__unused__))) {
|
||||||
const int DEFAULT_RUNS = 5;
|
const int DEFAULT_RUNS = 5;
|
||||||
long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
|
long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
|
||||||
double entropy = 0.3;
|
double entropy = 0.3;
|
||||||
|
|
||||||
if (argc != 3 && argc != 5) {
|
if (argc != 3 && argc != 5) {
|
||||||
fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
|
fprintf(stderr,
|
||||||
|
"Usage: %s <valsize> <nelts> [<serialize_runs> "
|
||||||
|
"<deserialize_runs>]\n",
|
||||||
|
argv[0]);
|
||||||
fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
|
fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -164,18 +164,17 @@ static void test_read_what_was_written (void) {
     int r;
     const int NVALS=10000;
 
-    if (verbose) printf("test_read_what_was_written(): "); fflush(stdout);
+    if (verbose) {
+        printf("test_read_what_was_written(): "); fflush(stdout);
+    }
 
     unlink(fname);
 
-
     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
     r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
     r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
     toku_cachetable_close(&ct);
 
-
-
     /* Now see if we can read an empty tree in. */
     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
     r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
@ -189,8 +188,6 @@ static void test_read_what_was_written (void) {
     r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
     toku_cachetable_close(&ct);
 
-
-
     /* Now see if we can read it in and get the value. */
     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
     r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
@ -109,7 +109,9 @@ static int run_test(void)
         r = pqueue_pop(pq, &node); assert(r==0);
         if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data));
         if ( *(int*)(node->key->data) != i ) {
-            if (verbose) printf("FAIL\n"); return -1;
+            if (verbose)
+                printf("FAIL\n");
+            return -1;
         }
     }
     pqueue_free(pq);
@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) {
    do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200);
    invariant(do_garbage_collect);

    // It is definately worth doing when the above case is true
    // It is definitely worth doing when the above case is true
    // and there is more than one provisional entry.
    ule.num_cuxrs = 1;
    ule.num_puxrs = 2;
@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr

enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };

static void test_oldest_referenced_xid_gets_propogated(void) {
static void test_oldest_referenced_xid_gets_propagated(void) {
    int r;
    CACHETABLE ct;
    FT_HANDLE t;
@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
    toku_ft_flush_some_child(t->ft, node, &fa);

    // pin the child, verify that oldest referenced xid was
    // propogated from parent to child during the flush
    // propagated from parent to child during the flush
    toku_pin_ftnode(
        t->ft,
        child_nonleaf_blocknum,
@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) {

int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
    default_parse_args(argc, argv);
    test_oldest_referenced_xid_gets_propogated();
    test_oldest_referenced_xid_gets_propagated();
    return 0;
}
@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.

#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."

#pragma once

#include <db.h>

#include "ft/serialize/block_allocator.h"

// Block allocation strategy implementations

class block_allocator_strategy {
public:
    static struct block_allocator::blockpair *
    first_fit(struct block_allocator::blockpair *blocks_array,
              uint64_t n_blocks, uint64_t size, uint64_t alignment);

    static struct block_allocator::blockpair *
    best_fit(struct block_allocator::blockpair *blocks_array,
             uint64_t n_blocks, uint64_t size, uint64_t alignment);

    static struct block_allocator::blockpair *
    padded_fit(struct block_allocator::blockpair *blocks_array,
               uint64_t n_blocks, uint64_t size, uint64_t alignment);

    static struct block_allocator::blockpair *
    heat_zone(struct block_allocator::blockpair *blocks_array,
              uint64_t n_blocks, uint64_t size, uint64_t alignment,
              uint64_t heat);
};

#include "ft/serialize/rbtree_mhs.h"
#include "test.h"
#include <algorithm>
#include <vector>
#include <ctime>
#include <cstdlib>

static void test_insert_remove(void) {
    uint64_t i;
    MhsRbTree::Tree *tree = new MhsRbTree::Tree();
    verbose = 0;

    tree->Insert({0, 100});

    for (i = 0; i < 10; i++) {
        tree->Remove(3);
        tree->Remove(2);
    }
    tree->ValidateBalance();
    tree->ValidateMhs();

    for (i = 0; i < 10; i++) {
        tree->Insert({5 * i, 3});
    }
    tree->ValidateBalance();
    tree->ValidateMhs();

    uint64_t offset = tree->Remove(2);
    invariant(offset == 0);
    offset = tree->Remove(10);
    invariant(offset == 50);
    offset = tree->Remove(3);
    invariant(offset == 5);
    tree->ValidateBalance();
    tree->ValidateMhs();

    tree->Insert({48, 2});
    tree->Insert({50, 10});

    tree->ValidateBalance();
    tree->ValidateMhs();

    tree->Insert({3, 7});
    offset = tree->Remove(10);
    invariant(offset == 2);
    tree->ValidateBalance();
    tree->ValidateMhs();
    tree->Dump();
    delete tree;
}

int test_main(int argc, const char *argv[]) {
    default_parse_args(argc, argv);

    test_insert_remove();
    if (verbose)
        printf("test ok\n");
    return 0;
}
@ -0,0 +1,102 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.


Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.

PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.

PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.

----------------------------------------

PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.

PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */

#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."

#include "ft/serialize/rbtree_mhs.h"
#include "test.h"
#include <algorithm>
#include <vector>
#include <ctime>
#include <cstdlib>

#define N 1000000
std::vector<MhsRbTree::Node::BlockPair> input_vector;
MhsRbTree::Node::BlockPair old_vector[N];

static int myrandom(int i) { return std::rand() % i; }

static void generate_random_input() {
    std::srand(unsigned(std::time(0)));

    // set some values:
    for (uint64_t i = 1; i < N; ++i) {
        input_vector.push_back({i, 0});
        old_vector[i] = {i, 0};
    }
    // using built-in random generator:
    std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
}

static void test_insert_remove(void) {
    int i;
    MhsRbTree::Tree *tree = new MhsRbTree::Tree();
    verbose = 0;
    generate_random_input();
    if (verbose) {
        printf("\n we are going to insert the following block offsets\n");
        for (i = 0; i < N; i++)
            printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt());
    }
    for (i = 0; i < N; i++) {
        tree->Insert(input_vector[i]);
        // tree->ValidateBalance();
    }
    tree->ValidateBalance();
    MhsRbTree::Node::BlockPair *p_bps = &old_vector[0];
    tree->ValidateInOrder(p_bps);
    printf("min node of the tree:%" PRIu64 "\n",
           rbn_offset(tree->MinNode()).ToInt());
    printf("max node of the tree:%" PRIu64 "\n",
           rbn_offset(tree->MaxNode()).ToInt());

    for (i = 0; i < N; i++) {
        // tree->ValidateBalance();
        tree->RawRemove(input_vector[i]._offset.ToInt());
    }

    tree->Destroy();
    delete tree;
}

int test_main(int argc, const char *argv[]) {
    default_parse_args(argc, argv);

    test_insert_remove();
    if (verbose)
        printf("test ok\n");
    return 0;
}
@ -49,7 +49,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
// functionality provided by roll.c is exposed by an autogenerated
// header file, logheader.h
//
// this (poorly) explains the absense of "roll.h"
// this (poorly) explains the absence of "roll.h"

// these flags control whether or not we send commit messages for
// various operations
@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
        txn->roll_info.spilled_rollback_head = ROLLBACK_NONE;
        txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE;
    }
    // if we're commiting a child rollback, put its entries into the parent
    // if we're committing a child rollback, put its entries into the parent
    // by pinning both child and parent and then linking the child log entry
    // list to the end of the parent log entry list.
    if (txn_has_current_rollback_log(txn)) {
@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) {

// flush an ununused log to disk, by allocating a size 0 blocknum in
// the blocktable
static void
toku_rollback_flush_unused_log(
    ROLLBACK_LOG_NODE log,
static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log,
    BLOCKNUM logname,
    int fd,
    FT ft,
    bool write_me,
    bool keep_me,
    bool for_checkpoint,
    bool is_clone
    )
{
    bool is_clone) {
    if (write_me) {
        DISKOFF offset;
        ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX);
        ft->blocktable.realloc_on_disk(
            logname, 0, &offset, ft, fd, for_checkpoint);
    }
    if (!keep_me && !is_clone) {
        toku_free(log);
@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection(
    // by new txns.
    // 2.) There is only one committed entry, but the outermost
    // provisional entry is older than the oldest known referenced
    // xid, so it must have commited. Therefor we can promote it to
    // xid, so it must have committed. Therefor we can promote it to
    // committed and get rid of the old commited entry.
    // committed and get rid of the old committed entry.
    if (le->type != LE_MVCC) {
        return false;
    }
Some files were not shown because too many files have changed in this diff