Better comments part 2 with proof and simplified implementation.
Thanks to Daniel Black.
This commit is contained in:
parent
43054872be
commit
e15a83c0c7
@ -3844,7 +3844,7 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
ib_uint64_t* n_diff;
|
ib_uint64_t* n_diff;
|
||||||
ib_uint64_t* n_not_null;
|
ib_uint64_t* n_not_null;
|
||||||
ibool stats_null_not_equal;
|
ibool stats_null_not_equal;
|
||||||
ullint n_sample_pages; /* number of pages to sample */
|
ullint n_sample_pages = 1; /* number of pages to sample */
|
||||||
ulint not_empty_flag = 0;
|
ulint not_empty_flag = 0;
|
||||||
ulint total_external_size = 0;
|
ulint total_external_size = 0;
|
||||||
ulint i;
|
ulint i;
|
||||||
@ -3897,8 +3897,6 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
if (srv_stats_transient_sample_pages > index->stat_index_size) {
|
if (srv_stats_transient_sample_pages > index->stat_index_size) {
|
||||||
if (index->stat_index_size > 0) {
|
if (index->stat_index_size > 0) {
|
||||||
n_sample_pages = index->stat_index_size;
|
n_sample_pages = index->stat_index_size;
|
||||||
} else {
|
|
||||||
n_sample_pages = 1;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
n_sample_pages = srv_stats_transient_sample_pages;
|
n_sample_pages = srv_stats_transient_sample_pages;
|
||||||
@ -3906,17 +3904,44 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
} else {
|
} else {
|
||||||
/* New logaritmic number of pages that are estimated.
|
/* New logaritmic number of pages that are estimated.
|
||||||
Number of pages estimated should be between 1 and
|
Number of pages estimated should be between 1 and
|
||||||
index->stat_index_size. We pick index->stat_index_size
|
index->stat_index_size.
|
||||||
as maximum and log2(index->stat_index_size)*sr_stats_transient_sample_pages
|
|
||||||
if between range as minimum.*/
|
If we have only 0 or 1 index pages then we can only take 1
|
||||||
if (index->stat_index_size > 0) {
|
sample. We have already initialized n_sample_pages to 1.
|
||||||
n_sample_pages = ut_min(index->stat_index_size,
|
|
||||||
ut_max(ut_min(srv_stats_transient_sample_pages,
|
So taking index size as I and sample as S and log(I)*S as L
|
||||||
index->stat_index_size),
|
|
||||||
log2(index->stat_index_size)
|
requirement 1) we want the out limit of the expression to not exceed I;
|
||||||
*srv_stats_transient_sample_pages));
|
requirement 2) we want the ideal pages to be at least S;
|
||||||
} else {
|
so the current expression is min(I, max( min(S,I), L)
|
||||||
n_sample_pages = 1;
|
|
||||||
|
looking for simplifications:
|
||||||
|
|
||||||
|
case 1: assume S < I
|
||||||
|
min(I, max( min(S,I), L) -> min(I , max( S, L))
|
||||||
|
|
||||||
|
but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L.
|
||||||
|
|
||||||
|
so we have: min(I , L)
|
||||||
|
|
||||||
|
case 2: assume I < S
|
||||||
|
min(I, max( min(S,I), L) -> min(I, max( I, L))
|
||||||
|
|
||||||
|
case 2a: L > I
|
||||||
|
min(I, max( I, L)) -> min(I, L) -> I
|
||||||
|
|
||||||
|
case 2b: when L < I
|
||||||
|
min(I, max( I, L)) -> min(I, I ) -> I
|
||||||
|
|
||||||
|
so taking all case2 paths is I, our expression is:
|
||||||
|
n_pages = S < I? min(I,L) : I
|
||||||
|
*/
|
||||||
|
if (index->stat_index_size > 1) {
|
||||||
|
n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ?
|
||||||
|
ut_min(index->stat_index_size,
|
||||||
|
log2(index->stat_index_size)*srv_stats_transient_sample_pages)
|
||||||
|
: index->stat_index_size;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4035,7 +4035,7 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
ib_uint64_t* n_diff;
|
ib_uint64_t* n_diff;
|
||||||
ib_uint64_t* n_not_null;
|
ib_uint64_t* n_not_null;
|
||||||
ibool stats_null_not_equal;
|
ibool stats_null_not_equal;
|
||||||
ullint n_sample_pages; /* number of pages to sample */
|
ullint n_sample_pages=1; /* number of pages to sample */
|
||||||
ulint not_empty_flag = 0;
|
ulint not_empty_flag = 0;
|
||||||
ulint total_external_size = 0;
|
ulint total_external_size = 0;
|
||||||
ulint i;
|
ulint i;
|
||||||
@ -4088,8 +4088,6 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
if (srv_stats_transient_sample_pages > index->stat_index_size) {
|
if (srv_stats_transient_sample_pages > index->stat_index_size) {
|
||||||
if (index->stat_index_size > 0) {
|
if (index->stat_index_size > 0) {
|
||||||
n_sample_pages = index->stat_index_size;
|
n_sample_pages = index->stat_index_size;
|
||||||
} else {
|
|
||||||
n_sample_pages = 1;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
n_sample_pages = srv_stats_transient_sample_pages;
|
n_sample_pages = srv_stats_transient_sample_pages;
|
||||||
@ -4097,18 +4095,44 @@ btr_estimate_number_of_different_key_vals(
|
|||||||
} else {
|
} else {
|
||||||
/* New logaritmic number of pages that are estimated.
|
/* New logaritmic number of pages that are estimated.
|
||||||
Number of pages estimated should be between 1 and
|
Number of pages estimated should be between 1 and
|
||||||
index->stat_index_size. We pick index->stat_index_size
|
index->stat_index_size.
|
||||||
as maximum and log2(index->stat_index_size)*sr_stats_transient_sample_pages
|
|
||||||
if between range as minimum.*/
|
If we have only 0 or 1 index pages then we can only take 1
|
||||||
|
sample. We have already initialized n_sample_pages to 1.
|
||||||
|
|
||||||
|
So taking index size as I and sample as S and log(I)*S as L
|
||||||
|
|
||||||
|
requirement 1) we want the out limit of the expression to not exceed I;
|
||||||
|
requirement 2) we want the ideal pages to be at least S;
|
||||||
|
so the current expression is min(I, max( min(S,I), L)
|
||||||
|
|
||||||
|
looking for simplifications:
|
||||||
|
|
||||||
|
case 1: assume S < I
|
||||||
|
min(I, max( min(S,I), L) -> min(I , max( S, L))
|
||||||
|
|
||||||
|
but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L.
|
||||||
|
|
||||||
|
so we have: min(I , L)
|
||||||
|
|
||||||
|
case 2: assume I < S
|
||||||
|
min(I, max( min(S,I), L) -> min(I, max( I, L))
|
||||||
|
|
||||||
|
case 2a: L > I
|
||||||
|
min(I, max( I, L)) -> min(I, L) -> I
|
||||||
|
|
||||||
|
case 2b: when L < I
|
||||||
|
min(I, max( I, L)) -> min(I, I ) -> I
|
||||||
|
|
||||||
|
so taking all case2 paths is I, our expression is:
|
||||||
|
n_pages = S < I? min(I,L) : I
|
||||||
|
*/
|
||||||
|
if (index->stat_index_size > 1) {
|
||||||
|
n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ?
|
||||||
|
ut_min(index->stat_index_size,
|
||||||
|
log2(index->stat_index_size)*srv_stats_transient_sample_pages)
|
||||||
|
: index->stat_index_size;
|
||||||
|
|
||||||
if (index->stat_index_size > 0) {
|
|
||||||
n_sample_pages = ut_min(index->stat_index_size,
|
|
||||||
ut_max(ut_min(srv_stats_transient_sample_pages,
|
|
||||||
index->stat_index_size),
|
|
||||||
log2(index->stat_index_size)
|
|
||||||
*srv_stats_transient_sample_pages));
|
|
||||||
} else {
|
|
||||||
n_sample_pages = 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user