From e15a83c0c7853a44cad031a3558607cb421461d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 26 Nov 2014 16:41:28 +0200 Subject: [PATCH] Better comments part 2 with proof and simplified implementation. Thanks to Daniel Black. --- storage/innobase/btr/btr0cur.cc | 53 ++++++++++++++++++++++++--------- storage/xtradb/btr/btr0cur.cc | 52 +++++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 28 deletions(-) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index ea97468a8f1..acc3a4d1c98 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3844,7 +3844,7 @@ btr_estimate_number_of_different_key_vals( ib_uint64_t* n_diff; ib_uint64_t* n_not_null; ibool stats_null_not_equal; - ullint n_sample_pages; /* number of pages to sample */ + ullint n_sample_pages = 1; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; ulint i; @@ -3897,8 +3897,6 @@ btr_estimate_number_of_different_key_vals( if (srv_stats_transient_sample_pages > index->stat_index_size) { if (index->stat_index_size > 0) { n_sample_pages = index->stat_index_size; - } else { - n_sample_pages = 1; } } else { n_sample_pages = srv_stats_transient_sample_pages; @@ -3906,17 +3904,44 @@ btr_estimate_number_of_different_key_vals( } else { /* New logaritmic number of pages that are estimated. Number of pages estimated should be between 1 and - index->stat_index_size. We pick index->stat_index_size - as maximum and log2(index->stat_index_size)*sr_stats_transient_sample_pages - if between range as minimum.*/ - if (index->stat_index_size > 0) { - n_sample_pages = ut_min(index->stat_index_size, - ut_max(ut_min(srv_stats_transient_sample_pages, - index->stat_index_size), - log2(index->stat_index_size) - *srv_stats_transient_sample_pages)); - } else { - n_sample_pages = 1; + index->stat_index_size. + + If we have only 0 or 1 index pages then we can only take 1 + sample. We have already initialized n_sample_pages to 1. + + So taking index size as I and sample as S and log(I)*S as L + + requirement 1) we want the out limit of the expression to not exceed I; + requirement 2) we want the ideal pages to be at least S; + so the current expression is min(I, max( min(S,I), L) + + looking for simplifications: + + case 1: assume S < I + min(I, max( min(S,I), L) -> min(I , max( S, L)) + + but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L. + + so we have: min(I , L) + + case 2: assume I < S + min(I, max( min(S,I), L) -> min(I, max( I, L)) + + case 2a: L > I + min(I, max( I, L)) -> min(I, L) -> I + + case 2b: when L < I + min(I, max( I, L)) -> min(I, I ) -> I + + so taking all case2 paths is I, our expression is: + n_pages = S < I? min(I,L) : I + */ + if (index->stat_index_size > 1) { + n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ? + ut_min(index->stat_index_size, + log2(index->stat_index_size)*srv_stats_transient_sample_pages) + : index->stat_index_size; + } } diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index 2bc1a7de13c..b13b196605e 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -4035,7 +4035,7 @@ btr_estimate_number_of_different_key_vals( ib_uint64_t* n_diff; ib_uint64_t* n_not_null; ibool stats_null_not_equal; - ullint n_sample_pages; /* number of pages to sample */ + ullint n_sample_pages=1; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; ulint i; @@ -4088,8 +4088,6 @@ btr_estimate_number_of_different_key_vals( if (srv_stats_transient_sample_pages > index->stat_index_size) { if (index->stat_index_size > 0) { n_sample_pages = index->stat_index_size; - } else { - n_sample_pages = 1; } } else { n_sample_pages = srv_stats_transient_sample_pages; @@ -4097,18 +4095,44 @@ btr_estimate_number_of_different_key_vals( } else { /* New logaritmic number of pages that are estimated. Number of pages estimated should be between 1 and - index->stat_index_size. We pick index->stat_index_size - as maximum and log2(index->stat_index_size)*sr_stats_transient_sample_pages - if between range as minimum.*/ + index->stat_index_size. + + If we have only 0 or 1 index pages then we can only take 1 + sample. We have already initialized n_sample_pages to 1. + + So taking index size as I and sample as S and log(I)*S as L + + requirement 1) we want the out limit of the expression to not exceed I; + requirement 2) we want the ideal pages to be at least S; + so the current expression is min(I, max( min(S,I), L) + + looking for simplifications: + + case 1: assume S < I + min(I, max( min(S,I), L) -> min(I , max( S, L)) + + but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L. + + so we have: min(I , L) + + case 2: assume I < S + min(I, max( min(S,I), L) -> min(I, max( I, L)) + + case 2a: L > I + min(I, max( I, L)) -> min(I, L) -> I + + case 2b: when L < I + min(I, max( I, L)) -> min(I, I ) -> I + + so taking all case2 paths is I, our expression is: + n_pages = S < I? min(I,L) : I + */ + if (index->stat_index_size > 1) { + n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ? + ut_min(index->stat_index_size, + log2(index->stat_index_size)*srv_stats_transient_sample_pages) + : index->stat_index_size; - if (index->stat_index_size > 0) { - n_sample_pages = ut_min(index->stat_index_size, - ut_max(ut_min(srv_stats_transient_sample_pages, - index->stat_index_size), - log2(index->stat_index_size) - *srv_stats_transient_sample_pages)); - } else { - n_sample_pages = 1; } }