From d5580feab72e3b16f736948f67531297d0e36ac1 Mon Sep 17 00:00:00 2001
From: drrtuy
Date: Mon, 2 Jun 2025 12:26:25 +0000
Subject: [PATCH] New getter to read Engine Independent JSON histogram buckets directly

This would allow Columnstore to leverage EI data in its cost-based and rewrite optimizer parts
---
Review notes (this section is not part of the commit message):

* Histogram_json_hb::Bucket is moved out of the class and becomes
  Histogram_bucket in sql_statistics.h; its three members are unchanged.
* Histogram_base gains a pure virtual get_histogram() that returns
  std::vector<Histogram_bucket> by value. Histogram_json_hb returns a copy
  of its `buckets` member; the override added in sql_statistics.h returns
  an empty vector.
* NOTE(review): line breaks, indentation and whitespace-only context lines
  were rebuilt from a whitespace-collapsed copy of this patch, and the
  std::vector template arguments were restored. Confirm the context
  against the tree before applying.

 sql/opt_histogram_json.h | 21 ++++++---------------
 sql/sql_statistics.h     | 20 ++++++++++++++++++++
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h
index 248467928fd..2c9ed9479ce 100644
--- a/sql/opt_histogram_json.h
+++ b/sql/opt_histogram_json.h
@@ -71,21 +71,7 @@ class Histogram_json_hb final : public Histogram_base
   /* Collection-time only: collected histogram in the JSON form. */
   std::string json_text;
 
-  struct Bucket
-  {
-    // The left endpoint in KeyTupleFormat. The endpoint is inclusive, this
-    // value is in this bucket.
-    std::string start_value;
-
-    // Cumulative fraction: The fraction of table rows that fall into this
-    // and preceding buckets.
-    double cum_fract;
-
-    // Number of distinct values in the bucket.
-    longlong ndv;
-  };
-
-  std::vector<Bucket> buckets;
+  std::vector<Histogram_bucket> buckets;
 
   std::string last_bucket_end_endp;
 
@@ -129,6 +115,11 @@ public:
   double range_selectivity(Field *field, key_range *min_endp,
                            key_range *max_endp, double avg_sel) override;
 
+  std::vector<Histogram_bucket> get_histogram() override
+  {
+    return buckets;
+  }
+
   void set_json_text(ulonglong sz, const char *json_text_arg,
                      size_t json_text_len)
   {
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 0a890a7d2e2..ef57f272d64 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -152,6 +152,19 @@ bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
 bool is_eits_usable(Field* field);
 
 class Histogram_builder;
+struct Histogram_bucket
+{
+  // The left endpoint in KeyTupleFormat. The endpoint is inclusive, this
+  // value is in this bucket.
+  std::string start_value;
+
+  // Cumulative fraction: The fraction of table rows that fall into this
+  // and preceding buckets.
+  double cum_fract;
+
+  // Number of distinct values in the bucket.
+  longlong ndv;
+};
 
 /*
   Common base for all histograms
@@ -199,6 +212,8 @@ public:
                                    double avg_sel)=0;
   virtual double range_selectivity(Field *field, key_range *min_endp,
                                    key_range *max_endp, double avg_sel)=0;
+  virtual std::vector<Histogram_bucket> get_histogram()=0;
+
   /*
     Legacy: return the size of the histogram on disk.
 
@@ -355,6 +370,11 @@ public:
   */
   double point_selectivity(Field *field, key_range *endpoint,
                            double avg_sel) override;
+
+  std::vector<Histogram_bucket> get_histogram() override
+  {
+    return {};
+  }
 };
 
 