diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index c39aeb84e1b..07e6e09710c 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -19,7 +19,7 @@ test.t1 analyze status Engine-independent statistics collected test.t1 analyze status OK SELECT * FROM mysql.column_stats WHERE table_name='t1'; db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram -test t1 a 1 25 0.0000 4.0000 0.0000 10 JSON [ +test t1 a 1 25 0.0000 4.0000 1.0000 10 JSON [ "3", "5", "7", @@ -31,7 +31,7 @@ test t1 a 1 25 0.0000 4.0000 0.0000 10 JSON [ "21", "23" ] -test t1 b 1 9 0.0000 1.6400 0.0000 10 JSON [ +test t1 b 1 9 0.0000 1.6400 1.0000 10 JSON [ "11", "13", "15", @@ -43,7 +43,7 @@ test t1 b 1 9 0.0000 1.6400 0.0000 10 JSON [ "5", "7" ] -test t1 c 1 9 0.0000 2.0000 0.0000 10 JSON [ +test t1 c 1 9 0.0000 2.0000 1.0000 10 JSON [ "11", "13", "15", @@ -55,7 +55,7 @@ test t1 c 1 9 0.0000 2.0000 0.0000 10 JSON [ "5", "7" ] -test t1 d 1 25 0.0000 8.0000 0.0000 10 JSON [ +test t1 d 1 25 0.0000 8.0000 1.0000 10 JSON [ "3", "5", "7", @@ -101,7 +101,7 @@ ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'. UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1'; FLUSH TABLES; SELECT * FROM t1; -ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32608'. +ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32641'. DELETE FROM mysql.column_stats; DROP TABLE t1; create schema world; diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index eed22d7ed77..baaabf73477 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1082,7 +1082,7 @@ public: // Note: this is dumb. the histogram size is stored with the // histogram! stat_field->store(stats->histogram_? - stats->histogram_->get_width() : 0); + stats->histogram_->get_size() : 0); break; case COLUMN_STAT_HIST_TYPE: if (stats->histogram_) @@ -1269,7 +1269,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar */ void Histogram_binary::serialize(Field *field) { - field->store((char*)get_values(), get_width(), &my_charset_bin); + field->store((char*)get_values(), get_size(), &my_charset_bin); } void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, @@ -1292,6 +1292,7 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg) { DBUG_ENTER("Histogram_json::parse"); + size = (uint8) size_arg; type = type_arg; const char *json = (char *)ptr; int vt; @@ -1545,7 +1546,7 @@ double Histogram_json::range_selectivity_new(Field *field, key_range *min_endp, void Histogram_json::serialize(Field *field) { - field->store((char*)values, strlen((char*)values), + field->store((char*)get_values(), strlen((char*)get_values()), &my_charset_bin); } @@ -1866,13 +1867,13 @@ public: class Histogram_builder { -private: +public: Field *column; /* table field for which the histogram is built */ uint col_length; /* size of this field */ ha_rows records; /* number of records the histogram is built for */ Field *min_value; /* pointer to the minimal value for the field */ Field *max_value; /* pointer to the maximal value for the field */ - Histogram_binary *histogram; /* the histogram location */ + Histogram_base *histogram; /* the histogram location */ uint hist_width; /* the number of points in the histogram */ double bucket_capacity; /* number of rows in a bucket of the histogram */ uint curr_bucket; /* number of the current bucket to be built */ @@ -1881,14 +1882,13 @@ private: /* number of distinct values that occured only once */ ulonglong count_distinct_single_occurence; -public: Histogram_builder(Field *col, uint col_len, ha_rows rows) : column(col), col_length(col_len), records(rows) { Column_statistics *col_stats= col->collected_stats; min_value= col_stats->min_value; max_value= col_stats->max_value; - histogram= dynamic_cast(col_stats->histogram_); + histogram= col_stats->histogram_; hist_width= histogram->get_width(); bucket_capacity= (double) records / (hist_width + 1); curr_bucket= 0; @@ -1918,13 +1918,13 @@ public: if (count > bucket_capacity * (curr_bucket + 1)) { column->store_field_value((uchar *) elem, col_length); - histogram->set_value(curr_bucket, + ((Histogram_binary *)histogram)->set_value(curr_bucket, column->pos_in_interval(min_value, max_value)); curr_bucket++; while (curr_bucket != hist_width && count > bucket_capacity * (curr_bucket + 1)) { - histogram->set_prev_value(curr_bucket); + ((Histogram_binary *)histogram)->set_prev_value(curr_bucket); curr_bucket++; } } @@ -1934,35 +1934,13 @@ public: class Histogram_builder_json : public Histogram_builder { - Field *column; /* table field for which the histogram is built */ - uint col_length; /* size of this field */ - ha_rows records; /* number of records the histogram is built for */ - Field *min_value; /* pointer to the minimal value for the field */ - Field *max_value; /* pointer to the maximal value for the field */ - Histogram_json *histogram; /* the histogram location */ - uint hist_width; /* the number of points in the histogram */ - double bucket_capacity; /* number of rows in a bucket of the histogram */ - uint curr_bucket; /* number of the current bucket to be built */ - ulonglong count; /* number of values retrieved */ - ulonglong count_distinct; /* number of distinct values retrieved */ - /* number of distinct values that occured only once */ - ulonglong count_distinct_single_occurence; - std::vector bucket_bounds = {}; + std::vector bucket_bounds; public: Histogram_builder_json(Field *col, uint col_len, ha_rows rows) - : column(col), col_length(col_len), records(rows) + : Histogram_builder(col, col_len, rows) { - Column_statistics *col_stats= col->collected_stats; - min_value= col_stats->min_value; - max_value= col_stats->max_value; - histogram= dynamic_cast(col_stats->histogram_); - hist_width= histogram->get_width(); - bucket_capacity= (double) records / (hist_width + 1); - curr_bucket= 0; - count= 0; - count_distinct= 0; - count_distinct_single_occurence= 0; + bucket_bounds = {}; } ~Histogram_builder_json() override = default; @@ -1995,7 +1973,7 @@ public: writer->end_array(); histogram->set_size(bucket_bounds.size()); Binary_string *json_string = (Binary_string *) writer->output.get_string(); - histogram->set_values((uchar *) json_string->c_ptr()); + ((Histogram_json *)histogram)->set_values((uchar *) json_string->c_ptr()); } }; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 6c498e17ac6..a8d5e338698 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -179,7 +179,11 @@ public: return 1.0; }; - virtual ~Histogram_base(){} + // Legacy: return the size of the histogram on disk. + // This will be stored in mysql.column_stats.hist_size column. + // Newer, JSON-based histograms may return 0. + virtual uint get_size()=0; + virtual ~Histogram_base()= default; }; class Histogram_binary : public Histogram_base @@ -283,7 +287,9 @@ public: void set_values (uchar *vals) override { values= (uchar *) vals; } void set_size (ulonglong sz) override { size= (uint8) sz; } - bool is_available() override { return get_width() > 0 && get_values(); } + uint get_size() override {return (uint)size;} + + bool is_available() override { return get_size() > 0 && get_values(); } /* This function checks that histograms should be usable only when @@ -385,6 +391,10 @@ public: void set_size (ulonglong sz) override {size = (uint8) sz; } + uint get_size() override { + return size; + } + void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override; bool is_available() override {return get_width() > 0 /*&& get_values()*/; } @@ -395,9 +405,9 @@ public: is_available(); } - void set_values (uchar *vals) override { values= vals; } + void set_values (uchar *vals) override { values= (uchar *) vals; } - uchar *get_values() override { return values; } + uchar *get_values() override { return (uchar *) values; } double range_selectivity(double min_pos, double max_pos) override {return 0.1;}