Fix JSON parsing: future-proof data representation in JSON, code cleanup
parent a0b4a86822
commit 2a1cdbabec
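In the old representation the histogram column held a bare JSON array of bucket endpoints; the new code wraps that array in an object keyed by a version name so the format can be extended later. Roughly (endpoint values are illustrative, taken from the tests below):

    old:  ["a-1", "a-2", "a-3"]
    new:  {"histogram_hb_v1": ["a-1", "a-2", "a-3"]}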
@@ -283,12 +283,13 @@ int json_key_matches(json_engine_t *je, json_string_t *k);
 int json_read_value(json_engine_t *j);
 
 /*
-* json_smart_read_value() reads parses a scalar value and value length from the json engine,
-* and copies them into `value` and `value_length` respectively.
-* It should only be called when the json_engine state is JST_VALUE.
-* If it encounters a non-scalar value (say object or array) before getting to value_len,
-* such value is also read and copied into value.
-*/
+  json_smart_read_value() reads a JSON value. A pointer to the value is stored
+  in *value and its length in *value_len.
+
+  If the value is not a scalar, it returns pointers to its JSON
+  representation.
+
+  The function should only be called when je->state == JST_VALUE.
+*/
 enum json_types json_smart_read_value(json_engine_t *je, const char **value, int *value_len);
 
 /*
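A minimal usage sketch of the documented API, assuming a JSON array as input; it mirrors the array-reading loop in Histogram_json::parse() further down and omits error handling:

static void read_array_scalars(const char *json, size_t len)
{
  json_engine_t je;
  json_scan_start(&je, &my_charset_utf8mb4_bin,
                  (const uchar*)json, (const uchar*)json + len);

  /* For this sketch, the root value must be an array */
  if (json_read_value(&je) || je.value_type != JSON_VALUE_ARRAY)
    return;

  while (!json_scan_next(&je))
  {
    if (je.state == JST_VALUE)
    {
      const char *val;
      int val_len;
      /* val/val_len point into the original JSON text; for a non-scalar
         element they cover its whole JSON representation. */
      json_smart_read_value(&je, &val, &val_len);
    }
    else if (je.state == JST_ARRAY_END)
      break;
  }
}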
File diff suppressed because one or more lines are too long
@@ -37,6 +37,8 @@ analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
 explain extended select * from t1_json where a < 'b-1a';
 analyze select * from t1_json where a > 'zzzzzzzzz';
 
+drop table ten;
+
 # test different valid JSON strings that are invalid histograms.
 UPDATE mysql.column_stats SET histogram='["a-1", "a-2", {"a": "b"}, "a-3"]' WHERE table_name='t1_json';
 FLUSH TABLES;
@@ -45,23 +47,23 @@ explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
 
 
 --source include/have_sequence.inc
-create table users (
+create table t2 (
   city varchar(100)
 );
 set histogram_size=50;
-insert into users select 'Moscow' from seq_1_to_99;
-insert into users select 'Helsinki' from seq_1_to_2;
+insert into t2 select 'Moscow' from seq_1_to_99;
+insert into t2 select 'Helsinki' from seq_1_to_2;
 set histogram_type=json_hb;
-analyze table users persistent for all;
-explain extended select * from users where city = 'Moscow';
-analyze select * from users where city = 'Moscow';
-explain extended select * from users where city = 'Helsinki';
-analyze select * from users where city = 'helsinki';
-explain extended select * from users where city < 'Lagos';
+analyze table t2 persistent for all;
+explain extended select * from t2 where city = 'Moscow';
+analyze select * from t2 where city = 'Moscow';
+explain extended select * from t2 where city = 'Helsinki';
+analyze select * from t2 where city = 'helsinki';
+explain extended select * from t2 where city < 'Lagos';
 
 drop table t1_bin;
 drop table t1_json;
-drop table users;
+drop table t2;
 
 DELETE FROM mysql.column_stats;
 
@@ -8914,4 +8914,4 @@ ER_PARTITION_CONVERT_SUBPARTITIONED
 ER_PROVIDER_NOT_LOADED
         eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
 ER_JSON_HISTOGRAM_PARSE_FAILED
-        eng "Failed to parse histogram, encountered JSON_TYPE '%d'."
+        eng "Failed to parse histogram: %s at offset %d."
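With the reworked parser (further down), a malformed histogram now produces a message naming the problem and its position in the JSON text, for example (offset value illustrative):

    Failed to parse histogram: Root JSON element must be a JSON object at offset 1.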
@@ -1123,6 +1123,7 @@ public:
   void get_stat_values()
   {
     table_field->read_stats->set_all_nulls();
+    // default: hist_type=NULL means there's no histogram
     table_field->read_stats->histogram_type_on_disk= INVALID_HISTOGRAM;
 
     if (table_field->read_stats->min_value)
@@ -1196,7 +1197,10 @@ public:
       break;
     }
     case COLUMN_STAT_HISTOGRAM:
-      //TODO: if stat_field->length() == 0 then histogram_type_on_disk is set to INVALID_HISTOGRAM
+      /*
+        Do nothing here: we take the histogram length from the 'histogram'
+        column itself
+      */
       break;
     }
   }
@@ -1245,7 +1249,7 @@ public:
     }
     if (!hist->parse(mem_root, table_field,
                      table_field->read_stats->histogram_type_on_disk,
-                     (const uchar*)val.ptr(), val.length()))
+                     val.ptr(), val.length()))
     {
       table_field->read_stats->histogram_= hist;
       return hist;
@@ -1255,19 +1259,19 @@ public:
   }
 };
 
-bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *,
-                             Histogram_type type_arg,
-                             const uchar *ptr_arg, uint size_arg)
+bool Histogram_binary::parse(MEM_ROOT *mem_root, Field*,
+                             Histogram_type type_arg, const char *hist_data,
+                             size_t hist_data_len)
 {
-  // Just copy the data
-  size = (uint8) size_arg;
-  type = type_arg;
-  if ((values = (uchar*)alloc_root(mem_root, size_arg)))
-  {
-    memcpy(values, ptr_arg, size_arg);
-    return false;
-  }
-  return true;
+  /* On-disk and in-memory formats are the same. Just copy the data. */
+  type= type_arg;
+  size= (uint8) hist_data_len; // 'size' holds the size of the histogram in bytes
+  if (!(values= (uchar*)alloc_root(mem_root, hist_data_len)))
+    return true;
+
+  memcpy(values, hist_data, hist_data_len);
+  return false;
 }
 
 /*
@@ -1307,39 +1311,81 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
 */
 
 bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field,
-                           Histogram_type type_arg, const uchar *ptr,
-                           uint size_arg)
+                           Histogram_type type_arg, const char *hist_data,
+                           size_t hist_data_len)
 {
   DBUG_ENTER("Histogram_json::parse");
   DBUG_ASSERT(type_arg == JSON_HB);
-  size = (uint8) size_arg;
-  const char *json = (char *)ptr;
-  int vt;
-  std::vector<std::string> hist_buckets_text;
-  bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text);
-  if (!result)
-  {
-    my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), vt);
-    DBUG_RETURN(true);
-  }
-  size= hist_buckets_text.size();
-
-  /*
-    Convert the text based array into a data structure that allows lookups and
-    estimates
-  */
-  for (auto &s : hist_buckets_text)
-  {
-    field->store_text(s.data(), s.size(), &my_charset_bin);
-
-    // Get the value in "truncated key tuple format" here:
-    uchar buf[MAX_KEY_LENGTH];
-    uint len_to_copy= field->key_length();
-    uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
-    histogram_bounds.push_back(std::string((char*)buf, bytes));
-  }
-
+  const char *err;
+  json_engine_t je;
+  json_string_t key_name;
+
+  json_scan_start(&je, &my_charset_utf8mb4_bin,
+                  (const uchar*)hist_data,
+                  (const uchar*)hist_data+hist_data_len);
+
+  if (json_read_value(&je) || je.value_type != JSON_VALUE_OBJECT)
+  {
+    err= "Root JSON element must be a JSON object";
+    goto error;
+  }
+
+  json_string_set_str(&key_name, (const uchar*)JSON_NAME,
+                      (const uchar*)JSON_NAME + strlen(JSON_NAME));
+  json_string_set_cs(&key_name, system_charset_info);
+
+  if (json_scan_next(&je) || je.state != JST_KEY ||
+      !json_key_matches(&je, &key_name))
+  {
+    err= "The first key in the object must be histogram_hb_v1";
+    goto error;
+  }
+
+  // The value must be a JSON array
+  if (json_read_value(&je) || (je.value_type != JSON_VALUE_ARRAY))
+  {
+    err= "A JSON array expected";
+    goto error;
+  }
+
+  // Read the array
+  while (!json_scan_next(&je))
+  {
+    switch(je.state)
+    {
+    case JST_VALUE:
+    {
+      const char *val;
+      int val_len;
+      json_smart_read_value(&je, &val, &val_len);
+      if (je.value_type != JSON_VALUE_STRING &&
+          je.value_type != JSON_VALUE_NUMBER &&
+          je.value_type != JSON_VALUE_TRUE &&
+          je.value_type != JSON_VALUE_FALSE)
+      {
+        err= "Scalar value expected";
+        goto error;
+      }
+      uchar buf[MAX_KEY_LENGTH];
+      uint len_to_copy= field->key_length();
+      field->store_text(val, val_len, &my_charset_bin);
+      uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
+      histogram_bounds.push_back(std::string((char*)buf, bytes));
+      // TODO: Should we also compare this endpoint with the previous
+      //       to verify that the ordering is right?
+      break;
+    }
+    case JST_ARRAY_END:
+      break;
+    }
+  }
+  size= histogram_bounds.size();
   DBUG_RETURN(false);
+
+error:
+  my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err,
+           je.s.c_str - (const uchar*)hist_data);
+  DBUG_RETURN(true);
 }
 
 
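To make the error paths above concrete, here are illustrative inputs and the err string each one would trigger (bucket values are made up; only the last input parses):

    ["a-1", "m-5"]                       -> Root JSON element must be a JSON object
    {"buckets": ["a-1"]}                 -> The first key in the object must be histogram_hb_v1
    {"histogram_hb_v1": {"a": 1}}        -> A JSON array expected
    {"histogram_hb_v1": [["a-1"]]}       -> Scalar value expected
    {"histogram_hb_v1": ["a-1", "m-5"]}  -> parses successfully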
@@ -1347,7 +1393,7 @@ static
 void store_key_image_to_rec_no_null(Field *field, uchar *ptr) {
   MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
                                                &field->table->write_set);
   field->set_key_image(ptr, field->key_length());
   dbug_tmp_restore_column_map(&field->table->write_set, old_map);
 }
 
@@ -1506,9 +1552,9 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
 
 /*
   @param field  The table field histogram is for. We don't care about the
                 field's current value, we only need its virtual functions to
                 perform various operations
 
   @param min_endp, max_endp - this specifies the range.
 */
 double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
@@ -1594,7 +1640,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
 
 void Histogram_json::serialize(Field *field)
 {
-  field->store((char*)json_text, strlen((char*)json_text), &my_charset_bin);
+  field->store(json_text.data(), json_text.size(), &my_charset_bin);
 }
 
 
@@ -2052,13 +2098,16 @@ public:
   }
 
   void build_json_from_histogram() {
-    Json_writer *writer = new Json_writer();
-    writer->start_array();
+    Json_writer writer;
+    writer.start_object();
+    writer.add_member(Histogram_json::JSON_NAME).start_array();
+
     for(auto& value: bucket_bounds) {
-      writer->add_str(value.c_str());
+      writer.add_str(value.c_str());
     }
-    writer->end_array();
-    Binary_string *json_string = (Binary_string *) writer->output.get_string();
+    writer.end_array();
+    writer.end_object();
+
+    Binary_string *json_string = (Binary_string *) writer.output.get_string();
     Histogram_json *hist= (Histogram_json*)histogram;
     hist->set_json_text(bucket_bounds.size(), (uchar *) json_string->c_ptr());
   }
@@ -2080,42 +2129,6 @@ Histogram_base *create_histogram(Histogram_type hist_type)
 }
 
 
-bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container) {
-  json_engine_t je;
-  int vl;
-  const char *v;
-
-  json_scan_start(&je, &my_charset_utf8mb4_bin, (const uchar *)json, (const uchar *)json_end);
-
-  if (json_read_value(&je) || (*value_type = je.value_type) != JSON_VALUE_ARRAY)
-  {
-    return false;
-  }
-
-  std::string val;
-  while(!json_scan_next(&je))
-  {
-    switch(je.state)
-    {
-      case JST_VALUE:
-        *value_type = json_smart_read_value(&je, &v, &vl);
-        if (je.value_type != JSON_VALUE_STRING &&
-            je.value_type != JSON_VALUE_NUMBER &&
-            je.value_type != JSON_VALUE_TRUE &&
-            je.value_type != JSON_VALUE_FALSE)
-        {
-          return false;
-        }
-        val = std::string(v, vl);
-        container.emplace_back(val);
-        break;
-      case JST_ARRAY_END:
-        break;
-    }
-  }
-  return true;
-}
-
 C_MODE_START
 
 int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
@@ -152,7 +152,7 @@ class Histogram_base : public Sql_alloc
 {
 public:
   virtual bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
-                     const uchar *ptr, uint size)= 0;
+                     const char *hist_data, size_t hist_data_len)= 0;
   virtual void serialize(Field *to_field)= 0;
 
   virtual Histogram_type get_type()=0;
@@ -187,7 +187,7 @@ class Histogram_binary : public Histogram_base
 {
 public:
   bool parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg,
-             const uchar *ptr_arg, uint size_arg) override;
+             const char *hist_data, size_t hist_data_len) override;
   void serialize(Field *to_field) override;
 
   Histogram_type get_type() override { return type; }
@@ -350,14 +350,16 @@ private:
   uint8 size; /* Number of elements in the histogram */
 
   /* Collection-time only: collected histogram in the JSON form. */
-  uchar *json_text;
+  std::string json_text;
 
   // Array of histogram bucket endpoints in KeyTupleFormat.
   std::vector<std::string> histogram_bounds;
 
 public:
+  static constexpr const char* JSON_NAME="histogram_hb_v1";
+
   bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
-             const uchar *ptr, uint size) override;
+             const char *hist_data, size_t hist_data_len) override;
 
   void serialize(Field *field) override;
 
@@ -375,7 +377,8 @@ public:
   void set_json_text(ulonglong sz, uchar *json_text_arg)
   {
     size = (uint8) sz;
-    json_text= json_text_arg;
+    json_text.assign((const char*)json_text_arg,
+                     strlen((const char*)json_text_arg));
   }
 
   uint get_size() override
@@ -481,8 +484,9 @@ private:
   ulonglong avg_frequency;
 
 public:
+  /* Histogram type as specified in mysql.column_stats.hist_type */
   Histogram_type histogram_type_on_disk;
 
   Histogram_base *histogram_;
 
   uint32 no_values_provided_bitmap()