Add parser to read JSON array (of histograms) into string vector
Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
This commit is contained in:
parent
524322ad3e
commit
6bc2df5fa4
@ -282,6 +282,14 @@ int json_key_matches(json_engine_t *je, json_string_t *k);
|
||||
*/
|
||||
int json_read_value(json_engine_t *j);
|
||||
|
||||
/*
|
||||
* smart_read_value() reads parses a scalar value and value length from the json engine,
|
||||
* and copies them into `value` and `value_length` respectively.
|
||||
* It should only be called when the json_engine state is JST_VALUE.
|
||||
* If it encounters a non-scalar value (say object or array) before getting to value_len,
|
||||
* such value is also read and copied into value.
|
||||
*/
|
||||
enum json_types smart_read_value(json_engine_t *je, const char **value, int *value_len);
|
||||
|
||||
/*
|
||||
json_skip_key() makes parser skip the content of the current
|
||||
|
@ -1635,7 +1635,7 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void build() {
|
||||
void build_json_from_histogram() {
|
||||
Json_writer *writer = new Json_writer();
|
||||
writer->start_array();
|
||||
for(auto& value: bucket_bounds) {
|
||||
@ -1645,6 +1645,87 @@ public:
|
||||
histogram->set_size(bucket_bounds.size());
|
||||
Binary_string *json_string = (Binary_string *) writer->output.get_string();
|
||||
histogram->set_values((uchar *) json_string->c_ptr());
|
||||
|
||||
std::vector<std::string> buckets = parse_histogram_from_json(json_string->c_ptr());
|
||||
printf("%zu", buckets.size());
|
||||
|
||||
test_parse_histogram_from_json();
|
||||
}
|
||||
|
||||
static std::vector<std::string> parse_histogram_from_json(const char *json)
|
||||
{
|
||||
std::vector<std::string> hist_buckets= {};
|
||||
enum json_types vt = json_get_array_items(json, json + strlen(json), hist_buckets);
|
||||
printf("%d", vt);
|
||||
printf("%zu", hist_buckets.size());
|
||||
|
||||
return hist_buckets;
|
||||
}
|
||||
|
||||
static void test_parse_histogram_from_json()
|
||||
{
|
||||
std::vector<std::string> bucket = {};
|
||||
std::string json;
|
||||
std::string tests[7] = {
|
||||
R"(["aabbb", "ccccdd", "eeefff"])",
|
||||
R"(["aabbb", "ccc{}dd", "eeefff"])",
|
||||
R"(["aabbb", {"a": "b"}, "eeefff"])",
|
||||
R"({})",
|
||||
R"([1,2,3, null])",
|
||||
R"([null])",
|
||||
R"([])"
|
||||
};
|
||||
|
||||
for(const auto& test : tests) {
|
||||
json = test;
|
||||
bucket = parse_histogram_from_json(json.c_str());
|
||||
printf("%zu", bucket.size());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* json_get_array_items expects a JSON array as argument,
|
||||
* and pushes the elements of the array into the `container` vector.
|
||||
* It only works if all the elements in the original JSON array
|
||||
* are scalar values (i.e., strings, numbers, true or false), and returns JSV_BAD_JSON if:
|
||||
* the original JSON is not an array OR the JSON array contains non-scalar elements.
|
||||
*/
|
||||
static json_types json_get_array_items(const char *json, const char *json_end, std::vector<std::string> &container) {
|
||||
json_engine_t je;
|
||||
enum json_types value_type;
|
||||
int vl;
|
||||
const char *v;
|
||||
|
||||
json_scan_start(&je, &my_charset_utf8mb4_bin, (const uchar *)json, (const uchar *)json_end);
|
||||
|
||||
if (json_read_value(&je) || je.value_type != JSON_VALUE_ARRAY)
|
||||
{
|
||||
return JSV_BAD_JSON;
|
||||
}
|
||||
value_type = static_cast<json_types>(je.value_type);
|
||||
|
||||
std::string val;
|
||||
while(!json_scan_next(&je))
|
||||
{
|
||||
switch(je.state)
|
||||
{
|
||||
case JST_VALUE:
|
||||
if (je.value_type != JSON_VALUE_STRING &&
|
||||
je.value_type != JSON_VALUE_NUMBER &&
|
||||
je.value_type != JSON_VALUE_TRUE &&
|
||||
je.value_type != JSON_VALUE_FALSE)
|
||||
{
|
||||
return JSV_BAD_JSON;
|
||||
}
|
||||
value_type = smart_read_value(&je, &v, &vl);
|
||||
val = std::string(v, vl);
|
||||
container.emplace_back(val);
|
||||
case JST_ARRAY_END:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return value_type;
|
||||
}
|
||||
};
|
||||
|
||||
@ -1772,7 +1853,7 @@ public:
|
||||
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
|
||||
tree->walk(table_field->table, json_histogram_build_walk,
|
||||
(void *) &hist_builder);
|
||||
hist_builder.build();
|
||||
hist_builder.build_json_from_histogram();
|
||||
distincts= hist_builder.get_count_distinct();
|
||||
distincts_single_occurence= hist_builder.get_count_single_occurence();
|
||||
} else
|
||||
|
@ -1868,7 +1868,7 @@ int json_path_compare(const json_path_t *a, const json_path_t *b,
|
||||
}
|
||||
|
||||
|
||||
static enum json_types smart_read_value(json_engine_t *je,
|
||||
enum json_types smart_read_value(json_engine_t *je,
|
||||
const char **value, int *value_len)
|
||||
{
|
||||
if (json_read_value(je))
|
||||
@ -1952,7 +1952,6 @@ err_return:
|
||||
return JSV_BAD_JSON;
|
||||
}
|
||||
|
||||
|
||||
/** Simple json lookup for a value by the key.
|
||||
|
||||
Expects JSON object.
|
||||
@ -2029,6 +2028,7 @@ enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Check if json is valid (well-formed)
|
||||
|
||||
@retval 0 - success, json is well-formed
|
||||
|
Loading…
x
Reference in New Issue
Block a user