From 2e830cfbc81c545de135b4fbee751141741a11ab Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 20 Apr 2013 23:30:21 +0300 Subject: [PATCH] MDEV-4402 A function to visualize histograms data. --- mysql-test/r/statistics.result | 59 ++++++++++++++++++++++++++ mysql-test/t/statistics.test | 16 +++++++ sql/item_create.cc | 21 ++++++++++ sql/item_strfunc.cc | 77 ++++++++++++++++++++++++++++++++++ sql/item_strfunc.h | 16 +++++++ sql/sys_vars.cc | 3 +- 6 files changed, 190 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/statistics.result b/mysql-test/r/statistics.result index 20469c01a2e..4652420bd87 100644 --- a/mysql-test/r/statistics.result +++ b/mysql-test/r/statistics.result @@ -1416,6 +1416,65 @@ WORLD_INNODB CITY Country 1 17.5819 WORLD_INNODB COUNTRYLANGUAGE PRIMARY 1 4.2232 WORLD_INNODB COUNTRYLANGUAGE PRIMARY 2 1.0000 WORLD_INNODB COUNTRYLANGUAGE Percentage 1 2.7640 +set use_stat_tables='preferably'; +set histogram_size=100; +set histogram_type='SINGLE_PREC_HB'; +ANALYZE TABLE CountryLanguage; +set histogram_size=255; +set histogram_type='DOUBLE_PREC_HB'; +ANALYZE TABLE City; +select db_name,table_name,column_name,min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(histogram,hist_type) from mysql.column_stats where table_name='CountryLanguage' and column_name = 'Percentage';; +db_name world +table_name CountryLanguage +column_name Percentage +min_value 0.0 +max_value 99.9 +nulls_ratio 0.0000 +avg_length 4.0000 +avg_frequency 2.7640 +hist_size 0 +hist_type NULL +hex(histogram) NULL +decode_histogram(histogram,hist_type) NULL +db_name world_innodb +table_name CountryLanguage +column_name Percentage +min_value 0.0 +max_value 99.9 +nulls_ratio 0.0000 +avg_length 4.0000 +avg_frequency 2.7640 +hist_size 100 +hist_type SINGLE_PREC_HB +hex(histogram) 
0000000000000000000000000101010101010101010202020303030304040404050505050606070707080809090A0A0B0C0D0D0E0E0F10111213131415161718191B1C1E202224292A2E33373B4850575F6A76818C9AA7B9C4CFDADFE5EBF0F4F8FAFCFF +decode_histogram(histogram,hist_type) 0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.004,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.004,0.000,0.000,0.004,0.000,0.000,0.000,0.004,0.000,0.000,0.000,0.004,0.000,0.000,0.000,0.004,0.000,0.004,0.000,0.000,0.004,0.000,0.004,0.000,0.004,0.000,0.004,0.004,0.004,0.000,0.004,0.000,0.004,0.004,0.004,0.004,0.004,0.000,0.004,0.004,0.004,0.004,0.004,0.004,0.008,0.004,0.008,0.008,0.008,0.008,0.020,0.004,0.016,0.020,0.016,0.016,0.051,0.031,0.027,0.031,0.043,0.047,0.043,0.043,0.055,0.051,0.071,0.043,0.043,0.043,0.020,0.024,0.024,0.020,0.016,0.016,0.008,0.008,0.012 +select db_name,table_name,column_name,min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(histogram,hist_type) from mysql.column_stats where table_name='City' and column_name = 'Population';; +db_name world +table_name City +column_name Population +min_value 42 +max_value 10500000 +nulls_ratio 0.0000 +avg_length 4.0000 +avg_frequency 1.0467 +hist_size 0 +hist_type NULL +hex(histogram) NULL +decode_histogram(histogram,hist_type) NULL +db_name world_innodb +table_name City +column_name Population +min_value 42 +max_value 10500000 +nulls_ratio 0.0000 +avg_length 4.0000 +avg_frequency 1.0467 +hist_size 255 +hist_type DOUBLE_PREC_HB +hex(histogram) 
1F00A1002B023002350238023F02430249024E02520258025D02630268026E02720276027B02800285028C02920297029D02A102A802AC02B402BC02C402CC02D302DA02E302EA02F102F802010305030C03120319031F03290333033D0343034F03590363036D037803840390039A03A603B303C303D103E003F203020412042404330440045304600472047F049104A204B804C804DE04F2040A0526053F0558056F058E05B305D905F4051306380667068406AB06DA06020731075C079407C507F8072E085E08A508DF0824096909CC092E0A760AD50A400BA90B150CAD0C310D240E130F0E103B11B9126B14F0166B192F1CB71FFF240630483FC567A5 +decode_histogram(histogram,hist_type) 0.00047,0.00198,0.00601,0.00008,0.00008,0.00005,0.00011,0.00006,0.00009,0.00008,0.00006,0.00009,0.00008,0.00009,0.00008,0.00009,0.00006,0.00006,0.00008,0.00008,0.00008,0.00011,0.00009,0.00008,0.00009,0.00006,0.00011,0.00006,0.00012,0.00012,0.00012,0.00012,0.00011,0.00011,0.00014,0.00011,0.00011,0.00011,0.00014,0.00006,0.00011,0.00009,0.00011,0.00009,0.00015,0.00015,0.00015,0.00009,0.00018,0.00015,0.00015,0.00015,0.00017,0.00018,0.00018,0.00015,0.00018,0.00020,0.00024,0.00021,0.00023,0.00027,0.00024,0.00024,0.00027,0.00023,0.00020,0.00029,0.00020,0.00027,0.00020,0.00027,0.00026,0.00034,0.00024,0.00034,0.00031,0.00037,0.00043,0.00038,0.00038,0.00035,0.00047,0.00056,0.00058,0.00041,0.00047,0.00056,0.00072,0.00044,0.00060,0.00072,0.00061,0.00072,0.00066,0.00085,0.00075,0.00078,0.00082,0.00073,0.00108,0.00089,0.00105,0.00105,0.00151,0.00150,0.00110,0.00145,0.00163,0.00160,0.00165,0.00232,0.00201,0.00371,0.00365,0.00383,0.00459,0.00583,0.00662,0.00984,0.00969,0.01080,0.01379,0.02063,0.04308,0.05960,0.15816 +set histogram_type=default; +set histogram_size=default; use test; DROP DATABASE world; SELECT UPPER(db_name), UPPER(table_name), cardinality diff --git a/mysql-test/t/statistics.test b/mysql-test/t/statistics.test index b2a052fd3e8..e2ba9d4e173 100644 --- a/mysql-test/t/statistics.test +++ b/mysql-test/t/statistics.test @@ -564,6 +564,22 @@ SELECT UPPER(db_name), UPPER(table_name), index_name, prefix_arity, avg_frequency FROM 
mysql.index_stats;
 
+set use_stat_tables='preferably';
+--disable_result_log
+set histogram_size=100;
+set histogram_type='SINGLE_PREC_HB';
+ANALYZE TABLE CountryLanguage;
+set histogram_size=255;
+set histogram_type='DOUBLE_PREC_HB';
+ANALYZE TABLE City;
+--enable_result_log
+
+--query_vertical select db_name,table_name,column_name,min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(histogram,hist_type) from mysql.column_stats where table_name='CountryLanguage' and column_name = 'Percentage';
+--query_vertical select db_name,table_name,column_name,min_value,max_value,nulls_ratio,avg_length,avg_frequency,hist_size,hist_type,hex(histogram),decode_histogram(histogram,hist_type) from mysql.column_stats where table_name='City' and column_name = 'Population';
+
+set histogram_type=default;
+set histogram_size=default;
+
 use test;
 DROP DATABASE world;
 
diff --git a/sql/item_create.cc b/sql/item_create.cc
index c1cefed6f8b..ce4dc7ced8f 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -614,6 +614,19 @@ protected:
 };
 
 
+class Create_func_decode_histogram : public Create_func_arg2 /* builder whose create_2_arg() makes an Item_func_decode_histogram */
+{
+public:
+  Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); /* arg1 and arg2 are handed to the item constructor in this order */
+
+  static Create_func_decode_histogram s_singleton; /* the one instance; the constructor below is protected */
+
+protected:
+  Create_func_decode_histogram() {}
+  virtual ~Create_func_decode_histogram() {}
+};
+
+
 class Create_func_concat_ws : public Create_native_func
 {
 public:
@@ -3231,6 +3244,13 @@ Create_func_concat::create_native(THD *thd, LEX_STRING name,
   return new (thd->mem_root) Item_func_concat(*item_list);
 }
 
+Create_func_decode_histogram Create_func_decode_histogram::s_singleton;
+
+Item *
+Create_func_decode_histogram::create_2_arg(THD *thd, Item *arg1, Item *arg2)
+{
+  return new (thd->mem_root) Item_func_decode_histogram(arg1, arg2); /* the item is allocated from thd->mem_root */
+}
 
 Create_func_concat_ws Create_func_concat_ws::s_singleton;
 
@@ -5377,6 +5397,7 @@ static Native_func_registry func_array[] =
   { { C_STRING_WITH_LEN("DAYOFYEAR") },
BUILDER(Create_func_dayofyear)},
   { { C_STRING_WITH_LEN("DECODE") }, BUILDER(Create_func_decode)},
+  { { C_STRING_WITH_LEN("DECODE_HISTOGRAM") }, BUILDER(Create_func_decode_histogram)},
   { { C_STRING_WITH_LEN("DEGREES") }, BUILDER(Create_func_degrees)},
   { { C_STRING_WITH_LEN("DES_DECRYPT") }, BUILDER(Create_func_des_decrypt)},
   { { C_STRING_WITH_LEN("DES_ENCRYPT") }, BUILDER(Create_func_des_encrypt)},
   { { C_STRING_WITH_LEN("DIMENSION") }, GEOM_BUILDER(Create_func_dimension)},
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 93569082d74..5cce910758a 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -60,6 +60,7 @@ C_MODE_START
 C_MODE_END
 #include "sql_show.h" // append_identifier
 #include
+#include "sql_statistics.h"
 
 /**
    @todo Remove this. It is not safe to use a shared String object.
@@ -472,6 +473,104 @@ void Item_func_aes_decrypt::fix_length_and_dec()
   set_persist_maybe_null(1);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+
+
+const char *histogram_types[] =
+           {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", 0};
+/* TYPELIB::count excludes the terminating 0 entry of the name list */
+static TYPELIB histogram_types_typelib=
+  { array_elements(histogram_types) - 1,
+    "histogram_types",
+    histogram_types, NULL};
+/* printf-style format of one delta, indexed by the 0-based histogram type */
+static const char *const representation_by_type[]= {"%.3f", "%.5f"};
+
+/**
+  Decode a binary histogram into a comma-separated list of deltas.
+
+  args[0] supplies the raw histogram bytes and args[1] the histogram type
+  name ("SINGLE_PREC_HB" or "DOUBLE_PREC_HB"). Every stored point (one byte,
+  or two little-endian bytes for DOUBLE_PREC_HB) is scaled into [0, 1] and
+  printed as the difference to the preceding point.
+
+  @param str  String that receives the formatted result
+
+  @return str on success; NULL with null_value set when an argument is NULL
+          or args[1] does not name a known histogram type
+*/
+String *Item_func_decode_histogram::val_str(String *str)
+{
+  DBUG_ASSERT(fixed == 1);
+  char buff[STRING_BUFFER_USUAL_SIZE];
+  String *res, tmp(buff, sizeof(buff), &my_charset_bin);
+  int type;
+
+  tmp.length(0);
+  if (!(res= args[1]->val_str(&tmp)) ||
+      (type= find_type(res->c_ptr_safe(),
+                       &histogram_types_typelib, MYF(0))) <= 0)
+  {
+    null_value= 1;
+    return 0;
+  }
+  type--;                       // find_type() position is 1-based
+
+  tmp.length(0);
+  if (!(res= args[0]->val_str(&tmp)))
+  {
+    null_value= 1;
+    return 0;
+  }
+  /*
+    Trim the odd trailing byte through a local length: res may be owned by
+    the argument item, so it must not be shortened in place.
+  */
+  uint length= res->length();
+  if (type == DOUBLE_PREC_HB && length % 2 != 0)
+    length--;                   // one byte is unused
+
+  double prev= 0.0;
+  uint i;
+  str->length(0);
+  bool first= true;
+  /* ptr() is enough: the bytes are read by length, no '\0' is needed */
+  const uchar *p= (const uchar*) res->ptr();
+  for (i= 0; i < length; i++)
+  {
+    char numbuf[32];
+    double val;
+    switch (type)
+    {
+    case SINGLE_PREC_HB:
+      val= p[i] / ((double)((1 << 8) - 1));
+      break;
+    case DOUBLE_PREC_HB:
+      /* uint2korr() is safe for unaligned data and any host byte order */
+      val= uint2korr(p + i) / ((double)((1 << 16) - 1));
+      i++;                      // the point occupies two bytes
+      break;
+    default:
+      val= 0;
+      DBUG_ASSERT(0);
+    }
+    /* show delta with previous value */
+    int size= my_snprintf(numbuf, sizeof(numbuf),
+                          representation_by_type[type], val - prev);
+    if (first)
+      first= false;
+    else
+      str->append(",");
+    str->append(numbuf, size);
+    prev= val;
+  }
+
+  null_value=0;
+  return str;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
 
 /**
   Concatenate args with the following premises:
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 89d7fa67f6b..169da25e826 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -143,6 +143,22 @@ public:
   const char *func_name() const { return "concat"; }
 };
 
+class Item_func_decode_histogram :public Item_str_func
+{
+  String tmp_value;
+public:
+  Item_func_decode_histogram(Item *a, Item *b)
+    :Item_str_func(a, b) {}
+  String *val_str(String *);
+  void fix_length_and_dec()
+  {
+    collation.set(system_charset_info);
+    max_length= MAX_BLOB_WIDTH;
+    set_persist_maybe_null(1);
+  }
+  const char *func_name() const { return "decode_histogram"; }
+};
+
 class Item_func_concat_ws :public Item_str_func
 {
   String tmp_value;
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 24009cb5a99..e51fd1cc11c 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -3970,8 +3970,7 @@ static Sys_var_ulong Sys_histogram_size(
        SESSION_VAR(histogram_size), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(0, 255), DEFAULT(0), BLOCK_SIZE(1));
 
-const char *histogram_types[] =
-           {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", 0};
+extern const char *histogram_types[];
 static Sys_var_enum Sys_histogram_type(
        "histogram_type",
        "Specifies type of the histograms created by ANALYZE. "