From 29c4bd9d27996600790be45ae057214381c7d3a8 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 1 Nov 2017 13:20:32 +0000 Subject: [PATCH 01/33] SOURCE_REVISION should always be defined in source_revision.h #cmakedefine in template file might not create a #define in configured header file. --- include/source_revision.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/source_revision.h.in b/include/source_revision.h.in index 5b657d0940e..991df992e2f 100644 --- a/include/source_revision.h.in +++ b/include/source_revision.h.in @@ -1 +1 @@ -#cmakedefine SOURCE_REVISION "@SOURCE_REVISION@" +#define SOURCE_REVISION "@SOURCE_REVISION@" From fadfe447e8d9b1244341bd5ed94bb7f8eb623f18 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 13 Jun 2017 11:54:39 +0530 Subject: [PATCH 02/33] MDEV-12985: syntax added for the percentile_cont and percentile_disc functions --- sql/lex.h | 3 +++ sql/sql_yacc.yy | 25 ++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/lex.h b/sql/lex.h index ef03afb7a32..7967d17a5d4 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -701,6 +701,7 @@ static SYMBOL symbols[] = { { "WHILE", SYM(WHILE_SYM)}, { "WINDOW", SYM(WINDOW_SYM)}, { "WITH", SYM(WITH)}, + { "WITHIN", SYM(WITHIN)}, { "WORK", SYM(WORK_SYM)}, { "WRAPPER", SYM(WRAPPER_SYM)}, { "WRITE", SYM(WRITE_SYM)}, @@ -743,6 +744,8 @@ static SYMBOL sql_functions[] = { { "NTILE", SYM(NTILE_SYM)}, { "POSITION", SYM(POSITION_SYM)}, { "PERCENT_RANK", SYM(PERCENT_RANK_SYM)}, + { "PERCENTILE_CONT", SYM(PERCENTILE_CONT_SYM)}, + { "PERCENTILE_DISC", SYM(PERCENTILE_DISC_SYM)}, { "RANK", SYM(RANK_SYM)}, { "ROW_NUMBER", SYM(ROW_NUMBER_SYM)}, { "SESSION_USER", SYM(USER_SYM)}, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 67b73dea506..57d7ecd48c8 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1328,6 +1328,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PARTITIONING_SYM %token PASSWORD_SYM %token 
PERCENT_RANK_SYM +%token PERCENTILE_CONT_SYM +%token PERCENTILE_DISC_SYM %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -1574,6 +1576,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WINDOW_SYM %token WHILE_SYM %token WITH /* SQL-2003-R */ +%token WITHIN %token WITH_CUBE_SYM /* INTERNAL */ %token WITH_ROLLUP_SYM /* INTERNAL */ %token WORK_SYM /* SQL-2003-N */ @@ -1733,6 +1736,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); window_func_expr window_func simple_window_func + inverse_distribution_function function_call_keyword function_call_nonkeyword function_call_generic @@ -1951,7 +1955,6 @@ END_OF_INPUT %type row_field_name row_field_definition %type row_field_definition_list row_type_body - %type opt_window_clause window_def_list window_def window_spec %type window_name %type opt_window_ref opt_window_frame_clause @@ -9374,6 +9377,7 @@ column_default_non_parenthesized_expr: | variable | sum_expr | window_func_expr + | inverse_distribution_function | ROW_SYM '(' expr ',' expr_list ')' { $5->push_front($3, thd->mem_root); @@ -10693,6 +10697,25 @@ simple_window_func: } ; + +inverse_distribution_function: + inverse_distribution_function_type '(' expr ')' WITHIN GROUP_SYM + '(' order_by_single_element_list ')' OVER_SYM '(' opt_window_ref opt_window_partition_clause ')' + { + my_yyabort_error((ER_VIEW_SELECT_VARIABLE, MYF(0))); + }; + +inverse_distribution_function_type: + PERCENTILE_CONT_SYM + {} + |PERCENTILE_DISC_SYM + {} + ; + +order_by_single_element_list: + ORDER_SYM BY order_ident order_dir + ; + window_name: ident { From 280945bf29095da1c8aacdcba37aecce605fc127 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 22 Jun 2017 21:43:45 +0530 Subject: [PATCH 03/33] MDEV-12985: support percentile and median window functions Finalised the syntax and have started implementing the class for the PERCENTILE_DISC --- sql/item_sum.h | 3 +- sql/item_windowfunc.h | 69 +++++++++++++++++++++++++++++++++++++++++++
sql/sql_window.cc | 16 ++++++++++ sql/sql_yacc.yy | 47 +++++++++++++++++++++-------- 4 files changed, 121 insertions(+), 14 deletions(-) diff --git a/sql/item_sum.h b/sql/item_sum.h index 7845ed3318f..467a77c8983 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -354,7 +354,8 @@ public: VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC, ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC, CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC, - NTH_VALUE_FUNC, LEAD_FUNC, LAG_FUNC + NTH_VALUE_FUNC, LEAD_FUNC, LAG_FUNC, PERCENTILE_CONT_FUNC, + PERCENTILE_DISC_FUNC }; Item **ref_by; /* pointer to a ref to the object used to register it */ diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 64a974d55d6..ac695729fc5 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -572,6 +572,9 @@ class Item_sum_cume_dist: public Item_sum_window_with_row_count Item_sum_cume_dist(THD *thd) : Item_sum_window_with_row_count(thd), current_row_count_(0) {} + Item_sum_cume_dist(THD *thd, Item *arg) : Item_sum_window_with_row_count(thd,arg), + current_row_count_(0) {} + double val_real() { if (get_row_count() == 0) @@ -618,6 +621,11 @@ class Item_sum_cume_dist: public Item_sum_window_with_row_count Item *get_copy(THD *thd, MEM_ROOT *mem_root) { return get_item_copy(thd, mem_root, this); } + ulonglong get_row_number() + { + return current_row_count_; + } + private: ulonglong current_row_count_; }; @@ -693,6 +701,61 @@ class Item_sum_ntile : public Item_sum_window_with_row_count ulong current_row_count_; }; +class Item_sum_percentile_disc : public Item_sum_cume_dist +{ +public: + Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg) + {} + + double val_real() + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return 0; + } + + bool add() + { + Item *arg = get_arg(0); + if (arg->is_null()) + return true; + /*implementation to be done*/ + return 
false; + } + + enum Sumfunctype sum_func() const + { + return PERCENTILE_DISC_FUNC; + } + + void clear() + { + //need to implement + } + + const char*func_name() const + { + return "percentile_disc"; + } + + void update_field() {} + const Type_handler *type_handler() const { return &type_handler_double; } + + void fix_length_and_dec() + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + } + + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy(thd, mem_root, this); } + +}; + class Item_window_func : public Item_func_or_sum { @@ -747,6 +810,8 @@ public: case Item_sum::PERCENT_RANK_FUNC: case Item_sum::CUME_DIST_FUNC: case Item_sum::NTILE_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: return true; default: return false; @@ -773,6 +838,8 @@ public: case Item_sum::PERCENT_RANK_FUNC: case Item_sum::CUME_DIST_FUNC: case Item_sum::NTILE_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: return true; default: return false; @@ -796,6 +863,8 @@ public: case Item_sum::DENSE_RANK_FUNC: case Item_sum::PERCENT_RANK_FUNC: case Item_sum::CUME_DIST_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: return true; default: return false; diff --git a/sql/sql_window.cc b/sql/sql_window.cc index d22fff9d486..a4c558b52b6 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -2490,6 +2490,20 @@ void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager, cursor_manager->add_cursor(fc); break; } + case Item_sum::PERCENTILE_DISC_FUNC: + { + fc= new Frame_unbounded_preceding(thd, + spec->partition_list, + spec->order_list); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + fc= new Frame_unbounded_following(thd, + spec->partition_list, + spec->order_list); + fc->add_sum_func(item_sum); + cursor_manager->add_cursor(fc); + break; + } default: fc= new Frame_unbounded_preceding( thd, spec->partition_list, 
spec->order_list); @@ -2514,6 +2528,8 @@ static bool is_computed_with_remove(Item_sum::Sumfunctype sum_func) case Item_sum::NTILE_FUNC: case Item_sum::FIRST_VALUE_FUNC: case Item_sum::LAST_VALUE_FUNC: + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: return false; default: return true; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 57d7ecd48c8..62dbc3860ec 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1737,6 +1737,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); window_func simple_window_func inverse_distribution_function + inverse_distribution_function_def function_call_keyword function_call_nonkeyword function_call_generic @@ -10697,23 +10698,43 @@ simple_window_func: } ; - inverse_distribution_function: - inverse_distribution_function_type '(' expr ')' WITHIN GROUP_SYM - '(' order_by_single_element_list ')' OVER_SYM '(' opt_window_ref opt_window_partition_clause ')' - { - my_yyabort_error((ER_VIEW_SELECT_VARIABLE, MYF(0))); - }; + inverse_distribution_function_def WITHIN GROUP_SYM + '(' + { Select->prepare_add_window_spec(thd); } + order_by_single_element_list ')' OVER_SYM + '(' opt_window_ref opt_window_partition_clause ')' + { + LEX *lex= Lex; + if (Select->add_window_spec(thd, lex->win_ref, + Select->group_list, + Select->order_list, + NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, + thd->lex->win_spec); + if ($$ == NULL) + MYSQL_YYABORT; + if (Select->add_window_func((Item_window_func *) $$)) + MYSQL_YYABORT; + } + ; -inverse_distribution_function_type: - PERCENTILE_CONT_SYM - {} - |PERCENTILE_DISC_SYM - {} - ; +inverse_distribution_function_def: + PERCENTILE_CONT_SYM '(' expr ')' + { + //Not yet started implementing + } + | PERCENTILE_DISC_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; order_by_single_element_list: - ORDER_SYM BY order_ident order_dir + ORDER_SYM BY order_list 
; window_name: From 129626f171377c247b71bdda602a554829e4f848 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 26 Jun 2017 01:55:05 +0530 Subject: [PATCH 04/33] Added get_item() to Cached_item_item and get_value to the Cached_item --- sql/item.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/sql/item.h b/sql/item.h index 1f743913d78..68d09577dcc 100644 --- a/sql/item.h +++ b/sql/item.h @@ -5303,6 +5303,8 @@ public: /* Compare the cached value with the source value, without copying */ virtual int cmp_read_only()=0; + virtual void clear()=0; + virtual ~Cached_item(); /*line -e1509 */ }; @@ -5320,6 +5322,14 @@ public: cmp(); item= save; } + Item* get_item() + { + return item; + } + void clear() + { + null_value= false; + } }; class Cached_item_str :public Cached_item_item @@ -5330,6 +5340,10 @@ public: Cached_item_str(THD *thd, Item *arg); bool cmp(void); int cmp_read_only(); + void clear() + { + null_value= false; + } ~Cached_item_str(); // Deallocate String:s }; @@ -5341,6 +5355,12 @@ public: Cached_item_real(Item *item_par) :Cached_item_item(item_par),value(0.0) {} bool cmp(void); int cmp_read_only(); + double get_value(){ return value;} + void clear() + { + value=0.0; + null_value= false; + } }; class Cached_item_int :public Cached_item_item @@ -5350,6 +5370,12 @@ public: Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {} bool cmp(void); int cmp_read_only(); + longlong get_value(){ return value;} + void clear() + { + value=0.0; + null_value= false; + } }; @@ -5360,6 +5386,12 @@ public: Cached_item_decimal(Item *item_par); bool cmp(void); int cmp_read_only(); + my_decimal get_value(){ return value;}; + void clear() + { + null_value= false; + my_decimal_set_zero(&value); + } }; class Cached_item_field :public Cached_item @@ -5377,6 +5409,10 @@ public: } bool cmp(void); int cmp_read_only(); + void clear() + { + null_value= false; + } }; class Item_default_value : public Item_field From 
18747a4baa9cc68766eaa7a40e92f8c3d873631a Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 26 Jun 2017 02:15:19 +0530 Subject: [PATCH 05/33] Added value field to Item_sum_percentile_disc Check for single element in the order_list is added --- sql/item_windowfunc.cc | 16 ++++++++++++++++ sql/item_windowfunc.h | 29 ++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 835a3cbfdae..50a8c5d82cd 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -108,6 +108,17 @@ Item_window_func::fix_fields(THD *thd, Item **ref) my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); return true; } + + if (only_single_element_order_list()) + { + // need to change the error, the error should say that we have more than one element in the order list + if (window_spec->order_list->elements != 1) + { + my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); + return true; + } + } + /* TODO: why the last parameter is 'ref' in this call? What if window_func decides to substitute itself for something else and does *ref=.... ? 
@@ -194,6 +205,11 @@ void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec) clear(); } +void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_spec) +{ + setup_percentile_func(thd, window_spec->order_list); +} + bool Item_sum_dense_rank::add() { if (peer_tracker->check_if_next_group() || first_add) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index ac695729fc5..3a774a2ff57 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -705,7 +705,7 @@ class Item_sum_percentile_disc : public Item_sum_cume_dist { public: Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg) - {} + value(NULL) {} double val_real() { @@ -753,7 +753,23 @@ public: Item *get_copy(THD *thd, MEM_ROOT *mem_root) { return get_item_copy(thd, mem_root, this); } + void setup_window_func(THD *thd, Window_spec *window_spec); + void setup_percentile_func(THD *thd, SQL_I_List *list) + { + value= new_Cached_item(thd, list->first->item[0], FALSE); + } + void cleanup() + { + if (value) + { + delete value; + value= NULL; + } + Item_sum_num::cleanup(); + } +private: + Cached_item *value; }; @@ -871,6 +887,17 @@ public: } } + bool only_single_element_order_list() const + { + switch(window_func()->sum_func()){ + case Item_sum::PERCENTILE_CONT_FUNC: + case Item_sum::PERCENTILE_DISC_FUNC: + return true; + default: + return false; + } + } + /* Computation functions. TODO: consoder merging these with class Group_bound_tracker. 
From cc046fa92c8724801c3bd725b62a21b849e738f6 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 26 Jun 2017 03:39:25 +0530 Subject: [PATCH 06/33] A basic implementation of the add function is added --- sql/item_windowfunc.h | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 3a774a2ff57..6595012fabf 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -704,8 +704,8 @@ class Item_sum_ntile : public Item_sum_window_with_row_count class Item_sum_percentile_disc : public Item_sum_cume_dist { public: - Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg) - value(NULL) {} + Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), + value(NULL), val_calculated(FALSE) {} double val_real() { @@ -715,7 +715,18 @@ public: return 0; } null_value= false; - return 0; + return ((Cached_item_int*) value)->get_value(); + } + + longlong val_int() + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return ((Cached_item_int*) value)->get_value(); } bool add() @@ -724,6 +735,19 @@ public: if (arg->is_null()) return true; /*implementation to be done*/ + Item_sum_cume_dist::add(); + double val1= Item_sum_cume_dist::val_real(); + /* need to check type and return value accordingly*/ + double val2 =arg->val_real_from_decimal(); + + /* use Cached_item to do the comparision using cmp_read_only() */ + + if( val1 >= val2 && !val_calculated) + { + val_calculated= true; + value->cmp(); + return false; + } return false; } @@ -734,7 +758,9 @@ public: void clear() { - //need to implement + val_calculated= false; + value->clear(); + Item_sum_cume_dist::clear(); } const char*func_name() const @@ -770,6 +796,7 @@ public: private: Cached_item *value; + bool val_calculated; }; From 31f1541f1e367f6eb91f948c4e814bb6554e6b78 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 
26 Jun 2017 03:53:27 +0530 Subject: [PATCH 07/33] Setting handler to have the return type as that of the element by which we are ordering the partition --- sql/item.h | 2 +- sql/item_windowfunc.cc | 5 +++++ sql/item_windowfunc.h | 12 ++++++++---- sql/sql_window.cc | 4 ++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/sql/item.h b/sql/item.h index 68d09577dcc..0e7582a9ac4 100644 --- a/sql/item.h +++ b/sql/item.h @@ -5386,7 +5386,7 @@ public: Cached_item_decimal(Item *item_par); bool cmp(void); int cmp_read_only(); - my_decimal get_value(){ return value;}; + my_decimal *get_value(){ return &value;}; void clear() { null_value= false; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 50a8c5d82cd..5e77132ad38 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -210,6 +210,11 @@ void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_s setup_percentile_func(thd, window_spec->order_list); } +void Item_sum_percentile_disc::set_type_handler(Window_spec *window_spec) +{ + type_handler()->get_handler_by_cmp_type(window_spec->order_list->first->item[0]->result_type()); +} + bool Item_sum_dense_rank::add() { if (peer_tracker->check_if_next_group() || first_add) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 6595012fabf..9087404ea3e 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -701,11 +701,13 @@ class Item_sum_ntile : public Item_sum_window_with_row_count ulong current_row_count_; }; -class Item_sum_percentile_disc : public Item_sum_cume_dist +class Item_sum_percentile_disc : public Item_sum_cume_dist, + public Type_handler_hybrid_field_type { public: Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), - value(NULL), val_calculated(FALSE) {} + Type_handler_hybrid_field_type(&type_handler_longlong), + value(NULL), val_calculated(FALSE) {} double val_real() { @@ -715,7 +717,7 @@ public: return 0; } null_value= false; - return 
((Cached_item_int*) value)->get_value(); + return ((Cached_item_real*) value)->get_value(); } longlong val_int() @@ -769,7 +771,9 @@ public: } void update_field() {} - const Type_handler *type_handler() const { return &type_handler_double; } + void set_type_handler(Window_spec *window_spec); + const Type_handler *type_handler() const + {return Type_handler_hybrid_field_type::type_handler();} void fix_length_and_dec() { diff --git a/sql/sql_window.cc b/sql/sql_window.cc index a4c558b52b6..dc8c66120ff 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -319,6 +319,10 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, while ((win_func_item= li++)) { win_func_item->update_used_tables(); + if (win_func_item->only_single_element_order_list()) + { + ((Item_sum_percentile_disc*)win_func_item)->set_type_handler(win_func_item->window_spec); + } } DBUG_RETURN(0); From c85552f42b12026f4a1f6c63973601c494b8f8b5 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 27 Jun 2017 02:24:32 +0530 Subject: [PATCH 08/33] Added a class Frame_unbounded_following_set_count_special, which is required to ignore all the null values while calculating the number of rows in the partition --- sql/sql_window.cc | 85 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/sql/sql_window.cc b/sql/sql_window.cc index dc8c66120ff..e8925d5f45d 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -908,13 +908,14 @@ private: class Partition_read_cursor : public Table_read_cursor { public: - Partition_read_cursor(THD *thd, SQL_I_List *partition_list) : - bound_tracker(thd, partition_list) {} + Partition_read_cursor(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : + bound_tracker(thd, partition_list), order_tracker(thd, order_list) {} void init(READ_RECORD *info) { Table_read_cursor::init(info); bound_tracker.init(); + order_tracker.init(); end_of_partition= false; } @@ -966,9 +967,39 @@ public: } 
return 0; } + bool next_func(ha_rows *counter) + { + if (next()) + return true; + if (!check_for_null_row()) + { + (*counter)++; + } + return false; + } + bool fetch_func(ha_rows *counter) + { + if (fetch()) + return true; + if (!check_for_null_row()) + { + (*counter)++; + } + return false; + } + bool check_for_null_row() + { + if (!end_of_partition) + { + if (order_tracker.compare_with_cache_for_null_values()) + return true; + } + return false; + } private: Group_bound_tracker bound_tracker; + Group_bound_tracker order_tracker; bool end_of_partition; }; @@ -1200,7 +1231,7 @@ public: SQL_I_List *partition_list, SQL_I_List *order_list, bool is_preceding_arg, Item *n_val_arg) : - cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), + cursor(thd, partition_list, NULL), n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg) { DBUG_ASSERT(order_list->elements == 1); @@ -1339,7 +1370,7 @@ public: SQL_I_List *partition_list, SQL_I_List *order_list, bool is_preceding_arg, Item *n_val_arg) : - cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), + cursor(thd, partition_list, NULL), n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg), added_values(false) { DBUG_ASSERT(order_list->elements == 1); @@ -1469,7 +1500,7 @@ public: Frame_range_current_row_bottom(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - cursor(thd, partition_list), peer_tracker(thd, order_list) + cursor(thd, partition_list, NULL), peer_tracker(thd, order_list) { } @@ -1684,7 +1715,7 @@ public: Frame_unbounded_following(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - cursor(thd, partition_list) {} + cursor(thd, partition_list, order_list){} void init(READ_RECORD *info) { @@ -1756,6 +1787,35 @@ public: } }; +class Frame_unbounded_following_set_count_special : public Frame_unbounded_following_set_count +{ +public: + Frame_unbounded_following_set_count_special( + THD *thd, + SQL_I_List *partition_list, SQL_I_List *order_list) : + 
Frame_unbounded_following_set_count(thd, partition_list, order_list) + {} + + void next_partition(ha_rows rownum) + { + ha_rows num_rows_in_partition= 0; + if (cursor.fetch_func(&num_rows_in_partition)) + return; + + /* Walk to the end of the partition, find how many rows there are. */ + while (!cursor.next_func(&num_rows_in_partition)); + + List_iterator_fast it(sum_functions); + Item_sum* item; + while ((item= it++)) + { + Item_sum_window_with_row_count* item_with_row_count = + static_cast(item); + item_with_row_count->set_row_count(num_rows_in_partition); + } + } +}; + ///////////////////////////////////////////////////////////////////////////// // ROWS-type frame bounds ///////////////////////////////////////////////////////////////////////////// @@ -1953,7 +2013,7 @@ public: SQL_I_List *order_list, bool is_top_bound_arg, ha_rows n_rows_arg) : is_top_bound(is_top_bound_arg), n_rows(n_rows_arg), - cursor(thd, partition_list) + cursor(thd, partition_list, NULL) { } @@ -2564,9 +2624,18 @@ void get_window_functions_required_cursors( */ if (item_win_func->requires_partition_size()) { - fc= new Frame_unbounded_following_set_count(thd, + if (item_win_func->only_single_element_order_list()) + { + fc= new Frame_unbounded_following_set_count_special(thd, item_win_func->window_spec->partition_list, item_win_func->window_spec->order_list); + } + else + { + fc= new Frame_unbounded_following_set_count(thd, + item_win_func->window_spec->partition_list, + item_win_func->window_spec->order_list); + } fc->add_sum_func(sum_func); cursor_manager->add_cursor(fc); } From ba9fbc6a83f75bce99ff5e2295a6929203297471 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 27 Jun 2017 02:50:18 +0530 Subject: [PATCH 09/33] implementation of add() function added to the Item_sum_percentile_disc class --- sql/item_windowfunc.h | 64 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 
9087404ea3e..3178bdeb54b 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -24,6 +24,7 @@ class Window_spec; int test_if_group_changed(List &list); + /* A wrapper around test_if_group_changed */ class Group_bound_tracker { @@ -31,10 +32,13 @@ public: Group_bound_tracker(THD *thd, SQL_I_List *list) { - for (ORDER *curr = list->first; curr; curr=curr->next) + if (list) { - Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); - group_fields.push_back(tmp); + for (ORDER *curr = list->first; curr; curr=curr->next) + { + Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); + group_fields.push_back(tmp); + } } } @@ -86,6 +90,19 @@ public: return 0; } + bool compare_with_cache_for_null_values() + { + List_iterator li(group_fields); + Cached_item *ptr; + while ((ptr= li++)) + { + ptr->cmp(); + if (ptr->null_value) + return true; + } + return false; + } + private: List group_fields; /* @@ -100,6 +117,7 @@ private: bool first_check; }; + /* ROW_NUMBER() OVER (...) @@ -707,7 +725,7 @@ class Item_sum_percentile_disc : public Item_sum_cume_dist, public: Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), Type_handler_hybrid_field_type(&type_handler_longlong), - value(NULL), val_calculated(FALSE) {} + value(NULL), result_value(NULL), val_calculated(FALSE) {} double val_real() { @@ -717,7 +735,7 @@ public: return 0; } null_value= false; - return ((Cached_item_real*) value)->get_value(); + return ((Cached_item_real*) result_value)->get_value(); } longlong val_int() @@ -728,7 +746,18 @@ public: return 0; } null_value= false; - return ((Cached_item_int*) value)->get_value(); + return ((Cached_item_int*) result_value)->get_value(); + } + + my_decimal* val_decimal(my_decimal* dec) + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return ((Cached_item_decimal*) result_value)->get_value(); } bool add() @@ -736,7 +765,18 @@ public: Item *arg = get_arg(0); if 
(arg->is_null()) return true; - /*implementation to be done*/ + /* + need to ensure that the Item arg is constant across the entire partition + and its value ranges between [0,1] + */ + value->cmp(); + + /* for the null values of the row, we dont count take those rows in account for calculating + the CUME_DIST */ + + if(value->null_value) + return false; + Item_sum_cume_dist::add(); double val1= Item_sum_cume_dist::val_real(); /* need to check type and return value accordingly*/ @@ -747,7 +787,7 @@ public: if( val1 >= val2 && !val_calculated) { val_calculated= true; - value->cmp(); + result_value->cmp(); return false; } return false; @@ -762,6 +802,7 @@ public: { val_calculated= false; value->clear(); + result_value->clear(); Item_sum_cume_dist::clear(); } @@ -787,6 +828,7 @@ public: void setup_percentile_func(THD *thd, SQL_I_List *list) { value= new_Cached_item(thd, list->first->item[0], FALSE); + result_value= new_Cached_item(thd, list->first->item[0], FALSE); } void cleanup() { @@ -795,11 +837,17 @@ public: delete value; value= NULL; } + if(result_value) + { + delete result_value; + result_value= NULL; + } Item_sum_num::cleanup(); } private: Cached_item *value; + Cached_item *result_value; bool val_calculated; }; From d2214da4d0d5d8509dfe928733b5bb866ba59195 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 27 Jun 2017 12:44:00 +0530 Subject: [PATCH 10/33] Test case added for the percentile disc function --- mysql-test/t/percentile.test | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 mysql-test/t/percentile.test diff --git a/mysql-test/t/percentile.test b/mysql-test/t/percentile.test new file mode 100644 index 00000000000..0958fc05e7d --- /dev/null +++ b/mysql-test/t/percentile.test @@ -0,0 +1,41 @@ +CREATE TABLE student (name CHAR(10), test double, score TINYINT); + +INSERT INTO student VALUES +('Chun', 0, null), ('Chun', 0, 4), +('Esben', 1, null), ('Esben', 1, null), +('Kaolin', 0.5, 56), ('Kaolin', 0.5, 88), 
+('Tatiana', 0.8, 2), ('Tatiana', 0.8, 1); + + + +select name, percentile_disc(0.6) within group(order by score) over (partition by name) from student; +select name, percentile_disc(test) within group(order by score) over (partition by name) from student; +select name, percentile_disc(0.4) within group(order by score) over (partition by name) from student; + + +#select name, percentile_cont(null) within group(order by score) over (partition by name) from student; +#select name, cume_dist() over (partition by name order by score) from student; + + +#normal parsing +#select percentile_cont(0.5) within group(order by score) over w1 from student +#window w1 AS (partition by name); + +# no partition clause +#select percentile_cont(0.5) within group(order by score) over () from student; + + +# only one sort allowed +#select percentile_cont(0.5) within group(order by score) over (partition by name); + +#parameter value should be in the range of 0 to 1 +#select percentile_cont(1.5) within group(order by score) over (partition by name); + + +# +#select rank() over (partition by name order by score ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from student; + + + +drop table student; + From 01d2b6e9d94b26ae0df8e900958f91f0c23f203e Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 6 Jul 2017 01:29:49 +0530 Subject: [PATCH 11/33] Reimplemented the percentile functions using Item_cache instead of Cached_item --- sql/item.h | 5 +-- sql/item_windowfunc.cc | 14 +++---- sql/item_windowfunc.h | 82 +++++++++++++++++++++++--------------------- sql/sql_window.cc | 90 ++++++++++++++++++------------------------ 4 files changed, 83 insertions(+), 108 deletions(-) diff --git a/sql/item.h b/sql/item.h index 0e7582a9ac4..067deba8b58 100644 --- a/sql/item.h +++ b/sql/item.h @@ -5322,10 +5322,7 @@ public: cmp(); item= save; } - Item* get_item() - { - return item; - } + void clear() { null_value= false; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 
5e77132ad38..8a35cd34e82 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -111,7 +111,7 @@ Item_window_func::fix_fields(THD *thd, Item **ref) if (only_single_element_order_list()) { - // need to change the error, the error should say that we have more than one element in the order list + //TODO (varun): need to change the error, the error should say that we have more than one element in the order list if (window_spec->order_list->elements != 1) { my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); @@ -207,12 +207,12 @@ void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec) void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_spec) { - setup_percentile_func(thd, window_spec->order_list); -} - -void Item_sum_percentile_disc::set_type_handler(Window_spec *window_spec) -{ - type_handler()->get_handler_by_cmp_type(window_spec->order_list->first->item[0]->result_type()); + order_item= window_spec->order_list->first->item[0]; + set_handler_by_cmp_type(order_item->result_type()); + if (!(value= order_item->get_cache(thd))) + return; + value->setup(thd, order_item); + value->store(order_item); } bool Item_sum_dense_rank::add() diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 3178bdeb54b..7c6c3c87025 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -42,6 +42,12 @@ public: } } + Group_bound_tracker(THD *thd, Item *item) + { + Cached_item *tmp= new_Cached_item(thd, item, FALSE); + group_fields.push_back(tmp); + } + void init() { first_check= true; @@ -117,7 +123,6 @@ private: bool first_check; }; - /* ROW_NUMBER() OVER (...) 
@@ -725,7 +730,7 @@ class Item_sum_percentile_disc : public Item_sum_cume_dist, public: Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), Type_handler_hybrid_field_type(&type_handler_longlong), - value(NULL), result_value(NULL), val_calculated(FALSE) {} + value(NULL), val_calculated(FALSE), first_call(TRUE),prev_value(0), order_item(NULL){} double val_real() { @@ -735,7 +740,7 @@ public: return 0; } null_value= false; - return ((Cached_item_real*) result_value)->get_value(); + return value->val_real(); } longlong val_int() @@ -746,7 +751,7 @@ public: return 0; } null_value= false; - return ((Cached_item_int*) result_value)->get_value(); + return value->val_int(); } my_decimal* val_decimal(my_decimal* dec) @@ -757,7 +762,7 @@ public: return 0; } null_value= false; - return ((Cached_item_decimal*) result_value)->get_value(); + return value->val_decimal(dec); } bool add() @@ -765,31 +770,32 @@ public: Item *arg = get_arg(0); if (arg->is_null()) return true; - /* - need to ensure that the Item arg is constant across the entire partition - and its value ranges between [0,1] - */ - value->cmp(); - /* for the null values of the row, we dont count take those rows in account for calculating - the CUME_DIST */ + if (first_call) + { + prev_value= arg->val_real(); + first_call= false; + } - if(value->null_value) + if(prev_value != arg->val_real() || prev_value >1 || prev_value < 0) + { + // TODO(varun) need to add an error here , check the MDEV-12985 for the information + return true; + } + + if (val_calculated) return false; + value->store(order_item); + value->cache_value(); + if (value->null_value) + return false; + Item_sum_cume_dist::add(); - double val1= Item_sum_cume_dist::val_real(); - /* need to check type and return value accordingly*/ - double val2 =arg->val_real_from_decimal(); + double val= Item_sum_cume_dist::val_real(); - /* use Cached_item to do the comparision using cmp_read_only() */ - - if( val1 >= val2 && !val_calculated) - { + 
if(val >= prev_value && !val_calculated) val_calculated= true; - result_value->cmp(); - return false; - } return false; } @@ -801,8 +807,8 @@ public: void clear() { val_calculated= false; + first_call= true; value->clear(); - result_value->clear(); Item_sum_cume_dist::clear(); } @@ -825,33 +831,19 @@ public: Item *get_copy(THD *thd, MEM_ROOT *mem_root) { return get_item_copy(thd, mem_root, this); } void setup_window_func(THD *thd, Window_spec *window_spec); - void setup_percentile_func(THD *thd, SQL_I_List *list) - { - value= new_Cached_item(thd, list->first->item[0], FALSE); - result_value= new_Cached_item(thd, list->first->item[0], FALSE); - } - void cleanup() - { - if (value) - { - delete value; - value= NULL; - } - if(result_value) - { - delete result_value; - result_value= NULL; - } - Item_sum_num::cleanup(); - } + void setup_hybrid(THD *thd, Item *item); private: - Cached_item *value; - Cached_item *result_value; + Item_cache *value; bool val_calculated; + bool first_call; + double prev_value; + Item *order_item; }; + + class Item_window_func : public Item_func_or_sum { /* Window function parameters as we've got them from the parser */ diff --git a/sql/sql_window.cc b/sql/sql_window.cc index e8925d5f45d..bb7742b029a 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -319,10 +319,6 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, while ((win_func_item= li++)) { win_func_item->update_used_tables(); - if (win_func_item->only_single_element_order_list()) - { - ((Item_sum_percentile_disc*)win_func_item)->set_type_handler(win_func_item->window_spec); - } } DBUG_RETURN(0); @@ -908,14 +904,13 @@ private: class Partition_read_cursor : public Table_read_cursor { public: - Partition_read_cursor(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - bound_tracker(thd, partition_list), order_tracker(thd, order_list) {} + Partition_read_cursor(THD *thd, SQL_I_List *partition_list) : + bound_tracker(thd, partition_list){} 
void init(READ_RECORD *info) { Table_read_cursor::init(info); bound_tracker.init(); - order_tracker.init(); end_of_partition= false; } @@ -967,42 +962,18 @@ public: } return 0; } - bool next_func(ha_rows *counter) + bool check_for_end_of_partition() { - if (next()) - return true; - if (!check_for_null_row()) - { - (*counter)++; - } - return false; - } - bool fetch_func(ha_rows *counter) - { - if (fetch()) - return true; - if (!check_for_null_row()) - { - (*counter)++; - } - return false; - } - bool check_for_null_row() - { - if (!end_of_partition) - { - if (order_tracker.compare_with_cache_for_null_values()) - return true; - } - return false; + return end_of_partition; } private: Group_bound_tracker bound_tracker; - Group_bound_tracker order_tracker; bool end_of_partition; }; + + ///////////////////////////////////////////////////////////////////////////// /* @@ -1178,7 +1149,7 @@ public: Frame_cursor *cursor; while ((cursor= iter++)) cursor->pre_next_row(); - + iter.rewind(); while ((cursor= iter++)) cursor->next_row(); @@ -1231,7 +1202,7 @@ public: SQL_I_List *partition_list, SQL_I_List *order_list, bool is_preceding_arg, Item *n_val_arg) : - cursor(thd, partition_list, NULL), n_val(n_val_arg), item_add(NULL), + cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg) { DBUG_ASSERT(order_list->elements == 1); @@ -1370,7 +1341,7 @@ public: SQL_I_List *partition_list, SQL_I_List *order_list, bool is_preceding_arg, Item *n_val_arg) : - cursor(thd, partition_list, NULL), n_val(n_val_arg), item_add(NULL), + cursor(thd, partition_list), n_val(n_val_arg), item_add(NULL), is_preceding(is_preceding_arg), added_values(false) { DBUG_ASSERT(order_list->elements == 1); @@ -1500,7 +1471,7 @@ public: Frame_range_current_row_bottom(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - cursor(thd, partition_list, NULL), peer_tracker(thd, order_list) + cursor(thd, partition_list), peer_tracker(thd, order_list) { } @@ -1715,7 +1686,7 
@@ public: Frame_unbounded_following(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - cursor(thd, partition_list, order_list){} + cursor(thd, partition_list){} void init(READ_RECORD *info) { @@ -1758,7 +1729,7 @@ public: Frame_unbounded_following_set_count( THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - Frame_unbounded_following(thd, partition_list, order_list) {} + Frame_unbounded_following(thd, partition_list, order_list){} void next_partition(ha_rows rownum) { @@ -1769,7 +1740,9 @@ public: /* Walk to the end of the partition, find how many rows there are. */ while (!cursor.next()) + { num_rows_in_partition++; + } List_iterator_fast it(sum_functions); Item_sum* item; @@ -1787,23 +1760,29 @@ public: } }; -class Frame_unbounded_following_set_count_special : public Frame_unbounded_following_set_count +class Frame_unbounded_following_set_count_special: public Frame_unbounded_following_set_count { -public: - Frame_unbounded_following_set_count_special( - THD *thd, - SQL_I_List *partition_list, SQL_I_List *order_list) : - Frame_unbounded_following_set_count(thd, partition_list, order_list) - {} +public: + Frame_unbounded_following_set_count_special(THD *thd, + SQL_I_List *partition_list, + SQL_I_List *order_list, Item* arg) : + Frame_unbounded_following_set_count(thd,partition_list, order_list) + { + order_item= order_list->first->item[0]; + } void next_partition(ha_rows rownum) { ha_rows num_rows_in_partition= 0; - if (cursor.fetch_func(&num_rows_in_partition)) + if (cursor.fetch()) return; /* Walk to the end of the partition, find how many rows there are. 
*/ - while (!cursor.next_func(&num_rows_in_partition)); + do + { + if (!order_item->is_null()) + num_rows_in_partition++; + }while (!cursor.next()); List_iterator_fast it(sum_functions); Item_sum* item; @@ -1814,6 +1793,13 @@ public: item_with_row_count->set_row_count(num_rows_in_partition); } } + + ha_rows get_curr_rownum() const + { + return cursor.get_rownum(); + } +private: + Item* order_item; }; ///////////////////////////////////////////////////////////////////////////// @@ -2013,7 +1999,7 @@ public: SQL_I_List *order_list, bool is_top_bound_arg, ha_rows n_rows_arg) : is_top_bound(is_top_bound_arg), n_rows(n_rows_arg), - cursor(thd, partition_list, NULL) + cursor(thd, partition_list) { } @@ -2628,7 +2614,7 @@ void get_window_functions_required_cursors( { fc= new Frame_unbounded_following_set_count_special(thd, item_win_func->window_spec->partition_list, - item_win_func->window_spec->order_list); + item_win_func->window_spec->order_list, item_win_func->window_func()->get_arg(0)); } else { From 275ce39f05c364ca25bb7ecaa6a0bb02df83bdc4 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Fri, 7 Jul 2017 17:37:06 +0530 Subject: [PATCH 12/33] Percentile class implemented, most of the functions have the same functionalite as the percentile cont class --- sql/item_windowfunc.cc | 21 ++++++ sql/item_windowfunc.h | 161 ++++++++++++++++++++++++++++++++++++++++- sql/sql_window.cc | 1 + sql/sql_yacc.yy | 4 +- 4 files changed, 184 insertions(+), 3 deletions(-) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 8a35cd34e82..051dc203ab9 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -117,6 +117,12 @@ Item_window_func::fix_fields(THD *thd, Item **ref) my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); return true; } + /*switch(window_spec->order_list->firt->item[0]->type()) + { + case INT_TYPE: + default: + break; + }*/ } /* @@ -215,6 +221,21 @@ void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec 
*window_s value->store(order_item); } +void Item_sum_percentile_cont::setup_window_func(THD *thd, Window_spec *window_spec) +{ + order_item= window_spec->order_list->first->item[0]; + //set_handler_by_cmp_type(order_item->result_type()); + if (!(ceil_value= order_item->get_cache(thd))) + return; + ceil_value->setup(thd, order_item); + ceil_value->store(order_item); + + if (!(floor_value= order_item->get_cache(thd))) + return; + floor_value->setup(thd, order_item); + floor_value->store(order_item); +} + bool Item_sum_dense_rank::add() { if (peer_tracker->check_if_next_group() || first_add) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 7c6c3c87025..9dfff36f952 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -646,7 +646,7 @@ class Item_sum_cume_dist: public Item_sum_window_with_row_count ulonglong get_row_number() { - return current_row_count_; + return current_row_count_ ; } private: @@ -774,10 +774,14 @@ public: if (first_call) { prev_value= arg->val_real(); + if (prev_value >1 || prev_value < 0) + { + return true; + } first_call= false; } - if(prev_value != arg->val_real() || prev_value >1 || prev_value < 0) + if(prev_value != arg->val_real()) { // TODO(varun) need to add an error here , check the MDEV-12985 for the information return true; @@ -841,6 +845,159 @@ private: Item *order_item; }; +class Item_sum_percentile_cont : public Item_sum_cume_dist, + public Type_handler_hybrid_field_type +{ +public: + Item_sum_percentile_cont(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), + Type_handler_hybrid_field_type(&type_handler_double), + floor_value(NULL), ceil_value(NULL), first_call(TRUE),prev_value(0), + ceil_val_calculated(FALSE), floor_val_calculated(FALSE), order_item(NULL){} + + double val_real() + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + double val= 1 + prev_value * (get_row_count()-1); + + /* + Applying the formula to get the value + If (CRN = 
FRN = RN) then the result is (value of expression from row at RN) + Otherwise the result is + (CRN - RN) * (value of expression for row at FRN) + + (RN - FRN) * (value of expression for row at CRN) + */ + + if(ceil(val) == floor(val)) + return floor_value->val_real(); + + double ret_val= ((val - floor(val)) * ceil_value->val_real()) + + ((ceil(val) - val) * floor_value->val_real()); + + return ret_val; + + } + longlong val_int() + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return 0; + } + + my_decimal* val_decimal(my_decimal* dec) + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + return 0; + } + null_value= false; + return ceil_value->val_decimal(dec); + } + + bool add() + { + Item *arg = get_arg(0); + if (arg->is_null()) + return true; + + if (first_call) + { + first_call= false; + prev_value= arg->val_real(); + if (prev_value >1 || prev_value < 0) + { + // TODO(varun) need to add an error here , check the MDEV-12985 for the information + return true; + } + } + + if (prev_value != arg->val_real()) + { + // TODO(varun) need to add an error here , check the MDEV-12985 for the information + return true; + } + + if (!floor_val_calculated) + { + floor_value->store(order_item); + floor_value->cache_value(); + if (floor_value->null_value) + return false; + } + if (floor_val_calculated && !ceil_val_calculated) + { + ceil_value->store(order_item); + ceil_value->cache_value(); + if (ceil_value->null_value) + return false; + } + + Item_sum_cume_dist::add(); + double val= 1 + prev_value * (get_row_count()-1); + + if (!floor_val_calculated && get_row_number() == floor(val)) + floor_val_calculated= true; + + if (!ceil_val_calculated && get_row_number() == ceil(val)) + ceil_val_calculated= true; + return false; + } + + enum Sumfunctype sum_func() const + { + return PERCENTILE_DISC_FUNC; + } + + void clear() + { + first_call= true; + floor_value->clear(); + 
ceil_value->clear(); + floor_val_calculated= false; + ceil_val_calculated= false; + Item_sum_cume_dist::clear(); + } + + const char*func_name() const + { + return "percentile_cont"; + } + void update_field() {} + void set_type_handler(Window_spec *window_spec); + const Type_handler *type_handler() const + {return Type_handler_hybrid_field_type::type_handler();} + + void fix_length_and_dec() + { + decimals = 10; // TODO-cvicentiu find out how many decimals the standard + // requires. + } + + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy(thd, mem_root, this); } + void setup_window_func(THD *thd, Window_spec *window_spec); + void setup_hybrid(THD *thd, Item *item); + +private: + Item_cache *floor_value; + Item_cache *ceil_value; + bool first_call; + double prev_value; + bool ceil_val_calculated; + bool floor_val_calculated; + Item *order_item; +}; + diff --git a/sql/sql_window.cc b/sql/sql_window.cc index bb7742b029a..d1628c46e2a 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -2540,6 +2540,7 @@ void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager, cursor_manager->add_cursor(fc); break; } + case Item_sum::PERCENTILE_CONT_FUNC: case Item_sum::PERCENTILE_DISC_FUNC: { fc= new Frame_unbounded_preceding(thd, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 62dbc3860ec..b5bc70639b0 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -10723,7 +10723,9 @@ inverse_distribution_function: inverse_distribution_function_def: PERCENTILE_CONT_SYM '(' expr ')' { - //Not yet started implementing + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; } | PERCENTILE_DISC_SYM '(' expr ')' { From 3393005e958de87909ac5fa7f8e6ca92f8cc8b21 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 10 Jul 2017 01:12:56 +0530 Subject: [PATCH 13/33] Ensured that the the element in the order by clause should have a numerical time, if not throw an error --- sql/item_windowfunc.cc | 18 ++++++++++++++++++ 
sql/sql_window.cc | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 051dc203ab9..d5e13054755 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -185,6 +185,21 @@ void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, window_func()->setup_caches(thd); } +bool Item_window_func::check_order_list() +{ + if (only_single_element_order_list()) + { + Item_result rtype= window_spec->order_list->first->item[0]->result_type(); + if (rtype != REAL_RESULT && rtype != INT_RESULT && + rtype != DECIMAL_RESULT) + { + // TODO(varun) please change the error name + my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0),"percentile functions"); + return TRUE; + } + } + return FALSE; +} /* This must be called before attempting to compute the window function values. @@ -224,6 +239,9 @@ void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_s void Item_sum_percentile_cont::setup_window_func(THD *thd, Window_spec *window_spec) { order_item= window_spec->order_list->first->item[0]; + /* TODO(varun): need to discuss and finalise what type should we + return for percentile cont functions + */ //set_handler_by_cmp_type(order_item->result_type()); if (!(ceil_value= order_item->get_cache(thd))) return; diff --git a/sql/sql_window.cc b/sql/sql_window.cc index d1628c46e2a..08e93dfcce2 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -321,6 +321,12 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, win_func_item->update_used_tables(); } + li.rewind(); + while((win_func_item= li++)) + { + if (win_func_item->check_order_list()) + DBUG_RETURN(1); + } DBUG_RETURN(0); } From eb2187a24f43fa22d0eb12ed9c9bae13faf16bd5 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 13:19:20 +0530 Subject: [PATCH 14/33] Val_str function added for the percentile_disc function, as it can have result type as STRING_RESULT --- sql/item_windowfunc.h | 11 
+++++++++++ 1 file changed, 11 insertions(+) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 9dfff36f952..c5c2c0aa004 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -765,6 +765,17 @@ public: return value->val_decimal(dec); } + String* val_str(String *str) + { + if (get_row_count() == 0 || get_arg(0)->is_null()) + { + null_value= true; + return 0; + } + null_value= false; + return value->val_str(str); + } + bool add() { Item *arg = get_arg(0); From 64a2a3029577e1539bcf029c73d616b40b657b1c Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 13:21:23 +0530 Subject: [PATCH 15/33] Error codes added for the percentile functions, the errors are -ER_NOT_SINGLE_ELEMENT_ORDER_LIST -ER_WRONG_TYPE_FOR_PERCENTILE_CONT -ER_ARGUMENT_NOT_CONSTANT -ER_ARGUMENT_OUT_OF_RANGE --- sql/share/errmsg-utf8.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index e414a674af6..1011d540e51 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7786,3 +7786,11 @@ ER_COMPRESSED_COLUMN_USED_AS_KEY eng "Compressed column '%-.192s' can't be used in key specification" ER_UNKNOWN_COMPRESSION_METHOD eng "Unknown compression method: %s" +ER_NOT_SINGLE_ELEMENT_ORDER_LIST + eng "Incorrect number of elements in the order list for '%s'" +ER_WRONG_TYPE_FOR_PERCENTILE_CONT + eng "Numeric datatype is required for Percentile_CONT function" +ER_ARGUMENT_NOT_CONSTANT + eng "Argument to the percentile functions is not a constant" +ER_ARGUMENT_OUT_OF_RANGE + eng "Argument to the percentile functions does not belong to the range [0,1]" \ No newline at end of file From 330577988f23bcee35fb3956d35d9c51f410ea46 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 13:25:08 +0530 Subject: [PATCH 16/33] has_error field added to the item_sum class. This field ensures that query is terminated if we get any error during the add function call. 
This is currently used only for the percentile functions --- sql/item_sum.cc | 3 ++- sql/item_sum.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index b047dc4ea4d..4f9cdfe20e8 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -441,6 +441,7 @@ Item_sum::Item_sum(THD *thd, List &list): Item_func_or_sum(thd, list) mark_as_sum_func(); init_aggregator(); list.empty(); // Fields are used + has_error= FALSE; } @@ -452,7 +453,7 @@ Item_sum::Item_sum(THD *thd, Item_sum *item): Item_func_or_sum(thd, item), aggr_sel(item->aggr_sel), nest_level(item->nest_level), aggr_level(item->aggr_level), - quick_group(item->quick_group), + quick_group(item->quick_group), has_error(FALSE), orig_args(NULL) { if (arg_count <= 2) diff --git a/sql/item_sum.h b/sql/item_sum.h index 467a77c8983..a3bcf397db7 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -367,6 +367,7 @@ public: int8 max_arg_level; /* max level of unbound column references */ int8 max_sum_func_level;/* max level of aggregation for embedded functions */ bool quick_group; /* If incremental update of fields */ + bool has_error; /* This list is used by the check for mixing non aggregated fields and sum functions in the ONLY_FULL_GROUP_BY_MODE. 
We save all outer fields @@ -388,19 +389,19 @@ protected: public: void mark_as_sum_func(); - Item_sum(THD *thd): Item_func_or_sum(thd), quick_group(1) + Item_sum(THD *thd): Item_func_or_sum(thd), quick_group(1), has_error(0) { mark_as_sum_func(); init_aggregator(); } Item_sum(THD *thd, Item *a): Item_func_or_sum(thd, a), quick_group(1), - orig_args(tmp_orig_args) + has_error(0), orig_args(tmp_orig_args) { mark_as_sum_func(); init_aggregator(); } Item_sum(THD *thd, Item *a, Item *b): Item_func_or_sum(thd, a, b), - quick_group(1), orig_args(tmp_orig_args) + quick_group(1), has_error(0), orig_args(tmp_orig_args) { mark_as_sum_func(); init_aggregator(); From 96565ac3118c936e81969cb68e3d1c6fa34c4114 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 15:02:22 +0530 Subject: [PATCH 17/33] Added the function setting_handler_for_percentile_functions() for the percentile_disc function that would set the type of the result field for percentile_disc. Percentile_cont would have double precision result type --- sql/item_windowfunc.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index c5c2c0aa004..a83147fc7a1 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -1137,6 +1137,21 @@ public: } } + void setting_handler_for_percentile_functions(Item_result rtype) const + { + switch(window_func()->sum_func()){ + case Item_sum::PERCENTILE_DISC_FUNC: + ((Item_sum_percentile_disc* ) window_func())->set_handler_by_cmp_type(rtype); + break; + default: + return; + } + } + + bool check_result_type_of_order_item(); + + + /* Computation functions. TODO: consoder merging these with class Group_bound_tracker.
From 947ce922c950323a91a187b53d3315b6e6a582f0 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 15:06:42 +0530 Subject: [PATCH 18/33] Added the error ER_NOT_SINGLE_ELEMENT_ORDER_LIST for the percentile functions, these ensure that for the percentile function we have the order list with exactly one element --- sql/item_windowfunc.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index d5e13054755..c860c5ead68 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -111,18 +111,11 @@ Item_window_func::fix_fields(THD *thd, Item **ref) if (only_single_element_order_list()) { - //TODO (varun): need to change the error, the error should say that we have more than one element in the order list if (window_spec->order_list->elements != 1) { - my_error(ER_NO_ORDER_LIST_IN_WINDOW_SPEC, MYF(0), window_func()->func_name()); + my_error(ER_NOT_SINGLE_ELEMENT_ORDER_LIST, MYF(0), window_func()->func_name()); return true; } - /*switch(window_spec->order_list->firt->item[0]->type()) - { - case INT_TYPE: - default: - break; - }*/ } /* From 6511069e7fcf3c9035469f9d2996706fbde5d6a0 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 15:08:08 +0530 Subject: [PATCH 19/33] Added the error ER_WRONG_TYPE_FOR_PERCENTILE_CONT, which ensures that the result type for percentile_cont is always numerical --- sql/item_windowfunc.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index c860c5ead68..4022c3ddf70 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -178,25 +178,24 @@ void Item_window_func::split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array, window_func()->setup_caches(thd); } -bool Item_window_func::check_order_list() +bool Item_window_func::check_result_type_of_order_item() { if (only_single_element_order_list()) { Item_result rtype=
window_spec->order_list->first->item[0]->result_type(); if (rtype != REAL_RESULT && rtype != INT_RESULT && - rtype != DECIMAL_RESULT) + rtype != DECIMAL_RESULT && window_func()->sum_func() == Item_sum::PERCENTILE_CONT_FUNC) { - // TODO(varun) please change the error name - my_error(ER_WRONG_TYPE_FOR_RANGE_FRAME, MYF(0),"percentile functions"); + my_error(ER_WRONG_TYPE_FOR_PERCENTILE_CONT, MYF(0)); return TRUE; } + setting_handler_for_percentile_functions(rtype); } return FALSE; } /* This must be called before attempting to compute the window function values. - @detail If we attempt to do it in fix_fields(), partition_fields will refer to the original window function arguments. @@ -222,7 +221,6 @@ void Item_sum_dense_rank::setup_window_func(THD *thd, Window_spec *window_spec) void Item_sum_percentile_disc::setup_window_func(THD *thd, Window_spec *window_spec) { order_item= window_spec->order_list->first->item[0]; - set_handler_by_cmp_type(order_item->result_type()); if (!(value= order_item->get_cache(thd))) return; value->setup(thd, order_item); @@ -235,7 +233,6 @@ void Item_sum_percentile_cont::setup_window_func(THD *thd, Window_spec *window_s /* TODO(varun): need to discuss and finalise what type should we return for percentile cont functions */ - //set_handler_by_cmp_type(order_item->result_type()); if (!(ceil_value= order_item->get_cache(thd))) return; ceil_value->setup(thd, order_item); From 03ed22326a85c50a67d4c43e9392de8c135cf649 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 15:10:19 +0530 Subject: [PATCH 20/33] Added the error 1)ER_ARGUMENT_OUT_OF_RANGE: This error is thrown if the argument of the percentile function is not in the range [0,1] 2)ER_ARGUMENT_NOT_CONSTANT: This error is thrown if the argument of the percnetile function is not constant in the entire partition of the window function --- sql/item_windowfunc.h | 52 +++++++++++++++++-------------------------- sql/sql_window.cc | 12 ++++++++-- 2 files changed, 30 insertions(+), 34 
deletions(-) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index a83147fc7a1..995010d2df2 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -730,7 +730,8 @@ class Item_sum_percentile_disc : public Item_sum_cume_dist, public: Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd, arg), Type_handler_hybrid_field_type(&type_handler_longlong), - value(NULL), val_calculated(FALSE), first_call(TRUE),prev_value(0), order_item(NULL){} + value(NULL), val_calculated(FALSE), first_call(TRUE), + prev_value(0), order_item(NULL){} double val_real() { @@ -780,21 +781,26 @@ public: { Item *arg = get_arg(0); if (arg->is_null()) - return true; + return false; if (first_call) { prev_value= arg->val_real(); if (prev_value >1 || prev_value < 0) { + my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); + has_error= TRUE; return true; } first_call= false; } - if(prev_value != arg->val_real()) + double arg_val= arg->val_real(); + + if(prev_value != arg_val) { - // TODO(varun) need to add an error here , check the MDEV-12985 for the information + my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); + has_error= TRUE; return true; } @@ -821,6 +827,7 @@ public: void clear() { + has_error= false; val_calculated= false; first_call= true; value->clear(); @@ -890,36 +897,13 @@ public: ((ceil(val) - val) * floor_value->val_real()); return ret_val; - - } - longlong val_int() - { - if (get_row_count() == 0 || get_arg(0)->is_null()) - { - null_value= true; - return 0; - } - null_value= false; - return 0; - } - - my_decimal* val_decimal(my_decimal* dec) - { - if (get_row_count() == 0 || get_arg(0)->is_null()) - { - null_value= true; - return 0; - return 0; - } - null_value= false; - return ceil_value->val_decimal(dec); } bool add() { Item *arg = get_arg(0); if (arg->is_null()) - return true; + return false; if (first_call) { @@ -927,14 +911,17 @@ public: prev_value= arg->val_real(); if (prev_value >1 || prev_value < 0) { - // TODO(varun) need to add an error here , check 
the MDEV-12985 for the information + my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); + has_error= TRUE; return true; } } - if (prev_value != arg->val_real()) + double arg_val= arg->val_real(); + if(prev_value != arg_val) { - // TODO(varun) need to add an error here , check the MDEV-12985 for the information + my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); + has_error= TRUE; return true; } @@ -966,12 +953,13 @@ public: enum Sumfunctype sum_func() const { - return PERCENTILE_DISC_FUNC; + return PERCENTILE_CONT_FUNC; } void clear() { first_call= true; + has_error= false; floor_value->clear(); ceil_value->clear(); floor_val_calculated= false; diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 08e93dfcce2..d51ffdc2f83 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -324,7 +324,7 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, li.rewind(); while((win_func_item= li++)) { - if (win_func_item->check_order_list()) + if (win_func_item->check_result_type_of_order_item()) DBUG_RETURN(1); } DBUG_RETURN(0); @@ -1078,12 +1078,13 @@ protected: { if (perform_no_action) return; - List_iterator_fast it(sum_functions); Item_sum *item_sum; while ((item_sum= it++)) { item_sum->add(); + if (item_sum->has_error) + return; } } @@ -2809,6 +2810,12 @@ bool compute_window_func(THD *thd, { cursor_manager->notify_cursors_next_row(); } + + /* check if we found any error in the window function while calling the add function */ + + if (win_func->window_func()->has_error) + goto label; + /* Return to current row after notifying cursors for each window function. 
*/ tbl->file->ha_rnd_pos(tbl->record[0], rowid_buf); @@ -2821,6 +2828,7 @@ bool compute_window_func(THD *thd, rownum++; } +label: my_free(rowid_buf); partition_trackers.delete_elements(); end_read_record(&info); From f04426f7271388416b69feeb8252d03f0a29df46 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 17 Jul 2017 15:23:21 +0530 Subject: [PATCH 21/33] Added more tests for the percentile functions --- mysql-test/r/win_percentile_cont.result | 111 ++++++++++++++++++++++++ mysql-test/t/win_percentile_cont.test | 55 ++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 mysql-test/r/win_percentile_cont.result create mode 100644 mysql-test/t/win_percentile_cont.test diff --git a/mysql-test/r/win_percentile_cont.result b/mysql-test/r/win_percentile_cont.result new file mode 100644 index 00000000000..61f70892887 --- /dev/null +++ b/mysql-test/r/win_percentile_cont.result @@ -0,0 +1,111 @@ +CREATE TABLE student (name CHAR(10), test double, score DECIMAL(19,4)); +INSERT INTO student VALUES +('Chun', 0, 3), ('Chun', 0, 7), +('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), +('Kaolin', 0.5, 4), +('Tatiana', 0.8, 4), ('Tata', 0.8, 4); +select name, percentile_disc(0.5) within group(order by score) over () from student; +name percentile_disc(0.5) within group(order by score) over () +Chun 4.0000000000 +Chun 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select name, percentile_cont(0.5) within group(order by score) over () from student; +name percentile_cont(0.5) within group(order by score) over () +Chun 4.0000000000 +Chun 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select name, percentile_cont(null) within group(order by score) over (partition by name) from student; +name percentile_cont(null) within group(order by score) over (partition by name) +Chun NULL +Chun NULL +Kaolin NULL +Kaolin NULL +Kaolin NULL +Tatiana NULL 
+Tata NULL +select name, percentile_disc(null) within group(order by score) over (partition by name) from student; +name percentile_disc(null) within group(order by score) over (partition by name) +Chun NULL +Chun NULL +Kaolin NULL +Kaolin NULL +Kaolin NULL +Tatiana NULL +Tata NULL +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from student; +name c +Chun 5.0000000000 +Chun 5.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select name, percentile_disc(0.5) within group(order by score) over (partition by name) as c from student; +name c +Chun 3.0000000000 +Chun 3.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from student ) as t; +name percentile_cont(0.5) within group ( order by score) over (partition by name ) +Chun 5.0000000000 +Chun 5.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from student ) as t; +name percentile_disc(0.5) within group ( order by score) over (partition by name ) +Chun 3.0000000000 +Chun 3.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select name from student a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from student b limit 1) >= 0.5; +name +Chun +Chun +Kaolin +Kaolin +Kaolin +Tatiana +Tata +select score, percentile_cont(0.5) within group(order by name) over (partition by score) from student; +ERROR HY000: Numeric datatype is required for Percentile_CONT function +select score, percentile_disc(0.5) within group(order by name) over (partition by score) from student; 
+score percentile_disc(0.5) within group(order by name) over (partition by score) +3.0000 Chun +7.0000 Chun +3.0000 Chun +7.0000 Chun +4.0000 Tata +4.0000 Tata +4.0000 Tata +select percentile_disc(0.5) within group(order by score,test) over (partition by name) from student; +ERROR HY000: Incorrect number of elements in the order list for 'percentile_disc' +select percentile_cont(0.5) within group(order by score,test) over (partition by name) from student; +ERROR HY000: Incorrect number of elements in the order list for 'percentile_cont' +select percentile_disc(1.5) within group(order by score) over (partition by name) from student; +ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] +select percentile_cont(1.5) within group(order by score) over (partition by name) from student; +ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] +select name,percentile_cont(test) within group(order by score) over (partition by name) from student; +ERROR HY000: Argument to the percentile functions is not a constant +select name, percentile_disc(test) within group(order by score) over (partition by name) from student; +ERROR HY000: Argument to the percentile functions is not a constant +drop table student; diff --git a/mysql-test/t/win_percentile_cont.test b/mysql-test/t/win_percentile_cont.test new file mode 100644 index 00000000000..75fde963b2a --- /dev/null +++ b/mysql-test/t/win_percentile_cont.test @@ -0,0 +1,55 @@ +CREATE TABLE student (name CHAR(10), test double, score DECIMAL(19,4)); +INSERT INTO student VALUES +('Chun', 0, 3), ('Chun', 0, 7), +('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), +('Kaolin', 0.5, 4), +('Tatiana', 0.8, 4), ('Tata', 0.8, 4); + +#no partition clause +select name, percentile_disc(0.5) within group(order by score) over () from student; +select name, percentile_cont(0.5) within group(order by score) over () from student; + +# argument set to null +select name, percentile_cont(null) within 
group(order by score) over (partition by name) from student; +select name, percentile_disc(null) within group(order by score) over (partition by name) from student; + +# complete query with partition column +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from student; +select name, percentile_disc(0.5) within group(order by score) over (partition by name) as c from student; + +#subqueries having percentile functions + +select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from student ) as t; +select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from student ) as t; +select name from student a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from student b limit 1) >= 0.5; + +# WITH STORED PROCEDURES + + +#DISALLOWED FIELDS IN ORDER BY CLAUSE +--error ER_WRONG_TYPE_FOR_PERCENTILE_CONT +select score, percentile_cont(0.5) within group(order by name) over (partition by score) from student; +select score, percentile_disc(0.5) within group(order by name) over (partition by score) from student; + +# error with 2 order by elements + +--error ER_NOT_SINGLE_ELEMENT_ORDER_LIST +select percentile_disc(0.5) within group(order by score,test) over (partition by name) from student; +--error ER_NOT_SINGLE_ELEMENT_ORDER_LIST +select percentile_cont(0.5) within group(order by score,test) over (partition by name) from student; + +#parameter value should be in the range of 0 to 1( NEED TO THINK A WAY FOR THIS) +--error ER_ARGUMENT_OUT_OF_RANGE +select percentile_disc(1.5) within group(order by score) over (partition by name) from student; +--error ER_ARGUMENT_OUT_OF_RANGE +select percentile_cont(1.5) within group(order by score) over (partition by name) from student; + +--error ER_ARGUMENT_NOT_CONSTANT +select name,percentile_cont(test) within group(order by score) over (partition by name) 
from student; +--error ER_ARGUMENT_NOT_CONSTANT +select name, percentile_disc(test) within group(order by score) over (partition by name) from student; + +#CHECK TYPE OF THE ARGUMENT, SHOULD BE ONLY NUMERICAL +#select name, percentile_cont(name) within group(order by score) over (partition by name) from student; + +drop table student; From f8e135c7dfc212d24ca6b00df92f9f3ba2d77970 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 18 Jul 2017 01:55:31 +0530 Subject: [PATCH 22/33] made changes according to the review, mostly removing unused code and fixing code to follow the coding conventions --- sql/item.h | 30 ----------------- sql/item_windowfunc.h | 34 ++++---------------- sql/sql_window.cc | 20 +++++------- sql/sql_yacc.yy | 75 +++++++++++++++++++++++-------------------- 4 files changed, 55 insertions(+), 104 deletions(-) diff --git a/sql/item.h b/sql/item.h index 067deba8b58..f4f2055b48c 100644 --- a/sql/item.h +++ b/sql/item.h @@ -5303,8 +5303,6 @@ public: /* Compare the cached value with the source value, without copying */ virtual int cmp_read_only()=0; - virtual void clear()=0; - virtual ~Cached_item(); /*line -e1509 */ }; @@ -5322,11 +5320,6 @@ public: cmp(); item= save; } - - void clear() - { - null_value= false; - } }; class Cached_item_str :public Cached_item_item @@ -5337,10 +5330,6 @@ public: Cached_item_str(THD *thd, Item *arg); bool cmp(void); int cmp_read_only(); - void clear() - { - null_value= false; - } ~Cached_item_str(); // Deallocate String:s }; @@ -5353,11 +5342,6 @@ public: bool cmp(void); int cmp_read_only(); double get_value(){ return value;} - void clear() - { - value=0.0; - null_value= false; - } }; class Cached_item_int :public Cached_item_item @@ -5368,11 +5352,6 @@ public: bool cmp(void); int cmp_read_only(); longlong get_value(){ return value;} - void clear() - { - value=0.0; - null_value= false; - } }; @@ -5384,11 +5363,6 @@ public: bool cmp(void); int cmp_read_only(); my_decimal *get_value(){ return &value;}; - void clear() - 
{ - null_value= false; - my_decimal_set_zero(&value); - } }; class Cached_item_field :public Cached_item @@ -5406,10 +5380,6 @@ public: } bool cmp(void); int cmp_read_only(); - void clear() - { - null_value= false; - } }; class Item_default_value : public Item_field diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 995010d2df2..a2357b24980 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -32,22 +32,13 @@ public: Group_bound_tracker(THD *thd, SQL_I_List *list) { - if (list) + for (ORDER *curr = list->first; curr; curr=curr->next) { - for (ORDER *curr = list->first; curr; curr=curr->next) - { Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE); group_fields.push_back(tmp); - } } } - Group_bound_tracker(THD *thd, Item *item) - { - Cached_item *tmp= new_Cached_item(thd, item, FALSE); - group_fields.push_back(tmp); - } - void init() { first_check= true; @@ -96,19 +87,6 @@ public: return 0; } - bool compare_with_cache_for_null_values() - { - List_iterator li(group_fields); - Cached_item *ptr; - while ((ptr= li++)) - { - ptr->cmp(); - if (ptr->null_value) - return true; - } - return false; - } - private: List group_fields; /* @@ -797,7 +775,7 @@ public: double arg_val= arg->val_real(); - if(prev_value != arg_val) + if (prev_value != arg_val) { my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); has_error= TRUE; @@ -815,7 +793,7 @@ public: Item_sum_cume_dist::add(); double val= Item_sum_cume_dist::val_real(); - if(val >= prev_value && !val_calculated) + if (val >= prev_value && !val_calculated) val_calculated= true; return false; } @@ -918,7 +896,7 @@ public: } double arg_val= arg->val_real(); - if(prev_value != arg_val) + if (prev_value != arg_val) { my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); has_error= TRUE; @@ -1116,7 +1094,7 @@ public: bool only_single_element_order_list() const { - switch(window_func()->sum_func()){ + switch (window_func()->sum_func()){ case Item_sum::PERCENTILE_CONT_FUNC: case Item_sum::PERCENTILE_DISC_FUNC: return 
true; @@ -1127,7 +1105,7 @@ public: void setting_handler_for_percentile_functions(Item_result rtype) const { - switch(window_func()->sum_func()){ + switch (window_func()->sum_func()){ case Item_sum::PERCENTILE_DISC_FUNC: ((Item_sum_percentile_disc* ) window_func())->set_handler_by_cmp_type(rtype); break; diff --git a/sql/sql_window.cc b/sql/sql_window.cc index d51ffdc2f83..e60e23c80cf 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -322,7 +322,7 @@ setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, } li.rewind(); - while((win_func_item= li++)) + while ((win_func_item= li++)) { if (win_func_item->check_result_type_of_order_item()) DBUG_RETURN(1); @@ -911,7 +911,7 @@ class Partition_read_cursor : public Table_read_cursor { public: Partition_read_cursor(THD *thd, SQL_I_List *partition_list) : - bound_tracker(thd, partition_list){} + bound_tracker(thd, partition_list) {} void init(READ_RECORD *info) { @@ -968,10 +968,6 @@ public: } return 0; } - bool check_for_end_of_partition() - { - return end_of_partition; - } private: Group_bound_tracker bound_tracker; @@ -1078,6 +1074,7 @@ protected: { if (perform_no_action) return; + List_iterator_fast it(sum_functions); Item_sum *item_sum; while ((item_sum= it++)) @@ -1156,7 +1153,7 @@ public: Frame_cursor *cursor; while ((cursor= iter++)) cursor->pre_next_row(); - + iter.rewind(); while ((cursor= iter++)) cursor->next_row(); @@ -1693,7 +1690,7 @@ public: Frame_unbounded_following(THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - cursor(thd, partition_list){} + cursor(thd, partition_list) {} void init(READ_RECORD *info) { @@ -1736,7 +1733,7 @@ public: Frame_unbounded_following_set_count( THD *thd, SQL_I_List *partition_list, SQL_I_List *order_list) : - Frame_unbounded_following(thd, partition_list, order_list){} + Frame_unbounded_following(thd, partition_list, order_list) {} void next_partition(ha_rows rownum) { @@ -1747,9 +1744,7 @@ public: /* Walk to the end of the partition, 
find how many rows there are. */ while (!cursor.next()) - { num_rows_in_partition++; - } List_iterator_fast it(sum_functions); Item_sum* item; @@ -1773,7 +1768,7 @@ class Frame_unbounded_following_set_count_special: public Frame_unbounded_follow public: Frame_unbounded_following_set_count_special(THD *thd, SQL_I_List *partition_list, - SQL_I_List *order_list, Item* arg) : + SQL_I_List *order_list, Item* arg) : Frame_unbounded_following_set_count(thd,partition_list, order_list) { order_item= order_list->first->item[0]; @@ -1805,6 +1800,7 @@ public: { return cursor.get_rownum(); } + private: Item* order_item; }; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index b5bc70639b0..4b4a9bb5f6f 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -10191,6 +10191,11 @@ geometry_function: Item_func_spatial_precise_rel(thd, $3, $5, Item_func::SP_CONTAINS_FUNC)); } + | WITHIN '(' expr ',' expr ')' + { + $$= GEOM_NEW(thd, Item_func_spatial_precise_rel(thd, $3, $5, + Item_func::SP_WITHIN_FUNC)); + } | GEOMETRYCOLLECTION '(' expr_list ')' { $$= GEOM_NEW(thd, @@ -10237,6 +10242,7 @@ geometry_function: Geometry::wkb_polygon, Geometry::wkb_linestring)); } + ; /* @@ -10699,45 +10705,45 @@ simple_window_func: ; inverse_distribution_function: - inverse_distribution_function_def WITHIN GROUP_SYM - '(' - { Select->prepare_add_window_spec(thd); } - order_by_single_element_list ')' OVER_SYM - '(' opt_window_ref opt_window_partition_clause ')' - { - LEX *lex= Lex; - if (Select->add_window_spec(thd, lex->win_ref, - Select->group_list, - Select->order_list, - NULL)) - MYSQL_YYABORT; - $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, - thd->lex->win_spec); - if ($$ == NULL) - MYSQL_YYABORT; - if (Select->add_window_func((Item_window_func *) $$)) - MYSQL_YYABORT; - } + inverse_distribution_function_def WITHIN GROUP_SYM + '(' + { Select->prepare_add_window_spec(thd); } + order_by_single_element_list ')' OVER_SYM + '(' opt_window_ref opt_window_partition_clause ')' + { + LEX *lex= 
Lex; + if (Select->add_window_spec(thd, lex->win_ref, + Select->group_list, + Select->order_list, + NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, + thd->lex->win_spec); + if ($$ == NULL) + MYSQL_YYABORT; + if (Select->add_window_func((Item_window_func *) $$)) + MYSQL_YYABORT; + } ; inverse_distribution_function_def: - PERCENTILE_CONT_SYM '(' expr ')' - { - $$= new (thd->mem_root) Item_sum_percentile_cont(thd, $3); - if ($$ == NULL) - MYSQL_YYABORT; - } - | PERCENTILE_DISC_SYM '(' expr ')' - { - $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3); - if ($$ == NULL) - MYSQL_YYABORT; - } - ; + PERCENTILE_CONT_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } + | PERCENTILE_DISC_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; order_by_single_element_list: - ORDER_SYM BY order_list - ; + ORDER_SYM BY order_list + ; window_name: ident @@ -14628,6 +14634,7 @@ keyword: | UNICODE_SYM {} | UNINSTALL_SYM {} | UNBOUNDED_SYM {} + | WITHIN | WRAPPER_SYM {} | XA_SYM {} | UPGRADE_SYM {} From f4ba298abd06024f619659a4d9aae1e3fad97b08 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 18 Jul 2017 02:58:08 +0530 Subject: [PATCH 23/33] Fixed indentation in the syntax rules for the sql_yacc.yy , also added the rules of the percentile functions to the sql_yacc_ora.yy --- sql/sql_yacc.yy | 13 ++++++------ sql/sql_yacc_ora.yy | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 4b4a9bb5f6f..5fd33fb3249 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -10191,11 +10191,6 @@ geometry_function: Item_func_spatial_precise_rel(thd, $3, $5, Item_func::SP_CONTAINS_FUNC)); } - | WITHIN '(' expr ',' expr ')' - { - $$= GEOM_NEW(thd, Item_func_spatial_precise_rel(thd, $3, $5, - 
Item_func::SP_WITHIN_FUNC)); - } | GEOMETRYCOLLECTION '(' expr_list ')' { $$= GEOM_NEW(thd, @@ -10242,7 +10237,11 @@ geometry_function: Geometry::wkb_polygon, Geometry::wkb_linestring)); } - + | WITHIN '(' expr ',' expr ')' + { + $$= GEOM_NEW(thd, Item_func_spatial_precise_rel(thd, $3, $5, + Item_func::SP_WITHIN_FUNC)); + } ; /* @@ -14634,7 +14633,7 @@ keyword: | UNICODE_SYM {} | UNINSTALL_SYM {} | UNBOUNDED_SYM {} - | WITHIN + | WITHIN {} | WRAPPER_SYM {} | XA_SYM {} | UPGRADE_SYM {} diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index db45414fd28..e04c7ee60b3 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -737,6 +737,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token PARTITIONING_SYM %token PASSWORD_SYM %token PERCENT_RANK_SYM +%token PERCENTILE_CONT_SYM +%token PERCENTILE_DISC_SYM %token PERSISTENT_SYM %token PHASE_SYM %token PLUGINS_SYM @@ -983,6 +985,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WINDOW_SYM %token WHILE_SYM %token WITH /* SQL-2003-R */ +%token WITHIN %token WITH_CUBE_SYM /* INTERNAL */ %token WITH_ROLLUP_SYM /* INTERNAL */ %token WORK_SYM /* SQL-2003-N */ @@ -1147,6 +1150,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); window_func_expr window_func simple_window_func + inverse_distribution_function + inverse_distribution_function_def explicit_cursor_attr function_call_keyword function_call_nonkeyword @@ -9389,6 +9394,7 @@ column_default_non_parenthesized_expr: | variable | sum_expr | window_func_expr + | inverse_distribution_function | ROW_SYM '(' expr ',' expr_list ')' { $5->push_front($3, thd->mem_root); @@ -10718,6 +10724,47 @@ simple_window_func: } ; +inverse_distribution_function: + inverse_distribution_function_def WITHIN GROUP_SYM + '(' + { Select->prepare_add_window_spec(thd); } + order_by_single_element_list ')' OVER_SYM + '(' opt_window_ref opt_window_partition_clause ')' + { + LEX *lex= Lex; + if (Select->add_window_spec(thd, 
lex->win_ref, + Select->group_list, + Select->order_list, + NULL)) + MYSQL_YYABORT; + $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1, + thd->lex->win_spec); + if ($$ == NULL) + MYSQL_YYABORT; + if (Select->add_window_func((Item_window_func *) $$)) + MYSQL_YYABORT; + } + ; + +inverse_distribution_function_def: + PERCENTILE_CONT_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } + | PERCENTILE_DISC_SYM '(' expr ')' + { + $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + +order_by_single_element_list: + ORDER_SYM BY order_list + ; + window_name: ident { @@ -14703,6 +14750,7 @@ keyword_directly_assignable: | UNICODE_SYM {} | UNINSTALL_SYM {} | UNBOUNDED_SYM {} + | WITHIN {} | WRAPPER_SYM {} | XA_SYM {} | UPGRADE_SYM {} From 24e219b179142b3708ff4bdf5ae3db96d6fa184a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Tue, 29 Aug 2017 18:27:16 +0300 Subject: [PATCH 24/33] Remove has_error as a member from Item_sum and use THD::is_error() instead Additionally, allow a query with window functions to be killed by the user during its execution. 
--- sql/item_sum.cc | 3 +-- sql/item_sum.h | 7 +++---- sql/item_windowfunc.h | 6 ------ sql/sql_window.cc | 10 ++++------ 4 files changed, 8 insertions(+), 18 deletions(-) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 4f9cdfe20e8..b047dc4ea4d 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -441,7 +441,6 @@ Item_sum::Item_sum(THD *thd, List &list): Item_func_or_sum(thd, list) mark_as_sum_func(); init_aggregator(); list.empty(); // Fields are used - has_error= FALSE; } @@ -453,7 +452,7 @@ Item_sum::Item_sum(THD *thd, Item_sum *item): Item_func_or_sum(thd, item), aggr_sel(item->aggr_sel), nest_level(item->nest_level), aggr_level(item->aggr_level), - quick_group(item->quick_group), has_error(FALSE), + quick_group(item->quick_group), orig_args(NULL) { if (arg_count <= 2) diff --git a/sql/item_sum.h b/sql/item_sum.h index a3bcf397db7..467a77c8983 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -367,7 +367,6 @@ public: int8 max_arg_level; /* max level of unbound column references */ int8 max_sum_func_level;/* max level of aggregation for embedded functions */ bool quick_group; /* If incremental update of fields */ - bool has_error; /* This list is used by the check for mixing non aggregated fields and sum functions in the ONLY_FULL_GROUP_BY_MODE. 
We save all outer fields @@ -389,19 +388,19 @@ protected: public: void mark_as_sum_func(); - Item_sum(THD *thd): Item_func_or_sum(thd), quick_group(1), has_error(0) + Item_sum(THD *thd): Item_func_or_sum(thd), quick_group(1) { mark_as_sum_func(); init_aggregator(); } Item_sum(THD *thd, Item *a): Item_func_or_sum(thd, a), quick_group(1), - has_error(0), orig_args(tmp_orig_args) + orig_args(tmp_orig_args) { mark_as_sum_func(); init_aggregator(); } Item_sum(THD *thd, Item *a, Item *b): Item_func_or_sum(thd, a, b), - quick_group(1), has_error(0), orig_args(tmp_orig_args) + quick_group(1), orig_args(tmp_orig_args) { mark_as_sum_func(); init_aggregator(); diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index a2357b24980..c1a8c594e20 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -767,7 +767,6 @@ public: if (prev_value >1 || prev_value < 0) { my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); - has_error= TRUE; return true; } first_call= false; @@ -778,7 +777,6 @@ public: if (prev_value != arg_val) { my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); - has_error= TRUE; return true; } @@ -805,7 +803,6 @@ public: void clear() { - has_error= false; val_calculated= false; first_call= true; value->clear(); @@ -890,7 +887,6 @@ public: if (prev_value >1 || prev_value < 0) { my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); - has_error= TRUE; return true; } } @@ -899,7 +895,6 @@ public: if (prev_value != arg_val) { my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); - has_error= TRUE; return true; } @@ -937,7 +932,6 @@ public: void clear() { first_call= true; - has_error= false; floor_value->clear(); ceil_value->clear(); floor_val_calculated= false; diff --git a/sql/sql_window.cc b/sql/sql_window.cc index e60e23c80cf..9a274179b21 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -1080,8 +1080,6 @@ protected: while ((item_sum= it++)) { item_sum->add(); - if (item_sum->has_error) - return; } } @@ -2807,10 +2805,11 @@ bool compute_window_func(THD *thd, 
cursor_manager->notify_cursors_next_row(); } - /* check if we found any error in the window function while calling the add function */ + /* Check if we found any error in the window function while adding values + through cursors. */ + if (thd->is_error() || thd->is_killed()) + break; - if (win_func->window_func()->has_error) - goto label; /* Return to current row after notifying cursors for each window function. */ @@ -2824,7 +2823,6 @@ bool compute_window_func(THD *thd, rownum++; } -label: my_free(rowid_buf); partition_trackers.delete_elements(); end_read_record(&info); From b5c104d00a264e250cc008c6f2a42e8a2b18f385 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 7 Sep 2017 17:37:55 +0530 Subject: [PATCH 25/33] Changes made according to the review given, mostly fixing coding style errors --- mysql-test/r/win_percentile_cont.result | 111 ------------------------ mysql-test/t/win_percentile_cont.test | 55 ------------ sql/item.h | 3 - sql/item_windowfunc.h | 18 ++-- sql/sql_window.cc | 37 ++++---- 5 files changed, 28 insertions(+), 196 deletions(-) delete mode 100644 mysql-test/r/win_percentile_cont.result delete mode 100644 mysql-test/t/win_percentile_cont.test diff --git a/mysql-test/r/win_percentile_cont.result b/mysql-test/r/win_percentile_cont.result deleted file mode 100644 index 61f70892887..00000000000 --- a/mysql-test/r/win_percentile_cont.result +++ /dev/null @@ -1,111 +0,0 @@ -CREATE TABLE student (name CHAR(10), test double, score DECIMAL(19,4)); -INSERT INTO student VALUES -('Chun', 0, 3), ('Chun', 0, 7), -('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), -('Kaolin', 0.5, 4), -('Tatiana', 0.8, 4), ('Tata', 0.8, 4); -select name, percentile_disc(0.5) within group(order by score) over () from student; -name percentile_disc(0.5) within group(order by score) over () -Chun 4.0000000000 -Chun 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Tatiana 4.0000000000 -Tata 4.0000000000 -select name, percentile_cont(0.5) within group(order by 
score) over () from student; -name percentile_cont(0.5) within group(order by score) over () -Chun 4.0000000000 -Chun 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Tatiana 4.0000000000 -Tata 4.0000000000 -select name, percentile_cont(null) within group(order by score) over (partition by name) from student; -name percentile_cont(null) within group(order by score) over (partition by name) -Chun NULL -Chun NULL -Kaolin NULL -Kaolin NULL -Kaolin NULL -Tatiana NULL -Tata NULL -select name, percentile_disc(null) within group(order by score) over (partition by name) from student; -name percentile_disc(null) within group(order by score) over (partition by name) -Chun NULL -Chun NULL -Kaolin NULL -Kaolin NULL -Kaolin NULL -Tatiana NULL -Tata NULL -select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from student; -name c -Chun 5.0000000000 -Chun 5.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Tatiana 4.0000000000 -Tata 4.0000000000 -select name, percentile_disc(0.5) within group(order by score) over (partition by name) as c from student; -name c -Chun 3.0000000000 -Chun 3.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Tatiana 4.0000000000 -Tata 4.0000000000 -select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from student ) as t; -name percentile_cont(0.5) within group ( order by score) over (partition by name ) -Chun 5.0000000000 -Chun 5.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Tatiana 4.0000000000 -Tata 4.0000000000 -select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from student ) as t; -name percentile_disc(0.5) within group ( order by score) over (partition by name ) -Chun 3.0000000000 -Chun 3.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Kaolin 4.0000000000 -Tatiana 4.0000000000 -Tata 
4.0000000000 -select name from student a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from student b limit 1) >= 0.5; -name -Chun -Chun -Kaolin -Kaolin -Kaolin -Tatiana -Tata -select score, percentile_cont(0.5) within group(order by name) over (partition by score) from student; -ERROR HY000: Numeric datatype is required for Percentile_CONT function -select score, percentile_disc(0.5) within group(order by name) over (partition by score) from student; -score percentile_disc(0.5) within group(order by name) over (partition by score) -3.0000 Chun -7.0000 Chun -3.0000 Chun -7.0000 Chun -4.0000 Tata -4.0000 Tata -4.0000 Tata -select percentile_disc(0.5) within group(order by score,test) over (partition by name) from student; -ERROR HY000: Incorrect number of elements in the order list for 'percentile_disc' -select percentile_cont(0.5) within group(order by score,test) over (partition by name) from student; -ERROR HY000: Incorrect number of elements in the order list for 'percentile_cont' -select percentile_disc(1.5) within group(order by score) over (partition by name) from student; -ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] -select percentile_cont(1.5) within group(order by score) over (partition by name) from student; -ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] -select name,percentile_cont(test) within group(order by score) over (partition by name) from student; -ERROR HY000: Argument to the percentile functions is not a constant -select name, percentile_disc(test) within group(order by score) over (partition by name) from student; -ERROR HY000: Argument to the percentile functions is not a constant -drop table student; diff --git a/mysql-test/t/win_percentile_cont.test b/mysql-test/t/win_percentile_cont.test deleted file mode 100644 index 75fde963b2a..00000000000 --- a/mysql-test/t/win_percentile_cont.test +++ /dev/null @@ -1,55 +0,0 @@ 
-CREATE TABLE student (name CHAR(10), test double, score DECIMAL(19,4)); -INSERT INTO student VALUES -('Chun', 0, 3), ('Chun', 0, 7), -('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), -('Kaolin', 0.5, 4), -('Tatiana', 0.8, 4), ('Tata', 0.8, 4); - -#no partition clause -select name, percentile_disc(0.5) within group(order by score) over () from student; -select name, percentile_cont(0.5) within group(order by score) over () from student; - -# argument set to null -select name, percentile_cont(null) within group(order by score) over (partition by name) from student; -select name, percentile_disc(null) within group(order by score) over (partition by name) from student; - -# complete query with partition column -select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from student; -select name, percentile_disc(0.5) within group(order by score) over (partition by name) as c from student; - -#subqueries having percentile functions - -select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from student ) as t; -select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from student ) as t; -select name from student a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from student b limit 1) >= 0.5; - -# WITH STORED PROCEDURES - - -#DISALLOWED FIELDS IN ORDER BY CLAUSE ---error ER_WRONG_TYPE_FOR_PERCENTILE_CONT -select score, percentile_cont(0.5) within group(order by name) over (partition by score) from student; -select score, percentile_disc(0.5) within group(order by name) over (partition by score) from student; - -# error with 2 order by elements - ---error ER_NOT_SINGLE_ELEMENT_ORDER_LIST -select percentile_disc(0.5) within group(order by score,test) over (partition by name) from student; ---error ER_NOT_SINGLE_ELEMENT_ORDER_LIST -select percentile_cont(0.5) within group(order by score,test) over 
(partition by name) from student; - -#parameter value should be in the range of 0 to 1( NEED TO THINK A WAY FOR THIS) ---error ER_ARGUMENT_OUT_OF_RANGE -select percentile_disc(1.5) within group(order by score) over (partition by name) from student; ---error ER_ARGUMENT_OUT_OF_RANGE -select percentile_cont(1.5) within group(order by score) over (partition by name) from student; - ---error ER_ARGUMENT_NOT_CONSTANT -select name,percentile_cont(test) within group(order by score) over (partition by name) from student; ---error ER_ARGUMENT_NOT_CONSTANT -select name, percentile_disc(test) within group(order by score) over (partition by name) from student; - -#CHECK TYPE OF THE ARGUMENT, SHOULD BE ONLY NUMERICAL -#select name, percentile_cont(name) within group(order by score) over (partition by name) from student; - -drop table student; diff --git a/sql/item.h b/sql/item.h index f4f2055b48c..1f743913d78 100644 --- a/sql/item.h +++ b/sql/item.h @@ -5341,7 +5341,6 @@ public: Cached_item_real(Item *item_par) :Cached_item_item(item_par),value(0.0) {} bool cmp(void); int cmp_read_only(); - double get_value(){ return value;} }; class Cached_item_int :public Cached_item_item @@ -5351,7 +5350,6 @@ public: Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {} bool cmp(void); int cmp_read_only(); - longlong get_value(){ return value;} }; @@ -5362,7 +5360,6 @@ public: Cached_item_decimal(Item *item_par); bool cmp(void); int cmp_read_only(); - my_decimal *get_value(){ return &value;}; }; class Cached_item_field :public Cached_item diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index c1a8c594e20..8b3bd5a26e2 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -757,14 +757,14 @@ public: bool add() { - Item *arg = get_arg(0); + Item *arg= get_arg(0); if (arg->is_null()) return false; if (first_call) { prev_value= arg->val_real(); - if (prev_value >1 || prev_value < 0) + if (prev_value > 1 || prev_value < 0) { my_error(ER_ARGUMENT_OUT_OF_RANGE, 
MYF(0)); return true; @@ -774,7 +774,7 @@ public: double arg_val= arg->val_real(); - if (prev_value != arg_val) + if (prev_value != arg_val) { my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); return true; @@ -821,7 +821,7 @@ public: void fix_length_and_dec() { - decimals = 10; // TODO-cvicentiu find out how many decimals the standard + decimals = 5; // TODO-cvicentiu find out how many decimals the standard // requires. } @@ -829,6 +829,7 @@ public: { return get_item_copy(thd, mem_root, this); } void setup_window_func(THD *thd, Window_spec *window_spec); void setup_hybrid(THD *thd, Item *item); + bool fix_fields(THD *thd, Item **ref); private: Item_cache *value; @@ -876,7 +877,7 @@ public: bool add() { - Item *arg = get_arg(0); + Item *arg= get_arg(0); if (arg->is_null()) return false; @@ -884,7 +885,7 @@ public: { first_call= false; prev_value= arg->val_real(); - if (prev_value >1 || prev_value < 0) + if (prev_value > 1 || prev_value < 0) { my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); return true; @@ -892,7 +893,7 @@ public: } double arg_val= arg->val_real(); - if (prev_value != arg_val) + if (prev_value != arg_val) { my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); return true; @@ -950,7 +951,7 @@ public: void fix_length_and_dec() { - decimals = 10; // TODO-cvicentiu find out how many decimals the standard + decimals = 5; // TODO-cvicentiu find out how many decimals the standard // requires. } @@ -958,6 +959,7 @@ public: { return get_item_copy(thd, mem_root, this); } void setup_window_func(THD *thd, Window_spec *window_spec); void setup_hybrid(THD *thd, Item *item); + bool fix_fields(THD *thd, Item **ref); private: Item_cache *floor_value; diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 9a274179b21..059dd8073f5 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -1743,7 +1743,17 @@ public: /* Walk to the end of the partition, find how many rows there are. 
*/ while (!cursor.next()) num_rows_in_partition++; + set_win_funcs_row_count(num_rows_in_partition); + } + ha_rows get_curr_rownum() const + { + return cursor.get_rownum(); + } + +protected: + void set_win_funcs_row_count(ha_rows num_rows_in_partition) + { List_iterator_fast it(sum_functions); Item_sum* item; while ((item= it++)) @@ -1753,20 +1763,16 @@ public: item_with_row_count->set_row_count(num_rows_in_partition); } } - - ha_rows get_curr_rownum() const - { - return cursor.get_rownum(); - } }; -class Frame_unbounded_following_set_count_special: public Frame_unbounded_following_set_count +class Frame_unbounded_following_set_count_no_nulls: + public Frame_unbounded_following_set_count { public: - Frame_unbounded_following_set_count_special(THD *thd, + Frame_unbounded_following_set_count_no_nulls(THD *thd, SQL_I_List *partition_list, - SQL_I_List *order_list, Item* arg) : + SQL_I_List *order_list) : Frame_unbounded_following_set_count(thd,partition_list, order_list) { order_item= order_list->first->item[0]; @@ -1782,16 +1788,9 @@ public: { if (!order_item->is_null()) num_rows_in_partition++; - }while (!cursor.next()); + } while (!cursor.next()); - List_iterator_fast it(sum_functions); - Item_sum* item; - while ((item= it++)) - { - Item_sum_window_with_row_count* item_with_row_count = - static_cast(item); - item_with_row_count->set_row_count(num_rows_in_partition); - } + set_win_funcs_row_count(num_rows_in_partition); } ha_rows get_curr_rownum() const @@ -2614,9 +2613,9 @@ void get_window_functions_required_cursors( { if (item_win_func->only_single_element_order_list()) { - fc= new Frame_unbounded_following_set_count_special(thd, + fc= new Frame_unbounded_following_set_count_no_nulls(thd, item_win_func->window_spec->partition_list, - item_win_func->window_spec->order_list, item_win_func->window_func()->get_arg(0)); + item_win_func->window_spec->order_list); } else { From 02a4a4b512ace75bbe66065c136d697e83a4d9ff Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 
7 Sep 2017 17:40:09 +0530 Subject: [PATCH 26/33] Added fix_fields for percentile function to check the type of argument and to ensure that only numeric arguments are allowed --- sql/item_windowfunc.cc | 54 +++++++++++++++++++++++++++++++-------- sql/share/errmsg-utf8.txt | 4 ++- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 4022c3ddf70..f941900646e 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -109,15 +109,6 @@ Item_window_func::fix_fields(THD *thd, Item **ref) return true; } - if (only_single_element_order_list()) - { - if (window_spec->order_list->elements != 1) - { - my_error(ER_NOT_SINGLE_ELEMENT_ORDER_LIST, MYF(0), window_func()->func_name()); - return true; - } - } - /* TODO: why the last parameter is 'ref' in this call? What if window_func decides to substitute itself for something else and does *ref=.... ? @@ -182,9 +173,11 @@ bool Item_window_func::check_result_type_of_order_item() { if (only_single_element_order_list()) { - Item_result rtype= window_spec->order_list->first->item[0]->result_type(); + Item_result rtype= window_spec->order_list->first->item[0]->cmp_type(); + // TODO (varun) : support date type in percentile_cont function if (rtype != REAL_RESULT && rtype != INT_RESULT && - rtype != DECIMAL_RESULT && window_func()->sum_func() == Item_sum::PERCENTILE_CONT_FUNC) + rtype != DECIMAL_RESULT && rtype != TIME_RESULT + window_func()->sum_func() == Item_sum::PERCENTILE_CONT_FUNC) { my_error(ER_WRONG_TYPE_FOR_PERCENTILE_CONT, MYF(0)); return TRUE; @@ -243,6 +236,45 @@ void Item_sum_percentile_cont::setup_window_func(THD *thd, Window_spec *window_s floor_value->setup(thd, order_item); floor_value->store(order_item); } +bool Item_sum_percentile_cont::fix_fields(THD *thd, Item **ref) +{ + bool res; + res= Item_sum_num::fix_fields(thd, ref); + if (res) + return res; + + switch(args[0]->cmp_type()) + { + case DECIMAL_RESULT: + case REAL_RESULT: + case INT_RESULT: + 
break; + default: + my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0)); + return TRUE; + } + return res; +} +bool Item_sum_percentile_disc::fix_fields(THD *thd, Item **ref) +{ + bool res; + res= Item_sum_num::fix_fields(thd, ref); + if (res) + return res; + + switch(args[0]->cmp_type()) + { + case DECIMAL_RESULT: + case REAL_RESULT: + case INT_RESULT: + break; + default: + my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0)); + return TRUE; + } + return res; + +} bool Item_sum_dense_rank::add() { diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 1011d540e51..415d47df36e 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7793,4 +7793,6 @@ ER_WRONG_TYPE_FOR_PERCENTILE_CONT ER_ARGUMENT_NOT_CONSTANT eng "Argument to the percentile functions is not a constant" ER_ARGUMENT_OUT_OF_RANGE - eng "Argument to the percentile functions does not belong to the range [0,1]" \ No newline at end of file + eng "Argument to the percentile functions does not belong to the range [0,1]" +ER_WRONG_TYPE_OF_ARGUMENT + eng "Numeric values are only allowed as arguments to percentile functions" \ No newline at end of file From 4f4f8f3fb120e9d4507766c817323c758a0a1990 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 26 Oct 2017 23:55:09 +0530 Subject: [PATCH 27/33] Added the median function to the parser , it should behave as a percentile_cont function with its argument fixed to 0.5 --- sql/lex.h | 1 + sql/sql_yacc.yy | 34 +++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/sql/lex.h b/sql/lex.h index 7967d17a5d4..63b0567c5d0 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -737,6 +737,7 @@ static SYMBOL sql_functions[] = { { "LAG", SYM(LAG_SYM)}, { "LEAD", SYM(LEAD_SYM)}, { "MAX", SYM(MAX_SYM)}, + { "MEDIAN", SYM(MEDIAN_SYM)}, { "MID", SYM(SUBSTRING)}, /* unireg function */ { "MIN", SYM(MIN_SYM)}, { "NOW", SYM(NOW_SYM)}, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 5fd33fb3249..21ea4d3dbde 100644 --- 
a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1236,6 +1236,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_STATEMENT_TIME_SYM %token MAX_USER_CONNECTIONS_SYM %token MAXVALUE_SYM /* SQL-2003-N */ +%token MEDIAN_SYM %token MEDIUMBLOB %token MEDIUMINT %token MEDIUMTEXT @@ -1737,6 +1738,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); window_func simple_window_func inverse_distribution_function + percentile_function inverse_distribution_function_def function_call_keyword function_call_nonkeyword @@ -10703,12 +10705,11 @@ simple_window_func: } ; + + inverse_distribution_function: - inverse_distribution_function_def WITHIN GROUP_SYM - '(' - { Select->prepare_add_window_spec(thd); } - order_by_single_element_list ')' OVER_SYM - '(' opt_window_ref opt_window_partition_clause ')' + percentile_function OVER_SYM + '(' opt_window_partition_clause ')' { LEX *lex= Lex; if (Select->add_window_spec(thd, lex->win_ref, @@ -10725,6 +10726,29 @@ inverse_distribution_function: } ; +percentile_function: + inverse_distribution_function_def WITHIN GROUP_SYM '(' + { Select->prepare_add_window_spec(thd); } + order_by_single_element_list ')' + { + $$= $1; + } + | MEDIAN_SYM '(' expr ')' + { + Item *args= new (thd->mem_root) Item_decimal(thd, "0.5", 3, + thd->charset()); + if ((args == NULL) || (thd->is_error())) /* $$ is not assigned yet: validate the freshly allocated 0.5 constant */ + { + MYSQL_YYABORT; + } + if (add_order_to_list(thd, $3,FALSE)) MYSQL_YYABORT; + + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, args); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + inverse_distribution_function_def: PERCENTILE_CONT_SYM '(' expr ')' { From b77105cab6b97c70a5a61084588805371da6caef Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 26 Oct 2017 23:55:52 +0530 Subject: [PATCH 28/33] Only single element order-by list is allowed for percentile functions --- sql/sql_yacc.yy | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 21ea4d3dbde..2f82b917ef1 100644 ---
a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -10765,9 +10765,11 @@ inverse_distribution_function_def: ; order_by_single_element_list: - ORDER_SYM BY order_list + ORDER_SYM BY order_ident order_dir + { if (add_order_to_list(thd, $3,(bool) $4)) MYSQL_YYABORT; } ; + window_name: ident { From 58a6e43513bd9b8ee6cc58ddf3d8aee5fe0eb279 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Thu, 7 Sep 2017 23:51:42 +0530 Subject: [PATCH 29/33] Tests added for percentile and median functions --- mysql-test/r/win_percentile.result | 305 +++++++++++++++++++++++++++++ mysql-test/t/win_percentile.test | 77 ++++++++ sql/item_windowfunc.cc | 2 +- 3 files changed, 383 insertions(+), 1 deletion(-) create mode 100644 mysql-test/r/win_percentile.result create mode 100644 mysql-test/t/win_percentile.test diff --git a/mysql-test/r/win_percentile.result b/mysql-test/r/win_percentile.result new file mode 100644 index 00000000000..b365a995da0 --- /dev/null +++ b/mysql-test/r/win_percentile.result @@ -0,0 +1,305 @@ +CREATE TABLE t1 (name CHAR(10), test double, score DECIMAL(19,4)); +INSERT INTO t1 VALUES +('Chun', 0, 3), ('Chun', 0, 7), +('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), +('Kaolin', 0.5, 4), +('Tatiana', 0.8, 4), ('Tata', 0.8, 4); +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; +name c +Chun 5.00000 +Chun 5.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Tatiana 4.00000 +Tata 4.00000 +no partition clause +select name, percentile_disc(0.5) within group(order by score) over () from t1; +name percentile_disc(0.5) within group(order by score) over () +Chun 4.00000 +Chun 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Tatiana 4.00000 +Tata 4.00000 +select name, percentile_cont(0.5) within group(order by score) over () from t1; +name percentile_cont(0.5) within group(order by score) over () +Chun 4.00000 +Chun 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Tatiana 4.00000 +Tata 4.00000 +argument set to null +select 
name, percentile_cont(null) within group(order by score) over (partition by name) from t1; +ERROR HY000: Numeric values are only allowed as arguments to percentile functions +select name, percentile_disc(null) within group(order by score) over (partition by name) from t1; +ERROR HY000: Numeric values are only allowed as arguments to percentile functions +subqueries having percentile functions +select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; +name percentile_cont(0.5) within group ( order by score) over (partition by name ) +Chun 5.00000 +Chun 5.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Tatiana 4.00000 +Tata 4.00000 +select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; +name percentile_disc(0.5) within group ( order by score) over (partition by name ) +Chun 3.00000 +Chun 3.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Tatiana 4.00000 +Tata 4.00000 +select name from t1 a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from t1 b limit 1) >= 0.5; +name +Chun +Chun +Kaolin +Kaolin +Kaolin +Tatiana +Tata +disallowed fields in order by +select score, percentile_cont(0.5) within group(order by name) over (partition by score) from t1; +ERROR HY000: Numeric datatype is required for Percentile_CONT function +select score, percentile_disc(0.5) within group(order by name) over (partition by score) from t1; +score percentile_disc(0.5) within group(order by name) over (partition by score) +3.0000 Chun +7.0000 Chun +3.0000 Chun +7.0000 Chun +4.0000 Tata +4.0000 Tata +4.0000 Tata +order by clause has more than one element +select percentile_disc(0.5) within group(order by score,test) over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'test) 
over (partition by name) from t1' at line 1 +select percentile_cont(0.5) within group(order by score,test) over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'test) over (partition by name) from t1' at line 1 +parameter value should be in the range of [0,1] +select percentile_disc(1.5) within group(order by score) over (partition by name) from t1; +ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] +select percentile_cont(1.5) within group(order by score) over (partition by name) from t1; +ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] +select name,percentile_cont(test) within group(order by score) over (partition by name) from t1; +ERROR HY000: Argument to the percentile functions is not a constant +select name, percentile_disc(test) within group(order by score) over (partition by name) from t1; +ERROR HY000: Argument to the percentile functions is not a constant +only numerical types are allowed as argument to percentile functions +select name, percentile_cont(name) within group(order by score) over (partition by name) from t1; +ERROR HY000: Numeric values are only allowed as arguments to percentile functions +complete query with partition column +select name,cume_dist() over (partition by name order by score), percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; +name cume_dist() over (partition by name order by score) c +Chun 0.5000000000 3.00000 +Chun 1.0000000000 3.00000 +Kaolin 0.3333333333 4.00000 +Kaolin 1.0000000000 4.00000 +Kaolin 0.6666666667 4.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; +name c +Chun 5.00000 +Chun 5.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Kaolin 4.00000 +Tatiana 4.00000 +Tata 
4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.1) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 3.00000 +Chun 1.0000000000 3.00000 +Kaolin 0.3333333333 3.00000 +Kaolin 1.0000000000 3.00000 +Kaolin 0.6666666667 3.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.2) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 3.00000 +Chun 1.0000000000 3.00000 +Kaolin 0.3333333333 3.00000 +Kaolin 1.0000000000 3.00000 +Kaolin 0.6666666667 3.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.3) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 3.00000 +Chun 1.0000000000 3.00000 +Kaolin 0.3333333333 3.00000 +Kaolin 1.0000000000 3.00000 +Kaolin 0.6666666667 3.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.4) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 3.00000 +Chun 1.0000000000 3.00000 +Kaolin 0.3333333333 4.00000 +Kaolin 1.0000000000 4.00000 +Kaolin 0.6666666667 4.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 3.00000 +Chun 1.0000000000 3.00000 +Kaolin 0.3333333333 4.00000 +Kaolin 1.0000000000 4.00000 +Kaolin 0.6666666667 4.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.6) within group(order by score) over (partition by name) as c 
from t1; +name b c +Chun 0.5000000000 7.00000 +Chun 1.0000000000 7.00000 +Kaolin 0.3333333333 4.00000 +Kaolin 1.0000000000 4.00000 +Kaolin 0.6666666667 4.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.7) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 7.00000 +Chun 1.0000000000 7.00000 +Kaolin 0.3333333333 7.00000 +Kaolin 1.0000000000 7.00000 +Kaolin 0.6666666667 7.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.8) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 7.00000 +Chun 1.0000000000 7.00000 +Kaolin 0.3333333333 7.00000 +Kaolin 1.0000000000 7.00000 +Kaolin 0.6666666667 7.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.9) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 7.00000 +Chun 1.0000000000 7.00000 +Kaolin 0.3333333333 7.00000 +Kaolin 1.0000000000 7.00000 +Kaolin 0.6666666667 7.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(1) within group(order by score) over (partition by name) as c from t1; +name b c +Chun 0.5000000000 7.00000 +Chun 1.0000000000 7.00000 +Kaolin 0.3333333333 7.00000 +Kaolin 1.0000000000 7.00000 +Kaolin 0.6666666667 7.00000 +Tatiana 1.0000000000 4.00000 +Tata 1.0000000000 4.00000 +select median(score) over (partition by name), percentile_cont(0) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 3.00000 +5.00000 3.00000 +4.00000 3.00000 +4.00000 3.00000 +4.00000 3.00000 +4.00000 4.00000 +4.00000 4.00000 +select 
median(score) over (partition by name), percentile_cont(0.1) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 3.40000 +5.00000 3.40000 +4.00000 3.20000 +4.00000 3.20000 +4.00000 3.20000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.2) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 3.80000 +5.00000 3.80000 +4.00000 3.40000 +4.00000 3.40000 +4.00000 3.40000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.3) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 4.20000 +5.00000 4.20000 +4.00000 3.60000 +4.00000 3.60000 +4.00000 3.60000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.4) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 4.60000 +5.00000 4.60000 +4.00000 3.80000 +4.00000 3.80000 +4.00000 3.80000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 5.00000 +5.00000 5.00000 +4.00000 4.00000 +4.00000 4.00000 +4.00000 4.00000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.6) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 5.40000 +5.00000 5.40000 +4.00000 4.60000 +4.00000 4.60000 +4.00000 4.60000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.7) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 5.80000 +5.00000 5.80000 
+4.00000 5.20000 +4.00000 5.20000 +4.00000 5.20000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.8) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 6.20000 +5.00000 6.20000 +4.00000 5.80000 +4.00000 5.80000 +4.00000 5.80000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(0.9) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 6.60000 +5.00000 6.60000 +4.00000 6.40000 +4.00000 6.40000 +4.00000 6.40000 +4.00000 4.00000 +4.00000 4.00000 +select median(score) over (partition by name), percentile_cont(1) within group(order by score) over (partition by name) as c from t1; +median(score) over (partition by name) c +5.00000 7.00000 +5.00000 7.00000 +4.00000 7.00000 +4.00000 7.00000 +4.00000 7.00000 +4.00000 4.00000 +4.00000 4.00000 +drop table t1; diff --git a/mysql-test/t/win_percentile.test b/mysql-test/t/win_percentile.test new file mode 100644 index 00000000000..cab3478241c --- /dev/null +++ b/mysql-test/t/win_percentile.test @@ -0,0 +1,77 @@ +CREATE TABLE t1 (name CHAR(10), test double, score DECIMAL(19,4)); +INSERT INTO t1 VALUES +('Chun', 0, 3), ('Chun', 0, 7), +('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), +('Kaolin', 0.5, 4), +('Tatiana', 0.8, 4), ('Tata', 0.8, 4); + +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; + +--echo no partition clause +select name, percentile_disc(0.5) within group(order by score) over () from t1; +select name, percentile_cont(0.5) within group(order by score) over () from t1; + +--echo argument set to null +--error ER_WRONG_TYPE_OF_ARGUMENT +select name, percentile_cont(null) within group(order by score) over (partition by name) from t1; +--error ER_WRONG_TYPE_OF_ARGUMENT +select name, percentile_disc(null) within group(order by score) over (partition 
by name) from t1; + +--echo subqueries having percentile functions +select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; +select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; +select name from t1 a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from t1 b limit 1) >= 0.5; + +--echo disallowed fields in order by +--error ER_WRONG_TYPE_FOR_PERCENTILE_CONT +select score, percentile_cont(0.5) within group(order by name) over (partition by score) from t1; +select score, percentile_disc(0.5) within group(order by name) over (partition by score) from t1; + +--echo order by clause has more than one element +--error ER_PARSE_ERROR +select percentile_disc(0.5) within group(order by score,test) over (partition by name) from t1; +--error ER_PARSE_ERROR +select percentile_cont(0.5) within group(order by score,test) over (partition by name) from t1; + +--echo parameter value should be in the range of [0,1] +--error ER_ARGUMENT_OUT_OF_RANGE +select percentile_disc(1.5) within group(order by score) over (partition by name) from t1; +--error ER_ARGUMENT_OUT_OF_RANGE +select percentile_cont(1.5) within group(order by score) over (partition by name) from t1; + +--error ER_ARGUMENT_NOT_CONSTANT +select name,percentile_cont(test) within group(order by score) over (partition by name) from t1; +--error ER_ARGUMENT_NOT_CONSTANT +select name, percentile_disc(test) within group(order by score) over (partition by name) from t1; + +--echo only numerical types are allowed as argument to percentile functions +--error ER_WRONG_TYPE_OF_ARGUMENT +select name, percentile_cont(name) within group(order by score) over (partition by name) from t1; + +--echo complete query with partition column +select name,cume_dist() over (partition by name order by score), percentile_disc(0.5) within group(order by score) over 
(partition by name) as c from t1; +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; + +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.1) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.2) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.3) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.4) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.6) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.7) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.8) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.9) within group(order by score) over (partition by name) as c from t1; +select name,cume_dist() over (partition by name order by score) as b, percentile_disc(1) within group(order by score) over (partition by name) as c from t1; + +select median(score) over (partition by name), percentile_cont(0) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.1) within group(order by score) over (partition by name) as 
c from t1; +select median(score) over (partition by name), percentile_cont(0.2) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.3) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.4) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.6) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.7) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.8) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(0.9) within group(order by score) over (partition by name) as c from t1; +select median(score) over (partition by name), percentile_cont(1) within group(order by score) over (partition by name) as c from t1; +drop table t1; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index f941900646e..8d835eac903 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -176,7 +176,7 @@ bool Item_window_func::check_result_type_of_order_item() Item_result rtype= window_spec->order_list->first->item[0]->cmp_type(); // TODO (varun) : support date type in percentile_cont function if (rtype != REAL_RESULT && rtype != INT_RESULT && - rtype != DECIMAL_RESULT && rtype != TIME_RESULT + rtype != DECIMAL_RESULT && rtype != TIME_RESULT && window_func()->sum_func() == Item_sum::PERCENTILE_CONT_FUNC) { my_error(ER_WRONG_TYPE_FOR_PERCENTILE_CONT, MYF(0)); From 0ef6127c291c1b2ce2650c9ea4213293c8058741 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Fri, 
27 Oct 2017 00:10:22 +0530 Subject: [PATCH 30/33] Date-time fields are disabled currently for the result type of percentile function --- sql/item_windowfunc.cc | 5 ++--- sql/share/errmsg-utf8.txt | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index 8d835eac903..d38befa66bc 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -176,10 +176,9 @@ bool Item_window_func::check_result_type_of_order_item() Item_result rtype= window_spec->order_list->first->item[0]->cmp_type(); // TODO (varun) : support date type in percentile_cont function if (rtype != REAL_RESULT && rtype != INT_RESULT && - rtype != DECIMAL_RESULT && rtype != TIME_RESULT && - window_func()->sum_func() == Item_sum::PERCENTILE_CONT_FUNC) + rtype != DECIMAL_RESULT && rtype != TIME_RESULT) { - my_error(ER_WRONG_TYPE_FOR_PERCENTILE_CONT, MYF(0)); + my_error(ER_WRONG_TYPE_FOR_PERCENTILE_FUNC, MYF(0)); return TRUE; } setting_handler_for_percentile_functions(rtype); diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 415d47df36e..22da221a396 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7788,8 +7788,8 @@ ER_UNKNOWN_COMPRESSION_METHOD eng "Unknown compression method: %s" ER_NOT_SINGLE_ELEMENT_ORDER_LIST eng "Incorrect number of elements in the order list for '%s'" -ER_WRONG_TYPE_FOR_PERCENTILE_CONT - eng "Numeric datatype is required for Percentile_CONT function" +ER_WRONG_TYPE_FOR_PERCENTILE_FUNC + eng "Numeric datatype is required for %s function" ER_ARGUMENT_NOT_CONSTANT eng "Argument to the percentile functions is not a constant" ER_ARGUMENT_OUT_OF_RANGE From 40887913ff4bb135d1b11495cd60c7a6f9402b8d Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Fri, 27 Oct 2017 00:16:13 +0530 Subject: [PATCH 31/33] Update the error messages involving percentile functions --- sql/item_windowfunc.h | 8 ++++---- sql/share/errmsg-utf8.txt | 6 +++--- 2 files changed, 7 insertions(+), 7 
deletions(-) diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 8b3bd5a26e2..a3056dc222a 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -766,7 +766,7 @@ public: prev_value= arg->val_real(); if (prev_value > 1 || prev_value < 0) { - my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); + my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0), func_name()); return true; } first_call= false; @@ -776,7 +776,7 @@ public: if (prev_value != arg_val) { - my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); + my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0), func_name()); return true; } @@ -887,7 +887,7 @@ public: prev_value= arg->val_real(); if (prev_value > 1 || prev_value < 0) { - my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0)); + my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0), func_name()); return true; } } @@ -895,7 +895,7 @@ public: double arg_val= arg->val_real(); if (prev_value != arg_val) { - my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0)); + my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0), func_name()); return true; } diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 22da221a396..edde39a6feb 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7791,8 +7791,8 @@ ER_NOT_SINGLE_ELEMENT_ORDER_LIST ER_WRONG_TYPE_FOR_PERCENTILE_FUNC eng "Numeric datatype is required for %s function" ER_ARGUMENT_NOT_CONSTANT - eng "Argument to the percentile functions is not a constant" + eng "Argument to the %s function is not a constant for a partition" ER_ARGUMENT_OUT_OF_RANGE - eng "Argument to the percentile functions does not belong to the range [0,1]" + eng "Argument to the %s function does not belong to the range [0,1]" ER_WRONG_TYPE_OF_ARGUMENT - eng "Numeric values are only allowed as arguments to percentile functions" \ No newline at end of file + eng "%s function only accepts arguments that can be converted to numerical types" \ No newline at end of file From ab5503c8c5aca9c672db5ceb2c3636b598290a21 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Fri, 27 
Oct 2017 20:04:05 +0530 Subject: [PATCH 32/33] Updates the tests for the percentile functions --- mysql-test/r/win_percentile.result | 469 +++++++++++++++-------------- mysql-test/t/percentile.test | 41 --- mysql-test/t/win_percentile.test | 77 +++-- sql/item_windowfunc.cc | 8 +- sql/item_windowfunc.h | 14 +- sql/sql_window.cc | 2 +- 6 files changed, 309 insertions(+), 302 deletions(-) delete mode 100644 mysql-test/t/percentile.test diff --git a/mysql-test/r/win_percentile.result b/mysql-test/r/win_percentile.result index b365a995da0..c51e2e6bd51 100644 --- a/mysql-test/r/win_percentile.result +++ b/mysql-test/r/win_percentile.result @@ -4,58 +4,88 @@ INSERT INTO t1 VALUES ('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7), ('Kaolin', 0.5, 4), ('Tatiana', 0.8, 4), ('Tata', 0.8, 4); +# +# Test invalid syntax +# +# Order by clause has more than one element +select percentile_disc(0.5) within group(order by score,test) over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'test) over (partition by name) from t1' at line 1 +select percentile_cont(0.5) within group(order by score,test) over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'test) over (partition by name) from t1' at line 1 +# Order by clause has no element +select percentile_disc(0.5) within group() over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ') over (partition by name) from t1' at line 1 +select percentile_cont(0.5) within group() over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ') over 
(partition by name) from t1' at line 1 +# No parameters to the percentile functions +select percentile_disc() within group() over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ') within group() over (partition by name) from t1' at line 1 +select percentile_cont() within group() over (partition by name) from t1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ') within group() over (partition by name) from t1' at line 1 +# +# Test simple syntax +# select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; name c -Chun 5.00000 -Chun 5.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Tatiana 4.00000 -Tata 4.00000 -no partition clause +Chun 5.0000000000 +Chun 5.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +select name, percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; +name c +Chun 3.0000000000 +Chun 3.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +# no partition clause select name, percentile_disc(0.5) within group(order by score) over () from t1; name percentile_disc(0.5) within group(order by score) over () -Chun 4.00000 -Chun 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Tatiana 4.00000 -Tata 4.00000 +Chun 4.0000000000 +Chun 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 select name, percentile_cont(0.5) within group(order by score) over () from t1; name percentile_cont(0.5) within group(order by score) over () -Chun 4.00000 -Chun 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Tatiana 4.00000 -Tata 4.00000 -argument set to 
null +Chun 4.0000000000 +Chun 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 +# argument set to null select name, percentile_cont(null) within group(order by score) over (partition by name) from t1; -ERROR HY000: Numeric values are only allowed as arguments to percentile functions +ERROR HY000: percentile_cont function only accepts arguments that can be converted to numerical types select name, percentile_disc(null) within group(order by score) over (partition by name) from t1; -ERROR HY000: Numeric values are only allowed as arguments to percentile functions -subqueries having percentile functions +ERROR HY000: percentile_disc function only accepts arguments that can be converted to numerical types +#subqueries having percentile functions select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; name percentile_cont(0.5) within group ( order by score) over (partition by name ) -Chun 5.00000 -Chun 5.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Tatiana 4.00000 -Tata 4.00000 +Chun 5.0000000000 +Chun 5.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; name percentile_disc(0.5) within group ( order by score) over (partition by name ) -Chun 3.00000 -Chun 3.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Tatiana 4.00000 -Tata 4.00000 +Chun 3.0000000000 +Chun 3.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 select name from t1 a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from t1 b limit 1) >= 0.5; name Chun @@ -65,241 +95,232 @@ Kaolin Kaolin Tatiana Tata -disallowed fields in order by +#disallowed fields in order by select 
score, percentile_cont(0.5) within group(order by name) over (partition by score) from t1; -ERROR HY000: Numeric datatype is required for Percentile_CONT function +ERROR HY000: Numeric datatype is required for percentile_cont function select score, percentile_disc(0.5) within group(order by name) over (partition by score) from t1; -score percentile_disc(0.5) within group(order by name) over (partition by score) -3.0000 Chun -7.0000 Chun -3.0000 Chun -7.0000 Chun -4.0000 Tata -4.0000 Tata -4.0000 Tata -order by clause has more than one element -select percentile_disc(0.5) within group(order by score,test) over (partition by name) from t1; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'test) over (partition by name) from t1' at line 1 -select percentile_cont(0.5) within group(order by score,test) over (partition by name) from t1; -ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'test) over (partition by name) from t1' at line 1 -parameter value should be in the range of [0,1] +ERROR HY000: Numeric datatype is required for percentile_disc function +#parameter value should be in the range of [0,1] select percentile_disc(1.5) within group(order by score) over (partition by name) from t1; -ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] +ERROR HY000: Argument to the percentile_disc function does not belong to the range [0,1] select percentile_cont(1.5) within group(order by score) over (partition by name) from t1; -ERROR HY000: Argument to the percentile functions does not belong to the range [0,1] +ERROR HY000: Argument to the percentile_cont function does not belong to the range [0,1] +#Argument should remain constant for the entire partition select name,percentile_cont(test) within group(order by score) over (partition by name) 
from t1; -ERROR HY000: Argument to the percentile functions is not a constant +ERROR HY000: Argument to the percentile_cont function is not a constant for a partition select name, percentile_disc(test) within group(order by score) over (partition by name) from t1; -ERROR HY000: Argument to the percentile functions is not a constant -only numerical types are allowed as argument to percentile functions +ERROR HY000: Argument to the percentile_disc function is not a constant for a partition +#only numerical types are allowed as argument to percentile functions select name, percentile_cont(name) within group(order by score) over (partition by name) from t1; -ERROR HY000: Numeric values are only allowed as arguments to percentile functions -complete query with partition column +ERROR HY000: percentile_cont function only accepts arguments that can be converted to numerical types +select name, percentile_disc(name) within group(order by score) over (partition by name) from t1; +ERROR HY000: percentile_disc function only accepts arguments that can be converted to numerical types +#complete query with partition column select name,cume_dist() over (partition by name order by score), percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; name cume_dist() over (partition by name order by score) c -Chun 0.5000000000 3.00000 -Chun 1.0000000000 3.00000 -Kaolin 0.3333333333 4.00000 -Kaolin 1.0000000000 4.00000 -Kaolin 0.6666666667 4.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 3.0000000000 +Chun 1.0000000000 3.0000000000 +Kaolin 0.3333333333 4.0000000000 +Kaolin 1.0000000000 4.0000000000 +Kaolin 0.6666666667 4.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; name c -Chun 5.00000 -Chun 5.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Kaolin 4.00000 -Tatiana 4.00000 -Tata 4.00000 +Chun 
5.0000000000 +Chun 5.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Kaolin 4.0000000000 +Tatiana 4.0000000000 +Tata 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.1) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 3.00000 -Chun 1.0000000000 3.00000 -Kaolin 0.3333333333 3.00000 -Kaolin 1.0000000000 3.00000 -Kaolin 0.6666666667 3.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 3.0000000000 +Chun 1.0000000000 3.0000000000 +Kaolin 0.3333333333 3.0000000000 +Kaolin 1.0000000000 3.0000000000 +Kaolin 0.6666666667 3.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.2) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 3.00000 -Chun 1.0000000000 3.00000 -Kaolin 0.3333333333 3.00000 -Kaolin 1.0000000000 3.00000 -Kaolin 0.6666666667 3.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 3.0000000000 +Chun 1.0000000000 3.0000000000 +Kaolin 0.3333333333 3.0000000000 +Kaolin 1.0000000000 3.0000000000 +Kaolin 0.6666666667 3.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.3) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 3.00000 -Chun 1.0000000000 3.00000 -Kaolin 0.3333333333 3.00000 -Kaolin 1.0000000000 3.00000 -Kaolin 0.6666666667 3.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 3.0000000000 +Chun 1.0000000000 3.0000000000 +Kaolin 0.3333333333 3.0000000000 +Kaolin 1.0000000000 3.0000000000 +Kaolin 0.6666666667 3.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as 
b, percentile_disc(0.4) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 3.00000 -Chun 1.0000000000 3.00000 -Kaolin 0.3333333333 4.00000 -Kaolin 1.0000000000 4.00000 -Kaolin 0.6666666667 4.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 3.0000000000 +Chun 1.0000000000 3.0000000000 +Kaolin 0.3333333333 4.0000000000 +Kaolin 1.0000000000 4.0000000000 +Kaolin 0.6666666667 4.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 3.00000 -Chun 1.0000000000 3.00000 -Kaolin 0.3333333333 4.00000 -Kaolin 1.0000000000 4.00000 -Kaolin 0.6666666667 4.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 3.0000000000 +Chun 1.0000000000 3.0000000000 +Kaolin 0.3333333333 4.0000000000 +Kaolin 1.0000000000 4.0000000000 +Kaolin 0.6666666667 4.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.6) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 7.00000 -Chun 1.0000000000 7.00000 -Kaolin 0.3333333333 4.00000 -Kaolin 1.0000000000 4.00000 -Kaolin 0.6666666667 4.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 7.0000000000 +Chun 1.0000000000 7.0000000000 +Kaolin 0.3333333333 4.0000000000 +Kaolin 1.0000000000 4.0000000000 +Kaolin 0.6666666667 4.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.7) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 7.00000 -Chun 1.0000000000 7.00000 -Kaolin 0.3333333333 7.00000 -Kaolin 
1.0000000000 7.00000 -Kaolin 0.6666666667 7.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 7.0000000000 +Chun 1.0000000000 7.0000000000 +Kaolin 0.3333333333 7.0000000000 +Kaolin 1.0000000000 7.0000000000 +Kaolin 0.6666666667 7.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.8) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 7.00000 -Chun 1.0000000000 7.00000 -Kaolin 0.3333333333 7.00000 -Kaolin 1.0000000000 7.00000 -Kaolin 0.6666666667 7.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 7.0000000000 +Chun 1.0000000000 7.0000000000 +Kaolin 0.3333333333 7.0000000000 +Kaolin 1.0000000000 7.0000000000 +Kaolin 0.6666666667 7.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(0.9) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 7.00000 -Chun 1.0000000000 7.00000 -Kaolin 0.3333333333 7.00000 -Kaolin 1.0000000000 7.00000 -Kaolin 0.6666666667 7.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 7.0000000000 +Chun 1.0000000000 7.0000000000 +Kaolin 0.3333333333 7.0000000000 +Kaolin 1.0000000000 7.0000000000 +Kaolin 0.6666666667 7.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select name,cume_dist() over (partition by name order by score) as b, percentile_disc(1) within group(order by score) over (partition by name) as c from t1; name b c -Chun 0.5000000000 7.00000 -Chun 1.0000000000 7.00000 -Kaolin 0.3333333333 7.00000 -Kaolin 1.0000000000 7.00000 -Kaolin 0.6666666667 7.00000 -Tatiana 1.0000000000 4.00000 -Tata 1.0000000000 4.00000 +Chun 0.5000000000 7.0000000000 +Chun 1.0000000000 7.0000000000 +Kaolin 0.3333333333 7.0000000000 
+Kaolin 1.0000000000 7.0000000000 +Kaolin 0.6666666667 7.0000000000 +Tatiana 1.0000000000 4.0000000000 +Tata 1.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 3.00000 -5.00000 3.00000 -4.00000 3.00000 -4.00000 3.00000 -4.00000 3.00000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 3.0000000000 +5.0000000000 3.0000000000 +4.0000000000 3.0000000000 +4.0000000000 3.0000000000 +4.0000000000 3.0000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.1) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 3.40000 -5.00000 3.40000 -4.00000 3.20000 -4.00000 3.20000 -4.00000 3.20000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 3.4000000000 +5.0000000000 3.4000000000 +4.0000000000 3.2000000000 +4.0000000000 3.2000000000 +4.0000000000 3.2000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.2) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 3.80000 -5.00000 3.80000 -4.00000 3.40000 -4.00000 3.40000 -4.00000 3.40000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 3.8000000000 +5.0000000000 3.8000000000 +4.0000000000 3.4000000000 +4.0000000000 3.4000000000 +4.0000000000 3.4000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.3) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 4.20000 -5.00000 4.20000 -4.00000 3.60000 -4.00000 3.60000 -4.00000 3.60000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 4.2000000000 +5.0000000000 4.2000000000 +4.0000000000 3.6000000000 +4.0000000000 3.6000000000 
+4.0000000000 3.6000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.4) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 4.60000 -5.00000 4.60000 -4.00000 3.80000 -4.00000 3.80000 -4.00000 3.80000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 4.6000000000 +5.0000000000 4.6000000000 +4.0000000000 3.8000000000 +4.0000000000 3.8000000000 +4.0000000000 3.8000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 5.00000 -5.00000 5.00000 -4.00000 4.00000 -4.00000 4.00000 -4.00000 4.00000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 5.0000000000 +5.0000000000 5.0000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.6) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 5.40000 -5.00000 5.40000 -4.00000 4.60000 -4.00000 4.60000 -4.00000 4.60000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 5.4000000000 +5.0000000000 5.4000000000 +4.0000000000 4.6000000000 +4.0000000000 4.6000000000 +4.0000000000 4.6000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.7) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 5.80000 -5.00000 5.80000 -4.00000 5.20000 -4.00000 5.20000 -4.00000 5.20000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 5.8000000000 +5.0000000000 5.8000000000 +4.0000000000 5.2000000000 +4.0000000000 5.2000000000 +4.0000000000 5.2000000000 +4.0000000000 4.0000000000 
+4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.8) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 6.20000 -5.00000 6.20000 -4.00000 5.80000 -4.00000 5.80000 -4.00000 5.80000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 6.2000000000 +5.0000000000 6.2000000000 +4.0000000000 5.8000000000 +4.0000000000 5.8000000000 +4.0000000000 5.8000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(0.9) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 6.60000 -5.00000 6.60000 -4.00000 6.40000 -4.00000 6.40000 -4.00000 6.40000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 6.6000000000 +5.0000000000 6.6000000000 +4.0000000000 6.4000000000 +4.0000000000 6.4000000000 +4.0000000000 6.4000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 select median(score) over (partition by name), percentile_cont(1) within group(order by score) over (partition by name) as c from t1; median(score) over (partition by name) c -5.00000 7.00000 -5.00000 7.00000 -4.00000 7.00000 -4.00000 7.00000 -4.00000 7.00000 -4.00000 4.00000 -4.00000 4.00000 +5.0000000000 7.0000000000 +5.0000000000 7.0000000000 +4.0000000000 7.0000000000 +4.0000000000 7.0000000000 +4.0000000000 7.0000000000 +4.0000000000 4.0000000000 +4.0000000000 4.0000000000 drop table t1; diff --git a/mysql-test/t/percentile.test b/mysql-test/t/percentile.test deleted file mode 100644 index 0958fc05e7d..00000000000 --- a/mysql-test/t/percentile.test +++ /dev/null @@ -1,41 +0,0 @@ -CREATE TABLE student (name CHAR(10), test double, score TINYINT); - -INSERT INTO student VALUES -('Chun', 0, null), ('Chun', 0, 4), -('Esben', 1, null), ('Esben', 1, null), -('Kaolin', 0.5, 56), ('Kaolin', 0.5, 88), -('Tatiana', 0.8, 2), ('Tatiana', 0.8, 1); - - - -select name, percentile_disc(0.6) within 
group(order by score) over (partition by name) from student; -select name, percentile_disc(test) within group(order by score) over (partition by name) from student; -select name, percentile_disc(0.4) within group(order by score) over (partition by name) from student; - - -#select name, percentile_cont(null) within group(order by score) over (partition by name) from student; -#select name, cume_dist() over (partition by name order by score) from student; - - -#normal parsing -#select percentile_cont(0.5) within group(order by score) over w1 from student -#window w1 AS (partition by name); - -# no partition clause -#select percentile_cont(0.5) within group(order by score) over () from student; - - -# only one sort allowed -#select percentile_cont(0.5) within group(order by score) over (partition by name); - -#parameter value should be in the range of 0 to 1 -#select percentile_cont(1.5) within group(order by score) over (partition by name); - - -# -#select rank() over (partition by name order by score ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from student; - - - -drop table student; - diff --git a/mysql-test/t/win_percentile.test b/mysql-test/t/win_percentile.test index cab3478241c..468d8cff56b 100644 --- a/mysql-test/t/win_percentile.test +++ b/mysql-test/t/win_percentile.test @@ -5,50 +5,77 @@ INSERT INTO t1 VALUES ('Kaolin', 0.5, 4), ('Tatiana', 0.8, 4), ('Tata', 0.8, 4); -select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; +--echo # +--echo # Test invalid syntax +--echo # ---echo no partition clause -select name, percentile_disc(0.5) within group(order by score) over () from t1; -select name, percentile_cont(0.5) within group(order by score) over () from t1; - ---echo argument set to null ---error ER_WRONG_TYPE_OF_ARGUMENT -select name, percentile_cont(null) within group(order by score) over (partition by name) from t1; ---error ER_WRONG_TYPE_OF_ARGUMENT -select name, percentile_disc(null) within 
group(order by score) over (partition by name) from t1; - ---echo subqueries having percentile functions -select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; -select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; -select name from t1 a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from t1 b limit 1) >= 0.5; - ---echo disallowed fields in order by ---error ER_WRONG_TYPE_FOR_PERCENTILE_CONT -select score, percentile_cont(0.5) within group(order by name) over (partition by score) from t1; -select score, percentile_disc(0.5) within group(order by name) over (partition by score) from t1; - ---echo order by clause has more than one element +--echo # Order by clause has more than one element --error ER_PARSE_ERROR select percentile_disc(0.5) within group(order by score,test) over (partition by name) from t1; --error ER_PARSE_ERROR select percentile_cont(0.5) within group(order by score,test) over (partition by name) from t1; ---echo parameter value should be in the range of [0,1] +--echo # Order by clause has no element +--error ER_PARSE_ERROR +select percentile_disc(0.5) within group() over (partition by name) from t1; +--error ER_PARSE_ERROR +select percentile_cont(0.5) within group() over (partition by name) from t1; + +--echo # No parameters to the percentile functions +--error ER_PARSE_ERROR +select percentile_disc() within group() over (partition by name) from t1; +--error ER_PARSE_ERROR +select percentile_cont() within group() over (partition by name) from t1; + + + +--echo # +--echo # Test simple syntax +--echo # + +select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; +select name, percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; + +--echo # no partition clause +select name, 
percentile_disc(0.5) within group(order by score) over () from t1; +select name, percentile_cont(0.5) within group(order by score) over () from t1; + +--echo # argument set to null +--error ER_WRONG_TYPE_OF_ARGUMENT +select name, percentile_cont(null) within group(order by score) over (partition by name) from t1; +--error ER_WRONG_TYPE_OF_ARGUMENT +select name, percentile_disc(null) within group(order by score) over (partition by name) from t1; + +--echo #subqueries having percentile functions +select * from ( select name , percentile_cont(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; +select * from ( select name , percentile_disc(0.5) within group ( order by score) over (partition by name ) from t1 ) as t; +select name from t1 a where (select percentile_disc(0.5) within group (order by score) over (partition by name) from t1 b limit 1) >= 0.5; + +--echo #disallowed fields in order by +--error ER_WRONG_TYPE_FOR_PERCENTILE_FUNC +select score, percentile_cont(0.5) within group(order by name) over (partition by score) from t1; +--error ER_WRONG_TYPE_FOR_PERCENTILE_FUNC +select score, percentile_disc(0.5) within group(order by name) over (partition by score) from t1; + +--echo #parameter value should be in the range of [0,1] --error ER_ARGUMENT_OUT_OF_RANGE select percentile_disc(1.5) within group(order by score) over (partition by name) from t1; --error ER_ARGUMENT_OUT_OF_RANGE select percentile_cont(1.5) within group(order by score) over (partition by name) from t1; +--echo #Argument should remain constant for the entire partition --error ER_ARGUMENT_NOT_CONSTANT select name,percentile_cont(test) within group(order by score) over (partition by name) from t1; --error ER_ARGUMENT_NOT_CONSTANT select name, percentile_disc(test) within group(order by score) over (partition by name) from t1; ---echo only numerical types are allowed as argument to percentile functions +--echo #only numerical types are allowed as argument to percentile 
functions --error ER_WRONG_TYPE_OF_ARGUMENT select name, percentile_cont(name) within group(order by score) over (partition by name) from t1; +--error ER_WRONG_TYPE_OF_ARGUMENT +select name, percentile_disc(name) within group(order by score) over (partition by name) from t1; ---echo complete query with partition column +--echo #complete query with partition column select name,cume_dist() over (partition by name order by score), percentile_disc(0.5) within group(order by score) over (partition by name) as c from t1; select name, percentile_cont(0.5) within group(order by score) over (partition by name) as c from t1; diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc index d38befa66bc..8432ab43ad8 100644 --- a/sql/item_windowfunc.cc +++ b/sql/item_windowfunc.cc @@ -174,11 +174,11 @@ bool Item_window_func::check_result_type_of_order_item() if (only_single_element_order_list()) { Item_result rtype= window_spec->order_list->first->item[0]->cmp_type(); - // TODO (varun) : support date type in percentile_cont function + // TODO (varun) : support date type in percentile_cont function if (rtype != REAL_RESULT && rtype != INT_RESULT && rtype != DECIMAL_RESULT && rtype != TIME_RESULT) { - my_error(ER_WRONG_TYPE_FOR_PERCENTILE_FUNC, MYF(0)); + my_error(ER_WRONG_TYPE_FOR_PERCENTILE_FUNC, MYF(0), window_func()->func_name()); return TRUE; } setting_handler_for_percentile_functions(rtype); @@ -249,7 +249,7 @@ bool Item_sum_percentile_cont::fix_fields(THD *thd, Item **ref) case INT_RESULT: break; default: - my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0)); + my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0), func_name()); return TRUE; } return res; @@ -268,7 +268,7 @@ bool Item_sum_percentile_disc::fix_fields(THD *thd, Item **ref) case INT_RESULT: break; default: - my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0)); + my_error(ER_WRONG_TYPE_OF_ARGUMENT, MYF(0), func_name()); return TRUE; } return res; diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index a3056dc222a..849c298f5aa 
100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -786,7 +786,7 @@ public: value->store(order_item); value->cache_value(); if (value->null_value) - return false; + return false; Item_sum_cume_dist::add(); double val= Item_sum_cume_dist::val_real(); @@ -821,7 +821,7 @@ public: void fix_length_and_dec() { - decimals = 5; // TODO-cvicentiu find out how many decimals the standard + decimals = 10; // TODO-cvicentiu find out how many decimals the standard // requires. } @@ -904,24 +904,24 @@ public: floor_value->store(order_item); floor_value->cache_value(); if (floor_value->null_value) - return false; + return false; } if (floor_val_calculated && !ceil_val_calculated) { ceil_value->store(order_item); ceil_value->cache_value(); if (ceil_value->null_value) - return false; + return false; } Item_sum_cume_dist::add(); double val= 1 + prev_value * (get_row_count()-1); if (!floor_val_calculated && get_row_number() == floor(val)) - floor_val_calculated= true; + floor_val_calculated= true; if (!ceil_val_calculated && get_row_number() == ceil(val)) - ceil_val_calculated= true; + ceil_val_calculated= true; return false; } @@ -951,7 +951,7 @@ public: void fix_length_and_dec() { - decimals = 5; // TODO-cvicentiu find out how many decimals the standard + decimals = 10; // TODO-cvicentiu find out how many decimals the standard // requires. 
} diff --git a/sql/sql_window.cc b/sql/sql_window.cc index 059dd8073f5..4bcdca3ca11 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -1765,7 +1765,7 @@ protected: } }; -class Frame_unbounded_following_set_count_no_nulls: +class Frame_unbounded_following_set_count_no_nulls: public Frame_unbounded_following_set_count { From a607e4e7aa2d2fb5d9eeee35ba06d051bbcc35f6 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Sat, 28 Oct 2017 22:28:31 +0300 Subject: [PATCH 33/33] Added the syntax for percentile functions and median function to the sql_yacc_ora.yy file --- sql/sql_yacc_ora.yy | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index e04c7ee60b3..a50a4ad8ccb 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -645,6 +645,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_STATEMENT_TIME_SYM %token MAX_USER_CONNECTIONS_SYM %token MAXVALUE_SYM /* SQL-2003-N */ +%token MEDIAN_SYM %token MEDIUMBLOB %token MEDIUMINT %token MEDIUMTEXT @@ -1151,6 +1152,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); window_func simple_window_func inverse_distribution_function + percentile_function inverse_distribution_function_def explicit_cursor_attr function_call_keyword @@ -10723,13 +10725,9 @@ simple_window_func: MYSQL_YYABORT; } ; - inverse_distribution_function: - inverse_distribution_function_def WITHIN GROUP_SYM - '(' - { Select->prepare_add_window_spec(thd); } - order_by_single_element_list ')' OVER_SYM - '(' opt_window_ref opt_window_partition_clause ')' + percentile_function OVER_SYM + '(' opt_window_partition_clause ')' { LEX *lex= Lex; if (Select->add_window_spec(thd, lex->win_ref, @@ -10746,6 +10744,29 @@ inverse_distribution_function: } ; +percentile_function: + inverse_distribution_function_def WITHIN GROUP_SYM '(' + { Select->prepare_add_window_spec(thd); } + order_by_single_element_list ')' + { + $$= $1; + } + | 
MEDIAN_SYM '(' expr ')' + { + Item *args= new (thd->mem_root) Item_decimal(thd, "0.5", 3, + thd->charset()); + if (($$ == NULL) || (thd->is_error())) + { + MYSQL_YYABORT; + } + if (add_order_to_list(thd, $3,FALSE)) MYSQL_YYABORT; + + $$= new (thd->mem_root) Item_sum_percentile_cont(thd, args); + if ($$ == NULL) + MYSQL_YYABORT; + } + ; + inverse_distribution_function_def: PERCENTILE_CONT_SYM '(' expr ')' { @@ -10762,7 +10783,8 @@ inverse_distribution_function_def: ; order_by_single_element_list: - ORDER_SYM BY order_list + ORDER_SYM BY order_ident order_dir + { if (add_order_to_list(thd, $3,(bool) $4)) MYSQL_YYABORT; } ; window_name: