Bug#16447483: PARTITION PRUNING IS NOT CORRECT FOR RANGE COLUMNS

The problem was in get_partition_id_cols_range_for_endpoint and cmp_rec_and_tuple_prune, which stepped one partition too long. Solution was to move a small portion of logic to cmp_rec_and_tuple_prune, to simplify both get_partition_id_cols_range_for_endpoint and get_partition_id_cols_list_for_endpoint.
2013-05-16 11:02:39 +02:00 · 2013-05-16 11:02:39 +02:00 · 23c5840d52
commit 23c5840d52
parent 2ee012b2e8
1 changed files with 206 additions and 141 deletions
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@ -173,7 +173,8 @@ int get_part_iter_for_interval_via_walking(partition_info *part_info,
 static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec);
 static int cmp_rec_and_tuple_prune(part_column_list_val *val,
                                   uint32 n_vals_in_rec,
-                                   bool tail_is_min);
+                                   bool is_left_endpoint,
+                                   bool include_endpoint);

 /*
  Convert constants in VALUES definition to the character set the
@ -3293,44 +3294,6 @@ notfound:
 }


-/*
-  Find the sub-array part_info->list_array that corresponds to given interval
-
-  SYNOPSIS 
-    get_list_array_idx_for_endpoint()
-      part_info         Partitioning info (partitioning type must be LIST)
-      left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
-                        FALSE - the interval is (-inf; a] or (-inf; a)
-      include_endpoint  TRUE iff the interval includes the endpoint
-
-  DESCRIPTION
-    This function finds the sub-array of part_info->list_array where values of
-    list_array[idx].list_value are contained within the specifed interval.
-    list_array is ordered by list_value, so
-    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the 
-       sought sub-array starts at some index idx and continues till array end.
-       The function returns first number idx, such that 
-       list_array[idx].list_value is contained within the passed interval.
-       
-    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
-       sought sub-array starts at array start and continues till some last 
-       index idx.
-       The function returns first number idx, such that 
-       list_array[idx].list_value is NOT contained within the passed interval.
-       If all array elements are contained, part_info->num_list_values is
-       returned.
-
-  NOTE
-    The caller will call this function and then will run along the sub-array of
-    list_array to collect partition ids. If the number of list values is 
-    significantly higher then number of partitions, this could be slow and
-    we could invent some other approach. The "run over list array" part is
-    already wrapped in a get_next()-like function.
-
-  RETURN
-    The edge of corresponding sub-array of part_info->list_array
-*/
-
 uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint,
@ -3338,37 +3301,81 @@ uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
 {
  part_column_list_val *list_col_array= part_info->list_col_array;
  uint num_columns= part_info->part_field_list.elements;
-  int list_index, cmp;
+  uint list_index;
  uint min_list_index= 0;
-  uint max_list_index= part_info->num_list_values - 1;
-  bool tailf= !(left_endpoint ^ include_endpoint);
+  uint max_list_index= part_info->num_list_values;
  DBUG_ENTER("get_partition_id_cols_list_for_endpoint");

+  /* Find the matching partition (including taking endpoint into account). */
  do
  {
+    /* Midpoint, adjusted down, so it can never be > last index. */
    list_index= (max_list_index + min_list_index) >> 1;
-    cmp= cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
-                                 nparts, tailf);
-    if (cmp > 0)
+    if (cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
+                                nparts, left_endpoint, include_endpoint) > 0)
      min_list_index= list_index + 1;
-    else if (cmp < 0)
-    {
-      if (!list_index)
-        goto notfound;
-      max_list_index= list_index - 1;
-    }
-    else 
-    {
-      DBUG_RETURN(list_index + test(!tailf));
-    }
-  } while (max_list_index >= min_list_index);
-  if (cmp > 0)
-    list_index++;
-notfound:
+    else
+      max_list_index= list_index;
+  } while (max_list_index > min_list_index);
+  list_index= max_list_index;
+
+  /* Given value must be LESS THAN or EQUAL to the found partition. */
+  DBUG_ASSERT(list_index == part_info->num_list_values ||
+              (0 >= cmp_rec_and_tuple_prune(list_col_array +
+                                              list_index*num_columns,
+                                            nparts, left_endpoint,
+                                            include_endpoint)));
+  /* Given value must be GREATER THAN the previous partition. */
+  DBUG_ASSERT(list_index == 0 ||
+              (0 < cmp_rec_and_tuple_prune(list_col_array +
+                                            (list_index - 1)*num_columns,
+                                           nparts, left_endpoint,
+                                           include_endpoint)));
+
+  if (!left_endpoint)
+  {
+    /* Set the end after this list tuple if not already after the last. */
+    if (list_index < part_info->num_parts)
+      list_index++;
+  }
+
  DBUG_RETURN(list_index);
 }


+/**
+  Find the sub-array part_info->list_array that corresponds to given interval.
+
+  @param part_info         Partitioning info (partitioning type must be LIST)
+  @param left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
+                           FALSE - the interval is (-inf; a] or (-inf; a)
+  @param include_endpoint  TRUE iff the interval includes the endpoint
+
+  This function finds the sub-array of part_info->list_array where values of
+  list_array[idx].list_value are contained within the specifed interval.
+  list_array is ordered by list_value, so
+  1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
+     sought sub-array starts at some index idx and continues till array end.
+     The function returns first number idx, such that
+     list_array[idx].list_value is contained within the passed interval.
+
+  2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
+     sought sub-array starts at array start and continues till some last
+     index idx.
+     The function returns first number idx, such that
+     list_array[idx].list_value is NOT contained within the passed interval.
+     If all array elements are contained, part_info->num_list_values is
+     returned.
+
+  @note The caller will call this function and then will run along the
+  sub-array of list_array to collect partition ids. If the number of list
+  values is significantly higher then number of partitions, this could be slow
+  and we could invent some other approach. The "run over list array" part is
+  already wrapped in a get_next()-like function.
+
+  @return The index of corresponding sub-array of part_info->list_array.
+*/
+
 uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint)
@ -7414,15 +7421,17 @@ uint32 store_tuple_to_record(Field **pfield,
  return nparts;
 }

-/*
-  RANGE(columns) partitioning: compare value bound and probe tuple.
+/**
+  RANGE(columns) partitioning: compare partition value bound and probe tuple.

-  The value bound always is a full tuple (but may include the MAXVALUE
-  special value).
+  @param val           Partition column values.
+  @param nvals_in_rec  Number of (prefix) fields to compare.

-  The probe tuple may be a prefix of partitioning tuple. The tail_is_min
-  parameter specifies whether the suffix components should be assumed to
-  hold MAXVALUE
+  @return Less than/Equal to/Greater than 0 if the record is L/E/G than val.
+
+  @note The partition value bound is always a full tuple (but may include the
+  MAXVALUE special value). The probe tuple may be a prefix of partitioning
+  tuple.
 */

 static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
@ -7452,25 +7461,73 @@ static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
 }


+/**
+  Compare record and columns partition tuple including endpoint handling.
+
+  @param  val               Columns partition tuple
+  @param  n_vals_in_rec     Number of columns to compare
+  @param  is_left_endpoint  True if left endpoint (part_tuple < rec or
+                            part_tuple <= rec)
+  @param  include_endpoint  If endpoint is included (part_tuple <= rec or
+                            rec <= part_tuple)
+
+  @return Less than/Equal to/Greater than 0 if the record is L/E/G than
+  the partition tuple.
+
+  @see get_list_array_idx_for_endpoint() and
+  get_partition_id_range_for_endpoint().
+*/
+
 static int cmp_rec_and_tuple_prune(part_column_list_val *val,
                                   uint32 n_vals_in_rec,
-                                   bool tail_is_min)
+                                   bool is_left_endpoint,
+                                   bool include_endpoint)
 {
  int cmp;
  Field **field;
-  partition_info *part_info;
  if ((cmp= cmp_rec_and_tuple(val, n_vals_in_rec)))
    return cmp;
-  part_info= val->part_info;
-  field= part_info->part_field_array + n_vals_in_rec;
-  for (; *field; field++, val++)
+  field= val->part_info->part_field_array + n_vals_in_rec;
+  if (!(*field))
  {
-    if (tail_is_min)
-      return -1;
-    if (!tail_is_min && !val->max_value)
-      return +1;
+    /*
+      Full match, if right endpoint and not including the endpoint,
+      (rec < part) return lesser.
+    */
+    if (!is_left_endpoint && !include_endpoint)
+      return -4;
+
+    /* Otherwise they are equal! */
+    return 0;
  }
-  return 0;
+  /*
+    The prefix is equal and there are more partition columns to compare.
+
+    If including left endpoint or not including right endpoint
+    then the record is considered lesser compared to the partition.
+
+    i.e:
+    part(10, x) <= rec(10, unknown) and rec(10, unknown) < part(10, x)
+    part <= rec -> lesser (i.e. this or previous partitions)
+    rec < part -> lesser (i.e. this or previous partitions)
+  */
+  if (is_left_endpoint == include_endpoint)
+    return -2;
+
+  /*
+    If right endpoint and the first additional partition value
+    is MAXVALUE, then the record is lesser.
+  */
+  if (!is_left_endpoint && (val + n_vals_in_rec)->max_value)
+    return -3;
+
+  /*
+    Otherwise the record is considered greater.
+
+    rec <= part -> greater (i.e. does not match this partition, seek higher).
+    part < rec -> greater (i.e. does not match this partition, seek higher).
+  */
+  return 2;
 }


@ -7481,91 +7538,65 @@ typedef uint32 (*get_col_endpoint_func)(partition_info*, bool left_endpoint,
                                        bool include_endpoint,
                                        uint32 num_parts);

-/*
-  Partitioning Interval Analysis: Initialize the iterator for "mapping" case
+/**
+  Get partition for RANGE COLUMNS endpoint.

-  SYNOPSIS
-    get_part_iter_for_interval_via_mapping()
-      part_info   Partition info
-      is_subpart  TRUE  - act for subpartitioning
-                  FALSE - act for partitioning
-      min_value   minimum field value, in opt_range key format.
-      max_value   minimum field value, in opt_range key format.
-      flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
-                  NO_MAX_RANGE.
-      part_iter   Iterator structure to be initialized
+  @param part_info         Partitioning metadata.
+  @param is_left_endpoint     True if left endpoint (const <=/< cols)
+  @param include_endpoint  True if range includes the endpoint (<=/>=)
+  @param nparts            Total number of partitions

-  DESCRIPTION
-    Initialize partition set iterator to walk over the interval in
-    ordered-array-of-partitions (for RANGE partitioning) or 
-    ordered-array-of-list-constants (for LIST partitioning) space.
+  @return Partition id of matching partition.

-  IMPLEMENTATION
-    This function is used when partitioning is done by
-    <RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
-    t.field space into a sub-array of partition_info::range_int_array or
-    partition_info::list_array (see get_partition_id_range_for_endpoint,
-    get_list_array_idx_for_endpoint for details).
-    
-    The function performs this interval mapping, and sets the iterator to
-    traverse the sub-array and return appropriate partitions.
-    
-  RETURN
-    0 - No matching partitions (iterator not initialized)
-    1 - Ok, iterator intialized for traversal of matching partitions.
-   -1 - All partitions would match (iterator not initialized)
+  @see get_partition_id_cols_list_for_endpoint and
+  get_partition_id_range_for_endpoint.
 */

 uint32 get_partition_id_cols_range_for_endpoint(partition_info *part_info,
-                                                bool left_endpoint,
+                                                bool is_left_endpoint,
                                                bool include_endpoint,
                                                uint32 nparts)
 {
-  uint max_partition= part_info->num_parts - 1;
-  uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
+  uint min_part_id= 0, max_part_id= part_info->num_parts, loc_part_id;
  part_column_list_val *range_col_array= part_info->range_col_array;
  uint num_columns= part_info->part_field_list.elements;
-  bool tailf= !(left_endpoint ^ include_endpoint);
  DBUG_ENTER("get_partition_id_cols_range_for_endpoint");

-  /* Get the partitioning function value for the endpoint */
-  while (max_part_id > min_part_id)
+  /* Find the matching partition (including taking endpoint into account). */
+  do
  {
-    loc_part_id= (max_part_id + min_part_id + 1) >> 1;
-    if (cmp_rec_and_tuple_prune(range_col_array + loc_part_id*num_columns,
-                                nparts, tailf) >= 0)
+    /* Midpoint, adjusted down, so it can never be > last partition. */
+    loc_part_id= (max_part_id + min_part_id) >> 1;
+    if (0 <= cmp_rec_and_tuple_prune(range_col_array +
+                                       loc_part_id * num_columns,
+                                     nparts,
+                                     is_left_endpoint,
+                                     include_endpoint))
      min_part_id= loc_part_id + 1;
    else
-      max_part_id= loc_part_id - 1;
-  }
+      max_part_id= loc_part_id;
+  } while (max_part_id > min_part_id);
  loc_part_id= max_part_id;
-  if (loc_part_id < max_partition && 
-      cmp_rec_and_tuple_prune(range_col_array + (loc_part_id+1)*num_columns,
-                              nparts, tailf) >= 0
-      )
+
+  /* Given value must be LESS THAN the found partition. */
+  DBUG_ASSERT(loc_part_id == part_info->num_parts ||
+              (0 > cmp_rec_and_tuple_prune(range_col_array +
+                                             loc_part_id * num_columns,
+                                           nparts, is_left_endpoint,
+                                           include_endpoint)));
+  /* Given value must be GREATER THAN or EQUAL to the previous partition. */
+  DBUG_ASSERT(loc_part_id == 0 ||
+              (0 <= cmp_rec_and_tuple_prune(range_col_array +
+                                              (loc_part_id - 1) * num_columns,
+                                            nparts, is_left_endpoint,
+                                            include_endpoint)));
+
+  if (!is_left_endpoint)
  {
-     loc_part_id++;
-  }
-  if (left_endpoint)
-  {
-    if (cmp_rec_and_tuple_prune(range_col_array + loc_part_id*num_columns,
-                                nparts, tailf) >= 0)
+    /* Set the end after this partition if not already after the last. */
+    if (loc_part_id < part_info->num_parts)
      loc_part_id++;
  }
-  else 
-  {
-    if (loc_part_id < max_partition)
-    {
-      int res= cmp_rec_and_tuple_prune(range_col_array +
-                                       loc_part_id * num_columns,
-                                       nparts, tailf);
-      if (!res)
-        loc_part_id += test(include_endpoint);
-      else if (res > 0)
-        loc_part_id++;
-    }
-    loc_part_id++;
-  }
  DBUG_RETURN(loc_part_id);
 }

@ -7637,6 +7668,40 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info,
 }


+/**
+  Partitioning Interval Analysis: Initialize the iterator for "mapping" case
+
+  @param part_info   Partition info
+  @param is_subpart  TRUE  - act for subpartitioning
+                     FALSE - act for partitioning
+  @param store_length_array  Ignored.
+  @param min_value   minimum field value, in opt_range key format.
+  @param max_value   minimum field value, in opt_range key format.
+  @param min_len     Ignored.
+  @param max_len     Ignored.
+  @param flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
+                     NO_MAX_RANGE.
+  @param part_iter   Iterator structure to be initialized
+
+  @details Initialize partition set iterator to walk over the interval in
+  ordered-array-of-partitions (for RANGE partitioning) or
+  ordered-array-of-list-constants (for LIST partitioning) space.
+
+  This function is used when partitioning is done by
+  <RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
+  t.field space into a sub-array of partition_info::range_int_array or
+  partition_info::list_array (see get_partition_id_range_for_endpoint,
+  get_list_array_idx_for_endpoint for details).
+
+  The function performs this interval mapping, and sets the iterator to
+  traverse the sub-array and return appropriate partitions.
+
+  @return Status of iterator
+    @retval 0   No matching partitions (iterator not initialized)
+    @retval 1   Ok, iterator intialized for traversal of matching partitions.
+    @retval -1  All partitions would match (iterator not initialized)
+*/
+
 int get_part_iter_for_interval_via_mapping(partition_info *part_info,
                                           bool is_subpart,
                                           uint32 *store_length_array, /* ignored */