Bug#16447483: PARTITION PRUNING IS NOT CORRECT FOR RANGE COLUMNS

The problem was in get_partition_id_cols_range_for_endpoint and cmp_rec_and_tuple_prune, which stepped one partition too far. The solution was to move a small portion of logic into cmp_rec_and_tuple_prune, which simplifies both get_partition_id_cols_range_for_endpoint and get_partition_id_cols_list_for_endpoint.
2013-05-16 11:02:39 +02:00 · 2013-05-16 11:02:39 +02:00 · 23c5840d52
commit 23c5840d52
parent 2ee012b2e8
1 changed files with 206 additions and 141 deletions
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@ -173,7 +173,8 @@ int get_part_iter_for_interval_via_walking(partition_info *part_info,
 static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec);
 static int cmp_rec_and_tuple_prune(part_column_list_val *val,
                                   uint32 n_vals_in_rec,
-                                   bool tail_is_min);
+                                   bool is_left_endpoint,
                                   bool include_endpoint);
 /*
  Convert constants in VALUES definition to the character set the
@ -3293,44 +3294,6 @@ notfound:
 }
 /*
  Find the sub-array part_info->list_array that corresponds to given interval
  SYNOPSIS 
    get_list_array_idx_for_endpoint()
      part_info         Partitioning info (partitioning type must be LIST)
      left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
                        FALSE - the interval is (-inf; a] or (-inf; a)
      include_endpoint  TRUE iff the interval includes the endpoint
  DESCRIPTION
    This function finds the sub-array of part_info->list_array where values of
    list_array[idx].list_value are contained within the specified interval.
    list_array is ordered by list_value, so
    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the 
       sought sub-array starts at some index idx and continues till array end.
       The function returns first number idx, such that 
       list_array[idx].list_value is contained within the passed interval.
    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
       sought sub-array starts at array start and continues till some last 
       index idx.
       The function returns first number idx, such that 
       list_array[idx].list_value is NOT contained within the passed interval.
       If all array elements are contained, part_info->num_list_values is
       returned.
  NOTE
    The caller will call this function and then will run along the sub-array of
    list_array to collect partition ids. If the number of list values is 
    significantly higher than the number of partitions, this could be slow and
    we could invent some other approach. The "run over list array" part is
    already wrapped in a get_next()-like function.
  RETURN
    The edge of corresponding sub-array of part_info->list_array
 */
 uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint,
@ -3338,37 +3301,81 @@ uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
 {
  part_column_list_val *list_col_array= part_info->list_col_array;
  uint num_columns= part_info->part_field_list.elements;
-  int list_index, cmp;
+  uint list_index;
  uint min_list_index= 0;
-  uint max_list_index= part_info->num_list_values - 1;
+  uint max_list_index= part_info->num_list_values;
  bool tailf= !(left_endpoint ^ include_endpoint);
  DBUG_ENTER("get_partition_id_cols_list_for_endpoint");
  /* Find the matching partition (including taking endpoint into account). */
  do
  {
    /* Midpoint, adjusted down, so it can never be > last index. */
    list_index= (max_list_index + min_list_index) >> 1;
-    cmp= cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
+    if (cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
-                                 nparts, tailf);
+                                nparts, left_endpoint, include_endpoint) > 0)
    if (cmp > 0)
      min_list_index= list_index + 1;
    else if (cmp < 0)
    {
      if (!list_index)
        goto notfound;
      max_list_index= list_index - 1;
    }
    else
-    {
+      max_list_index= list_index;
-      DBUG_RETURN(list_index + test(!tailf));
+  } while (max_list_index > min_list_index);
-    }
+  list_index= max_list_index;
-  } while (max_list_index >= min_list_index);
+
-  if (cmp > 0)
+  /* Given value must be LESS THAN or EQUAL to the found partition. */
-    list_index++;
+  DBUG_ASSERT(list_index == part_info->num_list_values ||
-notfound:
+              (0 >= cmp_rec_and_tuple_prune(list_col_array +
                                              list_index*num_columns,
                                            nparts, left_endpoint,
                                            include_endpoint)));
  /* Given value must be GREATER THAN the previous partition. */
  DBUG_ASSERT(list_index == 0 ||
              (0 < cmp_rec_and_tuple_prune(list_col_array +
                                            (list_index - 1)*num_columns,
                                           nparts, left_endpoint,
                                           include_endpoint)));
  if (!left_endpoint)
  {
    /* Set the end after this list tuple if not already after the last. */
    if (list_index < part_info->num_parts)
      list_index++;
  }
  DBUG_RETURN(list_index);
 }
 /**
  Find the sub-array part_info->list_array that corresponds to given interval.
  @param part_info         Partitioning info (partitioning type must be LIST)
  @param left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
                           FALSE - the interval is (-inf; a] or (-inf; a)
  @param include_endpoint  TRUE iff the interval includes the endpoint
  This function finds the sub-array of part_info->list_array where values of
  list_array[idx].list_value are contained within the specified interval.
  list_array is ordered by list_value, so
  1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
     sought sub-array starts at some index idx and continues till array end.
     The function returns first number idx, such that
     list_array[idx].list_value is contained within the passed interval.
  2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
     sought sub-array starts at array start and continues till some last
     index idx.
     The function returns first number idx, such that
     list_array[idx].list_value is NOT contained within the passed interval.
     If all array elements are contained, part_info->num_list_values is
     returned.
  @note The caller will call this function and then will run along the
  sub-array of list_array to collect partition ids. If the number of list
  values is significantly higher than the number of partitions, this could be
  slow and we could invent some other approach. The "run over list array" part
  is already wrapped in a get_next()-like function.
  @return The index of corresponding sub-array of part_info->list_array.
 */
 uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint)
@ -7414,15 +7421,17 @@ uint32 store_tuple_to_record(Field **pfield,
  return nparts;
 }
-/*
+/**
-  RANGE(columns) partitioning: compare value bound and probe tuple.
+  RANGE(columns) partitioning: compare partition value bound and probe tuple.
-  The value bound always is a full tuple (but may include the MAXVALUE
+  @param val           Partition column values.
-  special value).
+  @param nvals_in_rec  Number of (prefix) fields to compare.
-  The probe tuple may be a prefix of partitioning tuple. The tail_is_min
+  @return Less than/Equal to/Greater than 0 if the record is L/E/G than val.
-  parameter specifies whether the suffix components should be assumed to
+
-  hold MAXVALUE
+  @note The partition value bound is always a full tuple (but may include the
  MAXVALUE special value). The probe tuple may be a prefix of partitioning
  tuple.
 */
 static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
@ -7452,25 +7461,73 @@ static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
 }
 /**
  Compare record and columns partition tuple including endpoint handling.
  @param  val               Columns partition tuple
  @param  n_vals_in_rec     Number of columns to compare
  @param  is_left_endpoint  True if left endpoint (part_tuple < rec or
                            part_tuple <= rec)
  @param  include_endpoint  If endpoint is included (part_tuple <= rec or
                            rec <= part_tuple)
  @return Less than/Equal to/Greater than 0 if the record is L/E/G than
  the partition tuple.
  @see get_list_array_idx_for_endpoint() and
  get_partition_id_range_for_endpoint().
 */
 static int cmp_rec_and_tuple_prune(part_column_list_val *val,
                                   uint32 n_vals_in_rec,
-                                   bool tail_is_min)
+                                   bool is_left_endpoint,
                                   bool include_endpoint)
 {
  int cmp;
  Field **field;
  partition_info *part_info;
  if ((cmp= cmp_rec_and_tuple(val, n_vals_in_rec)))
    return cmp;
-  part_info= val->part_info;
+  field= val->part_info->part_field_array + n_vals_in_rec;
-  field= part_info->part_field_array + n_vals_in_rec;
+  if (!(*field))
  for (; *field; field++, val++)
  {
-    if (tail_is_min)
+    /*
-      return -1;
+      Full match, if right endpoint and not including the endpoint,
-    if (!tail_is_min && !val->max_value)
+      (rec < part) return lesser.
-      return +1;
+    */
    if (!is_left_endpoint && !include_endpoint)
      return -4;
    /* Otherwise they are equal! */
    return 0;
  }
-  return 0;
+  /*
    The prefix is equal and there are more partition columns to compare.
    If including left endpoint or not including right endpoint
    then the record is considered lesser compared to the partition.
    i.e:
    part(10, x) <= rec(10, unknown) and rec(10, unknown) < part(10, x)
    part <= rec -> lesser (i.e. this or previous partitions)
    rec < part -> lesser (i.e. this or previous partitions)
  */
  if (is_left_endpoint == include_endpoint)
    return -2;
  /*
    If right endpoint and the first additional partition value
    is MAXVALUE, then the record is lesser.
  */
  if (!is_left_endpoint && (val + n_vals_in_rec)->max_value)
    return -3;
  /*
    Otherwise the record is considered greater.
    rec <= part -> greater (i.e. does not match this partition, seek higher).
    part < rec -> greater (i.e. does not match this partition, seek higher).
  */
  return 2;
 }
@ -7481,91 +7538,65 @@ typedef uint32 (*get_col_endpoint_func)(partition_info*, bool left_endpoint,
                                        bool include_endpoint,
                                        uint32 num_parts);
-/*
+/**
-  Partitioning Interval Analysis: Initialize the iterator for "mapping" case
+  Get partition for RANGE COLUMNS endpoint.
-  SYNOPSIS
+  @param part_info         Partitioning metadata.
-    get_part_iter_for_interval_via_mapping()
+  @param is_left_endpoint     True if left endpoint (const <=/< cols)
-      part_info   Partition info
+  @param include_endpoint  True if range includes the endpoint (<=/>=)
-      is_subpart  TRUE  - act for subpartitioning
+  @param nparts            Total number of partitions
                  FALSE - act for partitioning
      min_value   minimum field value, in opt_range key format.
      max_value   maximum field value, in opt_range key format.
      flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
                  NO_MAX_RANGE.
      part_iter   Iterator structure to be initialized
-  DESCRIPTION
+  @return Partition id of matching partition.
    Initialize partition set iterator to walk over the interval in
    ordered-array-of-partitions (for RANGE partitioning) or 
    ordered-array-of-list-constants (for LIST partitioning) space.
-  IMPLEMENTATION
+  @see get_partition_id_cols_list_for_endpoint and
-    This function is used when partitioning is done by
+  get_partition_id_range_for_endpoint.
    <RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
    t.field space into a sub-array of partition_info::range_int_array or
    partition_info::list_array (see get_partition_id_range_for_endpoint,
    get_list_array_idx_for_endpoint for details).
    The function performs this interval mapping, and sets the iterator to
    traverse the sub-array and return appropriate partitions.
  RETURN
    0 - No matching partitions (iterator not initialized)
    1 - Ok, iterator initialized for traversal of matching partitions.
   -1 - All partitions would match (iterator not initialized)
 */
 uint32 get_partition_id_cols_range_for_endpoint(partition_info *part_info,
-                                                bool left_endpoint,
+                                                bool is_left_endpoint,
                                                bool include_endpoint,
                                                uint32 nparts)
 {
-  uint max_partition= part_info->num_parts - 1;
+  uint min_part_id= 0, max_part_id= part_info->num_parts, loc_part_id;
  uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
  part_column_list_val *range_col_array= part_info->range_col_array;
  uint num_columns= part_info->part_field_list.elements;
  bool tailf= !(left_endpoint ^ include_endpoint);
  DBUG_ENTER("get_partition_id_cols_range_for_endpoint");
-  /* Get the partitioning function value for the endpoint */
+  /* Find the matching partition (including taking endpoint into account). */
-  while (max_part_id > min_part_id)
+  do
  {
-    loc_part_id= (max_part_id + min_part_id + 1) >> 1;
+    /* Midpoint, adjusted down, so it can never be > last partition. */
-    if (cmp_rec_and_tuple_prune(range_col_array + loc_part_id*num_columns,
+    loc_part_id= (max_part_id + min_part_id) >> 1;
-                                nparts, tailf) >= 0)
+    if (0 <= cmp_rec_and_tuple_prune(range_col_array +
                                       loc_part_id * num_columns,
                                     nparts,
                                     is_left_endpoint,
                                     include_endpoint))
      min_part_id= loc_part_id + 1;
    else
-      max_part_id= loc_part_id - 1;
+      max_part_id= loc_part_id;
-  }
+  } while (max_part_id > min_part_id);
  loc_part_id= max_part_id;
-  if (loc_part_id < max_partition && 
+
-      cmp_rec_and_tuple_prune(range_col_array + (loc_part_id+1)*num_columns,
+  /* Given value must be LESS THAN the found partition. */
-                              nparts, tailf) >= 0
+  DBUG_ASSERT(loc_part_id == part_info->num_parts ||
-      )
+              (0 > cmp_rec_and_tuple_prune(range_col_array +
                                             loc_part_id * num_columns,
                                           nparts, is_left_endpoint,
                                           include_endpoint)));
  /* Given value must be GREATER THAN or EQUAL to the previous partition. */
  DBUG_ASSERT(loc_part_id == 0 ||
              (0 <= cmp_rec_and_tuple_prune(range_col_array +
                                              (loc_part_id - 1) * num_columns,
                                            nparts, is_left_endpoint,
                                            include_endpoint)));
  if (!is_left_endpoint)
  {
-     loc_part_id++;
+    /* Set the end after this partition if not already after the last. */
-  }
+    if (loc_part_id < part_info->num_parts)
  if (left_endpoint)
  {
    if (cmp_rec_and_tuple_prune(range_col_array + loc_part_id*num_columns,
                                nparts, tailf) >= 0)
      loc_part_id++;
  }
  else 
  {
    if (loc_part_id < max_partition)
    {
      int res= cmp_rec_and_tuple_prune(range_col_array +
                                       loc_part_id * num_columns,
                                       nparts, tailf);
      if (!res)
        loc_part_id += test(include_endpoint);
      else if (res > 0)
        loc_part_id++;
    }
    loc_part_id++;
  }
  DBUG_RETURN(loc_part_id);
 }
@ -7637,6 +7668,40 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info,
 }
 /**
  Partitioning Interval Analysis: Initialize the iterator for "mapping" case
  @param part_info   Partition info
  @param is_subpart  TRUE  - act for subpartitioning
                     FALSE - act for partitioning
  @param store_length_array  Ignored.
  @param min_value   minimum field value, in opt_range key format.
  @param max_value   maximum field value, in opt_range key format.
  @param min_len     Ignored.
  @param max_len     Ignored.
  @param flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
                     NO_MAX_RANGE.
  @param part_iter   Iterator structure to be initialized
  @details Initialize partition set iterator to walk over the interval in
  ordered-array-of-partitions (for RANGE partitioning) or
  ordered-array-of-list-constants (for LIST partitioning) space.
  This function is used when partitioning is done by
  <RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
  t.field space into a sub-array of partition_info::range_int_array or
  partition_info::list_array (see get_partition_id_range_for_endpoint,
  get_list_array_idx_for_endpoint for details).
  The function performs this interval mapping, and sets the iterator to
  traverse the sub-array and return appropriate partitions.
  @return Status of iterator
    @retval 0   No matching partitions (iterator not initialized)
    @retval 1   Ok, iterator initialized for traversal of matching partitions.
    @retval -1  All partitions would match (iterator not initialized)
 */
 int get_part_iter_for_interval_via_mapping(partition_info *part_info,
                                           bool is_subpart,
                                           uint32 *store_length_array, /* ignored */