Bug#16447483: PARTITION PRUNING IS NOT CORRECT FOR RANGE COLUMNS
The problem was in get_partition_id_cols_range_for_endpoint and cmp_rec_and_tuple_prune, which stepped one partition too far. The solution was to move a small portion of the logic into cmp_rec_and_tuple_prune, which simplifies both get_partition_id_cols_range_for_endpoint and get_partition_id_cols_list_for_endpoint.
This commit is contained in:
parent
2ee012b2e8
commit
23c5840d52
@ -173,7 +173,8 @@ int get_part_iter_for_interval_via_walking(partition_info *part_info,
|
|||||||
static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec);
|
static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec);
|
||||||
static int cmp_rec_and_tuple_prune(part_column_list_val *val,
|
static int cmp_rec_and_tuple_prune(part_column_list_val *val,
|
||||||
uint32 n_vals_in_rec,
|
uint32 n_vals_in_rec,
|
||||||
bool tail_is_min);
|
bool is_left_endpoint,
|
||||||
|
bool include_endpoint);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Convert constants in VALUES definition to the character set the
|
Convert constants in VALUES definition to the character set the
|
||||||
@ -3293,44 +3294,6 @@ notfound:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
Find the sub-array part_info->list_array that corresponds to given interval
|
|
||||||
|
|
||||||
SYNOPSIS
|
|
||||||
get_list_array_idx_for_endpoint()
|
|
||||||
part_info Partitioning info (partitioning type must be LIST)
|
|
||||||
left_endpoint TRUE - the interval is [a; +inf) or (a; +inf)
|
|
||||||
FALSE - the interval is (-inf; a] or (-inf; a)
|
|
||||||
include_endpoint TRUE iff the interval includes the endpoint
|
|
||||||
|
|
||||||
DESCRIPTION
|
|
||||||
This function finds the sub-array of part_info->list_array where values of
|
|
||||||
list_array[idx].list_value are contained within the specifed interval.
|
|
||||||
list_array is ordered by list_value, so
|
|
||||||
1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
|
|
||||||
sought sub-array starts at some index idx and continues till array end.
|
|
||||||
The function returns first number idx, such that
|
|
||||||
list_array[idx].list_value is contained within the passed interval.
|
|
||||||
|
|
||||||
2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
|
|
||||||
sought sub-array starts at array start and continues till some last
|
|
||||||
index idx.
|
|
||||||
The function returns first number idx, such that
|
|
||||||
list_array[idx].list_value is NOT contained within the passed interval.
|
|
||||||
If all array elements are contained, part_info->num_list_values is
|
|
||||||
returned.
|
|
||||||
|
|
||||||
NOTE
|
|
||||||
The caller will call this function and then will run along the sub-array of
|
|
||||||
list_array to collect partition ids. If the number of list values is
|
|
||||||
significantly higher then number of partitions, this could be slow and
|
|
||||||
we could invent some other approach. The "run over list array" part is
|
|
||||||
already wrapped in a get_next()-like function.
|
|
||||||
|
|
||||||
RETURN
|
|
||||||
The edge of corresponding sub-array of part_info->list_array
|
|
||||||
*/
|
|
||||||
|
|
||||||
uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
|
uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
|
||||||
bool left_endpoint,
|
bool left_endpoint,
|
||||||
bool include_endpoint,
|
bool include_endpoint,
|
||||||
@ -3338,37 +3301,81 @@ uint32 get_partition_id_cols_list_for_endpoint(partition_info *part_info,
|
|||||||
{
|
{
|
||||||
part_column_list_val *list_col_array= part_info->list_col_array;
|
part_column_list_val *list_col_array= part_info->list_col_array;
|
||||||
uint num_columns= part_info->part_field_list.elements;
|
uint num_columns= part_info->part_field_list.elements;
|
||||||
int list_index, cmp;
|
uint list_index;
|
||||||
uint min_list_index= 0;
|
uint min_list_index= 0;
|
||||||
uint max_list_index= part_info->num_list_values - 1;
|
uint max_list_index= part_info->num_list_values;
|
||||||
bool tailf= !(left_endpoint ^ include_endpoint);
|
|
||||||
DBUG_ENTER("get_partition_id_cols_list_for_endpoint");
|
DBUG_ENTER("get_partition_id_cols_list_for_endpoint");
|
||||||
|
|
||||||
|
/* Find the matching partition (including taking endpoint into account). */
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
/* Midpoint, adjusted down, so it can never be > last index. */
|
||||||
list_index= (max_list_index + min_list_index) >> 1;
|
list_index= (max_list_index + min_list_index) >> 1;
|
||||||
cmp= cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
|
if (cmp_rec_and_tuple_prune(list_col_array + list_index*num_columns,
|
||||||
nparts, tailf);
|
nparts, left_endpoint, include_endpoint) > 0)
|
||||||
if (cmp > 0)
|
|
||||||
min_list_index= list_index + 1;
|
min_list_index= list_index + 1;
|
||||||
else if (cmp < 0)
|
|
||||||
{
|
|
||||||
if (!list_index)
|
|
||||||
goto notfound;
|
|
||||||
max_list_index= list_index - 1;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
max_list_index= list_index;
|
||||||
DBUG_RETURN(list_index + test(!tailf));
|
} while (max_list_index > min_list_index);
|
||||||
}
|
list_index= max_list_index;
|
||||||
} while (max_list_index >= min_list_index);
|
|
||||||
if (cmp > 0)
|
/* Given value must be LESS THAN or EQUAL to the found partition. */
|
||||||
list_index++;
|
DBUG_ASSERT(list_index == part_info->num_list_values ||
|
||||||
notfound:
|
(0 >= cmp_rec_and_tuple_prune(list_col_array +
|
||||||
|
list_index*num_columns,
|
||||||
|
nparts, left_endpoint,
|
||||||
|
include_endpoint)));
|
||||||
|
/* Given value must be GREATER THAN the previous partition. */
|
||||||
|
DBUG_ASSERT(list_index == 0 ||
|
||||||
|
(0 < cmp_rec_and_tuple_prune(list_col_array +
|
||||||
|
(list_index - 1)*num_columns,
|
||||||
|
nparts, left_endpoint,
|
||||||
|
include_endpoint)));
|
||||||
|
|
||||||
|
if (!left_endpoint)
|
||||||
|
{
|
||||||
|
/* Set the end after this list tuple if not already after the last. */
|
||||||
|
if (list_index < part_info->num_parts)
|
||||||
|
list_index++;
|
||||||
|
}
|
||||||
|
|
||||||
DBUG_RETURN(list_index);
|
DBUG_RETURN(list_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Find the sub-array part_info->list_array that corresponds to given interval.
|
||||||
|
|
||||||
|
@param part_info Partitioning info (partitioning type must be LIST)
|
||||||
|
@param left_endpoint TRUE - the interval is [a; +inf) or (a; +inf)
|
||||||
|
FALSE - the interval is (-inf; a] or (-inf; a)
|
||||||
|
@param include_endpoint TRUE iff the interval includes the endpoint
|
||||||
|
|
||||||
|
This function finds the sub-array of part_info->list_array where values of
|
||||||
|
list_array[idx].list_value are contained within the specified interval.
|
||||||
|
list_array is ordered by list_value, so
|
||||||
|
1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
|
||||||
|
sought sub-array starts at some index idx and continues till array end.
|
||||||
|
The function returns first number idx, such that
|
||||||
|
list_array[idx].list_value is contained within the passed interval.
|
||||||
|
|
||||||
|
2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
|
||||||
|
sought sub-array starts at array start and continues till some last
|
||||||
|
index idx.
|
||||||
|
The function returns first number idx, such that
|
||||||
|
list_array[idx].list_value is NOT contained within the passed interval.
|
||||||
|
If all array elements are contained, part_info->num_list_values is
|
||||||
|
returned.
|
||||||
|
|
||||||
|
@note The caller will call this function and then will run along the
|
||||||
|
sub-array of list_array to collect partition ids. If the number of list
|
||||||
|
values is significantly higher than the number of partitions, this could be slow
|
||||||
|
and we could invent some other approach. The "run over list array" part is
|
||||||
|
already wrapped in a get_next()-like function.
|
||||||
|
|
||||||
|
@return The index of corresponding sub-array of part_info->list_array.
|
||||||
|
*/
|
||||||
|
|
||||||
uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
|
uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
|
||||||
bool left_endpoint,
|
bool left_endpoint,
|
||||||
bool include_endpoint)
|
bool include_endpoint)
|
||||||
@ -7414,15 +7421,17 @@ uint32 store_tuple_to_record(Field **pfield,
|
|||||||
return nparts;
|
return nparts;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
RANGE(columns) partitioning: compare value bound and probe tuple.
|
RANGE(columns) partitioning: compare partition value bound and probe tuple.
|
||||||
|
|
||||||
The value bound always is a full tuple (but may include the MAXVALUE
|
@param val Partition column values.
|
||||||
special value).
|
@param nvals_in_rec Number of (prefix) fields to compare.
|
||||||
|
|
||||||
The probe tuple may be a prefix of partitioning tuple. The tail_is_min
|
@return Less than/Equal to/Greater than 0 if the record is L/E/G than val.
|
||||||
parameter specifies whether the suffix components should be assumed to
|
|
||||||
hold MAXVALUE
|
@note The partition value bound is always a full tuple (but may include the
|
||||||
|
MAXVALUE special value). The probe tuple may be a prefix of partitioning
|
||||||
|
tuple.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
|
static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
|
||||||
@ -7452,25 +7461,73 @@ static int cmp_rec_and_tuple(part_column_list_val *val, uint32 nvals_in_rec)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Compare record and columns partition tuple including endpoint handling.
|
||||||
|
|
||||||
|
@param val Columns partition tuple
|
||||||
|
@param n_vals_in_rec Number of columns to compare
|
||||||
|
@param is_left_endpoint True if left endpoint (part_tuple < rec or
|
||||||
|
part_tuple <= rec)
|
||||||
|
@param include_endpoint If endpoint is included (part_tuple <= rec or
|
||||||
|
rec <= part_tuple)
|
||||||
|
|
||||||
|
@return Less than/Equal to/Greater than 0 if the record is L/E/G than
|
||||||
|
the partition tuple.
|
||||||
|
|
||||||
|
@see get_list_array_idx_for_endpoint() and
|
||||||
|
get_partition_id_range_for_endpoint().
|
||||||
|
*/
|
||||||
|
|
||||||
static int cmp_rec_and_tuple_prune(part_column_list_val *val,
|
static int cmp_rec_and_tuple_prune(part_column_list_val *val,
|
||||||
uint32 n_vals_in_rec,
|
uint32 n_vals_in_rec,
|
||||||
bool tail_is_min)
|
bool is_left_endpoint,
|
||||||
|
bool include_endpoint)
|
||||||
{
|
{
|
||||||
int cmp;
|
int cmp;
|
||||||
Field **field;
|
Field **field;
|
||||||
partition_info *part_info;
|
|
||||||
if ((cmp= cmp_rec_and_tuple(val, n_vals_in_rec)))
|
if ((cmp= cmp_rec_and_tuple(val, n_vals_in_rec)))
|
||||||
return cmp;
|
return cmp;
|
||||||
part_info= val->part_info;
|
field= val->part_info->part_field_array + n_vals_in_rec;
|
||||||
field= part_info->part_field_array + n_vals_in_rec;
|
if (!(*field))
|
||||||
for (; *field; field++, val++)
|
|
||||||
{
|
{
|
||||||
if (tail_is_min)
|
/*
|
||||||
return -1;
|
Full match, if right endpoint and not including the endpoint,
|
||||||
if (!tail_is_min && !val->max_value)
|
(rec < part) return lesser.
|
||||||
return +1;
|
*/
|
||||||
|
if (!is_left_endpoint && !include_endpoint)
|
||||||
|
return -4;
|
||||||
|
|
||||||
|
/* Otherwise they are equal! */
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
return 0;
|
/*
|
||||||
|
The prefix is equal and there are more partition columns to compare.
|
||||||
|
|
||||||
|
If including left endpoint or not including right endpoint
|
||||||
|
then the record is considered lesser compared to the partition.
|
||||||
|
|
||||||
|
i.e:
|
||||||
|
part(10, x) <= rec(10, unknown) and rec(10, unknown) < part(10, x)
|
||||||
|
part <= rec -> lesser (i.e. this or previous partitions)
|
||||||
|
rec < part -> lesser (i.e. this or previous partitions)
|
||||||
|
*/
|
||||||
|
if (is_left_endpoint == include_endpoint)
|
||||||
|
return -2;
|
||||||
|
|
||||||
|
/*
|
||||||
|
If right endpoint and the first additional partition value
|
||||||
|
is MAXVALUE, then the record is lesser.
|
||||||
|
*/
|
||||||
|
if (!is_left_endpoint && (val + n_vals_in_rec)->max_value)
|
||||||
|
return -3;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Otherwise the record is considered greater.
|
||||||
|
|
||||||
|
rec <= part -> greater (i.e. does not match this partition, seek higher).
|
||||||
|
part < rec -> greater (i.e. does not match this partition, seek higher).
|
||||||
|
*/
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -7481,91 +7538,65 @@ typedef uint32 (*get_col_endpoint_func)(partition_info*, bool left_endpoint,
|
|||||||
bool include_endpoint,
|
bool include_endpoint,
|
||||||
uint32 num_parts);
|
uint32 num_parts);
|
||||||
|
|
||||||
/*
|
/**
|
||||||
Partitioning Interval Analysis: Initialize the iterator for "mapping" case
|
Get partition for RANGE COLUMNS endpoint.
|
||||||
|
|
||||||
SYNOPSIS
|
@param part_info Partitioning metadata.
|
||||||
get_part_iter_for_interval_via_mapping()
|
@param is_left_endpoint True if left endpoint (const <=/< cols)
|
||||||
part_info Partition info
|
@param include_endpoint True if range includes the endpoint (<=/>=)
|
||||||
is_subpart TRUE - act for subpartitioning
|
@param nparts Total number of partitions
|
||||||
FALSE - act for partitioning
|
|
||||||
min_value minimum field value, in opt_range key format.
|
|
||||||
max_value minimum field value, in opt_range key format.
|
|
||||||
flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
|
|
||||||
NO_MAX_RANGE.
|
|
||||||
part_iter Iterator structure to be initialized
|
|
||||||
|
|
||||||
DESCRIPTION
|
@return Partition id of matching partition.
|
||||||
Initialize partition set iterator to walk over the interval in
|
|
||||||
ordered-array-of-partitions (for RANGE partitioning) or
|
|
||||||
ordered-array-of-list-constants (for LIST partitioning) space.
|
|
||||||
|
|
||||||
IMPLEMENTATION
|
@see get_partition_id_cols_list_for_endpoint and
|
||||||
This function is used when partitioning is done by
|
get_partition_id_range_for_endpoint.
|
||||||
<RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
|
|
||||||
t.field space into a sub-array of partition_info::range_int_array or
|
|
||||||
partition_info::list_array (see get_partition_id_range_for_endpoint,
|
|
||||||
get_list_array_idx_for_endpoint for details).
|
|
||||||
|
|
||||||
The function performs this interval mapping, and sets the iterator to
|
|
||||||
traverse the sub-array and return appropriate partitions.
|
|
||||||
|
|
||||||
RETURN
|
|
||||||
0 - No matching partitions (iterator not initialized)
|
|
||||||
1 - Ok, iterator intialized for traversal of matching partitions.
|
|
||||||
-1 - All partitions would match (iterator not initialized)
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
uint32 get_partition_id_cols_range_for_endpoint(partition_info *part_info,
|
uint32 get_partition_id_cols_range_for_endpoint(partition_info *part_info,
|
||||||
bool left_endpoint,
|
bool is_left_endpoint,
|
||||||
bool include_endpoint,
|
bool include_endpoint,
|
||||||
uint32 nparts)
|
uint32 nparts)
|
||||||
{
|
{
|
||||||
uint max_partition= part_info->num_parts - 1;
|
uint min_part_id= 0, max_part_id= part_info->num_parts, loc_part_id;
|
||||||
uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
|
|
||||||
part_column_list_val *range_col_array= part_info->range_col_array;
|
part_column_list_val *range_col_array= part_info->range_col_array;
|
||||||
uint num_columns= part_info->part_field_list.elements;
|
uint num_columns= part_info->part_field_list.elements;
|
||||||
bool tailf= !(left_endpoint ^ include_endpoint);
|
|
||||||
DBUG_ENTER("get_partition_id_cols_range_for_endpoint");
|
DBUG_ENTER("get_partition_id_cols_range_for_endpoint");
|
||||||
|
|
||||||
/* Get the partitioning function value for the endpoint */
|
/* Find the matching partition (including taking endpoint into account). */
|
||||||
while (max_part_id > min_part_id)
|
do
|
||||||
{
|
{
|
||||||
loc_part_id= (max_part_id + min_part_id + 1) >> 1;
|
/* Midpoint, adjusted down, so it can never be > last partition. */
|
||||||
if (cmp_rec_and_tuple_prune(range_col_array + loc_part_id*num_columns,
|
loc_part_id= (max_part_id + min_part_id) >> 1;
|
||||||
nparts, tailf) >= 0)
|
if (0 <= cmp_rec_and_tuple_prune(range_col_array +
|
||||||
|
loc_part_id * num_columns,
|
||||||
|
nparts,
|
||||||
|
is_left_endpoint,
|
||||||
|
include_endpoint))
|
||||||
min_part_id= loc_part_id + 1;
|
min_part_id= loc_part_id + 1;
|
||||||
else
|
else
|
||||||
max_part_id= loc_part_id - 1;
|
max_part_id= loc_part_id;
|
||||||
}
|
} while (max_part_id > min_part_id);
|
||||||
loc_part_id= max_part_id;
|
loc_part_id= max_part_id;
|
||||||
if (loc_part_id < max_partition &&
|
|
||||||
cmp_rec_and_tuple_prune(range_col_array + (loc_part_id+1)*num_columns,
|
/* Given value must be LESS THAN the found partition. */
|
||||||
nparts, tailf) >= 0
|
DBUG_ASSERT(loc_part_id == part_info->num_parts ||
|
||||||
)
|
(0 > cmp_rec_and_tuple_prune(range_col_array +
|
||||||
|
loc_part_id * num_columns,
|
||||||
|
nparts, is_left_endpoint,
|
||||||
|
include_endpoint)));
|
||||||
|
/* Given value must be GREATER THAN or EQUAL to the previous partition. */
|
||||||
|
DBUG_ASSERT(loc_part_id == 0 ||
|
||||||
|
(0 <= cmp_rec_and_tuple_prune(range_col_array +
|
||||||
|
(loc_part_id - 1) * num_columns,
|
||||||
|
nparts, is_left_endpoint,
|
||||||
|
include_endpoint)));
|
||||||
|
|
||||||
|
if (!is_left_endpoint)
|
||||||
{
|
{
|
||||||
loc_part_id++;
|
/* Set the end after this partition if not already after the last. */
|
||||||
}
|
if (loc_part_id < part_info->num_parts)
|
||||||
if (left_endpoint)
|
|
||||||
{
|
|
||||||
if (cmp_rec_and_tuple_prune(range_col_array + loc_part_id*num_columns,
|
|
||||||
nparts, tailf) >= 0)
|
|
||||||
loc_part_id++;
|
loc_part_id++;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if (loc_part_id < max_partition)
|
|
||||||
{
|
|
||||||
int res= cmp_rec_and_tuple_prune(range_col_array +
|
|
||||||
loc_part_id * num_columns,
|
|
||||||
nparts, tailf);
|
|
||||||
if (!res)
|
|
||||||
loc_part_id += test(include_endpoint);
|
|
||||||
else if (res > 0)
|
|
||||||
loc_part_id++;
|
|
||||||
}
|
|
||||||
loc_part_id++;
|
|
||||||
}
|
|
||||||
DBUG_RETURN(loc_part_id);
|
DBUG_RETURN(loc_part_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7637,6 +7668,40 @@ int get_part_iter_for_interval_cols_via_map(partition_info *part_info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Partitioning Interval Analysis: Initialize the iterator for "mapping" case
|
||||||
|
|
||||||
|
@param part_info Partition info
|
||||||
|
@param is_subpart TRUE - act for subpartitioning
|
||||||
|
FALSE - act for partitioning
|
||||||
|
@param store_length_array Ignored.
|
||||||
|
@param min_value minimum field value, in opt_range key format.
|
||||||
|
@param max_value maximum field value, in opt_range key format.
|
||||||
|
@param min_len Ignored.
|
||||||
|
@param max_len Ignored.
|
||||||
|
@param flags Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
|
||||||
|
NO_MAX_RANGE.
|
||||||
|
@param part_iter Iterator structure to be initialized
|
||||||
|
|
||||||
|
@details Initialize partition set iterator to walk over the interval in
|
||||||
|
ordered-array-of-partitions (for RANGE partitioning) or
|
||||||
|
ordered-array-of-list-constants (for LIST partitioning) space.
|
||||||
|
|
||||||
|
This function is used when partitioning is done by
|
||||||
|
<RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
|
||||||
|
t.field space into a sub-array of partition_info::range_int_array or
|
||||||
|
partition_info::list_array (see get_partition_id_range_for_endpoint,
|
||||||
|
get_list_array_idx_for_endpoint for details).
|
||||||
|
|
||||||
|
The function performs this interval mapping, and sets the iterator to
|
||||||
|
traverse the sub-array and return appropriate partitions.
|
||||||
|
|
||||||
|
@return Status of iterator
|
||||||
|
@retval 0 No matching partitions (iterator not initialized)
|
||||||
|
@retval 1 Ok, iterator initialized for traversal of matching partitions.
|
||||||
|
@retval -1 All partitions would match (iterator not initialized)
|
||||||
|
*/
|
||||||
|
|
||||||
int get_part_iter_for_interval_via_mapping(partition_info *part_info,
|
int get_part_iter_for_interval_via_mapping(partition_info *part_info,
|
||||||
bool is_subpart,
|
bool is_subpart,
|
||||||
uint32 *store_length_array, /* ignored */
|
uint32 *store_length_array, /* ignored */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user