Bug#45012 my_like_range_cp932 generates invalid string
Problem: The my_like_range_xxx() functions returned badly formed maximum strings for Asian character sets, which caused problems for storage engines. Fix: - Removed a number of my_like_range_xxx() implementations, which were in fact duplicated code. - Using the generic my_like_range_mb() instead. - Setting the max_sort_char member properly for Asian character sets. - Adding unittest/strings/strings-t.c to test that the my_like_range_xxx() functions return well-formed min and max strings. Notes: - No additional tests in mysql/t/ are available; the old tests cover the affected code well enough.
This commit is contained in:
parent
32372a756f
commit
e497d6e6e1
@ -2880,7 +2880,8 @@ fi
|
|||||||
|
|
||||||
AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl
|
AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl
|
||||||
unittest/Makefile unittest/mytap/Makefile unittest/mytap/t/Makefile dnl
|
unittest/Makefile unittest/mytap/Makefile unittest/mytap/t/Makefile dnl
|
||||||
unittest/mysys/Makefile unittest/examples/Makefile dnl
|
unittest/mysys/Makefile unittest/strings/Makefile dnl
|
||||||
|
unittest/examples/Makefile dnl
|
||||||
strings/Makefile regex/Makefile storage/Makefile dnl
|
strings/Makefile regex/Makefile storage/Makefile dnl
|
||||||
man/Makefile BUILD/Makefile vio/Makefile dnl
|
man/Makefile BUILD/Makefile vio/Makefile dnl
|
||||||
libmysql/Makefile libmysql_r/Makefile client/Makefile dnl
|
libmysql/Makefile libmysql_r/Makefile client/Makefile dnl
|
||||||
|
@ -377,86 +377,6 @@ static int my_strxfrm_big5(uchar *dest, const uchar *src, int len)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
** Calculate min_str and max_str that ranges a LIKE string.
|
|
||||||
** Arguments:
|
|
||||||
** ptr Pointer to LIKE string.
|
|
||||||
** ptr_length Length of LIKE string.
|
|
||||||
** escape Escape character in LIKE. (Normally '\').
|
|
||||||
** All escape characters should be removed from min_str and max_str
|
|
||||||
** res_length Length of min_str and max_str.
|
|
||||||
** min_str Smallest case sensitive string that ranges LIKE.
|
|
||||||
** Should be space padded to res_length.
|
|
||||||
** max_str Largest case sensitive string that ranges LIKE.
|
|
||||||
** Normally padded with the biggest character sort value.
|
|
||||||
**
|
|
||||||
** The function should return 0 if ok and 1 if the LIKE string can't be
|
|
||||||
** optimized !
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define max_sort_char ((char) 255)
|
|
||||||
|
|
||||||
static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const char *ptr,size_t ptr_length,
|
|
||||||
pbool escape, pbool w_one, pbool w_many,
|
|
||||||
size_t res_length,
|
|
||||||
char *min_str, char *max_str,
|
|
||||||
size_t *min_length, size_t *max_length)
|
|
||||||
{
|
|
||||||
const char *end= ptr + ptr_length;
|
|
||||||
char *min_org=min_str;
|
|
||||||
char *min_end=min_str+res_length;
|
|
||||||
size_t charlen= res_length / cs->mbmaxlen;
|
|
||||||
|
|
||||||
for (; ptr != end && min_str != min_end && charlen > 0; ptr++, charlen--)
|
|
||||||
{
|
|
||||||
if (ptr+1 != end && isbig5code(ptr[0],ptr[1]))
|
|
||||||
{
|
|
||||||
*min_str++= *max_str++ = *ptr++;
|
|
||||||
*min_str++= *max_str++ = *ptr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == escape && ptr+1 != end)
|
|
||||||
{
|
|
||||||
ptr++; /* Skip escape */
|
|
||||||
if (isbig5code(ptr[0], ptr[1]))
|
|
||||||
*min_str++= *max_str++ = *ptr++;
|
|
||||||
if (min_str < min_end)
|
|
||||||
*min_str++= *max_str++= *ptr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_one) /* '_' in SQL */
|
|
||||||
{
|
|
||||||
*min_str++='\0'; /* This should be min char */
|
|
||||||
*max_str++=max_sort_char;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_many) /* '%' in SQL */
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
Calculate length of keys:
|
|
||||||
'a\0\0... is the smallest possible string when we have space expand
|
|
||||||
a\ff\ff... is the biggest possible string
|
|
||||||
*/
|
|
||||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
|
||||||
res_length);
|
|
||||||
*max_length= res_length;
|
|
||||||
do {
|
|
||||||
*min_str++ = 0;
|
|
||||||
*max_str++ = max_sort_char;
|
|
||||||
} while (min_str != min_end);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*min_str++= *max_str++ = *ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
*min_length= *max_length= (size_t) (min_str-min_org);
|
|
||||||
while (min_str != min_end)
|
|
||||||
*min_str++= *max_str++= ' ';
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)),
|
static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
const char* p, const char *e)
|
const char* p, const char *e)
|
||||||
{
|
{
|
||||||
@ -6338,7 +6258,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
|||||||
my_strnncollsp_big5,
|
my_strnncollsp_big5,
|
||||||
my_strnxfrm_big5,
|
my_strnxfrm_big5,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_big5,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb,
|
my_wildcmp_mb,
|
||||||
my_strcasecmp_mb,
|
my_strcasecmp_mb,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
@ -6402,7 +6322,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xF9D5, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_big5_handler,
|
&my_charset_big5_handler,
|
||||||
@ -6435,7 +6355,7 @@ CHARSET_INFO my_charset_big5_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xF9FE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_big5_handler,
|
&my_charset_big5_handler,
|
||||||
|
@ -306,76 +306,6 @@ static size_t my_strnxfrm_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
** Calculate min_str and max_str that ranges a LIKE string.
|
|
||||||
** Arguments:
|
|
||||||
** ptr Pointer to LIKE string.
|
|
||||||
** ptr_length Length of LIKE string.
|
|
||||||
** escape Escape character in LIKE. (Normally '\').
|
|
||||||
** All escape characters should be removed from min_str and max_str
|
|
||||||
** res_length Length of min_str and max_str.
|
|
||||||
** min_str Smallest case sensitive string that ranges LIKE.
|
|
||||||
** Should be space padded to res_length.
|
|
||||||
** max_str Largest case sensitive string that ranges LIKE.
|
|
||||||
** Normally padded with the biggest character sort value.
|
|
||||||
**
|
|
||||||
** The function should return 0 if ok and 1 if the LIKE string can't be
|
|
||||||
** optimized !
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define max_sort_char ((char) 255)
|
|
||||||
|
|
||||||
static my_bool my_like_range_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const char *ptr,size_t ptr_length,
|
|
||||||
pbool escape, pbool w_one, pbool w_many,
|
|
||||||
size_t res_length,
|
|
||||||
char *min_str,char *max_str,
|
|
||||||
size_t *min_length, size_t *max_length)
|
|
||||||
{
|
|
||||||
const char *end=ptr+ptr_length;
|
|
||||||
char *min_org=min_str;
|
|
||||||
char *min_end=min_str+res_length;
|
|
||||||
|
|
||||||
while (ptr < end && min_str < min_end) {
|
|
||||||
if (ismbchar_cp932(cs, ptr, end)) {
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
if (min_str < min_end)
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == escape && ptr+1 < end) {
|
|
||||||
ptr++; /* Skip escape */
|
|
||||||
if (ismbchar_cp932(cs, ptr, end))
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
if (min_str < min_end)
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_one) { /* '_' in SQL */
|
|
||||||
*min_str++ = '\0'; /* This should be min char */
|
|
||||||
*max_str++ = max_sort_char;
|
|
||||||
ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_many)
|
|
||||||
{ /* '%' in SQL */
|
|
||||||
*min_length = (size_t)(min_str - min_org);
|
|
||||||
*max_length = res_length;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
*min_str++= 0;
|
|
||||||
*max_str++= max_sort_char;
|
|
||||||
} while (min_str < min_end);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
}
|
|
||||||
*min_length = *max_length = (size_t) (min_str - min_org);
|
|
||||||
while (min_str < min_end)
|
|
||||||
*min_str++ = *max_str++ = ' '; /* Because if key compression */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* page 0 0x00A1-0x00DF */
|
/* page 0 0x00A1-0x00DF */
|
||||||
static uint16 tab_cp932_uni0[]={
|
static uint16 tab_cp932_uni0[]={
|
||||||
0xFF61,0xFF62,0xFF63,0xFF64,0xFF65,0xFF66,0xFF67,0xFF68,
|
0xFF61,0xFF62,0xFF63,0xFF64,0xFF65,0xFF66,0xFF67,0xFF68,
|
||||||
@ -5467,7 +5397,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
my_strnncollsp_cp932,
|
my_strnncollsp_cp932,
|
||||||
my_strnxfrm_cp932,
|
my_strnxfrm_cp932,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_cp932,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb, /* wildcmp */
|
my_wildcmp_mb, /* wildcmp */
|
||||||
my_strcasecmp_8bit,
|
my_strcasecmp_8bit,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
@ -5533,7 +5463,7 @@ CHARSET_INFO my_charset_cp932_japanese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFCFC, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -5565,7 +5495,7 @@ CHARSET_INFO my_charset_cp932_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFCFC, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -8762,7 +8762,7 @@ CHARSET_INFO my_charset_euckr_korean_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -8795,7 +8795,7 @@ CHARSET_INFO my_charset_euckr_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -8710,7 +8710,7 @@ CHARSET_INFO my_charset_eucjpms_japanese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
3, /* mbmaxlen */
|
3, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad_char */
|
' ', /* pad_char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -8743,7 +8743,7 @@ CHARSET_INFO my_charset_eucjpms_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
3, /* mbmaxlen */
|
3, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad_char */
|
' ', /* pad_char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -5790,7 +5790,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xF7FE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -5822,7 +5822,7 @@ CHARSET_INFO my_charset_gb2312_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xF7FE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -2690,86 +2690,6 @@ static size_t my_strnxfrm_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
** Calculate min_str and max_str that ranges a LIKE string.
|
|
||||||
** Arguments:
|
|
||||||
** ptr Pointer to LIKE string.
|
|
||||||
** ptr_length Length of LIKE string.
|
|
||||||
** escape Escape character in LIKE. (Normally '\').
|
|
||||||
** All escape characters should be removed from min_str and max_str
|
|
||||||
** res_length Length of min_str and max_str.
|
|
||||||
** min_str Smallest case sensitive string that ranges LIKE.
|
|
||||||
** Should be space padded to res_length.
|
|
||||||
** max_str Largest case sensitive string that ranges LIKE.
|
|
||||||
** Normally padded with the biggest character sort value.
|
|
||||||
**
|
|
||||||
** The function should return 0 if ok and 1 if the LIKE string can't be
|
|
||||||
** optimized !
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define max_sort_char ((uchar) 255)
|
|
||||||
|
|
||||||
static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const char *ptr,size_t ptr_length,
|
|
||||||
pbool escape, pbool w_one, pbool w_many,
|
|
||||||
size_t res_length,
|
|
||||||
char *min_str,char *max_str,
|
|
||||||
size_t *min_length,size_t *max_length)
|
|
||||||
{
|
|
||||||
const char *end= ptr + ptr_length;
|
|
||||||
char *min_org=min_str;
|
|
||||||
char *min_end=min_str+res_length;
|
|
||||||
size_t charlen= res_length / cs->mbmaxlen;
|
|
||||||
|
|
||||||
for (; ptr != end && min_str != min_end && charlen > 0; ptr++, charlen--)
|
|
||||||
{
|
|
||||||
if (ptr+1 != end && isgbkcode(ptr[0],ptr[1]))
|
|
||||||
{
|
|
||||||
*min_str++= *max_str++ = *ptr++;
|
|
||||||
*min_str++= *max_str++ = *ptr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == escape && ptr+1 != end)
|
|
||||||
{
|
|
||||||
ptr++; /* Skip escape */
|
|
||||||
if (isgbkcode(ptr[0], ptr[1]))
|
|
||||||
*min_str++= *max_str++ = *ptr;
|
|
||||||
if (min_str < min_end)
|
|
||||||
*min_str++= *max_str++= *ptr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_one) /* '_' in SQL */
|
|
||||||
{
|
|
||||||
*min_str++='\0'; /* This should be min char */
|
|
||||||
*max_str++=max_sort_char;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_many) /* '%' in SQL */
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
Calculate length of keys:
|
|
||||||
'a\0\0... is the smallest possible string when we have space expand
|
|
||||||
a\ff\ff... is the biggest possible string
|
|
||||||
*/
|
|
||||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
|
||||||
res_length);
|
|
||||||
*max_length= res_length;
|
|
||||||
do {
|
|
||||||
*min_str++= 0;
|
|
||||||
*max_str++= max_sort_char;
|
|
||||||
} while (min_str != min_end);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*min_str++= *max_str++ = *ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
*min_length= *max_length = (size_t) (min_str - min_org);
|
|
||||||
while (min_str != min_end)
|
|
||||||
*min_str++= *max_str++= ' '; /* Because if key compression */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static uint ismbchar_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
static uint ismbchar_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
const char* p, const char *e)
|
const char* p, const char *e)
|
||||||
{
|
{
|
||||||
@ -9983,7 +9903,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
my_strnncollsp_gbk,
|
my_strnncollsp_gbk,
|
||||||
my_strnxfrm_gbk,
|
my_strnxfrm_gbk,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_gbk,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb,
|
my_wildcmp_mb,
|
||||||
my_strcasecmp_mb,
|
my_strcasecmp_mb,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
@ -10048,7 +9968,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xA967, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -10080,7 +10000,7 @@ CHARSET_INFO my_charset_gbk_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -498,7 +498,9 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
Write max key:
|
Write max key:
|
||||||
- for non-Unicode character sets:
|
- for non-Unicode character sets:
|
||||||
just set to 255.
|
just bfill using max_sort_char if max_sort_char is one byte.
|
||||||
|
In case when max_sort_char is two bytes, fill with double-byte pairs
|
||||||
|
and optionally pad with a single space character.
|
||||||
- for Unicode character set (utf-8):
|
- for Unicode character set (utf-8):
|
||||||
create a buffer with multibyte representation of the max_sort_char
|
create a buffer with multibyte representation of the max_sort_char
|
||||||
character, and copy it into max_str in a loop.
|
character, and copy it into max_str in a loop.
|
||||||
@ -510,12 +512,20 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
|
|||||||
|
|
||||||
if (!(cs->state & MY_CS_UNICODE))
|
if (!(cs->state & MY_CS_UNICODE))
|
||||||
{
|
{
|
||||||
bfill(str, end - str, 255);
|
if (cs->max_sort_char <= 255)
|
||||||
return;
|
{
|
||||||
|
bfill(str, end - str, cs->max_sort_char);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buf[0]= cs->max_sort_char >> 8;
|
||||||
|
buf[1]= cs->max_sort_char & 0xFF;
|
||||||
|
buflen= 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
|
||||||
|
(uchar*) buf + sizeof(buf));
|
||||||
}
|
}
|
||||||
|
|
||||||
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
|
|
||||||
(uchar*) buf + sizeof(buf));
|
|
||||||
|
|
||||||
DBUG_ASSERT(buflen > 0);
|
DBUG_ASSERT(buflen > 0);
|
||||||
do
|
do
|
||||||
|
@ -304,85 +304,6 @@ static size_t my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
** Calculate min_str and max_str that ranges a LIKE string.
|
|
||||||
** Arguments:
|
|
||||||
** ptr Pointer to LIKE string.
|
|
||||||
** ptr_length Length of LIKE string.
|
|
||||||
** escape Escape character in LIKE. (Normally '\').
|
|
||||||
** All escape characters should be removed from min_str and max_str
|
|
||||||
** res_length Length of min_str and max_str.
|
|
||||||
** min_str Smallest case sensitive string that ranges LIKE.
|
|
||||||
** Should be space padded to res_length.
|
|
||||||
** max_str Largest case sensitive string that ranges LIKE.
|
|
||||||
** Normally padded with the biggest character sort value.
|
|
||||||
**
|
|
||||||
** The function should return 0 if ok and 1 if the LIKE string can't be
|
|
||||||
** optimized !
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define max_sort_char ((char) 255)
|
|
||||||
|
|
||||||
static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const char *ptr,size_t ptr_length,
|
|
||||||
pbool escape, pbool w_one, pbool w_many,
|
|
||||||
size_t res_length,
|
|
||||||
char *min_str,char *max_str,
|
|
||||||
size_t *min_length,size_t *max_length)
|
|
||||||
{
|
|
||||||
const char *end= ptr + ptr_length;
|
|
||||||
char *min_org=min_str;
|
|
||||||
char *min_end=min_str+res_length;
|
|
||||||
size_t charlen= res_length / cs->mbmaxlen;
|
|
||||||
|
|
||||||
for ( ; ptr < end && min_str < min_end && charlen > 0 ; charlen--)
|
|
||||||
{
|
|
||||||
if (ismbchar_sjis(cs, ptr, end)) {
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
if (min_str < min_end)
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == escape && ptr+1 < end) {
|
|
||||||
ptr++; /* Skip escape */
|
|
||||||
if (ismbchar_sjis(cs, ptr, end))
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
if (min_str < min_end)
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_one) { /* '_' in SQL */
|
|
||||||
*min_str++ = '\0'; /* This should be min char */
|
|
||||||
*max_str++ = max_sort_char;
|
|
||||||
ptr++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (*ptr == w_many)
|
|
||||||
{ /* '%' in SQL */
|
|
||||||
/*
|
|
||||||
Calculate length of keys:
|
|
||||||
'a\0\0... is the smallest possible string when we have space expand
|
|
||||||
a\ff\ff... is the biggest possible string
|
|
||||||
*/
|
|
||||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
|
||||||
res_length);
|
|
||||||
*max_length= res_length;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
*min_str++= 0;
|
|
||||||
*max_str++= max_sort_char;
|
|
||||||
} while (min_str < min_end);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*min_str++ = *max_str++ = *ptr++;
|
|
||||||
}
|
|
||||||
|
|
||||||
*min_length= *max_length= (size_t) (min_str - min_org);
|
|
||||||
while (min_str != min_end)
|
|
||||||
*min_str++= *max_str++= ' '; /* Because if key compression */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* page 0 0x00A1-0x00DF */
|
/* page 0 0x00A1-0x00DF */
|
||||||
static uint16 tab_sjis_uni0[]={
|
static uint16 tab_sjis_uni0[]={
|
||||||
0xFF61,0xFF62,0xFF63,0xFF64,0xFF65,0xFF66,0xFF67,0xFF68,
|
0xFF61,0xFF62,0xFF63,0xFF64,0xFF65,0xFF66,0xFF67,0xFF68,
|
||||||
@ -4628,7 +4549,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
my_strnncollsp_sjis,
|
my_strnncollsp_sjis,
|
||||||
my_strnxfrm_sjis,
|
my_strnxfrm_sjis,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_sjis,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb, /* wildcmp */
|
my_wildcmp_mb, /* wildcmp */
|
||||||
my_strcasecmp_8bit,
|
my_strcasecmp_8bit,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
@ -4694,7 +4615,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFCFC, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -4726,7 +4647,7 @@ CHARSET_INFO my_charset_sjis_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
2, /* mbmaxlen */
|
2, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFCFC, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -8567,7 +8567,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
3, /* mbmaxlen */
|
3, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
@ -8600,7 +8600,7 @@ CHARSET_INFO my_charset_ujis_bin=
|
|||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
3, /* mbmaxlen */
|
3, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFEFE, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
|
@ -13,12 +13,12 @@
|
|||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, write to the Free Software
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
SUBDIRS = mytap . mysys examples
|
SUBDIRS = mytap . mysys examples strings
|
||||||
|
|
||||||
EXTRA_DIST = unit.pl
|
EXTRA_DIST = unit.pl
|
||||||
CLEANFILES = unit
|
CLEANFILES = unit
|
||||||
|
|
||||||
unittests = mytap mysys @mysql_se_unittest_dirs@ @mysql_pg_unittest_dirs@
|
unittests = mytap mysys strings @mysql_se_unittest_dirs@ @mysql_pg_unittest_dirs@
|
||||||
|
|
||||||
test:
|
test:
|
||||||
perl unit.pl run $(unittests)
|
perl unit.pl run $(unittests)
|
||||||
|
27
unittest/strings/Makefile.am
Normal file
27
unittest/strings/Makefile.am
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# Copyright 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; version 2 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include
|
||||||
|
AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
|
||||||
|
|
||||||
|
LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
|
||||||
|
$(top_builddir)/mysys/libmysys.a \
|
||||||
|
$(top_builddir)/dbug/libdbug.a \
|
||||||
|
$(top_builddir)/strings/libmystrings.a
|
||||||
|
|
||||||
|
noinst_PROGRAMS = strings-t
|
||||||
|
|
||||||
|
# Don't update the files from bitkeeper
|
||||||
|
%::SCCS/s.%
|
114
unittest/strings/strings-t.c
Normal file
114
unittest/strings/strings-t.c
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
/* Copyright 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; version 2 of the License.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
||||||
|
|
||||||
|
#include <tap.h>
|
||||||
|
#include <my_global.h>
|
||||||
|
#include <my_sys.h>
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Test that like_range() returns well-formed results.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
test_like_range_for_charset(CHARSET_INFO *cs, const char *src, size_t src_len)
|
||||||
|
{
|
||||||
|
char min_str[32], max_str[32];
|
||||||
|
size_t min_len, max_len, min_well_formed_len, max_well_formed_len;
|
||||||
|
int error= 0;
|
||||||
|
|
||||||
|
cs->coll->like_range(cs, src, src_len, '\\', '_', '%',
|
||||||
|
sizeof(min_str), min_str, max_str, &min_len, &max_len);
|
||||||
|
diag("min_len=%d\tmax_len=%d\t%s", min_len, max_len, cs->name);
|
||||||
|
min_well_formed_len= cs->cset->well_formed_len(cs,
|
||||||
|
min_str, min_str + min_len,
|
||||||
|
10000, &error);
|
||||||
|
max_well_formed_len= cs->cset->well_formed_len(cs,
|
||||||
|
max_str, max_str + max_len,
|
||||||
|
10000, &error);
|
||||||
|
if (min_len != min_well_formed_len)
|
||||||
|
diag("Bad min_str: min_well_formed_len=%d min_str[%d]=0x%02X",
|
||||||
|
min_well_formed_len, min_well_formed_len,
|
||||||
|
(uchar) min_str[min_well_formed_len]);
|
||||||
|
if (max_len != max_well_formed_len)
|
||||||
|
diag("Bad max_str: max_well_formed_len=%d max_str[%d]=0x%02X",
|
||||||
|
max_well_formed_len, max_well_formed_len,
|
||||||
|
(uchar) max_str[max_well_formed_len]);
|
||||||
|
return
|
||||||
|
min_len == min_well_formed_len &&
|
||||||
|
max_len == max_well_formed_len ? 0 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Character sets whose like_range() implementation is checked by this test.
  Every entry is guarded by its HAVE_CHARSET_xxx macro, so only character
  sets compiled into the build are referenced.

  cp932 and eucjpms are listed as well: their max_sort_char was changed
  together with the other Asian character sets, and cp932 is the character
  set named in the bug report.
  NOTE(review): assumes my_charset_cp932_* and my_charset_eucjpms_* are
  declared by the same headers as the other entries -- confirm.
*/
static CHARSET_INFO *charset_list[]=
{
#ifdef HAVE_CHARSET_big5
  &my_charset_big5_chinese_ci,
  &my_charset_big5_bin,
#endif
#ifdef HAVE_CHARSET_cp932
  &my_charset_cp932_japanese_ci,
  &my_charset_cp932_bin,
#endif
#ifdef HAVE_CHARSET_eucjpms
  &my_charset_eucjpms_japanese_ci,
  &my_charset_eucjpms_bin,
#endif
#ifdef HAVE_CHARSET_euckr
  &my_charset_euckr_korean_ci,
  &my_charset_euckr_bin,
#endif
#ifdef HAVE_CHARSET_gb2312
  &my_charset_gb2312_chinese_ci,
  &my_charset_gb2312_bin,
#endif
#ifdef HAVE_CHARSET_gbk
  &my_charset_gbk_chinese_ci,
  &my_charset_gbk_bin,
#endif
#ifdef HAVE_CHARSET_latin1
  &my_charset_latin1,
  &my_charset_latin1_bin,
#endif
#ifdef HAVE_CHARSET_sjis
  &my_charset_sjis_japanese_ci,
  &my_charset_sjis_bin,
#endif
#ifdef HAVE_CHARSET_tis620
  &my_charset_tis620_thai_ci,
  &my_charset_tis620_bin,
#endif
#ifdef HAVE_CHARSET_ujis
  &my_charset_ujis_japanese_ci,
  &my_charset_ujis_bin,
#endif
#ifdef HAVE_CHARSET_utf8
  &my_charset_utf8_general_ci,
  &my_charset_utf8_unicode_ci,
  &my_charset_utf8_bin,
#endif
};
|
||||||
|
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
size_t i, failed= 0;
|
||||||
|
|
||||||
|
plan(1);
|
||||||
|
diag("Testing my_like_range_xxx() functions");
|
||||||
|
|
||||||
|
for (i= 0; i < array_elements(charset_list); i++)
|
||||||
|
{
|
||||||
|
CHARSET_INFO *cs= charset_list[i];
|
||||||
|
if (test_like_range_for_charset(cs, "abc%", 4))
|
||||||
|
{
|
||||||
|
++failed;
|
||||||
|
diag("Failed for %s", cs->name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ok(failed == 0, "Testing my_like_range_xxx() functions");
|
||||||
|
return exit_status();
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user