Bug#20404: SHOW CREATE TABLE fails with Turkish I
Problem: SHOW CREATE TABLE printed garbage in the table name for tables whose names contain the Turkish I (i.e. LATIN CAPITAL LETTER I WITH DOT ABOVE) when lower-case-table-names=1. Reason: In some cases during lower/upper case conversion in utf8, the result string can be shorter than the original string (as happens with the above letter). The old implementation of caseup_str() and casedn_str() didn't handle the result length properly, assuming that the length cannot change. This fix changes the result type of cs->cset->casedn_str() and cs->cset->caseup_str() from VOID to UINT, so that they return the result length, and makes them put the '\0' terminator in the proper place. Also, my_caseup_str_utf8() and my_casedn_str_utf8() were rewritten not to use strlen(), for performance reasons. This was done by adding two new functions for null-terminated strings: my_utf8_uni_no_range() and my_uni_utf8_no_range().
This commit is contained in:
parent
28eaf5ff75
commit
d18fcb3a0f
@ -178,8 +178,8 @@ typedef struct my_charset_handler_st
|
||||
unsigned char *s,unsigned char *e);
|
||||
|
||||
/* Functions for case and sort conversion */
|
||||
void (*caseup_str)(struct charset_info_st *, char *);
|
||||
void (*casedn_str)(struct charset_info_st *, char *);
|
||||
uint (*caseup_str)(struct charset_info_st *, char *);
|
||||
uint (*casedn_str)(struct charset_info_st *, char *);
|
||||
uint (*caseup)(struct charset_info_st *, char *src, uint srclen,
|
||||
char *dst, uint dstlen);
|
||||
uint (*casedn)(struct charset_info_st *, char *src, uint srclen,
|
||||
@ -311,8 +311,8 @@ extern uint my_instr_simple(struct charset_info_st *,
|
||||
|
||||
|
||||
/* Functions for 8bit */
|
||||
extern void my_caseup_str_8bit(CHARSET_INFO *, char *);
|
||||
extern void my_casedn_str_8bit(CHARSET_INFO *, char *);
|
||||
extern uint my_caseup_str_8bit(CHARSET_INFO *, char *);
|
||||
extern uint my_casedn_str_8bit(CHARSET_INFO *, char *);
|
||||
extern uint my_caseup_8bit(CHARSET_INFO *, char *src, uint srclen,
|
||||
char *dst, uint dstlen);
|
||||
extern uint my_casedn_8bit(CHARSET_INFO *, char *src, uint srclen,
|
||||
@ -399,8 +399,8 @@ int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
|
||||
|
||||
|
||||
/* Functions for multibyte charsets */
|
||||
extern void my_caseup_str_mb(CHARSET_INFO *, char *);
|
||||
extern void my_casedn_str_mb(CHARSET_INFO *, char *);
|
||||
extern uint my_caseup_str_mb(CHARSET_INFO *, char *);
|
||||
extern uint my_casedn_str_mb(CHARSET_INFO *, char *);
|
||||
extern uint my_caseup_mb(CHARSET_INFO *, char *src, uint srclen,
|
||||
char *dst, uint dstlen);
|
||||
extern uint my_casedn_mb(CHARSET_INFO *, char *src, uint srclen,
|
||||
|
@ -84,3 +84,27 @@ create table t2 like T1;
|
||||
drop table t1, t2;
|
||||
show tables;
|
||||
Tables_in_test
|
||||
set names utf8;
|
||||
drop table if exists İ,İİ;
|
||||
create table İ (s1 int);
|
||||
show create table İ;
|
||||
Table Create Table
|
||||
İ CREATE TABLE `i` (
|
||||
`s1` int(11) default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
show tables;
|
||||
Tables_in_test
|
||||
i
|
||||
drop table İ;
|
||||
create table İİ (s1 int);
|
||||
show create table İİ;
|
||||
Table Create Table
|
||||
İİ CREATE TABLE `ii` (
|
||||
`s1` int(11) default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
show tables;
|
||||
Tables_in_test
|
||||
ii
|
||||
drop table İİ;
|
||||
set names latin1;
|
||||
End of 5.0 tests
|
||||
|
@ -85,3 +85,23 @@ drop table t1, t2;
|
||||
show tables;
|
||||
|
||||
# End of 4.1 tests
|
||||
|
||||
|
||||
#
|
||||
# Bug#20404: SHOW CREATE TABLE fails with Turkish I
|
||||
#
|
||||
set names utf8;
|
||||
--disable_warnings
|
||||
drop table if exists İ,İİ;
|
||||
--enable_warnings
|
||||
create table İ (s1 int);
|
||||
show create table İ;
|
||||
show tables;
|
||||
drop table İ;
|
||||
create table İİ (s1 int);
|
||||
show create table İİ;
|
||||
show tables;
|
||||
drop table İİ;
|
||||
set names latin1;
|
||||
|
||||
--echo End of 5.0 tests
|
||||
|
@ -6177,7 +6177,7 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd,
|
||||
|
||||
ptr->alias= alias_str;
|
||||
if (lower_case_table_names && table->table.length)
|
||||
my_casedn_str(files_charset_info, table->table.str);
|
||||
table->table.length= my_casedn_str(files_charset_info, table->table.str);
|
||||
ptr->table_name=table->table.str;
|
||||
ptr->table_name_length=table->table.length;
|
||||
ptr->lock_type= lock_type;
|
||||
|
@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
|
||||
|
||||
/* This function is used for all conversion functions */
|
||||
|
||||
static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
char *str __attribute__((unused)))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||
|
@ -21,10 +21,11 @@
|
||||
#ifdef USE_MB
|
||||
|
||||
|
||||
void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
|
||||
uint my_caseup_str_mb(CHARSET_INFO * cs, char *str)
|
||||
{
|
||||
register uint32 l;
|
||||
register uchar *map= cs->to_upper;
|
||||
char *str_orig= str;
|
||||
|
||||
while (*str)
|
||||
{
|
||||
@ -37,12 +38,14 @@ void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
|
||||
str++;
|
||||
}
|
||||
}
|
||||
return str - str_orig;
|
||||
}
|
||||
|
||||
void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
|
||||
uint my_casedn_str_mb(CHARSET_INFO * cs, char *str)
|
||||
{
|
||||
register uint32 l;
|
||||
register uchar *map= cs->to_lower;
|
||||
char *str_orig= str;
|
||||
|
||||
while (*str)
|
||||
{
|
||||
@ -55,6 +58,7 @@ void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
|
||||
str++;
|
||||
}
|
||||
}
|
||||
return str - str_orig;
|
||||
}
|
||||
|
||||
uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen,
|
||||
|
@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
|
||||
}
|
||||
|
||||
|
||||
void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
|
||||
uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
|
||||
{
|
||||
register uchar *map= cs->to_upper;
|
||||
char *str_orig= str;
|
||||
while ((*str= (char) map[(uchar) *str]) != 0)
|
||||
str++;
|
||||
return str - str_orig;
|
||||
}
|
||||
|
||||
void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
|
||||
|
||||
uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
|
||||
{
|
||||
register uchar *map= cs->to_lower;
|
||||
char *str_orig= str;
|
||||
while ((*str= (char) map[(uchar) *str]) != 0)
|
||||
str++;
|
||||
return str - str_orig;
|
||||
}
|
||||
|
||||
|
||||
uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
uint dstlen __attribute__((unused)))
|
||||
|
@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
|
||||
}
|
||||
|
||||
|
||||
static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
|
||||
static uint my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
|
||||
char * s __attribute__((unused)))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
uint dstlen __attribute__((unused)))
|
||||
@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
|
||||
return srclen;
|
||||
}
|
||||
|
||||
static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
|
||||
static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
char * s __attribute__((unused)))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2045,6 +2045,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
||||
return MY_CS_ILSEQ;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
The same as above, but without range check
|
||||
for example, for a null-terminated string
|
||||
*/
|
||||
static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t * pwc, const uchar *s)
|
||||
{
|
||||
unsigned char c;
|
||||
|
||||
c= s[0];
|
||||
if (c < 0x80)
|
||||
{
|
||||
*pwc = c;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (c < 0xc2)
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (!((s[1] ^ 0x80) < 0x40))
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
*pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (c < 0xf0)
|
||||
{
|
||||
if (!((s[1] ^ 0x80) < 0x40 &&
|
||||
(s[2] ^ 0x80) < 0x40 &&
|
||||
(c >= 0xe1 || s[1] >= 0xa0)))
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
*pwc= ((my_wc_t) (c & 0x0f) << 12) |
|
||||
((my_wc_t) (s[1] ^ 0x80) << 6) |
|
||||
(my_wc_t) (s[2] ^ 0x80);
|
||||
|
||||
return 3;
|
||||
}
|
||||
return MY_CS_ILSEQ;
|
||||
}
|
||||
|
||||
|
||||
static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
my_wc_t wc, uchar *r, uchar *e)
|
||||
{
|
||||
@ -2091,6 +2137,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
The same as above, but without range check.
|
||||
*/
|
||||
static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t wc, uchar *r)
|
||||
{
|
||||
int count;
|
||||
|
||||
if (wc < 0x80)
|
||||
count= 1;
|
||||
else if (wc < 0x800)
|
||||
count= 2;
|
||||
else if (wc < 0x10000)
|
||||
count= 3;
|
||||
else
|
||||
return MY_CS_ILUNI;
|
||||
|
||||
switch (count)
|
||||
{
|
||||
/* Fall through all cases!!! */
|
||||
case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800;
|
||||
case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0;
|
||||
case 1: r[0]= (uchar) wc;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen,
|
||||
char *dst, uint dstlen)
|
||||
{
|
||||
@ -2141,10 +2215,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen,
|
||||
}
|
||||
|
||||
|
||||
static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
|
||||
static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
|
||||
{
|
||||
uint len= (uint) strlen(s);
|
||||
my_caseup_utf8(cs, s, len, s, len);
|
||||
my_wc_t wc;
|
||||
int srcres, dstres;
|
||||
char *dst= src, *dst0= src;
|
||||
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(cs->caseup_multiply == 1);
|
||||
|
||||
while (*src &&
|
||||
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
|
||||
{
|
||||
int plane= (wc>>8) & 0xFF;
|
||||
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
|
||||
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
|
||||
break;
|
||||
src+= srcres;
|
||||
dst+= dstres;
|
||||
}
|
||||
*dst= '\0';
|
||||
return (uint) (dst - dst0);
|
||||
}
|
||||
|
||||
|
||||
@ -2170,10 +2260,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen,
|
||||
return (uint) (dst - dst0);
|
||||
}
|
||||
|
||||
static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
|
||||
|
||||
static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
|
||||
{
|
||||
uint len= (uint) strlen(s);
|
||||
my_casedn_utf8(cs, s, len, s, len);
|
||||
my_wc_t wc;
|
||||
int srcres, dstres;
|
||||
char *dst= src, *dst0= src;
|
||||
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(cs->casedn_multiply == 1);
|
||||
|
||||
while (*src &&
|
||||
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
|
||||
{
|
||||
int plane= (wc>>8) & 0xFF;
|
||||
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
|
||||
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
|
||||
break;
|
||||
src+= srcres;
|
||||
dst+= dstres;
|
||||
}
|
||||
|
||||
/*
|
||||
In rare cases lower string can be shorter than
|
||||
the original string, for example:
|
||||
|
||||
"U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
|
||||
(which is 0xC4B0 in utf8, i.e. two bytes)
|
||||
|
||||
is converted into
|
||||
|
||||
"U+0069 LATIN SMALL LETTER I"
|
||||
(which is 0x69 in utf8, i.e. one byte)
|
||||
|
||||
So, we need to put '\0' terminator after converting.
|
||||
*/
|
||||
|
||||
*dst= '\0';
|
||||
return (uint) (dst - dst0);
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user