Problem fix:
http://bugs.mysql.com/bug.php?id=2366 Wrong utf8 behaviour when data is truncated
This commit is contained in:
parent
d200443bed
commit
d13ad0822e
@ -144,6 +144,8 @@ typedef struct my_charset_handler_st
|
||||
int (*mbcharlen)(struct charset_info_st *, uint);
|
||||
uint (*numchars)(struct charset_info_st *, const char *b, const char *e);
|
||||
uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos);
|
||||
uint (*wellformedlen)(struct charset_info_st *,
|
||||
const char *b,const char *e, uint nchars);
|
||||
uint (*lengthsp)(struct charset_info_st *, const char *ptr, uint length);
|
||||
|
||||
/* Unicode conversion */
|
||||
@ -311,6 +313,7 @@ int my_wildcmp_8bit(CHARSET_INFO *,
|
||||
|
||||
uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
|
||||
uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
|
||||
uint my_wellformedlen_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
|
||||
int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
|
||||
|
||||
|
||||
@ -327,6 +330,7 @@ int my_wildcmp_mb(CHARSET_INFO *,
|
||||
int escape, int w_one, int w_many);
|
||||
uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
|
||||
uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
|
||||
uint my_wellformedlen_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
|
||||
uint my_instr_mb(struct charset_info_st *,
|
||||
const char *b, uint b_length,
|
||||
const char *s, uint s_length,
|
||||
|
@ -22,7 +22,7 @@ Warning 1264 Data truncated for column 'c2' at row 1
|
||||
Warning 1264 Data truncated for column 'c3' at row 1
|
||||
SELECT * FROM t1;
|
||||
c1 c2 c3
|
||||
aaaabbbbcccc aaaabbbbcccc aaaabbbbcccc
|
||||
aaaa aaaa aaaa
|
||||
DROP TABLE t1;
|
||||
CREATE TABLE t1 (a CHAR(4) CHARACTER SET utf8, KEY key_a(a(3)));
|
||||
SHOW CREATE TABLE t1;
|
||||
|
@ -73,9 +73,17 @@ create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d");
|
||||
show create table t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(4) character set utf8 default NULL
|
||||
`date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(10) character set utf8 default NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
select * from t1;
|
||||
date_format("2004-01-19 10:10:10", "%Y-%m-%d")
|
||||
2004-01-19
|
||||
drop table t1;
|
||||
set names koi8r;
|
||||
create table t1 (s1 char(1) character set utf8);
|
||||
insert into t1 values (_koi8r'ÁÂ');
|
||||
Warnings:
|
||||
Warning 1264 Data truncated for column 's1' at row 1
|
||||
select s1,hex(s1),char_length(s1),octet_length(s1) from t1;
|
||||
s1 hex(s1) char_length(s1) octet_length(s1)
|
||||
Á D0B0 1 2
|
||||
|
@ -53,3 +53,10 @@ show create table t1;
|
||||
select * from t1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug #2366 Wrong utf8 behaviour when data is truncated
|
||||
#
|
||||
set names koi8r;
|
||||
create table t1 (s1 char(1) character set utf8);
|
||||
insert into t1 values (_koi8r'ÁÂ');
|
||||
select s1,hex(s1),char_length(s1),octet_length(s1) from t1;
|
||||
|
48
sql/field.cc
48
sql/field.cc
@ -4060,6 +4060,8 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
|
||||
int error= 0;
|
||||
char buff[80];
|
||||
String tmpstr(buff,sizeof(buff), &my_charset_bin);
|
||||
uint copy_length;
|
||||
|
||||
/* Convert character set if necessary */
|
||||
if (String::needs_conversion(from, length, cs, field_charset))
|
||||
{
|
||||
@ -4067,27 +4069,31 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
|
||||
from= tmpstr.ptr();
|
||||
length= tmpstr.length();
|
||||
}
|
||||
if (length <= field_length)
|
||||
{
|
||||
memcpy(ptr,from,length);
|
||||
if (length < field_length)
|
||||
field_charset->cset->fill(field_charset,ptr+length,field_length-length,
|
||||
' ');
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(ptr,from,field_length);
|
||||
if (current_thd->count_cuted_fields)
|
||||
{ // Check if we loosed some info
|
||||
const char *end=from+length;
|
||||
from+= field_length;
|
||||
from+= field_charset->cset->scan(field_charset, from, end,
|
||||
MY_SEQ_SPACES);
|
||||
if (from != end)
|
||||
{
|
||||
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED);
|
||||
error=1;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure we don't break a multibyte sequence
|
||||
and that we don't copy malformed data.
|
||||
*/
|
||||
copy_length= field_charset->cset->wellformedlen(field_charset,
|
||||
from,from+length,
|
||||
field_length/
|
||||
field_charset->mbmaxlen);
|
||||
memcpy(ptr,from,copy_length);
|
||||
if (copy_length < field_length) // Append spaces if shorter
|
||||
field_charset->cset->fill(field_charset,ptr+copy_length,
|
||||
field_length-copy_length,' ');
|
||||
|
||||
|
||||
if (current_thd->count_cuted_fields && (copy_length < length))
|
||||
{ // Check if we lost some info
|
||||
const char *end=from+length;
|
||||
from+= copy_length;
|
||||
from+= field_charset->cset->scan(field_charset, from, end,
|
||||
MY_SEQ_SPACES);
|
||||
if (from != end)
|
||||
{
|
||||
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED);
|
||||
error=1;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
|
@ -518,7 +518,8 @@ public:
|
||||
void fix_length_and_dec()
|
||||
{
|
||||
collation.set(default_charset());
|
||||
decimals=0; max_length=args[0]->max_length*2;
|
||||
decimals=0;
|
||||
max_length=args[0]->max_length*2*collation.collation->mbmaxlen;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1286,12 +1286,13 @@ void Item_func_date_format::fix_length_and_dec()
|
||||
if (args[1]->type() == STRING_ITEM)
|
||||
{ // Optimize the normal case
|
||||
fixed_length=1;
|
||||
max_length=format_length(((Item_string*) args[1])->const_string());
|
||||
max_length= format_length(((Item_string*) args[1])->const_string())*
|
||||
collation.collation->mbmaxlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed_length=0;
|
||||
max_length=args[1]->max_length*10;
|
||||
max_length=args[1]->max_length*10*collation.collation->mbmaxlen;
|
||||
set_if_smaller(max_length,MAX_BLOB_WIDTH);
|
||||
}
|
||||
maybe_null=1; // If wrong date
|
||||
|
@ -6245,6 +6245,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
|
||||
mbcharlen_big5,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_big5, /* mb_wc */
|
||||
my_wc_mb_big5, /* wc_mb */
|
||||
|
@ -347,6 +347,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_mbcharlen_8bit, /* mbcharlen */
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_wellformedlen_8bit,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_bin,
|
||||
my_wc_mb_bin,
|
||||
|
@ -8653,6 +8653,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
mbcharlen_euc_kr,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_euc_kr, /* mb_wc */
|
||||
my_wc_mb_euc_kr, /* wc_mb */
|
||||
|
@ -5704,6 +5704,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
mbcharlen_gb2312,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_gb2312, /* mb_wc */
|
||||
my_wc_mb_gb2312, /* wc_mb */
|
||||
|
@ -9900,6 +9900,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
mbcharlen_gbk,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_gbk,
|
||||
my_wc_mb_gbk,
|
||||
|
@ -180,6 +180,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_mbcharlen_8bit,
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_wellformedlen_8bit,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_latin1,
|
||||
my_wc_mb_latin1,
|
||||
|
@ -274,6 +274,22 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
|
||||
return pos ? e+2-b0 : b-b0;
|
||||
}
|
||||
|
||||
/*
  Return the length in bytes of the leading part of [b, e) that holds
  at most 'pos' characters.

  SYNOPSIS
    my_wellformedlen_mb()
    cs    Character set, passed to my_ismbchar()
    b     Start of the buffer
    e     One past the last byte of the buffer
    pos   Maximum number of characters to step over

  NOTES
    Each step advances by the length reported by my_ismbchar(), or by
    a single byte when my_ismbchar() reports 0.  Stepping stops as soon
    as 'pos' characters were consumed or the end of the buffer is
    reached.

  RETURN
    Number of bytes stepped over, counted from b.
*/
uint my_wellformedlen_mb(CHARSET_INFO *cs __attribute__((unused)),
                         const char *b, const char *e, uint pos)
{
  const char *cur= b;
  uint left;

  for (left= pos; left != 0 && cur < e; left--)
  {
    uint chlen= my_ismbchar(cs, cur, e);
    cur+= chlen ? chlen : 1;
  }
  return cur - b;
}
|
||||
|
||||
|
||||
|
||||
uint my_instr_mb(CHARSET_INFO *cs,
|
||||
const char *b, uint b_length,
|
||||
const char *s, uint s_length,
|
||||
|
@ -1020,6 +1020,15 @@ uint my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
return pos;
|
||||
}
|
||||
|
||||
/*
  Return how many bytes of [b, e) may be taken when at most 'nchars'
  characters are wanted, treating every byte as one character.

  SYNOPSIS
    my_wellformedlen_8bit()
    cs      Unused
    b       Start of the buffer
    e       One past the last byte of the buffer
    nchars  Maximum number of characters

  RETURN
    The smaller of the buffer size in bytes and nchars.
*/
uint my_wellformedlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
                           const char *b,
                           const char *e,
                           uint nchars)
{
  uint avail= e-b;

  if (avail < nchars)
    return avail;
  return nchars;
}
|
||||
|
||||
uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *ptr, uint length)
|
||||
{
|
||||
@ -1096,6 +1105,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
|
||||
my_mbcharlen_8bit, /* mbcharlen */
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_wellformedlen_8bit,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_8bit,
|
||||
my_wc_mb_8bit,
|
||||
|
@ -4489,6 +4489,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
mbcharlen_sjis,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_sjis, /* mb_wc */
|
||||
my_wc_mb_sjis, /* wc_mb */
|
||||
|
@ -874,7 +874,8 @@ NULL,NULL,NULL,NULL,NULL,NULL,NULL,plFF
|
||||
|
||||
|
||||
static
|
||||
int my_mb_wc_tis620(CHARSET_INFO *cs,my_wc_t *wc,
|
||||
int my_mb_wc_tis620(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t *wc,
|
||||
const unsigned char *str,
|
||||
const unsigned char *end __attribute__((unused)))
|
||||
{
|
||||
@ -886,7 +887,8 @@ int my_mb_wc_tis620(CHARSET_INFO *cs,my_wc_t *wc,
|
||||
}
|
||||
|
||||
static
|
||||
int my_wc_mb_tis620(CHARSET_INFO *cs,my_wc_t wc,
|
||||
int my_wc_mb_tis620(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t wc,
|
||||
unsigned char *str,
|
||||
unsigned char *end __attribute__((unused)))
|
||||
{
|
||||
@ -919,6 +921,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_mbcharlen_8bit, /* mbcharlen */
|
||||
my_numchars_8bit,
|
||||
my_charpos_8bit,
|
||||
my_wellformedlen_8bit,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_tis620, /* mb_wc */
|
||||
my_wc_mb_tis620, /* wc_mb */
|
||||
|
@ -1001,6 +1001,17 @@ uint my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
return pos*2;
|
||||
}
|
||||
|
||||
static
|
||||
uint my_wellformedlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b,
|
||||
const char *e,
|
||||
uint nchars)
|
||||
{
|
||||
uint nbytes= (e-b) & ~ (uint)1;
|
||||
nchars*= 2;
|
||||
return nbytes < nchars ? nbytes : nchars;
|
||||
}
|
||||
|
||||
static
|
||||
void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
char *s, uint l, int fill)
|
||||
@ -1287,6 +1298,7 @@ static MY_CHARSET_HANDLER my_charset_ucs2_handler=
|
||||
my_mbcharlen_ucs2, /* mbcharlen */
|
||||
my_numchars_ucs2,
|
||||
my_charpos_ucs2,
|
||||
my_wellformedlen_ucs2,
|
||||
my_lengthsp_ucs2,
|
||||
my_ucs2_uni, /* mb_wc */
|
||||
my_uni_ucs2, /* wc_mb */
|
||||
|
@ -8444,6 +8444,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
mbcharlen_ujis,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_mb_wc_euc_jp, /* mb_wc */
|
||||
my_wc_mb_euc_jp, /* wc_mb */
|
||||
|
@ -1969,6 +1969,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_mbcharlen_utf8,
|
||||
my_numchars_mb,
|
||||
my_charpos_mb,
|
||||
my_wellformedlen_mb,
|
||||
my_lengthsp_8bit,
|
||||
my_utf8_uni,
|
||||
my_uni_utf8,
|
||||
|
Loading…
x
Reference in New Issue
Block a user