diff --git a/include/m_ctype.h b/include/m_ctype.h index 4a9415f43f9..88c3418fc0d 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -144,6 +144,8 @@ typedef struct my_charset_handler_st int (*mbcharlen)(struct charset_info_st *, uint); uint (*numchars)(struct charset_info_st *, const char *b, const char *e); uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos); + uint (*wellformedlen)(struct charset_info_st *, + const char *b,const char *e, uint nchars); uint (*lengthsp)(struct charset_info_st *, const char *ptr, uint length); /* Unicode convertion */ @@ -311,6 +313,7 @@ int my_wildcmp_8bit(CHARSET_INFO *, uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e); uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); +uint my_wellformedlen_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); int my_mbcharlen_8bit(CHARSET_INFO *, uint c); @@ -327,6 +330,7 @@ int my_wildcmp_mb(CHARSET_INFO *, int escape, int w_one, int w_many); uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e); uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); +uint my_wellformedlen_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); uint my_instr_mb(struct charset_info_st *, const char *b, uint b_length, const char *s, uint s_length, diff --git a/mysql-test/r/ctype_mb.result b/mysql-test/r/ctype_mb.result index 288033e7530..edccb047c85 100644 --- a/mysql-test/r/ctype_mb.result +++ b/mysql-test/r/ctype_mb.result @@ -22,7 +22,7 @@ Warning 1264 Data truncated for column 'c2' at row 1 Warning 1264 Data truncated for column 'c3' at row 1 SELECT * FROM t1; c1 c2 c3 -aaaabbbbcccc aaaabbbbcccc aaaabbbbcccc +aaaa aaaa aaaa DROP TABLE t1; CREATE TABLE t1 (a CHAR(4) CHARACTER SET utf8, KEY key_a(a(3))); SHOW CREATE TABLE t1; diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 7c05b1ea446..31f26c421b6 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -73,9 +73,17 @@ create table t1 select date_format("2004-01-19 10:10:10", "%Y-%m-%d"); show create table t1; Table Create Table t1 CREATE TABLE `t1` ( - `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(4) character set utf8 default NULL + `date_format("2004-01-19 10:10:10", "%Y-%m-%d")` char(10) character set utf8 default NULL ) ENGINE=MyISAM DEFAULT CHARSET=latin1 select * from t1; date_format("2004-01-19 10:10:10", "%Y-%m-%d") 2004-01-19 drop table t1; +set names koi8r; +create table t1 (s1 char(1) character set utf8); +insert into t1 values (_koi8r'ÁÂ'); +Warnings: +Warning 1264 Data truncated for column 's1' at row 1 +select s1,hex(s1),char_length(s1),octet_length(s1) from t1; +s1 hex(s1) char_length(s1) octet_length(s1) +Á D0B0 1 2 diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 5e9324dd68f..c2ea1ed20a4 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -53,3 +53,10 @@ show create table t1; select * from t1; drop table t1; +# +# Bug #2366 Wrong utf8 behaviour when data is trancated +# +set names koi8r; +create table t1 (s1 char(1) character set utf8); +insert into t1 values (_koi8r'ÁÂ'); +select s1,hex(s1),char_length(s1),octet_length(s1) from t1; diff --git a/sql/field.cc b/sql/field.cc index 9f25b770ab0..57b77693275 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -4060,6 +4060,8 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) int error= 0; char buff[80]; String tmpstr(buff,sizeof(buff), &my_charset_bin); + uint copy_length; + /* Convert character set if nesessary */ if (String::needs_conversion(from, length, cs, field_charset)) { @@ -4067,27 +4069,31 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) from= tmpstr.ptr(); length= tmpstr.length(); } - if (length <= field_length) - { - memcpy(ptr,from,length); - if (length < field_length) - field_charset->cset->fill(field_charset,ptr+length,field_length-length, - ' '); - } - else - { - memcpy(ptr,from,field_length); - if (current_thd->count_cuted_fields) - { // Check if we loosed some info - const char *end=from+length; - from+= field_length; - from+= field_charset->cset->scan(field_charset, from, end, - MY_SEQ_SPACES); - if (from != end) - { - set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED); - error=1; - } + + /* + Make sure we don't break a multybite sequence + as well as don't copy a malformed data. + */ + copy_length= field_charset->cset->wellformedlen(field_charset, + from,from+length, + field_length/ + field_charset->mbmaxlen); + memcpy(ptr,from,copy_length); + if (copy_length < field_length) // Append spaces if shorter + field_charset->cset->fill(field_charset,ptr+copy_length, + field_length-copy_length,' '); + + + if (current_thd->count_cuted_fields && (copy_length < length)) + { // Check if we loosed some info + const char *end=from+length; + from+= copy_length; + from+= field_charset->cset->scan(field_charset, from, end, + MY_SEQ_SPACES); + if (from != end) + { + set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_TRUNCATED); + error=1; } } return error; diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index fd0afb19726..465300e721e 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -518,7 +518,8 @@ public: void fix_length_and_dec() { collation.set(default_charset()); - decimals=0; max_length=args[0]->max_length*2; + decimals=0; + max_length=args[0]->max_length*2*collation.collation->mbmaxlen; } }; diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index ed5b9ecc0db..062e7fc7b44 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -1286,12 +1286,13 @@ void Item_func_date_format::fix_length_and_dec() if (args[1]->type() == STRING_ITEM) { // Optimize the normal case fixed_length=1; - max_length=format_length(((Item_string*) args[1])->const_string()); + max_length= format_length(((Item_string*) args[1])->const_string())* + collation.collation->mbmaxlen; } else { fixed_length=0; - max_length=args[1]->max_length*10; + max_length=args[1]->max_length*10*collation.collation->mbmaxlen; set_if_smaller(max_length,MAX_BLOB_WIDTH); } maybe_null=1; // If wrong date diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 8d4081fb2aa..574156a99ed 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6245,6 +6245,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler= mbcharlen_big5, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_big5, /* mb_wc */ my_wc_mb_big5, /* wc_mb */ diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 43575bbc277..fc22938d46e 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -347,6 +347,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, + my_wellformedlen_8bit, my_lengthsp_8bit, my_mb_wc_bin, my_wc_mb_bin, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 366a5d500ed..278e8529e83 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8653,6 +8653,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_euc_kr, /* mb_wc */ my_wc_mb_euc_kr, /* wc_mb */ diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 44a58b2b906..722f00f0f7a 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5704,6 +5704,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_gb2312, /* mb_wc */ my_wc_mb_gb2312, /* wc_mb */ diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 5475c3bd363..9e71a18e531 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9900,6 +9900,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_gbk, my_numchars_mb, my_charpos_mb, + my_wellformedlen_mb, my_lengthsp_8bit, my_mb_wc_gbk, my_wc_mb_gbk, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index c00ded21575..933737b5f61 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -180,6 +180,7 @@ static MY_CHARSET_HANDLER my_charset_handler= my_mbcharlen_8bit, my_numchars_8bit, my_charpos_8bit, + my_wellformedlen_8bit, my_lengthsp_8bit, my_mb_wc_latin1, my_wc_mb_latin1, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index b5e8c4598a0..377bf311d38 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -274,6 +274,22 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)), return pos ? e+2-b0 : b-b0; } +uint my_wellformedlen_mb(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, uint pos) +{ + uint mblen; + const char *b0=b; + + while (pos && b