Bug#26711 "Binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load"
Problem: "SELECT INTO OUTFILE" created incorrect dumps for BLOBs, so "LOAD DATA" later misinterpreted 0x5C as the second byte of a multi-byte sequence instead of as the escape character. Fix: also escape the multi-byte head byte when the byte that follows it needs escaping. mysql-test/r/ctype_big5.result: Adding test case. mysql-test/t/ctype_big5.test: Adding test case. sql/sql_class.cc: Add escape characters before multi-byte heads. strings/ctype-big5.c: Flagging character set as dangerous for escaping. strings/ctype-gbk.c: Flagging character set as dangerous for escaping. strings/ctype-sjis.c: Flagging character set as dangerous for escaping.
This commit is contained in:
parent
4bdbc3396e
commit
2971d645c1
@ -192,3 +192,16 @@ drop table t1;
|
|||||||
select hex(convert(_big5 0xC84041 using ucs2));
|
select hex(convert(_big5 0xC84041 using ucs2));
|
||||||
hex(convert(_big5 0xC84041 using ucs2))
|
hex(convert(_big5 0xC84041 using ucs2))
|
||||||
003F0041
|
003F0041
|
||||||
|
End of 4.1 tests
|
||||||
|
create table t1 (a blob);
|
||||||
|
insert into t1 values (0xEE00);
|
||||||
|
delete from t1;
|
||||||
|
select hex(load_file('test/t1.txt'));
|
||||||
|
hex(load_file('test/t1.txt'))
|
||||||
|
5CEE5C300A
|
||||||
|
load data infile 't1.txt' into table t1;
|
||||||
|
select hex(a) from t1;
|
||||||
|
hex(a)
|
||||||
|
EE00
|
||||||
|
drop table t1;
|
||||||
|
End of 5.0 tests
|
||||||
|
@ -63,4 +63,20 @@ drop table t1;
|
|||||||
#
|
#
|
||||||
select hex(convert(_big5 0xC84041 using ucs2));
|
select hex(convert(_big5 0xC84041 using ucs2));
|
||||||
|
|
||||||
# End of 4.1 tests
|
--echo End of 4.1 tests
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bug#26711 "binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load"
|
||||||
|
#
|
||||||
|
create table t1 (a blob);
|
||||||
|
insert into t1 values (0xEE00);
|
||||||
|
--exec $MYSQL_DUMP --default-character-set=big5 -T $MYSQLTEST_VARDIR/master-data/test test t1
|
||||||
|
delete from t1;
|
||||||
|
select hex(load_file('test/t1.txt'));
|
||||||
|
load data infile 't1.txt' into table t1;
|
||||||
|
select hex(a) from t1;
|
||||||
|
--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.txt
|
||||||
|
--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.sql
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
--echo End of 5.0 tests
|
||||||
|
@ -1221,6 +1221,11 @@ select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define NEED_ESCAPING(x) ((int) (uchar) (x) == escape_char || \
|
||||||
|
(int) (uchar) (x) == field_sep_char || \
|
||||||
|
(int) (uchar) (x) == line_sep_char || \
|
||||||
|
!(x))
|
||||||
|
|
||||||
bool select_export::send_data(List<Item> &items)
|
bool select_export::send_data(List<Item> &items)
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -1280,14 +1285,20 @@ bool select_export::send_data(List<Item> &items)
|
|||||||
used_length=res->length();
|
used_length=res->length();
|
||||||
if (result_type == STRING_RESULT && escape_char != -1)
|
if (result_type == STRING_RESULT && escape_char != -1)
|
||||||
{
|
{
|
||||||
char *pos,*start,*end;
|
char *pos, *start, *end;
|
||||||
|
CHARSET_INFO *res_charset= res->charset();
|
||||||
|
CHARSET_INFO *character_set_client= thd->variables.
|
||||||
|
character_set_client;
|
||||||
|
bool check_second_byte= (res_charset == &my_charset_bin) &&
|
||||||
|
character_set_client->
|
||||||
|
escape_with_backslash_is_dangerous;
|
||||||
|
DBUG_ASSERT(character_set_client->mbmaxlen == 2 ||
|
||||||
|
!character_set_client->escape_with_backslash_is_dangerous);
|
||||||
for (start=pos=(char*) res->ptr(),end=pos+used_length ;
|
for (start=pos=(char*) res->ptr(),end=pos+used_length ;
|
||||||
pos != end ;
|
pos != end ;
|
||||||
pos++)
|
pos++)
|
||||||
{
|
{
|
||||||
#ifdef USE_MB
|
#ifdef USE_MB
|
||||||
CHARSET_INFO *res_charset=res->charset();
|
|
||||||
if (use_mb(res_charset))
|
if (use_mb(res_charset))
|
||||||
{
|
{
|
||||||
int l;
|
int l;
|
||||||
@ -1298,8 +1309,44 @@ bool select_export::send_data(List<Item> &items)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if ((int) *pos == escape_char || (int) *pos == field_sep_char ||
|
|
||||||
(int) *pos == line_sep_char || !*pos)
|
/*
|
||||||
|
Special case when dumping BINARY/VARBINARY/BLOB values
|
||||||
|
for the clients with character sets big5, cp932, gbk and sjis,
|
||||||
|
which can have the escape character (0x5C "\" by default)
|
||||||
|
as the second byte of a multi-byte sequence.
|
||||||
|
|
||||||
|
If
|
||||||
|
- pos[0] is a valid multi-byte head (e.g. 0xEE) and
|
||||||
|
- pos[1] is 0x00, which will be escaped as "\0",
|
||||||
|
|
||||||
|
then we'll get "0xEE + 0x5C + 0x30" in the output file.
|
||||||
|
|
||||||
|
If this file is later loaded using this sequence of commands:
|
||||||
|
|
||||||
|
mysql> create table t1 (a varchar(128)) character set big5;
|
||||||
|
mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1;
|
||||||
|
|
||||||
|
then 0x5C will be misinterpreted as the second byte
|
||||||
|
of a multi-byte character "0xEE + 0x5C", instead of
|
||||||
|
the escape character for 0x00.
|
||||||
|
|
||||||
|
To avoid this confusion, we'll escape the multi-byte
|
||||||
|
head character too, so the sequence "0xEE + 0x00" will be
|
||||||
|
dumped as "0x5C + 0xEE + 0x5C + 0x30".
|
||||||
|
|
||||||
|
Note, in the condition below we only check if
|
||||||
|
mbcharlen is equal to 2, because there are no
|
||||||
|
character sets with mbmaxlen greater than 2
|
||||||
|
and with escape_with_backslash_is_dangerous set.
|
||||||
|
The DBUG_ASSERT before the loop ensures this.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (NEED_ESCAPING(*pos) ||
|
||||||
|
(check_second_byte &&
|
||||||
|
my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
|
||||||
|
pos + 1 < end &&
|
||||||
|
NEED_ESCAPING(pos[1])))
|
||||||
{
|
{
|
||||||
char tmp_buff[2];
|
char tmp_buff[2];
|
||||||
tmp_buff[0]= escape_char;
|
tmp_buff[0]= escape_char;
|
||||||
|
@ -6400,7 +6400,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
|
|||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
255, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_big5_handler,
|
&my_charset_big5_handler,
|
||||||
&my_collation_big5_chinese_ci_handler
|
&my_collation_big5_chinese_ci_handler
|
||||||
};
|
};
|
||||||
@ -6433,7 +6433,7 @@ CHARSET_INFO my_charset_big5_bin=
|
|||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
255, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_big5_handler,
|
&my_charset_big5_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_mb_bin_handler
|
||||||
};
|
};
|
||||||
|
@ -10046,7 +10046,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
|
|||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
255, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_ci_handler
|
||||||
};
|
};
|
||||||
@ -10078,7 +10078,7 @@ CHARSET_INFO my_charset_gbk_bin=
|
|||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
255, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_mb_bin_handler
|
||||||
};
|
};
|
||||||
|
@ -4694,7 +4694,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
|
|||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
255, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_ci_handler
|
||||||
};
|
};
|
||||||
@ -4726,7 +4726,7 @@ CHARSET_INFO my_charset_sjis_bin=
|
|||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
255, /* max_sort_char */
|
||||||
' ', /* pad char */
|
' ', /* pad char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_mb_bin_handler
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user