MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion

This commit is contained in:
Alexander Barkov 2014-09-18 12:40:55 +04:00
parent 391fddf660
commit 8286bcd721
5 changed files with 96 additions and 4 deletions

View File

@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed;
DROP TABLE allbytes;
# End of ctype_backslash.inc
#
# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
#
SET NAMES utf8, character_set_connection=latin1;
SELECT 'Â';
?
?
SELECT HEX('Â');
HEX('Â')
3F
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8));
HEX(CAST('Â' AS CHAR CHARACTER SET utf8))
3F
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1));
HEX(CAST('Â' AS CHAR CHARACTER SET latin1))
3F
SELECT HEX(CONVERT('Â' USING utf8));
HEX(CONVERT('Â' USING utf8))
3F
SELECT HEX(CONVERT('Â' USING latin1));
HEX(CONVERT('Â' USING latin1))
3F
SELECT 'Âx';
?x
?x
SELECT HEX('Âx');
HEX('Âx')
3F78
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8));
HEX(CAST('Âx' AS CHAR CHARACTER SET utf8))
3F78
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1));
HEX(CAST('Âx' AS CHAR CHARACTER SET latin1))
3F78
SELECT HEX(CONVERT('Âx' USING utf8));
HEX(CONVERT('Âx' USING utf8))
3F78
SELECT HEX(CONVERT('Âx' USING latin1));
HEX(CONVERT('Âx' USING latin1))
3F78
SET NAMES utf8;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES ('Â'),('Â#');
Warnings:
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
SHOW WARNINGS;
Level Code Message
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
SELECT HEX(a),a FROM t1;
HEX(a) a
3F ?
3F23 ?#
DROP TABLE t1;
#
# End of 10.0 tests
#

View File

@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8;
INSERT INTO t1 values('è');
SELECT hex(a),CHAR_LENGTH(a) FROM t1;
hex(a) CHAR_LENGTH(a)
03 1
033F 2
DELETE FROM t1;
DROP TABLE IF EXISTS t1;
SET @@global.character_set_client = @global_character_set_client;

View File

@ -210,6 +210,29 @@ set names latin1;
let $ctype_unescape_combinations=selected;
--source include/ctype_unescape.inc
--echo #
--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
--echo #
# Declare the client character set as utf8 but keep the connection character
# set latin1, so each string literal below is converted from utf8 to latin1.
SET NAMES utf8, character_set_connection=latin1;
# Case 1: the literal is a lone byte with nothing after it (the INSERT
# warnings recorded for this test report the same byte as '\xC2').
# NOTE(review): the byte may be displayed as a different glyph depending on
# the viewer's encoding; confirm the raw byte when editing this file.
# Every form below is expected to produce a single question mark (HEX 3F),
# whether the literal is used directly or passed through CAST/CONVERT.
SELECT 'Â';
SELECT HEX('Â');
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8));
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1));
SELECT HEX(CONVERT('Â' USING utf8));
SELECT HEX(CONVERT('Â' USING latin1));
# Case 2: the same byte followed by an ordinary ASCII character.
# Expected result is '?x' (HEX 3F78): only the bad byte is replaced and the
# character after it is kept.
SELECT 'Âx';
SELECT HEX('Âx');
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8));
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1));
SELECT HEX(CONVERT('Âx' USING utf8));
SELECT HEX(CONVERT('Âx' USING latin1));
# Case 3: storing the same literals into a latin1 column over a pure utf8
# connection. Each row is expected to raise warning 1366 ("Incorrect string
# value") and to be stored with the bad byte replaced by '?' (3F and 3F23).
SET NAMES utf8;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES ('Â'),('Â#');
# Record the warnings explicitly so both the row number and the offending
# bytes are part of the expected output.
SHOW WARNINGS;
SELECT HEX(a),a FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.0 tests
--echo #

View File

@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
wc= '?';
}
else
break; // Not enough characters
{
if ((uchar *) from >= from_end)
break; // End of line
// Incomplete byte sequence
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
from++;
wc= '?';
}
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
to+= cnvres;

View File

@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length,
wc= '?';
}
else
break; // Not enough characters
{
if ((uchar *) from >= from_end)
break; /* End of line */
/* Incomplete byte sequence */
error_count++;
from++;
wc= '?';
}
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)