MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
This commit is contained in:
parent
391fddf660
commit
8286bcd721
@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed;
|
||||
DROP TABLE allbytes;
|
||||
# End of ctype_backslash.inc
|
||||
#
|
||||
# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
|
||||
#
|
||||
SET NAMES utf8, character_set_connection=latin1;
|
||||
SELECT 'Â';
|
||||
?
|
||||
?
|
||||
SELECT HEX('Â');
|
||||
HEX('Â')
|
||||
3F
|
||||
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8));
|
||||
HEX(CAST('Â' AS CHAR CHARACTER SET utf8))
|
||||
3F
|
||||
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1));
|
||||
HEX(CAST('Â' AS CHAR CHARACTER SET latin1))
|
||||
3F
|
||||
SELECT HEX(CONVERT('Â' USING utf8));
|
||||
HEX(CONVERT('Â' USING utf8))
|
||||
3F
|
||||
SELECT HEX(CONVERT('Â' USING latin1));
|
||||
HEX(CONVERT('Â' USING latin1))
|
||||
3F
|
||||
SELECT 'Âx';
|
||||
?x
|
||||
?x
|
||||
SELECT HEX('Âx');
|
||||
HEX('Âx')
|
||||
3F78
|
||||
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8));
|
||||
HEX(CAST('Âx' AS CHAR CHARACTER SET utf8))
|
||||
3F78
|
||||
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1));
|
||||
HEX(CAST('Âx' AS CHAR CHARACTER SET latin1))
|
||||
3F78
|
||||
SELECT HEX(CONVERT('Âx' USING utf8));
|
||||
HEX(CONVERT('Âx' USING utf8))
|
||||
3F78
|
||||
SELECT HEX(CONVERT('Âx' USING latin1));
|
||||
HEX(CONVERT('Âx' USING latin1))
|
||||
3F78
|
||||
SET NAMES utf8;
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
|
||||
INSERT INTO t1 VALUES ('Â'),('Â#');
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
|
||||
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
|
||||
SHOW WARNINGS;
|
||||
Level Code Message
|
||||
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
|
||||
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
|
||||
SELECT HEX(a),a FROM t1;
|
||||
HEX(a) a
|
||||
3F ?
|
||||
3F23 ?#
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.0 tests
|
||||
#
|
||||
|
@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8;
|
||||
INSERT INTO t1 values('è');
|
||||
SELECT hex(a),CHAR_LENGTH(a) FROM t1;
|
||||
hex(a) CHAR_LENGTH(a)
|
||||
03 1
|
||||
033F 2
|
||||
DELETE FROM t1;
|
||||
DROP TABLE IF EXISTS t1;
|
||||
SET @@global.character_set_client = @global_character_set_client;
|
||||
|
@ -210,6 +210,29 @@ set names latin1;
|
||||
let $ctype_unescape_combinations=selected;
|
||||
--source include/ctype_unescape.inc
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
|
||||
--echo #
|
||||
# Part 1: string literals. The client character set is utf8 while the
# connection character set is latin1.
# NOTE(review): the quoted literal is presumably a lone 0xC2 byte, i.e. the
# first byte of a two-byte utf8 sequence with no continuation byte; the
# recorded warnings for the INSERT further down print it as '\xC2'. Confirm
# against the raw file, since a viewer may render the byte as a latin1 letter.
SET NAMES utf8, character_set_connection=latin1;
|
||||
# Literal consisting only of the incomplete sequence (nothing follows it).
# The recorded result for each statement in this group is '?' / hex 3F.
SELECT 'Â';
|
||||
SELECT HEX('Â');
|
||||
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8));
|
||||
SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1));
|
||||
SELECT HEX(CONVERT('Â' USING utf8));
|
||||
SELECT HEX(CONVERT('Â' USING latin1));
|
||||
# Same byte followed by an ordinary character 'x'.
# The recorded result for each statement in this group is '?x' / hex 3F78.
SELECT 'Âx';
|
||||
SELECT HEX('Âx');
|
||||
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8));
|
||||
SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1));
|
||||
SELECT HEX(CONVERT('Âx' USING utf8));
|
||||
SELECT HEX(CONVERT('Âx' USING latin1));
|
||||
# Part 2: storing the same input into a latin1 column, with client and
# connection character sets both utf8.
SET NAMES utf8;
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
|
||||
# Row 1 ends with the incomplete sequence; row 2 has '#' after it.
# The recorded result expects one warning 1366 (Incorrect string value) per row.
INSERT INTO t1 VALUES ('Â'),('Â#');
|
||||
SHOW WARNINGS;
|
||||
# Recorded result: 3F ('?') for row 1 and 3F23 ('?#') for row 2.
SELECT HEX(a),a FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.0 tests
|
||||
--echo #
|
||||
|
@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
|
||||
wc= '?';
|
||||
}
|
||||
else
|
||||
break; // Not enough characters
|
||||
|
||||
{
|
||||
if ((uchar *) from >= from_end)
|
||||
break; // End of line
|
||||
// Incomplete byte sequence
|
||||
if (!*well_formed_error_pos)
|
||||
*well_formed_error_pos= from;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
outp:
|
||||
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
||||
to+= cnvres;
|
||||
|
@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length,
|
||||
wc= '?';
|
||||
}
|
||||
else
|
||||
break; // Not enough characters
|
||||
{
|
||||
if ((uchar *) from >= from_end)
|
||||
break; /* End of line */
|
||||
/* Incomplete byte sequence */
|
||||
error_count++;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
|
||||
outp:
|
||||
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
||||
|
Loading…
x
Reference in New Issue
Block a user