Bug #20238729: ILLEGALLY CRAFTED UTF8 SELECT PROVIDES NO
WARNINGS Backporting to 5.1 and 5.5
This commit is contained in:
parent
8f87d6cd41
commit
33a2e5abd8
Binary file not shown.
@ -23,6 +23,8 @@ SET @@session.character_set_connection = latin1;
|
||||
SELECT 'ЁЂЃЄ' AS utf_text;
|
||||
utf_text
|
||||
????
|
||||
Warnings:
|
||||
Warning 1105 Can't convert the character string from utf8 to latin1: '\xD0\x81\xD0\x82\xD0\x83...'
|
||||
SET @@session.character_set_connection = utf8;
|
||||
SELECT 'ЁЂЃЄ' AS utf_text;
|
||||
utf_text
|
||||
@ -30,6 +32,8 @@ utf_text
|
||||
'---now inserting utf8 string with different character_set_connection--'
|
||||
SET @@session.character_set_connection = ascii;
|
||||
INSERT INTO t1 VALUES('ЁЂЃЄ');
|
||||
Warnings:
|
||||
Warning 1105 Can't convert the character string from utf8 to ascii: '\xD0\x81\xD0\x82\xD0\x83...'
|
||||
SELECT * FROM t1;
|
||||
b
|
||||
????
|
||||
@ -39,6 +43,8 @@ SET @@session.character_set_connection = ascii;
|
||||
SET @@session.character_set_client = latin1;
|
||||
SET @@session.character_set_results = latin1;
|
||||
INSERT INTO t1 VALUES('ЁЂЃЄ');
|
||||
Warnings:
|
||||
Warning 1105 Can't convert the character string from latin1 to ascii: '\xD0\x81\xD0\x82\xD0\x83...'
|
||||
SELECT * FROM t1;
|
||||
b
|
||||
????????
|
||||
|
63
sql/item.cc
63
sql/item.cc
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -4807,39 +4807,60 @@ bool Item::is_datetime()
|
||||
}
|
||||
|
||||
|
||||
String *Item::check_well_formed_result(String *str, bool send_error)
|
||||
/**
|
||||
Verifies that the input string is well-formed according to its character set.
|
||||
@param send_error If true and the string has a character length error, call my_error and return NULL.
|
||||
@param truncate If true, set to null/truncate if not well-formed.
|
||||
|
||||
@return
|
||||
If well-formed: input string.
|
||||
If not well-formed:
|
||||
if truncate is true and strict mode: NULL pointer and we set this
|
||||
Item's value to NULL.
|
||||
if truncate is true and not strict mode: input string truncated up to
|
||||
last good character.
|
||||
if truncate is false: input string is returned.
|
||||
*/
|
||||
String *Item::check_well_formed_result(String *str,
|
||||
bool send_error,
|
||||
bool truncate)
|
||||
{
|
||||
/* Check whether we got a well-formed string */
|
||||
CHARSET_INFO *cs= str->charset();
|
||||
int well_formed_error;
|
||||
uint wlen= cs->cset->well_formed_len(cs,
|
||||
str->ptr(), str->ptr() + str->length(),
|
||||
str->length(), &well_formed_error);
|
||||
if (wlen < str->length())
|
||||
|
||||
size_t valid_length;
|
||||
bool length_error;
|
||||
|
||||
if (validate_string(cs, str->ptr(), str->length(),
|
||||
&valid_length, &length_error))
|
||||
{
|
||||
const char *str_end= str->ptr() + str->length();
|
||||
const char *print_byte= str->ptr() + valid_length;
|
||||
THD *thd= current_thd;
|
||||
char hexbuf[7];
|
||||
enum MYSQL_ERROR::enum_warning_level level;
|
||||
uint diff= str->length() - wlen;
|
||||
enum MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN;
|
||||
uint diff= str_end - print_byte;
|
||||
set_if_smaller(diff, 3);
|
||||
octet2hex(hexbuf, str->ptr() + wlen, diff);
|
||||
if (send_error)
|
||||
octet2hex(hexbuf, print_byte, diff);
|
||||
if (send_error && length_error)
|
||||
{
|
||||
my_error(ER_INVALID_CHARACTER_STRING, MYF(0),
|
||||
cs->csname, hexbuf);
|
||||
return 0;
|
||||
}
|
||||
if ((thd->variables.sql_mode &
|
||||
(MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES)))
|
||||
if (truncate && length_error)
|
||||
{
|
||||
level= MYSQL_ERROR::WARN_LEVEL_ERROR;
|
||||
null_value= 1;
|
||||
str= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
level= MYSQL_ERROR::WARN_LEVEL_WARN;
|
||||
str->length(wlen);
|
||||
if ((thd->variables.sql_mode &
|
||||
(MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES)))
|
||||
{
|
||||
level= MYSQL_ERROR::WARN_LEVEL_ERROR;
|
||||
null_value= 1;
|
||||
str= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
str->length(valid_length);
|
||||
}
|
||||
}
|
||||
push_warning_printf(thd, level, ER_INVALID_CHARACTER_STRING,
|
||||
ER(ER_INVALID_CHARACTER_STRING), cs->csname, hexbuf);
|
||||
|
@ -1052,7 +1052,9 @@ public:
|
||||
bool is_datetime();
|
||||
virtual Field::geometry_type get_geometry_type() const
|
||||
{ return Field::GEOM_GEOMETRY; };
|
||||
String *check_well_formed_result(String *str, bool send_error= 0);
|
||||
String *check_well_formed_result(String *str,
|
||||
bool send_error,
|
||||
bool truncate);
|
||||
bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs);
|
||||
|
||||
/**
|
||||
@ -1929,6 +1931,11 @@ public:
|
||||
decimals=NOT_FIXED_DEC;
|
||||
// it is constant => can be used without fix_fields (and frequently used)
|
||||
fixed= 1;
|
||||
/*
|
||||
Check if the string has any character that can't be
|
||||
interpreted using the relevant charset.
|
||||
*/
|
||||
check_well_formed_result(&str_value, false, false);
|
||||
}
|
||||
/* Just create an item and do not fill string representation */
|
||||
Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -2373,7 +2373,9 @@ String *Item_func_char::val_str(String *str)
|
||||
}
|
||||
}
|
||||
str->realloc(str->length()); // Add end 0 (for Purify)
|
||||
return check_well_formed_result(str);
|
||||
return check_well_formed_result(str,
|
||||
false, // send warning
|
||||
true); // truncate
|
||||
}
|
||||
|
||||
|
||||
@ -2773,7 +2775,9 @@ String *Item_func_conv_charset::val_str(String *str)
|
||||
}
|
||||
null_value= tmp_value.copy(arg->ptr(), arg->length(), arg->charset(),
|
||||
conv_charset, &dummy_errors);
|
||||
return null_value ? 0 : check_well_formed_result(&tmp_value);
|
||||
return null_value ? 0 : check_well_formed_result(&tmp_value,
|
||||
false, // send warning
|
||||
true); // truncate
|
||||
}
|
||||
|
||||
void Item_func_conv_charset::fix_length_and_dec()
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -1313,21 +1313,17 @@ LEX_STRING *THD::make_lex_string(LEX_STRING *lex_str,
|
||||
/*
|
||||
Convert a string to another character set
|
||||
|
||||
SYNOPSIS
|
||||
convert_string()
|
||||
to Store new allocated string here
|
||||
to_cs New character set for allocated string
|
||||
from String to convert
|
||||
from_length Length of string to convert
|
||||
from_cs Original character set
|
||||
@param to Store new allocated string here
|
||||
@param to_cs New character set for allocated string
|
||||
@param from String to convert
|
||||
@param from_length Length of string to convert
|
||||
@param from_cs Original character set
|
||||
|
||||
NOTES
|
||||
to will be 0-terminated to make it easy to pass to system funcs
|
||||
@note to will be 0-terminated to make it easy to pass to system funcs
|
||||
|
||||
RETURN
|
||||
0 ok
|
||||
1 End of memory.
|
||||
In this case to->str will point to 0 and to->length will be 0.
|
||||
@retval false ok
|
||||
@retval true End of memory.
|
||||
In this case to->str will point to 0 and to->length will be 0.
|
||||
*/
|
||||
|
||||
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||
@ -1336,15 +1332,25 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||
{
|
||||
DBUG_ENTER("convert_string");
|
||||
size_t new_length= to_cs->mbmaxlen * from_length;
|
||||
uint dummy_errors;
|
||||
uint errors= 0;
|
||||
if (!(to->str= (char*) alloc(new_length+1)))
|
||||
{
|
||||
to->length= 0; // Safety fix
|
||||
DBUG_RETURN(1); // EOM
|
||||
}
|
||||
to->length= copy_and_convert((char*) to->str, new_length, to_cs,
|
||||
from, from_length, from_cs, &dummy_errors);
|
||||
from, from_length, from_cs, &errors);
|
||||
to->str[to->length]=0; // Safety
|
||||
if (errors != 0)
|
||||
{
|
||||
char printable_buff[32];
|
||||
convert_to_printable(printable_buff, sizeof(printable_buff),
|
||||
from, from_length, from_cs, 6);
|
||||
push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
|
||||
"Can't convert the character string from %s to %s: '%.64s'",
|
||||
from_cs->csname, to_cs->csname, printable_buff);
|
||||
}
|
||||
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -1231,3 +1231,69 @@ uint convert_to_printable(char *to, size_t to_len,
|
||||
*t= '\0';
|
||||
return t - to;
|
||||
}
|
||||
|
||||
/**
|
||||
Check if an input byte sequence is a valid character string of a given charset
|
||||
|
||||
@param cs The input character set.
|
||||
@param str The input byte sequence to validate.
|
||||
@param length A byte length of the str.
|
||||
@param [out] valid_length A byte length of a valid prefix of the str.
|
||||
@param [out] length_error True in the case of a character length error:
|
||||
some byte[s] in the input is not a valid
|
||||
prefix for a character, i.e. the byte length
|
||||
of that invalid character is undefined.
|
||||
|
||||
@retval false if the whole input byte sequence is a valid character string.
|
||||
The length_error output parameter is undefined.
|
||||
|
||||
@return
|
||||
if the whole input byte sequence is a valid character string
|
||||
then
|
||||
return false
|
||||
else
|
||||
if the length of some character in the input is undefined (MY_CS_ILSEQ)
|
||||
or the last character is truncated (MY_CS_TOOSMALL)
|
||||
then
|
||||
*length_error= true; // fatal error!
|
||||
else
|
||||
*length_error= false; // non-fatal error: there is no wide character
|
||||
// encoding for some input character
|
||||
return true
|
||||
*/
|
||||
bool validate_string(CHARSET_INFO *cs, const char *str, uint32 length,
                     size_t *valid_length, bool *length_error)
{
  if (cs->mbmaxlen <= 1)
  {
    /*
      Single-byte character set: well_formed_len() is not functional here,
      so walk the input with mb_wc() and stop at the first byte that has
      no wide-character mapping. This is never reported as a length error.
    */
    *length_error= false;

    const uchar *const first_byte= reinterpret_cast<const uchar *>(str);
    const uchar *const past_last= first_byte + length;
    my_charset_conv_mb_wc to_wide_char= cs->cset->mb_wc;

    for (const uchar *cursor= first_byte; cursor < past_last; )
    {
      my_wc_t ignored_wc;
      const int consumed= (*to_wide_char)(cs, &ignored_wc,
                                          (uchar*) cursor, past_last);
      if (consumed <= 0)
      {
        /* Everything before the cursor converted successfully. */
        *valid_length= cursor - first_byte;
        return true;
      }
      cursor+= consumed;
    }

    /* Every byte converted: the whole input is valid. */
    *valid_length= length;
    return false;
  }

  /*
    Multi-byte character set: let the charset handler find the length of
    the well-formed prefix; its error flag is both the length error
    indicator and the return value.
  */
  int mb_error;
  *valid_length= cs->cset->well_formed_len(cs, str, str + length,
                                           length, &mb_error);
  *length_error= mb_error;
  return mb_error;
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -413,3 +413,7 @@ static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str,
|
||||
{
|
||||
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
|
||||
}
|
||||
|
||||
bool
|
||||
validate_string(CHARSET_INFO *cs, const char *str, uint32 length,
|
||||
size_t *valid_length, bool *length_error);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -11070,7 +11070,9 @@ literal:
|
||||
str ? str->length() : 0,
|
||||
$1);
|
||||
if (!item_str ||
|
||||
!item_str->check_well_formed_result(&item_str->str_value, TRUE))
|
||||
!item_str->check_well_formed_result(&item_str->str_value,
|
||||
true, //send error
|
||||
true)) //truncate
|
||||
{
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
@ -11099,7 +11101,9 @@ literal:
|
||||
str ? str->length() : 0,
|
||||
$1);
|
||||
if (!item_str ||
|
||||
!item_str->check_well_formed_result(&item_str->str_value, TRUE))
|
||||
!item_str->check_well_formed_result(&item_str->str_value,
|
||||
true, //send error
|
||||
true)) //truncate
|
||||
{
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user