Removing some duplicate code in THD::convert_string() & friends
1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper for convert_string(LEX_STRING *to,...), as LEX_CSTRING is now frequently used for conversion purposes. This reduced duplicate code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules in *.yy 2. Adding yet another THD::convert_string() with an extra parameter "bool simple_copy_is_possible". This further reduced the repeated code in the mentioned grammar rules in *.yy 3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st, as they have very similar functionality. Moving m_quote from Lex_ident_cli_st to Lex_string_with_metadata_st, as m_quote will be used later to optimize string literals anyway (e.g. to avoid redundant copying at the tokenizer stage). Adjusting Lex_input_stream::get_text() accordingly. 4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules into new methods in THD: - make_text_string_sys() - make_text_string_connection() - make_text_string_filesystem() and changing *.yy to use these new methods. This reduced the amount of similar code in sql_yacc.yy and sql_yacc_ora.yy. 5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident() by reusing THD::make_text_string_sys(). Thanks to #3 and #4. 6. Making the THD members charset_is_system_charset, charset_is_collation_connection, charset_is_character_set_filesystem private, as they are not needed externally any more.
This commit is contained in:
parent
af682525a8
commit
4a126bf3e1
@ -3130,8 +3130,10 @@ public:
|
||||
is set if a statement accesses a temporary table created through
|
||||
CREATE TEMPORARY TABLE.
|
||||
*/
|
||||
bool charset_is_system_charset, charset_is_collation_connection;
|
||||
private:
|
||||
bool charset_is_system_charset, charset_is_collation_connection;
|
||||
bool charset_is_character_set_filesystem;
|
||||
public:
|
||||
bool enable_slow_log; /* Enable slow log for current statement */
|
||||
bool abort_on_warning;
|
||||
bool got_warning; /* Set on call to push_warning() */
|
||||
@ -3706,6 +3708,25 @@ public:
|
||||
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||
const char *from, size_t from_length,
|
||||
CHARSET_INFO *from_cs);
|
||||
/*
  Overload of convert_string() that delivers the result as a LEX_CSTRING.

  Runs the LEX_STRING overload into a local temporary and then copies
  the resulting pointer and length into *to.

  @param to           destination; its str and length are overwritten
  @param to_cs        character set passed through as the target
  @param from         source bytes
  @param from_length  source length, passed through unchanged
  @param from_cs      character set passed through as the source
  @return             whatever the LEX_STRING overload returned

  NOTE(review): tmp is copied into *to even when rc is true; whether tmp
  holds defined values in that case depends on the LEX_STRING overload,
  which is not visible here - confirm.
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
|
||||
const char *from, size_t from_length,
|
||||
CHARSET_INFO *from_cs)
|
||||
{
|
||||
LEX_STRING tmp;
|
||||
bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs);
|
||||
to->str= tmp.str;
|
||||
to->length= tmp.length;
|
||||
return rc;
|
||||
}
|
||||
/*
  Convert *from into *to, or just copy the descriptor when the caller
  says no conversion is needed.

  @param to      destination string descriptor
  @param tocs    target character set (used only on the conversion path)
  @param from    source string descriptor
  @param fromcs  source character set (used only on the conversion path)
  @param simple_copy_is_possible
                 when true, *from is assigned to *to and no conversion
                 function is called
  @return        false on the simple-copy path; otherwise the result of
                 the pointer/length convert_string() overload
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
|
||||
const LEX_CSTRING *from, CHARSET_INFO *fromcs,
|
||||
bool simple_copy_is_possible)
|
||||
{
|
||||
if (!simple_copy_is_possible)
|
||||
return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
|
||||
// Struct assignment only: to->str ends up pointing at the same bytes as from->str.
*to= *from;
|
||||
return false;
|
||||
}
|
||||
/*
|
||||
Convert a string between character sets.
|
||||
Uses my_convert_fix(), which uses an mb_wc .. wc_mb loop internally.
|
||||
@ -3767,6 +3788,24 @@ public:
|
||||
Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str);
|
||||
Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str,
|
||||
CHARSET_INFO *cs);
|
||||
/*
  Produce *to from *from with system_charset_info as the target
  character set and charset() as the source character set.

  charset_is_system_charset is forwarded as the "simple copy is
  possible" flag of convert_string().

  @param to    destination string descriptor
  @param from  source string with metadata
  @return      the result of convert_string()
*/
bool make_text_string_sys(LEX_CSTRING *to,
|
||||
const Lex_string_with_metadata_st *from)
|
||||
{
|
||||
return convert_string(to, system_charset_info,
|
||||
from, charset(), charset_is_system_charset);
|
||||
}
|
||||
/*
  Produce *to from *from with variables.collation_connection as the
  target character set and charset() as the source character set.

  charset_is_collation_connection is forwarded as the "simple copy is
  possible" flag of convert_string().

  @param to    destination string descriptor
  @param from  source string with metadata
  @return      the result of convert_string()
*/
bool make_text_string_connection(LEX_CSTRING *to,
|
||||
const Lex_string_with_metadata_st *from)
|
||||
{
|
||||
return convert_string(to, variables.collation_connection,
|
||||
from, charset(), charset_is_collation_connection);
|
||||
}
|
||||
/*
  Produce *to from *from with variables.character_set_filesystem as the
  target character set and charset() as the source character set.

  charset_is_character_set_filesystem is forwarded as the "simple copy
  is possible" flag of convert_string().

  @param to    destination string descriptor
  @param from  source string with metadata
  @return      the result of convert_string()
*/
bool make_text_string_filesystem(LEX_CSTRING *to,
|
||||
const Lex_string_with_metadata_st *from)
|
||||
{
|
||||
return convert_string(to, variables.character_set_filesystem,
|
||||
from, charset(), charset_is_character_set_filesystem);
|
||||
}
|
||||
void add_changed_table(TABLE *table);
|
||||
void add_changed_table(const char *key, size_t key_length);
|
||||
CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length);
|
||||
|
@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
|
||||
operation.
|
||||
*/
|
||||
|
||||
void Lex_input_stream::body_utf8_append_ident(THD *thd,
|
||||
const LEX_CSTRING *txt,
|
||||
const char *end_ptr)
|
||||
void
|
||||
Lex_input_stream::body_utf8_append_ident(THD *thd,
|
||||
const Lex_string_with_metadata_st *txt,
|
||||
const char *end_ptr)
|
||||
{
|
||||
if (!m_cpp_utf8_processed_ptr)
|
||||
return;
|
||||
|
||||
LEX_CSTRING utf_txt;
|
||||
CHARSET_INFO *txt_cs= thd->charset();
|
||||
|
||||
if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
|
||||
{
|
||||
LEX_STRING to;
|
||||
thd->convert_string(&to,
|
||||
&my_charset_utf8_general_ci,
|
||||
txt->str, (uint) txt->length,
|
||||
txt_cs);
|
||||
utf_txt.str= to.str;
|
||||
utf_txt.length= to.length;
|
||||
|
||||
}
|
||||
else
|
||||
utf_txt= *txt;
|
||||
thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value?
|
||||
|
||||
/* NOTE: utf_txt.length is in bytes, not in symbols. */
|
||||
|
||||
memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
|
||||
m_body_utf8_ptr += utf_txt.length;
|
||||
*m_body_utf8_ptr= 0;
|
||||
@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
|
||||
uchar c;
|
||||
uint found_escape=0;
|
||||
CHARSET_INFO *cs= m_thd->charset();
|
||||
bool is_8bit= false;
|
||||
|
||||
dst->set_8bit(false);
|
||||
while (! eof())
|
||||
{
|
||||
c= yyGet();
|
||||
if (c & 0x80)
|
||||
dst->set_8bit(true);
|
||||
is_8bit= true;
|
||||
#ifdef USE_MB
|
||||
{
|
||||
int l;
|
||||
@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
|
||||
|
||||
if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1)))
|
||||
{
|
||||
dst->str= ""; // Sql_alloc has set error flag
|
||||
dst->length= 0;
|
||||
return true;
|
||||
dst->set(&empty_clex_str, 0, '\0');
|
||||
return true; // Sql_alloc has set error flag
|
||||
}
|
||||
dst->str= to;
|
||||
|
||||
m_cpp_text_start= m_cpp_tok_start + pre_skip;
|
||||
m_cpp_text_end= get_cpp_ptr() - post_skip;
|
||||
|
||||
if (!found_escape)
|
||||
{
|
||||
memcpy(to, str, dst->length= (end - str));
|
||||
to[dst->length]= 0;
|
||||
size_t len= (end - str);
|
||||
memcpy(to, str, len);
|
||||
to[len]= '\0';
|
||||
dst->set(to, len, is_8bit, '\0');
|
||||
}
|
||||
else
|
||||
{
|
||||
dst->length= unescape(cs, to, str, end, sep);
|
||||
size_t len= unescape(cs, to, str, end, sep);
|
||||
dst->set(to, len, is_8bit, '\0');
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -37,12 +37,16 @@
|
||||
|
||||
|
||||
/**
|
||||
A string with metadata.
|
||||
A string with metadata. Usually points to a string in the client
|
||||
character set, but unlike Lex_ident_cli_st (see below) it does not
|
||||
necessarily point to a query fragment. It can also point to memory
|
||||
of other kinds (e.g. an additional THD allocated memory buffer
|
||||
not overlapping with the current query text).
|
||||
|
||||
We'll add more flags here eventually, to know if the string has, e.g.:
|
||||
- multi-byte characters
|
||||
- bad byte sequences
|
||||
- backslash escapes: 'a\nb'
|
||||
- separator escapes: 'a''b'
|
||||
and reuse the original query fragments instead of making the string
|
||||
copy too early, in Lex_input_stream::get_text().
|
||||
This will make it possible to avoid unnecessary copying, as well as
|
||||
@ -50,9 +54,30 @@
|
||||
*/
|
||||
struct Lex_string_with_metadata_st: public LEX_CSTRING
|
||||
{
|
||||
private:
|
||||
bool m_is_8bit; // True if the string has 8bit characters
|
||||
char m_quote; // Quote character, or 0 if not quoted
|
||||
public:
|
||||
void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
|
||||
void set_metadata(bool is_8bit, char quote)
|
||||
{
|
||||
m_is_8bit= is_8bit;
|
||||
m_quote= quote;
|
||||
}
|
||||
void set(const char *s, size_t len, bool is_8bit, char quote)
|
||||
{
|
||||
str= s;
|
||||
length= len;
|
||||
set_metadata(is_8bit, quote);
|
||||
}
|
||||
void set(const LEX_CSTRING *s, bool is_8bit, char quote)
|
||||
{
|
||||
((LEX_CSTRING &)*this)= *s;
|
||||
set_metadata(is_8bit, quote);
|
||||
}
|
||||
bool is_8bit() const { return m_is_8bit; }
|
||||
bool is_quoted() const { return m_quote != '\0'; }
|
||||
char quote() const { return m_quote; }
|
||||
// Get string repertoire by the 8-bit flag and the character set
|
||||
uint repertoire(CHARSET_INFO *cs) const
|
||||
{
|
||||
@ -71,44 +96,27 @@ public:
|
||||
Used to store identifiers in the client character set.
|
||||
Points to a query fragment.
|
||||
*/
|
||||
struct Lex_ident_cli_st: public LEX_CSTRING
|
||||
struct Lex_ident_cli_st: public Lex_string_with_metadata_st
|
||||
{
|
||||
private:
|
||||
bool m_is_8bit;
|
||||
char m_quote;
|
||||
public:
|
||||
void set_keyword(const char *s, size_t len)
|
||||
{
|
||||
str= s;
|
||||
length= len;
|
||||
m_is_8bit= false;
|
||||
m_quote= '\0';
|
||||
set(s, len, false, '\0');
|
||||
}
|
||||
void set_ident(const char *s, size_t len, bool is_8bit)
|
||||
{
|
||||
str= s;
|
||||
length= len;
|
||||
m_is_8bit= is_8bit;
|
||||
m_quote= '\0';
|
||||
set(s, len, is_8bit, '\0');
|
||||
}
|
||||
void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
|
||||
{
|
||||
str= s;
|
||||
length= len;
|
||||
m_is_8bit= is_8bit;
|
||||
m_quote= quote;
|
||||
set(s, len, is_8bit, quote);
|
||||
}
|
||||
void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
|
||||
{
|
||||
((LEX_CSTRING &)*this)= *s;
|
||||
m_is_8bit= is_8bit;
|
||||
m_quote= '\0';
|
||||
set(s, is_8bit, '\0');
|
||||
}
|
||||
const char *pos() const { return str - is_quoted(); }
|
||||
const char *end() const { return str + length + is_quoted(); }
|
||||
bool is_quoted() const { return m_quote != '\0'; }
|
||||
bool is_8bit() const { return m_is_8bit; }
|
||||
char quote() const { return m_quote; }
|
||||
};
|
||||
|
||||
|
||||
@ -2453,7 +2461,7 @@ public:
|
||||
void body_utf8_append(const char *ptr);
|
||||
void body_utf8_append(const char *ptr, const char *end_ptr);
|
||||
void body_utf8_append_ident(THD *thd,
|
||||
const LEX_CSTRING *txt,
|
||||
const Lex_string_with_metadata_st *txt,
|
||||
const char *end_ptr);
|
||||
void body_utf8_append_escape(THD *thd,
|
||||
const LEX_CSTRING *txt,
|
||||
|
@ -15170,57 +15170,26 @@ IDENT_sys:
|
||||
TEXT_STRING_sys:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_system_charset)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
LEX_STRING to;
|
||||
if (unlikely(thd->convert_string(&to, system_charset_info,
|
||||
$1.str, $1.length,
|
||||
thd->charset())))
|
||||
MYSQL_YYABORT;
|
||||
$$.str= to.str;
|
||||
$$.length= to.length;
|
||||
}
|
||||
if (thd->make_text_string_sys(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
;
|
||||
|
||||
TEXT_STRING_literal:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_collation_connection)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
LEX_STRING to;
|
||||
if (unlikely(thd->convert_string(&to,
|
||||
thd->variables.collation_connection,
|
||||
$1.str, $1.length,
|
||||
thd->charset())))
|
||||
MYSQL_YYABORT;
|
||||
$$.str= to.str;
|
||||
$$.length= to.length;
|
||||
}
|
||||
if (thd->make_text_string_connection(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
;
|
||||
|
||||
TEXT_STRING_filesystem:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_character_set_filesystem)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
LEX_STRING to;
|
||||
if (unlikely(thd->convert_string(&to,
|
||||
thd->variables.character_set_filesystem,
|
||||
$1.str, $1.length,
|
||||
thd->charset())))
|
||||
MYSQL_YYABORT;
|
||||
$$.str= to.str;
|
||||
$$.length= to.length;
|
||||
}
|
||||
if (thd->make_text_string_filesystem(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
;
|
||||
|
||||
ident_table_alias:
|
||||
IDENT_sys
|
||||
|
@ -14919,56 +14919,24 @@ IDENT_sys:
|
||||
TEXT_STRING_sys:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_system_charset)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
LEX_STRING to;
|
||||
if (unlikely(thd->convert_string(&to, system_charset_info,
|
||||
$1.str, $1.length,
|
||||
thd->charset())))
|
||||
MYSQL_YYABORT;
|
||||
$$.str= to.str;
|
||||
$$.length= to.length;
|
||||
}
|
||||
if (thd->make_text_string_sys(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
;
|
||||
|
||||
TEXT_STRING_literal:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_collation_connection)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
LEX_STRING to;
|
||||
if (unlikely(thd->convert_string(&to,
|
||||
thd->variables.collation_connection,
|
||||
$1.str, $1.length,
|
||||
thd->charset())))
|
||||
MYSQL_YYABORT;
|
||||
$$.str= to.str;
|
||||
$$.length= to.length;
|
||||
}
|
||||
if (thd->make_text_string_connection(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
;
|
||||
|
||||
TEXT_STRING_filesystem:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_character_set_filesystem)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
LEX_STRING to;
|
||||
if (unlikely(thd->convert_string(&to,
|
||||
thd->variables.character_set_filesystem,
|
||||
$1.str, $1.length,
|
||||
thd->charset())))
|
||||
MYSQL_YYABORT;
|
||||
$$.str= to.str;
|
||||
$$.length= to.length;
|
||||
}
|
||||
if (thd->make_text_string_filesystem(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user