diff --git a/sql/sql_class.h b/sql/sql_class.h index 49b6267c656..74d00b96ca2 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3130,8 +3130,10 @@ public: is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE. */ - bool charset_is_system_charset, charset_is_collation_connection; +private: + /* Now private; within this patch these flags are passed as the simple_copy_is_possible argument by the make_text_string_sys/connection/filesystem() members added to this class */ bool charset_is_system_charset, charset_is_collation_connection; bool charset_is_character_set_filesystem; +public: bool enable_slow_log; /* Enable slow log for current statement */ bool abort_on_warning; bool got_warning; /* Set on call to push_warning() */ @@ -3706,6 +3708,25 @@ public: bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, const char *from, size_t from_length, CHARSET_INFO *from_cs); + /* LEX_CSTRING flavour of the declaration above: runs the LEX_STRING overload on a local temporary, then copies its str and length into *to regardless of the result code, and returns that result code unchanged */ bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs, + const char *from, size_t from_length, + CHARSET_INFO *from_cs) + { + LEX_STRING tmp; + bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs); + to->str= tmp.str; + to->length= tmp.length; + return rc; + } + /* Copy-or-convert: when simple_copy_is_possible is true, *to is assigned from *from (same str pointer, no conversion) and false is returned; otherwise from->str and from->length are handed to the const char* overload and its result is returned */ bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs, + const LEX_CSTRING *from, CHARSET_INFO *fromcs, + bool simple_copy_is_possible) + { + if (!simple_copy_is_possible) + return unlikely(convert_string(to, tocs, from->str, from->length, fromcs)); + *to= *from; + return false; + } /* Convert a strings between character sets. Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally. 
@@ -3767,6 +3788,24 @@ public: Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str); Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str, CHARSET_INFO *cs); + /* Fill *to with *from expressed in system_charset_info; the source character set is charset(), and charset_is_system_charset is passed as the simple_copy_is_possible argument. Returns whatever convert_string() returns. */ bool make_text_string_sys(LEX_CSTRING *to, + const Lex_string_with_metadata_st *from) + { + return convert_string(to, system_charset_info, + from, charset(), charset_is_system_charset); + } + /* Same as make_text_string_sys(), but the target is variables.collation_connection and the copy shortcut is governed by charset_is_collation_connection */ bool make_text_string_connection(LEX_CSTRING *to, + const Lex_string_with_metadata_st *from) + { + return convert_string(to, variables.collation_connection, + from, charset(), charset_is_collation_connection); + } + /* Same as make_text_string_sys(), but the target is variables.character_set_filesystem and the copy shortcut is governed by charset_is_character_set_filesystem */ bool make_text_string_filesystem(LEX_CSTRING *to, + const Lex_string_with_metadata_st *from) + { + return convert_string(to, variables.character_set_filesystem, + from, charset(), charset_is_character_set_filesystem); + } void add_changed_table(TABLE *table); void add_changed_table(const char *key, size_t key_length); CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 35cf57a283f..95a59c341d5 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr) operation. */ -void Lex_input_stream::body_utf8_append_ident(THD *thd, - const LEX_CSTRING *txt, - const char *end_ptr) +void +Lex_input_stream::body_utf8_append_ident(THD *thd, + const Lex_string_with_metadata_st *txt, + const char *end_ptr) { if (!m_cpp_utf8_processed_ptr) return; LEX_CSTRING utf_txt; - CHARSET_INFO *txt_cs= thd->charset(); - - if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci)) - { - LEX_STRING to; - thd->convert_string(&to, - &my_charset_utf8_general_ci, - txt->str, (uint) txt->length, - txt_cs); - utf_txt.str= to.str; - utf_txt.length= to.length; - - } - else - utf_txt= *txt; + /* Replaces the explicit my_charset_utf8_general_ci branch removed above. NOTE(review): the result code is not examined here (the removed code did not examine it either); confirm utf_txt is safe to pass to the following memcpy when the call reports failure. */ thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value? /* NOTE: utf_txt.length is in bytes, not in symbols. 
*/ - memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length); m_body_utf8_ptr += utf_txt.length; *m_body_utf8_ptr= 0; @@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep, uchar c; uint found_escape=0; CHARSET_INFO *cs= m_thd->charset(); + /* Local 8-bit flag: raised below for any byte with bit 0x80 set, and stored into dst only by the final dst->set() calls instead of being written to dst during the scan */ bool is_8bit= false; - dst->set_8bit(false); while (! eof()) { c= yyGet(); if (c & 0x80) - dst->set_8bit(true); + is_8bit= true; #ifdef USE_MB { int l; @@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep, if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1))) { - dst->str= ""; // Sql_alloc has set error flag - dst->length= 0; - return true; + /* Allocation failed: dst takes str and length from empty_clex_str, with the 8-bit flag passed as 0 and a zero quote character */ dst->set(&empty_clex_str, 0, '\0'); + return true; // Sql_alloc has set error flag } - dst->str= to; m_cpp_text_start= m_cpp_tok_start + pre_skip; m_cpp_text_end= get_cpp_ptr() - post_skip; if (!found_escape) { - memcpy(to, str, dst->length= (end - str)); - to[dst->length]= 0; + /* No escapes seen: copy the bytes between str and end verbatim, terminate with a zero byte, then publish pointer, length and metadata in one set() call */ size_t len= (end - str); + memcpy(to, str, len); + to[len]= '\0'; + dst->set(to, len, is_8bit, '\0'); } else { - dst->length= unescape(cs, to, str, end, sep); + /* Escapes seen: the value returned by unescape() is used as the length stored in dst */ size_t len= unescape(cs, to, str, end, sep); + dst->set(to, len, is_8bit, '\0'); } return false; } diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 9e476f3a5eb..b78fbcf9d18 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -37,12 +37,16 @@ /** - A string with metadata. + A string with metadata. Usually points to a string in the client + character set, but unlike Lex_ident_cli_st (see below) it does not + necessarily point to a query fragment. It can also point to memory + of other kinds (e.g. an additional THD allocated memory buffer + not overlapping with the current query text). + We'll add more flags here eventually, to know if the string has, e.g.: - multi-byte characters - bad byte sequences - backslash escapes: 'a\nb' - - separator escapes: 'a''b' and reuse the original query fragments instead of making the string copy too early, in Lex_input_stream::get_text(). 
This will allow to avoid unnecessary copying, as well as @@ -50,9 +54,30 @@ */ struct Lex_string_with_metadata_st: public LEX_CSTRING { +private: bool m_is_8bit; // True if the string has 8bit characters + char m_quote; // Quote character, or 0 if not quoted public: void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; } + /* Overwrite both metadata members; str and length are left untouched */ void set_metadata(bool is_8bit, char quote) + { + m_is_8bit= is_8bit; + m_quote= quote; + } + /* Assign pointer, length and both metadata members in one call; the pointer is stored as given, nothing is copied */ void set(const char *s, size_t len, bool is_8bit, char quote) + { + str= s; + length= len; + set_metadata(is_8bit, quote); + } + /* Take str and length from *s through the LEX_CSTRING base assignment, then set both metadata members */ void set(const LEX_CSTRING *s, bool is_8bit, char quote) + { + ((LEX_CSTRING &)*this)= *s; + set_metadata(is_8bit, quote); + } + /* Read accessors for the now-private metadata; is_quoted() is true when m_quote is not the zero character */ bool is_8bit() const { return m_is_8bit; } + bool is_quoted() const { return m_quote != '\0'; } + char quote() const { return m_quote; } // Get string repertoire by the 8-bit flag and the character set uint repertoire(CHARSET_INFO *cs) const { @@ -71,44 +96,27 @@ public: Used to store identifiers in the client character set. Points to a query fragment. 
*/ -struct Lex_ident_cli_st: public LEX_CSTRING +struct Lex_ident_cli_st: public Lex_string_with_metadata_st { -private: - bool m_is_8bit; - char m_quote; public: void set_keyword(const char *s, size_t len) { - str= s; - length= len; - m_is_8bit= false; - m_quote= '\0'; + /* Keyword: 8-bit flag false, zero quote character; same four assignments as the removed lines, now done by the inherited set() */ set(s, len, false, '\0'); } void set_ident(const char *s, size_t len, bool is_8bit) { - str= s; - length= len; - m_is_8bit= is_8bit; - m_quote= '\0'; + /* Unquoted identifier: caller-supplied 8-bit flag, zero quote character */ set(s, len, is_8bit, '\0'); } void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote) { - str= s; - length= len; - m_is_8bit= is_8bit; - m_quote= quote; + /* Quoted identifier: the quote character is recorded, which makes pos() and end() below widen the range by one byte on each side */ set(s, len, is_8bit, quote); } void set_unquoted(const LEX_CSTRING *s, bool is_8bit) { - ((LEX_CSTRING &)*this)= *s; - m_is_8bit= is_8bit; - m_quote= '\0'; + /* Take str and length from *s, with a zero quote character */ set(s, is_8bit, '\0'); } const char *pos() const { return str - is_quoted(); } const char *end() const { return str + length + is_quoted(); } - bool is_quoted() const { return m_quote != '\0'; } - bool is_8bit() const { return m_is_8bit; } - char quote() const { return m_quote; } }; @@ -2453,7 +2461,7 @@ public: void body_utf8_append(const char *ptr); void body_utf8_append(const char *ptr, const char *end_ptr); void body_utf8_append_ident(THD *thd, - const LEX_CSTRING *txt, + const Lex_string_with_metadata_st *txt, const char *end_ptr); void body_utf8_append_escape(THD *thd, const LEX_CSTRING *txt, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index add67e1fd3e..cd79d6cd2c6 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -15170,57 +15170,26 @@ IDENT_sys: TEXT_STRING_sys: TEXT_STRING { - if (thd->charset_is_system_charset) - $$= $1; - else - { - LEX_STRING to; - if (unlikely(thd->convert_string(&to, system_charset_info, - $1.str, $1.length, - thd->charset()))) - MYSQL_YYABORT; - $$.str= to.str; - $$.length= to.length; - } + /* The copy-or-convert branch removed above now lives in THD::make_text_string_sys(); a true result still leads to MYSQL_YYABORT */ if (thd->make_text_string_sys(&$$, &$1)) + MYSQL_YYABORT; } ; TEXT_STRING_literal: TEXT_STRING { - if (thd->charset_is_collation_connection) - $$= $1; - else - { - LEX_STRING to; - if 
(unlikely(thd->convert_string(&to, - thd->variables.collation_connection, - $1.str, $1.length, - thd->charset()))) - MYSQL_YYABORT; - $$.str= to.str; - $$.length= to.length; - } + /* The copy-or-convert branch removed above now lives in THD::make_text_string_connection(); a true result still leads to MYSQL_YYABORT */ if (thd->make_text_string_connection(&$$, &$1)) + MYSQL_YYABORT; } ; TEXT_STRING_filesystem: TEXT_STRING { - if (thd->charset_is_character_set_filesystem) - $$= $1; - else - { - LEX_STRING to; - if (unlikely(thd->convert_string(&to, - thd->variables.character_set_filesystem, - $1.str, $1.length, - thd->charset()))) - MYSQL_YYABORT; - $$.str= to.str; - $$.length= to.length; - } + /* The copy-or-convert branch removed above now lives in THD::make_text_string_filesystem(); a true result still leads to MYSQL_YYABORT */ if (thd->make_text_string_filesystem(&$$, &$1)) + MYSQL_YYABORT; } + ; ident_table_alias: IDENT_sys diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index c40f0ddb307..a3e76c6b80a 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -14919,56 +14919,24 @@ IDENT_sys: TEXT_STRING_sys: TEXT_STRING { - if (thd->charset_is_system_charset) - $$= $1; - else - { - LEX_STRING to; - if (unlikely(thd->convert_string(&to, system_charset_info, - $1.str, $1.length, - thd->charset()))) - MYSQL_YYABORT; - $$.str= to.str; - $$.length= to.length; - } + /* Same replacement as in sql_yacc.yy: the branch removed above is delegated to THD::make_text_string_sys() */ if (thd->make_text_string_sys(&$$, &$1)) + MYSQL_YYABORT; } ; TEXT_STRING_literal: TEXT_STRING { - if (thd->charset_is_collation_connection) - $$= $1; - else - { - LEX_STRING to; - if (unlikely(thd->convert_string(&to, - thd->variables.collation_connection, - $1.str, $1.length, - thd->charset()))) - MYSQL_YYABORT; - $$.str= to.str; - $$.length= to.length; - } + /* Same replacement as in sql_yacc.yy: the branch removed above is delegated to THD::make_text_string_connection() */ if (thd->make_text_string_connection(&$$, &$1)) + MYSQL_YYABORT; } ; TEXT_STRING_filesystem: TEXT_STRING { - if (thd->charset_is_character_set_filesystem) - $$= $1; - else - { - LEX_STRING to; - if (unlikely(thd->convert_string(&to, - thd->variables.character_set_filesystem, - $1.str, $1.length, - thd->charset()))) - MYSQL_YYABORT; - $$.str= to.str; - $$.length= to.length; - } + /* Same replacement as in sql_yacc.yy: the branch removed above is delegated to THD::make_text_string_filesystem() */ if (thd->make_text_string_filesystem(&$$, &$1)) + MYSQL_YYABORT; } ;