diff --git a/mysql-test/t/ctype_many.test b/mysql-test/t/ctype_many.test index 1f9f9cf99cc..ddb7034be10 100644 --- a/mysql-test/t/ctype_many.test +++ b/mysql-test/t/ctype_many.test @@ -2,6 +2,8 @@ DROP TABLE IF EXISTS t1; --enable_warnings +SET NAMES latin1; + CREATE TABLE t1 ( comment CHAR(32) ASCII NOT NULL, koi8_ru_f CHAR(32) CHARACTER SET koi8r NOT NULL @@ -135,10 +137,12 @@ INSERT INTO t1 (koi8_ru_f,comment) VALUES (_koi8r' INSERT INTO t1 (koi8_ru_f,comment) VALUES (_koi8r'à','CYR CAPIT YU'); INSERT INTO t1 (koi8_ru_f,comment) VALUES (_koi8r'ñ','CYR CAPIT YA'); -SELECT CONVERT(koi8_ru_f USING utf8),MIN(comment),COUNT(*) FROM t1 GROUP BY 1; +SET NAMES utf8; +SELECT koi8_ru_f,MIN(comment),COUNT(*) FROM t1 GROUP BY 1; ALTER TABLE t1 ADD utf8_f CHAR(32) CHARACTER SET utf8 NOT NULL; UPDATE t1 SET utf8_f=CONVERT(koi8_ru_f USING utf8); +SET NAMES koi8r; SELECT * FROM t1; ALTER TABLE t1 ADD bin_f CHAR(32) BYTE NOT NULL; @@ -161,6 +165,7 @@ FROM t1 t11,t1 t12 WHERE t11.koi8_ru_f=CONVERT(t12.utf8_f USING koi8r) ORDER BY t12.utf8_f,t11.comment,t12.comment; +SET NAMES utf8; ALTER TABLE t1 ADD ucs2_f CHAR(32) CHARACTER SET ucs2; ALTER TABLE t1 CHANGE ucs2_f ucs2_f CHAR(32) UNICODE NOT NULL; @@ -193,5 +198,8 @@ UPDATE t1 SET greek_f=CONVERT(ucs2_f USING greek) WHERE comment LIKE 'GRE%'; UPDATE t1 SET armscii8_f=CONVERT(ucs2_f USING armscii8) WHERE comment LIKE 'ARM%'; UPDATE t1 SET utf8_f=CONVERT(ucs2_f USING utf8) WHERE utf8_f=''; UPDATE t1 SET ucs2_f=CONVERT(utf8_f USING ucs2) WHERE ucs2_f=''; +SELECT * FROM t1; +SET NAMES 'binary'; +SELECT * FROM t1; SELECT min(comment),count(*) FROM t1 GROUP BY ucs2_f; DROP TABLE t1; diff --git a/mysys/charset.c b/mysys/charset.c index 87fb7846553..0b2fd514019 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -324,7 +324,7 @@ static int add_collation(CHARSET_INFO *cs) if (cs->primary_number == cs->number) cs->state |= MY_CS_PRIMARY; - if (cs->primary_number == cs->number) + if (cs->binary_number == cs->number) cs->state |= MY_CS_BINSORT; if (!(all_charsets[cs->number]->state & MY_CS_COMPILED)) diff --git a/sql/item.cc b/sql/item.cc index 28fcabfe1b8..6dbe026515d 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -90,19 +90,23 @@ bool Item::check_cols(uint c) return 0; } -void Item::set_name(const char *str,uint length) +void Item::set_name(const char *str,uint length, CHARSET_INFO *cs) { if (!length) - name= (char*) str; // Used by AS - else - { - while (length && !my_isgraph(system_charset_info,*str)) - { // Fix problem with yacc - length--; - str++; - } - name=sql_strmake(str,min(length,MAX_FIELD_WIDTH)); + length= str ? strlen(str) : 0; + while (length && !my_isgraph(cs,*str)) + { // Fix problem with yacc + length--; + str++; } + if (!my_charset_same(cs, system_charset_info)) + { + String tmp; + tmp.copy(str, length, cs, system_charset_info); + name=sql_strmake(tmp.ptr(),min(tmp.length(),MAX_FIELD_WIDTH)); + } + else + name=sql_strmake(str,min(length,MAX_FIELD_WIDTH)); } /* diff --git a/sql/item.h b/sql/item.h index 7b31f03f6ac..2bb22d47083 100644 --- a/sql/item.h +++ b/sql/item.h @@ -63,7 +63,7 @@ public: */ Item(THD *thd, Item &item); virtual ~Item() { name=0; } /*lint -e1509 */ - void set_name(const char *str,uint length=0); + void set_name(const char *str,uint length, CHARSET_INFO *cs); void init_make_field(Send_field *tmp_field,enum enum_field_types type); virtual void make_field(Send_field *field); virtual bool fix_fields(THD *, struct st_table_list *, Item **); @@ -359,7 +359,7 @@ public: str_value.set(str,length,cs); coercibility= coer; max_length=length; - name=(char*) str_value.ptr(); + set_name(str, length, cs); decimals=NOT_FIXED_DEC; } Item_string(const char *name_par, const char *str, uint length, @@ -368,7 +368,7 @@ public: str_value.set(str,length,cs); coercibility= coer; max_length=length; - name=(char*) name_par; + set_name(name_par,0,cs); decimals=NOT_FIXED_DEC; } ~Item_string() {} diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 301e5b4454f..998b38513a9 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -124,10 +124,10 @@ void Item_bool_func2::fix_length_and_dec() } } } - set_cmp_func(); /* QQ: COERCIBILITY */ - cmp_charset= (args[0]->binary() || args[1]->binary()) ? - &my_charset_bin : args[0]->charset(); + cmp_charset= (args[0]->binary() || args[1]->binary()) ? + &my_charset_bin : args[0]->charset(); + set_cmp_func(); } diff --git a/sql/item_func.cc b/sql/item_func.cc index 8fb97dc2873..2099a5a4ccf 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2641,7 +2641,7 @@ Item *get_system_var(enum_var_type var_type, LEX_STRING name) pos=strmov(pos,"global."); memcpy(pos, var->name, var->name_length+1); // set_name() will allocate the name - item->set_name(buff,(uint) (pos-buff)+var->name_length); + item->set_name(buff,(uint) (pos-buff)+var->name_length, system_charset_info); return item; } @@ -2656,9 +2656,9 @@ Item *get_system_var(enum_var_type var_type, const char *var_name, uint length, var= find_sys_var(var_name, length); DBUG_ASSERT(var != 0); if (!(item=var->item(thd, var_type))) - return 0; // Impossible + return 0; // Impossible thd->lex.uncacheable(); - item->set_name(item_name); // Will use original name + item->set_name(item_name, 0, system_charset_info); // Will use original name return item; } diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 80d85e565e7..f68a0dc9e73 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -2015,65 +2015,14 @@ String *Item_func_conv::val_str(String *str) String *Item_func_conv_charset::val_str(String *str) { - my_wc_t wc; - int cnvres; - const uchar *s, *se; - uchar *d, *d0, *de; - uint32 dmaxlen; String *arg= args[0]->val_str(str); - CHARSET_INFO *from,*to; - if (!arg) { null_value=1; return 0; } - null_value=0; - - from=arg->charset(); - to=conv_charset; - - s=(const uchar*)arg->ptr(); - se=s+arg->length(); - - dmaxlen=arg->length()*to->mbmaxlen+1; - str->alloc(dmaxlen); - d0=d=(unsigned char*)str->ptr(); - de=d+dmaxlen; - - while (1) - { - cnvres=from->mb_wc(from,&wc,s,se); - if (cnvres>0) - { - s+=cnvres; - } - else if (cnvres==MY_CS_ILSEQ) - { - s++; - wc='?'; - } - else - break; - -outp: - cnvres=to->wc_mb(to,wc,d,de); - if (cnvres>0) - { - d+=cnvres; - } - else if (cnvres==MY_CS_ILUNI && wc!='?') - { - wc='?'; - goto outp; - } - else - break; - }; - - str->length((uint32) (d-d0)); - str->set_charset(to); - return str; + null_value= str->copy(arg->ptr(),arg->length(),arg->charset(),conv_charset); + return null_value ? 0 : str; } void Item_func_conv_charset::fix_length_and_dec() diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e01af4de543..7f9c39e7e8f 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -2084,7 +2084,6 @@ static int init_common_variables(const char *conf_file_name, int argc, #endif if (!(default_charset_info= get_charset_by_name(sys_charset.value, MYF(MY_WME)))) return 1; - system_charset_info= default_charset_info; charsets_list= list_charsets(MYF(MY_CS_COMPILED | MY_CS_CONFIG)); if (use_temp_pool && bitmap_init(&temp_pool,1024,1)) diff --git a/sql/protocol.cc b/sql/protocol.cc index c7ce38eadac..9fb32daf57f 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -714,7 +714,6 @@ bool Protocol_simple::store(const char *from, uint length, CHARSET_INFO *cs) #endif if (convert) return convert_str(from, length); -#if 0 if (cs != this->thd->charset()) { String tmp; @@ -722,7 +721,6 @@ bool Protocol_simple::store(const char *from, uint length, CHARSET_INFO *cs) return net_store_data(tmp.ptr(), tmp.length()); } else -#endif return net_store_data(from, length); } @@ -813,11 +811,18 @@ bool Protocol_simple::store(Field *field) field_pos++; #endif char buff[MAX_FIELD_WIDTH]; - String tmp(buff,sizeof(buff), &my_charset_bin); - field->val_str(&tmp,&tmp); + String tmp1(buff,sizeof(buff), &my_charset_bin); + field->val_str(&tmp1,&tmp1); if (convert) - return convert_str(tmp.ptr(), tmp.length()); - return net_store_data(tmp.ptr(), tmp.length()); + return convert_str(tmp1.ptr(), tmp1.length()); + if (field->charset() != this->thd->charset()) + { + String tmp; + tmp.copy(tmp1.ptr(), tmp1.length(), tmp1.charset(), this->thd->charset()); + return net_store_data(tmp.ptr(), tmp.length()); + } + else + return net_store_data(tmp1.ptr(), tmp1.length()); } diff --git a/sql/sql_string.cc b/sql/sql_string.cc index d796c53fb52..ae3875834f0 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -28,7 +28,7 @@ #include #endif -CHARSET_INFO *system_charset_info= &my_charset_latin1; +CHARSET_INFO *system_charset_info= &my_charset_utf8; extern gptr sql_alloc(unsigned size); extern void sql_element_free(void *ptr); static uint32 diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 7db398e7810..a526b8d4e06 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -558,7 +558,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize); %type IDENT TEXT_STRING REAL_NUM FLOAT_NUM NUM LONG_NUM HEX_NUM LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text - UNDERSCORE_CHARSET + UNDERSCORE_CHARSET IDENT_sys TEXT_STRING_sys TEXT_STRING_db %type opt_table_alias @@ -772,22 +772,22 @@ master_defs: | master_defs ',' master_def; master_def: - MASTER_HOST_SYM EQ TEXT_STRING + MASTER_HOST_SYM EQ TEXT_STRING_sys { Lex->mi.host = $3.str; } | - MASTER_USER_SYM EQ TEXT_STRING + MASTER_USER_SYM EQ TEXT_STRING_sys { Lex->mi.user = $3.str; } | - MASTER_PASSWORD_SYM EQ TEXT_STRING + MASTER_PASSWORD_SYM EQ TEXT_STRING_sys { Lex->mi.password = $3.str; } | - MASTER_LOG_FILE_SYM EQ TEXT_STRING + MASTER_LOG_FILE_SYM EQ TEXT_STRING_sys { Lex->mi.log_file_name = $3.str; } @@ -807,7 +807,7 @@ master_def: Lex->mi.connect_retry = $3; } | - RELAY_LOG_FILE_SYM EQ TEXT_STRING + RELAY_LOG_FILE_SYM EQ TEXT_STRING_sys { Lex->mi.relay_log_name = $3.str; } @@ -877,14 +877,14 @@ create: lex->name=$4.str; lex->create_info.options=$3; } - | CREATE udf_func_type UDF_SYM IDENT + | CREATE udf_func_type UDF_SYM IDENT_sys { LEX *lex=Lex; lex->sql_command = SQLCOM_CREATE_FUNCTION; lex->udf.name = $4; lex->udf.type= $2; } - UDF_RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING + UDF_RETURNS_SYM udf_type UDF_SONAME_SYM TEXT_STRING_sys { LEX *lex=Lex; lex->udf.returns=(Item_result) $7; @@ -966,8 +966,8 @@ create_table_option: | MAX_ROWS opt_equal ulonglong_num { Lex->create_info.max_rows= $3; Lex->create_info.used_fields|= HA_CREATE_USED_MAX_ROWS;} | MIN_ROWS opt_equal ulonglong_num { Lex->create_info.min_rows= $3; Lex->create_info.used_fields|= HA_CREATE_USED_MIN_ROWS;} | AVG_ROW_LENGTH opt_equal ULONG_NUM { Lex->create_info.avg_row_length=$3; Lex->create_info.used_fields|= HA_CREATE_USED_AVG_ROW_LENGTH;} - | PASSWORD opt_equal TEXT_STRING { Lex->create_info.password=$3.str; } - | COMMENT_SYM opt_equal TEXT_STRING { Lex->create_info.comment=$3.str; } + | PASSWORD opt_equal TEXT_STRING_sys { Lex->create_info.password=$3.str; } + | COMMENT_SYM opt_equal TEXT_STRING_sys { Lex->create_info.comment=$3.str; } | AUTO_INC opt_equal ulonglong_num { Lex->create_info.auto_increment_value=$3; Lex->create_info.used_fields|= HA_CREATE_USED_AUTO;} | PACK_KEYS_SYM opt_equal ULONG_NUM { Lex->create_info.table_options|= $3 ? HA_OPTION_PACK_KEYS : HA_OPTION_NO_PACK_KEYS; Lex->create_info.used_fields|= HA_CREATE_USED_PACK_KEYS;} | PACK_KEYS_SYM opt_equal DEFAULT { Lex->create_info.table_options&= ~(HA_OPTION_PACK_KEYS | HA_OPTION_NO_PACK_KEYS); Lex->create_info.used_fields|= HA_CREATE_USED_PACK_KEYS;} @@ -1001,8 +1001,9 @@ create_table_option: Lex->create_info.used_fields|= HA_CREATE_USED_CHARSET; } | INSERT_METHOD opt_equal merge_insert_types { Lex->create_info.merge_insert_method= $3; Lex->create_info.used_fields|= HA_CREATE_USED_INSERT_METHOD;} - | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING { Lex->create_info.data_file_name= $4.str; } - | INDEX DIRECTORY_SYM opt_equal TEXT_STRING { Lex->create_info.index_file_name= $4.str; }; + | DATA_SYM DIRECTORY_SYM opt_equal TEXT_STRING_sys + { Lex->create_info.data_file_name= $4.str; } + | INDEX DIRECTORY_SYM opt_equal TEXT_STRING_sys { Lex->create_info.index_file_name= $4.str; }; table_types: ISAM_SYM { $$= DB_TYPE_ISAM; } @@ -1637,7 +1638,7 @@ restore: { Lex->sql_command = SQLCOM_RESTORE_TABLE; } - table_list FROM TEXT_STRING + table_list FROM TEXT_STRING_sys { Lex->backup_dir = $6.str; }; @@ -1647,7 +1648,7 @@ backup: { Lex->sql_command = SQLCOM_BACKUP_TABLE; } - table_list TO_SYM TEXT_STRING + table_list TO_SYM TEXT_STRING_sys { Lex->backup_dir = $6.str; }; @@ -1897,9 +1898,9 @@ select_item: if (add_item_to_list(YYTHD, $2)) YYABORT; if ($4.str) - $2->set_name($4.str); + $2->set_name($4.str,$4.length,system_charset_info); else if (!$2->name) - $2->set_name($1,(uint) ($3 - $1)); + $2->set_name($1,(uint) ($3 - $1), YYTHD->charset()); }; remember_name: @@ -1913,11 +1914,12 @@ select_item2: | expr { $$=$1; }; select_alias: - { $$.str=0;} - | AS ident { $$=$2; } - | AS TEXT_STRING { $$=$2; } - | ident { $$=$1; } - | TEXT_STRING { $$=$1; }; + /* empty */ { $$.str=0;} + | AS ident { $$=$2; } + | AS TEXT_STRING_sys { $$=$2; } + | ident { $$=$1; } + | TEXT_STRING_sys { $$=$1; } + ; optional_braces: /* empty */ {} @@ -2219,9 +2221,9 @@ simple_expr: Lex->uncacheable();; } | ENCRYPT '(' expr ',' expr ')' { $$= new Item_func_encrypt($3,$5); } - | DECODE_SYM '(' expr ',' TEXT_STRING ')' + | DECODE_SYM '(' expr ',' TEXT_STRING_db ')' { $$= new Item_func_decode($3,$5.str); } - | ENCODE_SYM '(' expr ',' TEXT_STRING ')' + | ENCODE_SYM '(' expr ',' TEXT_STRING_db ')' { $$= new Item_func_encode($3,$5.str); } | DES_DECRYPT_SYM '(' expr ')' { $$= new Item_func_des_decrypt($3); } @@ -2818,8 +2820,8 @@ having_clause: ; opt_escape: - ESCAPE_SYM TEXT_STRING { $$= $2.str; } - | /* empty */ { $$= (char*) "\\"; }; + ESCAPE_SYM TEXT_STRING_db { $$= $2.str; } + | /* empty */ { $$= (char*) "\\"; }; /* @@ -3015,7 +3017,7 @@ procedure_item: if (add_proc_to_list(lex->thd, $2)) YYABORT; if (!$2->name) - $2->set_name($1,(uint) ((char*) lex->tok_end - $1)); + $2->set_name($1,(uint) ((char*) lex->tok_end - $1), YYTHD->charset()); } ; @@ -3044,7 +3046,7 @@ select_var_ident: '@' ident_or_text ; into: - INTO OUTFILE TEXT_STRING + INTO OUTFILE TEXT_STRING_sys { LEX *lex=Lex; if (!lex->describe) @@ -3056,7 +3058,7 @@ into: } } opt_field_term opt_line_term - | INTO DUMPFILE TEXT_STRING + | INTO DUMPFILE TEXT_STRING_sys { LEX *lex=Lex; if (!lex->describe) @@ -3118,7 +3120,7 @@ drop: lex->drop_if_exists=$3; lex->name=$4.str; } - | DROP UDF_SYM IDENT + | DROP UDF_SYM IDENT_sys { LEX *lex=Lex; lex->sql_command = SQLCOM_DROP_FUNCTION; @@ -3478,7 +3480,7 @@ show_param: YYABORT; } | NEW_SYM MASTER_SYM FOR_SYM SLAVE WITH MASTER_LOG_FILE_SYM EQ - TEXT_STRING AND MASTER_LOG_POS_SYM EQ ulonglong_num + TEXT_STRING_sys AND MASTER_LOG_POS_SYM EQ ulonglong_num AND MASTER_SERVER_ID_SYM EQ ULONG_NUM { @@ -3593,7 +3595,7 @@ from_or_in: binlog_in: /* empty */ { Lex->mi.log_file_name = 0; } - | IN_SYM TEXT_STRING { Lex->mi.log_file_name = $2.str; }; + | IN_SYM TEXT_STRING_sys { Lex->mi.log_file_name = $2.str; }; binlog_from: /* empty */ { Lex->mi.pos = 4; /* skip magic number */ } @@ -3696,7 +3698,7 @@ purge_options: ; purge_option: - TO_SYM TEXT_STRING + TO_SYM TEXT_STRING_sys { Lex->sql_command = SQLCOM_PURGE; Lex->to_log = $2.str; @@ -3740,7 +3742,7 @@ use: USE_SYM ident /* import, export of files */ -load: LOAD DATA_SYM load_data_lock opt_local INFILE TEXT_STRING +load: LOAD DATA_SYM load_data_lock opt_local INFILE TEXT_STRING_sys { LEX *lex=Lex; lex->sql_command= SQLCOM_LOAD; @@ -3824,17 +3826,21 @@ opt_ignore_lines: /* Common definitions */ text_literal: - TEXT_STRING - { $$ = new Item_string($1.str,$1.length, - YYTHD->variables.thd_charset); } + TEXT_STRING_db + { + THD *thd= YYTHD; + CHARSET_INFO *cs= my_charset_same(thd->charset(),thd->db_charset) ? + thd->charset() : thd->db_charset; + $$ = new Item_string($1.str,$1.length,cs); + } | UNDERSCORE_CHARSET TEXT_STRING { $$ = new Item_string($2.str,$2.length,Lex->charset,Item::COER_IMPLICIT); } - | text_literal TEXT_STRING + | text_literal TEXT_STRING_db { ((Item_string*) $1)->append($2.str,$2.length); }; text_string: - TEXT_STRING - { $$= new String($1.str,$1.length,YYTHD->variables.thd_charset); } + TEXT_STRING_db + { $$= new String($1.str,$1.length,YYTHD->db_charset); } | HEX_NUM { Item *tmp = new Item_varbinary($1.str,$1.length); @@ -3957,8 +3963,63 @@ table_ident: | '.' ident { $$=new Table_ident($2);} /* For Delphi */; +IDENT_sys: + IDENT + { + THD *thd= YYTHD; + if (my_charset_same(thd->charset(),system_charset_info)) + { + $$=$1; + } + else + { + String ident; + ident.copy($1.str,$1.length,thd->charset(),system_charset_info); + $$.str= thd->strmake(ident.ptr(),ident.length()); + $$.length= ident.length(); + } + } + ; + +TEXT_STRING_sys: + TEXT_STRING + { + THD *thd= YYTHD; + if (my_charset_same(thd->charset(),system_charset_info)) + { + $$=$1; + } + else + { + String ident; + ident.copy($1.str,$1.length,thd->charset(),system_charset_info); + $$.str= thd->strmake(ident.ptr(),ident.length()); + $$.length= ident.length(); + } + } + ; + +TEXT_STRING_db: + TEXT_STRING + { + THD *thd= YYTHD; + if (my_charset_same(thd->charset(),thd->db_charset)) + { + $$=$1; + } + else + { + String ident; + ident.copy($1.str,$1.length,thd->charset(),thd->db_charset); + $$.str= thd->strmake(ident.ptr(),ident.length()); + $$.length= ident.length(); + } + } + ; + + ident: - IDENT { $$=$1; } + IDENT_sys { $$=$1; } | keyword { LEX *lex= Lex; @@ -3970,9 +4031,9 @@ ident: ; ident_or_text: - ident { $$=$1;} - | TEXT_STRING { $$=$1;} - | LEX_HOSTNAME { $$=$1;}; + ident { $$=$1;} + | TEXT_STRING_sys { $$=$1;} + | LEX_HOSTNAME { $$=$1;}; user: ident_or_text diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 2db00e64c72..897e2846659 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -27,6 +27,10 @@ #define EILSEQ ENOENT #endif +#ifndef HAVE_CHARSET_utf8 +#define HAVE_CHARSET_utf8 +#endif + #ifdef HAVE_CHARSET_utf8 #define HAVE_UNIDATA #endif