From 72d7b12b9c9c5ceffef9fff3adc86c149f57f20f Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 27 Feb 2015 16:26:12 +0400 Subject: [PATCH] Reducing duplicate code and simplifying well formed string copying by adding a new class String_copier. This is a pre-requisite patch for MDEV-6566 and MDEV-6572, to avoid adding more similar code. --- sql/field.cc | 86 +++++----------- sql/field.h | 11 ++ sql/sql_class.cc | 19 ++-- sql/sql_signal.cc | 13 +-- sql/sql_string.cc | 105 ++++++++++---------- sql/sql_string.h | 72 ++++++++++++-- storage/perfschema/table_session_connect.cc | 10 +- 7 files changed, 164 insertions(+), 152 deletions(-) diff --git a/sql/field.cc b/sql/field.cc index fe60a824568..0f062c38ca1 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -6262,10 +6262,9 @@ bool Field_datetimef::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) SYNOPSIS check_string_copy_error() - field - Field - well_formed_error_pos - where not well formed data was first met - cannot_convert_error_pos - where a not-convertable character was first met - end - end of the string + copier - the conversion status + end - the very end of the source string + that was just copied cs - character set of the string NOTES @@ -6282,30 +6281,25 @@ bool Field_datetimef::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) TRUE - If an error happened */ -static bool -check_string_copy_error(Field_str *field, - const char *well_formed_error_pos, - const char *cannot_convert_error_pos, - const char *end, - CHARSET_INFO *cs) +bool +Field_longstr::check_string_copy_error(const String_copier *copier, + const char *end, + CHARSET_INFO *cs) { const char *pos; char tmp[32]; - THD *thd; - thd= field->get_thd(); - - if (!(pos= well_formed_error_pos) && - !(pos= cannot_convert_error_pos)) + if (!(pos= copier->most_important_error_pos())) return FALSE; convert_to_printable(tmp, sizeof(tmp), pos, (end - pos), cs, 6); + THD *thd= get_thd(); push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, 
ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), - "string", tmp, field->field_name, + "string", tmp, field_name, thd->get_stmt_da()->current_row_for_warning()); return TRUE; } @@ -6362,20 +6356,15 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) { ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; uint copy_length; - const char *well_formed_error_pos; - const char *cannot_convert_error_pos; - const char *from_end_pos; + String_copier copier; /* See the comment for Field_long::store(long long) */ DBUG_ASSERT(!table || table->in_use == current_thd); - copy_length= well_formed_copy_nchars(field_charset, + copy_length= copier.well_formed_copy(field_charset, (char*) ptr, field_length, cs, from, length, - field_length / field_charset->mbmaxlen, - &well_formed_error_pos, - &cannot_convert_error_pos, - &from_end_pos); + field_length / field_charset->mbmaxlen); /* Append spaces if the string was shorter than the field. */ if (copy_length < field_length) @@ -6383,11 +6372,7 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) field_length-copy_length, field_charset->pad_char); - if (check_string_copy_error(this, well_formed_error_pos, - cannot_convert_error_pos, from + length, cs)) - return 2; - - return report_if_important_data(from_end_pos, from + length, FALSE); + return check_conversion_status(&copier, from + length, cs, false); } @@ -6874,29 +6859,19 @@ int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs) { ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; uint copy_length; - const char *well_formed_error_pos; - const char *cannot_convert_error_pos; - const char *from_end_pos; + String_copier copier; - copy_length= well_formed_copy_nchars(field_charset, + copy_length= copier.well_formed_copy(field_charset, (char*) ptr + length_bytes, field_length, cs, from, length, - field_length / field_charset->mbmaxlen, - &well_formed_error_pos, - &cannot_convert_error_pos, - &from_end_pos); - + 
field_length / field_charset->mbmaxlen); if (length_bytes == 1) *ptr= (uchar) copy_length; else int2store(ptr, copy_length); - if (check_string_copy_error(this, well_formed_error_pos, - cannot_convert_error_pos, from + length, cs)) - return 2; - - return report_if_important_data(from_end_pos, from + length, TRUE); + return check_conversion_status(&copier, from + length, cs, true); } @@ -7351,9 +7326,8 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs) { ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; uint copy_length, new_length; - const char *well_formed_error_pos; - const char *cannot_convert_error_pos; - const char *from_end_pos, *tmp; + String_copier copier; + const char *tmp; char buff[STRING_BUFFER_USUAL_SIZE]; String tmpstr(buff,sizeof(buff), &my_charset_bin); @@ -7401,28 +7375,14 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs) bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*)); return 0; } - /* - "length" is OK as "nchars" argument to well_formed_copy_nchars as this - is never used to limit the length of the data. The cut of long data - is done with the new_length value. 
- */ - copy_length= well_formed_copy_nchars(field_charset, + copy_length= copier.well_formed_copy(field_charset, (char*) value.ptr(), new_length, - cs, from, length, - length, - &well_formed_error_pos, - &cannot_convert_error_pos, - &from_end_pos); - + cs, from, length); Field_blob::store_length(copy_length); tmp= value.ptr(); bmove(ptr+packlength,(uchar*) &tmp,sizeof(char*)); - if (check_string_copy_error(this, well_formed_error_pos, - cannot_convert_error_pos, from + length, cs)) - return 2; - - return report_if_important_data(from_end_pos, from + length, TRUE); + return check_conversion_status(&copier, from + length, cs, true); oom_error: /* Fatal OOM error */ diff --git a/sql/field.h b/sql/field.h index ba4c6ff0302..cac9a156a95 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1132,6 +1132,17 @@ class Field_longstr :public Field_str protected: int report_if_important_data(const char *ptr, const char *end, bool count_spaces); + bool check_string_copy_error(const String_copier *copier, + const char *end, CHARSET_INFO *cs); + int check_conversion_status(const String_copier *copier, + const char *end, CHARSET_INFO *cs, + bool count_spaces) + { + if (check_string_copy_error(copier, end, cs)) + return 2; + return report_if_important_data(copier->source_end_pos(), + end, count_spaces); + } public: Field_longstr(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg, uchar null_bit_arg, utype unireg_check_arg, diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 7bbc36d6cb5..37060a6e133 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -2986,9 +2986,7 @@ int select_export::send_data(List &items) if (res && !my_charset_same(write_cs, res->charset()) && !my_charset_same(write_cs, &my_charset_bin)) { - const char *well_formed_error_pos; - const char *cannot_convert_error_pos; - const char *from_end_pos; + String_copier copier; const char *error_pos; uint32 bytes; uint64 estimated_bytes= @@ -3001,16 +2999,11 @@ int select_export::send_data(List &items) goto err; } - 
bytes= well_formed_copy_nchars(write_cs, (char *) cvt_str.ptr(), + bytes= copier.well_formed_copy(write_cs, (char *) cvt_str.ptr(), cvt_str.alloced_length(), - res->charset(), res->ptr(), res->length(), - UINT_MAX32, // copy all input chars, - // i.e. ignore nchars parameter - &well_formed_error_pos, - &cannot_convert_error_pos, - &from_end_pos); - error_pos= well_formed_error_pos ? well_formed_error_pos - : cannot_convert_error_pos; + res->charset(), + res->ptr(), res->length()); + error_pos= copier.most_important_error_pos(); if (error_pos) { char printable_buff[32]; @@ -3023,7 +3016,7 @@ int select_export::send_data(List &items) "string", printable_buff, item->name, static_cast(row_count)); } - else if (from_end_pos < res->ptr() + res->length()) + else if (copier.source_end_pos() < res->ptr() + res->length()) { /* result is longer than UINT_MAX32 and doesn't fit into String diff --git a/sql/sql_signal.cc b/sql/sql_signal.cc index 374a24f75e5..f9933e68595 100644 --- a/sql/sql_signal.cc +++ b/sql/sql_signal.cc @@ -195,16 +195,9 @@ static bool assign_fixed_string(MEM_ROOT *mem_root, dst_str= (char*) alloc_root(mem_root, dst_len + 1); if (dst_str) { - const char* well_formed_error_pos; - const char* cannot_convert_error_pos; - const char* from_end_pos; - - dst_copied= well_formed_copy_nchars(dst_cs, dst_str, dst_len, - src_cs, src_str, src_len, - numchars, - & well_formed_error_pos, - & cannot_convert_error_pos, - & from_end_pos); + dst_copied= String_copier().well_formed_copy(dst_cs, dst_str, dst_len, + src_cs, src_str, src_len, + numchars); DBUG_ASSERT(dst_copied <= dst_len); dst_len= dst_copied; /* In case the copy truncated the data */ dst_str[dst_copied]= '\0'; diff --git a/sql/sql_string.cc b/sql/sql_string.cc index a7bfa6c1455..5eb55463e85 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -875,41 +875,44 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs, /* - copy a string, + Copy a string, with optional character set conversion, with optional left 
padding (for binary -> UCS2 conversion) - - SYNOPSIS - well_formed_copy_nchars() - to Store result here - to_length Maxinum length of "to" string - to_cs Character set of "to" string - from Copy from here - from_length Length of from string - from_cs From character set - nchars Copy not more that nchars characters - well_formed_error_pos Return position when "from" is not well formed + + If there is a Unicode conversion (i.e. to_cs and from_cs are + different character sets and both are not &my_charset_bin), bad input bytes + as well as characters that cannot be encoded in to_cs are replaced with '?'. + + In case of non-Unicode copying (i.e. to_cs and from_cs are same character set, + or from_cs is &my_charset_bin), the function stops on the first bad + byte sequence. + + The string that is written to "to" is always well-formed. + + @param to The destination string + @param to_length Space available in "to" + @param to_cs Character set of the "to" string + @param from The source string + @param from_length Length of the "from" string + @param from_cs Character set of the "from" string + @param nchars Copy not more than "nchars" characters + + The members are set as follows: + m_well_formed_error_pos To the position where "from" is not well formed or NULL otherwise. - cannot_convert_error_pos Return position where a not convertable + m_cannot_convert_error_pos To the position where a non-convertible character met, or NULL otherwise. - from_end_pos Return position where scanning of "from" + m_source_end_pos To the position where scanning of the "from" string stopped. 
- NOTES - RETURN - length of bytes copied to 'to' + @returns number of bytes that were written to 'to' */ - - -uint32 -well_formed_copy_nchars(CHARSET_INFO *to_cs, - char *to, uint to_length, - CHARSET_INFO *from_cs, - const char *from, uint from_length, - uint nchars, - const char **well_formed_error_pos, - const char **cannot_convert_error_pos, - const char **from_end_pos) +uint +String_copier::well_formed_copy(CHARSET_INFO *to_cs, + char *to, uint to_length, + CHARSET_INFO *from_cs, + const char *from, uint from_length, + uint nchars) { uint res; @@ -920,9 +923,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, { if (to_length < to_cs->mbminlen || !nchars) { - *from_end_pos= from; - *cannot_convert_error_pos= NULL; - *well_formed_error_pos= NULL; + m_source_end_pos= from; + m_cannot_convert_error_pos= NULL; + m_well_formed_error_pos= NULL; return 0; } @@ -930,9 +933,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, { res= MY_MIN(MY_MIN(nchars, to_length), from_length); memmove(to, from, res); - *from_end_pos= from + res; - *well_formed_error_pos= NULL; - *cannot_convert_error_pos= NULL; + m_source_end_pos= from + res; + m_well_formed_error_pos= NULL; + m_cannot_convert_error_pos= NULL; } else { @@ -964,8 +967,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, &well_formed_error) != to_cs->mbminlen) { - *from_end_pos= *well_formed_error_pos= from; - *cannot_convert_error_pos= NULL; + m_source_end_pos= m_well_formed_error_pos= from; + m_cannot_convert_error_pos= NULL; return 0; } nchars--; @@ -979,9 +982,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, res= to_cs->cset->well_formed_len(to_cs, from, from + from_length, nchars, &well_formed_error); memmove(to, from, res); - *from_end_pos= from + res; - *well_formed_error_pos= well_formed_error ? from + res : NULL; - *cannot_convert_error_pos= NULL; + m_source_end_pos= from + res; + m_well_formed_error_pos= well_formed_error ? 
from + res : NULL; + m_cannot_convert_error_pos= NULL; if (from_offset) res+= to_cs->mbminlen; } @@ -995,8 +998,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, const uchar *from_end= (const uchar*) from + from_length; uchar *to_end= (uchar*) to + to_length; char *to_start= to; - *well_formed_error_pos= NULL; - *cannot_convert_error_pos= NULL; + m_well_formed_error_pos= NULL; + m_cannot_convert_error_pos= NULL; for ( ; nchars; nchars--) { @@ -1005,8 +1008,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, from+= cnvres; else if (cnvres == MY_CS_ILSEQ) { - if (!*well_formed_error_pos) - *well_formed_error_pos= from; + if (!m_well_formed_error_pos) + m_well_formed_error_pos= from; from++; wc= '?'; } @@ -1016,8 +1019,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, A correct multibyte sequence detected But it doesn't have Unicode mapping. */ - if (!*cannot_convert_error_pos) - *cannot_convert_error_pos= from; + if (!m_cannot_convert_error_pos) + m_cannot_convert_error_pos= from; from+= (-cnvres); wc= '?'; } @@ -1026,8 +1029,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, if ((uchar *) from >= from_end) break; // End of line // Incomplete byte sequence - if (!*well_formed_error_pos) - *well_formed_error_pos= from; + if (!m_well_formed_error_pos) + m_well_formed_error_pos= from; from++; wc= '?'; } @@ -1036,8 +1039,8 @@ outp: to+= cnvres; else if (cnvres == MY_CS_ILUNI && wc != '?') { - if (!*cannot_convert_error_pos) - *cannot_convert_error_pos= from_prev; + if (!m_cannot_convert_error_pos) + m_cannot_convert_error_pos= from_prev; wc= '?'; goto outp; } @@ -1047,10 +1050,10 @@ outp: break; } } - *from_end_pos= from; + m_source_end_pos= from; res= (uint) (to - to_start); } - return (uint32) res; + return res; } diff --git a/sql/sql_string.h b/sql/sql_string.h index 7f6aed9f804..a40ac536f04 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -41,14 +41,70 @@ inline uint32 copy_and_convert(char *to, uint32 to_length, { return my_convert(to, to_length, to_cs, from, 
from_length, from_cs, errors); } -uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs, - char *to, uint to_length, - CHARSET_INFO *from_cs, - const char *from, uint from_length, - uint nchars, - const char **well_formed_error_pos, - const char **cannot_convert_error_pos, - const char **from_end_pos); + + +class String_copier +{ + const char *m_source_end_pos; + const char *m_well_formed_error_pos; + const char *m_cannot_convert_error_pos; +public: + const char *source_end_pos() const + { return m_source_end_pos; } + const char *well_formed_error_pos() const + { return m_well_formed_error_pos; } + const char *cannot_convert_error_pos() const + { return m_cannot_convert_error_pos; } + const char *most_important_error_pos() const + { + return well_formed_error_pos() ? well_formed_error_pos() : + cannot_convert_error_pos(); + } + /* + Copy a string. Fix bad bytes/characters one Unicode conversion, + break on bad bytes in case of non-Unicode copying. + */ + uint well_formed_copy(CHARSET_INFO *to_cs, char *to, uint to_length, + CHARSET_INFO *from_cs, const char *from, + uint from_length, uint nchars); + // Same as above, but without the "nchars" limit. + uint well_formed_copy(CHARSET_INFO *to_cs, char *to, uint to_length, + CHARSET_INFO *from_cs, const char *from, + uint from_length) + { + return well_formed_copy(to_cs, to, to_length, + from_cs, from, from_length, + from_length /* No limit on "nchars"*/); + } + /* + Copy a string. If a bad byte sequence is found in case of non-Unicode + copying, continues processing and replaces bad bytes to '?'. + */ + uint copy_fix(CHARSET_INFO *to_cs, char *to, uint to_length, + CHARSET_INFO *from_cs, const char *from, uint from_length) + { + uint length= well_formed_copy(to_cs, to, to_length, + from_cs, from, from_length, + from_length /* No limit on nchars */); + if (well_formed_error_pos() && source_end_pos() < from + from_length) + { + /* + There was an error and there are still some bytes in the source string. 
+ This is possible if there were no character set conversion and a + malformed byte sequence was found. Copy the rest and replace bad + bytes to '?'. Note: m_source_end_pos is not updated!!! + */ + uint dummy_errors; + length+= copy_and_convert(to + length, to_length - length, to_cs, + source_end_pos(), + from_length - (source_end_pos() - from), + from_cs, &dummy_errors); + } + return length; + } +}; + + size_t my_copy_with_hex_escaping(CHARSET_INFO *cs, char *dst, size_t dstlen, const char *src, size_t srclen); diff --git a/storage/perfschema/table_session_connect.cc b/storage/perfschema/table_session_connect.cc index a5c557baeb2..bbe99ffa794 100644 --- a/storage/perfschema/table_session_connect.cc +++ b/storage/perfschema/table_session_connect.cc @@ -61,8 +61,7 @@ bool parse_length_encoded_string(const char **ptr, uint nchars_max) { ulong copy_length, data_length; - const char *well_formed_error_pos= NULL, *cannot_convert_error_pos= NULL, - *from_end_pos= NULL; + String_copier copier; copy_length= data_length= net_field_length((uchar **) ptr); @@ -73,11 +72,8 @@ bool parse_length_encoded_string(const char **ptr, if (*ptr - start_ptr + data_length > input_length) return true; - copy_length= well_formed_copy_nchars(&my_charset_utf8_bin, dest, dest_size, - from_cs, *ptr, data_length, nchars_max, - &well_formed_error_pos, - &cannot_convert_error_pos, - &from_end_pos); + copy_length= copier.well_formed_copy(&my_charset_utf8_bin, dest, dest_size, + from_cs, *ptr, data_length, nchars_max); *copied_len= copy_length; (*ptr)+= data_length;