diff --git a/mysql-test/main/column_compression.result b/mysql-test/main/column_compression.result index f4eb0461ed3..ace65387181 100644 --- a/mysql-test/main/column_compression.result +++ b/mysql-test/main/column_compression.result @@ -1336,15 +1336,33 @@ a LENGTH(a) DROP TABLE t1; CREATE TABLE t1(a TINYTEXT COMPRESSED); SET column_compression_threshold=300; +INSERT INTO t1 VALUES(REPEAT('a', 254)); +INSERT INTO t1 VALUES(REPEAT(' ', 254)); INSERT INTO t1 VALUES(REPEAT('a', 255)); ERROR 22001: Data too long for column 'a' at row 1 INSERT INTO t1 VALUES(REPEAT(' ', 255)); Warnings: Note 1265 Data truncated for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT('a', 256)); +ERROR 22001: Data too long for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT(' ', 256)); +Warnings: +Note 1265 Data truncated for column 'a' at row 1 +Note 1265 Data truncated for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT('a', 257)); +ERROR 22001: Data too long for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT(' ', 257)); +Warnings: +Note 1265 Data truncated for column 'a' at row 1 +Note 1265 Data truncated for column 'a' at row 1 SET column_compression_threshold=DEFAULT; -SELECT a, LENGTH(a) FROM t1; -a LENGTH(a) - 254 +SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1; +LEFT(a, 10) LENGTH(a) + 254 + 254 + 254 + 254 +aaaaaaaaaa 254 DROP TABLE t1; # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length CREATE TABLE t1(a VARCHAR(255) COMPRESSED); @@ -1360,6 +1378,32 @@ SELECT a, LENGTH(a) FROM t1; a LENGTH(a) aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 255 DROP TABLE t1; +CREATE TABLE t1(a VARCHAR(65531) COMPRESSED); +SET column_compression_threshold=65537; +INSERT INTO t1 VALUES(REPEAT('a', 65530)); +INSERT INTO t1 VALUES(REPEAT(' ', 65530)); +INSERT INTO t1 
VALUES(REPEAT('a', 65531)); +INSERT INTO t1 VALUES(REPEAT(' ', 65531)); +INSERT INTO t1 VALUES(REPEAT('a', 65532)); +ERROR 22001: Data too long for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT(' ', 65532)); +Warnings: +Note 1265 Data truncated for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT('a', 65533)); +ERROR 22001: Data too long for column 'a' at row 1 +INSERT INTO t1 VALUES(REPEAT(' ', 65533)); +Warnings: +Note 1265 Data truncated for column 'a' at row 1 +SET column_compression_threshold=DEFAULT; +SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2; +LEFT(a, 10) LENGTH(a) + 65530 + 65531 + 65531 + 65531 +aaaaaaaaaa 65530 +aaaaaaaaaa 65531 +DROP TABLE t1; # # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in # Field_longstr::compress @@ -1419,3 +1463,12 @@ COLUMN_NAME CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH a 10 10 b 10 30 DROP TABLE t1; +# +# MDEV-15592 - Column COMPRESSED should select a 'high order' datatype +# +CREATE TABLE t1(a TINYTEXT COMPRESSED); +INSERT INTO t1 VALUES(REPEAT('a', 255)); +SELECT LEFT(a, 10), LENGTH(a) FROM t1; +LEFT(a, 10) LENGTH(a) +aaaaaaaaaa 255 +DROP TABLE t1; diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test index cdd9adb254d..f3220503694 100644 --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -64,11 +64,19 @@ DROP TABLE t1; CREATE TABLE t1(a TINYTEXT COMPRESSED); SET column_compression_threshold=300; +INSERT INTO t1 VALUES(REPEAT('a', 254)); +INSERT INTO t1 VALUES(REPEAT(' ', 254)); --error ER_DATA_TOO_LONG INSERT INTO t1 VALUES(REPEAT('a', 255)); INSERT INTO t1 VALUES(REPEAT(' ', 255)); +--error ER_DATA_TOO_LONG +INSERT INTO t1 VALUES(REPEAT('a', 256)); +INSERT INTO t1 VALUES(REPEAT(' ', 256)); +--error ER_DATA_TOO_LONG +INSERT INTO t1 VALUES(REPEAT('a', 257)); +INSERT INTO t1 VALUES(REPEAT(' ', 257)); SET column_compression_threshold=DEFAULT; -SELECT a, LENGTH(a) FROM t1; +SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER 
BY 1; DROP TABLE t1; --echo # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length @@ -80,6 +88,22 @@ SET column_compression_threshold=DEFAULT; SELECT a, LENGTH(a) FROM t1; DROP TABLE t1; +CREATE TABLE t1(a VARCHAR(65531) COMPRESSED); +SET column_compression_threshold=65537; +INSERT INTO t1 VALUES(REPEAT('a', 65530)); +INSERT INTO t1 VALUES(REPEAT(' ', 65530)); +INSERT INTO t1 VALUES(REPEAT('a', 65531)); +INSERT INTO t1 VALUES(REPEAT(' ', 65531)); +--error ER_DATA_TOO_LONG +INSERT INTO t1 VALUES(REPEAT('a', 65532)); +INSERT INTO t1 VALUES(REPEAT(' ', 65532)); +--error ER_DATA_TOO_LONG +INSERT INTO t1 VALUES(REPEAT('a', 65533)); +INSERT INTO t1 VALUES(REPEAT(' ', 65533)); +SET column_compression_threshold=DEFAULT; +SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2; +DROP TABLE t1; + --echo # --echo # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in @@ -113,6 +137,7 @@ INSERT INTO t1 VALUES('a'); SET column_compression_threshold=DEFAULT; DROP TABLE t1; + --echo # --echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data --echo # @@ -136,3 +161,12 @@ FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1' AND COLUMN_NAME IN ('a','b') ORDER BY COLUMN_NAME; DROP TABLE t1; + + +--echo # +--echo # MDEV-15592 - Column COMPRESSED should select a 'high order' datatype +--echo # +CREATE TABLE t1(a TINYTEXT COMPRESSED); +INSERT INTO t1 VALUES(REPEAT('a', 255)); +SELECT LEFT(a, 10), LENGTH(a) FROM t1; +DROP TABLE t1; diff --git a/sql/field.cc b/sql/field.cc index 1edc0cbc218..15ca9ebf6d9 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -7920,10 +7920,13 @@ void Field_varstring::hash(ulong *nr, ulong *nr2) Compress field @param[out] to destination buffer for compressed data - @param[in,out] to_length in: size of to, out: compressed data length + @param[in] to_length size of to @param[in] from data to compress @param[in] length from length + @param[in] max_length truncate `from' to this length + @param[out] out_length 
compressed data length @param[in] cs from character set + @param[in] nchars copy no more than "nchars" characters In worst case (no compression performed) storage requirement is increased by 1 byte to store header. If it exceeds field length, normal data truncation is @@ -7947,42 +7950,57 @@ void Field_varstring::hash(ulong *nr, ulong *nr2) followed by compressed data. */ -int Field_longstr::compress(char *to, uint *to_length, +int Field_longstr::compress(char *to, uint to_length, const char *from, uint length, + uint max_length, + uint *out_length, CHARSET_INFO *cs, size_t nchars) { THD *thd= get_thd(); - char *buf= 0; + char *buf; + uint buf_length; int rc= 0; if (String::needs_conversion_on_storage(length, cs, field_charset) || - *to_length <= length) + max_length < length) { - if (!(buf= (char*) my_malloc(*to_length - 1, MYF(MY_WME)))) + set_if_smaller(max_length, static_cast<ulonglong>(field_charset->mbmaxlen) * length + 1); + if (!(buf= (char*) my_malloc(max_length, MYF(MY_WME)))) { - *to_length= 0; + *out_length= 0; return -1; } - rc= well_formed_copy_with_check(buf, *to_length - 1, cs, from, length, - nchars, true, &length); - from= buf; + rc= well_formed_copy_with_check(buf, max_length, cs, from, length, + nchars, true, &buf_length); + } + else + { + buf= const_cast<char*>(from); + buf_length= length; } - if (length == 0) - *to_length= 0; - else if (length >= thd->variables.column_compression_threshold && - (*to_length= compression_method()->compress(thd, to, from, length))) + if (buf_length == 0) + *out_length= 0; + else if (buf_length >= thd->variables.column_compression_threshold && + (*out_length= compression_method()->compress(thd, to, buf, buf_length))) status_var_increment(thd->status_var.column_compressions); else { /* Store uncompressed */ to[0]= 0; - memcpy(to + 1, from, length); - *to_length= length + 1; + if (buf_length < to_length) + memcpy(to + 1, buf, buf_length); + else + { + /* Storing string at blob capacity, e.g. 255 bytes string to TINYBLOB. 
*/ + rc= well_formed_copy_with_check(to + 1, to_length - 1, cs, from, length, + nchars, true, &buf_length); + } + *out_length= buf_length + 1; } - if (buf) + if (buf != from) my_free(buf); return rc; } @@ -8036,10 +8054,12 @@ int Field_varstring_compressed::store(const char *from, size_t length, CHARSET_INFO *cs) { ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; - uint to_length= (uint)MY_MIN(field_length, field_charset->mbmaxlen * length + 1); - int rc= compress((char*) get_data(), &to_length, from, (uint) length, cs, - (to_length - 1) / field_charset->mbmaxlen); - store_length(to_length); + uint compressed_length; + int rc= compress((char*) get_data(), field_length, from, (uint) length, + Field_varstring_compressed::max_display_length(), + &compressed_length, cs, + Field_varstring_compressed::char_length()); + store_length(compressed_length); return rc; } @@ -8648,7 +8668,10 @@ int Field_blob_compressed::store(const char *from, size_t length, CHARSET_INFO *cs) { ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; - uint to_length= (uint)MY_MIN(max_data_length(), field_charset->mbmaxlen * length + 1); + uint compressed_length; + uint max_length= max_data_length(); + uint to_length= (uint) MY_MIN(max_length, + field_charset->mbmaxlen * length + 1); String tmp(from, length, cs); int rc; @@ -8658,9 +8681,9 @@ int Field_blob_compressed::store(const char *from, size_t length, if (value.alloc(to_length)) goto oom; - rc= compress((char*) value.ptr(), &to_length, tmp.ptr(), (uint) length, cs, - (uint) length); - set_ptr(to_length, (uchar*) value.ptr()); + rc= compress((char*) value.ptr(), to_length, tmp.ptr(), (uint) length, + max_length, &compressed_length, cs, (uint) length); + set_ptr(compressed_length, (uchar*) value.ptr()); return rc; oom: diff --git a/sql/field.h b/sql/field.h index baed1511837..99da0cf624f 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1790,8 +1790,10 @@ protected: const Item *item) const; bool cmp_to_string_with_stricter_collation(const Item_bool_func *cond, 
const Item *item) const; - int compress(char *to, uint *to_length, + int compress(char *to, uint to_length, const char *from, uint length, + uint max_length, + uint *out_length, CHARSET_INFO *cs, size_t nchars); String *uncompress(String *val_buffer, String *val_ptr, const uchar *from, uint from_length);