diff --git a/mysql-test/main/natural_sort_key.result b/mysql-test/main/natural_sort_key.result index 1a6cf1cc3ea..a99fa7a71b8 100644 --- a/mysql-test/main/natural_sort_key.result +++ b/mysql-test/main/natural_sort_key.result @@ -2,14 +2,6 @@ SET NAMES utf8mb4; SELECT NATURAL_SORT_KEY(NULL); NATURAL_SORT_KEY(NULL) NULL -SELECT '' c WHERE 0 UNION VALUES('a10'),('a9'),('a1000'), ('a0'),('b'),('b0') ORDER BY NATURAL_SORT_KEY(c); -c -a0 -a9 -a10 -a1000 -b -b0 SELECT NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1)); NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1)) NULL @@ -22,14 +14,10 @@ Warnings: Warning 1301 Result of natural_sort_key() was larger than max_allowed_packet (16777216) - truncated CREATE TABLE t1( c VARCHAR(30) CHARACTER SET latin1 COLLATE latin1_bin, -k VARCHAR(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) INVISIBLE, -KEY(k,c)) ENGINE=InnoDB; +k VARCHAR(60) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) VIRTUAL INVISIBLE); INSERT INTO t1 values ('A1'),('a1'),('A100'),('a100'),('A2'),('ä2'),('a2'),('A99'), ('äb'),('B1'),('B100'),('B9'),('C'),('100'); -EXPLAIN SELECT c FROM t1 ORDER BY k,c; -id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL k 216 NULL 14 Using index #Natural sort order. SELECT c FROM t1 ORDER BY k,c; c @@ -69,55 +57,136 @@ SHOW CREATE TABLE t2; Table Create Table t2 CREATE TABLE `t2` ( `c` varchar(30) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL, - `NATURAL_SORT_KEY(c)` varchar(60) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL + `NATURAL_SORT_KEY(c)` varchar(45) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL ) ENGINE=MyISAM DEFAULT CHARSET=latin1 DROP TABLE t1,t2; -SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead +CREATE TABLE t1(c VARCHAR(1), k VARCHAR(2) AS (NATURAL_SORT_KEY(c)) STORED); +ERROR HY000: Function or expression 'natural_sort_key()' cannot be used in the GENERATED ALWAYS AS clause of `k` +SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),36,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead FROM ( -SELECT 0 val -UNION VALUES ('1'),('01'),('0001') +SELECT '0' val +UNION SELECT seq FROM seq_1_to_9 UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27 ) AS numbers ORDER BY sortkey; value sortkey encoding_overhead -0 000 2 -1 010 2 -01 011 1 -0001 013 -1 -10 1100 2 -100 21000 2 -1000 310000 2 -10000 4100000 2 -100000 51000000 2 -1000000 610000000 2 -10000000 7100000000 2 -100000000 81000000000 2 -1000000000 9010000000000 3 -10000000000 91100000000000 3 -100000000000 921000000000000 3 -1000000000000 9310000000000000 3 -10000000000000 94100000000000000 3 -100000000000000 951000000000000000 3 -1000000000000000 9610000000000000000 3 -10000000000000000 97100000000000000000 3 -100000000000000000 981000000000000000000 3 -1000000000000000000 99010000000000000000000 4 -10000000000000000000 991100000000000000000000 4 -100000000000000000000 9921000000000000000000000 4 -1000000000000000000000 99310000000000000000000000 4 -10000000000000000000000 994100000000000000000000000 4 -100000000000000000000000 9951000000000000000000000000 4 -1000000000000000000000000 99610000000000000000000000000 4 -10000000000000000000000000 997100000000000000000000000000 4 -100000000000000000000000000 9981000000000000000000000000000 4 -1000000000000000000000000000 99901271000000000000000000000000000 8 -SELECT val -FROM -( -SELECT 0 val WHERE 0 -UNION VALUES ('1/'),('01'),('1a') -) AS strings ORDER BY NATURAL_SORT_KEY(val); -val -1/ -1a -01 +0 00 1 +1 01 1 +2 02 1 +3 03 1 +4 04 1 +5 05 1 +6 06 1 +7 07 1 +8 08 1 +9 09 1 +10 110 1 +100 2100 1 +1000 31000 1 +10000 410000 1 +100000 5100000 1 +1000000 61000000 1 +10000000 710000000 1 +100000000 8100000000 1 +1000000000 901000000000 2 +10000000000 9110000000000 2 +100000000000 92100000000000 2 +1000000000000 931000000000000 2 +10000000000000 9410000000000000 2 +100000000000000 95100000000000000 2 +1000000000000000 961000000000000000 2 +10000000000000000 9710000000000000000 2 +100000000000000000 98100000000000000000 2 +1000000000000000000 9901000000000000000000 3 +10000000000000000000 99110000000000000000000 3 +100000000000000000000 992100000000000000000000 3 +1000000000000000000000 9931000000000000000000000 3 +10000000000000000000000 99410000000000000000000000 3 +100000000000000000000000 995100000000000000000000000 3 +1000000000000000000000000 9961000000000000000000000000 3 +10000000000000000000000000 99710000000000000000000000000 3 +100000000000000000000000000 998100000000000000000000000000 3 +1000000000000000000000000000 99901271000000000000000000000000000 7 +SELECT natural_sort_key('1') = natural_sort_key('0001'); +natural_sort_key('1') = natural_sort_key('0001') +1 +SELECT natural_sort_key('1.1') = natural_sort_key('1.00001'); +natural_sort_key('1.1') = natural_sort_key('1.00001') +1 +SELECT RPAD(val,20,' ') value, NATURAL_SORT_KEY(val) FROM +(SELECT '' val WHERE 0 UNION VALUES +('fred'), +('pic2'), +('pic100a'), +('pic120'), +('pic121'), +('jane'), +('tom'), +('pic02a'), +('pic3'), +('pic4'), +('1-20'), +('pic100'), +('pic02000'), +('10-20'), +('1-02'), +('1-2'), +('pic01'), +('pic02'), +('pic 6'), +('pic 7'), +('pic 5'), +('pic05'), +('pic 5 '), +('pic 5 something'), +('pic 4 else'), +('2000-1-10'), +('1999-12-25'), +('1999-3-3'), +('2000-3-23'), +('2000-1-2'), +('100.200.300.400'), +('100.50.60.70'), +('100.8.9.0'), +('a1b1'), +('a01b2'), +('a1b2'), +('a01b3') +)AS data ORDER BY 2,1; +value NATURAL_SORT_KEY(val) +1-02 01-02 +1-2 01-02 +1-20 01-120 +10-20 110-120 +100.8.9.0 2100.08.09.00 +100.50.60.70 2100.150.160.170 +100.200.300.400 2100.2200.2300.2400 +1999-3-3 31999-03-03 +1999-12-25 31999-112-125 +2000-1-2 32000-01-02 +2000-1-10 32000-01-110 +2000-3-23 32000-03-123 +a1b1 a01b01 +a01b2 a01b02 +a1b2 a01b02 +a01b3 a01b03 +fred fred +jane jane +pic 7 pic 07 +pic 4 else pic 04 else +pic 5 pic 05 +pic 5 something pic 05 something +pic 6 pic 06 +pic01 pic01 +pic02 pic02 +pic2 pic02 +pic02a pic02a +pic3 pic03 +pic4 pic04 +pic05 pic05 +pic100 pic2100 +pic100a pic2100a +pic120 pic2120 +pic121 pic2121 +pic02000 pic32000 +tom tom diff --git a/mysql-test/main/natural_sort_key.test b/mysql-test/main/natural_sort_key.test index f918c0e9bdf..dda25c9e1f8 100644 --- a/mysql-test/main/natural_sort_key.test +++ b/mysql-test/main/natural_sort_key.test @@ -1,25 +1,21 @@ ---source include/have_innodb.inc --source include/have_sequence.inc SET NAMES utf8mb4; SELECT NATURAL_SORT_KEY(NULL); -# Sort without tables -SELECT '' c WHERE 0 UNION VALUES('a10'),('a9'),('a1000'), ('a0'),('b'),('b0') ORDER BY NATURAL_SORT_KEY(c); - #Test that max packet overflow produces NULL plus warning SELECT NATURAL_SORT_KEY(repeat('a1',@@max_allowed_packet/2-1)); SELECT NATURAL_SORT_KEY(repeat('1',@@max_allowed_packet-1)); -#Test with virtual(index only) key +#Test with virtual CREATE TABLE t1( c VARCHAR(30) CHARACTER SET latin1 COLLATE latin1_bin, - k VARCHAR(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) INVISIBLE, - KEY(k,c)) ENGINE=InnoDB; + k VARCHAR(60) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci AS (NATURAL_SORT_KEY(CONVERT(c USING utf8mb4))) VIRTUAL INVISIBLE); + INSERT INTO t1 values ('A1'),('a1'),('A100'),('a100'),('A2'),('ä2'),('a2'),('A99'), ('äb'),('B1'),('B100'),('B9'),('C'),('100'); -EXPLAIN SELECT c FROM t1 ORDER BY k,c; + -- echo #Natural sort order. # We sort by 2 colums, for stable sort,as we do not currenly have a case and accent insensitive Unicode collation. SELECT c FROM t1 ORDER BY k,c; @@ -30,21 +26,63 @@ CREATE TABLE t2 AS SELECT c, NATURAL_SORT_KEY(c) FROM t1 WHERE 0; SHOW CREATE TABLE t2; DROP TABLE t1,t2; -#Show encoding of numbers, with some leading whitespace. -SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead +#Virtual STORED is temporarily disabled +--error ER_GENERATED_COLUMN_FUNCTION_IS_NOT_ALLOWED +CREATE TABLE t1(c VARCHAR(1), k VARCHAR(2) AS (NATURAL_SORT_KEY(c)) STORED); + +#Show encoding of numbers. +SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),36,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead FROM ( -SELECT 0 val -UNION VALUES ('1'),('01'),('0001') +SELECT '0' val +UNION SELECT seq FROM seq_1_to_9 UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27 ) AS numbers ORDER BY sortkey; +# leading zeros ignored +SELECT natural_sort_key('1') = natural_sort_key('0001'); +SELECT natural_sort_key('1.1') = natural_sort_key('1.00001'); -#Test that sort order with leading zeros (lead zeros sort larger) -SELECT val -FROM -( -SELECT 0 val WHERE 0 -UNION VALUES ('1/'),('01'),('1a') -) AS strings ORDER BY NATURAL_SORT_KEY(val); +# Some examples from https://github.com/sourcefrog/natsort/ +# words +SELECT RPAD(val,20,' ') value, NATURAL_SORT_KEY(val) FROM +(SELECT '' val WHERE 0 UNION VALUES +('fred'), +('pic2'), +('pic100a'), +('pic120'), +('pic121'), +('jane'), +('tom'), +('pic02a'), +('pic3'), +('pic4'), +('1-20'), +('pic100'), +('pic02000'), +('10-20'), +('1-02'), +('1-2'), +('pic01'), +('pic02'), +('pic 6'), +('pic 7'), +('pic 5'), +('pic05'), +('pic 5 '), +('pic 5 something'), +('pic 4 else'), +('2000-1-10'), +('1999-12-25'), +('1999-3-3'), +('2000-3-23'), +('2000-1-2'), +('100.200.300.400'), +('100.50.60.70'), +('100.8.9.0'), +('a1b1'), +('a01b2'), +('a1b2'), +('a01b3') +)AS data ORDER BY 2,1; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index cfacddf62d4..60e2d297459 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -5450,7 +5450,7 @@ String *Item_temptable_rowid::val_str(String *str) small, as it only computes variable *length prefixes*. @param[in] n - the number - @param[in] s - output buffer (should be at least 26 bytes large) + @param[in] s - output string @return - length of encoding @@ -5467,14 +5467,14 @@ String *Item_temptable_rowid::val_str(String *str) Output calculated as concat('99', '0' + n -18)' Output range '990'-'998' - - n is from 28 to SIZE_T_MAX + - n is from 27 to SIZE_T_MAX Output starts with '999', then log10(n) is encoded as 2-digit decimal number then the number itself is added. Example : for 28 key is concat('999', '01' , '28') i.e '9990128' - Key legth is 5 + ceil(log10(n)) + Key length is 5 + ceil(log10(n)) Output range is (64bit)'9990128' - '9991918446744073709551615' @@ -5482,26 +5482,28 @@ String *Item_temptable_rowid::val_str(String *str) */ /* Largest length of encoded string.*/ -#define NATSORT_BUFSIZE 26 -static size_t natsort_encode_length(size_t n, char *s) +static size_t natsort_encode_length_max(size_t n) +{ + return (n < 27) ? n/9+1 : 26; +} + +static void natsort_encode_length(size_t n, String* out) { if (n < 27) { - size_t n_nines= n / 9; - if (n_nines) - memset(s, '9', n_nines); - s[n_nines]= char(n % 9 + '0'); - s[n_nines + 1]= 0; - return n_nines + 1; + if (n >= 9) + out->fill(out->length() + n/9,'9'); + out->append(char(n % 9 + '0')); + return; } - memset(s, '9', 3); size_t log10n= 0; for (size_t tmp= n / 10; tmp; tmp/= 10) log10n++; - s[3]= '0' + (char) (log10n / 10); - s[4]= '0' + (char) (log10n % 10); - return longlong10_to_str(n, s + 5, 10) - s; + out->fill(out->length() + 3, '9'); + out->append('0' + (char) (log10n / 10)); + out->append('0' + (char) (log10n % 10)); + out->append_ulonglong(n); } enum class NATSORT_ERR @@ -5520,8 +5522,6 @@ enum class NATSORT_ERR @param[in] n_digits - length of the string, in characters, not counting leading zeros. - @param[in] n_lead_zeros - leading zeros count. - @param[out] out - String to write to. The string should have enough preallocated space to fit the encoded key. @@ -5530,58 +5530,43 @@ enum class NATSORT_ERR NATSORT_ERR::KEY_TOO_LARGE - out string does not have enough space left to accomodate the key. - Note: - Special case, where there are only leading zeros - n_digits == 0 and n_leading_zeros > 0 - will treated as-if in = "0", n_digits = 1, n_leading_zeros - is decremented. The resulting encoding of the numeric string is then - CONCAT(natsort_encode_length(n_digits), in, - natsort_encode_length(n_leading_zeros)) + CONCAT(natsort_encode_length(n_digits), in) */ static NATSORT_ERR natsort_encode_numeric_string(const char *in, size_t n_digits, - size_t n_leading_zeros, String *out) { - char buf[NATSORT_BUFSIZE]; DBUG_ASSERT(in); DBUG_ASSERT(n_digits); - size_t len; - len= natsort_encode_length(n_digits - 1, buf); - - if (out->length() + len + n_digits > out->alloced_length()) + if (out->length() + natsort_encode_length_max(n_digits - 1) + n_digits > + out->alloced_length()) return NATSORT_ERR::KEY_TOO_LARGE; - out->append(buf, len); + natsort_encode_length(n_digits - 1, out); out->append(in, n_digits); - - len= natsort_encode_length(n_leading_zeros, buf); - if (out->length() + len > out->alloced_length()) - return NATSORT_ERR::KEY_TOO_LARGE; - out->append(buf, len); return NATSORT_ERR::SUCCESS; } /* Calculate max size of the natsort key. - A digit expands to 3 chars - length_prefix, the digit, lead_zero_cnt(0) + A digit in string expands to 2 chars length_prefix , and the digit With even length L=2N, the largest key corresponds to input string - in form REPEAT('',N) and the length of a key is - 3N + N = 4*N = 2*L + in form REPEAT(,N) and the length of a key is + 2N + N = 3N + + With odd input length L=2N+1, largest key is built by appending + a digit at the end, with key length 3N+2 - With odd input length L=2*N+1, largest key corresponds to - CONCAT(,REPEAT('', N) - with key length is 3 + 4*N = 2*L+1 */ static size_t natsort_max_key_size(size_t input_size) { - return input_size * 2 + input_size % 2; + return input_size + (input_size + 1)/2 ; } /** @@ -5600,8 +5585,8 @@ static NATSORT_ERR to_natsort_key(const String *in, String *out, size_t n_digits= 0; size_t n_lead_zeros= 0; size_t num_start; - size_t reserve_length= std::min(natsort_max_key_size(in->length()), - max_key_size); + size_t reserve_length= std::min( + natsort_max_key_size(in->length()) + MAX_BIGINT_WIDTH + 2, max_key_size); out->length(0); out->set_charset(in->charset()); @@ -5624,7 +5609,7 @@ static NATSORT_ERR to_natsort_key(const String *in, String *out, n_digits= 1; } NATSORT_ERR err= natsort_encode_numeric_string( - in->ptr() + num_start, n_digits, n_lead_zeros, out); + in->ptr() + num_start, n_digits, out); if (err != NATSORT_ERR::SUCCESS) return err; @@ -5720,6 +5705,16 @@ bool Item_func_natural_sort_key::fix_length_and_dec(void) return false; } +/** + Disable use in stored virtual functions. Temporarily(?), until + the encoding is stable. +*/ +bool Item_func_natural_sort_key::check_vcol_func_processor(void *arg) +{ + return mark_unsupported_function(func_name(), "()", arg, + VCOL_NON_DETERMINISTIC); +} + #ifdef WITH_WSREP #include "wsrep_mysqld.h" diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 63894f1d6d9..1f925053241 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -287,6 +287,8 @@ public: { return get_item_copy(thd, this); } + + bool check_vcol_func_processor(void *arg) override; }; class Item_func_concat :public Item_str_func