diff --git a/mysql-test/r/ctype_utf32_uca.result b/mysql-test/r/ctype_utf32_uca.result index fd5a4199217..a9f056b2e44 100644 --- a/mysql-test/r/ctype_utf32_uca.result +++ b/mysql-test/r/ctype_utf32_uca.result @@ -2415,5 +2415,25 @@ HEX(s1) 00000061 DROP TABLE t1; # +# Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN +# MY_UCA_CHARCMP WITH UTF32 +# +SET collation_connection=utf32_unicode_ci; +CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL); +INSERT INTO t1 VALUES ('a'), ('b'); +CREATE TABLE t2 (b VARBINARY(5) NOT NULL); +#insert chars outside of BMP +INSERT INTO t2 VALUEs (0x082837),(0x082837); +#test for read-out-of-bounds with non-BMP chars as a LIKE pattern +SELECT * FROM t1,t2 WHERE a LIKE b; +a b +#test the original statement +SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0 +RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b; +1 +1 +1 +DROP TABLE t1,t2; +# # End of 5.5 tests # diff --git a/mysql-test/t/ctype_utf32_uca.test b/mysql-test/t/ctype_utf32_uca.test index a62ffbf95c7..1d79fbe1616 100644 --- a/mysql-test/t/ctype_utf32_uca.test +++ b/mysql-test/t/ctype_utf32_uca.test @@ -293,6 +293,27 @@ SET collation_connection=utf32_czech_ci; --source include/ctype_czech.inc --source include/ctype_like_ignorable.inc +--echo # +--echo # Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN +--echo # MY_UCA_CHARCMP WITH UTF32 +--echo # + +SET collation_connection=utf32_unicode_ci; +CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL); +INSERT INTO t1 VALUES ('a'), ('b'); +CREATE TABLE t2 (b VARBINARY(5) NOT NULL); + +--echo #insert chars outside of BMP +INSERT INTO t2 VALUEs (0x082837),(0x082837); + +--echo #test for read-out-of-bounds with non-BMP chars as a LIKE pattern +SELECT * FROM t1,t2 WHERE a LIKE b; + +--echo #test the original statement +SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0 +RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b; + +DROP TABLE t1,t2; --echo # --echo # End of 5.5 tests 
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index d6be395535c..70d2df3bab9 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -42,6 +42,7 @@ #define MY_UCA_NCHARS 256 #define MY_UCA_CMASK 255 #define MY_UCA_PSHIFT 8 +#define MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT 0xFFFF uint16 page000data[]= { /* 0000 (4 weights per char) */ 0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000, @@ -6984,7 +6985,7 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) return -1; scanner->sbeg+= mb_len; - if (wc > 0xFFFF) + if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT) { /* Return 0xFFFD as weight for all characters outside BMP */ scanner->wbeg= nochar; @@ -7322,6 +7323,33 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs, +/** + Helper function: + Find address of weights of the given character. + + @param cs collation; cs->sort_order_big holds the per-page weight tables + and cs->sort_order the per-page number of weights per character + @param wc character Unicode code point + + @return Weight array + @retval pointer to weight array for the given character, + or NULL if wc is above MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT or its page has no weight table. +*/ + +static inline uint16 * +my_char_weight_addr(CHARSET_INFO *cs, uint wc) +{ + uint page, ofst; + uchar *ucal= cs->sort_order; + uint16 **ucaw= cs->sort_order_big; + + return wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ? NULL : + (ucaw[page= (wc >> 8)] ? + ucaw[page] + (ofst= (wc & 0xFF)) * ucal[page] : + NULL); +} + + /* This function compares if two characters are the same. The sign +1 or -1 does not matter. 
The only @@ -7332,17 +7360,20 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs, static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { - size_t page1= wc1 >> MY_UCA_PSHIFT; - size_t page2= wc2 >> MY_UCA_PSHIFT; - uchar *ucal= cs->sort_order; - uint16 **ucaw= cs->sort_order_big; - size_t length1= ucal[page1]; - size_t length2= ucal[page2]; - uint16 *weight1= ucaw[page1] + (wc1 & MY_UCA_CMASK) * ucal[page1]; - uint16 *weight2= ucaw[page2] + (wc2 & MY_UCA_CMASK) * ucal[page2]; + size_t length1, length2; + uint16 *weight1= my_char_weight_addr(cs, wc1); + uint16 *weight2= my_char_weight_addr(cs, wc2); if (!weight1 || !weight2) return wc1 != wc2; + + /* Quickly compare first weights */ + if (weight1[0] != weight2[0]) + return 1; + + /* Thoroughly compare all weights */ + length1= cs->sort_order[wc1 >> MY_UCA_PSHIFT]; + length2= cs->sort_order[wc2 >> MY_UCA_PSHIFT]; if (length1 > length2) return memcmp((const void*)weight1, (const void*)weight2, length2*2) ? @@ -7924,6 +7955,11 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t)) */ for (i=0; i < rc; i++) { + /* check if the shift or the reset characters are out of range */ + if (rule[i].curr[0] > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT || + rule[i].base > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT) + return 1; + if (!rule[i].curr[1]) /* If not a contraction */ { uint pageb= (rule[i].base >> 8) & 0xFF;