UTF8 now processes space as a PAD character correctly.
This commit is contained in:
parent
b11ee0d8bb
commit
36e7f41d63
@ -12,3 +12,27 @@ select * from t1;
|
|||||||
id
|
id
|
||||||
000000000001
|
000000000001
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
SELECT 'a' = 'a ';
|
||||||
|
'a' = 'a '
|
||||||
|
1
|
||||||
|
SELECT 'a\0' < 'a';
|
||||||
|
'a\0' < 'a'
|
||||||
|
1
|
||||||
|
SELECT 'a\0' < 'a ';
|
||||||
|
'a\0' < 'a '
|
||||||
|
1
|
||||||
|
SELECT 'a\t' < 'a';
|
||||||
|
'a\t' < 'a'
|
||||||
|
1
|
||||||
|
SELECT 'a\t' < 'a ';
|
||||||
|
'a\t' < 'a '
|
||||||
|
1
|
||||||
|
CREATE TABLE t1 (a char(10) not null);
|
||||||
|
INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
|
||||||
|
SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
|
||||||
|
hex(a) STRCMP(a,'a') STRCMP(a,'a ')
|
||||||
|
61 0 0
|
||||||
|
6100 -1 -1
|
||||||
|
6109 -1 -1
|
||||||
|
61 0 0
|
||||||
|
DROP TABLE t1;
|
||||||
|
@ -63,6 +63,30 @@ select 'A' like 'a' collate utf8_bin;
|
|||||||
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
|
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
|
||||||
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
|
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
|
||||||
1
|
1
|
||||||
|
SELECT 'a' = 'a ';
|
||||||
|
'a' = 'a '
|
||||||
|
1
|
||||||
|
SELECT 'a\0' < 'a';
|
||||||
|
'a\0' < 'a'
|
||||||
|
1
|
||||||
|
SELECT 'a\0' < 'a ';
|
||||||
|
'a\0' < 'a '
|
||||||
|
1
|
||||||
|
SELECT 'a\t' < 'a';
|
||||||
|
'a\t' < 'a'
|
||||||
|
1
|
||||||
|
SELECT 'a\t' < 'a ';
|
||||||
|
'a\t' < 'a '
|
||||||
|
1
|
||||||
|
CREATE TABLE t1 (a char(10) character set utf8 not null);
|
||||||
|
INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
|
||||||
|
SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
|
||||||
|
hex(a) STRCMP(a,'a') STRCMP(a,'a ')
|
||||||
|
61 0 0
|
||||||
|
6100 -1 -1
|
||||||
|
6109 -1 -1
|
||||||
|
61 0 0
|
||||||
|
DROP TABLE t1;
|
||||||
select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es');
|
select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es');
|
||||||
insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es')
|
insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es')
|
||||||
this is a test
|
this is a test
|
||||||
|
@ -13,3 +13,20 @@ select * from t1 where id=000000000001;
|
|||||||
delete from t1 where id=000000000002;
|
delete from t1 where id=000000000002;
|
||||||
select * from t1;
|
select * from t1;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Check the following:
|
||||||
|
# "a" == "a "
|
||||||
|
# "a\0" < "a"
|
||||||
|
# "a\0" < "a "
|
||||||
|
|
||||||
|
SELECT 'a' = 'a ';
|
||||||
|
SELECT 'a\0' < 'a';
|
||||||
|
SELECT 'a\0' < 'a ';
|
||||||
|
SELECT 'a\t' < 'a';
|
||||||
|
SELECT 'a\t' < 'a ';
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a char(10) not null);
|
||||||
|
INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
|
||||||
|
SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
@ -33,6 +33,23 @@ select 'A' like 'a';
|
|||||||
select 'A' like 'a' collate utf8_bin;
|
select 'A' like 'a' collate utf8_bin;
|
||||||
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
|
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
|
||||||
|
|
||||||
|
#
|
||||||
|
# Check the following:
|
||||||
|
# "a" == "a "
|
||||||
|
# "a\0" < "a"
|
||||||
|
# "a\0" < "a "
|
||||||
|
|
||||||
|
SELECT 'a' = 'a ';
|
||||||
|
SELECT 'a\0' < 'a';
|
||||||
|
SELECT 'a\0' < 'a ';
|
||||||
|
SELECT 'a\t' < 'a';
|
||||||
|
SELECT 'a\t' < 'a ';
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a char(10) character set utf8 not null);
|
||||||
|
INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
|
||||||
|
SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
#
|
#
|
||||||
# Fix this, it should return 1:
|
# Fix this, it should return 1:
|
||||||
#
|
#
|
||||||
|
@ -1837,18 +1837,98 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
TODO: Has to be fixed as strnncollsp in ctype-simple
|
Compare strings, discarding end space
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
my_strnncollsp_utf8()
|
||||||
|
cs character set handler
|
||||||
|
a First string to compare
|
||||||
|
a_length Length of 'a'
|
||||||
|
b Second string to compare
|
||||||
|
b_length Length of 'b'
|
||||||
|
|
||||||
|
IMPLEMENTATION
|
||||||
|
If one string is shorter than the other, then we space extend the shorter one
|
||||||
|
so that the strings have equal length.
|
||||||
|
|
||||||
|
This will ensure that the following things hold:
|
||||||
|
|
||||||
|
"a" == "a "
|
||||||
|
"a\0" < "a"
|
||||||
|
"a\0" < "a "
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
< 0 a < b
|
||||||
|
= 0 a == b
|
||||||
|
> 0 a > b
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static
|
static int my_strnncollsp_utf8(CHARSET_INFO *cs,
|
||||||
int my_strnncollsp_utf8(CHARSET_INFO * cs,
|
const uchar *s, uint slen,
|
||||||
const uchar *s, uint slen,
|
const uchar *t, uint tlen)
|
||||||
const uchar *t, uint tlen)
|
|
||||||
{
|
{
|
||||||
for ( ; slen && s[slen-1] == ' ' ; slen--);
|
int s_res,t_res;
|
||||||
for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
|
my_wc_t s_wc,t_wc;
|
||||||
return my_strnncoll_utf8(cs,s,slen,t,tlen);
|
const uchar *se= s+slen;
|
||||||
|
const uchar *te= t+tlen;
|
||||||
|
|
||||||
|
while ( s < se && t < te )
|
||||||
|
{
|
||||||
|
int plane;
|
||||||
|
s_res=my_utf8_uni(cs,&s_wc, s, se);
|
||||||
|
t_res=my_utf8_uni(cs,&t_wc, t, te);
|
||||||
|
|
||||||
|
if ( s_res <= 0 || t_res <= 0 )
|
||||||
|
{
|
||||||
|
/* Incorrect string, compare by char value */
|
||||||
|
return ((int)s[0]-(int)t[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
plane=(s_wc>>8) & 0xFF;
|
||||||
|
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
|
||||||
|
plane=(t_wc>>8) & 0xFF;
|
||||||
|
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
|
||||||
|
if ( s_wc != t_wc )
|
||||||
|
{
|
||||||
|
return ((int) s_wc) - ((int) t_wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
s+=s_res;
|
||||||
|
t+=t_res;
|
||||||
|
}
|
||||||
|
|
||||||
|
slen= se-s;
|
||||||
|
tlen= te-t;
|
||||||
|
|
||||||
|
if (slen != tlen)
|
||||||
|
{
|
||||||
|
int swap= 0;
|
||||||
|
if (slen < tlen)
|
||||||
|
{
|
||||||
|
slen= tlen;
|
||||||
|
s= t;
|
||||||
|
se= te;
|
||||||
|
swap= -1;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
The following loop uses the fact that in UTF-8
|
||||||
|
all multibyte characters are greater than space,
|
||||||
|
and all multibyte head characters are greater than
|
||||||
|
space. It means if we meet a character greater
|
||||||
|
than space, it always means that the longer string
|
||||||
|
is greater. So we can reuse the same loop from the
|
||||||
|
8bit version, without having to process full multibyte
|
||||||
|
sequences.
|
||||||
|
*/
|
||||||
|
for ( ; s < se; s++)
|
||||||
|
{
|
||||||
|
if (*s != ' ')
|
||||||
|
return ((int)*s - (int) ' ') ^ swap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user