Fixes for German sorting order.
This commit is contained in:
parent
c526f5d2ac
commit
fa23b20789
@ -748,7 +748,7 @@ is also available through the SQL interface as well.
|
|||||||
|
|
||||||
@item
|
@item
|
||||||
Full support for several different character sets, including
|
Full support for several different character sets, including
|
||||||
ISO-8859-1 (Latin1), big5, ujis, and more. For example, the
|
ISO-8859-1 (Latin1), german, big5, ujis, and more. For example, the
|
||||||
Scandinavian characters `@ringaccent{a}', `@"a' and `@"o' are allowed
|
Scandinavian characters `@ringaccent{a}', `@"a' and `@"o' are allowed
|
||||||
in table and column names.
|
in table and column names.
|
||||||
|
|
||||||
@ -20442,6 +20442,35 @@ default-character-set=character-set-name
|
|||||||
|
|
||||||
but normally this is never needed.
|
but normally this is never needed.
|
||||||
|
|
||||||
|
@menu
|
||||||
|
* German character set::
|
||||||
|
@end menu
|
||||||
|
|
||||||
|
@node German character set, , Character sets, Character sets
|
||||||
|
@subsubsection German character set
|
||||||
|
|
||||||
|
To get German sorting order, you should start @code{mysqld} with
|
||||||
|
@code{--default-character-set=latin1_de}. This will give you the following
|
||||||
|
characteristics.
|
||||||
|
|
||||||
|
When sorting and comparing strings, the following mapping is done on the
|
||||||
|
strings before doing the comparison:
|
||||||
|
|
||||||
|
@example
|
||||||
|
ä -> ae
|
||||||
|
ö -> oe
|
||||||
|
ü -> ue
|
||||||
|
ß -> ss
|
||||||
|
@end example
|
||||||
|
|
||||||
|
All accented characters, except @code{é} and @code{É}, are converted to
|
||||||
|
their un-accented counterpart. All letters are converted to uppercase.
|
||||||
|
|
||||||
|
When comparing strings with @code{LIKE} the one -> two character mapping
|
||||||
|
is not done. All letters are converted to uppercase. Accents are removed
|
||||||
|
from all letters except: @code{Ü}, @code{ü}, @code{É}, @code{é}, @code{Ö},
|
||||||
|
@code{ö}, @code{Ä} and @code{ä}.
|
||||||
|
|
||||||
|
|
||||||
@node Languages, Adding character set, Character sets, Localization
|
@node Languages, Adding character set, Character sets, Localization
|
||||||
@subsection Non-English Error Messages
|
@subsection Non-English Error Messages
|
||||||
@ -46753,6 +46782,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
|
|||||||
|
|
||||||
@itemize @bullet
|
@itemize @bullet
|
||||||
@item
|
@item
|
||||||
|
New character set @code{latin1_de} which provides correct German sorting.
|
||||||
|
@item
|
||||||
@code{TRUNCATE TABLE} and @code{DELETE FROM table_name} are now separate
|
@code{TRUNCATE TABLE} and @code{DELETE FROM table_name} are now separate
|
||||||
functions. One bonus is that @code{DELETE FROM table_name} now returns
|
functions. One bonus is that @code{DELETE FROM table_name} now returns
|
||||||
the number of deleted rows.
|
the number of deleted rows.
|
||||||
|
@ -1826,7 +1826,7 @@ CHARSETS_AVAILABLE="big5 cp1251 cp1257
|
|||||||
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
|
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
|
||||||
usa7 win1250 win1251ukr"
|
usa7 win1250 win1251ukr"
|
||||||
CHARSETS_DEPRECATED="win1251"
|
CHARSETS_DEPRECATED="win1251"
|
||||||
DEFAULT_CHARSET=latin1_de
|
DEFAULT_CHARSET=latin1
|
||||||
AC_DIVERT_POP
|
AC_DIVERT_POP
|
||||||
|
|
||||||
AC_ARG_WITH(charset,
|
AC_ARG_WITH(charset,
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
||||||
|
|
||||||
/* Remove all rows from a MyISAM table */
|
/* Remove all rows from a MyISAM table */
|
||||||
/* This only clears the status information; The files are not truncated */
|
/* This only clears the status information and truncates the data file */
|
||||||
|
|
||||||
#include "myisamdef.h"
|
#include "myisamdef.h"
|
||||||
|
|
||||||
@ -50,6 +50,8 @@ int mi_delete_all_rows(MI_INFO *info)
|
|||||||
|
|
||||||
myisam_log_command(MI_LOG_DELETE_ALL,info,(byte*) 0,0,0);
|
myisam_log_command(MI_LOG_DELETE_ALL,info,(byte*) 0,0,0);
|
||||||
VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
|
VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
|
||||||
|
if (my_chsize(info->dfile, 0, MYF(MY_WME)))
|
||||||
|
goto err;
|
||||||
allow_break(); /* Allow SIGHUP & SIGINT */
|
allow_break(); /* Allow SIGHUP & SIGINT */
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
|
|
||||||
|
@ -657,19 +657,19 @@ void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos)
|
|||||||
int _mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
|
int _mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
|
||||||
uchar *b, uint b_length, my_bool part_key)
|
uchar *b, uint b_length, my_bool part_key)
|
||||||
{
|
{
|
||||||
uint length= min(a_length,b_length);
|
|
||||||
uchar *end= a+ length;
|
|
||||||
int flag;
|
int flag;
|
||||||
|
|
||||||
#ifdef USE_STRCOLL
|
#ifdef USE_STRCOLL
|
||||||
if (use_strcoll(charset_info))
|
if (use_strcoll(charset_info))
|
||||||
{
|
{
|
||||||
if ((flag = my_strnncoll(charset_info, a, a_length, b, b_length)))
|
/* QQ: This needs to work with part keys at some point */
|
||||||
return flag;
|
return my_strnncoll(charset_info, a, a_length, b, b_length);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
uint length= min(a_length,b_length);
|
||||||
|
uchar *end= a+ length;
|
||||||
uchar *sort_order=charset_info->sort_order;
|
uchar *sort_order=charset_info->sort_order;
|
||||||
while (a < end)
|
while (a < end)
|
||||||
if ((flag= (int) sort_order[*a++] - (int) sort_order[*b++]))
|
if ((flag= (int) sort_order[*a++] - (int) sort_order[*b++]))
|
||||||
@ -768,8 +768,15 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint length=(uint) (end-a);
|
uint length=(uint) (end-a), a_length=length, b_length=length;
|
||||||
if ((flag=_mi_compare_text(keyseg->charset,a,length,b,length,
|
if (!(nextflag & SEARCH_PREFIX))
|
||||||
|
{
|
||||||
|
while (a_length && a[a_length-1] == ' ')
|
||||||
|
a_length--;
|
||||||
|
while (b_length && b[b_length-1] == ' ')
|
||||||
|
b_length--;
|
||||||
|
}
|
||||||
|
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
|
||||||
(my_bool) ((nextflag & SEARCH_PREFIX) &&
|
(my_bool) ((nextflag & SEARCH_PREFIX) &&
|
||||||
next_key_length <= 0))))
|
next_key_length <= 0))))
|
||||||
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
|
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
|
||||||
|
168
mysql-test/r/ctype_latin1_de.result
Normal file
168
mysql-test/r/ctype_latin1_de.result
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
a b
|
||||||
|
a 35
|
||||||
|
ac 2
|
||||||
|
ad 4
|
||||||
|
ä 1
|
||||||
|
ae 3
|
||||||
|
ää 31
|
||||||
|
aeae 33
|
||||||
|
ääa 32
|
||||||
|
aeb 6
|
||||||
|
Äc 5
|
||||||
|
eä 28
|
||||||
|
o 37
|
||||||
|
oc 15
|
||||||
|
od 18
|
||||||
|
ö 14
|
||||||
|
oe 17
|
||||||
|
Öa 16
|
||||||
|
oeb 20
|
||||||
|
Öc 19
|
||||||
|
öo 30
|
||||||
|
q 34
|
||||||
|
s 21
|
||||||
|
ss 22
|
||||||
|
ß 23
|
||||||
|
ssa 25
|
||||||
|
ßa 27
|
||||||
|
ßb 24
|
||||||
|
ssc 26
|
||||||
|
u 36
|
||||||
|
uc 8
|
||||||
|
ud 10
|
||||||
|
ue 9
|
||||||
|
Ü 11
|
||||||
|
ueb 12
|
||||||
|
üc 7
|
||||||
|
uf 13
|
||||||
|
uü 29
|
||||||
|
é 38
|
||||||
|
É 39
|
||||||
|
a b
|
||||||
|
a 35
|
||||||
|
ac 2
|
||||||
|
ad 4
|
||||||
|
ä 1
|
||||||
|
ae 3
|
||||||
|
ää 31
|
||||||
|
aeae 33
|
||||||
|
ääa 32
|
||||||
|
aeb 6
|
||||||
|
Äc 5
|
||||||
|
eä 28
|
||||||
|
o 37
|
||||||
|
oc 15
|
||||||
|
od 18
|
||||||
|
ö 14
|
||||||
|
oe 17
|
||||||
|
Öa 16
|
||||||
|
oeb 20
|
||||||
|
Öc 19
|
||||||
|
öo 30
|
||||||
|
q 34
|
||||||
|
s 21
|
||||||
|
ss 22
|
||||||
|
ß 23
|
||||||
|
ssa 25
|
||||||
|
ßa 27
|
||||||
|
ßb 24
|
||||||
|
ssc 26
|
||||||
|
u 36
|
||||||
|
uc 8
|
||||||
|
ud 10
|
||||||
|
ue 9
|
||||||
|
Ü 11
|
||||||
|
ueb 12
|
||||||
|
üc 7
|
||||||
|
uf 13
|
||||||
|
uü 29
|
||||||
|
é 38
|
||||||
|
É 39
|
||||||
|
a
|
||||||
|
É
|
||||||
|
é
|
||||||
|
uü
|
||||||
|
uf
|
||||||
|
üc
|
||||||
|
ueb
|
||||||
|
Ü
|
||||||
|
ue
|
||||||
|
ud
|
||||||
|
uc
|
||||||
|
u
|
||||||
|
ssc
|
||||||
|
ßb
|
||||||
|
ßa
|
||||||
|
ssa
|
||||||
|
ß
|
||||||
|
ss
|
||||||
|
s
|
||||||
|
q
|
||||||
|
öo
|
||||||
|
Öc
|
||||||
|
oeb
|
||||||
|
Öa
|
||||||
|
oe
|
||||||
|
ö
|
||||||
|
od
|
||||||
|
oc
|
||||||
|
o
|
||||||
|
eä
|
||||||
|
Äc
|
||||||
|
aeb
|
||||||
|
ääa
|
||||||
|
aeae
|
||||||
|
ää
|
||||||
|
ae
|
||||||
|
ä
|
||||||
|
ad
|
||||||
|
ac
|
||||||
|
a
|
||||||
|
Table Op Msg_type Msg_text
|
||||||
|
test.t1 check status OK
|
||||||
|
a b
|
||||||
|
Öa 16
|
||||||
|
Öc 19
|
||||||
|
öo 30
|
||||||
|
a b
|
||||||
|
é 38
|
||||||
|
É 39
|
||||||
|
a b
|
||||||
|
a 35
|
||||||
|
ac 2
|
||||||
|
ad 4
|
||||||
|
ae 3
|
||||||
|
aeae 33
|
||||||
|
ääa 32
|
||||||
|
aeb 6
|
||||||
|
Öa 16
|
||||||
|
ssa 25
|
||||||
|
ßa 27
|
||||||
|
a b
|
||||||
|
u 36
|
||||||
|
uc 8
|
||||||
|
ud 10
|
||||||
|
ue 9
|
||||||
|
ueb 12
|
||||||
|
uf 13
|
||||||
|
uü 29
|
||||||
|
a b
|
||||||
|
ss 22
|
||||||
|
ssa 25
|
||||||
|
ssc 26
|
||||||
|
strcmp('ä','ae') strcmp('ae','ä') strcmp('aeq','äq') strcmp('äq','aeq')
|
||||||
|
0 0 0 0
|
||||||
|
strcmp('ss','ß') strcmp('ß','ss') strcmp('ßs','sss') strcmp('ßq','ssq')
|
||||||
|
0 0 0 0
|
||||||
|
strcmp('ä','af') strcmp('a','ä') strcmp('ää','aeq') strcmp('ää','aeaeq')
|
||||||
|
-1 -1 -1 -1
|
||||||
|
strcmp('ss','ßa') strcmp('ß','ssa') strcmp('sßa','sssb') strcmp('s','ß')
|
||||||
|
-1 -1 -1 -1
|
||||||
|
strcmp('ö','oö') strcmp('Ü','uü') strcmp('ö','oeb')
|
||||||
|
-1 -1 -1
|
||||||
|
strcmp('af','ä') strcmp('ä','a') strcmp('aeq','ää') strcmp('aeaeq','ää')
|
||||||
|
1 1 1 1
|
||||||
|
strcmp('ßa','ss') strcmp('ssa','ß') strcmp('sssb','sßa') strcmp('ß','s')
|
||||||
|
1 1 1 1
|
||||||
|
strcmp('u','öa') strcmp('u','ö')
|
||||||
|
1 1
|
1
mysql-test/t/ctype_latin1_de-master.opt
Normal file
1
mysql-test/t/ctype_latin1_de-master.opt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--default-character-set=latin1_de
|
36
mysql-test/t/ctype_latin1_de.test
Normal file
36
mysql-test/t/ctype_latin1_de.test
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#
|
||||||
|
# Test latin1_de character set
|
||||||
|
#
|
||||||
|
drop table if exists t1;
|
||||||
|
create table t1 (a char (20) not null, b int not null auto_increment, index (a,b),index(b));
|
||||||
|
insert into t1 (a) values ('ä'),('ac'),('ae'),('ad'),('Äc'),('aeb');
|
||||||
|
insert into t1 (a) values ('üc'),('uc'),('ue'),('ud'),('Ü'),('ueb'),('uf');
|
||||||
|
insert into t1 (a) values ('ö'),('oc'),('Öa'),('oe'),('od'),('Öc'),('oeb');
|
||||||
|
insert into t1 (a) values ('s'),('ss'),('ß'),('ßb'),('ssa'),('ssc'),('ßa');
|
||||||
|
insert into t1 (a) values ('eä'),('uü'),('öo'),('ää'),('ääa'),('aeae');
|
||||||
|
insert into t1 (a) values ('q'),('a'),('u'),('o'),('é'),('É');
|
||||||
|
select a,b from t1 order by a,b;
|
||||||
|
select a,b from t1 order by upper(a),b;
|
||||||
|
select a from t1 order by a desc;
|
||||||
|
check table t1;
|
||||||
|
select * from t1 where a like "ö%";
|
||||||
|
select * from t1 where a like "%É%";
|
||||||
|
select * from t1 where a like "%Á%";
|
||||||
|
select * from t1 where a like "%U%";
|
||||||
|
select * from t1 where a like "%ss%";
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
# The following should all return 0 (the strings compare as equal)
|
||||||
|
select strcmp('ä','ae'),strcmp('ae','ä'),strcmp('aeq','äq'),strcmp('äq','aeq');
|
||||||
|
select strcmp('ss','ß'),strcmp('ß','ss'),strcmp('ßs','sss'),strcmp('ßq','ssq');
|
||||||
|
|
||||||
|
# The following should all return -1
|
||||||
|
select strcmp('ä','af'),strcmp('a','ä'),strcmp('ää','aeq'),strcmp('ää','aeaeq');
|
||||||
|
select strcmp('ss','ßa'),strcmp('ß','ssa'),strcmp('sßa','sssb'),strcmp('s','ß');
|
||||||
|
select strcmp('ö','oö'),strcmp('Ü','uü'),strcmp('ö','oeb');
|
||||||
|
|
||||||
|
# The following should all return 1
|
||||||
|
select strcmp('af','ä'),strcmp('ä','a'),strcmp('aeq','ää'),strcmp('aeaeq','ää');
|
||||||
|
select strcmp('ßa','ss'),strcmp('ssa','ß'),strcmp('sssb','sßa'),strcmp('ß','s');
|
||||||
|
select strcmp('u','öa'),strcmp('u','ö');
|
||||||
|
|
@ -254,7 +254,7 @@ longlong Item_func_strcmp::val_int()
|
|||||||
null_value=1;
|
null_value=1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
int value=stringcmp(a,b);
|
int value= binary ? stringcmp(a,b) : sortcmp(a,b);
|
||||||
null_value=0;
|
null_value=0;
|
||||||
return !value ? 0 : (value < 0 ? (longlong) -1 : (longlong) 1);
|
return !value ? 0 : (value < 0 ? (longlong) -1 : (longlong) 1);
|
||||||
}
|
}
|
||||||
|
@ -99,12 +99,10 @@ uchar to_upper_latin1_de[] = {
|
|||||||
* This is a simple latin1 mapping table, which maps all accented
|
* This is a simple latin1 mapping table, which maps all accented
|
||||||
* characters to their non-accented equivalents. Note: in this
|
* characters to their non-accented equivalents. Note: in this
|
||||||
* table, 'ä' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
|
* table, 'ä' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
|
||||||
* accented characters are treated the same way.
|
* accented characters except the following are treated the same way.
|
||||||
*
|
* Ü, ü, É, é, Ö, ö, Ä, ä
|
||||||
* SPECIAL NOTE: 'ß' (the sz ligature), which isn't really an
|
|
||||||
* accented 's', is mapped to 'S', to simplify the sorting
|
|
||||||
* functions.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
uchar sort_order_latin1_de[] = {
|
uchar sort_order_latin1_de[] = {
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||||
@ -118,10 +116,10 @@ uchar sort_order_latin1_de[] = {
|
|||||||
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
|
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
|
||||||
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
|
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
|
||||||
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
|
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
|
||||||
65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
|
65, 65, 65, 65,196, 65, 92, 67, 69,201, 69, 69, 73, 73, 73, 73,
|
||||||
68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
|
68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
|
||||||
65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
|
65, 65, 65, 65,196, 65, 92, 67, 69,201, 69, 69, 73, 73, 73, 73,
|
||||||
68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
|
68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
|
||||||
};
|
};
|
||||||
|
|
||||||
#define L1_AE 196
|
#define L1_AE 196
|
||||||
@ -132,6 +130,39 @@ uchar sort_order_latin1_de[] = {
|
|||||||
#define L1_ue 252
|
#define L1_ue 252
|
||||||
#define L1_ss 223
|
#define L1_ss 223
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Some notes about the following comparison rules:
|
||||||
|
By definition, my_strnncoll_latin1_de must work exactly as if it had called
|
||||||
|
my_strnxfrm_latin1_de() on both strings and compared the result strings.
|
||||||
|
|
||||||
|
This means that:
|
||||||
|
Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will convert
|
||||||
|
both to AE.
|
||||||
|
|
||||||
|
The other option would be to not do any accent removal in
|
||||||
|
sort_order_latin1_de[] at all.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#define CHECK_S1_COMBO(ch1, ch2, str1, str1_end, res_if_str1_smaller, str2, fst, snd, accent) \
|
||||||
|
/* Invariant: ch1 == fst == sort_order_latin1_de[accent] && ch1 != ch2 */ \
|
||||||
|
if (ch2 != accent) \
|
||||||
|
{ \
|
||||||
|
ch1= fst; \
|
||||||
|
goto normal; \
|
||||||
|
} \
|
||||||
|
if (str1 == str1_end) \
|
||||||
|
return res_if_str1_smaller; \
|
||||||
|
{ \
|
||||||
|
int diff = (int) sort_order_latin1_de[*str1] - snd; \
|
||||||
|
if (diff) \
|
||||||
|
return diff*(-(res_if_str1_smaller)); \
|
||||||
|
/* They are equal (e.g., "Ae" == 'ä') */ \
|
||||||
|
str1++; \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int my_strnncoll_latin1_de(const uchar * s1, int len1,
|
int my_strnncoll_latin1_de(const uchar * s1, int len1,
|
||||||
const uchar * s2, int len2)
|
const uchar * s2, int len2)
|
||||||
{
|
{
|
||||||
@ -140,172 +171,71 @@ int my_strnncoll_latin1_de(const uchar * s1, int len1,
|
|||||||
|
|
||||||
while (s1 < e1 && s2 < e2)
|
while (s1 < e1 && s2 < e2)
|
||||||
{
|
{
|
||||||
/* to_upper is used instead of sort_order, because we don't want
|
/*
|
||||||
* 'Ä' to match "ÁE", only "AE". This couples the to_upper and
|
Because sort_order_latin1_de doesn't convert 'Ä', 'Ö', 'Ü' or 'ß', we
|
||||||
* sort_order tables together, but that is acceptable. */
|
can use it here.
|
||||||
uchar c1 = to_upper_latin1_de[*s1];
|
*/
|
||||||
uchar c2 = to_upper_latin1_de[*s2];
|
uchar c1 = sort_order_latin1_de[*s1++];
|
||||||
|
uchar c2 = sort_order_latin1_de[*s2++];
|
||||||
if (c1 != c2)
|
if (c1 != c2)
|
||||||
{
|
{
|
||||||
switch (c1)
|
switch (c1) {
|
||||||
{
|
|
||||||
|
|
||||||
#define CHECK_S1_COMBO(fst, snd, accent) \
|
|
||||||
/* Invariant: c1 == fst == sort_order_latin1_de[accent] && c1 != c2 */ \
|
|
||||||
if (c2 == accent) \
|
|
||||||
{ \
|
|
||||||
if (s1 + 1 < e1) \
|
|
||||||
{ \
|
|
||||||
if (to_upper_latin1_de[*(s1 + 1)] == snd) \
|
|
||||||
{ \
|
|
||||||
/* They are equal (e.g., "Ae" == 'ä') */ \
|
|
||||||
s1 += 2; \
|
|
||||||
s2 += 1; \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
int diff = sort_order_latin1_de[*(s1 + 1)] - snd; \
|
|
||||||
if (diff) \
|
|
||||||
return diff; \
|
|
||||||
else \
|
|
||||||
/* Comparison between, e.g., "AÉ" and 'Ä' */ \
|
|
||||||
return 1; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
return -1; \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
/* The following should work even if c2 is [ÄÖÜß] */ \
|
|
||||||
return fst - sort_order_latin1_de[c2]
|
|
||||||
|
|
||||||
case 'A':
|
case 'A':
|
||||||
CHECK_S1_COMBO('A', 'E', L1_AE);
|
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'A', 'E', L1_AE);
|
||||||
break;
|
break;
|
||||||
case 'O':
|
case 'O':
|
||||||
CHECK_S1_COMBO('O', 'E', L1_OE);
|
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'O', 'E', L1_OE);
|
||||||
break;
|
break;
|
||||||
case 'U':
|
case 'U':
|
||||||
CHECK_S1_COMBO('U', 'E', L1_UE);
|
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'U', 'E', L1_UE);
|
||||||
break;
|
break;
|
||||||
case 'S':
|
case 'S':
|
||||||
CHECK_S1_COMBO('S', 'S', L1_ss);
|
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'S', 'S', L1_ss);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#define CHECK_S2_COMBO(fst, snd) \
|
|
||||||
/* Invariant: sort_order_latin1_de[c1] == fst && c1 != c2 */ \
|
|
||||||
if (c2 == fst) \
|
|
||||||
{ \
|
|
||||||
if (s2 + 1 < e2) \
|
|
||||||
{ \
|
|
||||||
if (to_upper_latin1_de[*(s2 + 1)] == snd) \
|
|
||||||
{ \
|
|
||||||
/* They are equal (e.g., 'ä' == "Ae") */ \
|
|
||||||
s1 += 1; \
|
|
||||||
s2 += 2; \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
{ \
|
|
||||||
int diff = sort_order_latin1_de[*(s1 + 1)] - snd; \
|
|
||||||
if (diff) \
|
|
||||||
return diff; \
|
|
||||||
else \
|
|
||||||
/* Comparison between, e.g., 'Ä' and "AÉ" */ \
|
|
||||||
return -1; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
return 1; \
|
|
||||||
} \
|
|
||||||
else \
|
|
||||||
/* The following should work even if c2 is [ÄÖÜß] */ \
|
|
||||||
return fst - sort_order_latin1_de[c2]
|
|
||||||
|
|
||||||
case L1_AE:
|
case L1_AE:
|
||||||
CHECK_S2_COMBO('A', 'E');
|
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'A', 'E', 'A');
|
||||||
break;
|
break;
|
||||||
case L1_OE:
|
case L1_OE:
|
||||||
CHECK_S2_COMBO('O', 'E');
|
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'O', 'E', 'O');
|
||||||
break;
|
break;
|
||||||
case L1_UE:
|
case L1_UE:
|
||||||
CHECK_S2_COMBO('U', 'E');
|
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'U', 'E', 'U');
|
||||||
break;
|
break;
|
||||||
case L1_ss:
|
case L1_ss:
|
||||||
CHECK_S2_COMBO('S', 'S');
|
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'S', 'S', 'S');
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
/*
|
||||||
|
Handle the case where 'c2' is a special character
|
||||||
|
If this is true, we know that c1 can't match this character.
|
||||||
|
*/
|
||||||
|
normal:
|
||||||
switch (c2) {
|
switch (c2) {
|
||||||
case L1_AE:
|
case L1_AE:
|
||||||
|
return (int) c1 - (int) 'A';
|
||||||
case L1_OE:
|
case L1_OE:
|
||||||
|
return (int) c1 - (int) 'O';
|
||||||
case L1_UE:
|
case L1_UE:
|
||||||
|
return (int) c1 - (int) 'U';
|
||||||
case L1_ss:
|
case L1_ss:
|
||||||
/* Make sure these do not match (e.g., "Ä" != "Á") */
|
return (int) c1 - (int) 'S';
|
||||||
return sort_order_latin1_de[c1] - sort_order_latin1_de[c2];
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
if (sort_order_latin1_de[*s1] != sort_order_latin1_de[*s2])
|
|
||||||
return sort_order_latin1_de[*s1] - sort_order_latin1_de[*s2];
|
|
||||||
++s1;
|
|
||||||
++s2;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
#undef CHECK_S1_COMBO
|
|
||||||
#undef CHECK_S2_COMBO
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
/* In order to consistently treat "ae" == 'ä', but to NOT allow
|
int diff= (int) c1 - (int) c2;
|
||||||
* "aé" == 'ä', we must look ahead here to ensure that the second
|
if (diff)
|
||||||
* letter in a combo really is the unaccented 'e' (or 's' for
|
return diff;
|
||||||
* "ss") and is not an accented character with the same sort_order. */
|
}
|
||||||
++s1;
|
|
||||||
++s2;
|
|
||||||
if (s1 < e1 && s2 < e2)
|
|
||||||
{
|
|
||||||
switch (c1)
|
|
||||||
{
|
|
||||||
case 'A':
|
|
||||||
case 'O':
|
|
||||||
case 'U':
|
|
||||||
if (sort_order_latin1_de[*s1] == 'E' &&
|
|
||||||
to_upper_latin1_de[*s1] != 'E' &&
|
|
||||||
to_upper_latin1_de[*s2] == 'E')
|
|
||||||
/* Comparison between, e.g., "AÉ" and "AE" */
|
|
||||||
return 1;
|
|
||||||
if (sort_order_latin1_de[*s2] == 'E' &&
|
|
||||||
to_upper_latin1_de[*s2] != 'E' &&
|
|
||||||
to_upper_latin1_de[*s1] == 'E')
|
|
||||||
/* Comparison between, e.g., "AE" and "AÉ" */
|
|
||||||
return -1;
|
|
||||||
break;
|
|
||||||
case 'S':
|
|
||||||
if (sort_order_latin1_de[*s1] == 'S' &&
|
|
||||||
to_upper_latin1_de[*s1] != 'S' &&
|
|
||||||
to_upper_latin1_de[*s2] == 'S')
|
|
||||||
/* Comparison between, e.g., "Sß" and "SS" */
|
|
||||||
return 1;
|
|
||||||
if (sort_order_latin1_de[*s2] == 'S' &&
|
|
||||||
to_upper_latin1_de[*s2] != 'S' &&
|
|
||||||
to_upper_latin1_de[*s1] == 'S')
|
|
||||||
/* Comparison between, e.g., "SS" and "Sß" */
|
|
||||||
return -1;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* A simple test of string lengths won't work -- we test to see
|
/* A simple test of string lengths won't work -- we test to see
|
||||||
* which string ran out first */
|
* which string ran out first */
|
||||||
return s1 < e1 ? 1 : s2 < e2 ? -1 : 0;
|
return s1 < e1 ? 1 : s2 < e2 ? -1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
|
int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
|
||||||
{
|
{
|
||||||
const uchar *dest_orig = dest;
|
const uchar *dest_orig = dest;
|
||||||
@ -313,22 +243,19 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
|
|||||||
const uchar *se = src + srclen;
|
const uchar *se = src + srclen;
|
||||||
while (src < se && dest < de)
|
while (src < se && dest < de)
|
||||||
{
|
{
|
||||||
switch (*src)
|
uchar chr=sort_order_latin1_de[*src];
|
||||||
{
|
switch (chr) {
|
||||||
case L1_AE:
|
case L1_AE:
|
||||||
case L1_ae:
|
|
||||||
*dest++ = 'A';
|
*dest++ = 'A';
|
||||||
if (dest < de)
|
if (dest < de)
|
||||||
*dest++ = 'E';
|
*dest++ = 'E';
|
||||||
break;
|
break;
|
||||||
case L1_OE:
|
case L1_OE:
|
||||||
case L1_oe:
|
|
||||||
*dest++ = 'O';
|
*dest++ = 'O';
|
||||||
if (dest < de)
|
if (dest < de)
|
||||||
*dest++ = 'E';
|
*dest++ = 'E';
|
||||||
break;
|
break;
|
||||||
case L1_UE:
|
case L1_UE:
|
||||||
case L1_ue:
|
|
||||||
*dest++ = 'U';
|
*dest++ = 'U';
|
||||||
if (dest < de)
|
if (dest < de)
|
||||||
*dest++ = 'E';
|
*dest++ = 'E';
|
||||||
@ -339,7 +266,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
|
|||||||
*dest++ = 'S';
|
*dest++ = 'S';
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
*dest++ = sort_order_latin1_de[*src];
|
*dest++= chr;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
++src;
|
++src;
|
||||||
@ -347,6 +274,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
|
|||||||
return dest - dest_orig;
|
return dest - dest_orig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int my_strcoll_latin1_de(const uchar * s1, const uchar * s2)
|
int my_strcoll_latin1_de(const uchar * s1, const uchar * s2)
|
||||||
{
|
{
|
||||||
/* XXX QQ: This should be fixed to not call strlen */
|
/* XXX QQ: This should be fixed to not call strlen */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user