MDEV-4928 Merge collation customization improvements

Merging the following MySQL-5.6 changes:
- WL#5624: Collation customization improvements
  http://dev.mysql.com/worklog/task/?id=5624

- WL#4013: Unicode german2 collation
  http://dev.mysql.com/worklog/task/?id=4013

- Bug#62429 XML: ExtractValue, UpdateXML max arg length 127 chars
  http://bugs.mysql.com/bug.php?id=62429
  (required by WL#5624)
This commit is contained in:
Alexander Barkov 2013-10-02 15:04:07 +04:00
parent 9538bbfce9
commit 0b6c4bb34f
42 changed files with 5823 additions and 1715 deletions

View File

@ -23,6 +23,12 @@
#include <my_attribute.h>
#include "my_global.h" /* uint16, uchar */
/*
  Message severity levels, with the numeric values spelled out explicitly.
  The reporter callback of MY_CHARSET_LOADER takes one of these values
  as its first argument.
*/
enum loglevel {
ERROR_LEVEL= 0,
WARNING_LEVEL= 1,
INFORMATION_LEVEL= 2
};
#ifdef __cplusplus
extern "C" {
#endif
@ -61,24 +67,35 @@ typedef const struct unicase_info_st MY_UNICASE_INFO;
typedef const struct uni_ctype_st MY_UNI_CTYPE;
typedef const struct my_uni_idx_st MY_UNI_IDX;
struct unicase_info_st
typedef struct unicase_info_char_st
{
uint32 toupper;
uint32 tolower;
uint32 sort;
} MY_UNICASE_CHARACTER;
struct unicase_info_st
{
my_wc_t maxchar;
MY_UNICASE_CHARACTER **page;
};
extern MY_UNICASE_INFO *const my_unicase_default[256];
extern MY_UNICASE_INFO *const my_unicase_turkish[256];
extern MY_UNICASE_INFO *const my_unicase_mysql500[256];
#define MY_UCA_MAX_CONTRACTION 4
extern MY_UNICASE_INFO my_unicase_default;
extern MY_UNICASE_INFO my_unicase_turkish;
extern MY_UNICASE_INFO my_unicase_mysql500;
extern MY_UNICASE_INFO my_unicase_unicode520;
#define MY_UCA_MAX_CONTRACTION 6
#define MY_UCA_MAX_WEIGHT_SIZE 8
#define MY_UCA_WEIGHT_LEVELS 1
/*
  A single contraction: a sequence of up to MY_UCA_MAX_CONTRACTION
  characters together with the weight string assigned to that sequence.
*/
typedef struct my_contraction_t
{
my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */
uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */
my_bool with_context; /* NOTE(review): presumably set for "previous context" rules - confirm */
} MY_CONTRACTION;
@ -89,6 +106,46 @@ typedef struct my_contraction_list_t
char *flags; /* Character flags, e.g. "is contraction head") */
} MY_CONTRACTIONS;
my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
my_wc_t wc1, my_wc_t wc2);
/* Collation weights on a single level (e.g. primary, secondary, tertiary) */
typedef struct my_uca_level_info_st
{
my_wc_t maxchar; /* NOTE(review): presumably the highest character covered by the tables - confirm */
uchar *lengths; /* NOTE(review): presumably weight string lengths, one entry per page - confirm */
uint16 **weights; /* NOTE(review): presumably weight pages indexed by the high part of the code - confirm */
MY_CONTRACTIONS contractions; /* Contraction list kept for this level */
} MY_UCA_WEIGHT_LEVEL;
/*
  UCA data of a collation: MY_UCA_WEIGHT_LEVELS weight levels
  followed by a set of "logical position" characters.
*/
typedef struct uca_info_st
{
MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
/* Logical positions */
/*
  NOTE(review): presumably the characters that logical reset positions
  in collation rules resolve to - confirm against the rule parser.
*/
my_wc_t first_non_ignorable;
my_wc_t last_non_ignorable;
my_wc_t first_primary_ignorable;
my_wc_t last_primary_ignorable;
my_wc_t first_secondary_ignorable;
my_wc_t last_secondary_ignorable;
my_wc_t first_tertiary_ignorable;
my_wc_t last_tertiary_ignorable;
my_wc_t first_trailing;
my_wc_t last_trailing;
my_wc_t first_variable;
my_wc_t last_variable;
} MY_UCA_INFO;
extern MY_UCA_INFO my_uca_v400;
struct uni_ctype_st
{
@ -122,7 +179,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_BINSORT 16 /* if binary sort order */
#define MY_CS_PRIMARY 32 /* if primary collation */
#define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_UNICODE 128 /* if a charset is BMP Unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
@ -130,6 +187,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
#define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */
#define MY_CHARSET_UNDEFINED 0
/* Character repertoire flags */
@ -202,13 +260,24 @@ enum my_lex_states
struct charset_info_st;
/*
  Set of callbacks plus an error buffer that is handed to the
  charset/collation init() handlers and to my_parse_charset_xml().
*/
typedef struct my_charset_loader_st
{
char error[128]; /* NOTE(review): presumably receives the text of the last loading error - confirm */
void *(*once_alloc)(size_t);
void *(*malloc)(size_t);
void *(*realloc)(void *, size_t);
void (*free)(void *);
/* Message sink: takes a severity level, a format string and variadic arguments */
void (*reporter)(enum loglevel, const char *format, ...);
int (*add_collation)(struct charset_info_st *cs);
} MY_CHARSET_LOADER;
extern int (*my_string_stack_guard)(int);
/* See strings/CHARSET_INFO.txt for information about this structure */
struct my_collation_handler_st
{
my_bool (*init)(struct charset_info_st *, void *(*alloc)(size_t));
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *);
/* Collation routines */
int (*strnncoll)(CHARSET_INFO *,
const uchar *, size_t, const uchar *, size_t, my_bool);
@ -259,7 +328,7 @@ typedef size_t (*my_charset_conv_case)(CHARSET_INFO *,
/* See strings/CHARSET_INFO.txt for information about this structure */
struct my_charset_handler_st
{
my_bool (*init)(struct charset_info_st *, void *(*alloc)(size_t));
my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
/* Multibyte routines */
uint (*ismbchar)(CHARSET_INFO *, const char *, const char *);
uint (*mbcharlen)(CHARSET_INFO *, uint c);
@ -322,6 +391,13 @@ struct my_charset_handler_st
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
/*
We define this CHARSET_INFO_DEFINED here to prevent a repeat of the
typedef in hash.c, which will cause a compiler error.
*/
#define CHARSET_INFO_DEFINED
/* See strings/CHARSET_INFO.txt for information about this structure */
struct charset_info_st
{
@ -337,11 +413,10 @@ struct charset_info_st
const uchar *to_lower;
const uchar *to_upper;
const uchar *sort_order;
const MY_CONTRACTIONS *contractions;
const uint16 *const *sort_order_big;
MY_UCA_INFO *uca;
const uint16 *tab_to_uni;
MY_UNI_IDX *tab_from_uni;
MY_UNICASE_INFO *const *caseinfo;
MY_UNI_IDX *tab_from_uni;
MY_UNICASE_INFO *caseinfo;
const uchar *state_map;
const uchar *ident_map;
uint strxfrm_multiply;
@ -349,8 +424,8 @@ struct charset_info_st
uchar casedn_multiply;
uint mbminlen;
uint mbmaxlen;
uint16 min_sort_char;
uint16 max_sort_char; /* For LIKE optimization */
my_wc_t min_sort_char;
my_wc_t max_sort_char; /* For LIKE optimization */
uchar pad_char;
my_bool escape_with_backslash_is_dangerous;
@ -600,10 +675,10 @@ int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr, const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO *const *weights);
MY_UNICASE_INFO *weights);
extern my_bool my_parse_charset_xml(const char *bug, size_t len,
int (*add)(struct charset_info_st *cs));
extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader,
const char *buf, size_t buflen);
extern char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end,
pchar c);
extern size_t my_strcspn(CHARSET_INFO *cs, const char *str, const char *end,
@ -620,6 +695,9 @@ uint my_charset_repertoire(CHARSET_INFO *cs);
my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs,
int level);
extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap);

View File

@ -73,10 +73,12 @@ extern "C" {
#endif
/*
my_str_malloc() and my_str_free() are assigned to implementations in
strings/alloc.c, but can be overridden in the calling program.
my_str_malloc(), my_str_realloc() and my_str_free() are assigned to
implementations in strings/alloc.c, but can be overridden in
the calling program.
*/
extern void *(*my_str_malloc)(size_t);
extern void *(*my_str_realloc)(void *, size_t);
extern void (*my_str_free)(void *);
#if defined(HAVE_STPCPY) && MY_GNUC_PREREQ(3, 4) && !defined(__INTEL_COMPILER)

View File

@ -271,12 +271,6 @@ extern char wild_many,wild_one,wild_prefix;
extern const char *charsets_dir;
extern my_bool timed_mutexes;
enum loglevel {
ERROR_LEVEL,
WARNING_LEVEL,
INFORMATION_LEVEL
};
enum cache_type
{
TYPE_NOT_SET= 0, READ_CACHE, WRITE_CACHE,
@ -947,15 +941,20 @@ void my_uuid2str(const uchar *guid, char *s);
void my_uuid_end();
/* character sets */
extern void my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader);
extern uint get_charset_number(const char *cs_name, uint cs_flags);
extern uint get_collation_number(const char *name);
extern const char *get_charset_name(uint cs_number);
extern CHARSET_INFO *get_charset(uint cs_number, myf flags);
extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags);
extern CHARSET_INFO *my_collation_get_by_name(MY_CHARSET_LOADER *loader,
const char *name, myf flags);
extern CHARSET_INFO *get_charset_by_csname(const char *cs_name,
uint cs_flags, myf my_flags);
extern CHARSET_INFO *my_charset_get_by_name(MY_CHARSET_LOADER *loader,
const char *name,
uint cs_flags, myf my_flags);
extern my_bool resolve_charset(const char *cs_name,
CHARSET_INFO *default_cs,
CHARSET_INFO **cs);

View File

@ -52,8 +52,15 @@ typedef struct xml_stack_st
int flags;
enum my_xml_node_type current_node_type;
char errstr[128];
char attr[128];
char *attrend;
struct {
char static_buffer[128];
char *buffer;
size_t buffer_size;
char *start;
char *end;
} attr;
const char *beg;
const char *cur;
const char *end;

View File

@ -411,10 +411,19 @@ select * from information_schema.collations where id>256 order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf8mb4_test_400_ci utf8mb4 328 8
utf8_bengali_standard_ci utf8 336 8
utf8_bengali_traditional_ci utf8 337 8
utf8_phone_ci utf8 352 8
utf8_test_ci utf8 353 8
utf8_5624_1 utf8 354 8
utf8_5624_2 utf8 355 8
utf8_5624_3 utf8 356 8
utf8_5624_4 utf8 357 8
ucs2_test_ci ucs2 358 8
ucs2_vn_ci ucs2 359 8
ucs2_5624_1 ucs2 360 8
utf8_5624_5 utf8 368 8
utf32_test_ci utf32 391 8
utf8_maxuserid_ci utf8 2047 8
show collation like '%test%';
@ -423,6 +432,7 @@ latin1_test latin1 99 Yes 1
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
utf8mb4_test_ci utf8mb4 326 8
utf8mb4_test_400_ci utf8mb4 328 8
utf16_test_ci utf16 327 8
utf32_test_ci utf32 391 8
show collation like 'ucs2_vn_ci';
@ -449,3 +459,631 @@ SHOW COLLATION LIKE 'utf8_phone_ci';
Collation Charset Id Default Compiled Sortlen
utf8_phone_ci utf8 352 8
SET NAMES utf8;
SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a));
hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_400_ci) hex(lower(@a))
F0909080 F0909080
SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a));
hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_400_ci) hex(upper(@a))
F09090A8 F09090A8
SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a));
hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_400_ci) hex(lower(@a))
E2B080 E2B080
SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a));
hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_400_ci) hex(upper(@a))
E2B0B0 E2B0B0
#
# WL#5624 Collation customization improvements
#
SET NAMES utf8 COLLATE utf8_5624_1;
CREATE TABLE t1 AS SELECT REPEAT(' ', 16) AS a LIMIT 0;
INSERT INTO t1 VALUES ('012345'),('001234'),('000123'),('000012'),('000001');
INSERT INTO t1 VALUES ('12345'),('01234'),('00123'),('00012'),('00001');
INSERT INTO t1 VALUES ('1234'),('0123'),('0012'),('0001');
INSERT INTO t1 VALUES ('123'),('012'),('001');
INSERT INTO t1 VALUES ('12'),('01');
INSERT INTO t1 VALUES ('1'),('9');
INSERT INTO t1 VALUES ('ГАИ'),('ГИБДД');
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e');
INSERT INTO t1 VALUES ('cz'),('Ċ'),('ċ');
INSERT INTO t1 VALUES ('f'),('fz'),('g'),('Ġ'),('ġ');
INSERT INTO t1 VALUES ('h'),('hz'),('GĦ'),('Għ'),('gĦ'),('għ');
INSERT INTO t1 VALUES ('i'),('iz'),('Ħ'),('ħ');
INSERT INTO t1 VALUES ('y'),('yz'),('z'),('Ż'),('ż');
INSERT INTO t1 VALUES ('ā'),('Ā'),('á'),('Á'),('à'),('À');
INSERT INTO t1 VALUES ('ē'),('é'),('ě'),('ê'),('Ē'),('É'),('Ě'),('Ê');
INSERT INTO t1 VALUES ('a'),('~'),('!'),('@'),('#'),('$'),('%'),('^');
INSERT INTO t1 VALUES ('('),(')'),('-'),('+'),('|'),('='),(':'),(';');
INSERT INTO t1 VALUES ('"'),('\''),('?');
INSERT INTO t1 VALUES ('ch'),('k'),('cs'),('ccs'),('cscs');
INSERT INTO t1 VALUES ('aa-'),('ab-'),('ac-'),('ad-'),('ae-'),('af-'),('az-');
INSERT INTO t1 VALUES ('lp-fni'),('lp-lni');
INSERT INTO t1 VALUES ('lp-fpi'),('lp-lpi');
INSERT INTO t1 VALUES ('lp-fsi'),('lp-lsi');
INSERT INTO t1 VALUES ('lp-fti'),('lp-lti');
INSERT INTO t1 VALUES ('lp-ft'),('lp-lt');
INSERT INTO t1 VALUES ('lp-fv'),('lp-lv');
INSERT INTO t1 VALUES ('lb-fni'),('lb-lni');
INSERT INTO t1 VALUES ('lb-fv'),('lb-lv');
INSERT INTO t1 VALUES (_ucs2 0x3106),(_ucs2 0x3110), (_ucs2 0x3111), (_ucs2 0x3112);
INSERT INTO t1 VALUES (_ucs2 0x32A3), (_ucs2 0x3231);
INSERT INTO t1 VALUES (_ucs2 0x84D9), (_ucs2 0x98F5), (_ucs2 0x7CF3), (_ucs2 0x5497);
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY a;
a
lp-ft
lp-lt
lp-fpi
lp-fsi
lp-fti
lp-lpi
lp-lsi
lp-lti
lb-fv
lb-fni
lp-fv
lp-fni
-
=
|
lb-lv
lp-lv
1
01
001
0001
00001
000001
12
012
0012
00012
000012
123
0123
00123
000123
1234
01234
001234
12345
012345
9
~
!
@
#
$
%
^
(
)
+
:
;
"
'
?
a
a
aa-
ab-
ac-
ad-
ae-
af-
az-
b
À
Á
à
á
Ā
ā
c
k
ch
cs
ccs
cscs
cz
Ċ
ċ
d
É
Ê
é
ê
Ē
ē
Ě
ě
e
f
fz
Ġ
ġ
g
h
hz
Ħ
ħ
i
iz
y
yz
Ż
ż
z
ГАИ
ГИБДД
lb-lni
lp-lni
#
# WL#5624, the same test with UCS2
#
ALTER TABLE t1 CONVERT TO CHARACTER SET ucs2 COLLATE ucs2_5624_1;
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
a
lp-ft
lp-lt
lp-fpi
lp-fsi
lp-fti
lp-lpi
lp-lsi
lp-lti
lb-fv
lb-fni
lp-fv
lp-fni
-
=
|
lb-lv
lp-lv
1
01
001
0001
00001
000001
12
012
0012
00012
000012
123
0123
00123
000123
1234
01234
001234
12345
012345
9
~
!
@
#
$
%
^
(
)
+
:
;
"
'
?
a
a
aa-
ab-
ac-
ad-
ae-
af-
az-
b
À
Á
à
á
Ā
ā
c
k
ch
cs
ccs
cscs
cz
Ċ
ċ
d
É
Ê
é
ê
Ē
ē
Ě
ě
e
f
fz
Ġ
ġ
g
h
hz
Ħ
ħ
i
iz
y
yz
Ż
ż
z
ГАИ
ГИБДД
lb-lni
lp-lni
DROP TABLE t1;
#
# WL#5624, unsupported features
#
SET NAMES utf8 COLLATE utf8_5624_2;
ERROR HY000: Unknown collation: 'utf8_5624_2'
SHOW WARNINGS;
Level Code Message
Error 1273 Unknown collation: 'utf8_5624_2'
Warning 1273 Syntax error at '[strength tertiary]'
#
# WL#5624, reset before primary ignorable
#
SET NAMES utf8 COLLATE utf8_5624_3;
ERROR HY000: Unknown collation: 'utf8_5624_3'
SHOW WARNINGS;
Level Code Message
Error 1273 Unknown collation: 'utf8_5624_3'
Warning 1273 Can't reset before a primary ignorable character U+A48C
#
# WL#5624, \u without hex digits is equal to {'\', 'u'}
#
SET NAMES utf8 COLLATE utf8_5624_4;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES ('\\'),('u'),('x'),('X');
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
a
\
x
u
X
DROP TABLE t1;
#
# WL#5624, testing Bengali collations
#
SET NAMES utf8, collation_connection=utf8_bengali_standard_ci;
CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES (_ucs2 0x09FA), (_ucs2 0x09F8), (_ucs2 0x09F9), (_ucs2 0x09F2);
INSERT INTO t1 VALUES (_ucs2 0x09DC), (_ucs2 0x09A109BC);
INSERT INTO t1 VALUES (_ucs2 0x09A2), (_ucs2 0x09DD), (_ucs2 0x09A209BC);
INSERT INTO t1 VALUES (_ucs2 0x09A3);
SELECT HEX(CONVERT(a USING ucs2)), HEX(a)
FROM t1 ORDER BY a, BINARY a;
HEX(CONVERT(a USING ucs2)) HEX(a)
09FA E0A7BA
09F8 E0A7B8
09F9 E0A7B9
09F2 E0A7B2
09A109BC E0A6A1E0A6BC
09DC E0A79C
09A2 E0A6A2
09A209BC E0A6A2E0A6BC
09DD E0A79D
09A3 E0A6A3
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf8_bengali_traditional_ci;
CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES
(_ucs2 0x0985),(_ucs2 0x0986),(_ucs2 0x0987),(_ucs2 0x0988),
(_ucs2 0x0989),(_ucs2 0x098A),(_ucs2 0x098B),(_ucs2 0x09E0),
(_ucs2 0x098C),(_ucs2 0x09E1),(_ucs2 0x098F),(_ucs2 0x0990),
(_ucs2 0x0993);
INSERT INTO t1 VALUES
(_ucs2 0x0994),(_ucs2 0x0982),(_ucs2 0x0983),(_ucs2 0x0981),
(_ucs2 0x099509CD), (_ucs2 0x099609CD), (_ucs2 0x099709CD), (_ucs2 0x099809CD),
(_ucs2 0x099909CD), (_ucs2 0x099A09CD), (_ucs2 0x099B09CD), (_ucs2 0x099C09CD),
(_ucs2 0x099D09CD), (_ucs2 0x099E09CD), (_ucs2 0x099F09CD), (_ucs2 0x09A009CD),
(_ucs2 0x09A109CD), (_ucs2 0x09A209CD), (_ucs2 0x09A309CD),
(_ucs2 0x09CE), (_ucs2 0x09A409CD200D), (_ucs2 0x09A409CD),
(_ucs2 0x09A509CD),(_ucs2 0x09A609CD),
(_ucs2 0x09A709CD), (_ucs2 0x09A809CD), (_ucs2 0x09AA09CD), (_ucs2 0x09AB09CD),
(_ucs2 0x09AC09CD), (_ucs2 0x09AD09CD), (_ucs2 0x09AE09CD), (_ucs2 0x09AF09CD),
(_ucs2 0x09B009CD), (_ucs2 0x09F009CD), (_ucs2 0x09B209CD), (_ucs2 0x09F109CD),
(_ucs2 0x09B609CD), (_ucs2 0x09B709CD), (_ucs2 0x09B809CD), (_ucs2 0x09B909CD);
INSERT INTO t1 VALUES
(_ucs2 0x099509CD0985),(_ucs2 0x0995),
(_ucs2 0x099509CD0986),(_ucs2 0x099509BE),
(_ucs2 0x099509CD0987),(_ucs2 0x099509BF),
(_ucs2 0x099509CD0988),(_ucs2 0x099509C0),
(_ucs2 0x099509CD0989),(_ucs2 0x099509C1),
(_ucs2 0x099509CD098A),(_ucs2 0x099509C2),
(_ucs2 0x099509CD098B),(_ucs2 0x099509C3),
(_ucs2 0x099509CD09E0),(_ucs2 0x099509C4),
(_ucs2 0x099509CD098C),(_ucs2 0x099509E2),
(_ucs2 0x099509CD09E1),(_ucs2 0x099509E3),
(_ucs2 0x099509CD098F),(_ucs2 0x099509C7),
(_ucs2 0x099509CD0990),(_ucs2 0x099509C8),
(_ucs2 0x099509CD0993),(_ucs2 0x099509CB),
(_ucs2 0x099509CD0994),(_ucs2 0x099509CC);
SELECT HEX(CONVERT(a USING ucs2)), HEX(a)
FROM t1 ORDER BY a, BINARY(a);
HEX(CONVERT(a USING ucs2)) HEX(a)
0985 E0A685
0986 E0A686
0987 E0A687
0988 E0A688
0989 E0A689
098A E0A68A
098B E0A68B
09E0 E0A7A0
098C E0A68C
09E1 E0A7A1
098F E0A68F
0990 E0A690
0993 E0A693
0994 E0A694
0982 E0A682
0983 E0A683
0981 E0A681
099509CD E0A695E0A78D
0995 E0A695
099509CD0985 E0A695E0A78DE0A685
099509BE E0A695E0A6BE
099509CD0986 E0A695E0A78DE0A686
099509BF E0A695E0A6BF
099509CD0987 E0A695E0A78DE0A687
099509C0 E0A695E0A780
099509CD0988 E0A695E0A78DE0A688
099509C1 E0A695E0A781
099509CD0989 E0A695E0A78DE0A689
099509C2 E0A695E0A782
099509CD098A E0A695E0A78DE0A68A
099509C3 E0A695E0A783
099509CD098B E0A695E0A78DE0A68B
099509C4 E0A695E0A784
099509CD09E0 E0A695E0A78DE0A7A0
099509CD098C E0A695E0A78DE0A68C
099509E2 E0A695E0A7A2
099509CD09E1 E0A695E0A78DE0A7A1
099509E3 E0A695E0A7A3
099509C7 E0A695E0A787
099509CD098F E0A695E0A78DE0A68F
099509C8 E0A695E0A788
099509CD0990 E0A695E0A78DE0A690
099509CB E0A695E0A78B
099509CD0993 E0A695E0A78DE0A693
099509CC E0A695E0A78C
099509CD0994 E0A695E0A78DE0A694
099609CD E0A696E0A78D
099709CD E0A697E0A78D
099809CD E0A698E0A78D
099909CD E0A699E0A78D
099A09CD E0A69AE0A78D
099B09CD E0A69BE0A78D
099C09CD E0A69CE0A78D
099D09CD E0A69DE0A78D
099E09CD E0A69EE0A78D
099F09CD E0A69FE0A78D
09A009CD E0A6A0E0A78D
09A109CD E0A6A1E0A78D
09A209CD E0A6A2E0A78D
09A309CD E0A6A3E0A78D
09A409CD E0A6A4E0A78D
09A409CD200D E0A6A4E0A78DE2808D
09CE E0A78E
09A509CD E0A6A5E0A78D
09A609CD E0A6A6E0A78D
09A709CD E0A6A7E0A78D
09A809CD E0A6A8E0A78D
09AA09CD E0A6AAE0A78D
09AB09CD E0A6ABE0A78D
09AC09CD E0A6ACE0A78D
09AD09CD E0A6ADE0A78D
09AE09CD E0A6AEE0A78D
09AF09CD E0A6AFE0A78D
09B009CD E0A6B0E0A78D
09F009CD E0A7B0E0A78D
09B209CD E0A6B2E0A78D
09F109CD E0A7B1E0A78D
09B609CD E0A6B6E0A78D
09B709CD E0A6B7E0A78D
09B809CD E0A6B8E0A78D
09B909CD E0A6B9E0A78D
SELECT
GROUP_CONCAT(HEX(CONVERT(a USING ucs2)) ORDER BY LENGTH(a), BINARY a)
FROM t1 GROUP BY a ORDER BY a;
GROUP_CONCAT(HEX(CONVERT(a USING ucs2)) ORDER BY LENGTH(a), BINARY a)
0985
0986
0987
0988
0989
098A
098B
09E0
098C
09E1
098F
0990
0993
0994
0982
0983
0981
099509CD
0995,099509CD0985
099509BE,099509CD0986
099509BF,099509CD0987
099509C0,099509CD0988
099509C1,099509CD0989
099509C2,099509CD098A
099509C3,099509CD098B
099509C4,099509CD09E0
099509E2,099509CD098C
099509E3,099509CD09E1
099509C7,099509CD098F
099509C8,099509CD0990
099509CB,099509CD0993
099509CC,099509CD0994
099609CD
099709CD
099809CD
099909CD
099A09CD
099B09CD
099C09CD
099D09CD
099E09CD
099F09CD
09A009CD
09A109CD
09A209CD
09A309CD
09CE,09A409CD,09A409CD200D
09A509CD
09A609CD
09A709CD
09A809CD
09AA09CD
09AB09CD
09AC09CD
09AD09CD
09AE09CD
09AF09CD
09B009CD
09F009CD
09B209CD
09F109CD
09B609CD
09B709CD
09B809CD
09B909CD
DROP TABLE t1;
#
# WL#5624, shift after, using expansion
#
SET NAMES utf8 COLLATE utf8_5624_5;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES ('0'),('1'),('0z'),(_ucs2 0x0030FF9D);
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'),('f'),('g'),('h'),('i');
INSERT INTO t1 VALUES ('j'),('k'),('l'),('m'),('n'),('o'),('p'),('q'),('r');
INSERT INTO t1 VALUES ('s'),('t'),('u'),('v'),('w'),('x'),('y'),('z');
INSERT INTO t1 VALUES ('aa'),('aaa');
INSERT INTO t1 VALUES ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I');
INSERT INTO t1 VALUES ('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R');
INSERT INTO t1 VALUES ('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z');
INSERT INTO t1 VALUES ('AA'),('AAA');
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
a
0
0z
0ン
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
aa
aaa
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
AA
AAA
1
DROP TABLE t1;
#
# End of WL#5624
#
#
# Bug#14197426 PARSE ERRORS IN LOADABLE UCA / LDML COLLATIONS ARE SILENTLY IGNORED
#
# Search for occurrences of [ERROR] Syntax error at '[strength tertiary]'
Occurances : 1

View File

@ -2240,6 +2240,112 @@ Z,z,Ź,ź,Ż,ż
ǁ
ǂ
ǃ
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_german2_ci;
group_concat(c1 order by c1)
÷
×
A,a,À,Á,Â,Ã,Å,à,á,â,ã,å,Ā,ā,Ă,ă,Ą,ą,Ǎ,ǎ,Ǟ,ǟ,Ǡ,ǡ,Ǻ,ǻ
AA,Aa,aA,aa
Ä,Æ,ä,æ
Ǣ,ǣ,Ǽ,ǽ
B,b
ƀ
Ɓ
Ƃ,ƃ
C,c,Ç,ç,Ć,ć,Ĉ,ĉ,Ċ,ċ,Č,č
CH,Ch,cH,ch
Ƈ,ƈ
D,d,Ď,ď
DZ,Dz,DŽ,Dž,dZ,dz,dŽ,dž,DŽ,Dž,dž,DZ,Dz,dz
Đ,đ
Ɖ
Ɗ
Ƌ,ƌ
Ð,ð
E,e,È,É,Ê,Ë,è,é,ê,ë,Ē,ē,Ĕ,ĕ,Ė,ė,Ę,ę,Ě,ě
Ǝ,ǝ
Ə
Ɛ
F,f
Ƒ,ƒ
G,g,Ĝ,ĝ,Ğ,ğ,Ġ,ġ,Ģ,ģ,Ǧ,ǧ,Ǵ,ǵ
Ǥ,ǥ
Ɠ
Ɣ
Ƣ,ƣ
H,h,Ĥ,ĥ
ƕ,Ƕ
Ħ,ħ
I,i,Ì,Í,Î,Ï,ì,í,î,ï,Ĩ,ĩ,Ī,ī,Ĭ,ĭ,Į,į,İ,Ǐ,ǐ
IJ,Ij,iJ,ij,IJ,ij
ı
Ɨ
Ɩ
J,j,Ĵ,ĵ,ǰ
K,k,Ķ,ķ,Ǩ,ǩ
Ƙ,ƙ
L,l,Ĺ,ĺ,Ļ,ļ,Ľ,ľ
Ŀ,ŀ
LJ,Lj,lJ,lj,LJ,Lj,lj
LL,Ll,lL,ll
Ł,ł
ƚ
ƛ
M,m
N,n,Ñ,ñ,Ń,ń,Ņ,ņ,Ň,ň,Ǹ,ǹ
NJ,Nj,nJ,nj,NJ,Nj,nj
Ɲ
ƞ
Ŋ,ŋ
O,o,Ò,Ó,Ô,Õ,ò,ó,ô,õ,Ō,ō,Ŏ,ŏ,Ő,ő,Ơ,ơ,Ǒ,ǒ,Ǫ,ǫ,Ǭ,ǭ
OE,Oe,oE,oe,Ö,ö,Œ,œ
Ø,ø,Ǿ,ǿ
Ɔ
Ɵ
P,p
Ƥ,ƥ
Q,q
ĸ
R,r,Ŕ,ŕ,Ŗ,ŗ,Ř,ř
RR,Rr,rR,rr
Ʀ
S,s,Ś,ś,Ŝ,ŝ,Ş,ş,Š,š,ſ
SS,Ss,sS,ss,ß
Ʃ
ƪ
T,t,Ţ,ţ,Ť,ť
ƾ
Ŧ,ŧ
ƫ
Ƭ,ƭ
Ʈ
U,u,Ù,Ú,Û,ù,ú,û,Ũ,ũ,Ū,ū,Ŭ,ŭ,Ů,ů,Ű,ű,Ų,ų,Ư,ư,Ǔ,ǔ,Ǖ,ǖ,Ǘ,ǘ,Ǚ,ǚ,Ǜ,ǜ
Ü,ü
Ɯ
Ʊ
V,v
Ʋ
W,w,Ŵ,ŵ
X,x
Y,y,Ý,ý,ÿ,Ŷ,ŷ,Ÿ
Ƴ,ƴ
Z,z,Ź,ź,Ż,ż,Ž,ž
ƍ
Ƶ,ƶ
Ʒ,Ǯ,ǯ
Ƹ,ƹ
ƺ
Þ,þ
ƿ,Ƿ
ƻ
Ƨ
Ƽ,ƽ
Ƅ
ʼn
ǀ
ǁ
ǂ
ǃ
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
@ -3192,3 +3298,45 @@ drop table t1;
#
# End of 5.5 tests
#
#
# WL#4013 Unicode german2 collation
#
SET collation_connection=utf8_german2_ci;
drop table if exists t1;
create table t1 as select repeat(' ', 64) as s1;
select collation(s1) from t1;
collation(s1)
utf8_german2_ci
delete from t1;
insert into t1 values ('a'),('ae'),(_latin1 0xE4);
insert into t1 values ('o'),('oe'),(_latin1 0xF6);
insert into t1 values ('s'),('ss'),(_latin1 0xDF);
insert into t1 values ('u'),('ue'),(_latin1 0xFC);
select s1, hex(s1) from t1 order by s1, binary s1;
s1 hex(s1)
a 61
ae 6165
ä C3A4
o 6F
oe 6F65
ö C3B6
s 73
ss 7373
ß C39F
u 75
ue 7565
ü C3BC
select group_concat(s1 order by binary s1) from t1 group by s1;
group_concat(s1 order by binary s1)
a
ae,ä
o
oe,ö
s
ss,ß
u
ue,ü
drop table t1;
#
# End of 5.6 tests
#

View File

@ -1162,5 +1162,52 @@ SELECT ExtractValue('<a><a>aa</a><b>bb</b></a>','(a)/a|(a)/b');
ExtractValue('<a><a>aa</a><b>bb</b></a>','(a)/a|(a)/b')
aa bb
#
# Bug#62429 XML: ExtractValue, UpdateXML max arg length 127 chars
#
CREATE TABLE t1 (id INT AUTO_INCREMENT, txt VARCHAR(1000), PRIMARY KEY(id));
INSERT INTO t1 (txt) VALUES
(CONCAT('<', REPEAT('a',127), '>127</', REPEAT('a',127), '>')),
(CONCAT('<', REPEAT('a',128), '>128</', REPEAT('a',128), '>')),
(CONCAT('<', REPEAT('a',63), '><', REPEAT('b',63), '>63/63</', REPEAT('b',63), '></', REPEAT('a',63),'>')),
(CONCAT('<', REPEAT('a',63), '><', REPEAT('b',64), '>63/64</', REPEAT('b',64), '></', REPEAT('a',63),'>'));
SELECT
txt,
EXTRACTVALUE(txt, CONCAT('/', REPEAT('a', 127))) as a127,
EXTRACTVALUE(txt, CONCAT('/', REPEAT('a', 128))) as a128,
EXTRACTVALUE(txt, CONCAT('//', REPEAT('b', 63))) as a63b63,
EXTRACTVALUE(txt, CONCAT('//', REPEAT('b', 64))) as a63b64
FROM t1;
txt <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>127</aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
a127 127
a128
a63b63
a63b64
txt <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>128</aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
a127
a128 128
a63b63
a63b64
txt <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa><bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>63/63</bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb></aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
a127
a128
a63b63 63/63
a63b64
txt <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa><bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>63/64</bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb></aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
a127
a128
a63b63
a63b64 63/64
SELECT UPDATEXML(txt, CONCAT('//', REPEAT('b', 63)), '63/63+') FROM t1;
UPDATEXML(txt, CONCAT('//', REPEAT('b', 63)), '63/63+') <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>127</aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
UPDATEXML(txt, CONCAT('//', REPEAT('b', 63)), '63/63+') <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>128</aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
UPDATEXML(txt, CONCAT('//', REPEAT('b', 63)), '63/63+') <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>63/63+</aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
UPDATEXML(txt, CONCAT('//', REPEAT('b', 63)), '63/63+') <aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa><bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb>63/64</bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb></aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
DROP TABLE t1;
CREATE TABLE t1 (a TEXT);
INSERT INTO t1 VALUES (CONCAT('<a><', REPEAT('b',128),'>b128</',REPEAT('b',128),'><',REPEAT('c',512),'>c512</',REPEAT('c',512),'></a>'));
SELECT ExtractValue (a, CONCAT('//',REPEAT('c',512))) AS c512 FROM t1;
c512 c512
DROP TABLE t1;
#
# End of 5.5 tests
#

File diff suppressed because one or more lines are too long

View File

@ -1,2 +1,2 @@
--character-sets-dir=$MYSQL_TEST_DIR/std_data/
--log-error=$MYSQLTEST_VARDIR/tmp/ctype_ldml_log.err

View File

@ -61,7 +61,6 @@ insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
#
# Bug#41084 full-text index added to custom UCA collation not working
#
@ -181,3 +180,188 @@ DROP TABLE t1;
SET NAMES utf8 COLLATE utf8_phone_ci;
SHOW COLLATION LIKE 'utf8_phone_ci';
SET NAMES utf8;
# make sure utf8mb4_test_400_ci is Unicode-4.0.0 based
SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a));
SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a));
SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_400_ci), hex(lower(@a));
SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_400_ci), hex(upper(@a));
--echo #
--echo # WL#5624 Collation customization improvements
--echo #
SET NAMES utf8 COLLATE utf8_5624_1;
CREATE TABLE t1 AS SELECT REPEAT(' ', 16) AS a LIMIT 0;
# Part 1,2,3: long contractions and expansions
# Part 7: Quaternary difference
INSERT INTO t1 VALUES ('012345'),('001234'),('000123'),('000012'),('000001');
INSERT INTO t1 VALUES ('12345'),('01234'),('00123'),('00012'),('00001');
INSERT INTO t1 VALUES ('1234'),('0123'),('0012'),('0001');
INSERT INTO t1 VALUES ('123'),('012'),('001');
INSERT INTO t1 VALUES ('12'),('01');
INSERT INTO t1 VALUES ('1'),('9');
INSERT INTO t1 VALUES ('ГАИ'),('ГИБДД');
# Part 4: reset before
# Part 6: characters rather than escape sequences
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e');
INSERT INTO t1 VALUES ('cz'),('Ċ'),('ċ');
INSERT INTO t1 VALUES ('f'),('fz'),('g'),('Ġ'),('ġ');
INSERT INTO t1 VALUES ('h'),('hz'),('GĦ'),('Għ'),('gĦ'),('għ');
INSERT INTO t1 VALUES ('i'),('iz'),('Ħ'),('ħ');
INSERT INTO t1 VALUES ('y'),('yz'),('z'),('Ż'),('ż');
INSERT INTO t1 VALUES ('ā'),('Ā'),('á'),('Á'),('à'),('À');
INSERT INTO t1 VALUES ('ē'),('é'),('ě'),('ê'),('Ē'),('É'),('Ě'),('Ê');
# Part 8: Abbreviated shift syntax
INSERT INTO t1 VALUES ('a'),('~'),('!'),('@'),('#'),('$'),('%'),('^');
INSERT INTO t1 VALUES ('('),(')'),('-'),('+'),('|'),('='),(':'),(';');
INSERT INTO t1 VALUES ('"'),('\''),('?');
# Part 9: Normal expansion syntax
INSERT INTO t1 VALUES ('ch'),('k'),('cs'),('ccs'),('cscs');
# Part 10: Previous context
INSERT INTO t1 VALUES ('aa-'),('ab-'),('ac-'),('ad-'),('ae-'),('af-'),('az-');
# Part 12: Logical reset positions
INSERT INTO t1 VALUES ('lp-fni'),('lp-lni');
INSERT INTO t1 VALUES ('lp-fpi'),('lp-lpi');
INSERT INTO t1 VALUES ('lp-fsi'),('lp-lsi');
INSERT INTO t1 VALUES ('lp-fti'),('lp-lti');
INSERT INTO t1 VALUES ('lp-ft'),('lp-lt');
INSERT INTO t1 VALUES ('lp-fv'),('lp-lv');
# Logical positions with reset before
INSERT INTO t1 VALUES ('lb-fni'),('lb-lni');
INSERT INTO t1 VALUES ('lb-fv'),('lb-lv');
# Part 5: Long tailoring
INSERT INTO t1 VALUES (_ucs2 0x3106),(_ucs2 0x3110), (_ucs2 0x3111), (_ucs2 0x3112);
INSERT INTO t1 VALUES (_ucs2 0x32A3), (_ucs2 0x3231);
INSERT INTO t1 VALUES (_ucs2 0x84D9), (_ucs2 0x98F5), (_ucs2 0x7CF3), (_ucs2 0x5497);
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY a;
--echo #
--echo # WL#5624, the same test with UCS2
--echo #
ALTER TABLE t1 CONVERT TO CHARACTER SET ucs2 COLLATE ucs2_5624_1;
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
DROP TABLE t1;
--echo #
--echo # WL#5624, unsupported features
--echo #
# Part 13: More verbosity
--error ER_UNKNOWN_COLLATION
SET NAMES utf8 COLLATE utf8_5624_2;
SHOW WARNINGS;
--echo #
--echo # WL#5624, reset before primary ignorable
--echo #
--error ER_UNKNOWN_COLLATION
SET NAMES utf8 COLLATE utf8_5624_3;
SHOW WARNINGS;
--echo #
--echo # WL#5624, \u without hex digits is equal to {'\\', 'u'}
--echo #
SET NAMES utf8 COLLATE utf8_5624_4;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES ('\\'),('u'),('x'),('X');
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
DROP TABLE t1;
--echo #
--echo # WL#5624, testing Bengali collations
--echo #
SET NAMES utf8, collation_connection=utf8_bengali_standard_ci;
CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES (_ucs2 0x09FA), (_ucs2 0x09F8), (_ucs2 0x09F9), (_ucs2 0x09F2);
INSERT INTO t1 VALUES (_ucs2 0x09DC), (_ucs2 0x09A109BC);
INSERT INTO t1 VALUES (_ucs2 0x09A2), (_ucs2 0x09DD), (_ucs2 0x09A209BC);
INSERT INTO t1 VALUES (_ucs2 0x09A3);
SELECT HEX(CONVERT(a USING ucs2)), HEX(a)
FROM t1 ORDER BY a, BINARY a;
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf8_bengali_traditional_ci;
CREATE TABLE t1 AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES
(_ucs2 0x0985),(_ucs2 0x0986),(_ucs2 0x0987),(_ucs2 0x0988),
(_ucs2 0x0989),(_ucs2 0x098A),(_ucs2 0x098B),(_ucs2 0x09E0),
(_ucs2 0x098C),(_ucs2 0x09E1),(_ucs2 0x098F),(_ucs2 0x0990),
(_ucs2 0x0993);
INSERT INTO t1 VALUES
(_ucs2 0x0994),(_ucs2 0x0982),(_ucs2 0x0983),(_ucs2 0x0981),
(_ucs2 0x099509CD), (_ucs2 0x099609CD), (_ucs2 0x099709CD), (_ucs2 0x099809CD),
(_ucs2 0x099909CD), (_ucs2 0x099A09CD), (_ucs2 0x099B09CD), (_ucs2 0x099C09CD),
(_ucs2 0x099D09CD), (_ucs2 0x099E09CD), (_ucs2 0x099F09CD), (_ucs2 0x09A009CD),
(_ucs2 0x09A109CD), (_ucs2 0x09A209CD), (_ucs2 0x09A309CD),
(_ucs2 0x09CE), (_ucs2 0x09A409CD200D), (_ucs2 0x09A409CD),
(_ucs2 0x09A509CD),(_ucs2 0x09A609CD),
(_ucs2 0x09A709CD), (_ucs2 0x09A809CD), (_ucs2 0x09AA09CD), (_ucs2 0x09AB09CD),
(_ucs2 0x09AC09CD), (_ucs2 0x09AD09CD), (_ucs2 0x09AE09CD), (_ucs2 0x09AF09CD),
(_ucs2 0x09B009CD), (_ucs2 0x09F009CD), (_ucs2 0x09B209CD), (_ucs2 0x09F109CD),
(_ucs2 0x09B609CD), (_ucs2 0x09B709CD), (_ucs2 0x09B809CD), (_ucs2 0x09B909CD);
INSERT INTO t1 VALUES
(_ucs2 0x099509CD0985),(_ucs2 0x0995),
(_ucs2 0x099509CD0986),(_ucs2 0x099509BE),
(_ucs2 0x099509CD0987),(_ucs2 0x099509BF),
(_ucs2 0x099509CD0988),(_ucs2 0x099509C0),
(_ucs2 0x099509CD0989),(_ucs2 0x099509C1),
(_ucs2 0x099509CD098A),(_ucs2 0x099509C2),
(_ucs2 0x099509CD098B),(_ucs2 0x099509C3),
(_ucs2 0x099509CD09E0),(_ucs2 0x099509C4),
(_ucs2 0x099509CD098C),(_ucs2 0x099509E2),
(_ucs2 0x099509CD09E1),(_ucs2 0x099509E3),
(_ucs2 0x099509CD098F),(_ucs2 0x099509C7),
(_ucs2 0x099509CD0990),(_ucs2 0x099509C8),
(_ucs2 0x099509CD0993),(_ucs2 0x099509CB),
(_ucs2 0x099509CD0994),(_ucs2 0x099509CC);
SELECT HEX(CONVERT(a USING ucs2)), HEX(a)
FROM t1 ORDER BY a, BINARY(a);
SELECT
GROUP_CONCAT(HEX(CONVERT(a USING ucs2)) ORDER BY LENGTH(a), BINARY a)
FROM t1 GROUP BY a ORDER BY a;
DROP TABLE t1;
--echo #
--echo # WL#5624, shift after, using expansion
--echo #
SET NAMES utf8 COLLATE utf8_5624_5;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS a LIMIT 0;
INSERT INTO t1 VALUES ('0'),('1'),('0z'),(_ucs2 0x0030FF9D);
INSERT INTO t1 VALUES ('a'),('b'),('c'),('d'),('e'),('f'),('g'),('h'),('i');
INSERT INTO t1 VALUES ('j'),('k'),('l'),('m'),('n'),('o'),('p'),('q'),('r');
INSERT INTO t1 VALUES ('s'),('t'),('u'),('v'),('w'),('x'),('y'),('z');
INSERT INTO t1 VALUES ('aa'),('aaa');
INSERT INTO t1 VALUES ('A'),('B'),('C'),('D'),('E'),('F'),('G'),('H'),('I');
INSERT INTO t1 VALUES ('J'),('K'),('L'),('M'),('N'),('O'),('P'),('Q'),('R');
INSERT INTO t1 VALUES ('S'),('T'),('U'),('V'),('W'),('X'),('Y'),('Z');
INSERT INTO t1 VALUES ('AA'),('AAA');
SELECT a FROM t1 ORDER BY a, LENGTH(a), BINARY(a);
DROP TABLE t1;
--echo #
--echo # End of WL#5624
--echo #
--echo #
--echo # Bug#14197426 PARSE ERRORS IN LOADABLE UCA / LDML COLLATIONS ARE SILENTLY IGNORED
--echo #
--let $out_file= $MYSQLTEST_VARDIR/tmp/ctype_ldml_log.err
--let OUTF= $out_file
# Error messages are not seen in error log in embedded version
--let EMBEDDED=`SELECT version() LIKE '%embedded%'`
--echo # Search for occurrences of [ERROR] Syntax error at '[strength tertiary]'
perl;
use strict;
# Path of the error log to scan; the test exports it through OUTF.
my $outf= $ENV{'OUTF'} or die "OUTF not set";
# Three-argument open with a lexical handle, so the file name can never be
# interpreted as an open mode or a pipe specification.
open(my $log_fh, '<', $outf) or die("Unable to open $outf: $!\n");
# grep in scalar context yields the number of matching log lines.
my $count_error= grep(/\[ERROR\] Syntax error at '\[strength tertiary\]'/i, <$log_fh>);
# Error messages are not seen in the error log in the embedded version,
# so EMBEDDED (the result of the LIKE '%embedded%' check) is added to the count.
$count_error+= $ENV{"EMBEDDED"};
print "Occurances : $count_error\n";
close($log_fh);
EOF

View File

@ -215,6 +215,7 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_esperanto_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_hungarian_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_croatian_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_german2_ci;
drop table t1;
@ -580,3 +581,14 @@ drop table t1;
--echo #
--echo # End of 5.5 tests
--echo #
--echo #
--echo # WL#4013 Unicode german2 collation
--echo #
SET collation_connection=utf8_german2_ci;
--source include/ctype_german.inc
--echo #
--echo # End of 5.6 tests
--echo #

View File

@ -673,6 +673,35 @@ SELECT UPDATEXML('<a><c><a>x</a></c></a>','(a)/a','<b />');
SELECT UPDATEXML('<a><c><a>x</a></c></a>','(a)//a','<b />');
SELECT ExtractValue('<a><a>aa</a><b>bb</b></a>','(a)/a|(a)/b');
--echo #
--echo # Bug#62429 XML: ExtractValue, UpdateXML max arg length 127 chars
--echo #
CREATE TABLE t1 (id INT AUTO_INCREMENT, txt VARCHAR(1000), PRIMARY KEY(id));
INSERT INTO t1 (txt) VALUES
(CONCAT('<', REPEAT('a',127), '>127</', REPEAT('a',127), '>')),
(CONCAT('<', REPEAT('a',128), '>128</', REPEAT('a',128), '>')),
(CONCAT('<', REPEAT('a',63), '><', REPEAT('b',63), '>63/63</', REPEAT('b',63), '></', REPEAT('a',63),'>')),
(CONCAT('<', REPEAT('a',63), '><', REPEAT('b',64), '>63/64</', REPEAT('b',64), '></', REPEAT('a',63),'>'));
--vertical_results
SELECT
txt,
EXTRACTVALUE(txt, CONCAT('/', REPEAT('a', 127))) as a127,
EXTRACTVALUE(txt, CONCAT('/', REPEAT('a', 128))) as a128,
EXTRACTVALUE(txt, CONCAT('//', REPEAT('b', 63))) as a63b63,
EXTRACTVALUE(txt, CONCAT('//', REPEAT('b', 64))) as a63b64
FROM t1;
SELECT UPDATEXML(txt, CONCAT('//', REPEAT('b', 63)), '63/63+') FROM t1;
DROP TABLE t1;
# This will call my_str_realloc_mysqld()
CREATE TABLE t1 (a TEXT);
INSERT INTO t1 VALUES (CONCAT('<a><', REPEAT('b',128),'>b128</',REPEAT('b',128),'><',REPEAT('c',512),'>c512</',REPEAT('c',512),'></a>'));
SELECT ExtractValue (a, CONCAT('//',REPEAT('c',512))) AS c512 FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #

View File

@ -24,6 +24,7 @@
#ifdef HAVE_UCA_COLLATIONS
#ifdef HAVE_CHARSET_ucs2
extern struct charset_info_st my_charset_ucs2_german2_uca_ci;
extern struct charset_info_st my_charset_ucs2_icelandic_uca_ci;
extern struct charset_info_st my_charset_ucs2_latvian_uca_ci;
extern struct charset_info_st my_charset_ucs2_romanian_uca_ci;
@ -48,6 +49,7 @@ extern struct charset_info_st my_charset_ucs2_croatian_uca_ci;
#ifdef HAVE_CHARSET_utf32
extern struct charset_info_st my_charset_utf32_german2_uca_ci;
extern struct charset_info_st my_charset_utf32_icelandic_uca_ci;
extern struct charset_info_st my_charset_utf32_latvian_uca_ci;
extern struct charset_info_st my_charset_utf32_romanian_uca_ci;
@ -72,6 +74,7 @@ extern struct charset_info_st my_charset_utf32_croatian_uca_ci;
#ifdef HAVE_CHARSET_utf16
extern struct charset_info_st my_charset_utf16_german2_uca_ci;
extern struct charset_info_st my_charset_utf16_icelandic_uca_ci;
extern struct charset_info_st my_charset_utf16_latvian_uca_ci;
extern struct charset_info_st my_charset_utf16_romanian_uca_ci;
@ -96,6 +99,7 @@ extern struct charset_info_st my_charset_utf16_croatian_uca_ci;
#ifdef HAVE_CHARSET_utf8
extern struct charset_info_st my_charset_utf8_german2_uca_ci;
extern struct charset_info_st my_charset_utf8_icelandic_uca_ci;
extern struct charset_info_st my_charset_utf8_latvian_uca_ci;
extern struct charset_info_st my_charset_utf8_romanian_uca_ci;
@ -122,6 +126,7 @@ extern struct charset_info_st my_charset_utf8_general_cs;
#endif
#ifdef HAVE_CHARSET_utf8mb4
extern struct charset_info_st my_charset_utf8mb4_german2_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_latvian_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_romanian_uca_ci;
@ -211,6 +216,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_ucs2_general_mysql500_ci);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_ucs2_unicode_ci);
add_compiled_collation(&my_charset_ucs2_german2_uca_ci);
add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
add_compiled_collation(&my_charset_ucs2_latvian_uca_ci);
add_compiled_collation(&my_charset_ucs2_romanian_uca_ci);
@ -248,6 +254,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#endif
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf8_unicode_ci);
add_compiled_collation(&my_charset_utf8_german2_uca_ci);
add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf8_latvian_uca_ci);
add_compiled_collation(&my_charset_utf8_romanian_uca_ci);
@ -277,6 +284,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8mb4_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
add_compiled_collation(&my_charset_utf8mb4_german2_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
@ -308,6 +316,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf16le_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf16_unicode_ci);
add_compiled_collation(&my_charset_utf16_german2_uca_ci);
add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf16_latvian_uca_ci);
add_compiled_collation(&my_charset_utf16_romanian_uca_ci);
@ -337,6 +346,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf32_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf32_unicode_ci);
add_compiled_collation(&my_charset_utf32_german2_uca_ci);
add_compiled_collation(&my_charset_utf32_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf32_latvian_uca_ci);
add_compiled_collation(&my_charset_utf32_romanian_uca_ci);

View File

@ -214,6 +214,8 @@ copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from)
to->max_sort_char= from->max_sort_char;
to->mbminlen= from->mbminlen;
to->mbmaxlen= from->mbmaxlen;
to->caseup_multiply= from->caseup_multiply;
to->casedn_multiply= from->casedn_multiply;
to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
MY_CS_STRNXFRM | MY_CS_UNICODE;
}
@ -349,6 +351,7 @@ static int add_collation(struct charset_info_st *cs)
return MY_XML_OK;
}
/**
Report character set initialization errors and warnings.
Be silent by default: no warnings on the client side.
@ -361,13 +364,53 @@ default_reporter(enum loglevel level __attribute__ ((unused)),
}
my_error_reporter my_charset_error_reporter= default_reporter;
/**
Wrappers for memory functions my_malloc (and friends)
with C-compatible API without extra "myf" argument.
*/
/* Adapter for my_once_alloc(): takes only a size and always passes MY_WME. */
static void *
my_once_alloc_c(size_t size)
{
  void *mem= my_once_alloc(size, MYF(MY_WME));
  return mem;
}
/* Adapter for my_malloc(): takes only a size and always passes MY_WME. */
static void *
my_malloc_c(size_t size)
{
  void *mem= my_malloc(size, MYF(MY_WME));
  return mem;
}
/*
  Adapter for my_realloc(): takes only the old pointer and the new size,
  always passing MY_WME|MY_ALLOW_ZERO_PTR.
*/
static void *
my_realloc_c(void *old, size_t size)
{
  void *resized= my_realloc(old, size, MYF(MY_WME|MY_ALLOW_ZERO_PTR));
  return resized;
}
/**
  Prepare a character set loader that works through the mysys memory
  wrappers, the currently installed charset error reporter and the
  file-local add_collation() callback.

  @param loader  Loader to initialize
*/
void
my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader)
{
  /* Collation registration and error reporting callbacks */
  loader->add_collation= add_collation;
  loader->reporter= my_charset_error_reporter;

  /* Memory management callbacks */
  loader->free= my_free;
  loader->realloc= my_realloc_c;
  loader->malloc= my_malloc_c;
  loader->once_alloc= my_once_alloc_c;

  /* Start with an empty error message */
  loader->error[0]= '\0';
}
#define MY_MAX_ALLOWED_BUF 1024*1024
#define MY_CHARSET_INDEX "Index.xml"
const char *charsets_dir= NULL;
static my_bool my_read_charset_file(const char *filename, myf myflags)
static my_bool
my_read_charset_file(MY_CHARSET_LOADER *loader,
const char *filename,
myf myflags)
{
uchar *buf;
int fd;
@ -386,14 +429,11 @@ static my_bool my_read_charset_file(const char *filename, myf myflags)
if (tmp_len != len)
goto error;
if (my_parse_charset_xml((char*) buf,len,add_collation))
if (my_parse_charset_xml(loader, (char *) buf, len))
{
#ifdef NOT_YET
printf("ERROR at line %d pos %d '%s'\n",
my_xml_error_lineno(&p)+1,
my_xml_error_pos(&p),
my_xml_error_string(&p));
#endif
my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n",
MYF(0), filename, loader->error);
goto error;
}
my_free(buf);
@ -437,11 +477,6 @@ void add_compiled_collation(struct charset_info_st *cs)
cs->state|= MY_CS_AVAILABLE;
}
static void *cs_alloc(size_t size)
{
return my_once_alloc(size, MYF(MY_WME));
}
static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
@ -450,6 +485,7 @@ static void init_available_charsets(void)
{
char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
struct charset_info_st **cs;
MY_CHARSET_LOADER loader;
bzero((char*) &all_charsets,sizeof(all_charsets));
init_compiled_charsets(MYF(0));
@ -468,8 +504,9 @@ static void init_available_charsets(void)
}
}
my_charset_loader_init_mysys(&loader);
strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
my_read_charset_file(fname, MYF(0));
my_read_charset_file(&loader, fname, MYF(0));
}
@ -558,7 +595,8 @@ const char *get_charset_name(uint charset_number)
}
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
static CHARSET_INFO *
get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
{
char buf[FN_REFLEN];
struct charset_info_st *cs;
@ -578,17 +616,21 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */
{
MY_CHARSET_LOADER loader;
strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
my_read_charset_file(buf,flags);
my_charset_loader_init_mysys(&loader);
my_read_charset_file(&loader, buf, flags);
}
if (cs->state & MY_CS_AVAILABLE)
{
if (!(cs->state & MY_CS_READY))
{
if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
(cs->coll->init && cs->coll->init(cs, cs_alloc)))
if ((cs->cset->init && cs->cset->init(cs, loader)) ||
(cs->coll->init && cs->coll->init(cs, loader)))
{
cs= NULL;
}
else
cs->state|= MY_CS_READY;
}
@ -605,6 +647,8 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
CHARSET_INFO *cs;
MY_CHARSET_LOADER loader;
if (cs_number == default_charset_info->number)
return default_charset_info;
@ -612,8 +656,9 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags)
if (cs_number >= array_elements(all_charsets))
return NULL;
cs=get_internal_charset(cs_number, flags);
my_charset_loader_init_mysys(&loader);
cs= get_internal_charset(&loader, cs_number, flags);
if (!cs && (flags & MY_WME))
{
@ -626,29 +671,58 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags)
return cs;
}
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
/**
Find collation by name: extended version of get_charset_by_name()
to return error messages to the caller.
@param loader Character set loader
@param name Collation name
@param flags Flags
@return NULL on error, pointer to collation on success
*/
CHARSET_INFO *
my_collation_get_by_name(MY_CHARSET_LOADER *loader,
const char *name, myf flags)
{
uint cs_number;
CHARSET_INFO *cs;
my_pthread_once(&charsets_initialized, init_available_charsets);
cs_number=get_collation_number(cs_name);
cs= cs_number ? get_internal_charset(cs_number,flags) : NULL;
cs_number= get_collation_number(name);
my_charset_loader_init_mysys(loader);
cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
if (!cs && (flags & MY_WME))
{
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file);
}
return cs;
}
CHARSET_INFO *get_charset_by_csname(const char *cs_name,
uint cs_flags,
myf flags)
/*
  Find a collation by name. Convenience form of my_collation_get_by_name()
  that supplies its own temporary mysys-based loader.
*/
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  MY_CHARSET_LOADER mysys_loader;
  CHARSET_INFO *found;

  my_charset_loader_init_mysys(&mysys_loader);
  found= my_collation_get_by_name(&mysys_loader, cs_name, flags);
  return found;
}
/**
Find character set by name: extended version of get_charset_by_csname()
to return error messages to the caller.
@param loader Character set loader
@param name Collation name
@param cs_flags Character set flags (e.g. default or binary collation)
@param flags Flags
@return NULL on error, pointer to collation on success
*/
CHARSET_INFO *
my_charset_get_by_name(MY_CHARSET_LOADER *loader,
const char *cs_name, uint cs_flags, myf flags)
{
uint cs_number;
CHARSET_INFO *cs;
@ -658,7 +732,7 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name,
my_pthread_once(&charsets_initialized, init_available_charsets);
cs_number= get_charset_number(cs_name, cs_flags);
cs= cs_number ? get_internal_charset(cs_number, flags) : NULL;
cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
if (!cs && (flags & MY_WME))
{
@ -671,6 +745,15 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name,
}
/*
  Find a character set by name. Convenience form of my_charset_get_by_name()
  that supplies its own temporary mysys-based loader.
*/
CHARSET_INFO *
get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags)
{
  MY_CHARSET_LOADER mysys_loader;
  CHARSET_INFO *found;

  my_charset_loader_init_mysys(&mysys_loader);
  found= my_charset_get_by_name(&mysys_loader, cs_name, cs_flags, flags);
  return found;
}
/**
Resolve character set by the character set name (utf8, latin1, ...).
@ -868,8 +951,11 @@ CHARSET_INFO *fs_character_set()
As we're now interested in cp932 only,
let's just detect it using strcmp().
*/
fs_cset_cache= !strcmp(buf, "cp932") ?
&my_charset_cp932_japanese_ci : &my_charset_bin;
fs_cset_cache=
#ifdef HAVE_CHARSET_cp932
!strcmp(buf, "cp932") ? &my_charset_cp932_japanese_ci :
#endif
&my_charset_bin;
}
return fs_cset_cache;
}

View File

@ -1286,7 +1286,7 @@ struct my_rnd_struct sql_rand; ///< used by sql_class.cc:THD::THD()
@param level log message level
@param format log message format string
*/
C_MODE_START
static void buffered_option_error_reporter(enum loglevel level,
const char *format, ...)
{
@ -1299,6 +1299,33 @@ static void buffered_option_error_reporter(enum loglevel level,
buffered_logs.buffer(level, buffer);
}
/**
Character set and collation error reporter that prints to sql error log.
@param level log message level
@param format log message format string
This routine is used to print character set and collation
warnings and errors inside an already running mysqld server,
e.g. when a character set or collation is requested for the very first time
and its initialization does not go well for some reasons.
Note: At early mysqld initialization stage,
when error log is not yet available,
we use buffered_option_error_reporter() instead,
to print general character set subsystem initialization errors,
such as Index.xml syntax problems, bad XML tag hierarchy, etc.
*/
static void charset_error_reporter(enum loglevel level,
                                   const char *format, ...)
{
  /* Forward the variadic arguments unchanged to the log printer. */
  va_list ap;

  va_start(ap, format);
  vprint_msg_to_log(level, format, ap);
  va_end(ap);
}
C_MODE_END
struct passwd *user_info;
static pthread_t select_thread;
#endif
@ -3377,6 +3404,7 @@ void my_message_sql(uint error, const char *str, myf MyFlags)
#ifndef EMBEDDED_LIBRARY
extern "C" void *my_str_malloc_mysqld(size_t size);
extern "C" void my_str_free_mysqld(void *ptr);
extern "C" void *my_str_realloc_mysqld(void *ptr, size_t size);
void *my_str_malloc_mysqld(size_t size)
{
@ -3388,6 +3416,11 @@ void my_str_free_mysqld(void *ptr)
{
my_free(ptr);
}
/* Server-side realloc hook: delegates to my_realloc() with the MY_FAE flag. */
void *my_str_realloc_mysqld(void *ptr, size_t size)
{
  void *resized= my_realloc(ptr, size, MYF(MY_FAE));
  return resized;
}
#endif /* EMBEDDED_LIBRARY */
@ -4543,6 +4576,15 @@ static int init_server_components()
buffered_logs.cleanup();
#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
#ifndef EMBEDDED_LIBRARY
/*
Now that the logger is available, redirect character set
errors directly to the logger
(instead of the buffered_logs used at the server startup time).
*/
my_charset_error_reporter= charset_error_reporter;
#endif
if (xid_cache_init())
{
sql_print_error("Out of memory");
@ -5221,10 +5263,11 @@ int mysqld_main(int argc, char **argv)
#endif
/*
Initialize my_str_malloc() and my_str_free()
Initialize my_str_malloc(), my_str_realloc() and my_str_free()
*/
my_str_malloc= &my_str_malloc_mysqld;
my_str_free= &my_str_free_mysqld;
my_str_realloc= &my_str_realloc_mysqld;
/*
init signals & alarm

View File

@ -745,6 +745,35 @@ typedef struct system_status_var
void mark_transaction_to_rollback(THD *thd, bool all);
/**
Get collation by name, send error to client on failure.
@param name Collation name
@param name_cs Character set of the name string
@return
@retval NULL on error
@retval Pointer to CHARSET_INFO with the given name on success
*/
inline CHARSET_INFO *
mysqld_collation_get_by_name(const char *name,
                             CHARSET_INFO *name_cs= system_charset_info)
{
  MY_CHARSET_LOADER loader;
  my_charset_loader_init_mysys(&loader);

  CHARSET_INFO *cs= my_collation_get_by_name(&loader, name, MYF(0));
  if (cs)
    return cs;                      /* Found: nothing to report */

  /* Not found: raise ER_UNKNOWN_COLLATION with the name converted for display */
  ErrConvString err(name, name_cs);
  my_error(ER_UNKNOWN_COLLATION, MYF(0), err.ptr());

  /* If the loader recorded a message, pass it on as a warning */
  if (loader.error[0])
  {
    push_warning_printf(current_thd,
                        Sql_condition::WARN_LEVEL_WARN,
                        ER_UNKNOWN_COLLATION, "%s", loader.error);
  }
  return cs;
}
#ifdef MYSQL_SERVER
void free_tmp_table(THD *thd, TABLE *entry);

View File

@ -565,6 +565,8 @@ class ErrConvString : public ErrConv
public:
ErrConvString(const char *str_arg, size_t len_arg, CHARSET_INFO *cs_arg)
: ErrConv(), str(str_arg), len(len_arg), cs(cs_arg) {}
ErrConvString(const char *str_arg, CHARSET_INFO *cs_arg)
: ErrConv(), str(str_arg), len(strlen(str_arg)), cs(cs_arg) {}
ErrConvString(String *s)
: ErrConv(), str(s->ptr()), len(s->length()), cs(s->charset()) {}
const char *ptr() const

View File

@ -6504,11 +6504,8 @@ old_or_new_charset_name_or_default:
collation_name:
ident_or_text
{
if (!($$=get_charset_by_name($1.str,MYF(0))))
{
my_error(ER_UNKNOWN_COLLATION, MYF(0), $1.str);
if (!($$= mysqld_collation_get_by_name($1.str)))
MYSQL_YYABORT;
}
}
;
@ -6552,19 +6549,13 @@ unicode:
}
| UNICODE_SYM BINARY
{
if (!(Lex->charset=get_charset_by_name("ucs2_bin", MYF(0))))
{
if (!(Lex->charset= mysqld_collation_get_by_name("ucs2_bin")))
my_error(ER_UNKNOWN_COLLATION, MYF(0), "ucs2_bin");
MYSQL_YYABORT;
}
}
| BINARY UNICODE_SYM
{
if (!(Lex->charset=get_charset_by_name("ucs2_bin", MYF(0))))
{
my_error(ER_UNKNOWN_COLLATION, MYF(0), "ucs2_bin");
if (!(Lex->charset= mysqld_collation_get_by_name("ucs2_bin")))
MYSQL_YYABORT;
}
}
;

View File

@ -32,3 +32,6 @@ ENDIF()
# Avoid dependencies on perfschema data defined in mysys
ADD_DEFINITIONS(-DDISABLE_MYSQL_THREAD_H)
ADD_CONVENIENCE_LIBRARY(strings ${STRINGS_SOURCES})
ADD_EXECUTABLE(conf_to_src EXCLUDE_FROM_ALL conf_to_src.c)
TARGET_LINK_LIBRARIES(conf_to_src strings)

View File

@ -145,12 +145,35 @@ static int add_collation(struct charset_info_st *cs)
}
/*
  No-op reporter: both the level and the format string are unused
  and the body is empty, so nothing is printed.
*/
static void
default_reporter(enum loglevel level __attribute__ ((unused)),
const char *format __attribute__ ((unused)),
...)
{
}
/*
  Initialize a character set loader for this stand-alone tool:
  memory callbacks map straight to the C library allocator,
  reporting goes to the silent default_reporter, and collations
  are registered through the file-local add_collation().
*/
static void
my_charset_loader_init(MY_CHARSET_LOADER *loader)
{
  /* Collation registration and error reporting callbacks */
  loader->add_collation= add_collation;
  loader->reporter= default_reporter;

  /* Memory callbacks; once_alloc is plain malloc as well */
  loader->free= free;
  loader->realloc= realloc;
  loader->malloc= malloc;
  loader->once_alloc= malloc;

  /* Start with an empty error message */
  loader->error[0]= '\0';
}
static int my_read_charset_file(const char *filename)
{
char buf[MAX_BUF];
int fd;
uint len;
MY_CHARSET_LOADER loader;
my_charset_loader_init(&loader);
if ((fd=open(filename,O_RDONLY)) < 0)
{
fprintf(stderr,"Can't open '%s'\n",filename);
@ -161,14 +184,10 @@ static int my_read_charset_file(const char *filename)
DBUG_ASSERT(len < MAX_BUF);
close(fd);
if (my_parse_charset_xml(buf,len,add_collation))
if (my_parse_charset_xml(&loader, buf, len))
{
#if 0
printf("ERROR at line %d pos %d '%s'\n",
my_xml_error_lineno(&p)+1,
my_xml_error_pos(&p),
my_xml_error_string(&p));
#endif
fprintf(stderr, "Error while parsing '%s': %s\n", filename, loader.error);
exit(1);
}
return FALSE;
@ -207,8 +226,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
fprintf(f," sort_order_%s, /* sort_order */\n",cs->name);
else
fprintf(f," NULL, /* sort_order */\n");
fprintf(f," NULL, /* contractions */\n");
fprintf(f," NULL, /* sort_order_big*/\n");
fprintf(f," NULL, /* uca */\n");
fprintf(f," to_uni_%s, /* to_uni */\n",cs->name);
}
else
@ -221,13 +239,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
fprintf(f," NULL, /* lower */\n");
fprintf(f," NULL, /* upper */\n");
fprintf(f," NULL, /* sort order */\n");
fprintf(f," NULL, /* contractions */\n");
fprintf(f," NULL, /* sort_order_big*/\n");
fprintf(f," NULL, /* uca */\n");
fprintf(f," NULL, /* to_uni */\n");
}
fprintf(f," NULL, /* from_uni */\n");
fprintf(f," my_unicase_default, /* caseinfo */\n");
fprintf(f," &my_unicase_default, /* caseinfo */\n");
fprintf(f," NULL, /* state map */\n");
fprintf(f," NULL, /* ident map */\n");
fprintf(f," 1, /* strxfrm_multiply*/\n");

View File

@ -177,7 +177,7 @@ static const uchar sort_order_big5[]=
};
static MY_UNICASE_INFO cA2[256]=
static MY_UNICASE_CHARACTER cA2[256]=
{
/* A200-A20F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -370,7 +370,7 @@ static MY_UNICASE_INFO cA2[256]=
};
static MY_UNICASE_INFO cA3[256]=
static MY_UNICASE_CHARACTER cA3[256]=
{
/* A300-A30F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -563,7 +563,7 @@ static MY_UNICASE_INFO cA3[256]=
};
static MY_UNICASE_INFO cC7[256]=
static MY_UNICASE_CHARACTER cC7[256]=
{
/* C700-C70F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -756,7 +756,7 @@ static MY_UNICASE_INFO cC7[256]=
};
static MY_UNICASE_INFO *my_caseinfo_big5[256]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_big5[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -793,6 +793,13 @@ static MY_UNICASE_INFO *my_caseinfo_big5[256]=
};
/* Case info descriptor for big5: maxchar plus the my_caseinfo_pages_big5 table. */
static MY_UNICASE_INFO my_caseinfo_big5=
{
0xFFFF, /* maxchar */
my_caseinfo_pages_big5 /* page */
};
static uint16 big5strokexfrm(uint16 i)
{
if ((i == 0xA440) || (i == 0xA441)) return 0xA440;
@ -6926,11 +6933,10 @@ struct charset_info_st my_charset_big5_chinese_ci=
to_lower_big5,
to_upper_big5,
sort_order_big5,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_big5, /* caseinfo */
&my_caseinfo_big5, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -6959,11 +6965,10 @@ struct charset_info_st my_charset_big5_bin=
to_lower_big5,
to_upper_big5,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_big5, /* caseinfo */
&my_caseinfo_big5, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -69,7 +69,7 @@ static const uchar bin_char_array[] =
static my_bool
my_coll_init_8bit_bin(struct charset_info_st *cs,
void *(*alloc)(size_t) __attribute__((unused)))
MY_CHARSET_LOADER *loader __attribute__((unused)))
{
cs->max_sort_char=255;
return FALSE;
@ -571,11 +571,10 @@ struct charset_info_st my_charset_bin =
bin_char_array, /* to_lower */
bin_char_array, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -197,7 +197,7 @@ static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
static MY_UNICASE_INFO c81[256]=
static MY_UNICASE_CHARACTER c81[256]=
{
/* 8100-810F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -407,7 +407,7 @@ static MY_UNICASE_INFO c81[256]=
};
static MY_UNICASE_INFO c82[256]=
static MY_UNICASE_CHARACTER c82[256]=
{
/* 8200-820F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -615,7 +615,7 @@ static MY_UNICASE_INFO c82[256]=
};
static MY_UNICASE_INFO c83[256]=
static MY_UNICASE_CHARACTER c83[256]=
{
/* 8300-830F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -825,7 +825,7 @@ static MY_UNICASE_INFO c83[256]=
};
static MY_UNICASE_INFO c84[256]=
static MY_UNICASE_CHARACTER c84[256]=
{
/* 8400-840F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1035,7 +1035,7 @@ static MY_UNICASE_INFO c84[256]=
};
static MY_UNICASE_INFO c87[256]=
static MY_UNICASE_CHARACTER c87[256]=
{
/* 8700-870F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1245,7 +1245,7 @@ static MY_UNICASE_INFO c87[256]=
};
static MY_UNICASE_INFO cEE[256]=
static MY_UNICASE_CHARACTER cEE[256]=
{
/* EE00-EE0F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1456,7 +1456,7 @@ static MY_UNICASE_INFO cEE[256]=
};
static MY_UNICASE_INFO cFA[256]=
static MY_UNICASE_CHARACTER cFA[256]=
{
/* FA00-FA0F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1666,7 +1666,7 @@ static MY_UNICASE_INFO cFA[256]=
};
static MY_UNICASE_INFO *my_caseinfo_cp932[256]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_cp932[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1703,7 +1703,13 @@ static MY_UNICASE_INFO *my_caseinfo_cp932[256]=
};
static int my_strnncoll_cp932_internal(CHARSET_INFO *cs,
/* Case info descriptor for cp932: maxchar plus the my_caseinfo_pages_cp932 table. */
MY_UNICASE_INFO my_caseinfo_cp932=
{
0xFFFF, /* maxchar */
my_caseinfo_pages_cp932 /* page */
};
static int my_strnncoll_cp932_internal(const CHARSET_INFO *cs,
const uchar **a_res, size_t a_length,
const uchar **b_res, size_t b_length)
{
@ -34834,11 +34840,10 @@ struct charset_info_st my_charset_cp932_japanese_ci=
to_lower_cp932,
to_upper_cp932,
sort_order_cp932,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_cp932, /* caseinfo */
&my_caseinfo_cp932, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -34866,11 +34871,10 @@ struct charset_info_st my_charset_cp932_bin=
to_lower_cp932,
to_upper_cp932,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_cp932, /* caseinfo */
&my_caseinfo_cp932, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -613,11 +613,10 @@ struct charset_info_st my_charset_latin2_czech_ci =
to_lower_czech,
to_upper_czech,
sort_order_czech,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
tab_8859_2_uni, /* tab_to_uni */
idx_uni_8859_2, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
4, /* strxfrm_multiply */

View File

@ -216,7 +216,7 @@ static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
}
static MY_UNICASE_INFO cA3[256]=
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -421,7 +421,7 @@ static MY_UNICASE_INFO cA3[256]=
};
static MY_UNICASE_INFO cA5[256]=
static MY_UNICASE_CHARACTER cA5[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -626,7 +626,7 @@ static MY_UNICASE_INFO cA5[256]=
};
static MY_UNICASE_INFO cA7[256]=
static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -831,7 +831,7 @@ static MY_UNICASE_INFO cA7[256]=
};
static MY_UNICASE_INFO cA8[256]=
static MY_UNICASE_CHARACTER cA8[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1036,7 +1036,7 @@ static MY_UNICASE_INFO cA8[256]=
};
static MY_UNICASE_INFO cA9[256]=
static MY_UNICASE_CHARACTER cA9[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1241,7 +1241,7 @@ static MY_UNICASE_INFO cA9[256]=
};
static MY_UNICASE_INFO cAC[256]=
static MY_UNICASE_CHARACTER cAC[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1446,7 +1446,7 @@ static MY_UNICASE_INFO cAC[256]=
};
static MY_UNICASE_INFO *my_caseinfo_euckr[256]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_euckr[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1483,6 +1483,13 @@ static MY_UNICASE_INFO *my_caseinfo_euckr[256]=
};
/* Case info descriptor for euckr: maxchar plus the my_caseinfo_pages_euckr table. */
static MY_UNICASE_INFO my_caseinfo_euckr=
{
0xFFFF, /* maxchar */
my_caseinfo_pages_euckr /* page */
};
/* page 0 0x8141-0xC8FE */
static const uint16 tab_ksc5601_uni0[]={
0xAC02,0xAC03,0xAC05,0xAC06,0xAC0B,0xAC0C,0xAC0D,0xAC0E,
@ -10016,11 +10023,10 @@ struct charset_info_st my_charset_euckr_korean_ci=
to_lower_euc_kr,
to_upper_euc_kr,
sort_order_euc_kr,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_euckr, /* caseinfo */
&my_caseinfo_euckr, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -10049,11 +10055,10 @@ struct charset_info_st my_charset_euckr_bin=
to_lower_euc_kr,
to_upper_euc_kr,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_euckr, /* caseinfo */
&my_caseinfo_euckr, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -203,7 +203,7 @@ static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
/* Case info pages for JIS-X-0208 range */
static MY_UNICASE_INFO cA2[256]=
static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -324,7 +324,7 @@ static MY_UNICASE_INFO cA2[256]=
};
static MY_UNICASE_INFO cA3[256]=
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -445,7 +445,7 @@ static MY_UNICASE_INFO cA3[256]=
};
static MY_UNICASE_INFO cA6[256]=
static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -566,7 +566,7 @@ static MY_UNICASE_INFO cA6[256]=
};
static MY_UNICASE_INFO cA7[256]=
static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -687,7 +687,7 @@ static MY_UNICASE_INFO cA7[256]=
};
static MY_UNICASE_INFO cAD[256]=
static MY_UNICASE_CHARACTER cAD[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -810,7 +810,7 @@ static MY_UNICASE_INFO cAD[256]=
/* Case info pages for JIS-X-0212 range */
static MY_UNICASE_INFO c8FA6[256]=
static MY_UNICASE_CHARACTER c8FA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -931,7 +931,7 @@ static MY_UNICASE_INFO c8FA6[256]=
};
static MY_UNICASE_INFO c8FA7[256]=
static MY_UNICASE_CHARACTER c8FA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1052,7 +1052,7 @@ static MY_UNICASE_INFO c8FA7[256]=
};
static MY_UNICASE_INFO c8FA9[256]=
static MY_UNICASE_CHARACTER c8FA9[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1173,7 +1173,7 @@ static MY_UNICASE_INFO c8FA9[256]=
};
static MY_UNICASE_INFO c8FAA[256]=
static MY_UNICASE_CHARACTER c8FAA[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1294,7 +1294,7 @@ static MY_UNICASE_INFO c8FAA[256]=
};
static MY_UNICASE_INFO c8FAB[256]=
static MY_UNICASE_CHARACTER c8FAB[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1415,7 +1415,7 @@ static MY_UNICASE_INFO c8FAB[256]=
};
static MY_UNICASE_INFO c8FF3[256]=
static MY_UNICASE_CHARACTER c8FF3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1536,7 +1536,7 @@ static MY_UNICASE_INFO c8FF3[256]=
};
static MY_UNICASE_INFO c8FF4[256]=
static MY_UNICASE_CHARACTER c8FF4[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1657,7 +1657,7 @@ static MY_UNICASE_INFO c8FF4[256]=
};
static MY_UNICASE_INFO *my_caseinfo_eucjpms[512]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_eucjpms[512]=
{
/* JIS-X-0208 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
@ -1729,7 +1729,14 @@ static MY_UNICASE_INFO *my_caseinfo_eucjpms[512]=
};
static const uint16 jisx0208_eucjpms_to_unicode[65536]=
/*
  Case-information descriptor for eucjpms.
  Pairs the 512-slot page table my_caseinfo_pages_eucjpms with the
  "maxchar" value 0xFFFF; the address of this object is what the eucjpms
  charset definitions store in their caseinfo member.
*/
static MY_UNICASE_INFO my_caseinfo_eucjpms=
{
  0xFFFF,                      /* maxchar */
  my_caseinfo_pages_eucjpms    /* page    */
};
static uint16 jisx0208_eucjpms_to_unicode[65536]=
{
0x0000, 0x0001, 0x0002, 0x0003, /* 0000 */
0x0004, 0x0005, 0x0006, 0x0007,
@ -67559,11 +67566,10 @@ struct charset_info_st my_charset_eucjpms_japanese_ci=
to_lower_eucjpms,
to_upper_eucjpms,
sort_order_eucjpms,
NULL, /* sort_order_big*/
NULL, /* contractions */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_eucjpms,/* caseinfo */
&my_caseinfo_eucjpms,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -67592,11 +67598,10 @@ struct charset_info_st my_charset_eucjpms_bin=
to_lower_eucjpms,
to_upper_eucjpms,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_eucjpms,/* caseinfo */
&my_caseinfo_eucjpms,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -6616,11 +6616,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_dec8_swedish_ci, /* lower */
to_upper_dec8_swedish_ci, /* upper */
sort_order_dec8_swedish_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_dec8_swedish_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6649,11 +6648,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp850_general_ci, /* lower */
to_upper_cp850_general_ci, /* upper */
sort_order_cp850_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp850_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6682,11 +6680,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_german1_ci, /* lower */
to_upper_latin1_german1_ci, /* upper */
sort_order_latin1_german1_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin1_german1_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6715,11 +6712,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hp8_english_ci, /* lower */
to_upper_hp8_english_ci, /* upper */
sort_order_hp8_english_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_hp8_english_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6748,11 +6744,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8r_general_ci, /* lower */
to_upper_koi8r_general_ci, /* upper */
sort_order_koi8r_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_koi8r_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6781,11 +6776,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_general_ci, /* lower */
to_upper_latin2_general_ci, /* upper */
sort_order_latin2_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin2_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6814,11 +6808,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_swe7_swedish_ci, /* lower */
to_upper_swe7_swedish_ci, /* upper */
sort_order_swe7_swedish_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_swe7_swedish_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6847,11 +6840,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_ascii_general_ci, /* lower */
to_upper_ascii_general_ci, /* upper */
sort_order_ascii_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_ascii_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6880,11 +6872,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_bulgarian_ci, /* lower */
to_upper_cp1251_bulgarian_ci, /* upper */
sort_order_cp1251_bulgarian_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1251_bulgarian_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6913,11 +6904,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_danish_ci, /* lower */
to_upper_latin1_danish_ci, /* upper */
sort_order_latin1_danish_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin1_danish_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6946,11 +6936,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hebrew_general_ci, /* lower */
to_upper_hebrew_general_ci, /* upper */
sort_order_hebrew_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_hebrew_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -6979,11 +6968,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_estonian_cs, /* lower */
to_upper_latin7_estonian_cs, /* upper */
sort_order_latin7_estonian_cs, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin7_estonian_cs, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7012,11 +7000,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_hungarian_ci, /* lower */
to_upper_latin2_hungarian_ci, /* upper */
sort_order_latin2_hungarian_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin2_hungarian_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7045,11 +7032,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8u_general_ci, /* lower */
to_upper_koi8u_general_ci, /* upper */
sort_order_koi8u_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_koi8u_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7078,11 +7064,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_ukrainian_ci, /* lower */
to_upper_cp1251_ukrainian_ci, /* upper */
sort_order_cp1251_ukrainian_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1251_ukrainian_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7111,11 +7096,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_greek_general_ci, /* lower */
to_upper_greek_general_ci, /* upper */
sort_order_greek_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_greek_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7144,11 +7128,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_general_ci, /* lower */
to_upper_cp1250_general_ci, /* upper */
sort_order_cp1250_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1250_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7177,11 +7160,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_croatian_ci, /* lower */
to_upper_latin2_croatian_ci, /* upper */
sort_order_latin2_croatian_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin2_croatian_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7210,11 +7192,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1257_lithuanian_ci, /* lower */
to_upper_cp1257_lithuanian_ci, /* upper */
sort_order_cp1257_lithuanian_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1257_lithuanian_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7243,11 +7224,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin5_turkish_ci, /* lower */
to_upper_latin5_turkish_ci, /* upper */
sort_order_latin5_turkish_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin5_turkish_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7276,11 +7256,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_armscii8_general_ci, /* lower */
to_upper_armscii8_general_ci, /* upper */
sort_order_armscii8_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_armscii8_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7309,11 +7288,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp866_general_ci, /* lower */
to_upper_cp866_general_ci, /* upper */
sort_order_cp866_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp866_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7342,11 +7320,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_keybcs2_general_ci, /* lower */
to_upper_keybcs2_general_ci, /* upper */
sort_order_keybcs2_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_keybcs2_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7375,11 +7352,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macce_general_ci, /* lower */
to_upper_macce_general_ci, /* upper */
sort_order_macce_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_macce_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7408,11 +7384,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macroman_general_ci, /* lower */
to_upper_macroman_general_ci, /* upper */
sort_order_macroman_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_macroman_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7441,11 +7416,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp852_general_ci, /* lower */
to_upper_cp852_general_ci, /* upper */
sort_order_cp852_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp852_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7474,11 +7448,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_general_ci, /* lower */
to_upper_latin7_general_ci, /* upper */
sort_order_latin7_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin7_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7507,11 +7480,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_general_cs, /* lower */
to_upper_latin7_general_cs, /* upper */
sort_order_latin7_general_cs, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin7_general_cs, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7540,11 +7512,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macce_bin, /* lower */
to_upper_macce_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_macce_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7573,11 +7544,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_croatian_ci, /* lower */
to_upper_cp1250_croatian_ci, /* upper */
sort_order_cp1250_croatian_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1250_croatian_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7606,11 +7576,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_general_ci, /* lower */
to_upper_latin1_general_ci, /* upper */
sort_order_latin1_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin1_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7639,11 +7608,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_general_cs, /* lower */
to_upper_latin1_general_cs, /* upper */
sort_order_latin1_general_cs, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin1_general_cs, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7672,11 +7640,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_bin, /* lower */
to_upper_cp1251_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1251_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7705,11 +7672,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_general_ci, /* lower */
to_upper_cp1251_general_ci, /* upper */
sort_order_cp1251_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1251_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7738,11 +7704,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_general_cs, /* lower */
to_upper_cp1251_general_cs, /* upper */
sort_order_cp1251_general_cs, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1251_general_cs, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7771,11 +7736,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macroman_bin, /* lower */
to_upper_macroman_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_macroman_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7804,11 +7768,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1256_general_ci, /* lower */
to_upper_cp1256_general_ci, /* upper */
sort_order_cp1256_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1256_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7837,11 +7800,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1257_bin, /* lower */
to_upper_cp1257_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1257_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7870,11 +7832,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1257_general_ci, /* lower */
to_upper_cp1257_general_ci, /* upper */
sort_order_cp1257_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1257_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7903,11 +7864,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_armscii8_bin, /* lower */
to_upper_armscii8_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_armscii8_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7936,11 +7896,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_ascii_bin, /* lower */
to_upper_ascii_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_ascii_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -7969,11 +7928,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_bin, /* lower */
to_upper_cp1250_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1250_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8002,11 +7960,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1256_bin, /* lower */
to_upper_cp1256_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1256_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8035,11 +7992,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp866_bin, /* lower */
to_upper_cp866_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp866_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8068,11 +8024,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_dec8_bin, /* lower */
to_upper_dec8_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_dec8_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8101,11 +8056,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_greek_bin, /* lower */
to_upper_greek_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_greek_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8134,11 +8088,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hebrew_bin, /* lower */
to_upper_hebrew_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_hebrew_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8167,11 +8120,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hp8_bin, /* lower */
to_upper_hp8_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_hp8_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8200,11 +8152,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_keybcs2_bin, /* lower */
to_upper_keybcs2_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_keybcs2_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8233,11 +8184,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8r_bin, /* lower */
to_upper_koi8r_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_koi8r_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8266,11 +8216,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8u_bin, /* lower */
to_upper_koi8u_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_koi8u_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8299,11 +8248,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_bin, /* lower */
to_upper_latin2_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin2_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8332,11 +8280,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin5_bin, /* lower */
to_upper_latin5_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin5_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8365,11 +8312,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_bin, /* lower */
to_upper_latin7_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin7_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8398,11 +8344,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp850_bin, /* lower */
to_upper_cp850_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp850_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8431,11 +8376,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp852_bin, /* lower */
to_upper_cp852_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp852_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8464,11 +8408,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_swe7_bin, /* lower */
to_upper_swe7_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_swe7_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8497,11 +8440,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_geostd8_general_ci, /* lower */
to_upper_geostd8_general_ci, /* upper */
sort_order_geostd8_general_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_geostd8_general_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8530,11 +8472,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_geostd8_bin, /* lower */
to_upper_geostd8_bin, /* upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_geostd8_bin, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8563,11 +8504,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_spanish_ci, /* lower */
to_upper_latin1_spanish_ci, /* upper */
sort_order_latin1_spanish_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_latin1_spanish_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8596,11 +8536,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_polish_ci, /* lower */
to_upper_cp1250_polish_ci, /* upper */
sort_order_cp1250_polish_ci, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
to_uni_cp1250_polish_ci, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@ -8628,11 +8567,10 @@ struct charset_info_st compiled_charsets[] = {
NULL, /* lower */
NULL, /* upper */
NULL, /* sort order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* to_uni */
NULL, /* from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/

View File

@ -177,7 +177,7 @@ static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
}
static MY_UNICASE_INFO cA2[256]=
static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -298,7 +298,7 @@ static MY_UNICASE_INFO cA2[256]=
};
static MY_UNICASE_INFO cA3[256]=
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -419,7 +419,7 @@ static MY_UNICASE_INFO cA3[256]=
};
static MY_UNICASE_INFO cA6[256]=
static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -540,7 +540,7 @@ static MY_UNICASE_INFO cA6[256]=
};
static MY_UNICASE_INFO cA7[256]=
static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -661,7 +661,7 @@ static MY_UNICASE_INFO cA7[256]=
};
static MY_UNICASE_INFO cA8[256]=
static MY_UNICASE_CHARACTER cA8[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -782,7 +782,7 @@ static MY_UNICASE_INFO cA8[256]=
};
static MY_UNICASE_INFO *my_caseinfo_gb2312[256]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_gb2312[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -819,6 +819,13 @@ static MY_UNICASE_INFO *my_caseinfo_gb2312[256]=
};
/*
  Case-info descriptor for gb2312: wraps the page table
  my_caseinfo_pages_gb2312 together with a maxchar value.
*/
static MY_UNICASE_INFO my_caseinfo_gb2312=
{
0xFFFF, /* maxchar */
my_caseinfo_pages_gb2312 /* page: table of 256 page pointers */
};
/* page 0 0x2121-0x2658 */
static const uint16 tab_gb2312_uni0[]={
0x3000,0x3001,0x3002,0x30FB,0x02C9,0x02C7,0x00A8,0x3003,
@ -6419,11 +6426,10 @@ struct charset_info_st my_charset_gb2312_chinese_ci=
to_lower_gb2312,
to_upper_gb2312,
sort_order_gb2312,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_gb2312, /* caseinfo */
&my_caseinfo_gb2312,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -6451,11 +6457,10 @@ struct charset_info_st my_charset_gb2312_bin=
to_lower_gb2312,
to_upper_gb2312,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_gb2312, /* caseinfo */
&my_caseinfo_gb2312,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -136,7 +136,8 @@ static const uchar to_upper_gbk[]=
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377',
};
static MY_UNICASE_INFO cA2[256]=
static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -340,7 +341,7 @@ static MY_UNICASE_INFO cA2[256]=
{0xA2FF,0xA2FF,0xA2FF}
};
static MY_UNICASE_INFO cA3[256]=
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -545,7 +546,7 @@ static MY_UNICASE_INFO cA3[256]=
};
static MY_UNICASE_INFO cA6[256]=
static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -750,7 +751,7 @@ static MY_UNICASE_INFO cA6[256]=
};
static MY_UNICASE_INFO cA7[256]=
static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -955,7 +956,7 @@ static MY_UNICASE_INFO cA7[256]=
};
static MY_UNICASE_INFO *my_caseinfo_gbk[256]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_gbk[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -991,7 +992,15 @@ static MY_UNICASE_INFO *my_caseinfo_gbk[256]=
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
static const uchar sort_order_gbk[]=
/*
  Case-info descriptor for gbk: wraps the page table
  my_caseinfo_pages_gbk together with a maxchar value.
*/
static MY_UNICASE_INFO my_caseinfo_gbk=
{
0xFFFF, /* maxchar */
my_caseinfo_pages_gbk /* page: table of 256 page pointers */
};
static uchar sort_order_gbk[]=
{
'\000','\001','\002','\003','\004','\005','\006','\007',
'\010','\011','\012','\013','\014','\015','\016','\017',
@ -10809,11 +10818,10 @@ struct charset_info_st my_charset_gbk_chinese_ci=
to_lower_gbk,
to_upper_gbk,
sort_order_gbk,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_gbk, /* caseinfo */
&my_caseinfo_gbk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -10841,11 +10849,10 @@ struct charset_info_st my_charset_gbk_bin=
to_lower_gbk,
to_upper_gbk,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_gbk, /* caseinfo */
&my_caseinfo_gbk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -437,11 +437,10 @@ struct charset_info_st my_charset_latin1=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -736,11 +735,10 @@ struct charset_info_st my_charset_latin1_german2_ci=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1_de,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
2, /* strxfrm_multiply */
@ -769,11 +767,10 @@ struct charset_info_st my_charset_latin1_bin=
to_lower_latin1,
to_upper_latin1,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -62,11 +62,11 @@ size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
}
/*
  Return the case-mapping entry addressed by (page, offs), or NULL.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  @param cs    character set whose caseinfo is consulted
  @param page  index into the caseinfo page table
  @param offs  index of the entry inside the selected page
  @return      address of the entry, or NULL when cs has no caseinfo
               or the selected page pointer is NULL
*/
static inline MY_UNICASE_INFO*
get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
static inline MY_UNICASE_CHARACTER*
get_case_info_for_ch(const CHARSET_INFO *cs, uint page, uint offs)
{
MY_UNICASE_INFO *p;
return cs->caseinfo ? ((p= cs->caseinfo[page]) ? &p[offs] : NULL) : NULL;
MY_UNICASE_CHARACTER *p;
/* && binds tighter than ?:, so p is only indexed after it was assigned */
return cs->caseinfo && (p= cs->caseinfo->page[page]) ? &p[offs] : NULL;
}
@ -89,7 +89,7 @@ size_t my_caseup_mb(CHARSET_INFO * cs, char *src, size_t srclen,
{
if ((l=my_ismbchar(cs, src, srcend)))
{
MY_UNICASE_INFO *ch;
MY_UNICASE_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
*src++= ch->toupper >> 8;
@ -124,7 +124,7 @@ size_t my_casedn_mb(CHARSET_INFO * cs, char *src, size_t srclen,
{
if ((l= my_ismbchar(cs, src, srcend)))
{
MY_UNICASE_INFO *ch;
MY_UNICASE_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
*src++= ch->tolower >> 8;
@ -168,7 +168,7 @@ my_casefold_mb_varlen(CHARSET_INFO *cs,
size_t mblen= my_ismbchar(cs, src, srcend);
if (mblen)
{
MY_UNICASE_INFO *ch;
MY_UNICASE_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
int code= is_upper ? ch->toupper : ch->tolower;
@ -696,7 +696,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
size_t maxcharlen= res_length / cs->mbmaxlen;
my_bool have_contractions= my_cs_have_contractions(cs);
const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
{
@ -764,8 +764,8 @@ fill_max_and_min:
'ab\min\min\min\min' and 'ab\max\max\max\max'.
*/
if (have_contractions && ptr + 1 < end &&
my_cs_can_be_contraction_head(cs, (uchar) *ptr))
if (contractions && ptr + 1 < end &&
my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
{
/* Ptr[0] is a contraction head. */
@ -787,8 +787,8 @@ fill_max_and_min:
is not a contraction, then we put only ptr[0],
and continue with ptr[1] on the next loop.
*/
if (my_cs_can_be_contraction_tail(cs, (uchar) ptr[1]) &&
my_cs_contraction2_weight(cs, (uchar) ptr[0], (uchar) ptr[1]))
if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1]))
{
/* Contraction found */
if (maxcharlen == 1 || min_str + 1 >= min_end)
@ -853,7 +853,7 @@ my_like_range_generic(CHARSET_INFO *cs,
char *max_end= max_str + res_length;
size_t charlen= res_length / cs->mbmaxlen;
size_t res_length_diff;
my_bool have_contractions= my_cs_have_contractions(cs);
const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
for ( ; charlen > 0; charlen--)
{
@ -921,8 +921,8 @@ my_like_range_generic(CHARSET_INFO *cs,
goto pad_min_max;
}
if (have_contractions &&
my_cs_can_be_contraction_head(cs, wc) &&
if (contractions &&
my_uca_can_be_contraction_head(contractions, wc) &&
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
{
const uint16 *weight;
@ -933,8 +933,8 @@ my_like_range_generic(CHARSET_INFO *cs,
goto pad_min_max;
}
if (my_cs_can_be_contraction_tail(cs, wc2) &&
(weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
if (my_uca_can_be_contraction_tail(contractions, wc2) &&
(weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
{
/* Contraction found */
if (charlen == 1)

View File

@ -1163,12 +1163,12 @@ static int pcmp(const void * f, const void * s)
return res;
}
static my_bool create_fromuni(struct charset_info_st *cs,
void *(*alloc)(size_t))
static my_bool
create_fromuni(struct charset_info_st *cs,
MY_CHARSET_LOADER *loader)
{
uni_idx idx[PLANE_NUM];
int i,n;
struct my_uni_idx_st *tab_from_uni;
/*
Check that Unicode map is loaded.
@ -1217,7 +1217,8 @@ static my_bool create_fromuni(struct charset_info_st *cs,
numchars=idx[i].uidx.to-idx[i].uidx.from+1;
if (!(idx[i].uidx.tab= tab= (uchar*)
alloc(numchars * sizeof(*idx[i].uidx.tab))))
(loader->once_alloc) (numchars *
sizeof(*idx[i].uidx.tab))))
return TRUE;
bzero(tab,numchars*sizeof(*tab));
@ -1235,25 +1236,25 @@ static my_bool create_fromuni(struct charset_info_st *cs,
/* Allocate and fill reverse table for each plane */
n=i;
if (!(cs->tab_from_uni= tab_from_uni= (struct my_uni_idx_st*)
alloc(sizeof(MY_UNI_IDX)*(n+1))))
if (!(cs->tab_from_uni= (MY_UNI_IDX *)
(loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + 1))))
return TRUE;
for (i=0; i< n; i++)
tab_from_uni[i]= idx[i].uidx;
((struct my_uni_idx_st*)cs->tab_from_uni)[i]= idx[i].uidx;
/* Set end-of-list marker */
bzero(&tab_from_uni[i],sizeof(MY_UNI_IDX));
bzero((char*) &cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
return FALSE;
}
/*
  Initialize an 8-bit character set: set both case multipliers to 1 and
  the pad character to a space, then delegate to create_fromuni().

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form (the allocator callback is
  replaced by a MY_CHARSET_LOADER pointer).

  @return  whatever create_fromuni() returns
*/
static my_bool my_cset_init_8bit(struct charset_info_st *cs,
void *(*alloc)(size_t))
static my_bool
my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
cs->caseup_multiply= 1;
cs->casedn_multiply= 1;
cs->pad_char= ' ';
return create_fromuni(cs, alloc);
return create_fromuni(cs, loader);
}
static void set_max_sort_char(struct charset_info_st *cs)
@ -1276,7 +1277,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
}
/*
  Collation initializer for simple collations: calls
  set_max_sort_char(cs) and always returns FALSE.

  NOTE: this is a diff hunk; the second parameter line is shown in its
  old form followed by its new form. The parameter is unused either way.
*/
static my_bool my_coll_init_simple(struct charset_info_st *cs,
void *(*alloc)(size_t) __attribute__((unused)))
MY_CHARSET_LOADER *loader __attribute__((unused)))
{
set_max_sort_char(cs);
return FALSE;
}
View File

@ -197,7 +197,7 @@ static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
static MY_UNICASE_INFO c81[256]=
static MY_UNICASE_CHARACTER c81[256]=
{
/* 8100-810F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -407,7 +407,7 @@ static MY_UNICASE_INFO c81[256]=
};
static MY_UNICASE_INFO c82[256]=
static MY_UNICASE_CHARACTER c82[256]=
{
/* 8200-820F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -615,7 +615,7 @@ static MY_UNICASE_INFO c82[256]=
};
static MY_UNICASE_INFO c83[256]=
static MY_UNICASE_CHARACTER c83[256]=
{
/* 8300-830F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -825,7 +825,7 @@ static MY_UNICASE_INFO c83[256]=
};
static MY_UNICASE_INFO c84[256]=
static MY_UNICASE_CHARACTER c84[256]=
{
/* 8400-840F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -1035,7 +1035,7 @@ static MY_UNICASE_INFO c84[256]=
};
static MY_UNICASE_INFO *my_caseinfo_sjis[256]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_sjis[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1072,7 +1072,14 @@ static MY_UNICASE_INFO *my_caseinfo_sjis[256]=
};
static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
/*
  Case-info descriptor for sjis: wraps the page table
  my_caseinfo_pages_sjis together with a maxchar value.
*/
static MY_UNICASE_INFO my_caseinfo_sjis=
{
0xFFFF, /* maxchar */
my_caseinfo_pages_sjis /* page: table of 256 page pointers */
};
static int my_strnncoll_sjis_internal(const CHARSET_INFO *cs,
const uchar **a_res, size_t a_length,
const uchar **b_res, size_t b_length)
{
@ -34204,11 +34211,10 @@ struct charset_info_st my_charset_sjis_japanese_ci=
to_lower_sjis,
to_upper_sjis,
sort_order_sjis,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_sjis, /* caseinfo */
&my_caseinfo_sjis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -34236,11 +34242,10 @@ struct charset_info_st my_charset_sjis_bin=
to_lower_sjis,
to_upper_sjis,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_sjis, /* caseinfo */
&my_caseinfo_sjis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -894,11 +894,10 @@ struct charset_info_st my_charset_tis620_thai_ci=
to_lower_tis620,
to_upper_tis620,
sort_order_tis620,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
4, /* strxfrm_multiply */
@ -926,11 +925,10 @@ struct charset_info_st my_charset_tis620_bin=
to_lower_tis620,
to_upper_tis620,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

File diff suppressed because it is too large Load Diff

View File

@ -1161,31 +1161,31 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
/*
  Replace *wc with the tolower value from the case-info page selected
  by *wc >> 8; the low 8 bits of *wc index the entry inside the page.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  *wc is left unchanged when it is out of range (old form: page >= 256;
  new form: *wc > uni_plane->maxchar) or when the page pointer is NULL.
*/
static inline void
my_tolower_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
if (page < 256 && uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].tolower;
MY_UNICASE_CHARACTER *page;
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].tolower;
}
/*
  Replace *wc with the toupper value from the case-info page selected
  by *wc >> 8; the low 8 bits of *wc index the entry inside the page.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  *wc is left unchanged when it is out of range (old form: page >= 256;
  new form: *wc > uni_plane->maxchar) or when the page pointer is NULL.
*/
static inline void
my_toupper_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
if (page < 256 && uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].toupper;
MY_UNICASE_CHARACTER *page;
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].toupper;
}
static inline void
my_tosort_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
if (page < 256)
if (*wc <= uni_plane->maxchar)
{
if (uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].sort;
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].sort;
}
else
{
@ -1194,6 +1194,7 @@ my_tosort_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
}
static size_t
my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
@ -1204,7 +1205,7 @@ my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@ -1227,7 +1228,7 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int res;
const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
{
@ -1251,7 +1252,7 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@ -1277,7 +1278,7 @@ my_strnncoll_utf16(CHARSET_INFO *cs,
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen;
const uchar *te= t + tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s < se && t < te)
{
@ -1341,7 +1342,7 @@ my_strnncollsp_utf16(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen, *te= t + tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT((slen % 2) == 0);
DBUG_ASSERT((tlen % 2) == 0);
@ -1483,7 +1484,7 @@ my_wildcmp_utf16_ci(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many, uni_plane);
}
@ -1695,11 +1696,10 @@ struct charset_info_st my_charset_utf16_general_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -1728,11 +1728,10 @@ struct charset_info_st my_charset_utf16_bin=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -1864,11 +1863,10 @@ struct charset_info_st my_charset_utf16le_general_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -1897,11 +1895,10 @@ struct charset_info_st my_charset_utf16le_bin=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -1950,31 +1947,31 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
/*
  Replace *wc with the tolower value from the case-info page selected
  by *wc >> 8; the low 8 bits of *wc index the entry inside the page.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  *wc is left unchanged when it is out of range (old form: page >= 256;
  new form: *wc > uni_plane->maxchar) or when the page pointer is NULL.
*/
static inline void
my_tolower_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
if (page < 256 && uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].tolower;
MY_UNICASE_CHARACTER *page;
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].tolower;
}
/*
  Replace *wc with the toupper value from the case-info page selected
  by *wc >> 8; the low 8 bits of *wc index the entry inside the page.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  *wc is left unchanged when it is out of range (old form: page >= 256;
  new form: *wc > uni_plane->maxchar) or when the page pointer is NULL.
*/
static inline void
my_toupper_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
if (page < 256 && uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].toupper;
MY_UNICASE_CHARACTER *page;
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].toupper;
}
static inline void
my_tosort_utf32(MY_UNICASE_INFO *const* uni_plane, my_wc_t *wc)
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
if (page < 256)
if (*wc <= uni_plane->maxchar)
{
if (uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].sort;
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].sort;
}
else
{
@ -1991,7 +1988,7 @@ my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@ -2021,7 +2018,7 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e= s + slen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/* Skip trailing spaces */
while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
@ -2047,7 +2044,7 @@ my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
@ -2070,7 +2067,7 @@ my_strnncoll_utf32(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
const uchar *se= s + slen;
const uchar *te= t + tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s < se && t < te)
{
@ -2134,7 +2131,7 @@ my_strnncollsp_utf32(CHARSET_INFO *cs,
int res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
const uchar *se= s + slen, *te= t + tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT((slen % 4) == 0);
DBUG_ASSERT((tlen % 4) == 0);
@ -2582,7 +2579,7 @@ my_wildcmp_utf32_ci(CHARSET_INFO *cs,
const char *wildstr, const char *wildend,
int escape, int w_one, int w_many)
{
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many, uni_plane);
}
@ -2790,11 +2787,10 @@ struct charset_info_st my_charset_utf32_general_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -2823,11 +2819,10 @@ struct charset_info_st my_charset_utf32_bin=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -2934,32 +2929,29 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
/*
  Replace *wc with the tolower value from its case-info page; *wc is
  left unchanged when the page pointer is NULL.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  The old form asserted that the page index (*wc >> 8) is below 256;
  the new form masks the index with 0xFF instead and does not consult
  uni_plane->maxchar.
*/
static inline void
my_tolower_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
DBUG_ASSERT(page < 256);
if (uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].tolower;
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
*wc= page[*wc & 0xFF].tolower;
}
/*
  Replace *wc with the toupper value from its case-info page; *wc is
  left unchanged when the page pointer is NULL.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  The old form asserted that the page index (*wc >> 8) is below 256;
  the new form masks the index with 0xFF instead and does not consult
  uni_plane->maxchar.
*/
static inline void
my_toupper_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
DBUG_ASSERT(page < 256);
if (uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].toupper;
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
*wc= page[*wc & 0xFF].toupper;
}
/*
  Replace *wc with the sort value from its case-info page; *wc is left
  unchanged when the page pointer is NULL.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  The old form asserted that the page index (*wc >> 8) is below 256;
  the new form masks the index with 0xFF instead and does not consult
  uni_plane->maxchar.
*/
static inline void
my_tosort_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
uint page= *wc >> 8;
DBUG_ASSERT(page < 256);
if (uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].sort;
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
*wc= page[*wc & 0xFF].sort;
}
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
@ -2969,7 +2961,7 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@ -2990,7 +2982,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e=s+slen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
e-= 2;
@ -3014,7 +3006,7 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@ -3062,7 +3054,7 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
const uchar *se=s+slen;
const uchar *te=t+tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while ( s < se && t < te )
{
@ -3124,7 +3116,7 @@ static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *se, *te;
size_t minlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/* extra safety to make sure the lengths are even numbers */
slen&= ~1;
@ -3135,11 +3127,11 @@ static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
{
int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
(((int) s[0]) << 8) + (int) s[1];
int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort :
(((int) s[0]) << 8) + (int) s[1];
int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort :
(((int) t[0]) << 8) + (int) t[1];
int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort :
(((int) t[0]) << 8) + (int) t[1];
if ( s_wc != t_wc )
return s_wc > t_wc ? 1 : -1;
@ -3220,7 +3212,7 @@ int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
}
@ -3412,11 +3404,10 @@ struct charset_info_st my_charset_ucs2_general_ci=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -3445,11 +3436,10 @@ struct charset_info_st my_charset_ucs2_general_mysql500_ci=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_mysql500, /* caseinfo */
&my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -3478,11 +3468,10 @@ struct charset_info_st my_charset_ucs2_bin=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -65988,7 +65988,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *cs __attribute__((unused)),
/* Case info pages for JIS-X-0208 range */
static MY_UNICASE_INFO cA2[256]=
static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66109,7 +66109,7 @@ static MY_UNICASE_INFO cA2[256]=
};
static MY_UNICASE_INFO cA3[256]=
static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66230,7 +66230,7 @@ static MY_UNICASE_INFO cA3[256]=
};
static MY_UNICASE_INFO cA6[256]=
static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66351,7 +66351,7 @@ static MY_UNICASE_INFO cA6[256]=
};
static MY_UNICASE_INFO cA7[256]=
static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66473,7 +66473,7 @@ static MY_UNICASE_INFO cA7[256]=
/* Case info pages for JIS-X-0212 range */
static MY_UNICASE_INFO c8FA6[]=
static MY_UNICASE_CHARACTER c8FA6[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66594,7 +66594,7 @@ static MY_UNICASE_INFO c8FA6[]=
};
static MY_UNICASE_INFO c8FA7[]=
static MY_UNICASE_CHARACTER c8FA7[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66715,7 +66715,7 @@ static MY_UNICASE_INFO c8FA7[]=
};
static MY_UNICASE_INFO c8FA9[]=
static MY_UNICASE_CHARACTER c8FA9[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66836,7 +66836,7 @@ static MY_UNICASE_INFO c8FA9[]=
};
static MY_UNICASE_INFO c8FAA[]=
static MY_UNICASE_CHARACTER c8FAA[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -66957,7 +66957,7 @@ static MY_UNICASE_INFO c8FAA[]=
};
static MY_UNICASE_INFO c8FAB[]=
static MY_UNICASE_CHARACTER c8FAB[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@ -67078,7 +67078,7 @@ static MY_UNICASE_INFO c8FAB[]=
};
static MY_UNICASE_INFO *my_caseinfo_ujis[512]=
static MY_UNICASE_CHARACTER *my_caseinfo_pages_ujis[512]=
{
/* JIS-X-0208 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
@ -67148,6 +67148,15 @@ static MY_UNICASE_INFO *my_caseinfo_ujis[512]=
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* F */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
/*
  Case-info descriptor for ujis: wraps the page table
  my_caseinfo_pages_ujis together with a maxchar value.
*/
static MY_UNICASE_INFO my_caseinfo_ujis=
{
0x0FFFF, /* maxchar */
my_caseinfo_pages_ujis /* page: table of 512 page pointers */
};
#endif /* HAVE_CHARSET_ujis */
@ -67158,11 +67167,11 @@ static MY_UNICASE_INFO *my_caseinfo_ujis[512]=
UJIS and EUCJPMS share the same UPPER/LOWER functions.
*/
/*
  Return the case-mapping entry for a character, or NULL when the
  selected page pointer is NULL.

  NOTE: this is a diff hunk; each changed line is shown in its old form
  immediately followed by its new form.

  @param cs     character set whose caseinfo is consulted
  @param plane  selects a block of 256 page pointers
                (the page table is indexed with page + plane * 256)
  @param page   page index inside the selected plane
  @param offs   entry index inside the page; only the low 8 bits are used

  NOTE(review): cs->caseinfo is dereferenced without a NULL check --
  presumably every charset using this helper sets caseinfo; verify.
*/
static MY_UNICASE_INFO*
static MY_UNICASE_CHARACTER*
get_case_info_for_ch(CHARSET_INFO *cs, uint plane, uint page, uint offs)
{
MY_UNICASE_INFO *p;
return (p= cs->caseinfo[page + plane * 256]) ? &p[offs & 0xFF] : NULL;
MY_UNICASE_CHARACTER *p;
return (p= cs->caseinfo->page[page + plane * 256]) ? &p[offs & 0xFF] : NULL;
}
@ -67183,7 +67192,7 @@ my_casefold_ujis(CHARSET_INFO *cs,
size_t mblen= my_ismbchar(cs, src, srcend);
if (mblen)
{
MY_UNICASE_INFO *ch;
MY_UNICASE_CHARACTER *ch;
ch= (mblen == 2) ?
get_case_info_for_ch(cs, 0, (uchar) src[0], (uchar) src[1]) :
get_case_info_for_ch(cs, 1, (uchar) src[1], (uchar) src[2]);
@ -67304,11 +67313,10 @@ struct charset_info_st my_charset_ujis_japanese_ci=
to_lower_ujis,
to_upper_ujis,
sort_order_ujis,
NULL, /* sort_order_big*/
NULL, /* contractions */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_ujis, /* caseinfo */
&my_caseinfo_ujis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -67337,11 +67345,10 @@ struct charset_info_st my_charset_ujis_bin=
to_lower_ujis,
to_upper_ujis,
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_caseinfo_ujis, /* caseinfo */
&my_caseinfo_ujis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -60,7 +60,7 @@
#include "my_uctype.h"
static MY_UNICASE_INFO plane00[]={
static MY_UNICASE_CHARACTER plane00[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@ -196,7 +196,7 @@ static MY_UNICASE_INFO plane00[]={
Almost identical to plane00, but maps the sorting order
for U+00DF to 0x00DF instead of 0x0053.
*/
static MY_UNICASE_INFO plane00_mysql500[]={
static MY_UNICASE_CHARACTER plane00_mysql500[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@ -328,7 +328,7 @@ static MY_UNICASE_INFO plane00_mysql500[]={
};
static MY_UNICASE_INFO plane01[]={
static MY_UNICASE_CHARACTER plane01[]={
{0x0100,0x0101,0x0041}, {0x0100,0x0101,0x0041},
{0x0102,0x0103,0x0041}, {0x0102,0x0103,0x0041},
{0x0104,0x0105,0x0041}, {0x0104,0x0105,0x0041},
@ -459,7 +459,7 @@ static MY_UNICASE_INFO plane01[]={
{0x01FE,0x01FF,0x00D8}, {0x01FE,0x01FF,0x00D8}
};
static MY_UNICASE_INFO plane02[]={
static MY_UNICASE_CHARACTER plane02[]={
{0x0200,0x0201,0x0041}, {0x0200,0x0201,0x0041},
{0x0202,0x0203,0x0041}, {0x0202,0x0203,0x0041},
{0x0204,0x0205,0x0045}, {0x0204,0x0205,0x0045},
@ -590,7 +590,7 @@ static MY_UNICASE_INFO plane02[]={
{0x02FE,0x02FE,0x02FE}, {0x02FF,0x02FF,0x02FF}
};
static MY_UNICASE_INFO plane03[]={
static MY_UNICASE_CHARACTER plane03[]={
{0x0300,0x0300,0x0300}, {0x0301,0x0301,0x0301},
{0x0302,0x0302,0x0302}, {0x0303,0x0303,0x0303},
{0x0304,0x0304,0x0304}, {0x0305,0x0305,0x0305},
@ -721,7 +721,7 @@ static MY_UNICASE_INFO plane03[]={
{0x03FE,0x03FE,0x03FE}, {0x03FF,0x03FF,0x03FF}
};
static MY_UNICASE_INFO plane04[]={
static MY_UNICASE_CHARACTER plane04[]={
{0x0400,0x0450,0x0415}, {0x0401,0x0451,0x0415},
{0x0402,0x0452,0x0402}, {0x0403,0x0453,0x0413},
{0x0404,0x0454,0x0404}, {0x0405,0x0455,0x0405},
@ -852,7 +852,7 @@ static MY_UNICASE_INFO plane04[]={
{0x04FE,0x04FE,0x04FE}, {0x04FF,0x04FF,0x04FF}
};
static MY_UNICASE_INFO plane05[]={
static MY_UNICASE_CHARACTER plane05[]={
{0x0500,0x0500,0x0500}, {0x0501,0x0501,0x0501},
{0x0502,0x0502,0x0502}, {0x0503,0x0503,0x0503},
{0x0504,0x0504,0x0504}, {0x0505,0x0505,0x0505},
@ -983,7 +983,7 @@ static MY_UNICASE_INFO plane05[]={
{0x05FE,0x05FE,0x05FE}, {0x05FF,0x05FF,0x05FF}
};
static MY_UNICASE_INFO plane1E[]={
static MY_UNICASE_CHARACTER plane1E[]={
{0x1E00,0x1E01,0x0041}, {0x1E00,0x1E01,0x0041},
{0x1E02,0x1E03,0x0042}, {0x1E02,0x1E03,0x0042},
{0x1E04,0x1E05,0x0042}, {0x1E04,0x1E05,0x0042},
@ -1114,7 +1114,7 @@ static MY_UNICASE_INFO plane1E[]={
{0x1EFE,0x1EFE,0x1EFE}, {0x1EFF,0x1EFF,0x1EFF}
};
static MY_UNICASE_INFO plane1F[]={
static MY_UNICASE_CHARACTER plane1F[]={
{0x1F08,0x1F00,0x0391}, {0x1F09,0x1F01,0x0391},
{0x1F0A,0x1F02,0x0391}, {0x1F0B,0x1F03,0x0391},
{0x1F0C,0x1F04,0x0391}, {0x1F0D,0x1F05,0x0391},
@ -1245,7 +1245,7 @@ static MY_UNICASE_INFO plane1F[]={
{0x1FFE,0x1FFE,0x1FFE}, {0x1FFF,0x1FFF,0x1FFF}
};
static MY_UNICASE_INFO plane21[]={
static MY_UNICASE_CHARACTER plane21[]={
{0x2100,0x2100,0x2100}, {0x2101,0x2101,0x2101},
{0x2102,0x2102,0x2102}, {0x2103,0x2103,0x2103},
{0x2104,0x2104,0x2104}, {0x2105,0x2105,0x2105},
@ -1376,7 +1376,7 @@ static MY_UNICASE_INFO plane21[]={
{0x21FE,0x21FE,0x21FE}, {0x21FF,0x21FF,0x21FF}
};
static MY_UNICASE_INFO plane24[]={
static MY_UNICASE_CHARACTER plane24[]={
{0x2400,0x2400,0x2400}, {0x2401,0x2401,0x2401},
{0x2402,0x2402,0x2402}, {0x2403,0x2403,0x2403},
{0x2404,0x2404,0x2404}, {0x2405,0x2405,0x2405},
@ -1507,7 +1507,7 @@ static MY_UNICASE_INFO plane24[]={
{0x24FE,0x24FE,0x24FE}, {0x24FF,0x24FF,0x24FF}
};
static MY_UNICASE_INFO planeFF[]={
static MY_UNICASE_CHARACTER planeFF[]={
{0xFF00,0xFF00,0xFF00}, {0xFF01,0xFF01,0xFF01},
{0xFF02,0xFF02,0xFF02}, {0xFF03,0xFF03,0xFF03},
{0xFF04,0xFF04,0xFF04}, {0xFF05,0xFF05,0xFF05},
@ -1638,7 +1638,9 @@ static MY_UNICASE_INFO planeFF[]={
{0xFFFE,0xFFFE,0xFFFE}, {0xFFFF,0xFFFF,0xFFFF}
};
MY_UNICASE_INFO *const my_unicase_default[256]={
static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
{
plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1671,14 +1673,20 @@ MY_UNICASE_INFO *const my_unicase_default[256]={
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, planeFF
};
/* Default case information descriptor: wraps my_unicase_pages_default. */
MY_UNICASE_INFO my_unicase_default=
{
0xFFFF, /* maxchar */
my_unicase_pages_default /* page: table of per-page character arrays */
};
/*
Reproduce old utf8_general_ci behaviour before we fixed Bug#27877.
*/
MY_UNICASE_INFO *const my_unicase_mysql500[256]={
MY_UNICASE_CHARACTER *my_unicase_pages_mysql500[256]={
plane00_mysql500,
plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1716,6 +1724,13 @@ MY_UNICASE_INFO *const my_unicase_mysql500[256]={
};
/* Case information descriptor wrapping my_unicase_pages_mysql500. */
MY_UNICASE_INFO my_unicase_mysql500=
{
0xFFFF, /* maxchar */
my_unicase_pages_mysql500 /* page: table of per-page character arrays */
};
/*
Turkish lower/upper mapping:
1. LOWER(0x0049 LATIN CAPITAL LETTER I) ->
@ -1724,7 +1739,7 @@ MY_UNICASE_INFO *const my_unicase_mysql500[256]={
0x0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
static MY_UNICASE_INFO turk00[]=
static MY_UNICASE_CHARACTER turk00[]=
{
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
@ -1858,7 +1873,7 @@ static MY_UNICASE_INFO turk00[]=
MY_UNICASE_INFO *const my_unicase_turkish[256]=
static MY_UNICASE_CHARACTER *my_unicase_pages_turkish[256]=
{
turk00, plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1895,14 +1910,23 @@ MY_UNICASE_INFO *const my_unicase_turkish[256]=
};
static inline void
my_tosort_unicode(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
MY_UNICASE_INFO my_unicase_turkish=
{
int page= *wc >> 8;
if (page < 256)
0xFFFF,
my_unicase_pages_turkish
};
static inline void
my_tosort_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags)
{
if (*wc <= uni_plane->maxchar)
{
if (uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].sort;
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[*wc >> 8]))
*wc= (flags & MY_CS_LOWER_SORT) ?
page[*wc & 0xFF].tolower :
page[*wc & 0xFF].sort;
}
else
{
@ -1925,7 +1949,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO *const *weights, int recurse_level)
MY_UNICASE_INFO *weights, int recurse_level)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
@ -1974,8 +1998,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
{
if (weights)
{
my_tosort_unicode(weights, &s_wc);
my_tosort_unicode(weights, &w_wc);
my_tosort_unicode(weights, &s_wc, cs->state);
my_tosort_unicode(weights, &w_wc, cs->state);
}
if (s_wc != w_wc)
return 1; /* No match */
@ -2045,8 +2069,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
return 1;
if (weights)
{
my_tosort_unicode(weights, &s_wc);
my_tosort_unicode(weights, &w_wc);
my_tosort_unicode(weights, &s_wc, cs->state);
my_tosort_unicode(weights, &w_wc, cs->state);
}
if (s_wc == w_wc)
@ -2074,7 +2098,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO *const *weights)
MY_UNICASE_INFO *weights)
{
return my_wildcmp_unicode_impl(cs, str, str_end,
wildstr, wildend,
@ -2099,7 +2123,7 @@ my_strnxfrm_unicode(CHARSET_INFO *cs,
uchar *de= dst + dstlen;
uchar *de_beg= de - 1;
const uchar *se = src + srclen;
MY_UNICASE_INFO * const*uni_plane= (cs->state & MY_CS_BINSORT) ?
MY_UNICASE_INFO *uni_plane= (cs->state & MY_CS_BINSORT) ?
NULL : cs->caseinfo;
DBUG_ASSERT(src);
@ -2110,7 +2134,7 @@ my_strnxfrm_unicode(CHARSET_INFO *cs,
src+=res;
if (uni_plane)
my_tosort_unicode(uni_plane, &wc);
my_tosort_unicode(uni_plane, &wc, cs->state);
*dst++= (uchar) (wc >> 8);
if (dst < de)
@ -2476,20 +2500,45 @@ static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Replace *wc with its lower-case mapping.

  The page is selected by bits 8..15 of the code and the entry by its
  low 8 bits. When the selected page pointer is NULL the code is left
  unchanged.
*/
static inline void
my_tolower_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  const my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *chars= uni_plane->page[(code >> 8) & 0xFF];

  if (chars)
    *wc= chars[code & 0xFF].tolower;
}
/*
  Replace *wc with its upper-case mapping.

  The page is selected by bits 8..15 of the code and the entry by its
  low 8 bits. When the selected page pointer is NULL the code is left
  unchanged.
*/
static inline void
my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  const my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *chars= uni_plane->page[(code >> 8) & 0xFF];

  if (chars)
    *wc= chars[code & 0xFF].toupper;
}
/*
  Replace *wc with its sort mapping.

  The page is selected by bits 8..15 of the code and the entry by its
  low 8 bits. When the selected page pointer is NULL the code is left
  unchanged.
*/
static inline void
my_tosort_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  const my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *chars= uni_plane->page[(code >> 8) & 0xFF];

  if (chars)
    *wc= chars[code & 0xFF].sort;
}
static size_t my_caseup_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
while ((src < srcend) &&
(srcres= my_utf8_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
my_toupper_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
break;
src+= srcres;
@ -2505,7 +2554,7 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e=s+slen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/*
Remove end space. We have to do this to be able to compare
@ -2516,8 +2565,7 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
my_tosort_unicode(uni_plane, &wc, cs->state);
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
n2[0]+=3;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
@ -2532,14 +2580,13 @@ static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->caseup_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
my_toupper_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
@ -2556,14 +2603,13 @@ static size_t my_casedn_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
while ((src < srcend) &&
(srcres= my_utf8_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
my_tolower_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
break;
src+= srcres;
@ -2578,14 +2624,13 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->casedn_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
my_tolower_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
@ -2621,11 +2666,10 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
const uchar *se=s+slen;
const uchar *te=t+tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while ( s < se && t < te )
{
int plane;
s_res=my_utf8_uni(cs,&s_wc, s, se);
t_res=my_utf8_uni(cs,&t_wc, t, te);
@ -2635,10 +2679,9 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
return bincmp(s, se, t, te);
}
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
my_tosort_unicode(uni_plane, &s_wc, cs->state);
my_tosort_unicode(uni_plane, &t_wc, cs->state);
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
@ -2690,7 +2733,7 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
int s_res, t_res, res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
const uchar *se= s+slen, *te= t+tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
@ -2698,7 +2741,6 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
while ( s < se && t < te )
{
int plane;
s_res=my_utf8_uni(cs,&s_wc, s, se);
t_res=my_utf8_uni(cs,&t_wc, t, te);
@ -2708,10 +2750,9 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
return bincmp(s, se, t, te);
}
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
my_tosort_unicode(uni_plane, &s_wc, cs->state);
my_tosort_unicode(uni_plane, &t_wc, cs->state);
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
@ -2778,7 +2819,7 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
static
int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
{
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s[0] && t[0])
{
my_wc_t s_wc,t_wc;
@ -2795,7 +2836,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
}
else
{
int plane, res;
int res;
/*
Scan a multibyte character.
@ -2823,8 +2864,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
s+= res;
/* Convert Unicode code into weight according to collation */
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc;
my_tolower_utf8mb3(uni_plane, &s_wc);
}
@ -2838,15 +2878,13 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
}
else
{
int plane;
int res=my_utf8_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3);
if (res <= 0)
return strcmp(s, t);
t+= res;
/* Convert code into weight */
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc;
my_tolower_utf8mb3(uni_plane, &t_wc);
}
/* Now we have two weights, let's compare them */
@ -2863,7 +2901,7 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
}
@ -2966,11 +3004,10 @@ struct charset_info_st my_charset_utf8_general_ci=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -2999,11 +3036,10 @@ struct charset_info_st my_charset_utf8_general_mysql500_ci=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_mysql500, /* caseinfo */
&my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -3032,11 +3068,10 @@ struct charset_info_st my_charset_utf8_bin=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -3117,7 +3152,7 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
const uchar *se= s + slen;
const uchar *te= t + tlen;
int save_diff= 0;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
@ -3125,7 +3160,6 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
while ( s < se && t < te )
{
int plane;
s_res=my_utf8_uni(cs,&s_wc, s, se);
t_res=my_utf8_uni(cs,&t_wc, t, te);
@ -3139,10 +3173,10 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
{
save_diff = ((int)s_wc) - ((int)t_wc);
}
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
my_tosort_unicode(uni_plane, &s_wc, cs->state);
my_tosort_unicode(uni_plane, &t_wc, cs->state);
if ( s_wc != t_wc )
{
return ((int) s_wc) - ((int) t_wc);
@ -4521,11 +4555,10 @@ struct charset_info_st my_charset_filename=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -4887,20 +4920,26 @@ my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)),
static inline void
my_tolower_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_tolower_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
int page= *wc >> 8;
if (page < 256 && uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].tolower;
if (*wc <= uni_plane->maxchar)
{
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8)]))
*wc= page[*wc & 0xFF].tolower;
}
}
static inline void
my_toupper_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
my_toupper_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
int page= *wc >> 8;
if (page < 256 && uni_plane[page])
*wc= uni_plane[page][*wc & 0xFF].toupper;
if (*wc <= uni_plane->maxchar)
{
MY_UNICASE_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8)]))
*wc= page[*wc & 0xFF].toupper;
}
}
@ -4911,7 +4950,7 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
while ((src < srcend) &&
@ -4943,7 +4982,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e= s + slen;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/*
Remove end space. We do this to be able to compare
@ -4954,7 +4993,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{
my_tosort_unicode(uni_plane, &wc);
my_tosort_unicode(uni_plane, &wc, cs->state);
my_hash_add(n1, n2, (uint) (wc & 0xFF));
my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
if (wc > 0xFFFF)
@ -4979,7 +5018,7 @@ my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->caseup_multiply == 1);
while (*src &&
@ -5004,7 +5043,7 @@ my_casedn_utf8mb4(CHARSET_INFO *cs,
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
while ((src < srcend) &&
@ -5027,7 +5066,7 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->casedn_multiply == 1);
while (*src &&
@ -5069,7 +5108,7 @@ my_strnncoll_utf8mb4(CHARSET_INFO *cs,
my_wc_t s_wc,t_wc;
const uchar *se= s + slen;
const uchar *te= t + tlen;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
LINT_INIT(s_wc);
LINT_INIT(t_wc);
@ -5084,9 +5123,9 @@ my_strnncoll_utf8mb4(CHARSET_INFO *cs,
return bincmp_utf8mb4(s, se, t, te);
}
my_tosort_unicode(uni_plane, &s_wc);
my_tosort_unicode(uni_plane, &t_wc);
my_tosort_unicode(uni_plane, &s_wc, cs->state);
my_tosort_unicode(uni_plane, &t_wc, cs->state);
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
@ -5136,7 +5175,7 @@ my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
int res;
my_wc_t s_wc, t_wc;
const uchar *se= s + slen, *te= t + tlen;
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
LINT_INIT(s_wc);
LINT_INIT(t_wc);
@ -5155,8 +5194,8 @@ my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
return bincmp_utf8mb4(s, se, t, te);
}
my_tosort_unicode(uni_plane, &s_wc);
my_tosort_unicode(uni_plane, &t_wc);
my_tosort_unicode(uni_plane, &s_wc, cs->state);
my_tosort_unicode(uni_plane, &t_wc, cs->state);
if ( s_wc != t_wc )
{
@ -5220,7 +5259,7 @@ my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
static int
my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
{
MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s[0] && t[0])
{
my_wc_t s_wc,t_wc;
@ -5399,11 +5438,10 @@ struct charset_info_st my_charset_utf8mb4_general_ci=
to_lower_utf8mb4, /* to_lower */
to_upper_utf8mb4, /* to_upper */
to_upper_utf8mb4, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@ -5432,11 +5470,10 @@ struct charset_info_st my_charset_utf8mb4_bin=
to_lower_utf8mb4, /* to_lower */
to_upper_utf8mb4, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */

View File

@ -690,11 +690,10 @@ struct charset_info_st my_charset_cp1250_czech_ci =
to_lower_win1250ch,
to_upper_win1250ch,
sort_order_win1250ch,
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* uca */
tab_cp1250_uni, /* tab_to_uni */
idx_uni_cp1250, /* tab_from_uni */
my_unicase_default, /* caseinfo */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
2, /* strxfrm_multiply */

View File

@ -38,6 +38,18 @@
*/
/*
  Avoid using my_snprintf
  We cannot use my_snprintf() here, because ctype.o is
  used to build conf_to_src, which must have minimum
  dependencies.

  Any existing macro definition of the name is dropped first, then the
  name is redefined as a string literal so that an accidental call
  fails to compile.
*/
#undef my_snprintf
#define my_snprintf "We cannot use my_snprintf in this file"
int (*my_string_stack_guard)(int)= NULL;
static char *mstr(char *str,const char *src,size_t l1,size_t l2)
@ -71,11 +83,75 @@ struct my_cs_file_section_st
#define _CS_PRIMARY_ID 15
#define _CS_BINARY_ID 16
#define _CS_CSDESCRIPT 17
#define _CS_RESET 18
#define _CS_DIFF1 19
#define _CS_DIFF2 20
#define _CS_DIFF3 21
#define _CS_IDENTICAL 22
/* Special purpose commands */
#define _CS_UCA_VERSION 100
#define _CS_CL_SUPPRESS_CONTRACTIONS 101
#define _CS_CL_OPTIMIZE 102
#define _CS_CL_SHIFT_AFTER_METHOD 103
/* Collation Settings */
#define _CS_ST_SETTINGS 200
#define _CS_ST_STRENGTH 201
#define _CS_ST_ALTERNATE 202
#define _CS_ST_BACKWARDS 203
#define _CS_ST_NORMALIZATION 204
#define _CS_ST_CASE_LEVEL 205
#define _CS_ST_CASE_FIRST 206
#define _CS_ST_HIRAGANA_QUATERNARY 207
#define _CS_ST_NUMERIC 208
#define _CS_ST_VARIABLE_TOP 209
#define _CS_ST_MATCH_BOUNDARIES 210
#define _CS_ST_MATCH_STYLE 211
/* Rules */
#define _CS_RULES 300
#define _CS_RESET 301
#define _CS_DIFF1 302
#define _CS_DIFF2 303
#define _CS_DIFF3 304
#define _CS_DIFF4 305
#define _CS_IDENTICAL 306
/* Rules: Expansions */
#define _CS_EXP_X 320
#define _CS_EXP_EXTEND 321
#define _CS_EXP_DIFF1 322
#define _CS_EXP_DIFF2 323
#define _CS_EXP_DIFF3 324
#define _CS_EXP_DIFF4 325
#define _CS_EXP_IDENTICAL 326
/* Rules: Abbreviating Ordering Specifications */
#define _CS_A_DIFF1 351
#define _CS_A_DIFF2 352
#define _CS_A_DIFF3 353
#define _CS_A_DIFF4 354
#define _CS_A_IDENTICAL 355
/* Rules: previous context */
#define _CS_CONTEXT 370
/* Rules: Placing Characters Before Others*/
#define _CS_RESET_BEFORE 380
/* Rules: Logical Reset Positions */
#define _CS_RESET_FIRST_PRIMARY_IGNORABLE 401
#define _CS_RESET_LAST_PRIMARY_IGNORABLE 402
#define _CS_RESET_FIRST_SECONDARY_IGNORABLE 403
#define _CS_RESET_LAST_SECONDARY_IGNORABLE 404
#define _CS_RESET_FIRST_TERTIARY_IGNORABLE 405
#define _CS_RESET_LAST_TERTIARY_IGNORABLE 406
#define _CS_RESET_FIRST_TRAILING 407
#define _CS_RESET_LAST_TRAILING 408
#define _CS_RESET_FIRST_VARIABLE 409
#define _CS_RESET_LAST_VARIABLE 410
#define _CS_RESET_FIRST_NON_IGNORABLE 411
#define _CS_RESET_LAST_NON_IGNORABLE 412
static const struct my_cs_file_section_st sec[] =
@ -85,6 +161,8 @@ static const struct my_cs_file_section_st sec[] =
{_CS_MISC, "xml/encoding"},
{_CS_MISC, "charsets"},
{_CS_MISC, "charsets/max-id"},
{_CS_MISC, "charsets/copyright"},
{_CS_MISC, "charsets/description"},
{_CS_CHARSET, "charsets/charset"},
{_CS_PRIMARY_ID, "charsets/charset/primary-id"},
{_CS_BINARY_ID, "charsets/charset/binary-id"},
@ -106,11 +184,72 @@ static const struct my_cs_file_section_st sec[] =
{_CS_ORDER, "charsets/charset/collation/order"},
{_CS_FLAG, "charsets/charset/collation/flag"},
{_CS_COLLMAP, "charsets/charset/collation/map"},
{_CS_RESET, "charsets/charset/collation/rules/reset"},
{_CS_DIFF1, "charsets/charset/collation/rules/p"},
{_CS_DIFF2, "charsets/charset/collation/rules/s"},
{_CS_DIFF3, "charsets/charset/collation/rules/t"},
{_CS_IDENTICAL, "charsets/charset/collation/rules/i"},
/* Special purpose commands */
{_CS_UCA_VERSION, "charsets/charset/collation/version"},
{_CS_CL_SUPPRESS_CONTRACTIONS, "charsets/charset/collation/suppress_contractions"},
{_CS_CL_OPTIMIZE, "charsets/charset/collation/optimize"},
{_CS_CL_SHIFT_AFTER_METHOD, "charsets/charset/collation/shift-after-method"},
/* Collation Settings */
{_CS_ST_SETTINGS, "charsets/charset/collation/settings"},
{_CS_ST_STRENGTH, "charsets/charset/collation/settings/strength"},
{_CS_ST_ALTERNATE, "charsets/charset/collation/settings/alternate"},
{_CS_ST_BACKWARDS, "charsets/charset/collation/settings/backwards"},
{_CS_ST_NORMALIZATION, "charsets/charset/collation/settings/normalization"},
{_CS_ST_CASE_LEVEL, "charsets/charset/collation/settings/caseLevel"},
{_CS_ST_CASE_FIRST, "charsets/charset/collation/settings/caseFirst"},
{_CS_ST_HIRAGANA_QUATERNARY, "charsets/charset/collation/settings/hiraganaQuaternary"},
{_CS_ST_NUMERIC, "charsets/charset/collation/settings/numeric"},
{_CS_ST_VARIABLE_TOP, "charsets/charset/collation/settings/variableTop"},
{_CS_ST_MATCH_BOUNDARIES, "charsets/charset/collation/settings/match-boundaries"},
{_CS_ST_MATCH_STYLE, "charsets/charset/collation/settings/match-style"},
/* Rules */
{_CS_RULES, "charsets/charset/collation/rules"},
{_CS_RESET, "charsets/charset/collation/rules/reset"},
{_CS_DIFF1, "charsets/charset/collation/rules/p"},
{_CS_DIFF2, "charsets/charset/collation/rules/s"},
{_CS_DIFF3, "charsets/charset/collation/rules/t"},
{_CS_DIFF4, "charsets/charset/collation/rules/q"},
{_CS_IDENTICAL, "charsets/charset/collation/rules/i"},
/* Rules: expansions */
{_CS_EXP_X, "charsets/charset/collation/rules/x"},
{_CS_EXP_EXTEND, "charsets/charset/collation/rules/x/extend"},
{_CS_EXP_DIFF1, "charsets/charset/collation/rules/x/p"},
{_CS_EXP_DIFF2, "charsets/charset/collation/rules/x/s"},
{_CS_EXP_DIFF3, "charsets/charset/collation/rules/x/t"},
{_CS_EXP_DIFF4, "charsets/charset/collation/rules/x/q"},
{_CS_EXP_IDENTICAL, "charsets/charset/collation/rules/x/i"},
/* Rules: previous context */
{_CS_CONTEXT, "charsets/charset/collation/rules/x/context"},
/* Rules: Abbreviating Ordering Specifications */
{_CS_A_DIFF1, "charsets/charset/collation/rules/pc"},
{_CS_A_DIFF2, "charsets/charset/collation/rules/sc"},
{_CS_A_DIFF3, "charsets/charset/collation/rules/tc"},
{_CS_A_DIFF4, "charsets/charset/collation/rules/qc"},
{_CS_A_IDENTICAL, "charsets/charset/collation/rules/ic"},
/* Rules: Placing Characters Before Others*/
{_CS_RESET_BEFORE, "charsets/charset/collation/rules/reset/before"},
/* Rules: Logical Reset Positions */
{_CS_RESET_FIRST_NON_IGNORABLE, "charsets/charset/collation/rules/reset/first_non_ignorable"},
{_CS_RESET_LAST_NON_IGNORABLE, "charsets/charset/collation/rules/reset/last_non_ignorable"},
{_CS_RESET_FIRST_PRIMARY_IGNORABLE, "charsets/charset/collation/rules/reset/first_primary_ignorable"},
{_CS_RESET_LAST_PRIMARY_IGNORABLE, "charsets/charset/collation/rules/reset/last_primary_ignorable"},
{_CS_RESET_FIRST_SECONDARY_IGNORABLE, "charsets/charset/collation/rules/reset/first_secondary_ignorable"},
{_CS_RESET_LAST_SECONDARY_IGNORABLE, "charsets/charset/collation/rules/reset/last_secondary_ignorable"},
{_CS_RESET_FIRST_TERTIARY_IGNORABLE, "charsets/charset/collation/rules/reset/first_tertiary_ignorable"},
{_CS_RESET_LAST_TERTIARY_IGNORABLE, "charsets/charset/collation/rules/reset/last_tertiary_ignorable"},
{_CS_RESET_FIRST_TRAILING, "charsets/charset/collation/rules/reset/first_trailing"},
{_CS_RESET_LAST_TRAILING, "charsets/charset/collation/rules/reset/last_trailing"},
{_CS_RESET_FIRST_VARIABLE, "charsets/charset/collation/rules/reset/first_variable"},
{_CS_RESET_LAST_VARIABLE, "charsets/charset/collation/rules/reset/last_variable"},
{0, NULL}
};
@ -120,14 +259,16 @@ static const struct my_cs_file_section_st
const struct my_cs_file_section_st *s;
for (s=sec; s->str; s++)
{
if (!strncmp(attr,s->str,len))
if (!strncmp(attr, s->str, len) && s->str[len] == 0)
return s;
}
return NULL;
}
#define MY_CS_CSDESCR_SIZE 64
#define MY_CS_TAILORING_SIZE 1024
/* Parenthesized so the value stays intact inside larger expressions */
#define MY_CS_TAILORING_SIZE (32*1024)
#define MY_CS_UCA_VERSION_SIZE 64
#define MY_CS_CONTEXT_SIZE 64
typedef struct my_cs_file_info
{
@ -139,13 +280,60 @@ typedef struct my_cs_file_info
uchar sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
char comment[MY_CS_CSDESCR_SIZE];
char tailoring[MY_CS_TAILORING_SIZE];
char *tailoring;
size_t tailoring_length;
size_t tailoring_alloced_length;
char context[MY_CS_CONTEXT_SIZE];
struct charset_info_st cs;
int (*add_collation)(struct charset_info_st *cs);
} MY_CHARSET_LOADER;
MY_CHARSET_LOADER *loader;
} MY_CHARSET_FILE;
/*
  Zero-fill the embedded charset structure (i->cs).
*/
static void
my_charset_file_reset_charset(MY_CHARSET_FILE *i)
{
  struct charset_info_st *cs= &i->cs;

  memset(cs, 0, sizeof(*cs));
}
/*
  Clear the per-collation state: empty the context string and
  mark the tailoring text as having zero length. The tailoring buffer
  itself and its allocated size are left untouched.
*/
static void
my_charset_file_reset_collation(MY_CHARSET_FILE *i)
{
  *i->context= '\0';
  i->tailoring_length= 0;
}
/*
  Bring a MY_CHARSET_FILE into its initial state: no tailoring buffer,
  empty collation state and a zero-filled charset structure.
  The "loader" member is not touched here.
*/
static void
my_charset_file_init(MY_CHARSET_FILE *i)
{
  /* No tailoring buffer has been allocated yet */
  i->tailoring_alloced_length= 0;
  i->tailoring= NULL;

  /* Start with empty per-collation and per-charset data */
  my_charset_file_reset_collation(i);
  my_charset_file_reset_charset(i);
}
/*
  Release the tailoring buffer through the loader's free() callback.
  The pointer stored in i->tailoring is not reset afterwards.
*/
static void
my_charset_file_free(MY_CHARSET_FILE *i)
{
  char *buf= i->tailoring;

  i->loader->free(buf);
}
/*
  Make sure the tailoring buffer can hold more than "newlen" bytes.

  @param i       Charset file state owning the tailoring buffer
  @param newlen  Required length in bytes

  @return MY_XML_OK     the buffer is already larger than newlen,
                        or it was successfully grown
  @return MY_XML_ERROR  the loader's realloc() callback returned NULL

  On growth the buffer is resized to newlen plus 32K of head room, so
  that a series of small appends does not reallocate each time.

  The buffer pointer and the recorded allocated length are updated only
  after a successful reallocation. On failure both keep their previous
  values, so the old buffer is not lost and the recorded size never
  exceeds what is really allocated.
  NOTE(review): this assumes the loader's realloc() leaves the original
  block valid on failure, like the standard realloc() - confirm.
*/
static int
my_charset_file_tailoring_realloc(MY_CHARSET_FILE *i, size_t newlen)
{
  size_t alloced;
  char *buf;

  if (i->tailoring_alloced_length > newlen)
    return MY_XML_OK;

  alloced= newlen + 32*1024;
  if (!(buf= i->loader->realloc(i->tailoring, alloced)))
    return MY_XML_ERROR;

  i->tailoring= buf;
  i->tailoring_alloced_length= alloced;
  return MY_XML_OK;
}
static int fill_uchar(uchar *a,uint size,const char *str, size_t len)
{
@ -182,17 +370,119 @@ static int fill_uint16(uint16 *a,uint size,const char *str, size_t len)
}
/*
  Format one attribute value and append it to the tailoring text.

  @param st    XML parser; st->user_data points to the charset file state
  @param fmt   sprintf() format; it is passed (int) len and attr, in
               that order
  @param len   Length of attr in bytes
  @param attr  Attribute value

  @return MY_XML_OK on success, MY_XML_ERROR if the buffer cannot be grown

  The buffer is grown to fit the current text, len bytes and 64 extra
  bytes reserved for the fixed part of the format.
*/
static int
tailoring_append(MY_XML_PARSER *st,
                 const char *fmt, size_t len, const char *attr)
{
  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
  size_t needed= info->tailoring_length + len + 64; /* 64 for format */
  char *tail;

  if (my_charset_file_tailoring_realloc(info, needed) != MY_XML_OK)
    return MY_XML_ERROR;

  tail= info->tailoring + info->tailoring_length;
  sprintf(tail, fmt, (int) len, attr);
  info->tailoring_length+= strlen(tail);
  return MY_XML_OK;
}
/*
  Format two string arguments into the end of the tailoring text.

  "fmt" is a printf-style format that consumes two (int precision, string)
  pairs, e.g. "<%.*s|%.*s". The buffer is grown first so that both strings
  plus 64 bytes of format overhead fit.

  Returns MY_XML_OK on success, MY_XML_ERROR if the buffer cannot be grown.
*/
static int
tailoring_append2(MY_XML_PARSER *st,
                  const char *fmt,
                  size_t len1, const char *attr1,
                  size_t len2, const char *attr2)
{
  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
  size_t required= info->tailoring_length + len1 + len2 + 64; /* 64 for format */
  char *tail;

  if (my_charset_file_tailoring_realloc(info, required) != MY_XML_OK)
    return MY_XML_ERROR;

  tail= info->tailoring + info->tailoring_length;
  sprintf(tail, fmt, (int) len1, attr1, (int) len2, attr2);
  info->tailoring_length+= strlen(tail);
  return MY_XML_OK;
}
/*
  Measure one "character" at the beginning of the byte range [s, e).

  Three forms are recognized:
  - an escape sequence: backslash, 'u' and one or more hex digits;
  - a single non-zero 7-bit byte;
  - anything else is handed to the mb_wc() routine of
    my_charset_utf8_general_ci.

  The 7-bit test is written on the unsigned byte value. Testing a plain
  "char" with "> 0" depends on whether char is signed on the platform:
  where it is unsigned, every lead byte of a multi-byte sequence would be
  taken for a 7-bit character and the sequence would be split.

  @param s   start of the input
  @param e   end of the input (one past the last byte)
  @param wc  receives the code point only in the mb_wc() case; for escapes
             and 7-bit bytes it is set to 0, so callers should rely on the
             returned length only

  @return    number of bytes occupied by the character,
             0 at end of input or when mb_wc() does not return a
             positive length
*/
static size_t
scan_one_character(const char *s, const char *e, my_wc_t *wc)
{
  CHARSET_INFO *cs= &my_charset_utf8_general_ci;
  if (s >= e)
    return 0;

  /* Escape sequence: \uXXXX */
  if (s[0] == '\\' && s + 2 < e && s[1] == 'u' && my_isxdigit(cs, s[2]))
  {
    size_t len= 3; /* We have at least one digit */
    for (s+= 3; s < e && my_isxdigit(cs, s[0]); s++, len++)
    {
    }
    wc[0]= 0;
    return len;
  }
  else if (s[0] != '\0' && (uchar) s[0] < 0x80) /* 7-bit character */
  {
    wc[0]= 0;
    return 1;
  }
  else /* Non-escaped character */
  {
    int rc= cs->cset->mb_wc(cs, wc, (uchar *) s, (uchar *) e);
    if (rc > 0)
      return (size_t) rc;
  }
  return 0;
}
/*
  Append an abbreviated rule: the value is split into characters with
  scan_one_character() and "fmt" is applied to every character separately.

  Scanning stops at the first position where scan_one_character()
  returns 0. Returns MY_XML_ERROR as soon as one append fails,
  MY_XML_OK otherwise.
*/
static int
tailoring_append_abbreviation(MY_XML_PARSER *st,
                              const char *fmt, size_t len, const char *attr)
{
  const char *attrend= attr + len;
  my_wc_t wc;
  size_t chlen;

  while ((chlen= scan_one_character(attr, attrend, &wc)) > 0)
  {
    DBUG_ASSERT(attr < attrend);
    if (tailoring_append(st, fmt, chlen, attr) != MY_XML_OK)
      return MY_XML_ERROR;
    attr+= chlen;
  }
  return MY_XML_OK;
}
/*
  XML "enter tag" handler for charset definition files.

  "attr"/"len" give the tag path, which is looked up with cs_file_sec():
  - an unknown tag is reported through the loader as a warning and
    otherwise ignored;
  - a charset tag resets the charset part of the parser state;
  - a collation tag resets the collation part (tailoring text, context);
  - a reset tag starts a new rule by appending " &" to the tailoring.

  The precision of "%.*s" must be an int, hence the cast of "len".

  Returns MY_XML_OK, or the result of tailoring_append() for a reset tag.
*/
static int cs_enter(MY_XML_PARSER *st,const char *attr, size_t len)
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  const struct my_cs_file_section_st *s= cs_file_sec(attr,len);
  int state= s ? s->state : 0;

  switch (state) {
  case 0:
    i->loader->reporter(WARNING_LEVEL, "Unknown LDML tag: '%.*s'",
                        (int) len, attr);
    break;

  case _CS_CHARSET:
    my_charset_file_reset_charset(i);
    break;

  case _CS_COLLATION:
    my_charset_file_reset_collation(i);
    break;

  case _CS_RESET:
    return tailoring_append(st, " &", 0, NULL);

  default:
    break;
  }
  return MY_XML_OK;
}
@ -206,8 +496,60 @@ static int cs_leave(MY_XML_PARSER *st,const char *attr, size_t len)
switch(state){
case _CS_COLLATION:
rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK;
if (i->tailoring_length)
i->cs.tailoring= i->tailoring;
rc= i->loader->add_collation ? i->loader->add_collation(&i->cs) : MY_XML_OK;
break;
/* Rules: Logical Reset Positions */
case _CS_RESET_FIRST_NON_IGNORABLE:
rc= tailoring_append(st, "[first non-ignorable]", 0, NULL);
break;
case _CS_RESET_LAST_NON_IGNORABLE:
rc= tailoring_append(st, "[last non-ignorable]", 0, NULL);
break;
case _CS_RESET_FIRST_PRIMARY_IGNORABLE:
rc= tailoring_append(st, "[first primary ignorable]", 0, NULL);
break;
case _CS_RESET_LAST_PRIMARY_IGNORABLE:
rc= tailoring_append(st, "[last primary ignorable]", 0, NULL);
break;
case _CS_RESET_FIRST_SECONDARY_IGNORABLE:
rc= tailoring_append(st, "[first secondary ignorable]", 0, NULL);
break;
case _CS_RESET_LAST_SECONDARY_IGNORABLE:
rc= tailoring_append(st, "[last secondary ignorable]", 0, NULL);
break;
case _CS_RESET_FIRST_TERTIARY_IGNORABLE:
rc= tailoring_append(st, "[first tertiary ignorable]", 0, NULL);
break;
case _CS_RESET_LAST_TERTIARY_IGNORABLE:
rc= tailoring_append(st, "[last tertiary ignorable]", 0, NULL);
break;
case _CS_RESET_FIRST_TRAILING:
rc= tailoring_append(st, "[first trailing]", 0, NULL);
break;
case _CS_RESET_LAST_TRAILING:
rc= tailoring_append(st, "[last trailing]", 0, NULL);
break;
case _CS_RESET_FIRST_VARIABLE:
rc= tailoring_append(st, "[first variable]", 0, NULL);
break;
case _CS_RESET_LAST_VARIABLE:
rc= tailoring_append(st, "[last variable]", 0, NULL);
break;
default:
rc=MY_XML_OK;
}
@ -215,14 +557,40 @@ static int cs_leave(MY_XML_PARSER *st,const char *attr, size_t len)
}
/*
  printf-style formats for a rule without context. Every entry consumes
  one (int precision, string) pair. The table is indexed by the distance
  of the parser state from the first state of its group (e.g.
  "state - _CS_DIFF1").
  NOTE(review): the per-entry labels below assume the states of a group
  are numbered consecutively in the order DIFF1..DIFF4, IDENTICAL - confirm
  against the state enum.
*/
static const char *diff_fmt[5]=
{
/* primary difference */
"<%.*s",
/* secondary difference */
"<<%.*s",
/* tertiary difference */
"<<<%.*s",
/* quaternary difference */
"<<<<%.*s",
/* identical */
"=%.*s"
};
/*
  printf-style formats for a rule with a context prefix, written as
  "context|value". Every entry consumes two (int precision, string) pairs:
  first the context, then the value. Entries are in the same order as the
  context-free format table.
  NOTE(review): indexed as "state - _CS_EXP_DIFF1"; assumes the expansion
  states are numbered consecutively - confirm against the state enum.
*/
static const char *context_diff_fmt[5]=
{
/* primary difference */
"<%.*s|%.*s",
/* secondary difference */
"<<%.*s|%.*s",
/* tertiary difference */
"<<<%.*s|%.*s",
/* quaternary difference */
"<<<<%.*s|%.*s",
/* identical */
"=%.*s|%.*s"
};
static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
{
struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
const struct my_cs_file_section_st *s;
int state= (int)((s= cs_file_sec(st->attr, strlen(st->attr))) ? s->state :
0);
int state= (int)((s= cs_file_sec(st->attr.start,
st->attr.end - st->attr.start)) ?
s->state : 0);
int rc= MY_XML_OK;
switch (state) {
case _CS_MISC:
case _CS_FAMILY:
case _CS_ORDER:
break;
case _CS_ID:
i->cs.number= strtol(attr,(char**)NULL,10);
break;
@ -269,47 +637,185 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
i->cs.ctype=i->ctype;
break;
/* Special purpose commands */
case _CS_UCA_VERSION:
rc= tailoring_append(st, "[version %.*s]", len, attr);
break;
case _CS_CL_SUPPRESS_CONTRACTIONS:
rc= tailoring_append(st, "[suppress contractions %.*s]", len, attr);
break;
case _CS_CL_OPTIMIZE:
rc= tailoring_append(st, "[optimize %.*s]", len, attr);
break;
case _CS_CL_SHIFT_AFTER_METHOD:
rc= tailoring_append(st, "[shift-after-method %.*s]", len, attr);
break;
/* Collation Settings */
case _CS_ST_STRENGTH:
/* 1, 2, 3, 4, 5, or primary, secondary, tertiary, quaternary, identical */
rc= tailoring_append(st, "[strength %.*s]", len, attr);
break;
case _CS_ST_ALTERNATE:
/* non-ignorable, shifted */
rc= tailoring_append(st, "[alternate %.*s]", len, attr);
break;
case _CS_ST_BACKWARDS:
/* on, off, 2 */
rc= tailoring_append(st, "[backwards %.*s]", len, attr);
break;
case _CS_ST_NORMALIZATION:
/*
TODO for WL#896: check collations for normalization: vi.xml
We want precomposed characters work well at this point.
*/
/* on, off */
rc= tailoring_append(st, "[normalization %.*s]", len, attr);
break;
case _CS_ST_CASE_LEVEL:
/* on, off */
rc= tailoring_append(st, "[caseLevel %.*s]", len, attr);
break;
case _CS_ST_CASE_FIRST:
/* upper, lower, off */
rc= tailoring_append(st, "[caseFirst %.*s]", len, attr);
break;
case _CS_ST_HIRAGANA_QUATERNARY:
/* on, off */
rc= tailoring_append(st, "[hiraganaQ %.*s]", len, attr);
break;
case _CS_ST_NUMERIC:
/* on, off */
rc= tailoring_append(st, "[numeric %.*s]", len, attr);
break;
case _CS_ST_VARIABLE_TOP:
/* TODO for WL#896: check value format */
rc= tailoring_append(st, "[variableTop %.*s]", len, attr);
break;
case _CS_ST_MATCH_BOUNDARIES:
/* none, whole-character, whole-word */
rc= tailoring_append(st, "[match-boundaries %.*s]", len, attr);
break;
case _CS_ST_MATCH_STYLE:
/* minimal, medial, maximal */
rc= tailoring_append(st, "[match-style %.*s]", len, attr);
break;
/* Rules */
case _CS_RESET:
rc= tailoring_append(st, "%.*s", len, attr);
break;
case _CS_DIFF1:
case _CS_DIFF2:
case _CS_DIFF3:
case _CS_DIFF4:
case _CS_IDENTICAL:
rc= tailoring_append(st, diff_fmt[state - _CS_DIFF1], len, attr);
break;
/* Rules: Expansion */
case _CS_EXP_EXTEND:
rc= tailoring_append(st, " / %.*s", len, attr);
break;
case _CS_EXP_DIFF1:
case _CS_EXP_DIFF2:
case _CS_EXP_DIFF3:
case _CS_EXP_DIFF4:
case _CS_EXP_IDENTICAL:
if (i->context[0])
{
/*
Convert collation description from
Locale Data Markup Language (LDML)
into ICU Collation Customization expression.
*/
char arg[16];
const char *cmd[]= {"&","<","<<","<<<","="};
i->cs.tailoring= i->tailoring;
mstr(arg,attr,len,sizeof(arg)-1);
if (i->tailoring_length + 20 < sizeof(i->tailoring))
{
char *dst= i->tailoring_length + i->tailoring;
i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
}
rc= tailoring_append2(st, context_diff_fmt[state - _CS_EXP_DIFF1],
strlen(i->context), i->context, len, attr);
i->context[0]= 0;
}
else
rc= tailoring_append(st, diff_fmt[state - _CS_EXP_DIFF1], len, attr);
break;
/* Rules: Context */
case _CS_CONTEXT:
  /*
    Remember the context string; it is consumed by the next expansion
    rule. The copy needs len bytes plus the terminating zero, so len
    must be strictly smaller than the buffer. A value that does not fit
    is ignored.
  */
  if (len < sizeof(i->context))
  {
    memcpy(i->context, attr, len);
    i->context[len]= '\0';
  }
  break;
/* Rules: Abbreviating Ordering Specifications */
case _CS_A_DIFF1:
case _CS_A_DIFF2:
case _CS_A_DIFF3:
case _CS_A_DIFF4:
case _CS_A_IDENTICAL:
rc= tailoring_append_abbreviation(st, diff_fmt[state - _CS_A_DIFF1], len, attr);
break;
/* Rules: Placing Characters Before Others */
case _CS_RESET_BEFORE:
/*
TODO for WL#896: Add this check into text customization parser:
It is an error if the strength of the before relation is not identical
to the relation after the reset. We'll need this for WL#896.
*/
rc= tailoring_append(st, "[before %.*s]", len, attr);
break;
default:
break;
}
return MY_XML_OK;
return rc;
}
/*
  Parse a charset definition in XML format held in buf[0..len).

  An XML parser is created with cs_enter / cs_value / cs_leave installed
  as handlers and a local parser state as user data. Returns FALSE when
  my_xml_parse() succeeds and TRUE otherwise; on failure a message of the
  form "at line N pos M: text" is written to loader->error when it fits.

  NOTE(review): two function headers and two local state variables
  ("i" and "info") appear below. This looks like the previous and the
  current variant of the function side by side - confirm which one is
  intended; the body refers to "loader", which only the second header
  declares.
*/
my_bool my_parse_charset_xml(const char *buf, size_t len,
int (*add_collation)(struct charset_info_st *cs))
my_bool
my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
{
MY_XML_PARSER p;
struct my_cs_file_info i;
struct my_cs_file_info info;
my_bool rc;
/* Start from an empty state: no tailoring buffer, charset and collation reset */
my_charset_file_init(&info);
my_xml_parser_create(&p);
my_xml_set_enter_handler(&p,cs_enter);
my_xml_set_value_handler(&p,cs_value);
my_xml_set_leave_handler(&p,cs_leave);
i.add_collation= add_collation;
my_xml_set_user_data(&p,(void*)&i);
/* The handlers reach the loader callbacks through the parser state */
info.loader= loader;
my_xml_set_user_data(&p, (void *) &info);
/* rc is TRUE on parse failure */
rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE;
/*
  NOTE(review): the parser is released before its error details are read
  below; this assumes my_xml_parser_free() keeps the error text, line and
  position readable - confirm.
*/
my_xml_parser_free(&p);
my_charset_file_free(&info);
/*
  NOTE(review): rc is a my_bool compared against an XML status code; this
  relies on MY_XML_OK having the same value as FALSE - confirm.
*/
if (rc != MY_XML_OK)
{
const char *errstr= my_xml_error_string(&p);
/* 32 bytes are reserved for the fixed text and the two numbers */
if (sizeof(loader->error) > 32 + strlen(errstr))
{
/* We cannot use my_snprintf() here. See previous comment. */
sprintf(loader->error, "at line %d pos %d: %s",
my_xml_error_lineno(&p)+1,
(int) my_xml_error_pos(&p),
my_xml_error_string(&p));
}
}
return rc;
}

View File

@ -31,5 +31,11 @@ static void my_str_free_default(void *ptr)
free(ptr);
}
/*
  Default reallocation routine: forwards to the C library realloc()
  and returns its result unchanged.
*/
void *my_str_realloc_default(void *ptr, size_t size)
{
  void *resized= realloc(ptr, size);
  return resized;
}
void *(*my_str_malloc)(size_t)= &my_str_malloc_default;
void (*my_str_free)(void *)= &my_str_free_default;
void *(*my_str_realloc)(void *, size_t)= &my_str_realloc_default;

View File

@ -15,6 +15,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
#include "strings_def.h"
#include "m_string.h"
#include "my_xml.h"
@ -207,25 +208,71 @@ static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
}
/**
  Ensure the attr buffer is wide enough to hold the new value.

  Expand and/or allocate the dynamic buffer as needed to hold the
  concatenated path and the terminating zero. While the path still lives
  in the static buffer, the first expansion allocates a dynamic buffer
  and copies the current path (with its terminating zero) into it;
  later expansions reallocate the dynamic buffer.

  The parser is updated only after the allocation succeeded. On failure
  the buffer, its recorded size and the start/end pointers are left
  untouched, so the already allocated buffer is neither lost nor replaced
  by pointers derived from NULL.

  @param st   the parser instance
  @param len  the length of the attribute to be added

  @return state
  @retval MY_XML_ERROR  failed
  @retval MY_XML_OK     success
*/
static int my_xml_attr_ensure_space(MY_XML_PARSER *st, size_t len)
{
  size_t ofs= st->attr.end - st->attr.start;
  len++; /* Add terminating zero. */
  if (ofs + len > st->attr.buffer_size)
  {
    /* Double the size plus the request, saturating instead of wrapping */
    size_t new_size= (SIZE_T_MAX - len) / 2 > st->attr.buffer_size ?
                     st->attr.buffer_size * 2 + len : SIZE_T_MAX;
    char *new_buffer;

    if (!st->attr.buffer)
    {
      new_buffer= (char *) my_str_malloc(new_size);
      if (new_buffer)
        memcpy(new_buffer, st->attr.static_buffer, ofs + 1 /*term. zero */);
    }
    else
      new_buffer= (char *) my_str_realloc(st->attr.buffer, new_size);

    if (!new_buffer)
      return MY_XML_ERROR;

    st->attr.buffer= new_buffer;
    st->attr.buffer_size= new_size;
    st->attr.start= new_buffer;
    st->attr.end= new_buffer + ofs;
  }
  return MY_XML_OK;
}
/**
  Rewind the attr buffer to its initial state.

  Only the end pointer is moved back to the start, which makes the
  stored path empty in terms of start/end. No memory is released
  and no byte of the buffer is written.

  @param p  the parser instance
*/
static void my_xml_attr_rewind(MY_XML_PARSER *p)
{
/* keep the buffer already allocated */
p->attr.end= p->attr.start;
}
/*
  Register entering of an XML element or attribute.

  The name is appended to the path kept in the attr buffer, separated
  from the previous component by '/', and the path is zero terminated.
  The buffer is expanded first if needed. The "enter" handler, if any,
  is then called with either the bare name (MY_XML_FLAG_RELATIVE_NAMES)
  or the full path.

  Returns MY_XML_ERROR if the buffer cannot be expanded, otherwise the
  handler's result, or MY_XML_OK when no handler is installed.
*/
static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
{
  if (my_xml_attr_ensure_space(st, len + 1 /* the separator char */))
    return MY_XML_ERROR;

  if (st->attr.end > st->attr.start)
  {
    st->attr.end[0]= '/';
    st->attr.end++;
  }
  memcpy(st->attr.end, str, len);
  st->attr.end+= len;
  st->attr.end[0]= '\0';
  if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
    return st->enter ? st->enter(st, str, len) : MY_XML_OK;
  else
    return st->enter ?
      st->enter(st, st->attr.start, st->attr.end - st->attr.start) : MY_XML_OK;
}
@ -246,8 +293,8 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
int rc;
/* Find previous '/' or beginning */
for (e=p->attrend; (e>p->attr) && (e[0] != '/') ; e--);
glen = (size_t) ((e[0] == '/') ? (p->attrend-e-1) : p->attrend-e);
for (e= p->attr.end; (e > p->attr.start) && (e[0] != '/') ; e--);
glen= (size_t) ((e[0] == '/') ? (p->attr.end - e - 1) : p->attr.end - e);
if (str && (slen != glen))
{
@ -265,11 +312,12 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
else
rc= (p->leave_xml ? p->leave_xml(p,p->attr,p->attrend-p->attr) :
rc= (p->leave_xml ?
p->leave_xml(p, p->attr.start, p->attr.end - p->attr.start) :
MY_XML_OK);
*e='\0';
p->attrend=e;
p->attr.end= e;
return rc;
}
@ -277,7 +325,9 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
{
p->attrend=p->attr;
my_xml_attr_rewind(p);
p->beg=str;
p->cur=str;
p->end=str+len;
@ -432,7 +482,7 @@ gt:
}
}
if (p->attr[0])
if (p->attr.start[0])
{
sprintf(p->errstr,"unexpected END-OF-INPUT");
return MY_XML_ERROR;
@ -443,12 +493,22 @@ gt:
/*
  Initialize a parser instance: all members are zeroed and the path
  buffer is pointed at the embedded static buffer, with its size recorded
  as the available capacity.
*/
void my_xml_parser_create(MY_XML_PARSER *p)
{
  memset(p, 0, sizeof(p[0]));
  /*
    Use static buffer while it's sufficient.
  */
  p->attr.start= p->attr.end= p->attr.static_buffer;
  p->attr.buffer_size= sizeof(p->attr.static_buffer);
}
/*
  Release the dynamically allocated path buffer of a parser, if there is
  one. The pointer is cleared afterwards, so calling this function again
  on the same parser is harmless.
*/
void my_xml_parser_free(MY_XML_PARSER *p)
{
  if (p->attr.buffer)
  {
    my_str_free(p->attr.buffer);
    p->attr.buffer= NULL;
  }
}