Bug#15375 Unassigned multibyte codes are broken
into parts when converting to Unicode. m_ctype.h: Reorganizing mb_wc return codes to be able to return "an unassigned N-byte-long character". sql_string.cc: Adding code to detect and properly handle unassigned characters (i.e. those characters which are correctly formed according to the character set specifications, but don't have a Unicode mapping). Many files: Fixing conversion functions to return new codes. ctype_ujis.test, ctype_gbk.test, ctype_big5.test: Adding a test case. ctype_ujis.result, ctype_gbk.result, ctype_big5.result: Fixing results accordingly. include/m_ctype.h: Reorganizing mb_wc return codes to be able to return "an unassigned N-byte-long character". Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode. mysql-test/r/ctype_big5.result: Fixing results accordingly. mysql-test/r/ctype_gbk.result: Fixing results accordingly. mysql-test/r/ctype_ujis.result: Fixing results accordingly. mysql-test/t/ctype_big5.test: Adding a test case. mysql-test/t/ctype_gbk.test: Adding a test case. mysql-test/t/ctype_ujis.test: Adding a test case. sql/sql_string.cc: Adding code to detect and properly handle unassigned characters (i.e. those characters which are correctly formed according to the character set specifications, but don't have a Unicode mapping). strings/ctype-big5.c: Fixing conversion function to return new codes. strings/ctype-bin.c: Fixing conversion function to return new codes. strings/ctype-cp932.c: Fixing conversion function to return new codes. strings/ctype-euc_kr.c: Fixing conversion function to return new codes. strings/ctype-gb2312.c: Fixing conversion function to return new codes. strings/ctype-gbk.c: Fixing conversion function to return new codes. strings/ctype-latin1.c: Fixing conversion function to return new codes. strings/ctype-simple.c: Fixing conversion function to return new codes. strings/ctype-sjis.c: Fixing conversion function to return new codes. 
strings/ctype-tis620.c: Fixing conversion function to return new codes. strings/ctype-ucs2.c: Fixing conversion function to return new codes. strings/ctype-ujis.c: Fixing conversion function to return new codes. strings/ctype-utf8.c: Fixing conversion function to return new codes.
This commit is contained in:
parent
5aeb69296a
commit
a464e01713
@ -44,10 +44,19 @@ typedef struct unicase_info_st
|
|||||||
uint16 sort;
|
uint16 sort;
|
||||||
} MY_UNICASE_INFO;
|
} MY_UNICASE_INFO;
|
||||||
|
|
||||||
#define MY_CS_ILSEQ 0
|
|
||||||
#define MY_CS_ILUNI 0
|
/* mb_wc and wc_mb return codes */
|
||||||
#define MY_CS_TOOSMALL -1
|
#define MY_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */
|
||||||
#define MY_CS_TOOFEW(n) (-1-(n))
|
#define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */
|
||||||
|
#define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */
|
||||||
|
#define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */
|
||||||
|
#define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */
|
||||||
|
/* The following three are currently not really used */
|
||||||
|
#define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */
|
||||||
|
#define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */
|
||||||
|
#define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */
|
||||||
|
/* A helper macro for "need at least n bytes" */
|
||||||
|
#define MY_CS_TOOSMALLN(n) (-100-(n))
|
||||||
|
|
||||||
#define MY_SEQ_INTTAIL 1
|
#define MY_SEQ_INTTAIL 1
|
||||||
#define MY_SEQ_SPACES 2
|
#define MY_SEQ_SPACES 2
|
||||||
|
@ -189,3 +189,6 @@ select hex(a) from t1 where a = _big5 0xF9DC;
|
|||||||
hex(a)
|
hex(a)
|
||||||
E5ABBA
|
E5ABBA
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
select hex(convert(_big5 0xC84041 using ucs2));
|
||||||
|
hex(convert(_big5 0xC84041 using ucs2))
|
||||||
|
003F0041
|
||||||
|
@ -165,3 +165,6 @@ hex(a)
|
|||||||
A1A1
|
A1A1
|
||||||
A3A0
|
A3A0
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
select hex(convert(_gbk 0xA14041 using ucs2));
|
||||||
|
hex(convert(_gbk 0xA14041 using ucs2))
|
||||||
|
003F0041
|
||||||
|
@ -2307,3 +2307,9 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
|
|||||||
c2h
|
c2h
|
||||||
ab_def
|
ab_def
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
select hex(convert(_ujis 0xA5FE41 using ucs2));
|
||||||
|
hex(convert(_ujis 0xA5FE41 using ucs2))
|
||||||
|
003F0041
|
||||||
|
select hex(convert(_ujis 0x8FABF841 using ucs2));
|
||||||
|
hex(convert(_ujis 0x8FABF841 using ucs2))
|
||||||
|
003F0041
|
||||||
|
@ -53,4 +53,14 @@ alter table t1 convert to character set utf8;
|
|||||||
select hex(a) from t1 where a = _big5 0xF9DC;
|
select hex(a) from t1 where a = _big5 0xF9DC;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bugs#15375: Unassigned multibyte codes are broken
|
||||||
|
# into parts when converting to Unicode.
|
||||||
|
# This query should return 0x003F0041. I.e. it should
|
||||||
|
# scan unassigned double-byte character 0xC840, convert
|
||||||
|
# it as QUESTION MARK 0x003F and then scan the next
|
||||||
|
# character, which is a single byte character 0x41.
|
||||||
|
#
|
||||||
|
select hex(convert(_big5 0xC84041 using ucs2));
|
||||||
|
|
||||||
# End of 4.1 tests
|
# End of 4.1 tests
|
||||||
|
@ -31,4 +31,14 @@ INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
|
|||||||
SELECT hex(a) FROM t1 ORDER BY a;
|
SELECT hex(a) FROM t1 ORDER BY a;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bugs#15375: Unassigned multibyte codes are broken
|
||||||
|
# into parts when converting to Unicode.
|
||||||
|
# This query should return 0x003F0041. I.e. it should
|
||||||
|
# scan unassigned double-byte character 0xA140, convert
|
||||||
|
# it as QUESTION MARK 0x003F and then scan the next
|
||||||
|
# character, which is a single byte character 0x41.
|
||||||
|
#
|
||||||
|
select hex(convert(_gbk 0xA14041 using ucs2));
|
||||||
|
|
||||||
# End of 4.1 tests
|
# End of 4.1 tests
|
||||||
|
@ -1152,4 +1152,19 @@ SET collation_connection='ujis_bin';
|
|||||||
-- source include/ctype_innodb_like.inc
|
-- source include/ctype_innodb_like.inc
|
||||||
-- source include/ctype_like_escape.inc
|
-- source include/ctype_like_escape.inc
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bugs#15375: Unassigned multibyte codes are broken
|
||||||
|
# into parts when converting to Unicode.
|
||||||
|
# This query should return 0x003F0041. I.e. it should
|
||||||
|
# scan unassigned double-byte character 0xA5FE, convert
|
||||||
|
# it as QUESTION MARK 0x003F and then scan the next
|
||||||
|
# character, which is a single byte character 0x41.
|
||||||
|
#
|
||||||
|
select hex(convert(_ujis 0xA5FE41 using ucs2));
|
||||||
|
# This one should return 0x003F0041:
|
||||||
|
# scan unassigned three-byte character 0x8FABF8,
|
||||||
|
# convert it as QUESTION MARK 0x003F and then scan
|
||||||
|
# the next character, which is a single byte character 0x41.
|
||||||
|
select hex(convert(_ujis 0x8FABF841 using ucs2));
|
||||||
|
|
||||||
# End of 4.1 tests
|
# End of 4.1 tests
|
||||||
|
@ -806,8 +806,18 @@ copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
|||||||
from++;
|
from++;
|
||||||
wc= '?';
|
wc= '?';
|
||||||
}
|
}
|
||||||
|
else if (cnvres > MY_CS_TOOSMALL)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
A correct multibyte sequence detected
|
||||||
|
But it doesn't have Unicode mapping.
|
||||||
|
*/
|
||||||
|
error_count++;
|
||||||
|
from+= (-cnvres);
|
||||||
|
wc= '?';
|
||||||
|
}
|
||||||
else
|
else
|
||||||
break; // Impossible char.
|
break; // Not enough characters
|
||||||
|
|
||||||
outp:
|
outp:
|
||||||
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
||||||
|
@ -6259,7 +6259,7 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
int hi=s[0];
|
int hi=s[0];
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
if (hi<0x80)
|
if (hi<0x80)
|
||||||
{
|
{
|
||||||
@ -6268,10 +6268,10 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
|
if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -220,7 +220,7 @@ static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
const unsigned char *end __attribute__((unused)))
|
const unsigned char *end __attribute__((unused)))
|
||||||
{
|
{
|
||||||
if (str >= end)
|
if (str >= end)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
*wc=str[0];
|
*wc=str[0];
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -5355,7 +5355,7 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
int hi=s[0];
|
int hi=s[0];
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
if (hi < 0x80)
|
if (hi < 0x80)
|
||||||
{
|
{
|
||||||
@ -5370,10 +5370,10 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
|
if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -8601,7 +8601,7 @@ my_wc_mb_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
return MY_CS_ILUNI;
|
return MY_CS_ILUNI;
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
s[0]=code>>8;
|
s[0]=code>>8;
|
||||||
s[1]=code&0xFF;
|
s[1]=code&0xFF;
|
||||||
@ -8617,7 +8617,7 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
int hi=s[0];
|
int hi=s[0];
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
if (hi<0x80)
|
if (hi<0x80)
|
||||||
{
|
{
|
||||||
@ -8626,10 +8626,10 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
|
if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -5651,7 +5651,7 @@ my_wc_mb_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
return MY_CS_ILUNI;
|
return MY_CS_ILUNI;
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
code|=0x8080;
|
code|=0x8080;
|
||||||
s[0]=code>>8;
|
s[0]=code>>8;
|
||||||
@ -5668,7 +5668,7 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
hi=(int) s[0];
|
hi=(int) s[0];
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
if (hi<0x80)
|
if (hi<0x80)
|
||||||
{
|
{
|
||||||
@ -5677,10 +5677,10 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
|
if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -9889,7 +9889,7 @@ my_wc_mb_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
return MY_CS_ILUNI;
|
return MY_CS_ILUNI;
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
s[0]=code>>8;
|
s[0]=code>>8;
|
||||||
s[1]=code&0xFF;
|
s[1]=code&0xFF;
|
||||||
@ -9903,7 +9903,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
int hi;
|
int hi;
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
hi=s[0];
|
hi=s[0];
|
||||||
|
|
||||||
@ -9914,10 +9914,10 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
|
if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
|
|
||||||
|
@ -363,10 +363,10 @@ int my_mb_wc_latin1(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
const unsigned char *end __attribute__((unused)))
|
const unsigned char *end __attribute__((unused)))
|
||||||
{
|
{
|
||||||
if (str >= end)
|
if (str >= end)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
*wc=cs_to_uni[*str];
|
*wc=cs_to_uni[*str];
|
||||||
return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
|
return (!wc[0] && str[0]) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
@ -207,10 +207,10 @@ int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
|
|||||||
const unsigned char *end __attribute__((unused)))
|
const unsigned char *end __attribute__((unused)))
|
||||||
{
|
{
|
||||||
if (str >= end)
|
if (str >= end)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
*wc=cs->tab_to_uni[*str];
|
*wc=cs->tab_to_uni[*str];
|
||||||
return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
|
return (!wc[0] && str[0]) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
|
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
|
||||||
|
@ -4501,7 +4501,7 @@ my_wc_mb_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
|
|
||||||
mb:
|
mb:
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
s[0]=code>>8;
|
s[0]=code>>8;
|
||||||
s[1]=code&0xFF;
|
s[1]=code&0xFF;
|
||||||
@ -4515,7 +4515,7 @@ my_mb_wc_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
int hi=s[0];
|
int hi=s[0];
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
if (hi < 0x80)
|
if (hi < 0x80)
|
||||||
{
|
{
|
||||||
@ -4530,10 +4530,10 @@ my_mb_wc_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!(pwc[0]=func_sjis_uni_onechar((hi<<8)+s[1])))
|
if (!(pwc[0]=func_sjis_uni_onechar((hi<<8)+s[1])))
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -820,10 +820,10 @@ int my_mb_wc_tis620(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
const unsigned char *end __attribute__((unused)))
|
const unsigned char *end __attribute__((unused)))
|
||||||
{
|
{
|
||||||
if (str >= end)
|
if (str >= end)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
*wc=cs_to_uni[*str];
|
*wc=cs_to_uni[*str];
|
||||||
return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
|
return (!wc[0] && str[0]) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
@ -95,7 +95,7 @@ static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
my_wc_t * pwc, const uchar *s, const uchar *e)
|
my_wc_t * pwc, const uchar *s, const uchar *e)
|
||||||
{
|
{
|
||||||
if (s+2 > e) /* Need 2 characters */
|
if (s+2 > e) /* Need 2 characters */
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
*pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]);
|
*pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]);
|
||||||
return 2;
|
return 2;
|
||||||
@ -105,7 +105,7 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
|
|||||||
my_wc_t wc, uchar *r, uchar *e)
|
my_wc_t wc, uchar *r, uchar *e)
|
||||||
{
|
{
|
||||||
if ( r+2 > e )
|
if ( r+2 > e )
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
r[0]= (uchar) (wc >> 8);
|
r[0]= (uchar) (wc >> 8);
|
||||||
r[1]= (uchar) (wc & 0xFF);
|
r[1]= (uchar) (wc & 0xFF);
|
||||||
|
@ -242,7 +242,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
const uchar *e __attribute__((unused)))
|
const uchar *e __attribute__((unused)))
|
||||||
{
|
{
|
||||||
wc[0]=tab_jisx0201_uni[*s];
|
wc[0]=tab_jisx0201_uni[*s];
|
||||||
return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1;
|
return (!wc[0] && s[0]) ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -8341,7 +8341,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
int c1,c2,c3;
|
int c1,c2,c3;
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
c1=s[0];
|
c1=s[0];
|
||||||
|
|
||||||
@ -8353,7 +8353,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
c2=s[1];
|
c2=s[1];
|
||||||
|
|
||||||
@ -8368,7 +8368,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
{
|
{
|
||||||
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
|
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
|
||||||
if (!pwc[0])
|
if (!pwc[0])
|
||||||
return MY_CS_ILSEQ;
|
return -2;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -8388,7 +8388,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
|
|
||||||
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
|
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
|
||||||
if (ret!=1)
|
if (ret!=1)
|
||||||
return ret;
|
return -2;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -8399,7 +8399,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
return MY_CS_ILSEQ;
|
return MY_CS_ILSEQ;
|
||||||
|
|
||||||
if (s+3>e)
|
if (s+3>e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
c3=s[2];
|
c3=s[2];
|
||||||
if (c3 < 0xA1 || c3>=0xFF)
|
if (c3 < 0xA1 || c3>=0xFF)
|
||||||
@ -8408,8 +8408,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
|
|||||||
if (c2<0xF5)
|
if (c2<0xF5)
|
||||||
{
|
{
|
||||||
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
|
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
|
||||||
if (!pwc)
|
if (!pwc[0])
|
||||||
return MY_CS_ILSEQ;
|
return -3;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -8440,7 +8440,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if ((jp=my_uni_jisx0208_onechar(wc)))
|
if ((jp=my_uni_jisx0208_onechar(wc)))
|
||||||
{
|
{
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
jp+=0x8080;
|
jp+=0x8080;
|
||||||
s[0]=jp>>8;
|
s[0]=jp>>8;
|
||||||
@ -8452,7 +8452,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
|
if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
|
||||||
{
|
{
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
s[1]= s[0];
|
s[1]= s[0];
|
||||||
s[0]= 0x8E;
|
s[0]= 0x8E;
|
||||||
return 2;
|
return 2;
|
||||||
@ -8462,7 +8462,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if ((jp=my_uni_jisx0212_onechar(wc)))
|
if ((jp=my_uni_jisx0212_onechar(wc)))
|
||||||
{
|
{
|
||||||
if (s+3>e)
|
if (s+3>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
jp+=0x8080;
|
jp+=0x8080;
|
||||||
s[0]=0x8F;
|
s[0]=0x8F;
|
||||||
@ -8476,7 +8476,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if (wc>=0xE000 && wc<0xE3AC)
|
if (wc>=0xE000 && wc<0xE3AC)
|
||||||
{
|
{
|
||||||
if (s+2>e)
|
if (s+2>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
c1=((unsigned)(wc-0xE000)/94)+0xF5;
|
c1=((unsigned)(wc-0xE000)/94)+0xF5;
|
||||||
s[0]=c1;
|
s[0]=c1;
|
||||||
@ -8490,7 +8490,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
|
|||||||
if (wc>=0xE3AC && wc<0xE758)
|
if (wc>=0xE3AC && wc<0xE758)
|
||||||
{
|
{
|
||||||
if (s+3>e)
|
if (s+3>e)
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
s[0]=0x8F;
|
s[0]=0x8F;
|
||||||
c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
|
c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
|
||||||
|
@ -1765,7 +1765,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
unsigned char c;
|
unsigned char c;
|
||||||
|
|
||||||
if (s >= e)
|
if (s >= e)
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL;
|
||||||
|
|
||||||
c= s[0];
|
c= s[0];
|
||||||
if (c < 0x80)
|
if (c < 0x80)
|
||||||
@ -1778,7 +1778,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
else if (c < 0xe0)
|
else if (c < 0xe0)
|
||||||
{
|
{
|
||||||
if (s+2 > e) /* We need 2 characters */
|
if (s+2 > e) /* We need 2 characters */
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL2;
|
||||||
|
|
||||||
if (!((s[1] ^ 0x80) < 0x40))
|
if (!((s[1] ^ 0x80) < 0x40))
|
||||||
return MY_CS_ILSEQ;
|
return MY_CS_ILSEQ;
|
||||||
@ -1789,7 +1789,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
else if (c < 0xf0)
|
else if (c < 0xf0)
|
||||||
{
|
{
|
||||||
if (s+3 > e) /* We need 3 characters */
|
if (s+3 > e) /* We need 3 characters */
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL3;
|
||||||
|
|
||||||
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
|
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
|
||||||
return MY_CS_ILSEQ;
|
return MY_CS_ILSEQ;
|
||||||
@ -1804,7 +1804,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
|
else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
|
||||||
{
|
{
|
||||||
if (s+4 > e) /* We need 4 characters */
|
if (s+4 > e) /* We need 4 characters */
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL4;
|
||||||
|
|
||||||
if (!((s[1] ^ 0x80) < 0x40 &&
|
if (!((s[1] ^ 0x80) < 0x40 &&
|
||||||
(s[2] ^ 0x80) < 0x40 &&
|
(s[2] ^ 0x80) < 0x40 &&
|
||||||
@ -1822,7 +1822,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
|
else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
|
||||||
{
|
{
|
||||||
if (s+5 >e) /* We need 5 characters */
|
if (s+5 >e) /* We need 5 characters */
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL5;
|
||||||
|
|
||||||
if (!((s[1] ^ 0x80) < 0x40 &&
|
if (!((s[1] ^ 0x80) < 0x40 &&
|
||||||
(s[2] ^ 0x80) < 0x40 &&
|
(s[2] ^ 0x80) < 0x40 &&
|
||||||
@ -1841,7 +1841,7 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
|
else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
|
||||||
{
|
{
|
||||||
if ( s+6 >e ) /* We need 6 characters */
|
if ( s+6 >e ) /* We need 6 characters */
|
||||||
return MY_CS_TOOFEW(0);
|
return MY_CS_TOOSMALL6;
|
||||||
|
|
||||||
if (!((s[1] ^ 0x80) < 0x40 &&
|
if (!((s[1] ^ 0x80) < 0x40 &&
|
||||||
(s[2] ^ 0x80) < 0x40 &&
|
(s[2] ^ 0x80) < 0x40 &&
|
||||||
@ -1892,7 +1892,7 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
|
|||||||
Because of it (r+count > e), not (r+count-1 >e )
|
Because of it (r+count > e), not (r+count-1 >e )
|
||||||
*/
|
*/
|
||||||
if ( r+count > e )
|
if ( r+count > e )
|
||||||
return MY_CS_TOOSMALL;
|
return MY_CS_TOOSMALLN(count);
|
||||||
|
|
||||||
switch (count) {
|
switch (count) {
|
||||||
/* Fall through all cases!!! */
|
/* Fall through all cases!!! */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user