New wildcmp() function in CHARSET_INFO structure

This commit is contained in:
bar@bar.mysql.r18.ru 2002-11-14 16:07:29 +04:00
parent 979f27ee4a
commit 237e2ca001
16 changed files with 420 additions and 42 deletions

View File

@ -83,6 +83,10 @@ typedef struct charset_info_st
my_bool (*like_range)(struct charset_info_st *,
const char *, uint, pchar, uint,
char *, char *, uint *, uint *);
int (*wildcmp)(struct charset_info_st *,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape,int w_one, int w_many);
/* Multibyte routines */
uint mbmaxlen;
@ -169,6 +173,11 @@ ulonglong my_strtoull_8bit(CHARSET_INFO *, const char *s, char **e, int base);
double my_strtod_8bit(CHARSET_INFO *, const char *s, char **e);
/*
  Wildcard comparison for single-byte charsets.
  Matches the string str..str_end against the pattern wildstr..wildend,
  where w_one stands for one character, w_many for any run of characters
  and escape makes the following pattern character literal.
  Returns 0 on a match and a non-zero value (1 or -1) otherwise.
*/
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many);
#ifdef USE_MB
/* Functions for multibyte charsets */
@ -178,6 +187,10 @@ extern void my_caseup_mb(CHARSET_INFO *, char *, uint);
extern void my_casedn_mb(CHARSET_INFO *, char *, uint);
extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);
extern int my_strncasecmp_mb(CHARSET_INFO * cs,const char *, const char *t, uint);
/*
  Wildcard comparison that steps over multi-byte characters using the
  charset's ismbchar handler.
  Matches the string str..str_end against the pattern wildstr..wildend,
  where w_one stands for one character, w_many for any run of characters
  and escape makes the following pattern character literal.
  Returns 0 on a match and a non-zero value (1 or -1) otherwise.
*/
int my_wildcmp_mb(CHARSET_INFO *,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many);
#endif
@ -219,6 +232,7 @@ extern int my_strncasecmp_mb(CHARSET_INFO * cs,const char *, const char *t, uint
#define my_strnncoll(s, a, b, c, d) ((s)->strnncoll((s), (a), (b), (c), (d)))
#define my_like_range(s, a, b, c, d, e, f, g, h) \
((s)->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h)))
/*
  Run the wildcard comparison handler of charset cs.
  The charset is passed as the handler's first argument, followed by the
  string range (s, se), the pattern range (w, we), the escape character (e)
  and the "one" (o) and "many" (m) wildcard characters.
*/
#define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
#define use_mb(s) ((s)->ismbchar != NULL)
#define my_ismbchar(s, a, b) ((s)->ismbchar((s), (a), (b)))

View File

@ -6232,6 +6232,7 @@ CHARSET_INFO my_charset_big5 =
my_strnncoll_big5,
my_strnxfrm_big5,
my_like_range_big5,
my_wildcmp_mb,
2, /* mbmaxlen */
ismbchar_big5,
ismbhead_big5,

View File

@ -145,6 +145,96 @@ void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Compare a string against a wildcard pattern byte by byte (no case
  folding, no collation).

  cs        charset handle; only handed on to the recursive call
  str       first byte of the string
  str_end   one past the last byte of the string
  wildstr   first byte of the pattern
  wildend   one past the last byte of the pattern
  escape    the pattern byte after this one is compared literally
  w_one     pattern byte that matches exactly one string byte
  w_many    pattern byte that matches any number of string bytes

  Returns 0 when the whole string matches the whole pattern.
  Any other value means "no match": -1 when the failure is found while
  handling w_many, or at a w_one before any literal byte has matched in
  this call; 1 in every other failing case.
*/
static int my_wildcmp_bin(CHARSET_INFO *cs,
                          const char *str,const char *str_end,
                          const char *wildstr,const char *wildend,
                          int escape, int w_one, int w_many)
{
  int result= -1;                       /* Not found, using wildcards */

  while (wildstr != wildend)
  {
    /* Literal part of the pattern: must match byte for byte */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;                      /* Compare the escaped byte itself */
      if (str == str_end || *wildstr++ != *str++)
      {
        return(1);
      }
      if (wildstr == wildend)
      {
        return(str != str_end);         /* Match if both are at end */
      }
      result=1;                         /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return(result);
        str++;
        /* Check the bound before looking at the next pattern byte */
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                   /* Found w_many */
      char cmp;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
          {
            return(-1);
          }
          str++;
          continue;
        }
        break;                          /* Not a wild character */
      }
      if (wildstr == wildend)
      {
        return(0);                      /* Ok if w_many is last */
      }
      if (str == str_end)
      {
        return(-1);
      }
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;
      wildstr++;                        /* This is compared through cmp */
      do
      {
        /* Find the next place where the anchor byte occurs */
        while (str != str_end && *str != cmp)
          str++;
        if (str++ == str_end)
        {
          return(-1);
        }
        {
          int tmp=my_wildcmp_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
          if (tmp <= 0)
          {
            return(tmp);
          }
        }
        /*
          Only look at wildstr[0] while it is inside the pattern.  When the
          anchor was the last pattern byte, keep scanning for a later
          occurrence instead of reading the byte behind the pattern.
        */
      } while (str != str_end &&
               (wildstr == wildend || wildstr[0] != w_many));
      return(-1);
    }
  }
  return(str != str_end ? 1 : 0);
}
static CHARSET_INFO my_charset_bin_st =
{
@ -161,7 +251,8 @@ static CHARSET_INFO my_charset_bin_st =
0, /* strxfrm_multiply */
my_strnncoll_binary, /* strnncoll */
NULL, /* strxnfrm */
NULL, /* like_rabge */
NULL, /* like_range */
my_wildcmp_bin, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -178,7 +269,13 @@ static CHARSET_INFO my_charset_bin_st =
my_hash_caseup_bin, /* hash_caseup */
my_hash_sort_bin, /* hash_sort */
255, /* max_sort_char */
my_snprintf_8bit /* snprintf */
my_snprintf_8bit, /* snprintf */
my_strtol_8bit,
my_strtoul_8bit,
my_strtoll_8bit,
my_strtoull_8bit,
my_strtod_8bit
};

View File

@ -610,6 +610,7 @@ CHARSET_INFO my_charset_czech =
my_strnncoll_czech,
my_strnxfrm_czech,
my_like_range_czech,
my_wildcmp_8bit,
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */

View File

@ -8649,6 +8649,7 @@ CHARSET_INFO my_charset_euc_kr =
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_mb, /* wildcmp */
2, /* mbmaxlen */
ismbchar_euc_kr,
ismbhead_euc_kr,

View File

@ -5699,6 +5699,7 @@ CHARSET_INFO my_charset_gb2312 =
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_mb, /* wildcmp */
2, /* mbmaxlen */
ismbchar_gb2312,
ismbhead_gb2312,

View File

@ -9887,6 +9887,7 @@ CHARSET_INFO my_charset_gbk =
my_strnncoll_gbk,
my_strnxfrm_gbk,
my_like_range_gbk,
my_wildcmp_mb, /* wildcmp */
2, /* mbmaxlen */
ismbchar_gbk,
ismbhead_gbk,

View File

@ -428,6 +428,7 @@ CHARSET_INFO my_charset_latin1_de =
my_strnncoll_latin1_de,
my_strnxfrm_latin1_de,
my_like_range_latin1_de,
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */

View File

@ -138,4 +138,142 @@ int my_strncasecmp_mb(CHARSET_INFO * cs,
return 0;
}
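/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched and the failure was found while handling w_many,
**    or at a w_one before any literal character had matched
** 1 if not matched in any other way (a literal character differed, the
**    string ended at a w_one after a literal match, or characters of the
**    string were left over after the pattern ended)
*/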
/*
  INC_PTR advances pointer A by one character of charset cs, where B is
  the end of the buffer A points into.  When use_mb_flag is set and
  my_ismbchar(cs,A,B) is non-zero, A moves by the value my_ismbchar
  returned; otherwise it moves by one byte.
  The macro refers to a variable named use_mb_flag, which must be in
  scope where it is expanded, and it evaluates A more than once.
*/
#define INC_PTR(cs,A,B) A+=((use_mb_flag && \
my_ismbchar(cs,A,B)) ? my_ismbchar(cs,A,B) : 1)
/*
  likeconv maps a single byte to the value used when comparing it:
  my_toupper() of the byte when LIKE_CMP_TOUPPER is defined, otherwise
  the byte's entry in the charset's sort_order table.
*/
#ifdef LIKE_CMP_TOUPPER
#define likeconv(s,A) (uchar) my_toupper(s,A)
#else
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
#endif
/*
  Compare a string against a wildcard pattern in a charset that may
  contain multi-byte characters.

  cs        charset; supplies ismbchar and the likeconv() mapping
  str       first byte of the string
  str_end   one past the last byte of the string
  wildstr   first byte of the pattern
  wildend   one past the last byte of the pattern
  escape    the pattern character after this one is compared literally
  w_one     pattern character that matches exactly one string character
  w_many    pattern character that matches any number of string characters

  A pattern character for which my_ismbchar() is non-zero is compared
  with memcmp(); every other character is compared through likeconv().

  Returns 0 when the whole string matches the whole pattern.
  Any other value means "no match": -1 when the failure is found while
  handling w_many, or at a w_one before any literal character has matched
  in this call; 1 in every other failing case.
*/
int my_wildcmp_mb(CHARSET_INFO *cs,
                  const char *str,const char *str_end,
                  const char *wildstr,const char *wildend,
                  int escape, int w_one, int w_many)
{
  int result= -1;                       /* Not found, using wildcards */
  bool use_mb_flag=use_mb(cs);          /* INC_PTR refers to this name */

  while (wildstr != wildend)
  {
    /* Literal part of the pattern */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      int l;
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;                      /* Compare the escaped char itself */
      if (use_mb_flag &&
          (l = my_ismbchar(cs, wildstr, wildend)))
      {
        /* Multi-byte pattern character: needs an identical byte sequence */
        if (str+l > str_end || memcmp(str, wildstr, l) != 0)
          return 1;
        str += l;
        wildstr += l;
      }
      else
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
        return(1);                      /* No match */
      if (wildstr == wildend)
        return (str != str_end);        /* Match if both are at end */
      result=1;                         /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return (result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                   /* Found w_many */
      uchar cmp;
      const char* mb = wildstr;
      int mblen;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return (-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;                          /* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);                      /* Ok if w_many is last */
      if (str == str_end)
        return -1;

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      /* Remember where the anchor character starts and how it is sized */
      mb=wildstr;
      LINT_INIT(mblen);
      if (use_mb_flag)
        mblen = my_ismbchar(cs, wildstr, wildend);
      INC_PTR(cs,wildstr,wildend);      /* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      do
      {
        if (use_mb_flag)
        {
          /* Walk the string one character at a time looking for the anchor */
          for (;;)
          {
            if (str >= str_end)
              return -1;
            if (mblen)
            {
              if (str+mblen <= str_end && memcmp(str, mb, mblen) == 0)
              {
                str += mblen;
                break;
              }
            }
            else if (!my_ismbchar(cs, str, str_end) &&
                     likeconv(cs,*str) == cmp)
            {
              str++;
              break;
            }
            INC_PTR(cs,str, str_end);
          }
        }
        else
        {
          while (str != str_end && likeconv(cs,*str) != cmp)
            str++;
          if (str++ == str_end) return (-1);
        }
        {
          int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
          if (tmp <= 0)
            return (tmp);
        }
        /*
          Only look at wildstr[0] while it is inside the pattern.  When the
          anchor was the last pattern character, keep scanning for a later
          occurrence instead of reading the byte behind the pattern.
        */
      } while (str != str_end &&
               (wildstr == wildend || wildstr[0] != w_many));
      return(-1);
    }
  }
  return (str != str_end ? 1 : 0);
}
#endif

View File

@ -18,7 +18,6 @@
#include "my_sys.h"
#include "m_ctype.h"
#include "m_string.h"
#include "dbug.h"
#include "stdarg.h"
#include "assert.h"
@ -274,3 +273,97 @@ double my_strtod_8bit(CHARSET_INFO *cs __attribute__((unused)),
{
return strtod(s,e);
}
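/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched and the failure was found while handling w_many,
**    or at a w_one before any literal character had matched
** 1 if not matched in any other way (a literal character differed, the
**    string ended at a w_one after a literal match, or characters of the
**    string were left over after the pattern ended)
*/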
/*
  likeconv maps a single byte to the value used when comparing it:
  my_toupper() of the byte when LIKE_CMP_TOUPPER is defined, otherwise
  the byte's entry in the charset's sort_order table.
*/
#ifdef LIKE_CMP_TOUPPER
#define likeconv(s,A) (uchar) my_toupper(s,A)
#else
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
#endif
/*
  INC_PTR advances pointer A by one byte; the cs and B arguments are
  accepted but not used in this definition.
*/
#define INC_PTR(cs,A,B) A++
/*
  Compare a string against a wildcard pattern for a single-byte charset.
  Literal characters are compared through likeconv(), so two bytes are
  equal when likeconv() maps them to the same value.

  cs        charset; supplies the likeconv() mapping
  str       first byte of the string
  str_end   one past the last byte of the string
  wildstr   first byte of the pattern
  wildend   one past the last byte of the pattern
  escape    the pattern character after this one is compared literally
  w_one     pattern character that matches exactly one string character
  w_many    pattern character that matches any number of string characters

  Returns 0 when the whole string matches the whole pattern.
  Any other value means "no match": -1 when the failure is found while
  handling w_many, or at a w_one before any literal character has matched
  in this call; 1 in every other failing case.
*/
int my_wildcmp_8bit(CHARSET_INFO *cs,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
                    int escape, int w_one, int w_many)
{
  int result= -1;                       /* Not found, using wildcards */

  while (wildstr != wildend)
  {
    /* Literal part of the pattern */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;                      /* Compare the escaped char itself */
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
        return(1);                      /* No match */
      if (wildstr == wildend)
        return (str != str_end);        /* Match if both are at end */
      result=1;                         /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return (result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                   /* Found w_many */
      uchar cmp;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return (-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;                          /* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);                      /* Ok if w_many is last */
      if (str == str_end)
        return -1;

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;
      INC_PTR(cs,wildstr,wildend);      /* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      do
      {
        /* Find the next place where the anchor character occurs */
        while (str != str_end && likeconv(cs,*str) != cmp)
          str++;
        if (str++ == str_end) return (-1);
        {
          int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
          if (tmp <= 0)
            return (tmp);
        }
        /*
          Only look at wildstr[0] while it is inside the pattern.  When the
          anchor was the last pattern character, keep scanning for a later
          occurrence instead of reading the byte behind the pattern.
        */
      } while (str != str_end &&
               (wildstr == wildend || wildstr[0] != w_many));
      return(-1);
    }
  }
  return (str != str_end ? 1 : 0);
}

View File

@ -4474,6 +4474,7 @@ CHARSET_INFO my_charset_sjis =
my_strnncoll_sjis,
my_strnxfrm_sjis,
my_like_range_sjis,
my_wildcmp_mb, /* wildcmp */
2, /* mbmaxlen */
ismbchar_sjis,
ismbhead_sjis,

View File

@ -702,6 +702,7 @@ CHARSET_INFO my_charset_tis620 =
my_strnncoll_tis620,
my_strnxfrm_tis620,
my_like_range_tis620,
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */

View File

@ -8443,6 +8443,7 @@ CHARSET_INFO my_charset_ujis =
NULL, /* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_mb, /* wildcmp */
3, /* mbmaxlen */
ismbchar_ujis,
ismbhead_ujis,

View File

@ -1968,6 +1968,7 @@ CHARSET_INFO my_charset_utf8 =
my_strnncoll_utf8, /* strnncoll */
my_strnxfrm_utf8, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_mb, /* wildcmp */
3, /* mbmaxlen */
my_ismbchar_utf8, /* ismbchar */
my_ismbhead_utf8, /* ismbhead */
@ -2372,7 +2373,7 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap)
fmt++;
/* Skip if max size is used (to be compatible with printf) */
while (my_isdigit(system_charset_info,*fmt) || *fmt == '.' || *fmt == '-')
while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
fmt++;
if (*fmt == 'l')
@ -2485,6 +2486,7 @@ CHARSET_INFO my_charset_ucs2 =
my_strnncoll_ucs2, /* strnncoll */
my_strnxfrm_ucs2, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_mb, /* wildcmp */
2, /* mbmaxlen */
my_ismbchar_ucs2, /* ismbchar */
my_ismbhead_ucs2, /* ismbhead */

View File

@ -636,6 +636,7 @@ CHARSET_INFO my_charset_win1250ch =
my_strnncoll_win1250ch,
my_strnxfrm_win1250ch,
my_like_range_win1250ch,
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */

View File

@ -2823,6 +2823,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -2865,6 +2866,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -2906,6 +2908,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -2947,6 +2950,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -2989,6 +2993,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3030,6 +3035,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3071,6 +3077,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3112,6 +3119,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3154,6 +3162,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3195,6 +3204,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3236,6 +3246,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3277,6 +3288,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3318,6 +3330,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3359,6 +3372,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3400,6 +3414,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3442,6 +3457,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3483,6 +3499,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3525,6 +3542,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3567,6 +3585,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3608,6 +3627,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3649,6 +3669,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3690,6 +3711,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3731,6 +3753,7 @@ static CHARSET_INFO compiled_charsets[] = {
my_strnncoll_simple,/* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
my_wildcmp_8bit, /* wildcmp */
1, /* mbmaxlen */
NULL, /* ismbchar */
NULL, /* ismbhead */
@ -3768,9 +3791,10 @@ static CHARSET_INFO compiled_charsets[] = {
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
0,
NULL,
NULL,
NULL,
NULL, /* strnncoll */
NULL, /* strnxfrm */
NULL, /* like_range */
NULL, /* wildcmp */
0,
NULL,
NULL,