A preparatory patch for MDEV-6566.
Adding a new virtual function MY_CHARSET_HANDLER::copy_abort(). Moving character set specific code into the corresponding implementations (for simple, multi-byte and mbminlen>1 character sets).
This commit is contained in:
parent
7047bef1ef
commit
b1b6101af2
@ -364,6 +364,23 @@ typedef int (*my_charset_conv_wc_mb)(CHARSET_INFO *, my_wc_t,
|
||||
typedef size_t (*my_charset_conv_case)(CHARSET_INFO *,
|
||||
char *, size_t, char *, size_t);
|
||||
|
||||
/*
|
||||
A structure to return the statistics of a native string copying,
|
||||
when no Unicode conversion is involved.
|
||||
|
||||
The structure is OK to be uninitialized before calling a copying routine.
|
||||
A copying routine must populate the structure as follows:
|
||||
- m_source_end_pos must be set to a non-NULL value
|
||||
in the range of the input string.
|
||||
- m_well_formed_error_pos must be set to NULL if the string was
|
||||
well formed, or to the position of the leftmost bad byte sequence.
|
||||
*/
|
||||
/* Status returned by a native (no Unicode conversion) string copying routine. */
typedef struct
|
||||
{
|
||||
const char *m_source_end_pos; /* Position in the source where reading stopped; non-NULL after a copy */
|
||||
const char *m_well_formed_error_pos; /* Leftmost bad byte sequence, or NULL if the string was well formed */
|
||||
} MY_STRCOPY_STATUS;
|
||||
|
||||
|
||||
/* See strings/CHARSET_INFO.txt about information on this structure */
|
||||
struct my_charset_handler_st
|
||||
@ -426,6 +443,23 @@ struct my_charset_handler_st
|
||||
char **endptr, int *error);
|
||||
size_t (*scan)(CHARSET_INFO *, const char *b, const char *e,
|
||||
int sq);
|
||||
|
||||
/* Copying routines */
|
||||
/*
|
||||
copy_abort() - copy a string, abort if a bad byte sequence was found.
|
||||
Not more than "nchars" characters are copied.
|
||||
|
||||
status->m_source_end_pos is set to a position in the range
|
||||
between "src" and "src + src_length".
|
||||
|
||||
status->m_well_formed_error_pos is set to NULL if the string
|
||||
in the range "src" and "status->m_source_end_pos" was well formed,
|
||||
or is set to the position of the leftmost bad byte sequence otherwise.
|
||||
*/
|
||||
size_t (*copy_abort)(CHARSET_INFO *,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status);
|
||||
};
|
||||
|
||||
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
|
||||
@ -558,6 +592,14 @@ extern uint my_instr_simple(CHARSET_INFO *,
|
||||
const char *s, size_t s_length,
|
||||
my_match_t *match, uint nmatch);
|
||||
|
||||
size_t my_copy_8bit(CHARSET_INFO *,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *);
|
||||
size_t my_copy_abort_mb(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *);
|
||||
|
||||
/* Functions for 8bit */
|
||||
extern size_t my_caseup_str_8bit(CHARSET_INFO *, char *);
|
||||
|
@ -921,73 +921,9 @@ String_copier::well_formed_copy(CHARSET_INFO *to_cs,
|
||||
(to_cs == from_cs) ||
|
||||
my_charset_same(from_cs, to_cs))
|
||||
{
|
||||
if (to_length < to_cs->mbminlen || !nchars)
|
||||
{
|
||||
m_source_end_pos= from;
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
m_well_formed_error_pos= NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (to_cs == &my_charset_bin)
|
||||
{
|
||||
res= MY_MIN(MY_MIN(nchars, to_length), from_length);
|
||||
memmove(to, from, res);
|
||||
m_source_end_pos= from + res;
|
||||
m_well_formed_error_pos= NULL;
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
int well_formed_error;
|
||||
uint from_offset;
|
||||
|
||||
if ((from_offset= (from_length % to_cs->mbminlen)) &&
|
||||
(from_cs == &my_charset_bin))
|
||||
{
|
||||
/*
|
||||
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
|
||||
INSERT INTO t1 (ucs2_column) VALUES (0x01);
|
||||
0x01 -> 0x0001
|
||||
*/
|
||||
uint pad_length= to_cs->mbminlen - from_offset;
|
||||
bzero(to, pad_length);
|
||||
memmove(to + pad_length, from, from_offset);
|
||||
/*
|
||||
In some cases left zero-padding can create an incorrect character.
|
||||
For example:
|
||||
INSERT INTO t1 (utf32_column) VALUES (0x110000);
|
||||
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
|
||||
The valid characters range is limited to 0x00000000..0x0010FFFF.
|
||||
|
||||
Make sure we didn't pad to an incorrect character.
|
||||
*/
|
||||
if (to_cs->cset->well_formed_len(to_cs,
|
||||
to, to + to_cs->mbminlen, 1,
|
||||
&well_formed_error) !=
|
||||
to_cs->mbminlen)
|
||||
{
|
||||
m_source_end_pos= m_well_formed_error_pos= from;
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
return 0;
|
||||
}
|
||||
nchars--;
|
||||
from+= from_offset;
|
||||
from_length-= from_offset;
|
||||
to+= to_cs->mbminlen;
|
||||
to_length-= to_cs->mbminlen;
|
||||
}
|
||||
|
||||
set_if_smaller(from_length, to_length);
|
||||
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
|
||||
nchars, &well_formed_error);
|
||||
memmove(to, from, res);
|
||||
m_source_end_pos= from + res;
|
||||
m_well_formed_error_pos= well_formed_error ? from + res : NULL;
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
if (from_offset)
|
||||
res+= to_cs->mbminlen;
|
||||
}
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
return to_cs->cset->copy_abort(to_cs, to, to_length, from, from_length,
|
||||
nchars, this);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -43,10 +43,8 @@ inline uint32 copy_and_convert(char *to, uint32 to_length,
|
||||
}
|
||||
|
||||
|
||||
class String_copier
|
||||
class String_copier: private MY_STRCOPY_STATUS
|
||||
{
|
||||
const char *m_source_end_pos;
|
||||
const char *m_well_formed_error_pos;
|
||||
const char *m_cannot_convert_error_pos;
|
||||
public:
|
||||
const char *source_end_pos() const
|
||||
|
@ -6922,7 +6922,8 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
struct charset_info_st my_charset_big5_chinese_ci=
|
||||
|
@ -548,7 +548,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
||||
|
@ -34800,7 +34800,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -10007,7 +10007,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -67549,7 +67549,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -6410,7 +6410,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -10806,7 +10806,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -421,7 +421,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
||||
|
@ -423,6 +423,29 @@ size_t my_well_formed_len_mb(CHARSET_INFO *cs, const char *b, const char *e,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Copy a multi-byte string. Abort if a bad byte sequence was found.
|
||||
Not more than "nchars" characters are copied.
|
||||
*/
|
||||
/*
  Copy the leading well-formed part of "src" into "dst".

  @param cs          Character set; its cset->well_formed_len() does the scan.
  @param dst         Destination buffer.
  @param dst_length  Size of "dst" in bytes.
  @param src         Source string.
  @param src_length  Length of "src" in bytes.
  @param nchars      Character limit passed on to well_formed_len().
  @param status      [out] Both members are always assigned, see below.

  @return The number of bytes copied to "dst".
*/
size_t
|
||||
my_copy_abort_mb(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
/* Filled in by well_formed_len(); a non-zero value is treated as "bad sequence found" */
int well_formed_error;
|
||||
/* Byte length reported by well_formed_len(), i.e. how much gets copied */
size_t res;
|
||||
|
||||
/* Never scan more source bytes than "dst" can hold (assumes set_if_smaller(a,b) lowers a to b - TODO confirm) */
set_if_smaller(src_length, dst_length);
|
||||
res= cs->cset->well_formed_len(cs, src, src + src_length,
|
||||
nchars, &well_formed_error);
|
||||
/* Copy only the prefix accepted by well_formed_len() */
memmove(dst, src, res);
|
||||
status->m_source_end_pos= src + res;
|
||||
/* On error the bad position equals the place where copying stopped */
status->m_well_formed_error_pos= well_formed_error ? src + res : NULL;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
uint my_instr_mb(CHARSET_INFO *cs,
|
||||
const char *b, size_t b_length,
|
||||
const char *s, size_t s_length,
|
||||
|
@ -1108,6 +1108,25 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Copy an 8-bit string. Not more than "nchars" characters are copied.
|
||||
*/
|
||||
/*
  Copy bytes from "src" to "dst", limited by "dst_length" and "nchars".

  @param cs          Unused.
  @param dst         Destination buffer.
  @param dst_length  Size of "dst" in bytes.
  @param src         Source string.
  @param src_length  Length of "src" in bytes.
  @param nchars      Character limit; applied here directly as a byte limit.
  @param status      [out] m_source_end_pos is set to the end of the copied
                     part, m_well_formed_error_pos is always set to NULL.

  @return The number of bytes copied to "dst".
*/
size_t
|
||||
my_copy_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
/* Clamp the copy length to both limits (assumes set_if_smaller(a,b) lowers a to b - TODO confirm) */
set_if_smaller(src_length, dst_length);
|
||||
set_if_smaller(src_length, nchars);
|
||||
/* memmove() is skipped entirely for an empty copy */
if (src_length)
|
||||
memmove(dst, src, src_length);
|
||||
status->m_source_end_pos= src + src_length;
|
||||
/* No validation is done in this routine, so no error position is ever reported */
status->m_well_formed_error_pos= NULL;
|
||||
return src_length;
|
||||
}
|
||||
|
||||
|
||||
size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *ptr, size_t length)
|
||||
{
|
||||
@ -1886,7 +1905,8 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
|
||||
|
@ -34172,7 +34172,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -885,7 +885,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
||||
|
@ -92,6 +92,65 @@ my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Copy an UCS2/UTF16/UTF32 string.
|
||||
Not more than "nchars" characters are copied.
|
||||
|
||||
UCS2/UTF16/UTF32 may need to prepend some zero bytes,
|
||||
e.g. when copying from a BINARY source:
|
||||
INSERT INTO t1 (ucs2_column) VALUES (0x01);
|
||||
0x01 -> 0x0001
|
||||
*/
|
||||
/*
  Copy a string for a character set with mbminlen > 1, left-padding an
  incomplete leading character with zero bytes when "src_length" is not
  a multiple of cs->mbminlen. Everything after the padded character
  (or the whole string when no padding is needed) is delegated to
  my_copy_abort_mb().

  @param cs          Character set; cs->mbminlen and cs->cset->well_formed_len()
                     are used.
  @param dst         Destination buffer.
  @param dst_length  Size of "dst" in bytes.
  @param src         Source string.
  @param src_length  Length of "src" in bytes.
  @param nchars      Character limit; the padded character counts as one.
  @param status      [out] Set to "src" for both members when the padded
                     character cannot be produced, otherwise assigned by
                     my_copy_abort_mb().

  @return The number of bytes written to "dst", including the padded
          character if there was one; 0 if the padded character could
          not be produced.
*/
static size_t
|
||||
my_copy_abort_mb2_or_mb4(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
/* Number of leading source bytes that do not form a full mbminlen unit */
size_t src_offset;
|
||||
|
||||
if ((src_offset= (src_length % cs->mbminlen)))
|
||||
{
|
||||
int well_formed_error;
|
||||
/* Number of zero bytes to put in front of the incomplete character */
size_t pad_length;
|
||||
/* No room (or no character budget) for the padded character: stop at "src" */
if (dst_length < cs->mbminlen || !nchars)
|
||||
{
|
||||
status->m_source_end_pos= status->m_well_formed_error_pos= src;
|
||||
return 0;
|
||||
}
|
||||
|
||||
pad_length= cs->mbminlen - src_offset;
|
||||
/* Build the first character in "dst": zero bytes followed by the leftover source bytes */
bzero(dst, pad_length);
|
||||
memmove(dst + pad_length, src, src_offset);
|
||||
/*
|
||||
In some cases left zero-padding can create an incorrect character.
|
||||
For example:
|
||||
INSERT INTO t1 (utf32_column) VALUES (0x110000);
|
||||
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
|
||||
The valid characters range is limited to 0x00000000..0x0010FFFF.
|
||||
|
||||
Make sure we didn't pad to an incorrect character.
|
||||
*/
|
||||
if (cs->cset->well_formed_len(cs,
|
||||
dst, dst + cs->mbminlen, 1,
|
||||
&well_formed_error) != cs->mbminlen)
|
||||
{
|
||||
status->m_source_end_pos= status->m_well_formed_error_pos= src;
|
||||
return 0;
|
||||
}
|
||||
/* The padded character is accepted: account for it and step past it on both sides */
nchars--;
|
||||
src+= src_offset;
|
||||
src_length-= src_offset;
|
||||
dst+= cs->mbminlen;
|
||||
dst_length-= cs->mbminlen;
|
||||
/* The remainder is copied by my_copy_abort_mb(), which also assigns "status" */
return
|
||||
cs->mbminlen /* The left-padded character */ +
|
||||
my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
|
||||
}
|
||||
/* Source length is a multiple of mbminlen: no padding needed */
return my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
|
||||
}
|
||||
|
||||
|
||||
static long
|
||||
my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
|
||||
const char *nptr, size_t l, int base,
|
||||
@ -1682,7 +1741,8 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
|
||||
my_strntod_mb2_or_mb4,
|
||||
my_strtoll10_mb2,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_mb2
|
||||
my_scan_mb2,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
@ -1851,7 +1911,8 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
|
||||
my_strntod_mb2_or_mb4,
|
||||
my_strtoll10_mb2,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_mb2
|
||||
my_scan_mb2,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
@ -2765,7 +2826,8 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
|
||||
my_strntod_mb2_or_mb4,
|
||||
my_strtoll10_utf32,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_utf32
|
||||
my_scan_utf32,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
@ -3383,7 +3445,8 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
|
||||
my_strntod_mb2_or_mb4,
|
||||
my_strtoll10_mb2,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_mb2
|
||||
my_scan_mb2,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
|
@ -67295,7 +67295,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -5614,7 +5614,8 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
@ -7167,7 +7168,8 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
@ -8110,7 +8112,8 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
|
||||
my_strntod_8bit,
|
||||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
};
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user