Moving the conversion code from String::well_formed_copy()
to my_convert_fix() - a new function in /strings.
This commit is contained in:
parent
c4b268add0
commit
f48dc5ccc7
@ -382,6 +382,16 @@ typedef struct
|
||||
} MY_STRCOPY_STATUS;
|
||||
|
||||
|
||||
/*
|
||||
A structure to return the statistics of a Unicode string conversion.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
MY_STRCOPY_STATUS m_native_copy_status;
|
||||
const char *m_cannot_convert_error_pos;
|
||||
} MY_STRCONV_STATUS;
|
||||
|
||||
|
||||
/* See strings/CHARSET_INFO.txt about information on this structure */
|
||||
struct my_charset_handler_st
|
||||
{
|
||||
@ -852,10 +862,38 @@ const MY_CONTRACTIONS *my_charset_get_contractions(CHARSET_INFO *cs,
|
||||
extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
|
||||
const char* fmt, va_list ap);
|
||||
|
||||
/*
|
||||
Convert a string between two character sets.
|
||||
Bad byte sequences as well as characters that cannot be
|
||||
encoded in the destination character set are replaced with '?'.
|
||||
*/
|
||||
uint32 my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
||||
const char *from, uint32 from_length,
|
||||
CHARSET_INFO *from_cs, uint *errors);
|
||||
|
||||
/*
|
||||
Convert a string between two character sets.
|
||||
Bad byte sequences as well as characters that cannot be
|
||||
encoded in the destination character set are replaced with '?'.
|
||||
Not more than "nchars" characters are copied.
|
||||
Conversion statistics are returned in "status", which is set as follows:
|
||||
- status->m_native_copy_status.m_source_end_pos - to the position
|
||||
between (src) and (src+src_length), where the function stopped reading
|
||||
the source string.
|
||||
- status->m_native_copy_status.m_well_formed_error_pos - to the position
|
||||
between (src) and (src+src_length), where the first badly formed byte
|
||||
sequence was found, or to NULL if the string was well formed in the
|
||||
given range.
|
||||
- status->m_cannot_convert_error_pos - to the position
|
||||
between (src) and (src+src_length), where the first character that
|
||||
cannot be represented in the destination character set was found,
|
||||
or to NULL if all characters in the given range were successfully
|
||||
converted.
|
||||
*/
|
||||
size_t my_convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length,
|
||||
CHARSET_INFO *srccs, const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCONV_STATUS *status);
|
||||
|
||||
#define _MY_U 01 /* Upper case */
|
||||
#define _MY_L 02 /* Lower case */
|
||||
#define _MY_NMR 04 /* Numeral (digit) */
|
||||
|
@ -914,8 +914,6 @@ String_copier::well_formed_copy(CHARSET_INFO *to_cs,
|
||||
const char *from, uint from_length,
|
||||
uint nchars)
|
||||
{
|
||||
uint res;
|
||||
|
||||
if ((to_cs == &my_charset_bin) ||
|
||||
(from_cs == &my_charset_bin) ||
|
||||
(to_cs == from_cs) ||
|
||||
@ -923,73 +921,10 @@ String_copier::well_formed_copy(CHARSET_INFO *to_cs,
|
||||
{
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
return to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
|
||||
nchars, this);
|
||||
nchars, &m_native_copy_status);
|
||||
}
|
||||
else
|
||||
{
|
||||
int cnvres;
|
||||
my_wc_t wc;
|
||||
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
|
||||
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
|
||||
const uchar *from_end= (const uchar*) from + from_length;
|
||||
uchar *to_end= (uchar*) to + to_length;
|
||||
char *to_start= to;
|
||||
m_well_formed_error_pos= NULL;
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
|
||||
for ( ; nchars; nchars--)
|
||||
{
|
||||
const char *from_prev= from;
|
||||
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
|
||||
from+= cnvres;
|
||||
else if (cnvres == MY_CS_ILSEQ)
|
||||
{
|
||||
if (!m_well_formed_error_pos)
|
||||
m_well_formed_error_pos= from;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
else if (cnvres > MY_CS_TOOSMALL)
|
||||
{
|
||||
/*
|
||||
A correct multibyte sequence detected
|
||||
But it doesn't have Unicode mapping.
|
||||
*/
|
||||
if (!m_cannot_convert_error_pos)
|
||||
m_cannot_convert_error_pos= from;
|
||||
from+= (-cnvres);
|
||||
wc= '?';
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((uchar *) from >= from_end)
|
||||
break; // End of line
|
||||
// Incomplete byte sequence
|
||||
if (!m_well_formed_error_pos)
|
||||
m_well_formed_error_pos= from;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
outp:
|
||||
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
||||
to+= cnvres;
|
||||
else if (cnvres == MY_CS_ILUNI && wc != '?')
|
||||
{
|
||||
if (!m_cannot_convert_error_pos)
|
||||
m_cannot_convert_error_pos= from_prev;
|
||||
wc= '?';
|
||||
goto outp;
|
||||
}
|
||||
else
|
||||
{
|
||||
from= from_prev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
m_source_end_pos= from;
|
||||
res= (uint) (to - to_start);
|
||||
}
|
||||
return res;
|
||||
return my_convert_fix(to_cs, to, to_length, from_cs, from, from_length,
|
||||
nchars, this);
|
||||
}
|
||||
|
||||
|
||||
|
@ -43,14 +43,13 @@ inline uint32 copy_and_convert(char *to, uint32 to_length,
|
||||
}
|
||||
|
||||
|
||||
class String_copier: private MY_STRCOPY_STATUS
|
||||
class String_copier: private MY_STRCONV_STATUS
|
||||
{
|
||||
const char *m_cannot_convert_error_pos;
|
||||
public:
|
||||
const char *source_end_pos() const
|
||||
{ return m_source_end_pos; }
|
||||
{ return m_native_copy_status.m_source_end_pos; }
|
||||
const char *well_formed_error_pos() const
|
||||
{ return m_well_formed_error_pos; }
|
||||
{ return m_native_copy_status.m_well_formed_error_pos; }
|
||||
const char *cannot_convert_error_pos() const
|
||||
{ return m_cannot_convert_error_pos; }
|
||||
const char *most_important_error_pos() const
|
||||
|
@ -1161,3 +1161,76 @@ my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
|
||||
DBUG_ASSERT(FALSE); // Should never get to here
|
||||
return 0; // Make compiler happy
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
|
||||
CHARSET_INFO *from_cs, const char *from, size_t from_length,
|
||||
size_t nchars, MY_STRCONV_STATUS *status)
|
||||
{
|
||||
int cnvres;
|
||||
my_wc_t wc;
|
||||
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
|
||||
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
|
||||
const uchar *from_end= (const uchar*) from + from_length;
|
||||
uchar *to_end= (uchar*) to + to_length;
|
||||
char *to_start= to;
|
||||
|
||||
DBUG_ASSERT(to_cs != &my_charset_bin);
|
||||
DBUG_ASSERT(from_cs != &my_charset_bin);
|
||||
|
||||
status->m_native_copy_status.m_well_formed_error_pos= NULL;
|
||||
status->m_cannot_convert_error_pos= NULL;
|
||||
|
||||
for ( ; nchars; nchars--)
|
||||
{
|
||||
const char *from_prev= from;
|
||||
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
|
||||
from+= cnvres;
|
||||
else if (cnvres == MY_CS_ILSEQ)
|
||||
{
|
||||
if (!status->m_native_copy_status.m_well_formed_error_pos)
|
||||
status->m_native_copy_status.m_well_formed_error_pos= from;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
else if (cnvres > MY_CS_TOOSMALL)
|
||||
{
|
||||
/*
|
||||
A correct multibyte sequence detected
|
||||
But it doesn't have Unicode mapping.
|
||||
*/
|
||||
if (!status->m_cannot_convert_error_pos)
|
||||
status->m_cannot_convert_error_pos= from;
|
||||
from+= (-cnvres);
|
||||
wc= '?';
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((uchar *) from >= from_end)
|
||||
break; // End of line
|
||||
// Incomplete byte sequence
|
||||
if (!status->m_native_copy_status.m_well_formed_error_pos)
|
||||
status->m_native_copy_status.m_well_formed_error_pos= from;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
outp:
|
||||
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
||||
to+= cnvres;
|
||||
else if (cnvres == MY_CS_ILUNI && wc != '?')
|
||||
{
|
||||
if (!status->m_cannot_convert_error_pos)
|
||||
status->m_cannot_convert_error_pos= from_prev;
|
||||
wc= '?';
|
||||
goto outp;
|
||||
}
|
||||
else
|
||||
{
|
||||
from= from_prev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
status->m_native_copy_status.m_source_end_pos= from;
|
||||
return to - to_start;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user