Clean up QUtf32::convertToUnicode

Clean up the implementation and improve performance by
handling the first char outside of the main loop.

Also avoid one copy of the data when using QStringConverter.

Change-Id: Ie698e62de1864352612a4dddc907cb139e7e6407
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Lars Knoll 2020-04-23 16:44:38 +02:00
parent b1d8ce32cd
commit 4b2edde373
2 changed files with 91 additions and 68 deletions

View File

@ -903,18 +903,18 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringCon
if (writeBom) if (writeBom)
length += 4; length += 4;
QByteArray ba(length, Qt::Uninitialized); QByteArray ba(length, Qt::Uninitialized);
char *end = convertFromUnicode(ba.data(), uc, len, state, endian); char *end = convertFromUnicode(ba.data(), QStringView(uc, len), state, endian);
Q_ASSERT(end - ba.constData() == length); Q_ASSERT(end - ba.constData() == length);
Q_UNUSED(end); Q_UNUSED(end);
return ba; return ba;
} }
char *QUtf32::convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian) char *QUtf32::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian)
{ {
Q_ASSERT(state); Q_ASSERT(state);
bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom; bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
qsizetype length = 4*len; qsizetype length = 4*in.length();
if (writeBom) if (writeBom)
length += 4; length += 4;
@ -937,7 +937,8 @@ char *QUtf32::convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStr
state->internalState |= HeaderDone; state->internalState |= HeaderDone;
} }
const QChar *end = uc + len; const QChar *uc = in.data();
const QChar *end = in.data() + in.length();
QChar ch; QChar ch;
uint ucs4; uint ucs4;
if (state->remainingChars == 1) { if (state->remainingChars == 1) {
@ -981,69 +982,96 @@ decode_surrogate:
// Reviewer note: these are side-by-side diff rows; each line shows the removed (old)
// text followed by the added (new) text.
// Old: a single QString-returning function that decoded into a locally sized QString.
// New: the QString overload only sizes the result, delegates to the QChar*-output
// overload, and truncates the string to the end pointer that overload returns.
QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian) QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
{ {
Q_ASSERT(state); QString result;
if (state->flags & QStringConverter::Flag::Stateless) // temporary result.resize((len + 7) >> 1); // worst case
state = nullptr; QChar *end = convertToUnicode(result.data(), chars, len, state, endian);
result.truncate(end - result.constData());
return result;
}
// New overload: decodes the UTF-32 bytes in chars[0..len) into `out` and returns the
// advanced output pointer. `state` must be non-null (asserted below).
uchar tuple[4]; QChar *QUtf32::convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian)
int num = 0; {
bool headerdone = state && state->internalState & HeaderDone; Q_ASSERT(state);
if (state) {
if (state->flags & QStringConverter::Flag::DontSkipInitialBom)
headerdone = true;
if (endian == DetectEndianness) if (endian == DetectEndianness)
endian = (DataEndianness)state->state_data[Endian]; endian = (DataEndianness)state->state_data[Endian];
num = state->remainingChars;
memcpy(tuple, &state->state_data[Data], 4);
}
if (headerdone && endian == DetectEndianness)
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
QString result;
result.resize((num + len) >> 2 << 1); // worst case
QChar *qch = (QChar *)result.data();
const char *end = chars + len; const char *end = chars + len;
uchar tuple[4];
memcpy(tuple, &state->state_data[Data], 4);
// New: when buffered bytes plus input total fewer than 4, the input bytes are
// stashed in the state and nothing is written to `out`.
// make sure we can decode at least one char
if (state->remainingChars + len < 4) {
if (len) {
while (chars < end) { while (chars < end) {
tuple[num++] = *chars++; tuple[state->remainingChars] = *chars;
if (num == 4) { ++state->remainingChars;
if (!headerdone) { ++chars;
}
Q_ASSERT(state->remainingChars < 4);
memcpy(&state->state_data[Data], tuple, 4);
}
return out;
}
bool headerdone = state->internalState & HeaderDone;
if (state->flags & QStringConverter::Flag::DontSkipInitialBom)
headerdone = true; headerdone = true;
// New: `num` takes over the count of bytes buffered by a previous call and the
// counter in the state is reset.
int num = state->remainingChars;
state->remainingChars = 0;
// New: the first code unit is handled here, outside the main loop, whenever the header
// is not done yet, the endianness is still undetected, or buffered bytes are pending.
if (!headerdone || endian == DetectEndianness || num) {
while (num < 4)
tuple[num++] = *chars++;
if (endian == DetectEndianness) { if (endian == DetectEndianness) {
if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) { if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0) {
endian = LittleEndianness; endian = LittleEndianness;
num = 0; } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff) {
continue;
} else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) {
endian = BigEndianness; endian = BigEndianness;
num = 0;
continue;
} else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { } else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
endian = BigEndianness; endian = BigEndianness;
} else { } else {
endian = LittleEndianness; endian = LittleEndianness;
} }
} else if (((endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
num = 0;
continue;
}
} }
// New: the first code point is emitted unless it is a byte order mark seen while the
// header was not yet done; code points needing surrogates are written as a pair.
uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
if (headerdone || code != QChar::ByteOrderMark) {
if (QChar::requiresSurrogates(code)) {
*out++ = QChar(QChar::highSurrogate(code));
*out++ = QChar(QChar::lowSurrogate(code));
} else {
*out++ = QChar(code);
}
}
num = 0;
} else if (endian == DetectEndianness) {
endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
}
state->state_data[Endian] = endian;
state->internalState |= HeaderDone;
// Main loop: gather 4 bytes at a time, convert them with the selected byte order,
// and append the resulting UTF-16 unit(s).
while (chars < end) {
tuple[num++] = *chars++;
if (num == 4) {
uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple); uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
for (char16_t c : QChar::fromUcs4(code)) for (char16_t c : QChar::fromUcs4(code))
*qch++ = c; *out++ = c;
num = 0; num = 0;
} }
} }
result.truncate(qch - result.unicode());
// New: leftover bytes (fewer than 4) either produce a replacement character when the
// Stateless flag is set, or are saved in the state together with the endianness.
if (state) { if (num) {
if (headerdone) if (state->flags & QStringDecoder::Flag::Stateless) {
state->internalState |= HeaderDone; *out++ = QChar::ReplacementCharacter;
} else {
state->state_data[Endian] = endian; state->state_data[Endian] = endian;
state->remainingChars = num; state->remainingChars = num;
memcpy(&state->state_data[Data], tuple, 4); memcpy(&state->state_data[Data], tuple, 4);
} }
return result; }
return out;
} }
QString qFromUtfEncoded(const QByteArray &ba) QString qFromUtfEncoded(const QByteArray &ba)
@ -1322,38 +1350,32 @@ static char *toUtf16LE(char *out, QStringView in, QStringConverter::State *state
// Diff rows (old text, then new text). Old: decoded into a temporary QString and
// memcpy'd it into `out`. New: forwards to the QChar*-output overload with
// DetectEndianness and returns its end pointer directly.
static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf32(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{ {
QString s = QUtf32::convertToUnicode(in, length, state); return QUtf32::convertToUnicode(out, in, length, state, DetectEndianness);
memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length();
} }
// Diff rows (old text, then new text). The encoder call now passes the QStringView
// itself instead of in.data() / in.length(); endianness stays DetectEndianness.
static char *toUtf32(char *out, QStringView in, QStringConverter::State *state) static char *toUtf32(char *out, QStringView in, QStringConverter::State *state)
{ {
return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, DetectEndianness); return QUtf32::convertFromUnicode(out, in, state, DetectEndianness);
} }
// Diff rows (old text, then new text). Old: decoded into a temporary QString and
// memcpy'd it into `out`. New: forwards to the QChar*-output overload with
// BigEndianness and returns its end pointer directly.
static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf32BE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{ {
QString s = QUtf32::convertToUnicode(in, length, state, BigEndianness); return QUtf32::convertToUnicode(out, in, length, state, BigEndianness);
memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length();
} }
// Diff rows (old text, then new text). The encoder call now passes the QStringView
// itself instead of in.data() / in.length(); endianness stays BigEndianness.
static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state) static char *toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
{ {
return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, BigEndianness); return QUtf32::convertFromUnicode(out, in, state, BigEndianness);
} }
// Diff rows (old text, then new text). Old: decoded into a temporary QString and
// memcpy'd it into `out`. New: forwards to the QChar*-output overload with
// LittleEndianness and returns its end pointer directly.
static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state) static QChar *fromUtf32LE(QChar *out, const char *in, qsizetype length, QStringConverter::State *state)
{ {
QString s = QUtf32::convertToUnicode(in, length, state, LittleEndianness); return QUtf32::convertToUnicode(out, in, length, state, LittleEndianness);
memcpy(out, s.constData(), s.length()*sizeof(QChar));
return out + s.length();
} }
// Diff rows (old text, then new text). The encoder call now passes the QStringView
// itself instead of in.data() / in.length(); endianness stays LittleEndianness.
static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state) static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
{ {
return QUtf32::convertFromUnicode(out, in.data(), in.length(), state, LittleEndianness); return QUtf32::convertFromUnicode(out, in, state, LittleEndianness);
} }
void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept; void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
@ -1411,7 +1433,7 @@ static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
// Length helpers, shown as diff rows (old text, then new text). Only fromUtf32Len
// changes in this hunk: it now returns l/2 + 2 instead of l + 1.
// NOTE(review): presumably l is a byte count and 4 input bytes yield at most two
// UTF-16 units, hence l/2 — confirm against the callers of these helpers.
static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; } static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; }
static qsizetype toUtf16Len(qsizetype l) { return 2*(l + 1); } static qsizetype toUtf16Len(qsizetype l) { return 2*(l + 1); }
static qsizetype fromUtf32Len(qsizetype l) { return l + 1; } static qsizetype fromUtf32Len(qsizetype l) { return l/2 + 2; }
static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); } static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); }
static qsizetype fromLatin1Len(qsizetype l) { return l + 1; } static qsizetype fromLatin1Len(qsizetype l) { return l + 1; }

View File

@ -312,9 +312,10 @@ struct QUtf16
// Header diff rows (old text, then new text) for the UTF-32 conversion entry points.
// Added: a convertToUnicode overload that writes into a caller-supplied QChar buffer.
// Changed: the char*-output convertFromUnicode now takes a QStringView instead of a
// QChar pointer plus length.
struct QUtf32 struct QUtf32
{ {
static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian);
static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
static char *convertFromUnicode(char *out, const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness endian); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian);
}; };
struct QLocal8Bit struct QLocal8Bit