From 8d1dfdd912712876267bcf5b45b3def39caf8dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Nordheim?= Date: Thu, 19 Oct 2023 15:28:12 +0200 Subject: [PATCH] QLocal8Bit::convertToUnicode[win]: Simplify state-handling Instead of having separate variables for the state, that we then store back at the end, let's just make state-handling explicit, making the logic around it easier to follow. We now output Replacement Characters, if we try to decode stateless and have an invalid sequence at the end. Otherwise we fall back to convertToUnicodeCharByChar as before. Pick-to: 6.5 Task-number: QTBUG-118318 Task-number: QTBUG-105105 Change-Id: Ifea64bc241113f468b69cad16fc3cc97a6ebe646 Reviewed-by: Thiago Macieira (cherry picked from commit 2e9bb2ee5d3a3e025036e2dd6722eac15c922505) Reviewed-by: Qt Cherry-pick Bot --- src/corelib/text/qstringconverter.cpp | 47 ++++++++++----------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 1b85d563d31..744ccfa7635 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -1331,27 +1331,17 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, int len; QString sp; - char state_data = 0; - int remainingChars = 0; - - //save the current state information - if (state) { - state_data = (char)state->state_data[0]; - remainingChars = state->remainingChars; - } //convert the pending character (if available) - if (state && remainingChars) { + if (state && state->remainingChars) { char prev[3] = {0}; - prev[0] = state_data; + prev[0] = state->state_data[0]; prev[1] = mb[0]; - remainingChars = 0; + state->remainingChars = 0; len = MultiByteToWideChar(codePage, MB_PRECOMPOSED, prev, 2, out, outlen); if (len) { - if (mblen == 1) { - state->remainingChars = 0; + if (mblen == 1) return QStringView(out, len).toString(); - } mb++; mblen--; ++out; @@ -1373,11 +1363,13 @@ QString 
QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, outlen = wclen; } else if (r == ERROR_NO_UNICODE_TRANSLATION) { //check whether, we hit an invalid character in the middle - if ((mblen <= 1) || (remainingChars && state_data)) + if (state && ((mblen <= 1) || (state->remainingChars && state->state_data[0]))) return convertToUnicodeCharByChar(in, codePage, state); //Remove the last character and try again... - state_data = mb[mblen-1]; - remainingChars = 1; + if (state) { + state->state_data[0] = mb[mblen - 1]; + state->remainingChars = 1; + } // else: We have discarded a character that we won't handle? @todo mblen--; } else { // Fail. @@ -1386,24 +1378,21 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, } } - if (len <= 0) - return QString(); - - if (out[len - 1] == u'\0') - --len; - - //save the new state information - if (state) { - state->state_data[0] = (char)state_data; - state->remainingChars = remainingChars; - } - if (QtPrivate::q_points_into_range(out, buf.data(), buf.data() + buf.size())) { if (out - buf.data() + len > 0) sp = QStringView(buf.data(), out + len).toString(); } else{ sp.truncate(out - reinterpret_cast(sp.data()) + len); } + + if (sp.size() && sp.back().isNull()) + sp.chop(1); + + if (!state && mblen != length) { // We have trailing characters that should be converted + qsizetype diff = length - mblen; + sp.resize(sp.size() + diff, QChar::ReplacementCharacter); + } + return sp; }