From 98f743ca47f2c5664a7c1bf165eeff0bb09e153c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5rten=20Nordheim?=
Date: Thu, 2 Nov 2023 18:17:48 +0100
Subject: [PATCH] QLocal8Bit::convertFromUnicode[win]: fix code unit pairing

When we restore a high surrogate from the state, we need to make sure
that the next code unit is a low surrogate. And if it is not then we
should at least not throw it away.

Amends d8d5922f16f1710b66caf718c302b633d2f78b0b

Pick-to: 6.5
Task-number: QTBUG-118185
Task-number: QTBUG-105105
Change-Id: I64afa0d323d73422128e24e16755e648a8811523
Reviewed-by: Thiago Macieira
(cherry picked from commit f3d074b6b0dbf80dfec28de50d36928904e7ac0e)
Reviewed-by: Qt Cherry-pick Bot
---
 src/corelib/text/qstringconverter.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 216ec91bf3f..1b8f00fc3d4 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -1419,14 +1419,20 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
         Q_ASSERT(state->remainingChars == 1);
         // Let's try to decode the pending character
         wchar_t wc[2] = { wchar_t(state->state_data[0]), ch[0] };
-        int len = WideCharToMultiByte(codePage, 0, wc, int(std::size(wc)), out, outlen, nullptr,
+        // Check if the second character is a valid low surrogate,
+        // otherwise we'll just decode the first character, for which windows
+        // will output a replacement character.
+        const bool validCodePoint = QChar::isLowSurrogate(wc[1]);
+        int len = WideCharToMultiByte(codePage, 0, wc, validCodePoint ? 2 : 1, out, outlen, nullptr,
                                       nullptr);
         if (!len)
             return {}; // Cannot recover, and I refuse to believe it was a size limitation
         out += len;
         outlen -= len;
-        ++ch;
-        --uclen;
+        if (validCodePoint) {
+            ++ch;
+            --uclen;
+        }
         state->remainingChars = 0;
         state->state_data[0] = 0;
         if (uclen == 0)