QLocal8Bit::convertFromUnicode[win]: handle trailing high surrogate
The win32 API doesn't give us much choice. _Some_ code pages have support for returning an error if we pass a specific flag, but not all of them. Anyway, since the code pages might not support all that UTF-16 provides, we can't reasonably make it error out on characters that cannot be converted. So, the most reasonable thing we can handle is an unpaired high surrogate at the end of a string: we assume that the rest of the string was fine, and that the low surrogate will be provided in the next call. Pick-to: 6.5 Fixes: QTBUG-118185 Task-number: QTBUG-105105 Change-Id: I1f193c9d8e04bec769d885d32440c759d9dff0c2 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> (cherry picked from commit d8d5922f16f1710b66caf718c302b633d2f78b0b)
This commit is contained in:
parent
90a23d8d19
commit
f542134498
@ -1387,7 +1387,6 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
|||||||
|
|
||||||
Q_ASSERT(uclen < INT_MAX); // ### FIXME
|
Q_ASSERT(uclen < INT_MAX); // ### FIXME
|
||||||
Q_ASSERT(state);
|
Q_ASSERT(state);
|
||||||
Q_UNUSED(state); // ### Fixme
|
|
||||||
if (state->flags & QStringConverter::Flag::Stateless) // temporary
|
if (state->flags & QStringConverter::Flag::Stateless) // temporary
|
||||||
state = nullptr;
|
state = nullptr;
|
||||||
|
|
||||||
@ -1401,15 +1400,47 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
|||||||
qsizetype outlen = buf.size();
|
qsizetype outlen = buf.size();
|
||||||
QByteArray mb;
|
QByteArray mb;
|
||||||
|
|
||||||
int len;
|
if (state && state->remainingChars > 0) {
|
||||||
|
Q_ASSERT(state->remainingChars == 1);
|
||||||
|
// Let's try to decode the pending character
|
||||||
|
wchar_t wc[2] = { wchar_t(state->state_data[0]), ch[0] };
|
||||||
|
int len = WideCharToMultiByte(codePage, 0, wc, int(std::size(wc)), out, outlen, nullptr,
|
||||||
|
nullptr);
|
||||||
|
if (!len)
|
||||||
|
return {}; // Cannot recover, and I refuse to believe it was a size limitation
|
||||||
|
out += len;
|
||||||
|
outlen -= len;
|
||||||
|
++ch;
|
||||||
|
--uclen;
|
||||||
|
state->remainingChars = 0;
|
||||||
|
state->state_data[0] = 0;
|
||||||
|
if (uclen == 0)
|
||||||
|
return QByteArrayView(buf.data(), len).toByteArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state && QChar::isHighSurrogate(ch[uclen - 1])) {
|
||||||
|
// We can handle a missing low surrogate at the end of the string,
|
||||||
|
// so if there is one, exclude it now and store it in the state.
|
||||||
|
state->remainingChars = 1;
|
||||||
|
state->state_data[0] = ch[uclen - 1];
|
||||||
|
--uclen;
|
||||||
|
if (uclen == 0)
|
||||||
|
return QByteArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
Q_ASSERT(uclen > 0);
|
||||||
|
|
||||||
|
int len = 0;
|
||||||
while (!(len = WideCharToMultiByte(codePage, 0, ch, int(uclen), out, int(outlen), nullptr,
|
while (!(len = WideCharToMultiByte(codePage, 0, ch, int(uclen), out, int(outlen), nullptr,
|
||||||
nullptr))) {
|
nullptr))) {
|
||||||
int r = GetLastError();
|
int r = GetLastError();
|
||||||
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
||||||
int neededLength = WideCharToMultiByte(codePage, 0, ch, int(uclen), nullptr, 0, nullptr,
|
int neededLength = WideCharToMultiByte(codePage, 0, ch, int(uclen), nullptr, 0, nullptr,
|
||||||
nullptr);
|
nullptr);
|
||||||
mb.resize(neededLength);
|
const qsizetype currentLength = out - buf.data();
|
||||||
out = mb.data();
|
mb.resize(currentLength + neededLength);
|
||||||
|
memcpy(mb.data(), out, currentLength * sizeof(*out));
|
||||||
|
out = mb.data() + currentLength;
|
||||||
outlen = neededLength;
|
outlen = neededLength;
|
||||||
// and try again...
|
// and try again...
|
||||||
} else {
|
} else {
|
||||||
@ -1423,12 +1454,13 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!len)
|
auto end = out + len;
|
||||||
return QByteArray();
|
if (QtPrivate::q_points_into_range(out, buf.data(), buf.data() + buf.size())) {
|
||||||
if (out == buf.data())
|
if (end != buf.data()) // else: we return null-array
|
||||||
mb = QByteArray(buf.data(), len);
|
mb = QByteArrayView(buf.data(), end).toByteArray();
|
||||||
else
|
} else {
|
||||||
mb.resize(len);
|
mb.truncate(end - mb.data());
|
||||||
|
}
|
||||||
return mb;
|
return mb;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2499,6 +2499,10 @@ void tst_QStringConverter::fromLocal8Bit_data()
|
|||||||
QTest::newRow("shiftJIS")
|
QTest::newRow("shiftJIS")
|
||||||
<< "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd\xa4\x90\xa2\x8a\x45\x81\x49"_ba
|
<< "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd\xa4\x90\xa2\x8a\x45\x81\x49"_ba
|
||||||
<< u"こんにちは、世界!"_s << SHIFT_JIS;
|
<< u"こんにちは、世界!"_s << SHIFT_JIS;
|
||||||
|
|
||||||
|
constexpr uint GB_18030 = 54936u;
|
||||||
|
QTest::newRow("GB-18030") << "\xc4\xe3\xba\xc3\xca\xc0\xbd\xe7\xa3\xa1"_ba << u"你好世界!"_s
|
||||||
|
<< GB_18030;
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QStringConverter::fromLocal8Bit()
|
void tst_QStringConverter::fromLocal8Bit()
|
||||||
@ -2595,6 +2599,7 @@ void tst_QStringConverter::toLocal8Bit()
|
|||||||
for (QChar c : utf16)
|
for (QChar c : utf16)
|
||||||
result += QLocal8Bit::convertFromUnicode_sys(QStringView(&c, 1), codePage, &state);
|
result += QLocal8Bit::convertFromUnicode_sys(QStringView(&c, 1), codePage, &state);
|
||||||
QCOMPARE(result, eightBit);
|
QCOMPARE(result, eightBit);
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QStringConverter::toLocal8Bit_special_cases()
|
void tst_QStringConverter::toLocal8Bit_special_cases()
|
||||||
@ -2604,20 +2609,33 @@ void tst_QStringConverter::toLocal8Bit_special_cases()
|
|||||||
constexpr uint UTF8 = 65001u;
|
constexpr uint UTF8 = 65001u;
|
||||||
// Decode a 2-code unit character, but only provide 1 code unit at first:
|
// Decode a 2-code unit character, but only provide 1 code unit at first:
|
||||||
const char16_t a[] = u"𬽦";
|
const char16_t a[] = u"𬽦";
|
||||||
QStringView firstHalf = QStringView(a, 1);
|
QStringView codeUnits = a;
|
||||||
QByteArray result = QLocal8Bit::convertFromUnicode_sys(firstHalf, UTF8, &state);
|
QByteArray result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state);
|
||||||
QEXPECT_FAIL("", "We don't currently handle missing the low surrogate", Abort);
|
|
||||||
QCOMPARE(result, QString());
|
QCOMPARE(result, QString());
|
||||||
QVERIFY(result.isNull());
|
QVERIFY(result.isNull());
|
||||||
QCOMPARE_GT(state.remainingChars, 0);
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
// Then provide the second code unit:
|
// Then provide the second code unit:
|
||||||
QStringView secondHalf = QStringView(a + 1, 1);
|
result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state);
|
||||||
result = QLocal8Bit::convertFromUnicode_sys(secondHalf, UTF8, &state);
|
|
||||||
QCOMPARE(result, "\xf0\xac\xbd\xa6"_ba);
|
QCOMPARE(result, "\xf0\xac\xbd\xa6"_ba);
|
||||||
QCOMPARE(state.remainingChars, 0);
|
QCOMPARE(state.remainingChars, 0);
|
||||||
|
|
||||||
// Retain compat with the behavior for toLocal8Bit:
|
// Retain compat with the behavior for toLocal8Bit:
|
||||||
QCOMPARE(firstHalf.toLocal8Bit(), "?");
|
QCOMPARE(codeUnits.first(1).toLocal8Bit(), "?");
|
||||||
|
|
||||||
|
// Now do the same, but the second time we feed in a character, we also
|
||||||
|
// provide many more so the internal stack buffer is not large enough.
|
||||||
|
result.clear();
|
||||||
|
state.clear();
|
||||||
|
QString str = QStringView(a).toString().repeated(2048);
|
||||||
|
codeUnits = str;
|
||||||
|
result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state);
|
||||||
|
QCOMPARE(result, QString());
|
||||||
|
QVERIFY(result.isNull());
|
||||||
|
QCOMPARE_GT(state.remainingChars, 0);
|
||||||
|
// Then we provide the rest of the string:
|
||||||
|
result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state);
|
||||||
|
QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba);
|
||||||
|
QCOMPARE(state.remainingChars, 0);
|
||||||
}
|
}
|
||||||
#endif // Q_OS_WIN
|
#endif // Q_OS_WIN
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user