QLocal8Bit::convertFromUnicode[win]: support more than 2Gi input
As we did for convertToUnicode. To support more than 2Gi input, we need to handle the input in chunks because of the `int` parameter in the Windows API. Testing also revealed some corner cases that need handling; they mostly occur when there is an incomplete surrogate pair at the end of the current input window. The test takes between 3 (plain MinGW) and 8 (MSVC with ASAN) seconds to run on my machine. Fixes: QTBUG-105105 Change-Id: I4fb0420b88ca41dfa8b561a35c6d96659bd81468 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> (cherry picked from commit 496340f33ad48738d1595c5c4048e4a05819786c) Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io> (cherry picked from commit b04fe2de612ac477da39ad2342a2bd825325b03f) (cherry picked from commit 5826551a1d5e0870dc3f0ba2f61c56f99c733dd4) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
This commit is contained in:
parent
fc24040962
commit
1f77ae5109
@ -1452,7 +1452,6 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
const wchar_t *ch = reinterpret_cast<const wchar_t *>(in.data());
|
||||
qsizetype uclen = in.size();
|
||||
|
||||
Q_ASSERT(uclen < INT_MAX); // ### FIXME
|
||||
Q_ASSERT(state);
|
||||
if (state->flags & QStringConverter::Flag::Stateless) // temporary
|
||||
state = nullptr;
|
||||
@ -1503,9 +1502,42 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
|
||||
Q_ASSERT(uclen > 0);
|
||||
|
||||
// Return a pointer to storage where we have enough space for `size`
|
||||
const auto growOut = [&](qsizetype size) -> std::tuple<char *, qsizetype> {
|
||||
if (outlen >= size)
|
||||
return {out, outlen};
|
||||
const bool wasStackBuffer = mb.isEmpty();
|
||||
const auto begin = wasStackBuffer ? buf.data() : mb.data();
|
||||
const qsizetype offset = qsizetype(std::distance(begin, out));
|
||||
qsizetype newSize = 0;
|
||||
if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
|
||||
Q_CHECK_PTR(false);
|
||||
return {nullptr, 0};
|
||||
}
|
||||
mb.resize(newSize);
|
||||
auto it = mb.data();
|
||||
if (wasStackBuffer)
|
||||
it = std::copy_n(buf.data(), offset, it);
|
||||
else
|
||||
it += offset;
|
||||
return {it, size};
|
||||
};
|
||||
|
||||
const auto getNextWindowSize = [&]() {
|
||||
int nextIn = qt_saturate<int>(uclen);
|
||||
// The Windows API has some issues if the current window ends in the
|
||||
// middle of a surrogate pair, so we avoid that:
|
||||
if (nextIn > 1 && QChar::isHighSurrogate(ch[nextIn - 1]))
|
||||
--nextIn;
|
||||
return nextIn;
|
||||
};
|
||||
|
||||
int len = 0;
|
||||
while (uclen > 0) {
|
||||
const int nextIn = qt_saturate<int>(uclen);
|
||||
const int nextIn = getNextWindowSize();
|
||||
std::tie(out, outlen) = growOut(1); // We need at least one byte
|
||||
if (!out)
|
||||
return {};
|
||||
const int nextOut = qt_saturate<int>(outlen);
|
||||
len = WideCharToMultiByte(codePage, 0, ch, nextIn, out, nextOut, nullptr, nullptr);
|
||||
if (len > 0) {
|
||||
@ -1516,14 +1548,21 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
} else {
|
||||
int r = GetLastError();
|
||||
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
||||
Q_ASSERT(mb.isEmpty());
|
||||
int neededLength = WideCharToMultiByte(codePage, 0, ch, nextIn, nullptr, 0,
|
||||
nullptr, nullptr);
|
||||
const qsizetype currentLength = out - buf.data();
|
||||
mb.resize(currentLength + neededLength);
|
||||
memcpy(mb.data(), out, currentLength * sizeof(*out));
|
||||
out = mb.data() + currentLength;
|
||||
outlen = neededLength;
|
||||
if (neededLength <= 0) {
|
||||
// Fail. Observed with UTF8 where the input window was max int and ended in an
|
||||
// incomplete sequence, probably a Windows bug. We try to avoid that from
|
||||
// happening by reducing the window size in that case. But let's keep this
|
||||
// branch just in case of other bugs.
|
||||
r = GetLastError();
|
||||
fprintf(stderr,
|
||||
"WideCharToMultiByte: Cannot convert multibyte text (error %d)\n", r);
|
||||
break;
|
||||
}
|
||||
std::tie(out, outlen) = growOut(neededLength);
|
||||
if (!out)
|
||||
return {};
|
||||
// and try again...
|
||||
} else {
|
||||
// Fail. Probably can't happen in fact (dwFlags is 0).
|
||||
|
@ -201,6 +201,7 @@ private slots:
|
||||
void toLocal8Bit_data();
|
||||
void toLocal8Bit();
|
||||
void toLocal8Bit_special_cases();
|
||||
void toLocal8Bit_2GiB();
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -2709,6 +2710,39 @@ void tst_QStringConverter::toLocal8Bit_special_cases()
|
||||
QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba);
|
||||
QCOMPARE(state.remainingChars, 0);
|
||||
}
|
||||
|
||||
void tst_QStringConverter::toLocal8Bit_2GiB()
|
||||
{
|
||||
#if QT_POINTER_SIZE == 4
|
||||
QSKIP("This test is only relevant for 64-bit builds");
|
||||
#else
|
||||
constexpr qsizetype TwoGiB = qsizetype(std::numeric_limits<int>::max());
|
||||
QString input;
|
||||
QT_TRY {
|
||||
input.reserve(TwoGiB + 1);
|
||||
} QT_CATCH (const std::bad_alloc &) {
|
||||
QSKIP("Out of memory");
|
||||
}
|
||||
// Fill with a single code unit character
|
||||
input.fill(u'.', TwoGiB - 1);
|
||||
// Then append a 2 code unit character, so that the input straddles the 2 GiB
|
||||
// boundary
|
||||
input += u"🙂";
|
||||
QCOMPARE(input.size(), input.capacity());
|
||||
constexpr uint UTF8 = 65001u;
|
||||
QStringConverter::State state;
|
||||
QByteArray result;
|
||||
QT_TRY {
|
||||
result = QLocal8Bit::convertFromUnicode_sys(input, UTF8, &state);
|
||||
} QT_CATCH (const std::bad_alloc &) {
|
||||
QSKIP("Out of memory");
|
||||
}
|
||||
QUtf8StringView rView = result;
|
||||
QCOMPARE(rView.size(), TwoGiB + 3); // The 2 code unit smiley is 4 code units in UTF-8
|
||||
QCOMPARE(rView.last(7), u8"...🙂"); // Check we correctly decoded it
|
||||
QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state
|
||||
#endif
|
||||
}
|
||||
#endif // Q_OS_WIN
|
||||
|
||||
struct DontCrashAtExit {
|
||||
|
Loading…
x
Reference in New Issue
Block a user