diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index b768444a012..207afdc1da6 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -1452,7 +1452,6 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
     const wchar_t *ch = reinterpret_cast<const wchar_t *>(in.data());
     qsizetype uclen = in.size();
 
-    Q_ASSERT(uclen < INT_MAX); // ### FIXME
     Q_ASSERT(state);
     if (state->flags & QStringConverter::Flag::Stateless) // temporary
         state = nullptr;
@@ -1503,9 +1502,42 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
 
     Q_ASSERT(uclen > 0);
 
+    // Return a pointer to storage where we have enough space for `size`
+    const auto growOut = [&](qsizetype size) -> std::tuple<char *, qsizetype> {
+        if (outlen >= size)
+            return {out, outlen};
+        const bool wasStackBuffer = mb.isEmpty();
+        const auto begin = wasStackBuffer ? buf.data() : mb.data();
+        const qsizetype offset = qsizetype(std::distance(begin, out));
+        qsizetype newSize = 0;
+        if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
+            Q_CHECK_PTR(false);
+            return {nullptr, 0};
+        }
+        mb.resize(newSize);
+        auto it = mb.data();
+        if (wasStackBuffer)
+            it = std::copy_n(buf.data(), offset, it);
+        else
+            it += offset;
+        return {it, size};
+    };
+
+    const auto getNextWindowSize = [&]() {
+        int nextIn = qt_saturate<int>(uclen);
+        // The Windows API has some issues if the current window ends in the
+        // middle of a surrogate pair, so we avoid that:
+        if (nextIn > 1 && QChar::isHighSurrogate(ch[nextIn - 1]))
+            --nextIn;
+        return nextIn;
+    };
+
     int len = 0;
     while (uclen > 0) {
-        const int nextIn = qt_saturate<int>(uclen);
+        const int nextIn = getNextWindowSize();
+        std::tie(out, outlen) = growOut(1); // We need at least one byte
+        if (!out)
+            return {};
         const int nextOut = qt_saturate<int>(outlen);
         len = WideCharToMultiByte(codePage, 0, ch, nextIn, out, nextOut, nullptr, nullptr);
         if (len > 0) {
@@ -1516,14 +1548,21 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
         } else {
             int r = GetLastError();
             if (r == ERROR_INSUFFICIENT_BUFFER) {
-                Q_ASSERT(mb.isEmpty());
                 int neededLength = WideCharToMultiByte(codePage, 0, ch, nextIn, nullptr, 0,
                                                        nullptr, nullptr);
-                const qsizetype currentLength = out - buf.data();
-                mb.resize(currentLength + neededLength);
-                memcpy(mb.data(), out, currentLength * sizeof(*out));
-                out = mb.data() + currentLength;
-                outlen = neededLength;
+                if (neededLength <= 0) {
+                    // Fail. Observed with UTF8 where the input window was max int and ended in an
+                    // incomplete sequence, probably a Windows bug. We try to avoid that from
+                    // happening by reducing the window size in that case. But let's keep this
+                    // branch just in case of other bugs.
+                    r = GetLastError();
+                    fprintf(stderr,
+                            "WideCharToMultiByte: Cannot convert multibyte text (error %d)\n", r);
+                    break;
+                }
+                std::tie(out, outlen) = growOut(neededLength);
+                if (!out)
+                    return {};
                 // and try again...
             } else {
                 // Fail. Probably can't happen in fact (dwFlags is 0).
diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
index 9f48d59ce57..d5fff83321a 100644
--- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
+++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
@@ -205,6 +205,7 @@ private slots:
     void toLocal8Bit_data();
     void toLocal8Bit();
     void toLocal8Bit_special_cases();
+    void toLocal8Bit_2GiB();
 #endif
 };
 
@@ -2751,6 +2752,39 @@ void tst_QStringConverter::toLocal8Bit_special_cases()
     QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba);
     QCOMPARE(state.remainingChars, 0);
 }
+
+void tst_QStringConverter::toLocal8Bit_2GiB()
+{
+#if QT_POINTER_SIZE == 4
+    QSKIP("This test is only relevant for 64-bit builds");
+#else
+    constexpr qsizetype TwoGiB = qsizetype(std::numeric_limits<int>::max());
+    QString input;
+    QT_TRY {
+        input.reserve(TwoGiB + 1);
+    } QT_CATCH (const std::bad_alloc &) {
+        QSKIP("Out of memory");
+    }
+    // Fill with a single code unit character
+    input.fill(u'.', TwoGiB - 1);
+    // Then append a 2 code unit character, so that the input straddles the 2 GiB
+    // boundary
+    input += u"🙂";
+    QCOMPARE(input.size(), input.capacity());
+    constexpr uint UTF8 = 65001u;
+    QStringConverter::State state;
+    QByteArray result;
+    QT_TRY {
+        result = QLocal8Bit::convertFromUnicode_sys(input, UTF8, &state);
+    } QT_CATCH (const std::bad_alloc &) {
+        QSKIP("Out of memory");
+    }
+    QUtf8StringView rView = result;
+    QCOMPARE(rView.size(), TwoGiB + 3); // The 2 code unit smiley is 4 code units in UTF-8
+    QCOMPARE(rView.last(7), u8"...🙂"); // Check we correctly decoded it
+    QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state
+#endif
+}
 #endif // Q_OS_WIN
 
 struct DontCrashAtExit {