QLocal8Bit::convertFromUnicode[win]: support more than 2Gi input
As we did for convertToUnicode. To support more than 2Gi input, we need to handle the input in chunks because of the `int` parameter in the Windows API. Testing also revealed some corner cases that we need to handle, mostly occurring when there is an incomplete surrogate pair at the end of the current input window. The test takes between 3 (plain MinGW) and 8 (MSVC with ASAN) seconds to run on my machine. Pick-to: 6.6 6.5 Fixes: QTBUG-105105 Change-Id: I4fb0420b88ca41dfa8b561a35c6d96659bd81468 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> (cherry picked from commit 496340f33ad48738d1595c5c4048e4a05819786c) Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
This commit is contained in:
parent
5e882b5de9
commit
b04fe2de61
@ -1452,7 +1452,6 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
const wchar_t *ch = reinterpret_cast<const wchar_t *>(in.data());
|
||||
qsizetype uclen = in.size();
|
||||
|
||||
Q_ASSERT(uclen < INT_MAX); // ### FIXME
|
||||
Q_ASSERT(state);
|
||||
if (state->flags & QStringConverter::Flag::Stateless) // temporary
|
||||
state = nullptr;
|
||||
@ -1503,9 +1502,42 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
|
||||
Q_ASSERT(uclen > 0);
|
||||
|
||||
// Return a pointer to storage where we have enough space for `size`
// bytes, paired with the number of bytes available at that pointer.
// If the required total size overflows qsizetype, Q_CHECK_PTR(false) is
// invoked and {nullptr, 0} is returned.
|
||||
const auto growOut = [&](qsizetype size) -> std::tuple<char *, qsizetype> {
|
||||
// The current output window is already large enough: keep using it.
if (outlen >= size)
|
||||
return {out, outlen};
|
||||
// An empty `mb` means `out` still points into `buf` rather than into `mb`.
const bool wasStackBuffer = mb.isEmpty();
|
||||
const auto begin = wasStackBuffer ? buf.data() : mb.data();
|
||||
// Distance of `out` from the start of the buffer it currently points into.
const qsizetype offset = qsizetype(std::distance(begin, out));
|
||||
qsizetype newSize = 0;
|
||||
// newSize = offset + size, guarding against overflow of qsizetype.
if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
|
||||
Q_CHECK_PTR(false);
|
||||
return {nullptr, 0};
|
||||
}
|
||||
mb.resize(newSize);
|
||||
auto it = mb.data();
|
||||
// When switching from `buf` to `mb`, carry over the `offset` bytes that
// precede `out`; otherwise they are already in `mb` and we just skip them.
if (wasStackBuffer)
|
||||
it = std::copy_n(buf.data(), offset, it);
|
||||
else
|
||||
it += offset;
|
||||
// `it` now points just past the first `offset` bytes of `mb`, with `size` bytes after it.
return {it, size};
|
||||
};
|
||||
|
||||
// Return the number of code units of the remaining input (`ch`, `uclen`) to
// use as the next input window: `uclen` narrowed to int via qt_saturate,
// reduced by one if the window would otherwise end on a high surrogate.
const auto getNextWindowSize = [&]() {
|
||||
int nextIn = qt_saturate<int>(uclen);
|
||||
// The Windows API has some issues if the current window ends in the
|
||||
// middle of a surrogate pair, so we avoid that:
|
||||
// (a window of a single code unit is left as-is, so it never shrinks to 0)
if (nextIn > 1 && QChar::isHighSurrogate(ch[nextIn - 1]))
|
||||
--nextIn;
|
||||
return nextIn;
|
||||
};
|
||||
|
||||
int len = 0;
|
||||
while (uclen > 0) {
|
||||
const int nextIn = qt_saturate<int>(uclen);
|
||||
const int nextIn = getNextWindowSize();
|
||||
std::tie(out, outlen) = growOut(1); // We need at least one byte
|
||||
if (!out)
|
||||
return {};
|
||||
const int nextOut = qt_saturate<int>(outlen);
|
||||
len = WideCharToMultiByte(codePage, 0, ch, nextIn, out, nextOut, nullptr, nullptr);
|
||||
if (len > 0) {
|
||||
@ -1516,14 +1548,21 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
|
||||
} else {
|
||||
int r = GetLastError();
|
||||
if (r == ERROR_INSUFFICIENT_BUFFER) {
|
||||
Q_ASSERT(mb.isEmpty());
|
||||
int neededLength = WideCharToMultiByte(codePage, 0, ch, nextIn, nullptr, 0,
|
||||
nullptr, nullptr);
|
||||
const qsizetype currentLength = out - buf.data();
|
||||
mb.resize(currentLength + neededLength);
|
||||
memcpy(mb.data(), out, currentLength * sizeof(*out));
|
||||
out = mb.data() + currentLength;
|
||||
outlen = neededLength;
|
||||
if (neededLength <= 0) {
|
||||
// Fail. Observed with UTF8 where the input window was max int and ended in an
|
||||
// incomplete sequence, probably a Windows bug. We try to avoid that from
|
||||
// happening by reducing the window size in that case. But let's keep this
|
||||
// branch just in case of other bugs.
|
||||
r = GetLastError();
|
||||
fprintf(stderr,
|
||||
"WideCharToMultiByte: Cannot convert multibyte text (error %d)\n", r);
|
||||
break;
|
||||
}
|
||||
std::tie(out, outlen) = growOut(neededLength);
|
||||
if (!out)
|
||||
return {};
|
||||
// and try again...
|
||||
} else {
|
||||
// Fail. Probably can't happen in fact (dwFlags is 0).
|
||||
|
@ -205,6 +205,7 @@ private slots:
|
||||
void toLocal8Bit_data();
|
||||
void toLocal8Bit();
|
||||
void toLocal8Bit_special_cases();
|
||||
void toLocal8Bit_2GiB();
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -2751,6 +2752,39 @@ void tst_QStringConverter::toLocal8Bit_special_cases()
|
||||
QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba);
|
||||
QCOMPARE(state.remainingChars, 0);
|
||||
}
|
||||
|
||||
// Checks that QLocal8Bit::convertFromUnicode_sys() converts an input that is
// longer than an `int` can express: INT_MAX - 1 single code unit characters
// followed by one surrogate pair, converted using code page 65001.
// The test is skipped on 32-bit builds and whenever an allocation throws
// std::bad_alloc.
void tst_QStringConverter::toLocal8Bit_2GiB()
|
||||
{
|
||||
#if QT_POINTER_SIZE == 4
|
||||
QSKIP("This test is only relevant for 64-bit builds");
|
||||
#else
|
||||
// Largest value representable in an `int`, i.e. one less than 2 Gi.
constexpr qsizetype TwoGiB = qsizetype(std::numeric_limits<int>::max());
|
||||
QString input;
|
||||
QT_TRY {
|
||||
input.reserve(TwoGiB + 1);
|
||||
} QT_CATCH (const std::bad_alloc &) {
|
||||
QSKIP("Out of memory");
|
||||
}
|
||||
// Fill with a single code unit character
|
||||
input.fill(u'.', TwoGiB - 1);
|
||||
// Then append a 2 code unit character, so that the input straddles the
|
||||
// INT_MAX code unit boundary
|
||||
input += u"🙂";
|
||||
// The string is expected to fill exactly the capacity reserved above
QCOMPARE(input.size(), input.capacity());
|
||||
// 65001 is the Windows code page identifier for UTF-8
constexpr uint UTF8 = 65001u;
|
||||
QStringConverter::State state;
|
||||
QByteArray result;
|
||||
QT_TRY {
|
||||
result = QLocal8Bit::convertFromUnicode_sys(input, UTF8, &state);
|
||||
} QT_CATCH (const std::bad_alloc &) {
|
||||
QSKIP("Out of memory");
|
||||
}
|
||||
QUtf8StringView rView = result;
|
||||
// Expected size: TwoGiB - 1 one-byte dots plus 4 bytes for the smiley
QCOMPARE(rView.size(), TwoGiB + 3); // The 2 code unit smiley is 4 code units in UTF-8
|
||||
QCOMPARE(rView.last(7), u8"...🙂"); // Check we correctly encoded it
|
||||
QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state
|
||||
#endif
|
||||
}
|
||||
#endif // Q_OS_WIN
|
||||
|
||||
struct DontCrashAtExit {
|
||||
|
Loading…
x
Reference in New Issue
Block a user