diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp index 9e229b9e835..28094ec34e8 100644 --- a/src/corelib/text/qbytearray.cpp +++ b/src/corelib/text/qbytearray.cpp @@ -22,6 +22,7 @@ #ifndef QT_NO_COMPRESS #include #include +#include #endif #include #include @@ -576,6 +577,101 @@ static QByteArray invalidCompressedData() return zlibError(ZLibOp::Decompression, "Input data is corrupted"); } +Q_DECL_COLD_FUNCTION +static QByteArray unexpectedZlibError(ZLibOp op, int err, const char *msg) // warns about a zlib status the caller did not anticipate and returns a default-constructed QByteArray +{ + qWarning("%s unexpected zlib error: %s (%d)", + zlibOpAsString(op), + msg ? msg : "", + err); + return QByteArray(); +} + +static QByteArray xxflate(ZLibOp op, QArrayDataPointer out, QByteArrayView input, + qxp::function_ref init, + qxp::function_ref processChunk, + qxp::function_ref deinit) // runs init once, calls processChunk while it returns Z_OK (growing out as needed), and deinit on scope exit +{ + if (out.data() == nullptr) // allocation failed + return tooMuchData(op); + qsizetype capacity = out.allocatedCapacity(); + + const auto initialSize = out.size; // bytes already in out before any zlib output is appended + + z_stream zs = {}; + zs.next_in = reinterpret_cast(const_cast(input.data())); // 1980s C API... 
+ if (const int err = init(&zs); err != Z_OK) + return unexpectedZlibError(op, err, zs.msg); + const auto sg = qScopeGuard([&] { deinit(&zs); }); + + using ZlibChunkSize_t = decltype(zs.avail_in); + static_assert(!std::is_signed_v); + static_assert(std::is_same_v); + constexpr auto MaxChunkSize = std::numeric_limits::max(); + [[maybe_unused]] + constexpr auto MaxStatisticsSize = std::numeric_limits::max(); + + size_t inputLeft = size_t(input.size()); // input bytes not yet handed to zlib through avail_in + + int res; + do { + Q_ASSERT(out.freeSpaceAtBegin() == 0); // ensure prepend optimization stays out of the way + Q_ASSERT(capacity == out.allocatedCapacity()); + + if (zs.avail_out == 0) { + Q_ASSERT(size_t(out.size) - initialSize > MaxStatisticsSize || // total_out overflow + size_t(out.size) - initialSize == zs.total_out); + Q_ASSERT(out.size <= capacity); + + qsizetype avail_out = capacity - out.size; + if (avail_out == 0) { + out->reallocateAndGrow(QArrayData::GrowsAtEnd, 1); // grow to next natural capacity + if (out.data() == nullptr) // reallocation failed + return tooMuchData(op); + capacity = out.allocatedCapacity(); + avail_out = capacity - out.size; + } + zs.next_out = reinterpret_cast(out.data()) + out.size; + zs.avail_out = avail_out > MaxChunkSize ? MaxChunkSize : ZlibChunkSize_t(avail_out); + out.size += zs.avail_out; + + Q_ASSERT(zs.avail_out > 0); + } + + if (zs.avail_in == 0) { + // zs.next_in is kept up-to-date by processChunk(), so nothing to do + zs.avail_in = inputLeft > MaxChunkSize ? 
MaxChunkSize : ZlibChunkSize_t(inputLeft); + inputLeft -= zs.avail_in; + } + + res = processChunk(&zs, inputLeft); + } while (res == Z_OK); + + switch (res) { + case Z_STREAM_END: + out.size -= zs.avail_out; // give back the part of the last chunk zlib did not fill + Q_ASSERT(size_t(out.size) - initialSize > MaxStatisticsSize || // total_out overflow + size_t(out.size) - initialSize == zs.total_out); + Q_ASSERT(out.size <= out.allocatedCapacity()); + out.data()[out.size] = '\0'; + return QByteArray(std::move(out)); + + case Z_MEM_ERROR: + return tooMuchData(op); + + case Z_BUF_ERROR: // zlib made no progress; reachable when the input ends early (truncated data), so it must not be UB + return op == ZLibOp::Decompression ? invalidCompressedData() // same result as Z_DATA_ERROR + : unexpectedZlibError(op, res, zs.msg); + + case Z_DATA_ERROR: // can only happen on decompression + Q_ASSERT(op == ZLibOp::Decompression); + return invalidCompressedData(); + + default: + return unexpectedZlibError(op, res, zs.msg); + } +} + QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) { constexpr qsizetype HeaderSize = sizeof(CompressSizeHint_t); @@ -636,16 +732,16 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) data that was compressed using zlib, you first need to prepend a four byte header to the byte array containing the data. The header must contain the expected length (in bytes) of the uncompressed data, - expressed as an unsigned, big-endian, 32-bit integer. + expressed as an unsigned, big-endian, 32-bit integer. This number is + just a hint for the initial size of the output buffer, + though. If the indicated size is too small to hold the result, the + output buffer size will still be increased until either the output + fits or the system runs out of memory. So, despite the 32-bit + header, this function, on 64-bit platforms, can produce more than + 4GiB of output. -//![uncompress-limit-note] - \note The maximum size of data that this function can produce is limited by - what the platform's \c{unsigned long} can represent (a Zlib limitation). 
- That means that data > 4GiB can be compressed and decompressed on a 64-bit - Unix system, but not on a 64-bit Windows system. Portable code should - therefore avoid using qCompress()/qUncompress() to compress more than 4GiB - of input. -//![uncompress-limit-note] + \note In Qt versions prior to Qt 6.5, more than 2GiB of data + worked unreliably; in Qt versions prior to Qt 6.0, not at all. \sa qCompress() */ @@ -657,8 +753,6 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) Uncompresses the first \a nbytes of \a data and returns a new byte array with the uncompressed data. - - \include qbytearray.cpp uncompress-limit-note */ QByteArray qUncompress(const uchar* data, qsizetype nbytes) { @@ -678,49 +772,23 @@ QByteArray qUncompress(const uchar* data, qsizetype nbytes) return invalidCompressedData(); return QByteArray(); } - uLong len = qMax(expectedSize, 1u); - constexpr size_t MaxZLibSize = (std::numeric_limits::max)(); - constexpr size_t MaxDecompressedSize = (std::min)(size_t(MaxByteArraySize), MaxZLibSize); - if (len > MaxDecompressedSize) - return tooMuchData(ZLibOp::Decompression); - Q_ASSERT(len <= size_t((std::numeric_limits::max)())); - QByteArray::DataPointer d(QByteArray::Data::allocate(qsizetype(len))); - if (d.data() == nullptr) // allocation failed - return tooMuchData(ZLibOp::Decompression); - - forever { - const auto alloc = len; - int res = ::uncompress(reinterpret_cast(d.data()), &len, - data + HeaderSize, nbytes - HeaderSize); - - switch (res) { - case Z_OK: { - Q_ASSERT(len <= alloc); - Q_UNUSED(alloc); - d.data()[len] = '\0'; - d.size = len; - return QByteArray(d); - } - - case Z_MEM_ERROR: + constexpr auto MaxDecompressedSize = size_t(MaxByteArraySize); + if constexpr (MaxDecompressedSize < std::numeric_limits::max()) { + if (expectedSize > MaxDecompressedSize) return tooMuchData(ZLibOp::Decompression); - - case Z_BUF_ERROR: - if (len == MaxDecompressedSize) // can't grow further - return 
tooMuchData(ZLibOp::Decompression); - if (qMulOverflow<2>(len, &len)) - len = MaxDecompressedSize; - d->reallocate(qsizetype(len), QArrayData::Grow); // cannot overflow! - if (d.data() == nullptr) // reallocation failed - return tooMuchData(ZLibOp::Decompression); - - continue; - - case Z_DATA_ERROR: - return invalidCompressedData(); - } } + + // expectedSize is only a 32-bit hint and may be truncated, so always use at least nbytes + // (larger by at most 1%, according to zlib docs) + qsizetype capacity = std::max(qsizetype(expectedSize), // cannot overflow! + nbytes); + + QArrayDataPointer d(QTypedArrayData::allocate(capacity, QArrayData::KeepSize)); + return xxflate(ZLibOp::Decompression, std::move(d), {data + HeaderSize, nbytes - HeaderSize}, + [] (z_stream *zs) { return inflateInit(zs); }, // init + [] (z_stream *zs, size_t) { return inflate(zs, Z_NO_FLUSH); }, // processChunk; the inputLeft argument is unused here + [] (z_stream *zs) { inflateEnd(zs); }); // deinit } #endif diff --git a/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp b/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp index ff3feb33db4..a3534f30a46 100644 --- a/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp +++ b/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp @@ -12,6 +12,8 @@ #include "../shared/test_number_shared.h" +#include + #include #include @@ -31,6 +33,7 @@ private slots: void qCompress(); void qUncompressCorruptedData_data(); void qUncompressCorruptedData(); + void qUncompress4GiBPlus(); void qCompressionZeroTermination(); #endif void constByteArray(); @@ -300,6 +303,66 @@ void tst_QByteArray::qUncompressCorruptedData() QCOMPARE(res, QByteArray()); } +void tst_QByteArray::qUncompress4GiBPlus() +{ + // after three rounds of qUncompress(), this decompresses to 4GiB + 1 'X' bytes: + constexpr uchar compressed_3x[] = { + 0x00, 0x00, 0x1a, 0x76, 0x78, 0x9c, 0x63, 0xb0, 0xdf, 0xb4, 0xad, 0x62, + 0xce, 0xdb, 0x3b, 0x0b, 0xf3, 0x26, 0x27, 0x4a, 0xb4, 0x3d, 0x34, 0x5b, + 0xed, 0xb4, 0x41, 0xf1, 0xc0, 0x99, 0x2f, 0x02, 0x05, 0x67, 0x26, 0x88, + 0x6c, 0x66, 0x71, 0x34, 0x62, 0x9c, 
0x75, 0x26, 0xb1, 0xa0, 0xe5, 0xcc, + 0xda, 0x94, 0x83, 0xc9, 0x05, 0x73, 0x0e, 0x3c, 0x39, 0xc2, 0xc7, 0xd0, + 0xae, 0x38, 0x53, 0x7b, 0x87, 0xdc, 0x01, 0x91, 0x45, 0x59, 0x4f, 0xda, + 0xbf, 0xca, 0xcc, 0x52, 0xdb, 0xbb, 0xde, 0xbb, 0xf6, 0xd3, 0x55, 0xff, + 0x7d, 0x77, 0x0e, 0x1b, 0xf0, 0xa4, 0xdf, 0xcf, 0xdb, 0x5f, 0x2f, 0xf5, + 0xd7, 0x7c, 0xfe, 0xbf, 0x3f, 0xbf, 0x3f, 0x9d, 0x7c, 0xda, 0x2c, 0xc8, + 0xc0, 0xc0, 0xb0, 0xe1, 0xf1, 0xb3, 0xfd, 0xfa, 0xdf, 0x8e, 0x7d, 0xef, + 0x7f, 0xb9, 0xc1, 0xc2, 0xae, 0x92, 0x19, 0x28, 0xf2, 0x66, 0xd7, 0xe5, + 0xbf, 0xed, 0x93, 0xbf, 0x6a, 0x14, 0x7c, 0xff, 0xf6, 0xe1, 0xe8, 0xb6, + 0x7e, 0x46, 0xa0, 0x90, 0xd9, 0xbb, 0xcf, 0x9f, 0x17, 0x37, 0x7f, 0xe5, + 0x6f, 0xb4, 0x7f, 0xfe, 0x5e, 0xfd, 0xb6, 0x1d, 0x1b, 0x50, 0xe8, 0xc6, + 0x8e, 0xe3, 0xab, 0x9f, 0xe6, 0xec, 0x65, 0xfd, 0x23, 0xb1, 0x4e, 0x7e, + 0xef, 0xbd, 0x6f, 0xa6, 0x40, 0xa1, 0x03, 0xc7, 0xfe, 0x0a, 0xf1, 0x00, + 0xe9, 0x06, 0x91, 0x83, 0x40, 0x92, 0x21, 0x43, 0x10, 0xcc, 0x11, 0x03, + 0x73, 0x3a, 0x90, 0x39, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, + 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, + 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, + 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, + 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0x34, 0x90, 0x99, 0xb6, 0x7e, 0xf5, + 0xd3, 0xe9, 0xbf, 0x35, 0x13, 0xca, 0x8c, 0x75, 0xec, 0xec, 0xa4, 0x2f, + 0x7e, 0x2d, 0xf9, 0xf3, 0xf0, 0xee, 0xea, 0xd5, 0xf5, 0xd3, 0x14, 0x57, + 0x06, 0x00, 0x00, 0xb9, 0x1e, 0x35, 0xce + }; + + constexpr qint64 GiB = 1024LL * 1024 * 1024; + + if constexpr (sizeof(qsizetype) == sizeof(int)) { + QSKIP("This is a 64-bit-only test."); + } else { + + // 1st round: decompress the embedded array + auto c = ::qUncompress(std::data(compressed_3x), q20::ssize(compressed_3x)); + QVERIFY(!c.isNull()); // check for decompression error + + // 2nd round: decompress the output of the 1st + c = ::qUncompress(c); + QVERIFY(!c.isNull()); + + // 3rd round: a null result or std::bad_alloc skips the test instead of failing it + try { + c = ::qUncompress(c); + if (c.isNull()) // this step 
(~18MiB -> 4GiB) might have run out of memory + QSKIP("Failed to allocate enough memory."); + } catch (const std::bad_alloc &) { + QSKIP("Failed to allocate enough memory."); + } + + QCOMPARE(c.size(), 4 * GiB + 1); + QCOMPARE(std::string_view{c}.find_first_not_of('X'), + std::string_view::npos); + } +} + void tst_QByteArray::qCompressionZeroTermination() { QByteArray s = "Hello, I'm a string.";