Port qUncompress() to zstream/inflate()

The zlib convenience API we've been using so far has two problems:

- On Windows-64, where sizeof(long) == 4, the use of ulong for sizes
  meant that we could not uncompress data compressed on other 64-bit
  platforms (Unix). While zstream also uses ulong, being a stream API,
  it allows feeding data in chunks. The total_in and total_out members
  are only required for gzip compression and are otherwise just
  informational. They're unsigned, so their overflow does not cause
  UB. In summary, using zstream + inflate() allows us to decompress
  more than 4GiB of data even on Windows-64.

- On all platforms, if the size hint in the header was too short, we'd
  double the output buffer size and try again, from scratch. Using
  zstream + inflate(), we still need to reallocate, but we can then
  let zlib pick up where it left off when it ran out of output buffer
  space. In all but the most pathological cases, copying the
  already-decoded data instead of re-decoding it again should be
  faster, esp. if QArrayData uses realloc() instead of malloc() +
  free() to grow the buffer.

We also now directly allocate at least as much output buffer as we
have input, to cut the first few rounds of reallocations when the
expectedSize was created, as qCompress still does, using modulo
arithmetic mod 4GiB instead of saturation arithmetic.

Factor the growing of the output buffer into a wrapper function,
xxflate(), which can be reused when porting qCompress().

This completely fixes the uncompression side of QTBUG-106542 and
QTBUG-104972.

Task-number: QTBUG-104972
Task-number: QTBUG-106542
Change-Id: I97f55ea322c24db1ac48b31c16855bc91708e7e2
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
(cherry picked from commit bda3628402d04ed6fc244616791e1170a0cb61d0)
This commit is contained in:
Marc Mutz 2022-09-17 11:32:45 +02:00
parent 164f4f1c00
commit 1eff7844a5
2 changed files with 182 additions and 51 deletions

View File

@ -22,6 +22,7 @@
#ifndef QT_NO_COMPRESS
#include <zconf.h>
#include <zlib.h>
#include <qxpfunctional.h>
#endif
#include <ctype.h>
#include <limits.h>
@ -576,6 +577,101 @@ static QByteArray invalidCompressedData()
return zlibError(ZLibOp::Decompression, "Input data is corrupted");
}
// Cold-path helper: emits a warning naming the zlib operation, zlib's own
// message (an empty string when zlib supplied none) and the numeric error
// code, then returns a default-constructed QByteArray as the failure result.
Q_DECL_COLD_FUNCTION
static QByteArray unexpectedZlibError(ZLibOp op, int err, const char *msg)
{
    // zlib leaves the message pointer null when it has nothing to say
    const char *const detail = (msg != nullptr) ? msg : "";
    qWarning("%s unexpected zlib error: %s (%d)", zlibOpAsString(op), detail, err);
    return {};
}
/*
    Drives a zlib stream operation over \a input, writing the result to \a out
    and growing \a out whenever zlib runs out of output space.

    \a op names the operation and is used for error reporting.
    \a out is the pre-allocated output buffer; a null data pointer is treated
    as a failed allocation. Output is appended after the out.size bytes
    already present.
    \a init sets up the z_stream and returns a zlib status code.
    \a processChunk performs one step on the stream; its second argument is
    the number of input bytes not yet handed to zlib.
    \a deinit tears the stream down again; it runs on every exit path once
    init has succeeded.

    Input and output are fed to zlib in chunks no larger than what the
    z_stream's avail_in/avail_out members can represent, so the total amount
    of data is not limited by the width of those members.

    Returns the NUL-terminated result on Z_STREAM_END, and the result of the
    matching error helper otherwise.
*/
static QByteArray xxflate(ZLibOp op, QArrayDataPointer<char> out, QByteArrayView input,
                          qxp::function_ref<int(z_stream *) const> init,
                          qxp::function_ref<int(z_stream *, size_t) const> processChunk,
                          qxp::function_ref<void(z_stream *) const> deinit)
{
    if (out.data() == nullptr) // allocation failed
        return tooMuchData(op);
    qsizetype capacity = out.allocatedCapacity();

    const auto initialSize = out.size;

    z_stream zs = {};
    zs.next_in = reinterpret_cast<uchar *>(const_cast<char *>(input.data())); // 1980s C API...
    if (const int err = init(&zs); err != Z_OK)
        return unexpectedZlibError(op, err, zs.msg);
    const auto sg = qScopeGuard([&] { deinit(&zs); });

    using ZlibChunkSize_t = decltype(zs.avail_in);
    static_assert(!std::is_signed_v<ZlibChunkSize_t>);
    static_assert(std::is_same_v<ZlibChunkSize_t, decltype(zs.avail_out)>);
    constexpr auto MaxChunkSize = std::numeric_limits<ZlibChunkSize_t>::max();
    [[maybe_unused]]
    constexpr auto MaxStatisticsSize = std::numeric_limits<decltype(zs.total_out)>::max();

    size_t inputLeft = size_t(input.size());

    int res;
    do {
        Q_ASSERT(out.freeSpaceAtBegin() == 0); // ensure prepend optimization stays out of the way
        Q_ASSERT(capacity == out.allocatedCapacity());

        if (zs.avail_out == 0) {
            // Everything handed out so far has been filled, so out.size is
            // exactly the number of bytes produced up to now.
            Q_ASSERT(size_t(out.size) - initialSize > MaxStatisticsSize || // total_out overflow
                     size_t(out.size) - initialSize == zs.total_out);
            Q_ASSERT(out.size <= capacity);

            qsizetype avail_out = capacity - out.size;
            if (avail_out == 0) {
                out->reallocateAndGrow(QArrayData::GrowsAtEnd, 1); // grow to next natural capacity
                if (out.data() == nullptr) // reallocation failed
                    return tooMuchData(op);
                capacity = out.allocatedCapacity();
                avail_out = capacity - out.size;
            }
            zs.next_out = reinterpret_cast<uchar *>(out.data()) + out.size;
            // avail_out is non-negative here; compare as unsigned to avoid
            // mixing signedness
            zs.avail_out = size_t(avail_out) > size_t(MaxChunkSize) ? MaxChunkSize
                                                                    : ZlibChunkSize_t(avail_out);
            // Optimistically count the whole chunk as used; the unused tail
            // is subtracted again once the stream ends.
            out.size += zs.avail_out;

            Q_ASSERT(zs.avail_out > 0);
        }

        if (zs.avail_in == 0) {
            // zs.next_in is kept up-to-date by processChunk(), so nothing to do
            zs.avail_in = inputLeft > MaxChunkSize ? MaxChunkSize : ZlibChunkSize_t(inputLeft);
            inputLeft -= zs.avail_in;
        }

        res = processChunk(&zs, inputLeft);
    } while (res == Z_OK);

    switch (res) {
    case Z_STREAM_END:
        out.size -= zs.avail_out; // drop the part of the last chunk that was not written
        Q_ASSERT(size_t(out.size) - initialSize > MaxStatisticsSize || // total_out overflow
                 size_t(out.size) - initialSize == zs.total_out);
        Q_ASSERT(out.size <= out.allocatedCapacity());
        out.data()[out.size] = '\0';
        return QByteArray(std::move(out));

    case Z_MEM_ERROR:
        return tooMuchData(op);

    case Z_BUF_ERROR:
        // zlib reports Z_BUF_ERROR when it can make no progress. Output space
        // is always replenished before each step, so when decompressing this
        // means the input ended before the end-of-stream marker, i.e. the
        // compressed data is truncated.
        if (op == ZLibOp::Decompression)
            return invalidCompressedData();
        return unexpectedZlibError(op, res, zs.msg);

    case Z_DATA_ERROR:   // can only happen on decompression
        Q_ASSERT(op == ZLibOp::Decompression);
        return invalidCompressedData();

    default:
        return unexpectedZlibError(op, res, zs.msg);
    }
}
QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
{
constexpr qsizetype HeaderSize = sizeof(CompressSizeHint_t);
@ -636,16 +732,16 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
data that was compressed using zlib, you first need to prepend a four
byte header to the byte array containing the data. The header must
contain the expected length (in bytes) of the uncompressed data,
expressed as an unsigned, big-endian, 32-bit integer.
expressed as an unsigned, big-endian, 32-bit integer. This number is
just a hint for the initial size of the output buffer,
though. If the indicated size is too small to hold the result, the
output buffer size will still be increased until either the output
fits or the system runs out of memory. So, despite the 32-bit
header, this function, on 64-bit platforms, can produce more than
4GiB of output.
//![uncompress-limit-note]
\note The maximum size of data that this function can produce is limited by
what the platform's \c{unsigned long} can represent (a Zlib limitation).
That means that data > 4GiB can be compressed and decompressed on a 64-bit
Unix system, but not on a 64-bit Windows system. Portable code should
therefore avoid using qCompress()/qUncompress() to compress more than 4GiB
of input.
//![uncompress-limit-note]
\note In Qt versions prior to Qt 6.5, more than 2GiB of data
worked unreliably; in Qt versions prior to Qt 6.0, not at all.
\sa qCompress()
*/
@ -657,8 +753,6 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
Uncompresses the first \a nbytes of \a data and returns a new byte
array with the uncompressed data.
\include qbytearray.cpp uncompress-limit-note
*/
QByteArray qUncompress(const uchar* data, qsizetype nbytes)
{
@ -678,49 +772,23 @@ QByteArray qUncompress(const uchar* data, qsizetype nbytes)
return invalidCompressedData();
return QByteArray();
}
uLong len = qMax(expectedSize, 1u);
constexpr size_t MaxZLibSize = (std::numeric_limits<uLong>::max)();
constexpr size_t MaxDecompressedSize = (std::min)(size_t(MaxByteArraySize), MaxZLibSize);
if (len > MaxDecompressedSize)
return tooMuchData(ZLibOp::Decompression);
Q_ASSERT(len <= size_t((std::numeric_limits<qsizetype>::max)()));
QByteArray::DataPointer d(QByteArray::Data::allocate(qsizetype(len)));
if (d.data() == nullptr) // allocation failed
return tooMuchData(ZLibOp::Decompression);
forever {
const auto alloc = len;
int res = ::uncompress(reinterpret_cast<uchar *>(d.data()), &len,
data + HeaderSize, nbytes - HeaderSize);
switch (res) {
case Z_OK: {
Q_ASSERT(len <= alloc);
Q_UNUSED(alloc);
d.data()[len] = '\0';
d.size = len;
return QByteArray(d);
}
case Z_MEM_ERROR:
constexpr auto MaxDecompressedSize = size_t(MaxByteArraySize);
if constexpr (MaxDecompressedSize < std::numeric_limits<CompressSizeHint_t>::max()) {
if (expectedSize > MaxDecompressedSize)
return tooMuchData(ZLibOp::Decompression);
case Z_BUF_ERROR:
if (len == MaxDecompressedSize) // can't grow further
return tooMuchData(ZLibOp::Decompression);
if (qMulOverflow<2>(len, &len))
len = MaxDecompressedSize;
d->reallocate(qsizetype(len), QArrayData::Grow); // cannot overflow!
if (d.data() == nullptr) // reallocation failed
return tooMuchData(ZLibOp::Decompression);
continue;
case Z_DATA_ERROR:
return invalidCompressedData();
}
}
// expectedSize may be truncated, so always use at least nbytes
// (larger by at most 1%, according to zlib docs)
qsizetype capacity = std::max(qsizetype(expectedSize), // cannot overflow!
nbytes);
QArrayDataPointer d(QTypedArrayData<char>::allocate(capacity, QArrayData::KeepSize));
return xxflate(ZLibOp::Decompression, std::move(d), {data + HeaderSize, nbytes - HeaderSize},
[] (z_stream *zs) { return inflateInit(zs); },
[] (z_stream *zs, size_t) { return inflate(zs, Z_NO_FLUSH); },
[] (z_stream *zs) { inflateEnd(zs); });
}
#endif

View File

@ -12,6 +12,8 @@
#include "../shared/test_number_shared.h"
#include <q20iterator.h>
#include <stdexcept>
#include <string_view>
@ -31,6 +33,7 @@ private slots:
void qCompress();
void qUncompressCorruptedData_data();
void qUncompressCorruptedData();
void qUncompress4GiBPlus();
void qCompressionZeroTermination();
#endif
void constByteArray();
@ -300,6 +303,66 @@ void tst_QByteArray::qUncompressCorruptedData()
QCOMPARE(res, QByteArray());
}
// Checks that ::qUncompress() can produce more than 4GiB of output on 64-bit
// builds. To keep the fixture small, the payload is nested: each round of
// uncompressing yields the input for the next round.
void tst_QByteArray::qUncompress4GiBPlus()
{
// after three rounds, this decompresses to 4GiB + 1 'X' bytes:
// (the leading four bytes, 0x00001a76, are the size header that
// qUncompress() expects in front of the zlib data)
constexpr uchar compressed_3x[] = {
0x00, 0x00, 0x1a, 0x76, 0x78, 0x9c, 0x63, 0xb0, 0xdf, 0xb4, 0xad, 0x62,
0xce, 0xdb, 0x3b, 0x0b, 0xf3, 0x26, 0x27, 0x4a, 0xb4, 0x3d, 0x34, 0x5b,
0xed, 0xb4, 0x41, 0xf1, 0xc0, 0x99, 0x2f, 0x02, 0x05, 0x67, 0x26, 0x88,
0x6c, 0x66, 0x71, 0x34, 0x62, 0x9c, 0x75, 0x26, 0xb1, 0xa0, 0xe5, 0xcc,
0xda, 0x94, 0x83, 0xc9, 0x05, 0x73, 0x0e, 0x3c, 0x39, 0xc2, 0xc7, 0xd0,
0xae, 0x38, 0x53, 0x7b, 0x87, 0xdc, 0x01, 0x91, 0x45, 0x59, 0x4f, 0xda,
0xbf, 0xca, 0xcc, 0x52, 0xdb, 0xbb, 0xde, 0xbb, 0xf6, 0xd3, 0x55, 0xff,
0x7d, 0x77, 0x0e, 0x1b, 0xf0, 0xa4, 0xdf, 0xcf, 0xdb, 0x5f, 0x2f, 0xf5,
0xd7, 0x7c, 0xfe, 0xbf, 0x3f, 0xbf, 0x3f, 0x9d, 0x7c, 0xda, 0x2c, 0xc8,
0xc0, 0xc0, 0xb0, 0xe1, 0xf1, 0xb3, 0xfd, 0xfa, 0xdf, 0x8e, 0x7d, 0xef,
0x7f, 0xb9, 0xc1, 0xc2, 0xae, 0x92, 0x19, 0x28, 0xf2, 0x66, 0xd7, 0xe5,
0xbf, 0xed, 0x93, 0xbf, 0x6a, 0x14, 0x7c, 0xff, 0xf6, 0xe1, 0xe8, 0xb6,
0x7e, 0x46, 0xa0, 0x90, 0xd9, 0xbb, 0xcf, 0x9f, 0x17, 0x37, 0x7f, 0xe5,
0x6f, 0xb4, 0x7f, 0xfe, 0x5e, 0xfd, 0xb6, 0x1d, 0x1b, 0x50, 0xe8, 0xc6,
0x8e, 0xe3, 0xab, 0x9f, 0xe6, 0xec, 0x65, 0xfd, 0x23, 0xb1, 0x4e, 0x7e,
0xef, 0xbd, 0x6f, 0xa6, 0x40, 0xa1, 0x03, 0xc7, 0xfe, 0x0a, 0xf1, 0x00,
0xe9, 0x06, 0x91, 0x83, 0x40, 0x92, 0x21, 0x43, 0x10, 0xcc, 0x11, 0x03,
0x73, 0x3a, 0x90, 0x39, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32,
0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32,
0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32,
0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32,
0xa3, 0x32, 0xa3, 0x32, 0xa3, 0x32, 0x34, 0x90, 0x99, 0xb6, 0x7e, 0xf5,
0xd3, 0xe9, 0xbf, 0x35, 0x13, 0xca, 0x8c, 0x75, 0xec, 0xec, 0xa4, 0x2f,
0x7e, 0x2d, 0xf9, 0xf3, 0xf0, 0xee, 0xea, 0xd5, 0xf5, 0xd3, 0x14, 0x57,
0x06, 0x00, 0x00, 0xb9, 0x1e, 0x35, 0xce
};
constexpr qint64 GiB = 1024LL * 1024 * 1024;
// When qsizetype is no wider than int, the test is skipped instead of run.
if constexpr (sizeof(qsizetype) == sizeof(int)) {
QSKIP("This is a 64-bit-only test.");
} else {
// 1st
auto c = ::qUncompress(std::data(compressed_3x), q20::ssize(compressed_3x));
QVERIFY(!c.isNull()); // check for decompression error
// 2nd
c = ::qUncompress(c);
QVERIFY(!c.isNull());
// 3rd
// Running out of memory here is treated as an environment limitation, not
// a failure: both a null result and a std::bad_alloc lead to a skip.
try {
c = ::qUncompress(c);
if (c.isNull()) // this step (~18MiB -> 4GiB) might have run out of memory
QSKIP("Failed to allocate enough memory.");
} catch (const std::bad_alloc &) {
QSKIP("Failed to allocate enough memory.");
}
// The result must be exactly 4GiB + 1 bytes long and consist only of 'X'.
QCOMPARE(c.size(), 4 * GiB + 1);
QCOMPARE(std::string_view{c}.find_first_not_of('X'),
std::string_view::npos);
}
}
void tst_QByteArray::qCompressionZeroTermination()
{
QByteArray s = "Hello, I'm a string.";