rhi: Reuse the data in buffer ops in res.update batches
Reuse the existing data of buffer operations instead of allocating anew, because having profilers bombarded with mallocs (due to creating deep copy QByteArrays) is not nice. Change-Id: I848f41f3465d6dc2a58a193cc863495aacf13d79 Reviewed-by: Andy Nichols <andy.nichols@qt.io>
This commit is contained in:
parent
6b52ba4286
commit
be2635b8dd
@ -289,6 +289,7 @@ public:
|
|||||||
QRhiBuffer *buf;
|
QRhiBuffer *buf;
|
||||||
int offset;
|
int offset;
|
||||||
QByteArray data;
|
QByteArray data;
|
||||||
|
int dataSize; // the real number of currently used bytes in data, not the same as data.size()
|
||||||
int readSize;
|
int readSize;
|
||||||
QRhiBufferReadbackResult *result;
|
QRhiBufferReadbackResult *result;
|
||||||
|
|
||||||
@ -298,7 +299,9 @@ public:
|
|||||||
op.type = DynamicUpdate;
|
op.type = DynamicUpdate;
|
||||||
op.buf = buf;
|
op.buf = buf;
|
||||||
op.offset = offset;
|
op.offset = offset;
|
||||||
op.data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
|
const int effectiveSize = size ? size : buf->size();
|
||||||
|
op.data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
|
||||||
|
op.dataSize = effectiveSize;
|
||||||
return op;
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -307,7 +310,29 @@ public:
|
|||||||
op->type = DynamicUpdate;
|
op->type = DynamicUpdate;
|
||||||
op->buf = buf;
|
op->buf = buf;
|
||||||
op->offset = offset;
|
op->offset = offset;
|
||||||
op->data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
|
const int effectiveSize = size ? size : buf->size();
|
||||||
|
|
||||||
|
// Why the isDetached check? Simply because the cost of detaching
|
||||||
|
// with a larger allocation may be a lot higher than creating a new
|
||||||
|
// deep copy bytearray with our (potentially a lot smaller) data.
|
||||||
|
// This reduces the benefits with certain backends (e.g. Vulkan)
|
||||||
|
// that hold on to the data (implicit sharing!) of host visible
|
||||||
|
// buffers for the current and next frame (assuming 2 frames in
|
||||||
|
// flight), but it is still an improvement (enabled by
|
||||||
|
// nextResourceUpdateBatch's shuffling when choosing a free batch
|
||||||
|
// from the pool). For other backends (e.g. D3D11) this can reduce
|
||||||
|
// mallocs (caused by creating new deep copy bytearrays) almost
|
||||||
|
// completely after a few frames (assuming of course that no
|
||||||
|
// dynamic elements with larger buffer data appear).
|
||||||
|
|
||||||
|
if (op->data.isDetached()) {
|
||||||
|
if (op->data.size() < effectiveSize)
|
||||||
|
op->data.resize(effectiveSize);
|
||||||
|
memcpy(op->data.data(), data, effectiveSize);
|
||||||
|
} else {
|
||||||
|
op->data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
|
||||||
|
}
|
||||||
|
op->dataSize = effectiveSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
static BufferOp staticUpload(QRhiBuffer *buf, int offset, int size, const void *data)
|
static BufferOp staticUpload(QRhiBuffer *buf, int offset, int size, const void *data)
|
||||||
@ -316,7 +341,9 @@ public:
|
|||||||
op.type = StaticUpload;
|
op.type = StaticUpload;
|
||||||
op.buf = buf;
|
op.buf = buf;
|
||||||
op.offset = offset;
|
op.offset = offset;
|
||||||
op.data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
|
const int effectiveSize = size ? size : buf->size();
|
||||||
|
op.data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
|
||||||
|
op.dataSize = effectiveSize;
|
||||||
return op;
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,7 +352,15 @@ public:
|
|||||||
op->type = StaticUpload;
|
op->type = StaticUpload;
|
||||||
op->buf = buf;
|
op->buf = buf;
|
||||||
op->offset = offset;
|
op->offset = offset;
|
||||||
op->data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
|
const int effectiveSize = size ? size : buf->size();
|
||||||
|
if (op->data.isDetached()) {
|
||||||
|
if (op->data.size() < effectiveSize)
|
||||||
|
op->data.resize(effectiveSize);
|
||||||
|
memcpy(op->data.data(), data, effectiveSize);
|
||||||
|
} else {
|
||||||
|
op->data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
|
||||||
|
}
|
||||||
|
op->dataSize = effectiveSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
static BufferOp read(QRhiBuffer *buf, int offset, int size, QRhiBufferReadbackResult *result)
|
static BufferOp read(QRhiBuffer *buf, int offset, int size, QRhiBufferReadbackResult *result)
|
||||||
|
@ -1418,12 +1418,12 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) {
|
if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) {
|
||||||
QD3D11Buffer *bufD = QRHI_RES(QD3D11Buffer, u.buf);
|
QD3D11Buffer *bufD = QRHI_RES(QD3D11Buffer, u.buf);
|
||||||
Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
|
Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
|
||||||
memcpy(bufD->dynBuf.data() + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(bufD->dynBuf.data() + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
bufD->hasPendingDynamicUpdates = true;
|
bufD->hasPendingDynamicUpdates = true;
|
||||||
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
|
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
|
||||||
QD3D11Buffer *bufD = QRHI_RES(QD3D11Buffer, u.buf);
|
QD3D11Buffer *bufD = QRHI_RES(QD3D11Buffer, u.buf);
|
||||||
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
||||||
Q_ASSERT(u.offset + u.data.size() <= bufD->m_size);
|
Q_ASSERT(u.offset + u.dataSize <= bufD->m_size);
|
||||||
QD3D11CommandBuffer::Command cmd;
|
QD3D11CommandBuffer::Command cmd;
|
||||||
cmd.cmd = QD3D11CommandBuffer::Command::UpdateSubRes;
|
cmd.cmd = QD3D11CommandBuffer::Command::UpdateSubRes;
|
||||||
cmd.args.updateSubRes.dst = bufD->buffer;
|
cmd.args.updateSubRes.dst = bufD->buffer;
|
||||||
@ -1437,7 +1437,7 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
box.left = UINT(u.offset);
|
box.left = UINT(u.offset);
|
||||||
box.top = box.front = 0;
|
box.top = box.front = 0;
|
||||||
box.back = box.bottom = 1;
|
box.back = box.bottom = 1;
|
||||||
box.right = UINT(u.offset + u.data.size()); // no -1: right, bottom, back are exclusive, see D3D11_BOX doc
|
box.right = UINT(u.offset + u.dataSize); // no -1: right, bottom, back are exclusive, see D3D11_BOX doc
|
||||||
cmd.args.updateSubRes.hasDstBox = true;
|
cmd.args.updateSubRes.hasDstBox = true;
|
||||||
cmd.args.updateSubRes.dstBox = box;
|
cmd.args.updateSubRes.dstBox = box;
|
||||||
cbD->commands.append(cmd);
|
cbD->commands.append(cmd);
|
||||||
|
@ -1687,7 +1687,7 @@ void QRhiGles2::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf);
|
QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf);
|
||||||
Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
|
Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
|
||||||
if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) {
|
if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) {
|
||||||
memcpy(bufD->ubuf.data() + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(bufD->ubuf.data() + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
} else {
|
} else {
|
||||||
trackedBufferBarrier(cbD, bufD, QGles2Buffer::AccessUpdate);
|
trackedBufferBarrier(cbD, bufD, QGles2Buffer::AccessUpdate);
|
||||||
QGles2CommandBuffer::Command cmd;
|
QGles2CommandBuffer::Command cmd;
|
||||||
@ -1695,16 +1695,16 @@ void QRhiGles2::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
cmd.args.bufferSubData.target = bufD->targetForDataOps;
|
cmd.args.bufferSubData.target = bufD->targetForDataOps;
|
||||||
cmd.args.bufferSubData.buffer = bufD->buffer;
|
cmd.args.bufferSubData.buffer = bufD->buffer;
|
||||||
cmd.args.bufferSubData.offset = u.offset;
|
cmd.args.bufferSubData.offset = u.offset;
|
||||||
cmd.args.bufferSubData.size = u.data.size();
|
cmd.args.bufferSubData.size = u.dataSize;
|
||||||
cmd.args.bufferSubData.data = cbD->retainData(u.data);
|
cmd.args.bufferSubData.data = cbD->retainData(u.data);
|
||||||
cbD->commands.append(cmd);
|
cbD->commands.append(cmd);
|
||||||
}
|
}
|
||||||
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
|
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
|
||||||
QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf);
|
QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf);
|
||||||
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
||||||
Q_ASSERT(u.offset + u.data.size() <= bufD->m_size);
|
Q_ASSERT(u.offset + u.dataSize <= bufD->m_size);
|
||||||
if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) {
|
if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) {
|
||||||
memcpy(bufD->ubuf.data() + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(bufD->ubuf.data() + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
} else {
|
} else {
|
||||||
trackedBufferBarrier(cbD, bufD, QGles2Buffer::AccessUpdate);
|
trackedBufferBarrier(cbD, bufD, QGles2Buffer::AccessUpdate);
|
||||||
QGles2CommandBuffer::Command cmd;
|
QGles2CommandBuffer::Command cmd;
|
||||||
@ -1712,7 +1712,7 @@ void QRhiGles2::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
cmd.args.bufferSubData.target = bufD->targetForDataOps;
|
cmd.args.bufferSubData.target = bufD->targetForDataOps;
|
||||||
cmd.args.bufferSubData.buffer = bufD->buffer;
|
cmd.args.bufferSubData.buffer = bufD->buffer;
|
||||||
cmd.args.bufferSubData.offset = u.offset;
|
cmd.args.bufferSubData.offset = u.offset;
|
||||||
cmd.args.bufferSubData.size = u.data.size();
|
cmd.args.bufferSubData.size = u.dataSize;
|
||||||
cmd.args.bufferSubData.data = cbD->retainData(u.data);
|
cmd.args.bufferSubData.data = cbD->retainData(u.data);
|
||||||
cbD->commands.append(cmd);
|
cbD->commands.append(cmd);
|
||||||
}
|
}
|
||||||
|
@ -1703,10 +1703,10 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
// basically the same. So go through the same pendingUpdates machinery.
|
// basically the same. So go through the same pendingUpdates machinery.
|
||||||
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
||||||
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
||||||
Q_ASSERT(u.offset + u.data.size() <= bufD->m_size);
|
Q_ASSERT(u.offset + u.dataSize <= bufD->m_size);
|
||||||
for (int i = 0, ie = bufD->d->slotted ? QMTL_FRAMES_IN_FLIGHT : 1; i != ie; ++i)
|
for (int i = 0, ie = bufD->d->slotted ? QMTL_FRAMES_IN_FLIGHT : 1; i != ie; ++i)
|
||||||
bufD->d->pendingUpdates[i].append(
|
bufD->d->pendingUpdates[i].append(
|
||||||
QRhiResourceUpdateBatchPrivate::BufferOp::dynamicUpdate(u.buf, u.offset, u.data.size(), u.data.constData()));
|
QRhiResourceUpdateBatchPrivate::BufferOp::dynamicUpdate(u.buf, u.offset, u.dataSize, u.data.constData()));
|
||||||
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) {
|
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) {
|
||||||
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
||||||
executeBufferHostWritesForCurrentFrame(bufD);
|
executeBufferHostWritesForCurrentFrame(bufD);
|
||||||
@ -1868,11 +1868,11 @@ void QRhiMetal::executeBufferHostWritesForSlot(QMetalBuffer *bufD, int slot)
|
|||||||
int changeEnd = -1;
|
int changeEnd = -1;
|
||||||
for (const QRhiResourceUpdateBatchPrivate::BufferOp &u : qAsConst(bufD->d->pendingUpdates[slot])) {
|
for (const QRhiResourceUpdateBatchPrivate::BufferOp &u : qAsConst(bufD->d->pendingUpdates[slot])) {
|
||||||
Q_ASSERT(bufD == QRHI_RES(QMetalBuffer, u.buf));
|
Q_ASSERT(bufD == QRHI_RES(QMetalBuffer, u.buf));
|
||||||
memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
if (changeBegin == -1 || u.offset < changeBegin)
|
if (changeBegin == -1 || u.offset < changeBegin)
|
||||||
changeBegin = u.offset;
|
changeBegin = u.offset;
|
||||||
if (changeEnd == -1 || u.offset + u.data.size() > changeEnd)
|
if (changeEnd == -1 || u.offset + u.dataSize > changeEnd)
|
||||||
changeEnd = u.offset + u.data.size();
|
changeEnd = u.offset + u.dataSize;
|
||||||
}
|
}
|
||||||
#ifdef Q_OS_MACOS
|
#ifdef Q_OS_MACOS
|
||||||
if (changeBegin >= 0 && bufD->d->managed)
|
if (changeBegin >= 0 && bufD->d->managed)
|
||||||
|
@ -465,7 +465,7 @@ void QRhiNull::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *re
|
|||||||
|| u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload)
|
|| u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload)
|
||||||
{
|
{
|
||||||
QNullBuffer *bufD = QRHI_RES(QNullBuffer, u.buf);
|
QNullBuffer *bufD = QRHI_RES(QNullBuffer, u.buf);
|
||||||
memcpy(bufD->data.data() + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(bufD->data.data() + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) {
|
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) {
|
||||||
QRhiBufferReadbackResult *result = u.result;
|
QRhiBufferReadbackResult *result = u.result;
|
||||||
result->data.resize(u.readSize);
|
result->data.resize(u.readSize);
|
||||||
|
@ -2933,7 +2933,7 @@ void QRhiVulkan::enqueueResourceUpdates(QVkCommandBuffer *cbD, QRhiResourceUpdat
|
|||||||
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
|
} else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
|
||||||
QVkBuffer *bufD = QRHI_RES(QVkBuffer, u.buf);
|
QVkBuffer *bufD = QRHI_RES(QVkBuffer, u.buf);
|
||||||
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
|
||||||
Q_ASSERT(u.offset + u.data.size() <= bufD->m_size);
|
Q_ASSERT(u.offset + u.dataSize <= bufD->m_size);
|
||||||
|
|
||||||
if (!bufD->stagingBuffers[currentFrameSlot]) {
|
if (!bufD->stagingBuffers[currentFrameSlot]) {
|
||||||
VkBufferCreateInfo bufferInfo;
|
VkBufferCreateInfo bufferInfo;
|
||||||
@ -2967,9 +2967,9 @@ void QRhiVulkan::enqueueResourceUpdates(QVkCommandBuffer *cbD, QRhiResourceUpdat
|
|||||||
qWarning("Failed to map buffer: %d", err);
|
qWarning("Failed to map buffer: %d", err);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
memcpy(static_cast<uchar *>(p) + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(static_cast<uchar *>(p) + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
vmaUnmapMemory(toVmaAllocator(allocator), a);
|
vmaUnmapMemory(toVmaAllocator(allocator), a);
|
||||||
vmaFlushAllocation(toVmaAllocator(allocator), a, VkDeviceSize(u.offset), VkDeviceSize(u.data.size()));
|
vmaFlushAllocation(toVmaAllocator(allocator), a, VkDeviceSize(u.offset), VkDeviceSize(u.dataSize));
|
||||||
|
|
||||||
trackedBufferBarrier(cbD, bufD, 0,
|
trackedBufferBarrier(cbD, bufD, 0,
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
|
||||||
@ -2978,7 +2978,7 @@ void QRhiVulkan::enqueueResourceUpdates(QVkCommandBuffer *cbD, QRhiResourceUpdat
|
|||||||
memset(&copyInfo, 0, sizeof(copyInfo));
|
memset(&copyInfo, 0, sizeof(copyInfo));
|
||||||
copyInfo.srcOffset = VkDeviceSize(u.offset);
|
copyInfo.srcOffset = VkDeviceSize(u.offset);
|
||||||
copyInfo.dstOffset = VkDeviceSize(u.offset);
|
copyInfo.dstOffset = VkDeviceSize(u.offset);
|
||||||
copyInfo.size = VkDeviceSize(u.data.size());
|
copyInfo.size = VkDeviceSize(u.dataSize);
|
||||||
|
|
||||||
QVkCommandBuffer::Command cmd;
|
QVkCommandBuffer::Command cmd;
|
||||||
cmd.cmd = QVkCommandBuffer::Command::CopyBuffer;
|
cmd.cmd = QVkCommandBuffer::Command::CopyBuffer;
|
||||||
@ -3428,11 +3428,11 @@ void QRhiVulkan::executeBufferHostWritesForSlot(QVkBuffer *bufD, int slot)
|
|||||||
int changeEnd = -1;
|
int changeEnd = -1;
|
||||||
for (const QRhiResourceUpdateBatchPrivate::BufferOp &u : qAsConst(bufD->pendingDynamicUpdates[slot])) {
|
for (const QRhiResourceUpdateBatchPrivate::BufferOp &u : qAsConst(bufD->pendingDynamicUpdates[slot])) {
|
||||||
Q_ASSERT(bufD == QRHI_RES(QVkBuffer, u.buf));
|
Q_ASSERT(bufD == QRHI_RES(QVkBuffer, u.buf));
|
||||||
memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.data.size()));
|
memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.dataSize));
|
||||||
if (changeBegin == -1 || u.offset < changeBegin)
|
if (changeBegin == -1 || u.offset < changeBegin)
|
||||||
changeBegin = u.offset;
|
changeBegin = u.offset;
|
||||||
if (changeEnd == -1 || u.offset + u.data.size() > changeEnd)
|
if (changeEnd == -1 || u.offset + u.dataSize > changeEnd)
|
||||||
changeEnd = u.offset + u.data.size();
|
changeEnd = u.offset + u.dataSize;
|
||||||
}
|
}
|
||||||
vmaUnmapMemory(toVmaAllocator(allocator), a);
|
vmaUnmapMemory(toVmaAllocator(allocator), a);
|
||||||
if (changeBegin >= 0)
|
if (changeBegin >= 0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user