RHI: Buffer readback fixes and unit test

Fixes issues with readback of storage buffers modified on GPU for D3D
and Metal.  Adds unit test for storage buffer readback.

D3D
*  Fixes issue where QRhiBufferReadbackResult::completed callback could
be called twice on buffer readback completion.

Metal
* Fixes issue where buffer readback occurred prior to command buffer
being committed.

Change-Id: If55ac005f4438d66d2f65ea2e1ee0d5686c884ff
Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
This commit is contained in:
Ben Fletcher 2022-12-08 21:34:12 -08:00
parent 2946447f50
commit 509fd9f2bb
6 changed files with 199 additions and 9 deletions

View File

@ -1631,6 +1631,8 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
if (bufD->m_type == QRhiBuffer::Dynamic) {
u.result->data.resize(u.readSize);
memcpy(u.result->data.data(), bufD->dynBuf + u.offset, size_t(u.readSize));
if (u.result->completed)
u.result->completed();
} else {
BufferReadback readback;
readback.result = u.result;
@ -1666,8 +1668,6 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
activeBufferReadbacks.append(readback);
}
if (u.result->completed)
u.result->completed();
}
}
for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {

View File

@ -208,6 +208,17 @@ struct QRhiMetalData
};
QVarLengthArray<TextureReadback, 2> activeTextureReadbacks;
struct BufferReadback
{
int activeFrameSlot = -1;
QRhiBufferReadbackResult *result;
quint32 offset;
quint32 readSize;
id<MTLBuffer> buf;
};
QVarLengthArray<BufferReadback, 2> activeBufferReadbacks;
MTLCaptureManager *captureMgr;
id<MTLCaptureScope> captureScope = nil;
@ -2430,13 +2441,23 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
executeBufferHostWritesForCurrentFrame(bufD);
const int idx = bufD->d->slotted ? currentFrameSlot : 0;
char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
if (p) {
u.result->data.resize(u.readSize);
memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
if (bufD->m_type == QRhiBuffer::Dynamic) {
char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
if (p) {
u.result->data.resize(u.readSize);
memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
}
if (u.result->completed)
u.result->completed();
} else {
QRhiMetalData::BufferReadback readback;
readback.activeFrameSlot = idx;
readback.buf = bufD->d->buf[idx];
readback.offset = u.offset;
readback.readSize = u.readSize;
readback.result = u.result;
d->activeBufferReadbacks.append(readback);
}
if (u.result->completed)
u.result->completed();
}
}
@ -2867,7 +2888,23 @@ void QRhiMetal::finishActiveReadbacks(bool forced)
if (readback.result->completed)
completedCallbacks.append(readback.result->completed);
d->activeTextureReadbacks.removeLast();
d->activeTextureReadbacks.remove(i);
}
}
for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) {
const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]);
if (forced || currentFrameSlot == readback.activeFrameSlot
|| readback.activeFrameSlot < 0) {
readback.result->data.resize(readback.readSize);
char *p = reinterpret_cast<char *>([readback.buf contents]);
Q_ASSERT(p);
memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize));
if (readback.result->completed)
completedCallbacks.append(readback.result->completed);
d->activeBufferReadbacks.remove(i);
}
}

View File

@ -15,3 +15,5 @@ qsb --glsl 320es,410 --msl 12 --msltess simpletess.vert -o simpletess.vert.qsb
qsb --glsl 320es,410 --msl 12 --tess-mode triangles simpletess.tesc -o simpletess.tesc.qsb
qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb
qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb
qsb --glsl 310es,430 --msl 12 --hlsl 50 storagebuffer.comp -o storagebuffer.comp.qsb

View File

@ -0,0 +1,28 @@
#version 430
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout (binding = 0, std430) buffer toGpu
{
float _float;
vec2 _vec2;
vec3 _vec3;
vec4 _vec4;
};
layout (binding = 1, std140) buffer fromGpu
{
int _int;
ivec2 _ivec2;
ivec3 _ivec3;
ivec4 _ivec4;
};
void main()
{
_int = int(_float);
_ivec2 = ivec2(_vec2);
_ivec3 = ivec3(_vec3);
_ivec4 = ivec4(_vec4);
}

Binary file not shown.

View File

@ -141,6 +141,9 @@ private slots:
void tessellation_data();
void tessellation();
void storageBuffer_data();
void storageBuffer();
private:
void setWindowType(QWindow *window, QRhi::Implementation impl);
@ -5572,5 +5575,125 @@ void tst_QRhi::tessellation()
QVERIFY(greenCount > 50);
}
void tst_QRhi::storageBuffer_data()
{
rhiTestData();
}
void tst_QRhi::storageBuffer()
{
// Use a compute shader to copy from one storage buffer of float types to
// another of int types. We fill the "toGpu" buffer with known float type
// data generated and uploaded from the CPU, then dispatch a compute shader
// to copy from the "toGpu" buffer to the "fromGpu" buffer. We then
// readback the "fromGpu" buffer and verify that the results are as
// expected.
QFETCH(QRhi::Implementation, impl);
QFETCH(QRhiInitParams *, initParams);
// we can't test with Null as there is no compute
if (impl == QRhi::Null)
return;
QScopedPointer<QRhi> rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr));
if (!rhi)
QSKIP("QRhi could not be created, skipping testing");
if (!rhi->isFeatureSupported(QRhi::Feature::Compute))
QSKIP("Compute is not supported with this graphics API, skipping test");
QShader s = loadShader(":/data/storagebuffer.comp.qsb");
QVERIFY(s.isValid());
QCOMPARE(s.description().storageBlocks().size(), 2);
QMap<QByteArray, QShaderDescription::StorageBlock> blocks;
for (const QShaderDescription::StorageBlock &block : s.description().storageBlocks())
blocks[block.blockName] = block;
QMap<QByteArray, QShaderDescription::BlockVariable> toGpuMembers;
for (const QShaderDescription::BlockVariable &member: blocks["toGpu"].members)
toGpuMembers[member.name] = member;
QMap<QByteArray, QShaderDescription::BlockVariable> fromGpuMembers;
for (const QShaderDescription::BlockVariable &member: blocks["fromGpu"].members)
fromGpuMembers[member.name] = member;
for (QRhiBuffer::Type type : {QRhiBuffer::Type::Immutable, QRhiBuffer::Type::Static}) {
QRhiCommandBuffer *cb = nullptr;
rhi->beginOffscreenFrame(&cb);
QVERIFY(cb);
QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch();
QVERIFY(u);
QScopedPointer<QRhiBuffer> toGpuBuffer(rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, blocks["toGpu"].knownSize));
QVERIFY(toGpuBuffer->create());
QScopedPointer<QRhiBuffer> fromGpuBuffer(rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, blocks["fromGpu"].knownSize));
QVERIFY(fromGpuBuffer->create());
QByteArray toGpuData(blocks["toGpu"].knownSize, 0);
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_float"].offset])[0] = 1.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec2"].offset])[0] = 2.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec2"].offset])[1] = 3.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[0] = 4.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[1] = 5.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[2] = 6.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[0] = 7.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[1] = 8.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[2] = 9.0f;
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[3] = 10.0f;
u->uploadStaticBuffer(toGpuBuffer.data(), 0, toGpuData.size(), toGpuData.constData());
u->uploadStaticBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, QByteArray(blocks["fromGpu"].knownSize, 0).constData());
QScopedPointer<QRhiShaderResourceBindings> srb(rhi->newShaderResourceBindings());
srb->setBindings({QRhiShaderResourceBinding::bufferLoadStore(blocks["toGpu"].binding, QRhiShaderResourceBinding::ComputeStage, toGpuBuffer.data()),
QRhiShaderResourceBinding::bufferLoadStore(blocks["fromGpu"].binding, QRhiShaderResourceBinding::ComputeStage, fromGpuBuffer.data())});
QVERIFY(srb->create());
QScopedPointer<QRhiComputePipeline> pipeline(rhi->newComputePipeline());
pipeline->setShaderStage({QRhiShaderStage::Compute, s});
pipeline->setShaderResourceBindings(srb.data());
QVERIFY(pipeline->create());
cb->beginComputePass(u);
cb->setComputePipeline(pipeline.data());
cb->setShaderResources();
cb->dispatch(1, 1, 1);
u = rhi->nextResourceUpdateBatch();
QVERIFY(u);
int readCompletedNotifications = 0;
QRhiBufferReadbackResult result;
result.completed = [&readCompletedNotifications]() { readCompletedNotifications++; };
u->readBackBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, &result);
cb->endComputePass(u);
rhi->endOffscreenFrame();
QCOMPARE(readCompletedNotifications, 1);
QCOMPARE(result.data.size(), blocks["fromGpu"].knownSize);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_int"].offset])[0], 1);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec2"].offset])[0], 2);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec2"].offset])[1], 3);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[0], 4);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[1], 5);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[2], 6);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[0], 7);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[1], 8);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[2], 9);
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[3], 10);
}
}
#include <tst_qrhi.moc>
QTEST_MAIN(tst_QRhi)