RHI: Buffer readback fixes and unit test
Fixes issues with readback of storage buffers modified on GPU for D3D and Metal. Adds unit test for storage buffer readback. D3D * Fixes issue where QRhiBufferReadbackResult::completed callback could be called twice on buffer readback completion. Metal * Fixes issue where buffer readback occurred prior to command buffer being committed. Change-Id: If55ac005f4438d66d2f65ea2e1ee0d5686c884ff Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
This commit is contained in:
parent
2946447f50
commit
509fd9f2bb
@ -1631,6 +1631,8 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
||||
if (bufD->m_type == QRhiBuffer::Dynamic) {
|
||||
u.result->data.resize(u.readSize);
|
||||
memcpy(u.result->data.data(), bufD->dynBuf + u.offset, size_t(u.readSize));
|
||||
if (u.result->completed)
|
||||
u.result->completed();
|
||||
} else {
|
||||
BufferReadback readback;
|
||||
readback.result = u.result;
|
||||
@ -1666,8 +1668,6 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
||||
|
||||
activeBufferReadbacks.append(readback);
|
||||
}
|
||||
if (u.result->completed)
|
||||
u.result->completed();
|
||||
}
|
||||
}
|
||||
for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
|
||||
|
@ -208,6 +208,17 @@ struct QRhiMetalData
|
||||
};
|
||||
QVarLengthArray<TextureReadback, 2> activeTextureReadbacks;
|
||||
|
||||
struct BufferReadback
|
||||
{
|
||||
int activeFrameSlot = -1;
|
||||
QRhiBufferReadbackResult *result;
|
||||
quint32 offset;
|
||||
quint32 readSize;
|
||||
id<MTLBuffer> buf;
|
||||
};
|
||||
|
||||
QVarLengthArray<BufferReadback, 2> activeBufferReadbacks;
|
||||
|
||||
MTLCaptureManager *captureMgr;
|
||||
id<MTLCaptureScope> captureScope = nil;
|
||||
|
||||
@ -2430,13 +2441,23 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
||||
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
||||
executeBufferHostWritesForCurrentFrame(bufD);
|
||||
const int idx = bufD->d->slotted ? currentFrameSlot : 0;
|
||||
char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
|
||||
if (p) {
|
||||
u.result->data.resize(u.readSize);
|
||||
memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
|
||||
if (bufD->m_type == QRhiBuffer::Dynamic) {
|
||||
char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
|
||||
if (p) {
|
||||
u.result->data.resize(u.readSize);
|
||||
memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
|
||||
}
|
||||
if (u.result->completed)
|
||||
u.result->completed();
|
||||
} else {
|
||||
QRhiMetalData::BufferReadback readback;
|
||||
readback.activeFrameSlot = idx;
|
||||
readback.buf = bufD->d->buf[idx];
|
||||
readback.offset = u.offset;
|
||||
readback.readSize = u.readSize;
|
||||
readback.result = u.result;
|
||||
d->activeBufferReadbacks.append(readback);
|
||||
}
|
||||
if (u.result->completed)
|
||||
u.result->completed();
|
||||
}
|
||||
}
|
||||
|
||||
@ -2867,7 +2888,23 @@ void QRhiMetal::finishActiveReadbacks(bool forced)
|
||||
if (readback.result->completed)
|
||||
completedCallbacks.append(readback.result->completed);
|
||||
|
||||
d->activeTextureReadbacks.removeLast();
|
||||
d->activeTextureReadbacks.remove(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) {
|
||||
const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]);
|
||||
if (forced || currentFrameSlot == readback.activeFrameSlot
|
||||
|| readback.activeFrameSlot < 0) {
|
||||
readback.result->data.resize(readback.readSize);
|
||||
char *p = reinterpret_cast<char *>([readback.buf contents]);
|
||||
Q_ASSERT(p);
|
||||
memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize));
|
||||
|
||||
if (readback.result->completed)
|
||||
completedCallbacks.append(readback.result->completed);
|
||||
|
||||
d->activeBufferReadbacks.remove(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,3 +15,5 @@ qsb --glsl 320es,410 --msl 12 --msltess simpletess.vert -o simpletess.vert.qsb
|
||||
qsb --glsl 320es,410 --msl 12 --tess-mode triangles simpletess.tesc -o simpletess.tesc.qsb
|
||||
qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb
|
||||
qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb
|
||||
qsb --glsl 310es,430 --msl 12 --hlsl 50 storagebuffer.comp -o storagebuffer.comp.qsb
|
||||
|
||||
|
28
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp
Normal file
28
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp
Normal file
@ -0,0 +1,28 @@
|
||||
#version 430
|
||||
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
|
||||
layout (binding = 0, std430) buffer toGpu
|
||||
{
|
||||
float _float;
|
||||
vec2 _vec2;
|
||||
vec3 _vec3;
|
||||
vec4 _vec4;
|
||||
};
|
||||
|
||||
layout (binding = 1, std140) buffer fromGpu
|
||||
{
|
||||
int _int;
|
||||
ivec2 _ivec2;
|
||||
ivec3 _ivec3;
|
||||
ivec4 _ivec4;
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
_int = int(_float);
|
||||
_ivec2 = ivec2(_vec2);
|
||||
_ivec3 = ivec3(_vec3);
|
||||
_ivec4 = ivec4(_vec4);
|
||||
}
|
||||
|
BIN
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp.qsb
Normal file
BIN
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp.qsb
Normal file
Binary file not shown.
@ -141,6 +141,9 @@ private slots:
|
||||
void tessellation_data();
|
||||
void tessellation();
|
||||
|
||||
void storageBuffer_data();
|
||||
void storageBuffer();
|
||||
|
||||
private:
|
||||
void setWindowType(QWindow *window, QRhi::Implementation impl);
|
||||
|
||||
@ -5572,5 +5575,125 @@ void tst_QRhi::tessellation()
|
||||
QVERIFY(greenCount > 50);
|
||||
}
|
||||
|
||||
void tst_QRhi::storageBuffer_data()
|
||||
{
|
||||
rhiTestData();
|
||||
}
|
||||
|
||||
void tst_QRhi::storageBuffer()
|
||||
{
|
||||
// Use a compute shader to copy from one storage buffer of float types to
|
||||
// another of int types. We fill the "toGpu" buffer with known float type
|
||||
// data generated and uploaded from the CPU, then dispatch a compute shader
|
||||
// to copy from the "toGpu" buffer to the "fromGpu" buffer. We then
|
||||
// readback the "fromGpu" buffer and verify that the results are as
|
||||
// expected.
|
||||
|
||||
QFETCH(QRhi::Implementation, impl);
|
||||
QFETCH(QRhiInitParams *, initParams);
|
||||
|
||||
// we can't test with Null as there is no compute
|
||||
if (impl == QRhi::Null)
|
||||
return;
|
||||
|
||||
QScopedPointer<QRhi> rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr));
|
||||
if (!rhi)
|
||||
QSKIP("QRhi could not be created, skipping testing");
|
||||
|
||||
if (!rhi->isFeatureSupported(QRhi::Feature::Compute))
|
||||
QSKIP("Compute is not supported with this graphics API, skipping test");
|
||||
|
||||
QShader s = loadShader(":/data/storagebuffer.comp.qsb");
|
||||
QVERIFY(s.isValid());
|
||||
QCOMPARE(s.description().storageBlocks().size(), 2);
|
||||
|
||||
QMap<QByteArray, QShaderDescription::StorageBlock> blocks;
|
||||
for (const QShaderDescription::StorageBlock &block : s.description().storageBlocks())
|
||||
blocks[block.blockName] = block;
|
||||
|
||||
QMap<QByteArray, QShaderDescription::BlockVariable> toGpuMembers;
|
||||
for (const QShaderDescription::BlockVariable &member: blocks["toGpu"].members)
|
||||
toGpuMembers[member.name] = member;
|
||||
|
||||
QMap<QByteArray, QShaderDescription::BlockVariable> fromGpuMembers;
|
||||
for (const QShaderDescription::BlockVariable &member: blocks["fromGpu"].members)
|
||||
fromGpuMembers[member.name] = member;
|
||||
|
||||
for (QRhiBuffer::Type type : {QRhiBuffer::Type::Immutable, QRhiBuffer::Type::Static}) {
|
||||
|
||||
QRhiCommandBuffer *cb = nullptr;
|
||||
rhi->beginOffscreenFrame(&cb);
|
||||
QVERIFY(cb);
|
||||
|
||||
QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch();
|
||||
QVERIFY(u);
|
||||
|
||||
QScopedPointer<QRhiBuffer> toGpuBuffer(rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, blocks["toGpu"].knownSize));
|
||||
QVERIFY(toGpuBuffer->create());
|
||||
|
||||
QScopedPointer<QRhiBuffer> fromGpuBuffer(rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, blocks["fromGpu"].knownSize));
|
||||
QVERIFY(fromGpuBuffer->create());
|
||||
|
||||
QByteArray toGpuData(blocks["toGpu"].knownSize, 0);
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_float"].offset])[0] = 1.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec2"].offset])[0] = 2.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec2"].offset])[1] = 3.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[0] = 4.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[1] = 5.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[2] = 6.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[0] = 7.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[1] = 8.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[2] = 9.0f;
|
||||
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[3] = 10.0f;
|
||||
|
||||
u->uploadStaticBuffer(toGpuBuffer.data(), 0, toGpuData.size(), toGpuData.constData());
|
||||
u->uploadStaticBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, QByteArray(blocks["fromGpu"].knownSize, 0).constData());
|
||||
|
||||
QScopedPointer<QRhiShaderResourceBindings> srb(rhi->newShaderResourceBindings());
|
||||
srb->setBindings({QRhiShaderResourceBinding::bufferLoadStore(blocks["toGpu"].binding, QRhiShaderResourceBinding::ComputeStage, toGpuBuffer.data()),
|
||||
QRhiShaderResourceBinding::bufferLoadStore(blocks["fromGpu"].binding, QRhiShaderResourceBinding::ComputeStage, fromGpuBuffer.data())});
|
||||
|
||||
QVERIFY(srb->create());
|
||||
|
||||
QScopedPointer<QRhiComputePipeline> pipeline(rhi->newComputePipeline());
|
||||
pipeline->setShaderStage({QRhiShaderStage::Compute, s});
|
||||
pipeline->setShaderResourceBindings(srb.data());
|
||||
QVERIFY(pipeline->create());
|
||||
|
||||
cb->beginComputePass(u);
|
||||
|
||||
cb->setComputePipeline(pipeline.data());
|
||||
cb->setShaderResources();
|
||||
cb->dispatch(1, 1, 1);
|
||||
|
||||
u = rhi->nextResourceUpdateBatch();
|
||||
QVERIFY(u);
|
||||
|
||||
int readCompletedNotifications = 0;
|
||||
QRhiBufferReadbackResult result;
|
||||
result.completed = [&readCompletedNotifications]() { readCompletedNotifications++; };
|
||||
u->readBackBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, &result);
|
||||
|
||||
cb->endComputePass(u);
|
||||
|
||||
rhi->endOffscreenFrame();
|
||||
|
||||
QCOMPARE(readCompletedNotifications, 1);
|
||||
|
||||
QCOMPARE(result.data.size(), blocks["fromGpu"].knownSize);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_int"].offset])[0], 1);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec2"].offset])[0], 2);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec2"].offset])[1], 3);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[0], 4);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[1], 5);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[2], 6);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[0], 7);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[1], 8);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[2], 9);
|
||||
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[3], 10);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#include <tst_qrhi.moc>
|
||||
QTEST_MAIN(tst_QRhi)
|
||||
|
Loading…
x
Reference in New Issue
Block a user