RHI: Buffer readback fixes and unit test
Fixes issues with readback of storage buffers modified on the GPU for D3D and Metal. Adds a unit test for storage buffer readback.

D3D:
* Fixes an issue where the QRhiBufferReadbackResult::completed callback could be called twice on buffer readback completion.

Metal:
* Fixes an issue where buffer readback occurred prior to the command buffer being committed.

Change-Id: If55ac005f4438d66d2f65ea2e1ee0d5686c884ff
Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
This commit is contained in:
parent
2946447f50
commit
509fd9f2bb
@ -1631,6 +1631,8 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
if (bufD->m_type == QRhiBuffer::Dynamic) {
|
if (bufD->m_type == QRhiBuffer::Dynamic) {
|
||||||
u.result->data.resize(u.readSize);
|
u.result->data.resize(u.readSize);
|
||||||
memcpy(u.result->data.data(), bufD->dynBuf + u.offset, size_t(u.readSize));
|
memcpy(u.result->data.data(), bufD->dynBuf + u.offset, size_t(u.readSize));
|
||||||
|
if (u.result->completed)
|
||||||
|
u.result->completed();
|
||||||
} else {
|
} else {
|
||||||
BufferReadback readback;
|
BufferReadback readback;
|
||||||
readback.result = u.result;
|
readback.result = u.result;
|
||||||
@ -1666,8 +1668,6 @@ void QRhiD3D11::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
|
|
||||||
activeBufferReadbacks.append(readback);
|
activeBufferReadbacks.append(readback);
|
||||||
}
|
}
|
||||||
if (u.result->completed)
|
|
||||||
u.result->completed();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
|
for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
|
||||||
|
@ -208,6 +208,17 @@ struct QRhiMetalData
|
|||||||
};
|
};
|
||||||
QVarLengthArray<TextureReadback, 2> activeTextureReadbacks;
|
QVarLengthArray<TextureReadback, 2> activeTextureReadbacks;
|
||||||
|
|
||||||
|
struct BufferReadback
|
||||||
|
{
|
||||||
|
int activeFrameSlot = -1;
|
||||||
|
QRhiBufferReadbackResult *result;
|
||||||
|
quint32 offset;
|
||||||
|
quint32 readSize;
|
||||||
|
id<MTLBuffer> buf;
|
||||||
|
};
|
||||||
|
|
||||||
|
QVarLengthArray<BufferReadback, 2> activeBufferReadbacks;
|
||||||
|
|
||||||
MTLCaptureManager *captureMgr;
|
MTLCaptureManager *captureMgr;
|
||||||
id<MTLCaptureScope> captureScope = nil;
|
id<MTLCaptureScope> captureScope = nil;
|
||||||
|
|
||||||
@ -2430,13 +2441,23 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
|
|||||||
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
|
||||||
executeBufferHostWritesForCurrentFrame(bufD);
|
executeBufferHostWritesForCurrentFrame(bufD);
|
||||||
const int idx = bufD->d->slotted ? currentFrameSlot : 0;
|
const int idx = bufD->d->slotted ? currentFrameSlot : 0;
|
||||||
char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
|
if (bufD->m_type == QRhiBuffer::Dynamic) {
|
||||||
if (p) {
|
char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
|
||||||
u.result->data.resize(u.readSize);
|
if (p) {
|
||||||
memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
|
u.result->data.resize(u.readSize);
|
||||||
|
memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
|
||||||
|
}
|
||||||
|
if (u.result->completed)
|
||||||
|
u.result->completed();
|
||||||
|
} else {
|
||||||
|
QRhiMetalData::BufferReadback readback;
|
||||||
|
readback.activeFrameSlot = idx;
|
||||||
|
readback.buf = bufD->d->buf[idx];
|
||||||
|
readback.offset = u.offset;
|
||||||
|
readback.readSize = u.readSize;
|
||||||
|
readback.result = u.result;
|
||||||
|
d->activeBufferReadbacks.append(readback);
|
||||||
}
|
}
|
||||||
if (u.result->completed)
|
|
||||||
u.result->completed();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2867,7 +2888,23 @@ void QRhiMetal::finishActiveReadbacks(bool forced)
|
|||||||
if (readback.result->completed)
|
if (readback.result->completed)
|
||||||
completedCallbacks.append(readback.result->completed);
|
completedCallbacks.append(readback.result->completed);
|
||||||
|
|
||||||
d->activeTextureReadbacks.removeLast();
|
d->activeTextureReadbacks.remove(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) {
|
||||||
|
const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]);
|
||||||
|
if (forced || currentFrameSlot == readback.activeFrameSlot
|
||||||
|
|| readback.activeFrameSlot < 0) {
|
||||||
|
readback.result->data.resize(readback.readSize);
|
||||||
|
char *p = reinterpret_cast<char *>([readback.buf contents]);
|
||||||
|
Q_ASSERT(p);
|
||||||
|
memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize));
|
||||||
|
|
||||||
|
if (readback.result->completed)
|
||||||
|
completedCallbacks.append(readback.result->completed);
|
||||||
|
|
||||||
|
d->activeBufferReadbacks.remove(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,3 +15,5 @@ qsb --glsl 320es,410 --msl 12 --msltess simpletess.vert -o simpletess.vert.qsb
|
|||||||
qsb --glsl 320es,410 --msl 12 --tess-mode triangles simpletess.tesc -o simpletess.tesc.qsb
|
qsb --glsl 320es,410 --msl 12 --tess-mode triangles simpletess.tesc -o simpletess.tesc.qsb
|
||||||
qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb
|
qsb --glsl 320es,410 --msl 12 --tess-vertex-count 3 simpletess.tese -o simpletess.tese.qsb
|
||||||
qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb
|
qsb --glsl 320es,410 --msl 12 simpletess.frag -o simpletess.frag.qsb
|
||||||
|
qsb --glsl 310es,430 --msl 12 --hlsl 50 storagebuffer.comp -o storagebuffer.comp.qsb
|
||||||
|
|
||||||
|
28
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp
Normal file
28
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#version 430

// Compute shader used by the tst_QRhi storageBuffer test.
//
// Converts each float-typed member of the "toGpu" storage block to the
// int-typed member of the same width in the "fromGpu" storage block.

// One invocation per work group; main() writes every member itself.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Source values (std430 layout). Block and member names are looked up by name
// in the test through shader reflection, so they must not be renamed.
layout(binding = 0, std430) buffer toGpu
{
    float _float;
    vec2  _vec2;
    vec3  _vec3;
    vec4  _vec4;
};

// Converted values (std140 layout), read back and checked by the test.
layout(binding = 1, std140) buffer fromGpu
{
    int   _int;
    ivec2 _ivec2;
    ivec3 _ivec3;
    ivec4 _ivec4;
};

void main()
{
    // The four conversions are independent of each other.
    _ivec4 = ivec4(_vec4);
    _ivec3 = ivec3(_vec3);
    _ivec2 = ivec2(_vec2);
    _int   = int(_float);
}
|
||||||
|
|
BIN
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp.qsb
Normal file
BIN
tests/auto/gui/rhi/qrhi/data/storagebuffer.comp.qsb
Normal file
Binary file not shown.
@ -141,6 +141,9 @@ private slots:
|
|||||||
void tessellation_data();
|
void tessellation_data();
|
||||||
void tessellation();
|
void tessellation();
|
||||||
|
|
||||||
|
void storageBuffer_data();
|
||||||
|
void storageBuffer();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void setWindowType(QWindow *window, QRhi::Implementation impl);
|
void setWindowType(QWindow *window, QRhi::Implementation impl);
|
||||||
|
|
||||||
@ -5572,5 +5575,125 @@ void tst_QRhi::tessellation()
|
|||||||
QVERIFY(greenCount > 50);
|
QVERIFY(greenCount > 50);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tst_QRhi::storageBuffer_data()
|
||||||
|
{
|
||||||
|
rhiTestData();
|
||||||
|
}
|
||||||
|
|
||||||
|
void tst_QRhi::storageBuffer()
|
||||||
|
{
|
||||||
|
// Use a compute shader to copy from one storage buffer of float types to
|
||||||
|
// another of int types. We fill the "toGpu" buffer with known float type
|
||||||
|
// data generated and uploaded from the CPU, then dispatch a compute shader
|
||||||
|
// to copy from the "toGpu" buffer to the "fromGpu" buffer. We then
|
||||||
|
// readback the "fromGpu" buffer and verify that the results are as
|
||||||
|
// expected.
|
||||||
|
|
||||||
|
QFETCH(QRhi::Implementation, impl);
|
||||||
|
QFETCH(QRhiInitParams *, initParams);
|
||||||
|
|
||||||
|
// we can't test with Null as there is no compute
|
||||||
|
if (impl == QRhi::Null)
|
||||||
|
return;
|
||||||
|
|
||||||
|
QScopedPointer<QRhi> rhi(QRhi::create(impl, initParams, QRhi::Flags(), nullptr));
|
||||||
|
if (!rhi)
|
||||||
|
QSKIP("QRhi could not be created, skipping testing");
|
||||||
|
|
||||||
|
if (!rhi->isFeatureSupported(QRhi::Feature::Compute))
|
||||||
|
QSKIP("Compute is not supported with this graphics API, skipping test");
|
||||||
|
|
||||||
|
QShader s = loadShader(":/data/storagebuffer.comp.qsb");
|
||||||
|
QVERIFY(s.isValid());
|
||||||
|
QCOMPARE(s.description().storageBlocks().size(), 2);
|
||||||
|
|
||||||
|
QMap<QByteArray, QShaderDescription::StorageBlock> blocks;
|
||||||
|
for (const QShaderDescription::StorageBlock &block : s.description().storageBlocks())
|
||||||
|
blocks[block.blockName] = block;
|
||||||
|
|
||||||
|
QMap<QByteArray, QShaderDescription::BlockVariable> toGpuMembers;
|
||||||
|
for (const QShaderDescription::BlockVariable &member: blocks["toGpu"].members)
|
||||||
|
toGpuMembers[member.name] = member;
|
||||||
|
|
||||||
|
QMap<QByteArray, QShaderDescription::BlockVariable> fromGpuMembers;
|
||||||
|
for (const QShaderDescription::BlockVariable &member: blocks["fromGpu"].members)
|
||||||
|
fromGpuMembers[member.name] = member;
|
||||||
|
|
||||||
|
for (QRhiBuffer::Type type : {QRhiBuffer::Type::Immutable, QRhiBuffer::Type::Static}) {
|
||||||
|
|
||||||
|
QRhiCommandBuffer *cb = nullptr;
|
||||||
|
rhi->beginOffscreenFrame(&cb);
|
||||||
|
QVERIFY(cb);
|
||||||
|
|
||||||
|
QRhiResourceUpdateBatch *u = rhi->nextResourceUpdateBatch();
|
||||||
|
QVERIFY(u);
|
||||||
|
|
||||||
|
QScopedPointer<QRhiBuffer> toGpuBuffer(rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, blocks["toGpu"].knownSize));
|
||||||
|
QVERIFY(toGpuBuffer->create());
|
||||||
|
|
||||||
|
QScopedPointer<QRhiBuffer> fromGpuBuffer(rhi->newBuffer(type, QRhiBuffer::UsageFlag::StorageBuffer, blocks["fromGpu"].knownSize));
|
||||||
|
QVERIFY(fromGpuBuffer->create());
|
||||||
|
|
||||||
|
QByteArray toGpuData(blocks["toGpu"].knownSize, 0);
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_float"].offset])[0] = 1.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec2"].offset])[0] = 2.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec2"].offset])[1] = 3.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[0] = 4.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[1] = 5.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec3"].offset])[2] = 6.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[0] = 7.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[1] = 8.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[2] = 9.0f;
|
||||||
|
reinterpret_cast<float *>(&toGpuData.data()[toGpuMembers["_vec4"].offset])[3] = 10.0f;
|
||||||
|
|
||||||
|
u->uploadStaticBuffer(toGpuBuffer.data(), 0, toGpuData.size(), toGpuData.constData());
|
||||||
|
u->uploadStaticBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, QByteArray(blocks["fromGpu"].knownSize, 0).constData());
|
||||||
|
|
||||||
|
QScopedPointer<QRhiShaderResourceBindings> srb(rhi->newShaderResourceBindings());
|
||||||
|
srb->setBindings({QRhiShaderResourceBinding::bufferLoadStore(blocks["toGpu"].binding, QRhiShaderResourceBinding::ComputeStage, toGpuBuffer.data()),
|
||||||
|
QRhiShaderResourceBinding::bufferLoadStore(blocks["fromGpu"].binding, QRhiShaderResourceBinding::ComputeStage, fromGpuBuffer.data())});
|
||||||
|
|
||||||
|
QVERIFY(srb->create());
|
||||||
|
|
||||||
|
QScopedPointer<QRhiComputePipeline> pipeline(rhi->newComputePipeline());
|
||||||
|
pipeline->setShaderStage({QRhiShaderStage::Compute, s});
|
||||||
|
pipeline->setShaderResourceBindings(srb.data());
|
||||||
|
QVERIFY(pipeline->create());
|
||||||
|
|
||||||
|
cb->beginComputePass(u);
|
||||||
|
|
||||||
|
cb->setComputePipeline(pipeline.data());
|
||||||
|
cb->setShaderResources();
|
||||||
|
cb->dispatch(1, 1, 1);
|
||||||
|
|
||||||
|
u = rhi->nextResourceUpdateBatch();
|
||||||
|
QVERIFY(u);
|
||||||
|
|
||||||
|
int readCompletedNotifications = 0;
|
||||||
|
QRhiBufferReadbackResult result;
|
||||||
|
result.completed = [&readCompletedNotifications]() { readCompletedNotifications++; };
|
||||||
|
u->readBackBuffer(fromGpuBuffer.data(), 0, blocks["fromGpu"].knownSize, &result);
|
||||||
|
|
||||||
|
cb->endComputePass(u);
|
||||||
|
|
||||||
|
rhi->endOffscreenFrame();
|
||||||
|
|
||||||
|
QCOMPARE(readCompletedNotifications, 1);
|
||||||
|
|
||||||
|
QCOMPARE(result.data.size(), blocks["fromGpu"].knownSize);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_int"].offset])[0], 1);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec2"].offset])[0], 2);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec2"].offset])[1], 3);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[0], 4);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[1], 5);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec3"].offset])[2], 6);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[0], 7);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[1], 8);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[2], 9);
|
||||||
|
QCOMPARE(reinterpret_cast<const int *>(&result.data.constData()[fromGpuMembers["_ivec4"].offset])[3], 10);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#include <tst_qrhi.moc>
|
#include <tst_qrhi.moc>
|
||||||
QTEST_MAIN(tst_QRhi)
|
QTEST_MAIN(tst_QRhi)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user