rhi: Replace the temporary GPU time query API with a saner one
Modeled after Metal's cb.GPUStart/EndTime. Implemented with timestamp queries for other APIs. Implemented for Metal, D3D11, Vulkan for now. No more callback, just a getter on the command buffer which returns the latest known value, referring to some previous frame. This makes it a lot more usable than the original solution that is not really used anywhere at the moment. Now works for offscreen "frames" as well, this was not implemented before. Opt in with a new QRhi::create() flag because we cannot tell in advance if the getter will be called or not, and this way we can skip recording the timestamps by default. The cost is probably minimal, though. Qt Quick will set this automatically when running with QSG_RHI_PROFILE=1. Change-Id: I903779984a4e0bbf1d03806d04bf61571ce23d72 Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
This commit is contained in:
parent
62a4ca773a
commit
e539e9a7af
@ -418,8 +418,15 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general")
|
||||
\value EnableDebugMarkers Enables debug marker groups. Without this frame
|
||||
debugging features like making debug groups and custom resource name
|
||||
visible in external GPU debugging tools will not be available and functions
|
||||
like QRhiCommandBuffer::debugMarkBegin() will become a no-op. Avoid
|
||||
enabling in production builds as it may involve a performance penalty.
|
||||
like QRhiCommandBuffer::debugMarkBegin() will become no-ops. Avoid enabling
|
||||
in production builds as it may involve a small performance impact. Has no
|
||||
effect when the QRhi::DebugMarkers feature is not reported as supported.
|
||||
|
||||
\value EnableTimestamps Enables GPU timestamp collection. When not set,
|
||||
QRhiCommandBuffer::lastCompletedGpuTime() always returns 0. Enable this
|
||||
only when needed since there may be a small amount of extra work involved
|
||||
(e.g. timestamp queries), depending on the underlying graphics API. Has no
|
||||
effect when the QRhi::Timestamps feature is not reported as supported.
|
||||
|
||||
\value PreferSoftwareRenderer Indicates that backends should prefer
|
||||
choosing an adapter or physical device that renders in software on the CPU.
|
||||
@ -490,8 +497,9 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general")
|
||||
QRhiCommandBuffer::debugMarkBegin()) are supported.
|
||||
|
||||
\value Timestamps Indicates that command buffer timestamps are supported.
|
||||
Relevant for addGpuFrameTimeCallback(). Can be expected to be supported on
|
||||
D3D11 and Vulkan, assuming the underlying implementation supports it.
|
||||
Relevant for QRhiCommandBuffer::lastCompletedGpuTime(). Can be expected to
|
||||
be supported on Metal, Vulkan, and Direct 3D, assuming the underlying
|
||||
implementation supports timestamp queries or similar.
|
||||
|
||||
\value Instancing Indicates that instanced drawing is supported. In
|
||||
practice this feature will be unsupported with OpenGL ES 2.0 and OpenGL
|
||||
@ -4853,11 +4861,21 @@ QRhiResource::Type QRhiSwapChain::resourceType() const
|
||||
/*!
|
||||
\fn QRhiCommandBuffer *QRhiSwapChain::currentFrameCommandBuffer()
|
||||
|
||||
\return a command buffer on which rendering commands can be recorded. Only
|
||||
valid within a QRhi::beginFrame() - QRhi::endFrame() block where
|
||||
beginFrame() was called with this swapchain.
|
||||
\return a command buffer on which rendering commands and resource updates
|
||||
can be recorded within a \l{QRhi::beginFrame()}{beginFrame} -
|
||||
\l{QRhi::endFrame()}{endFrame} block, assuming beginFrame() was called with
|
||||
this swapchain.
|
||||
|
||||
\note the value must not be cached and reused between frames
|
||||
\note The returned object is valid also after endFrame(), up until the next
|
||||
beginFrame(), but the returned command buffer should not be used to record
|
||||
any commands then. Rather, it can be used to query data collected during
|
||||
the frame (or previous frames), for example by calling
|
||||
\l{QRhiCommandBuffer::lastCompletedGpuTime()}{lastCompletedGpuTime()}.
|
||||
|
||||
\note The value must not be cached and reused between frames. The caller
|
||||
should not hold on to the returned object once
|
||||
\l{QRhi::beginFrame()}{beginFrame()} is called again. Instead, the command
|
||||
buffer object should be queried again by calling this function.
|
||||
*/
|
||||
|
||||
/*!
|
||||
@ -5857,36 +5875,6 @@ void QRhi::runCleanup()
|
||||
d->cleanupCallbacks.clear();
|
||||
}
|
||||
|
||||
/*!
|
||||
Registers a \a callback that is called with an elapsed time calculated from
|
||||
GPU timestamps asynchronously after a timestamp becomes available at some
|
||||
point after presenting a frame.
|
||||
|
||||
The callback is called with a float value that is meant to be in
|
||||
milliseconds and represents the elapsed time on the GPU side for a given
|
||||
frame. Care must be exercised with the interpretation of the value, as what
|
||||
it exactly is is not controlled by Qt and depends on the underlying
|
||||
graphics API and its implementation. In particular, comparing the values
|
||||
between different graphics APIs is discouraged and may be meaningless.
|
||||
|
||||
The timing values become available asynchronously, sometimes several frames
|
||||
after the frame has been submitted in endFrame(). There is currently no way
|
||||
to identify the frame. The callback is invoked whenever the timestamp
|
||||
queries complete.
|
||||
|
||||
\note This is only supported when the Timestamp feature is reported as
|
||||
supported from isFeatureSupported(). Otherwise the \a callback is never
|
||||
called.
|
||||
|
||||
The \a callback is always called on the thread the QRhi lives and operates
|
||||
on. While not guaranteed, it is typical that the callback is invoked from
|
||||
within beginFrame().
|
||||
*/
|
||||
void QRhi::addGpuFrameTimeCallback(const GpuFrameTimeCallback &callback)
|
||||
{
|
||||
d->addGpuFrameTimeCallback(callback);
|
||||
}
|
||||
|
||||
/*!
|
||||
\class QRhiResourceUpdateBatch
|
||||
\internal
|
||||
@ -6901,6 +6889,41 @@ void QRhiCommandBuffer::endExternal()
|
||||
m_rhi->endExternal(this);
|
||||
}
|
||||
|
||||
/*!
|
||||
\return the last available timestamp, in seconds. The value indicates the
|
||||
elapsed time on the GPU during the last completed frame.
|
||||
|
||||
Care must be exercised with the interpretation of the value, as its
|
||||
precision and granularity is often not controlled by Qt, and depends on the
|
||||
underlying graphics API and its implementation. In particular, comparing
|
||||
the values between different graphics APIs and hardware is discouraged and
|
||||
may be meaningless.
|
||||
|
||||
The timing values may become available asynchronously. The returned value
|
||||
may therefore be 0 or the last known value referring to some previous
|
||||
frame. The value my also become 0 again under certain conditions, such as
|
||||
when resizing the window. It can be expected that the most up-to-date
|
||||
available value is retrieved in beginFrame() and becomes queriable via this
|
||||
function once beginFrame() returns.
|
||||
|
||||
\note Do not assume that the value refers to the previous
|
||||
(\c{currently_recorded - 1}) frame. It may refer to \c{currently_recorded -
|
||||
2} or \c{currently_recorded - 3} as well. The exact behavior may depend on
|
||||
the graphics API and its implementation.
|
||||
|
||||
\note The result is always 0 when the QRhi::Timestamps feature is not
|
||||
reported as supported, or when QRhi::EnableTimestamps was not passed to
|
||||
QRhi::create(). There are exceptions to the latter, because with some
|
||||
graphics APIs timings are available without having to perform extra
|
||||
operations, but portable applications should always consciously opt-in to
|
||||
timestamp collection when they know it is needed, and call this function
|
||||
accordingly.
|
||||
*/
|
||||
double QRhiCommandBuffer::lastCompletedGpuTime()
|
||||
{
|
||||
return m_rhi->lastCompletedGpuTime(this);
|
||||
}
|
||||
|
||||
/*!
|
||||
\return the value (typically an offset) \a v aligned to the uniform buffer
|
||||
alignment given by by ubufAlignment().
|
||||
|
@ -1666,6 +1666,8 @@ public:
|
||||
void beginExternal();
|
||||
void endExternal();
|
||||
|
||||
double lastCompletedGpuTime();
|
||||
|
||||
protected:
|
||||
QRhiCommandBuffer(QRhiImplementation *rhi);
|
||||
};
|
||||
@ -1768,7 +1770,8 @@ public:
|
||||
enum Flag {
|
||||
EnableDebugMarkers = 1 << 0,
|
||||
PreferSoftwareRenderer = 1 << 1,
|
||||
EnablePipelineCacheDataSave = 1 << 2
|
||||
EnablePipelineCacheDataSave = 1 << 2,
|
||||
EnableTimestamps = 1 << 3
|
||||
};
|
||||
Q_DECLARE_FLAGS(Flags, Flag)
|
||||
|
||||
@ -1866,9 +1869,6 @@ public:
|
||||
void addCleanupCallback(const CleanupCallback &callback);
|
||||
void runCleanup();
|
||||
|
||||
using GpuFrameTimeCallback = std::function<void(float t)>;
|
||||
void addGpuFrameTimeCallback(const GpuFrameTimeCallback &callback);
|
||||
|
||||
QRhiGraphicsPipeline *newGraphicsPipeline();
|
||||
QRhiComputePipeline *newComputePipeline();
|
||||
QRhiShaderResourceBindings *newShaderResourceBindings();
|
||||
|
@ -120,6 +120,7 @@ public:
|
||||
virtual const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) = 0;
|
||||
virtual void beginExternal(QRhiCommandBuffer *cb) = 0;
|
||||
virtual void endExternal(QRhiCommandBuffer *cb) = 0;
|
||||
virtual double lastCompletedGpuTime(QRhiCommandBuffer *cb) = 0;
|
||||
|
||||
virtual QList<int> supportedSampleCounts() const = 0;
|
||||
virtual int ubufAlignment() const = 0;
|
||||
@ -177,22 +178,6 @@ public:
|
||||
cleanupCallbacks.append(callback);
|
||||
}
|
||||
|
||||
void addGpuFrameTimeCallback(const QRhi::GpuFrameTimeCallback &callback)
|
||||
{
|
||||
gpuFrameTimeCallbacks.append(callback);
|
||||
}
|
||||
|
||||
bool hasGpuFrameTimeCallback() const
|
||||
{
|
||||
return !gpuFrameTimeCallbacks.isEmpty();
|
||||
}
|
||||
|
||||
void runGpuFrameTimeCallbacks(float t)
|
||||
{
|
||||
for (const QRhi::GpuFrameTimeCallback &f : std::as_const(gpuFrameTimeCallbacks))
|
||||
f(t);
|
||||
}
|
||||
|
||||
bool sanityCheckGraphicsPipeline(QRhiGraphicsPipeline *ps);
|
||||
bool sanityCheckShaderResourceBindings(QRhiShaderResourceBindings *srb);
|
||||
void updateLayoutDesc(QRhiShaderResourceBindings *srb);
|
||||
@ -253,7 +238,6 @@ private:
|
||||
QHash<QRhiResource *, bool> resources;
|
||||
QSet<QRhiResource *> pendingDeleteResources;
|
||||
QVarLengthArray<QRhi::CleanupCallback, 4> cleanupCallbacks;
|
||||
QVarLengthArray<QRhi::GpuFrameTimeCallback, 4> gpuFrameTimeCallbacks;
|
||||
QElapsedTimer pipelineCreationTimer;
|
||||
qint64 accumulatedPipelineCreationTime = 0;
|
||||
|
||||
|
@ -315,6 +315,11 @@ bool QRhiD3D11::create(QRhi::Flags flags)
|
||||
if (FAILED(context->QueryInterface(__uuidof(ID3DUserDefinedAnnotation), reinterpret_cast<void **>(&annotations))))
|
||||
annotations = nullptr;
|
||||
|
||||
if (flags.testFlag(QRhi::EnableTimestamps)) {
|
||||
ofr.timestamps.prepare(2, this);
|
||||
// timestamp queries are optional so we can go on even if they failed
|
||||
}
|
||||
|
||||
deviceLost = false;
|
||||
|
||||
nativeHandlesStruct.dev = dev;
|
||||
@ -340,6 +345,8 @@ void QRhiD3D11::destroy()
|
||||
|
||||
clearShaderCache();
|
||||
|
||||
ofr.timestamps.destroy();
|
||||
|
||||
if (annotations) {
|
||||
annotations->Release();
|
||||
annotations = nullptr;
|
||||
@ -1235,6 +1242,12 @@ void QRhiD3D11::endExternal(QRhiCommandBuffer *cb)
|
||||
}
|
||||
}
|
||||
|
||||
double QRhiD3D11::lastCompletedGpuTime(QRhiCommandBuffer *cb)
|
||||
{
|
||||
QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb);
|
||||
return cbD->lastGpuTime;
|
||||
}
|
||||
|
||||
QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
|
||||
{
|
||||
Q_UNUSED(flags);
|
||||
@ -1243,30 +1256,6 @@ QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF
|
||||
contextState.currentSwapChain = swapChainD;
|
||||
const int currentFrameSlot = swapChainD->currentFrameSlot;
|
||||
|
||||
if (swapChainD->timestampActive[currentFrameSlot]) {
|
||||
ID3D11Query *tsDisjoint = swapChainD->timestampDisjointQuery[currentFrameSlot];
|
||||
const int tsIdx = QD3D11SwapChain::BUFFER_COUNT * currentFrameSlot;
|
||||
ID3D11Query *tsStart = swapChainD->timestampQuery[tsIdx];
|
||||
ID3D11Query *tsEnd = swapChainD->timestampQuery[tsIdx + 1];
|
||||
quint64 timestamps[2];
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj;
|
||||
bool ok = true;
|
||||
ok &= context->GetData(tsDisjoint, &dj, sizeof(dj), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
ok &= context->GetData(tsEnd, ×tamps[1], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
// this above is often not ready, not even in frame_where_recorded+2,
|
||||
// not clear why. so make the whole thing async and do not touch the
|
||||
// queries until they are finally all available in frame this+2 or
|
||||
// this+4 or ...
|
||||
ok &= context->GetData(tsStart, ×tamps[0], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
if (ok) {
|
||||
if (!dj.Disjoint && dj.Frequency) {
|
||||
const float elapsedMs = (timestamps[1] - timestamps[0]) / float(dj.Frequency) * 1000.0f;
|
||||
runGpuFrameTimeCallbacks(elapsedMs);
|
||||
}
|
||||
swapChainD->timestampActive[currentFrameSlot] = false;
|
||||
} // else leave timestampActive set to true, will retry in a subsequent beginFrame
|
||||
}
|
||||
|
||||
swapChainD->cb.resetState();
|
||||
|
||||
swapChainD->rt.d.rtv[0] = swapChainD->sampleDesc.Count > 1 ?
|
||||
@ -1275,6 +1264,12 @@ QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF
|
||||
|
||||
finishActiveReadbacks();
|
||||
|
||||
if (swapChainD->timestamps.active[currentFrameSlot]) {
|
||||
double elapsedSec = 0;
|
||||
if (swapChainD->timestamps.tryQueryTimestamps(currentFrameSlot, context, &elapsedSec))
|
||||
swapChainD->cb.lastGpuTime = elapsedSec;
|
||||
}
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
}
|
||||
|
||||
@ -1284,11 +1279,11 @@ QRhi::FrameOpResult QRhiD3D11::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
|
||||
Q_ASSERT(contextState.currentSwapChain = swapChainD);
|
||||
const int currentFrameSlot = swapChainD->currentFrameSlot;
|
||||
|
||||
ID3D11Query *tsDisjoint = swapChainD->timestampDisjointQuery[currentFrameSlot];
|
||||
ID3D11Query *tsDisjoint = swapChainD->timestamps.disjointQuery[currentFrameSlot];
|
||||
const int tsIdx = QD3D11SwapChain::BUFFER_COUNT * currentFrameSlot;
|
||||
ID3D11Query *tsStart = swapChainD->timestampQuery[tsIdx];
|
||||
ID3D11Query *tsEnd = swapChainD->timestampQuery[tsIdx + 1];
|
||||
const bool recordTimestamps = tsDisjoint && tsStart && tsEnd && !swapChainD->timestampActive[currentFrameSlot];
|
||||
ID3D11Query *tsStart = swapChainD->timestamps.query[tsIdx];
|
||||
ID3D11Query *tsEnd = swapChainD->timestamps.query[tsIdx + 1];
|
||||
const bool recordTimestamps = tsDisjoint && tsStart && tsEnd && !swapChainD->timestamps.active[currentFrameSlot];
|
||||
|
||||
// send all commands to the context
|
||||
if (recordTimestamps)
|
||||
@ -1306,7 +1301,7 @@ QRhi::FrameOpResult QRhiD3D11::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
|
||||
if (recordTimestamps) {
|
||||
context->End(tsEnd);
|
||||
context->End(tsDisjoint);
|
||||
swapChainD->timestampActive[currentFrameSlot] = true;
|
||||
swapChainD->timestamps.active[currentFrameSlot] = true;
|
||||
}
|
||||
|
||||
if (!flags.testFlag(QRhi::SkipPresent)) {
|
||||
@ -1347,6 +1342,12 @@ QRhi::FrameOpResult QRhiD3D11::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi:
|
||||
ofr.cbWrapper.resetState();
|
||||
*cb = &ofr.cbWrapper;
|
||||
|
||||
if (ofr.timestamps.active[ofr.timestampIdx]) {
|
||||
double elapsedSec = 0;
|
||||
if (ofr.timestamps.tryQueryTimestamps(ofr.timestampIdx, context, &elapsedSec))
|
||||
ofr.cbWrapper.lastGpuTime = elapsedSec;
|
||||
}
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
}
|
||||
|
||||
@ -1355,10 +1356,27 @@ QRhi::FrameOpResult QRhiD3D11::endOffscreenFrame(QRhi::EndFrameFlags flags)
|
||||
Q_UNUSED(flags);
|
||||
ofr.active = false;
|
||||
|
||||
ID3D11Query *tsDisjoint = ofr.timestamps.disjointQuery[ofr.timestampIdx];
|
||||
ID3D11Query *tsStart = ofr.timestamps.query[ofr.timestampIdx * 2];
|
||||
ID3D11Query *tsEnd = ofr.timestamps.query[ofr.timestampIdx * 2 + 1];
|
||||
const bool recordTimestamps = tsDisjoint && tsStart && tsEnd && !ofr.timestamps.active[ofr.timestampIdx];
|
||||
if (recordTimestamps) {
|
||||
context->Begin(tsDisjoint);
|
||||
context->End(tsStart); // record timestamp; no Begin() for D3D11_QUERY_TIMESTAMP
|
||||
}
|
||||
|
||||
executeCommandBuffer(&ofr.cbWrapper);
|
||||
context->Flush();
|
||||
|
||||
finishActiveReadbacks();
|
||||
|
||||
if (recordTimestamps) {
|
||||
context->End(tsEnd);
|
||||
context->End(tsDisjoint);
|
||||
ofr.timestamps.active[ofr.timestampIdx] = true;
|
||||
ofr.timestampIdx = (ofr.timestampIdx + 1) % 2;
|
||||
}
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
}
|
||||
|
||||
@ -2607,10 +2625,10 @@ void QRhiD3D11::executeCommandBuffer(QD3D11CommandBuffer *cbD, QD3D11SwapChain *
|
||||
|
||||
if (timestampSwapChain) {
|
||||
const int currentFrameSlot = timestampSwapChain->currentFrameSlot;
|
||||
ID3D11Query *tsDisjoint = timestampSwapChain->timestampDisjointQuery[currentFrameSlot];
|
||||
ID3D11Query *tsDisjoint = timestampSwapChain->timestamps.disjointQuery[currentFrameSlot];
|
||||
const int tsIdx = QD3D11SwapChain::BUFFER_COUNT * currentFrameSlot;
|
||||
ID3D11Query *tsStart = timestampSwapChain->timestampQuery[tsIdx];
|
||||
if (tsDisjoint && tsStart && !timestampSwapChain->timestampActive[currentFrameSlot]) {
|
||||
ID3D11Query *tsStart = timestampSwapChain->timestamps.query[tsIdx];
|
||||
if (tsDisjoint && tsStart && !timestampSwapChain->timestamps.active[currentFrameSlot]) {
|
||||
// The timestamps seem to include vsync time with Present(1), except
|
||||
// when running on a non-primary gpu. This is not ideal. So try working
|
||||
// it around by issuing a semi-fake OMSetRenderTargets early and
|
||||
@ -4636,6 +4654,92 @@ void QD3D11CommandBuffer::destroy()
|
||||
// nothing to do here
|
||||
}
|
||||
|
||||
bool QD3D11Timestamps::prepare(int pairCount, QRhiD3D11 *rhiD)
|
||||
{
|
||||
// Creates the query objects if not yet done, but otherwise calling this
|
||||
// function is expected to be a no-op.
|
||||
|
||||
Q_ASSERT(pairCount <= MAX_TIMESTAMP_PAIRS);
|
||||
D3D11_QUERY_DESC queryDesc = {};
|
||||
for (int i = 0; i < pairCount; ++i) {
|
||||
if (!disjointQuery[i]) {
|
||||
queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
HRESULT hr = rhiD->dev->CreateQuery(&queryDesc, &disjointQuery[i]);
|
||||
if (FAILED(hr)) {
|
||||
qWarning("Failed to create timestamp disjoint query: %s",
|
||||
qPrintable(QSystemError::windowsComString(hr)));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
queryDesc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
for (int j = 0; j < 2; ++j) {
|
||||
const int idx = pairCount * i + j;
|
||||
if (!query[idx]) {
|
||||
HRESULT hr = rhiD->dev->CreateQuery(&queryDesc, &query[idx]);
|
||||
if (FAILED(hr)) {
|
||||
qWarning("Failed to create timestamp query: %s",
|
||||
qPrintable(QSystemError::windowsComString(hr)));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
this->pairCount = pairCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
void QD3D11Timestamps::destroy()
|
||||
{
|
||||
for (int i = 0; i < MAX_TIMESTAMP_PAIRS; ++i) {
|
||||
active[i] = false;
|
||||
if (disjointQuery[i]) {
|
||||
disjointQuery[i]->Release();
|
||||
disjointQuery[i] = nullptr;
|
||||
}
|
||||
for (int j = 0; j < 2; ++j) {
|
||||
const int idx = MAX_TIMESTAMP_PAIRS * i + j;
|
||||
if (query[idx]) {
|
||||
query[idx]->Release();
|
||||
query[idx] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool QD3D11Timestamps::tryQueryTimestamps(int idx, ID3D11DeviceContext *context, double *elapsedSec)
|
||||
{
|
||||
bool result = false;
|
||||
if (!active[idx])
|
||||
return result;
|
||||
|
||||
ID3D11Query *tsDisjoint = disjointQuery[idx];
|
||||
const int tsIdx = pairCount * idx;
|
||||
ID3D11Query *tsStart = query[tsIdx];
|
||||
ID3D11Query *tsEnd = query[tsIdx + 1];
|
||||
quint64 timestamps[2];
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj;
|
||||
|
||||
bool ok = true;
|
||||
ok &= context->GetData(tsDisjoint, &dj, sizeof(dj), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
ok &= context->GetData(tsEnd, ×tamps[1], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
// this above is often not ready, not even in frame_where_recorded+2,
|
||||
// not clear why. so make the whole thing async and do not touch the
|
||||
// queries until they are finally all available in frame this+2 or
|
||||
// this+4 or ...
|
||||
ok &= context->GetData(tsStart, ×tamps[0], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
|
||||
if (ok) {
|
||||
if (!dj.Disjoint && dj.Frequency) {
|
||||
const float elapsedMs = (timestamps[1] - timestamps[0]) / float(dj.Frequency) * 1000.0f;
|
||||
*elapsedSec = elapsedMs / 1000.0;
|
||||
result = true;
|
||||
}
|
||||
active[idx] = false;
|
||||
} // else leave active set, will retry in a subsequent beginFrame or similar
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
QD3D11SwapChain::QD3D11SwapChain(QRhiImplementation *rhi)
|
||||
: QRhiSwapChain(rhi),
|
||||
rt(rhi, this),
|
||||
@ -4646,10 +4750,6 @@ QD3D11SwapChain::QD3D11SwapChain(QRhiImplementation *rhi)
|
||||
for (int i = 0; i < BUFFER_COUNT; ++i) {
|
||||
msaaTex[i] = nullptr;
|
||||
msaaRtv[i] = nullptr;
|
||||
timestampActive[i] = false;
|
||||
timestampDisjointQuery[i] = nullptr;
|
||||
timestampQuery[2 * i] = nullptr;
|
||||
timestampQuery[2 * i + 1] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4687,19 +4787,7 @@ void QD3D11SwapChain::destroy()
|
||||
|
||||
releaseBuffers();
|
||||
|
||||
for (int i = 0; i < BUFFER_COUNT; ++i) {
|
||||
if (timestampDisjointQuery[i]) {
|
||||
timestampDisjointQuery[i]->Release();
|
||||
timestampDisjointQuery[i] = nullptr;
|
||||
}
|
||||
for (int j = 0; j < 2; ++j) {
|
||||
const int idx = BUFFER_COUNT * i + j;
|
||||
if (timestampQuery[idx]) {
|
||||
timestampQuery[idx]->Release();
|
||||
timestampQuery[idx] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
timestamps.destroy();
|
||||
|
||||
swapChain->Release();
|
||||
swapChain = nullptr;
|
||||
@ -5128,31 +5216,8 @@ bool QD3D11SwapChain::createOrResize()
|
||||
rtD->d.colorAttCount = 1;
|
||||
rtD->d.dsAttCount = m_depthStencil ? 1 : 0;
|
||||
|
||||
if (rhiD->hasGpuFrameTimeCallback()) {
|
||||
D3D11_QUERY_DESC queryDesc = {};
|
||||
for (int i = 0; i < BUFFER_COUNT; ++i) {
|
||||
if (!timestampDisjointQuery[i]) {
|
||||
queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
hr = rhiD->dev->CreateQuery(&queryDesc, ×tampDisjointQuery[i]);
|
||||
if (FAILED(hr)) {
|
||||
qWarning("Failed to create timestamp disjoint query: %s",
|
||||
qPrintable(QSystemError::windowsComString(hr)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
queryDesc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
for (int j = 0; j < 2; ++j) {
|
||||
const int idx = BUFFER_COUNT * i + j; // one pair per buffer (frame)
|
||||
if (!timestampQuery[idx]) {
|
||||
hr = rhiD->dev->CreateQuery(&queryDesc, ×tampQuery[idx]);
|
||||
if (FAILED(hr)) {
|
||||
qWarning("Failed to create timestamp query: %s",
|
||||
qPrintable(QSystemError::windowsComString(hr)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (rhiD->rhiFlags.testFlag(QRhi::EnableTimestamps)) {
|
||||
timestamps.prepare(BUFFER_COUNT, rhiD);
|
||||
// timestamp queries are optional so we can go on even if they failed
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,8 @@
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
class QRhiD3D11;
|
||||
|
||||
struct QD3D11Buffer : public QRhiBuffer
|
||||
{
|
||||
QD3D11Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size);
|
||||
@ -496,6 +498,7 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer
|
||||
|
||||
QRhiBackendCommandList<Command> commands;
|
||||
PassType recordingPass;
|
||||
double lastGpuTime = 0;
|
||||
QRhiRenderTarget *currentTarget;
|
||||
QRhiGraphicsPipeline *currentGraphicsPipeline;
|
||||
QRhiComputePipeline *currentComputePipeline;
|
||||
@ -534,6 +537,7 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer
|
||||
}
|
||||
void resetState() {
|
||||
recordingPass = NoPass;
|
||||
// do not zero lastGpuTime
|
||||
currentTarget = nullptr;
|
||||
resetCommands();
|
||||
resetCachedState();
|
||||
@ -553,6 +557,21 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer
|
||||
}
|
||||
};
|
||||
|
||||
static const int QD3D11_SWAPCHAIN_BUFFER_COUNT = 2;
|
||||
|
||||
struct QD3D11Timestamps
|
||||
{
|
||||
static const int MAX_TIMESTAMP_PAIRS = QD3D11_SWAPCHAIN_BUFFER_COUNT;
|
||||
bool active[MAX_TIMESTAMP_PAIRS] = {};
|
||||
ID3D11Query *disjointQuery[MAX_TIMESTAMP_PAIRS] = {};
|
||||
ID3D11Query *query[MAX_TIMESTAMP_PAIRS * 2] = {};
|
||||
int pairCount = 0;
|
||||
|
||||
bool prepare(int pairCount, QRhiD3D11 *rhiD);
|
||||
void destroy();
|
||||
bool tryQueryTimestamps(int idx, ID3D11DeviceContext *context, double *elapsedSec);
|
||||
};
|
||||
|
||||
struct QD3D11SwapChain : public QRhiSwapChain
|
||||
{
|
||||
QD3D11SwapChain(QRhiImplementation *rhi);
|
||||
@ -581,21 +600,19 @@ struct QD3D11SwapChain : public QRhiSwapChain
|
||||
DXGI_FORMAT srgbAdjustedColorFormat;
|
||||
IDXGISwapChain *swapChain = nullptr;
|
||||
UINT swapChainFlags = 0;
|
||||
static const int BUFFER_COUNT = 2;
|
||||
ID3D11Texture2D *backBufferTex;
|
||||
ID3D11RenderTargetView *backBufferRtv;
|
||||
static const int BUFFER_COUNT = QD3D11_SWAPCHAIN_BUFFER_COUNT;
|
||||
ID3D11Texture2D *msaaTex[BUFFER_COUNT];
|
||||
ID3D11RenderTargetView *msaaRtv[BUFFER_COUNT];
|
||||
DXGI_SAMPLE_DESC sampleDesc;
|
||||
int currentFrameSlot = 0;
|
||||
int frameCount = 0;
|
||||
QD3D11RenderBuffer *ds = nullptr;
|
||||
bool timestampActive[BUFFER_COUNT];
|
||||
ID3D11Query *timestampDisjointQuery[BUFFER_COUNT];
|
||||
ID3D11Query *timestampQuery[BUFFER_COUNT * 2];
|
||||
UINT swapInterval = 1;
|
||||
IDCompositionTarget *dcompTarget = nullptr;
|
||||
IDCompositionVisual *dcompVisual = nullptr;
|
||||
QD3D11Timestamps timestamps;
|
||||
};
|
||||
|
||||
class QRhiD3D11 : public QRhiImplementation
|
||||
@ -689,6 +706,7 @@ public:
|
||||
const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override;
|
||||
void beginExternal(QRhiCommandBuffer *cb) override;
|
||||
void endExternal(QRhiCommandBuffer *cb) override;
|
||||
double lastCompletedGpuTime(QRhiCommandBuffer *cb) override;
|
||||
|
||||
QList<int> supportedSampleCounts() const override;
|
||||
int ubufAlignment() const override;
|
||||
@ -761,6 +779,8 @@ public:
|
||||
OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { }
|
||||
bool active = false;
|
||||
QD3D11CommandBuffer cbWrapper;
|
||||
QD3D11Timestamps timestamps;
|
||||
int timestampIdx = 0;
|
||||
} ofr;
|
||||
|
||||
struct TextureReadback {
|
||||
|
@ -1333,6 +1333,12 @@ void QRhiD3D12::endExternal(QRhiCommandBuffer *cb)
|
||||
}
|
||||
}
|
||||
|
||||
double QRhiD3D12::lastCompletedGpuTime(QRhiCommandBuffer *cb)
|
||||
{
|
||||
Q_UNUSED(cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
|
||||
{
|
||||
Q_UNUSED(flags);
|
||||
|
@ -1088,6 +1088,7 @@ public:
|
||||
const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override;
|
||||
void beginExternal(QRhiCommandBuffer *cb) override;
|
||||
void endExternal(QRhiCommandBuffer *cb) override;
|
||||
double lastCompletedGpuTime(QRhiCommandBuffer *cb) override;
|
||||
|
||||
QList<int> supportedSampleCounts() const override;
|
||||
int ubufAlignment() const override;
|
||||
|
@ -1962,6 +1962,12 @@ void QRhiGles2::endExternal(QRhiCommandBuffer *cb)
|
||||
enqueueBindFramebuffer(cbD->currentTarget, cbD);
|
||||
}
|
||||
|
||||
double QRhiGles2::lastCompletedGpuTime(QRhiCommandBuffer *cb)
|
||||
{
|
||||
Q_UNUSED(cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
QRhi::FrameOpResult QRhiGles2::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags)
|
||||
{
|
||||
QGles2SwapChain *swapChainD = QRHI_RES(QGles2SwapChain, swapChain);
|
||||
|
@ -806,6 +806,7 @@ public:
|
||||
const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override;
|
||||
void beginExternal(QRhiCommandBuffer *cb) override;
|
||||
void endExternal(QRhiCommandBuffer *cb) override;
|
||||
double lastCompletedGpuTime(QRhiCommandBuffer *cb) override;
|
||||
|
||||
QList<int> supportedSampleCounts() const override;
|
||||
int ubufAlignment() const override;
|
||||
|
@ -194,6 +194,7 @@ struct QRhiMetalData
|
||||
struct OffscreenFrame {
|
||||
OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { }
|
||||
bool active = false;
|
||||
double lastGpuTime = 0;
|
||||
QMetalCommandBuffer cbWrapper;
|
||||
} ofr;
|
||||
|
||||
@ -296,6 +297,7 @@ struct QMetalShaderResourceBindingsData {
|
||||
struct QMetalCommandBufferData
|
||||
{
|
||||
id<MTLCommandBuffer> cb;
|
||||
double lastGpuTime = 0;
|
||||
id<MTLRenderCommandEncoder> currentRenderPassEncoder;
|
||||
id<MTLComputeCommandEncoder> currentComputePassEncoder;
|
||||
id<MTLComputeCommandEncoder> tessellationComputeEncoder;
|
||||
@ -413,6 +415,7 @@ struct QMetalSwapChainData
|
||||
CAMetalLayer *layer = nullptr;
|
||||
id<CAMetalDrawable> curDrawable = nil;
|
||||
dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT];
|
||||
double lastGpuTime[QMTL_FRAMES_IN_FLIGHT];
|
||||
MTLRenderPassDescriptor *rp = nullptr;
|
||||
id<MTLTexture> msaaTex[QMTL_FRAMES_IN_FLIGHT];
|
||||
QRhiTexture::Format rhiColorFormat;
|
||||
@ -727,7 +730,7 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
|
||||
case QRhi::DebugMarkers:
|
||||
return true;
|
||||
case QRhi::Timestamps:
|
||||
return false;
|
||||
return true;
|
||||
case QRhi::Instancing:
|
||||
return true;
|
||||
case QRhi::CustomInstanceStepRate:
|
||||
@ -2237,6 +2240,12 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb)
|
||||
cbD->resetPerPassCachedState();
|
||||
}
|
||||
|
||||
double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb)
|
||||
{
|
||||
QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
|
||||
return cbD->d->lastGpuTime;
|
||||
}
|
||||
|
||||
QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
|
||||
{
|
||||
Q_UNUSED(flags);
|
||||
@ -2285,7 +2294,8 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF
|
||||
swapChainD->ds->lastActiveFrameSlot = currentFrameSlot;
|
||||
|
||||
executeDeferredReleases();
|
||||
swapChainD->cbWrapper.resetState();
|
||||
swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]);
|
||||
swapChainD->d->lastGpuTime[currentFrameSlot] = 0;
|
||||
finishActiveReadbacks();
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
@ -2297,7 +2307,8 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
|
||||
Q_ASSERT(currentSwapChain == swapChainD);
|
||||
|
||||
__block int thisFrameSlot = currentFrameSlot;
|
||||
[swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer>) {
|
||||
[swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer> cb) {
|
||||
swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
|
||||
dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]);
|
||||
}];
|
||||
|
||||
@ -2350,7 +2361,8 @@ QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi:
|
||||
d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
|
||||
|
||||
executeDeferredReleases();
|
||||
d->ofr.cbWrapper.resetState();
|
||||
d->ofr.cbWrapper.resetState(d->ofr.lastGpuTime);
|
||||
d->ofr.lastGpuTime = 0;
|
||||
finishActiveReadbacks();
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
@ -2362,10 +2374,13 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags)
|
||||
Q_ASSERT(d->ofr.active);
|
||||
d->ofr.active = false;
|
||||
|
||||
[d->ofr.cbWrapper.d->cb commit];
|
||||
id<MTLCommandBuffer> cb = d->ofr.cbWrapper.d->cb;
|
||||
[cb commit];
|
||||
|
||||
// offscreen frames wait for completion, unlike swapchain ones
|
||||
[d->ofr.cbWrapper.d->cb waitUntilCompleted];
|
||||
[cb waitUntilCompleted];
|
||||
|
||||
d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime;
|
||||
|
||||
finishActiveReadbacks(true);
|
||||
|
||||
@ -2406,10 +2421,13 @@ QRhi::FrameOpResult QRhiMetal::finish()
|
||||
}
|
||||
|
||||
if (inFrame) {
|
||||
if (d->ofr.active)
|
||||
if (d->ofr.active) {
|
||||
d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime;
|
||||
d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
|
||||
else
|
||||
} else {
|
||||
swapChainD->d->lastGpuTime[currentFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
|
||||
swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
|
||||
}
|
||||
}
|
||||
|
||||
executeDeferredReleases(true);
|
||||
@ -5914,8 +5932,9 @@ const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles()
|
||||
return &nativeHandlesStruct;
|
||||
}
|
||||
|
||||
void QMetalCommandBuffer::resetState()
|
||||
void QMetalCommandBuffer::resetState(double lastGpuTime)
|
||||
{
|
||||
d->lastGpuTime = lastGpuTime;
|
||||
d->currentRenderPassEncoder = nil;
|
||||
d->currentComputePassEncoder = nil;
|
||||
d->tessellationComputeEncoder = nil;
|
||||
@ -6222,6 +6241,7 @@ bool QMetalSwapChain::createOrResize()
|
||||
d->curDrawable = nil;
|
||||
|
||||
for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
|
||||
d->lastGpuTime[i] = 0;
|
||||
if (!d->sem[i])
|
||||
d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1);
|
||||
}
|
||||
|
@ -283,7 +283,7 @@ struct QMetalCommandBuffer : public QRhiCommandBuffer
|
||||
QPair<float, float> currentDepthBiasValues;
|
||||
|
||||
const QRhiNativeHandles *nativeHandles();
|
||||
void resetState();
|
||||
void resetState(double lastGpuTime = 0);
|
||||
void resetPerPassState();
|
||||
void resetPerPassCachedState();
|
||||
};
|
||||
@ -418,6 +418,7 @@ public:
|
||||
const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override;
|
||||
void beginExternal(QRhiCommandBuffer *cb) override;
|
||||
void endExternal(QRhiCommandBuffer *cb) override;
|
||||
double lastCompletedGpuTime(QRhiCommandBuffer *cb) override;
|
||||
|
||||
QList<int> supportedSampleCounts() const override;
|
||||
int ubufAlignment() const override;
|
||||
|
@ -344,6 +344,12 @@ void QRhiNull::endExternal(QRhiCommandBuffer *cb)
|
||||
Q_UNUSED(cb);
|
||||
}
|
||||
|
||||
double QRhiNull::lastCompletedGpuTime(QRhiCommandBuffer *cb)
|
||||
{
|
||||
Q_UNUSED(cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
QRhi::FrameOpResult QRhiNull::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
|
||||
{
|
||||
Q_UNUSED(flags);
|
||||
|
@ -261,6 +261,7 @@ public:
|
||||
const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override;
|
||||
void beginExternal(QRhiCommandBuffer *cb) override;
|
||||
void endExternal(QRhiCommandBuffer *cb) override;
|
||||
double lastCompletedGpuTime(QRhiCommandBuffer *cb) override;
|
||||
|
||||
QList<int> supportedSampleCounts() const override;
|
||||
int ubufAlignment() const override;
|
||||
|
@ -1671,6 +1671,24 @@ void QRhiVulkan::ensureCommandPoolForNewFrame()
|
||||
df->vkResetCommandPool(dev, cmdPool[currentFrameSlot], flags);
|
||||
}
|
||||
|
||||
double QRhiVulkan::elapsedSecondsFromTimestamp(quint64 timestamp[2], bool *ok)
|
||||
{
|
||||
quint64 mask = 0;
|
||||
for (quint64 i = 0; i < timestampValidBits; i += 8)
|
||||
mask |= 0xFFULL << i;
|
||||
const quint64 ts0 = timestamp[0] & mask;
|
||||
const quint64 ts1 = timestamp[1] & mask;
|
||||
const float nsecsPerTick = physDevProperties.limits.timestampPeriod;
|
||||
if (!qFuzzyIsNull(nsecsPerTick)) {
|
||||
const float elapsedMs = float(ts1 - ts0) * nsecsPerTick / 1000000.0f;
|
||||
const double elapsedSec = elapsedMs / 1000.0;
|
||||
*ok = true;
|
||||
return elapsedSec;
|
||||
}
|
||||
*ok = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags)
|
||||
{
|
||||
QVkSwapChain *swapChainD = QRHI_RES(QVkSwapChain, swapChain);
|
||||
@ -1720,30 +1738,6 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin
|
||||
// mess up A's in-flight commands (as they are not in flight anymore).
|
||||
waitCommandCompletion(frameResIndex);
|
||||
|
||||
// Now is the time to read the timestamps for the previous frame for this slot.
|
||||
if (frame.timestampQueryIndex >= 0) {
|
||||
quint64 timestamp[2] = { 0, 0 };
|
||||
VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(frame.timestampQueryIndex), 2,
|
||||
2 * sizeof(quint64), timestamp, sizeof(quint64),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
timestampQueryPoolMap.clearBit(frame.timestampQueryIndex / 2);
|
||||
frame.timestampQueryIndex = -1;
|
||||
if (err == VK_SUCCESS) {
|
||||
quint64 mask = 0;
|
||||
for (quint64 i = 0; i < timestampValidBits; i += 8)
|
||||
mask |= 0xFFULL << i;
|
||||
const quint64 ts0 = timestamp[0] & mask;
|
||||
const quint64 ts1 = timestamp[1] & mask;
|
||||
const float nsecsPerTick = physDevProperties.limits.timestampPeriod;
|
||||
if (!qFuzzyIsNull(nsecsPerTick)) {
|
||||
const float elapsedMs = float(ts1 - ts0) * nsecsPerTick / 1000000.0f;
|
||||
runGpuFrameTimeCallbacks(elapsedMs);
|
||||
}
|
||||
} else {
|
||||
qWarning("Failed to query timestamp: %d", err);
|
||||
}
|
||||
}
|
||||
|
||||
currentFrameSlot = int(swapChainD->currentFrameSlot);
|
||||
currentSwapChain = swapChainD;
|
||||
if (swapChainD->ds)
|
||||
@ -1757,9 +1751,34 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin
|
||||
if (cbres != QRhi::FrameOpSuccess)
|
||||
return cbres;
|
||||
|
||||
// when profiling is enabled, pick a free query (pair) from the pool
|
||||
int timestampQueryIdx = -1;
|
||||
if (hasGpuFrameTimeCallback() && swapChainD->bufferCount > 1) { // no timestamps if not having at least 2 frames in flight
|
||||
swapChainD->cbWrapper.cb = frame.cmdBuf;
|
||||
|
||||
QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]);
|
||||
swapChainD->rtWrapper.d.fb = image.fb;
|
||||
|
||||
prepareNewFrame(&swapChainD->cbWrapper);
|
||||
|
||||
// Read the timestamps for the previous frame for this slot.
|
||||
if (frame.timestampQueryIndex >= 0) {
|
||||
quint64 timestamp[2] = { 0, 0 };
|
||||
VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(frame.timestampQueryIndex), 2,
|
||||
2 * sizeof(quint64), timestamp, sizeof(quint64),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
timestampQueryPoolMap.clearBit(frame.timestampQueryIndex / 2);
|
||||
frame.timestampQueryIndex = -1;
|
||||
if (err == VK_SUCCESS) {
|
||||
bool ok = false;
|
||||
const double elapsedSec = elapsedSecondsFromTimestamp(timestamp, &ok);
|
||||
if (ok)
|
||||
swapChainD->cbWrapper.lastGpuTime = elapsedSec;
|
||||
} else {
|
||||
qWarning("Failed to query timestamp: %d", err);
|
||||
}
|
||||
}
|
||||
|
||||
// No timestamps if the client did not opt in, or when not having at least 2 frames in flight.
|
||||
if (rhiFlags.testFlag(QRhi::EnableTimestamps) && swapChainD->bufferCount > 1) {
|
||||
int timestampQueryIdx = -1;
|
||||
for (int i = 0; i < timestampQueryPoolMap.size(); ++i) {
|
||||
if (!timestampQueryPoolMap.testBit(i)) {
|
||||
timestampQueryPoolMap.setBit(i);
|
||||
@ -1767,21 +1786,14 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (timestampQueryIdx >= 0) {
|
||||
df->vkCmdResetQueryPool(frame.cmdBuf, timestampQueryPool, uint32_t(timestampQueryIdx), 2);
|
||||
// record timestamp at the start of the command buffer
|
||||
df->vkCmdWriteTimestamp(frame.cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
timestampQueryPool, uint32_t(timestampQueryIdx));
|
||||
frame.timestampQueryIndex = timestampQueryIdx;
|
||||
}
|
||||
}
|
||||
if (timestampQueryIdx >= 0) {
|
||||
df->vkCmdResetQueryPool(frame.cmdBuf, timestampQueryPool, uint32_t(timestampQueryIdx), 2);
|
||||
// record timestamp at the start of the command buffer
|
||||
df->vkCmdWriteTimestamp(frame.cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
timestampQueryPool, uint32_t(timestampQueryIdx));
|
||||
frame.timestampQueryIndex = timestampQueryIdx;
|
||||
}
|
||||
|
||||
swapChainD->cbWrapper.cb = frame.cmdBuf;
|
||||
|
||||
QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]);
|
||||
swapChainD->rtWrapper.d.fb = image.fb;
|
||||
|
||||
prepareNewFrame(&swapChainD->cbWrapper);
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
}
|
||||
@ -2031,6 +2043,24 @@ QRhi::FrameOpResult QRhiVulkan::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi
|
||||
prepareNewFrame(cbWrapper);
|
||||
ofr.active = true;
|
||||
|
||||
if (rhiFlags.testFlag(QRhi::EnableTimestamps)) {
|
||||
int timestampQueryIdx = -1;
|
||||
for (int i = 0; i < timestampQueryPoolMap.size(); ++i) {
|
||||
if (!timestampQueryPoolMap.testBit(i)) {
|
||||
timestampQueryPoolMap.setBit(i);
|
||||
timestampQueryIdx = i * 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (timestampQueryIdx >= 0) {
|
||||
df->vkCmdResetQueryPool(cbWrapper->cb, timestampQueryPool, uint32_t(timestampQueryIdx), 2);
|
||||
// record timestamp at the start of the command buffer
|
||||
df->vkCmdWriteTimestamp(cbWrapper->cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
timestampQueryPool, uint32_t(timestampQueryIdx));
|
||||
ofr.timestampQueryIndex = timestampQueryIdx;
|
||||
}
|
||||
}
|
||||
|
||||
*cb = cbWrapper;
|
||||
return QRhi::FrameOpSuccess;
|
||||
}
|
||||
@ -2044,6 +2074,12 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame(QRhi::EndFrameFlags flags)
|
||||
QVkCommandBuffer *cbWrapper(ofr.cbWrapper[currentFrameSlot]);
|
||||
recordPrimaryCommandBuffer(cbWrapper);
|
||||
|
||||
// record another timestamp, when enabled
|
||||
if (ofr.timestampQueryIndex >= 0) {
|
||||
df->vkCmdWriteTimestamp(cbWrapper->cb, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
timestampQueryPool, uint32_t(ofr.timestampQueryIndex + 1));
|
||||
}
|
||||
|
||||
if (!ofr.cmdFence) {
|
||||
VkFenceCreateInfo fenceInfo = {};
|
||||
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
@ -2066,6 +2102,24 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame(QRhi::EndFrameFlags flags)
|
||||
// previous) frame is safe since we waited for completion above.
|
||||
finishActiveReadbacks(true);
|
||||
|
||||
// Read the timestamps, if we wrote them.
|
||||
if (ofr.timestampQueryIndex >= 0) {
|
||||
quint64 timestamp[2] = { 0, 0 };
|
||||
VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(ofr.timestampQueryIndex), 2,
|
||||
2 * sizeof(quint64), timestamp, sizeof(quint64),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
timestampQueryPoolMap.clearBit(ofr.timestampQueryIndex / 2);
|
||||
ofr.timestampQueryIndex = -1;
|
||||
if (err == VK_SUCCESS) {
|
||||
bool ok = false;
|
||||
const double elapsedSec = elapsedSecondsFromTimestamp(timestamp, &ok);
|
||||
if (ok)
|
||||
cbWrapper->lastGpuTime = elapsedSec;
|
||||
} else {
|
||||
qWarning("Failed to query timestamp: %d", err);
|
||||
}
|
||||
}
|
||||
|
||||
return QRhi::FrameOpSuccess;
|
||||
}
|
||||
|
||||
@ -5153,6 +5207,12 @@ void QRhiVulkan::endExternal(QRhiCommandBuffer *cb)
|
||||
cbD->resetCachedState();
|
||||
}
|
||||
|
||||
double QRhiVulkan::lastCompletedGpuTime(QRhiCommandBuffer *cb)
|
||||
{
|
||||
QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb);
|
||||
return cbD->lastGpuTime;
|
||||
}
|
||||
|
||||
void QRhiVulkan::setObjectName(uint64_t object, VkObjectType type, const QByteArray &name, int slot)
|
||||
{
|
||||
#ifdef VK_EXT_debug_utils
|
||||
|
@ -320,6 +320,7 @@ struct QVkCommandBuffer : public QRhiCommandBuffer
|
||||
void resetState() {
|
||||
recordingPass = NoPass;
|
||||
passUsesSecondaryCb = false;
|
||||
lastGpuTime = 0;
|
||||
currentTarget = nullptr;
|
||||
activeSecondaryCbStack.clear();
|
||||
resetCommands();
|
||||
@ -344,6 +345,7 @@ struct QVkCommandBuffer : public QRhiCommandBuffer
|
||||
|
||||
PassType recordingPass;
|
||||
bool passUsesSecondaryCb;
|
||||
double lastGpuTime = 0;
|
||||
QRhiRenderTarget *currentTarget;
|
||||
QRhiGraphicsPipeline *currentGraphicsPipeline;
|
||||
QRhiComputePipeline *currentComputePipeline;
|
||||
@ -722,6 +724,7 @@ public:
|
||||
const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override;
|
||||
void beginExternal(QRhiCommandBuffer *cb) override;
|
||||
void endExternal(QRhiCommandBuffer *cb) override;
|
||||
double lastCompletedGpuTime(QRhiCommandBuffer *cb) override;
|
||||
|
||||
QList<int> supportedSampleCounts() const override;
|
||||
int ubufAlignment() const override;
|
||||
@ -813,6 +816,7 @@ public:
|
||||
int startLevel, int levelCount);
|
||||
void updateShaderResourceBindings(QRhiShaderResourceBindings *srb, int descSetIdx = -1);
|
||||
void ensureCommandPoolForNewFrame();
|
||||
double elapsedSecondsFromTimestamp(quint64 timestamp[2], bool *ok);
|
||||
|
||||
QVulkanInstance *inst = nullptr;
|
||||
QWindow *maybeWindow = nullptr;
|
||||
@ -903,6 +907,7 @@ public:
|
||||
bool active = false;
|
||||
QVkCommandBuffer *cbWrapper[QVK_FRAMES_IN_FLIGHT];
|
||||
VkFence cmdFence = VK_NULL_HANDLE;
|
||||
int timestampQueryIndex = -1;
|
||||
} ofr;
|
||||
|
||||
struct TextureReadback {
|
||||
|
@ -126,10 +126,11 @@ int main(int argc, char **argv)
|
||||
qDebug("This is a multi-api example, use command line arguments to override:\n%s", qPrintable(cmdLineParser.helpText()));
|
||||
|
||||
QRhi *r = nullptr;
|
||||
QRhi::Flags rhiFlags = QRhi::EnableTimestamps;
|
||||
|
||||
if (graphicsApi == Null) {
|
||||
QRhiNullInitParams params;
|
||||
r = QRhi::create(QRhi::Null, ¶ms);
|
||||
r = QRhi::create(QRhi::Null, ¶ms, rhiFlags);
|
||||
}
|
||||
|
||||
#if QT_CONFIG(vulkan)
|
||||
@ -141,7 +142,7 @@ int main(int argc, char **argv)
|
||||
if (inst.create()) {
|
||||
QRhiVulkanInitParams params;
|
||||
params.inst = &inst;
|
||||
r = QRhi::create(QRhi::Vulkan, ¶ms);
|
||||
r = QRhi::create(QRhi::Vulkan, ¶ms, rhiFlags);
|
||||
} else {
|
||||
qWarning("Failed to create Vulkan instance, switching to OpenGL");
|
||||
graphicsApi = OpenGL;
|
||||
@ -155,7 +156,7 @@ int main(int argc, char **argv)
|
||||
offscreenSurface.reset(QRhiGles2InitParams::newFallbackSurface());
|
||||
QRhiGles2InitParams params;
|
||||
params.fallbackSurface = offscreenSurface.data();
|
||||
r = QRhi::create(QRhi::OpenGLES2, ¶ms);
|
||||
r = QRhi::create(QRhi::OpenGLES2, ¶ms, rhiFlags);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -163,18 +164,18 @@ int main(int argc, char **argv)
|
||||
if (graphicsApi == D3D11) {
|
||||
QRhiD3D11InitParams params;
|
||||
params.enableDebugLayer = true;
|
||||
r = QRhi::create(QRhi::D3D11, ¶ms);
|
||||
r = QRhi::create(QRhi::D3D11, ¶ms, rhiFlags);
|
||||
} else if (graphicsApi == D3D12) {
|
||||
QRhiD3D12InitParams params;
|
||||
params.enableDebugLayer = true;
|
||||
r = QRhi::create(QRhi::D3D12, ¶ms);
|
||||
r = QRhi::create(QRhi::D3D12, ¶ms, rhiFlags);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(Q_OS_MACOS) || defined(Q_OS_IOS)
|
||||
if (graphicsApi == Metal) {
|
||||
QRhiMetalInitParams params;
|
||||
r = QRhi::create(QRhi::Metal, ¶ms);
|
||||
r = QRhi::create(QRhi::Metal, ¶ms, rhiFlags);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -301,6 +302,8 @@ int main(int argc, char **argv)
|
||||
#ifdef TEST_FINISH
|
||||
r->endOffscreenFrame();
|
||||
#endif
|
||||
if (r->isFeatureSupported(QRhi::Timestamps))
|
||||
qDebug() << "GPU time:" << cb->lastCompletedGpuTime() << "seconds (may refer to a previous frame)";
|
||||
}
|
||||
|
||||
delete ps;
|
||||
|
@ -92,7 +92,7 @@ QString graphicsApiName()
|
||||
return QString();
|
||||
}
|
||||
|
||||
QRhi::Flags rhiFlags = QRhi::EnableDebugMarkers;
|
||||
QRhi::Flags rhiFlags = QRhi::EnableDebugMarkers | QRhi::EnableTimestamps;
|
||||
int sampleCount = 1;
|
||||
QRhiSwapChain::Flags scFlags;
|
||||
QRhi::BeginFrameFlags beginFrameFlags;
|
||||
|
@ -35,7 +35,6 @@ struct {
|
||||
QSize lastOutputSize;
|
||||
int frameCount = 0;
|
||||
QFile profOut;
|
||||
QVarLengthArray<float, 64> gpuFrameTimes;
|
||||
QElapsedTimer gpuFrameTimePrintTimer;
|
||||
} d;
|
||||
|
||||
@ -136,20 +135,8 @@ void Window::customInit()
|
||||
// With Vulkan at least we should see some details from the memory allocator.
|
||||
qDebug() << m_r->statistics();
|
||||
|
||||
// Every two seconds try printing an average of the gpu frame times.
|
||||
// Every two seconds try printing last known gpu frame time.
|
||||
d.gpuFrameTimePrintTimer.start();
|
||||
m_r->addGpuFrameTimeCallback([](float elapsedMs) {
|
||||
d.gpuFrameTimes.append(elapsedMs);
|
||||
if (d.gpuFrameTimePrintTimer.elapsed() > 2000) {
|
||||
float at = 0.0f;
|
||||
for (float t : d.gpuFrameTimes)
|
||||
at += t;
|
||||
at /= d.gpuFrameTimes.count();
|
||||
qDebug() << "Average GPU frame time" << at;
|
||||
d.gpuFrameTimes.clear();
|
||||
d.gpuFrameTimePrintTimer.restart();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void Window::customRelease()
|
||||
@ -170,6 +157,11 @@ void Window::customRender()
|
||||
const QSize outputSize = m_sc->currentPixelSize();
|
||||
QRhiCommandBuffer *cb = m_sc->currentFrameCommandBuffer();
|
||||
|
||||
if (d.gpuFrameTimePrintTimer.elapsed() > 2000) {
|
||||
qDebug() << "Last completed GPU frame time" << cb->lastCompletedGpuTime() << "seconds";
|
||||
d.gpuFrameTimePrintTimer.restart();
|
||||
}
|
||||
|
||||
if (outputSize != d.lastOutputSize) {
|
||||
d.triRenderer.resize(outputSize);
|
||||
if (!d.triangleOnly) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user