From 704d4597a57465077ff077f7edc3bb18d2391f5d Mon Sep 17 00:00:00 2001 From: Laszlo Agocs Date: Mon, 4 Sep 2023 14:37:17 +0200 Subject: [PATCH] d3d12: Avoid full-size staging for partial texture updates Running the BenchmarkDemoQt6 application features a massive startup time, followed by running out of memory (GPU-side, assuming a discreet adapter), which is then remedied by the system with heavy swapping. This is due to creating 8+ GB of staging data. While there are doubts about the application itself (seems there is an upload for each Quick 3D Texture per material per model, even though the texture itself is the same?), there is no reason for the D3D12 backend to use so much more memory compared with other backends. That is now ensured by only creating a staging area for the data (taking the 256-aligned row pitch requirement into account), not simply using the subresource (i.e. the whole image) size. This is equivalent to what e.g. the Vulkan backend does. Fixes: QTBUG-116729 Change-Id: I1b5083c6b859ec0736cd9fd792cba8236aeba6e3 Reviewed-by: Andy Nichols (cherry picked from commit d5eed2c5f54da60be9bc5966a6be7c18dc77011f) Reviewed-by: Qt Cherry-pick Bot --- src/gui/rhi/qrhid3d12.cpp | 85 +++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/src/gui/rhi/qrhid3d12.cpp b/src/gui/rhi/qrhid3d12.cpp index 8e11e2dfdd8..38e4fbd2d63 100644 --- a/src/gui/rhi/qrhid3d12.cpp +++ b/src/gui/rhi/qrhid3d12.cpp @@ -3073,18 +3073,42 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd for (int layer = 0, maxLayer = u.subresDesc.size(); layer < maxLayer; ++layer) { for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) { - const UINT subresource = calcSubresource(UINT(level), is3D ? 0u : UINT(layer), texD->mipLevelCount); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; - UINT64 totalBytes = 0; - D3D12_RESOURCE_DESC desc = res->desc; - if (is3D) { - desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - desc.DepthOrArraySize = 1; - } - dev->GetCopyableFootprints(&desc, subresource, 1, 0, - &layout, nullptr, nullptr, &totalBytes); + D3D12_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Format = res->desc.Format; + footprint.Depth = 1; + quint32 totalBytes = 0; - const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(quint32(totalBytes), 1); + const QSize subresSize = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize) + : subresDesc.sourceSize(); + const QPoint srcPos = subresDesc.sourceTopLeft(); + QPoint dstPos = subresDesc.destinationTopLeft(); + + if (!subresDesc.image().isNull()) { + const QImage img = subresDesc.image(); + const int bpl = img.bytesPerLine(); + footprint.RowPitch = aligned(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + totalBytes = footprint.RowPitch * img.height(); + } else if (!subresDesc.data().isEmpty() && isCompressedFormat(texD->m_format)) { + QSize blockDim; + quint32 bpl = 0; + compressedFormatInfo(texD->m_format, subresSize, &bpl, nullptr, &blockDim); + footprint.RowPitch = aligned(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height(); + totalBytes = footprint.RowPitch * rowCount; + } else if (!subresDesc.data().isEmpty()) { + quint32 bpl = 0; + if (subresDesc.dataStride()) + bpl = subresDesc.dataStride(); + else + textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr); + footprint.RowPitch = aligned(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + totalBytes = footprint.RowPitch * subresSize.height(); + } else { + qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level); + continue; + } + + const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(totalBytes, 1); QD3D12StagingArea::Allocation stagingAlloc; if (smallStagingAreas[currentFrameSlot].remainingCapacity() >= allocSize) stagingAlloc = smallStagingAreas[currentFrameSlot].get(allocSize); @@ -3101,32 +3125,29 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd } } - const UINT requiredBytesPerLine = layout.Footprint.RowPitch; // multiple of 256 - const QSize subresSize = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize) - : subresDesc.sourceSize(); - const QPoint srcPos = subresDesc.sourceTopLeft(); - QPoint dstPos = subresDesc.destinationTopLeft(); - D3D12_TEXTURE_COPY_LOCATION dst; dst.pResource = res->resource; dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = subresource; + dst.SubresourceIndex = calcSubresource(UINT(level), is3D ? 0u : UINT(layer), texD->mipLevelCount); D3D12_TEXTURE_COPY_LOCATION src; src.pResource = stagingAlloc.buffer; src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src.PlacedFootprint.Offset = stagingAlloc.bufferOffset; - src.PlacedFootprint.Footprint = layout.Footprint; D3D12_BOX srcBox; // back, right, bottom are exclusive if (!subresDesc.image().isNull()) { - QImage img = subresDesc.image(); + const QImage img = subresDesc.image(); const int bpc = qMax(1, img.depth() / 8); const int bpl = img.bytesPerLine(); QSize size = subresDesc.sourceSize().isEmpty() ? img.size() : subresDesc.sourceSize(); size.setWidth(qMin(size.width(), img.width() - srcPos.x())); size.setHeight(qMin(size.height(), img.height() - srcPos.y())); + + footprint.Width = size.width(); + footprint.Height = size.height(); + srcBox.left = 0; srcBox.top = 0; srcBox.right = UINT(size.width()); @@ -3137,7 +3158,7 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd const uchar *imgPtr = img.constBits(); const quint32 lineBytes = size.width() * bpc; for (int y = 0, h = size.height(); y < h; ++y) { - memcpy(stagingAlloc.p + y * requiredBytesPerLine, + memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + srcPos.x() * bpc + (y + srcPos.y()) * bpl, lineBytes); } @@ -3154,15 +3175,19 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd // width and height must be multiples of the block width and height srcBox.right = aligned(subresSize.width(), blockDim.width()); srcBox.bottom = aligned(subresSize.height(), blockDim.height()); + srcBox.front = 0; srcBox.back = 1; - const quint32 copyBytes = qMin(bpl, requiredBytesPerLine); + footprint.Width = aligned(subresSize.width(), blockDim.width()); + footprint.Height = aligned(subresSize.height(), blockDim.height()); + + const quint32 copyBytes = qMin(bpl, footprint.RowPitch); const QByteArray imgData = subresDesc.data(); const char *imgPtr = imgData.constData(); const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height(); for (int y = 0; y < rowCount; ++y) - memcpy(stagingAlloc.p + y * requiredBytesPerLine, imgPtr + y * bpl, copyBytes); + memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes); } else if (!subresDesc.data().isEmpty()) { srcBox.left = 0; srcBox.top = 0; @@ -3171,24 +3196,24 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd srcBox.front = 0; srcBox.back = 1; + footprint.Width = subresSize.width(); + footprint.Height = subresSize.height(); + quint32 bpl = 0; if (subresDesc.dataStride()) bpl = subresDesc.dataStride(); else textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr); - const quint32 copyBytes = qMin(bpl, requiredBytesPerLine); + const quint32 copyBytes = qMin(bpl, footprint.RowPitch); const QByteArray data = subresDesc.data(); const char *imgPtr = data.constData(); for (int y = 0, h = subresSize.height(); y < h; ++y) - memcpy(stagingAlloc.p + y * requiredBytesPerLine, imgPtr + y * bpl, copyBytes); - } else { - qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level); - if (ownStagingArea.has_value()) - ownStagingArea->destroyWithDeferredRelease(&releaseQueue); - continue; + memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes); } + src.PlacedFootprint.Footprint = footprint; + cbD->cmdList->CopyTextureRegion(&dst, UINT(dstPos.x()), UINT(dstPos.y()),