Skip to content

Commit

Permalink
feat: staging resource renaming optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
3Shain committed Jan 26, 2025
1 parent 4abd7b6 commit cb8c76c
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 45 deletions.
47 changes: 22 additions & 25 deletions src/d3d11/d3d11_context_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "dxmt_command_queue.hpp"
#include "d3d11_context_impl.cpp"
#include "dxmt_context.hpp"
#include "dxmt_staging.hpp"

namespace dxmt {
struct ContextInternalState {
Expand Down Expand Up @@ -81,6 +82,8 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase {
D3D11_MAPPED_SUBRESOURCE *pMappedResource) override {
UINT buffer_length = 0, &row_pitch = buffer_length;
UINT bind_flag = 0, &depth_pitch = bind_flag;
auto current_seq_id = cmd_queue.CurrentSeqId();
auto coherent_seq_id = cmd_queue.CoherentSeqId();
if (auto dynamic = GetDynamicBuffer(pResource, &buffer_length, &bind_flag)) {
if (!pMappedResource)
return E_INVALIDARG;
Expand All @@ -104,9 +107,7 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase {
}
}

dynamic->updateImmediateName(
ctx_state.cmd_queue.CurrentSeqId(),
dynamic->allocate(ctx_state.cmd_queue.CoherentSeqId()), false);
dynamic->updateImmediateName(current_seq_id, dynamic->allocate(coherent_seq_id), false);
Emit([allocation = dynamic->immediateName(),
buffer = Rc(dynamic->buffer)](ArgumentEncodingContext &enc) mutable {
auto _ = buffer->rename(forward_rc(allocation));
Expand Down Expand Up @@ -139,9 +140,7 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase {
stage.SRVs.set_dirty();
}

dynamic->updateImmediateName(
ctx_state.cmd_queue.CurrentSeqId(),
dynamic->allocate(ctx_state.cmd_queue.CoherentSeqId()), false);
dynamic->updateImmediateName(current_seq_id, dynamic->allocate(coherent_seq_id), false);
Emit([allocation = dynamic->immediateName(),
texture = Rc(dynamic->texture)](ArgumentEncodingContext &enc) mutable {
auto _ = texture->rename(forward_rc(allocation));
Expand All @@ -162,28 +161,24 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase {
return S_OK;
}
if (auto staging = GetStagingResource(pResource, Subresource)) {
if (MapType > 3 || MapType == 0)
return E_INVALIDARG;
while (true) {
auto coh = cmd_queue.CoherentSeqId();
int64_t ret = -1;
switch (MapType) {
case D3D11_MAP_READ:
ret = staging->tryMap(coh, true, false);
break;
case D3D11_MAP_READ_WRITE:
ret = staging->tryMap(coh, false, true);
break;
case D3D11_MAP_WRITE:
ret = staging->tryMap(coh, true, true);
break;
default:
break;
}
if (ret < 0) {
auto result = staging->tryMap(coherent_seq_id, MapType & D3D11_MAP_READ, MapType & D3D11_MAP_WRITE);
if (result == StagingMapResult::Mapped)
return E_FAIL;
if (result == StagingMapResult::Renamable) {
// when write to a buffer that is gpu-readonly
Obj<MTL::Buffer> new_name = staging->allocate(coherent_seq_id);
Emit([staging, new_name](ArgumentEncodingContext &enc) mutable { staging->current = std::move(new_name); });
// can't guarantee a full overwrite
std::memcpy(new_name->contents(), staging->mappedMemory(), new_name->length());
staging->updateImmediateName(current_seq_id, std::move(new_name));
result = StagingMapResult::Mappable;
}
if (ret == 0) {
if (result == StagingMapResult::Mappable) {
TRACE("staging map ready");
pMappedResource->pData = staging->current->contents();
pMappedResource->pData = staging->mappedMemory();
pMappedResource->RowPitch = staging->bytesPerRow;
pMappedResource->DepthPitch = staging->bytesPerImage;
return S_OK;
Expand All @@ -196,7 +191,9 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase {
// and the following calls are essentially no-op
Flush();
TRACE("staging map block");
cmd_queue.FIXME_YieldUntilCoherenceBoundaryUpdate(coh);
cmd_queue.FIXME_YieldUntilCoherenceBoundaryUpdate(coherent_seq_id);
current_seq_id = cmd_queue.CurrentSeqId();
coherent_seq_id = cmd_queue.CoherentSeqId();
};
};
UNIMPLEMENTED("unknown mapped resource (USAGE_DEFAULT?)");
Expand Down
21 changes: 10 additions & 11 deletions src/d3d11/d3d11_context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2715,8 +2715,8 @@ template <typename ContextInternalState> class MTLD3D11DeviceContextImplBase : p
SwitchToBlitEncoder(CommandBufferState::ReadbackBlitEncoderActive);
Emit([src_ = src->buffer(), dst = std::move(staging_dst), DstX, SrcBox](ArgumentEncodingContext &enc) {
auto src = enc.access(src_, SrcBox.left, SrcBox.right - SrcBox.left, DXMT_ENCODER_RESOURCE_ACESS_READ);
enc.encodeBlitCommand([=, &SrcBox, &dst](BlitCommandContext &ctx) {
ctx.encoder->copyFromBuffer(src, SrcBox.left, dst->current, DstX, SrcBox.right - SrcBox.left);
enc.encodeBlitCommand([=, &SrcBox, dst = dst->current](BlitCommandContext &ctx) {
ctx.encoder->copyFromBuffer(src, SrcBox.left, dst, DstX, SrcBox.right - SrcBox.left);
});
});
promote_flush = true;
Expand All @@ -2729,8 +2729,8 @@ template <typename ContextInternalState> class MTLD3D11DeviceContextImplBase : p
SwitchToBlitEncoder(CommandBufferState::UpdateBlitEncoderActive);
Emit([dst_ = dst->buffer(), src = std::move(staging_src), DstX, SrcBox](ArgumentEncodingContext &enc) {
auto dst = enc.access(dst_, DstX, SrcBox.right - SrcBox.left, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
enc.encodeBlitCommand([=, &SrcBox, &src](BlitCommandContext &ctx) {
ctx.encoder->copyFromBuffer(src->current, SrcBox.left, dst, DstX, SrcBox.right - SrcBox.left);
enc.encodeBlitCommand([=, &SrcBox, src = src->current](BlitCommandContext &ctx) {
ctx.encoder->copyFromBuffer(src, SrcBox.left, dst, DstX, SrcBox.right - SrcBox.left);
});
});
} else if (auto src = reinterpret_cast<D3D11ResourceCommon *>(pSrcResource)) {
Expand Down Expand Up @@ -2779,10 +2779,10 @@ template <typename ContextInternalState> class MTLD3D11DeviceContextImplBase : p
auto src = enc.access(src_, cmd.Src.MipLevel, cmd.Src.ArraySlice, DXMT_ENCODER_RESOURCE_ACESS_READ);
auto offset = cmd.DstOrigin.z * dst->bytesPerImage + cmd.DstOrigin.y * dst->bytesPerRow +
cmd.DstOrigin.x * cmd.DstFormat.BytesPerTexel;
enc.encodeBlitCommand([=, &cmd, &dst](BlitCommandContext &ctx) {
enc.encodeBlitCommand([=, &cmd, dst = dst->current, bpr = dst->bytesPerRow,
bpi = dst->bytesPerImage](BlitCommandContext &ctx) {
ctx.encoder->copyFromTexture(
src, cmd.Src.ArraySlice, cmd.Src.MipLevel, cmd.SrcOrigin, cmd.SrcSize, dst->current, offset, dst->bytesPerRow,
dst->bytesPerImage
src, cmd.Src.ArraySlice, cmd.Src.MipLevel, cmd.SrcOrigin, cmd.SrcSize, dst, offset, bpr, bpi
);
});
});
Expand All @@ -2806,13 +2806,12 @@ template <typename ContextInternalState> class MTLD3D11DeviceContextImplBase : p
offset = cmd.SrcOrigin.z * src->bytesPerImage + cmd.SrcOrigin.y * src->bytesPerRow +
cmd.SrcOrigin.x * cmd.SrcFormat.BytesPerTexel;
}
enc.encodeBlitCommand([=, &cmd, &src](BlitCommandContext &ctx) {
enc.encodeBlitCommand([=, &cmd, src = src->current, bpr = src->bytesPerRow,
bpi = src->bytesPerImage](BlitCommandContext &ctx) {
ctx.encoder->copyFromBuffer(
src->current, offset, src->bytesPerRow, src->bytesPerImage, cmd.SrcSize, dst, cmd.Dst.ArraySlice, cmd.Dst.MipLevel,
cmd.DstOrigin
src, offset, bpr, bpi, cmd.SrcSize, dst, cmd.Dst.ArraySlice, cmd.Dst.MipLevel, cmd.DstOrigin
);
});
;
});
} else if (auto src = GetTexture(cmd.pSrc)) {
// on-device copy
Expand Down
50 changes: 42 additions & 8 deletions src/dxmt/dxmt_staging.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include "dxmt_staging.hpp"
#include "Metal/MTLDevice.hpp"

namespace dxmt {

StagingResource::StagingResource(Obj<MTL::Buffer> &&buffer, uint32_t bytes_per_row, uint32_t bytes_per_image) :
current(std::move(buffer)),
bytesPerRow(bytes_per_row),
bytesPerImage(bytes_per_image) {}
bytesPerImage(bytes_per_image),
name_(current) {}

void
StagingResource::incRef() {
Expand All @@ -32,21 +34,53 @@ StagingResource::useCopySource(uint64_t seq_id) {
gpu_occupied_until_finished_seq_id = std::max(seq_id, gpu_occupied_until_finished_seq_id);
}

int64_t
StagingMapResult
StagingResource::tryMap(uint64_t coherent_seq_id, bool read, bool write) {
if (mapped)
return -1;
if (read && coherent_seq_id < cpu_coherent_after_finished_seq_id) {
return cpu_coherent_after_finished_seq_id - coherent_seq_id;
}
return StagingMapResult::Mapped;
if (read && coherent_seq_id < cpu_coherent_after_finished_seq_id)
return StagingMapResult(cpu_coherent_after_finished_seq_id - coherent_seq_id);
if (write && coherent_seq_id < gpu_occupied_until_finished_seq_id) {
return gpu_occupied_until_finished_seq_id - coherent_seq_id;
if (coherent_seq_id >= cpu_coherent_after_finished_seq_id)
return StagingMapResult::Renamable;
return StagingMapResult(gpu_occupied_until_finished_seq_id - coherent_seq_id);
}
return 0;
return StagingMapResult::Mappable;
}

// Clears the `mapped` flag; tryMap() refuses to map (returns
// StagingMapResult::Mapped) while that flag is set.
void
StagingResource::unmap() {
mapped = false;
}

/**
 * Returns a buffer that can become the next name of this staging resource.
 *
 * Only the oldest retired buffer (front of the FIFO) is considered: it is
 * reused when its `will_free_at` sequence id is not greater than
 * `coherent_seq_id`. Otherwise a new buffer is created on the device of the
 * current name, with the same length and resource options.
 *
 * \param coherent_seq_id sequence id compared against `will_free_at`
 * \return the reused or newly created buffer
 */
Obj<MTL::Buffer>
StagingResource::allocate(uint64_t coherent_seq_id) {
  std::lock_guard<dxmt::mutex> lock(mutex_);
  Obj<MTL::Buffer> ret;
  if (!fifo.empty()) {
    // bind by reference: copying the entry would copy the buffer handle
    // only to move out of the copy
    auto &oldest = fifo.front();
    if (oldest.will_free_at <= coherent_seq_id) {
      ret = std::move(oldest.allocation);
      fifo.pop();
    }
  }
  // nothing reusable yet: create a buffer that matches the current name
  if (!ret.ptr())
    ret = transfer(name_->device()->newBuffer(name_->length(), name_->resourceOptions()));
  return ret;
}

void
StagingResource::updateImmediateName(uint64_t current_seq_id, Obj<MTL::Buffer> &&allocation) {
std::lock_guard<dxmt::mutex> lock(mutex_);
fifo.push(QueueEntry{.allocation = std::move(name_), .will_free_at = current_seq_id});
name_ = std::move(allocation);
cpu_coherent_after_finished_seq_id = 0;
gpu_occupied_until_finished_seq_id = 0;
}

} // namespace dxmt
31 changes: 30 additions & 1 deletion src/dxmt/dxmt_staging.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
#pragma once
#include "Metal/MTLBuffer.hpp"
#include "objc_pointer.hpp"
#include "thread.hpp"
#include <cstdint>
#include <atomic>
#include <queue>

namespace dxmt {

/**
 * Result of StagingResource::tryMap().
 *
 * Besides the named values below, tryMap() can also return a difference of
 * two sequence ids converted to this type; the immediate context's Map()
 * treats any such value as "not ready yet", flushes, waits and retries.
 */
enum class StagingMapResult : uint64_t {
  /// No wait is needed: Map() hands out the mapped memory and returns S_OK.
  Mappable = 0,
  /// A write was requested while the GPU-occupied fence has not been reached
  /// (but the CPU-coherent fence has): Map() switches to another buffer and
  /// copies the current contents into it instead of waiting.
  Renamable = ~uint64_t{0},
  /// The `mapped` flag is already set: Map() returns E_FAIL.
  Mapped = ~uint64_t{1},
};

class StagingResource {
public:
void incRef();
Expand All @@ -14,9 +22,22 @@ class StagingResource {
void useCopyDestination(uint64_t seq_id);
void useCopySource(uint64_t seq_id);

int64_t tryMap(uint64_t coherent_seq_id, bool read, bool write);
StagingMapResult tryMap(uint64_t coherent_seq_id, bool read, bool write);
void unmap();

Obj<MTL::Buffer> allocate(uint64_t coherent_seq_id);
void updateImmediateName(uint64_t current_seq_id, Obj<MTL::Buffer> &&allocation);

/**
 * Returns the buffer currently held in `name_`, i.e. the buffer whose
 * contents mappedMemory() exposes.
 */
Obj<MTL::Buffer>
immediateName() {
  return name_;
}

// Returns the contents pointer of the buffer currently held in `name_`;
// this is the pointer Map() reports as pData for a staging resource.
void *
mappedMemory() {
return name_->contents();
}

Obj<MTL::Buffer> current;
/**
readonly
Expand All @@ -30,7 +51,15 @@ class StagingResource {
StagingResource(Obj<MTL::Buffer> &&buffer, uint32_t bytes_per_row, uint32_t bytes_per_image);

private:
// A buffer that was replaced by updateImmediateName() and is kept for reuse.
// Member names and order are relied on by the designated initializer there.
struct QueueEntry {
// the buffer that used to be `name_`
Obj<MTL::Buffer> allocation;
// sequence id recorded at replacement; allocate() reuses the entry once the
// coherent sequence id it is given is no longer smaller than this value
uint64_t will_free_at;
};

Obj<MTL::Buffer> name_;
std::atomic<uint32_t> refcount_ = {0u};
std::queue<QueueEntry> fifo;
dxmt::mutex mutex_;
bool mapped = false;
// prevent read from staging before
uint64_t cpu_coherent_after_finished_seq_id = 0;
Expand Down

0 comments on commit cb8c76c

Please sign in to comment.