// Patch overview. This is a unified git diff touching four files. Line breaks (and, it appears,
// any text that stood inside angle brackets, e.g. template arguments and <...> include names)
// were lost before this text was captured, so the hunks below are kept exactly as found.
//
// src/d3d11/d3d11_context_imm.cpp
//   * Adds an include of dxmt_staging.hpp.
//   * The mapping method (presumably Map(); its name lies outside the hunks) now reads
//     cmd_queue.CurrentSeqId() / cmd_queue.CoherentSeqId() once into current_seq_id /
//     coherent_seq_id and passes those to dynamic->allocate() / dynamic->updateImmediateName()
//     in both dynamic paths, replacing the per-call ctx_state.cmd_queue queries.
//   * Staging path: MapType == 0 or MapType > 3 now returns E_INVALIDARG. The switch over
//     D3D11_MAP_READ / D3D11_MAP_READ_WRITE / D3D11_MAP_WRITE is replaced by one call,
//     tryMap(coherent_seq_id, MapType & D3D11_MAP_READ, MapType & D3D11_MAP_WRITE).
//   * StagingMapResult::Mapped returns E_FAIL. StagingMapResult::Renamable allocates a new
//     buffer, emits a command that assigns it to staging->current, memcpy's the currently mapped
//     contents into it (new_name->length() bytes), installs it with updateImmediateName(), and
//     then continues as Mappable.
//   * pData now comes from staging->mappedMemory() instead of staging->current->contents().
//   * After FIXME_YieldUntilCoherenceBoundaryUpdate() both sequence ids are re-read before the
//     loop retries.
//
// src/d3d11/d3d11_context_impl.cpp
//   * Four encodeBlitCommand lambdas stop capturing the staging object by reference and instead
//     init-capture staging->current (plus bytesPerRow / bytesPerImage for the texture copies) by
//     value; the copy calls use those captured values. A stray ';' statement is removed.
//     Presumably this pins the buffer that is current when the command is encoded, now that
//     `current` can be replaced by a rename -- TODO confirm intent.
//
// src/dxmt/dxmt_staging.cpp
//   * Adds an include of Metal/MTLDevice.hpp; the constructor also initialises name_ from current.
//   * tryMap() now returns StagingMapResult: Mapped when `mapped` is set; for a read with
//     coherent_seq_id < cpu_coherent_after_finished_seq_id, the difference cast to the enum; for a
//     write with coherent_seq_id < gpu_occupied_until_finished_seq_id, Renamable when
//     coherent_seq_id >= cpu_coherent_after_finished_seq_id, otherwise the difference cast to the
//     enum; Mappable in every other case.
//   * New allocate(): under mutex_, takes the front fifo entry when its will_free_at is not
//     greater than coherent_seq_id; otherwise creates a buffer via name_->device()->newBuffer()
//     with name_'s length and resourceOptions.
//   * New updateImmediateName(): under mutex_, pushes the old name_ onto fifo with
//     will_free_at = current_seq_id, installs the new allocation as name_, and resets
//     cpu_coherent_after_finished_seq_id and gpu_occupied_until_finished_seq_id to 0.
//
// src/dxmt/dxmt_staging.hpp
//   * Adds an include of thread.hpp and one more include whose name was lost.
//   * Adds enum class StagingMapResult : uint64_t { Mappable = 0, Renamable = 0xffffffffffffffff,
//     Mapped = 0xfffffffffffffffe }.
//   * Declares allocate(), updateImmediateName(), immediateName() (returns name_) and
//     mappedMemory() (returns name_->contents()); adds private QueueEntry { allocation,
//     will_free_at }, name_, fifo and mutex_.
//
// NOTE(review): the staging path writes through pMappedResource, but no null check is visible
//   in that hunk, whereas the dynamic-buffer path returns E_INVALIDARG for a null pointer --
//   confirm whether a check exists outside the hunks.
// NOTE(review): assuming the standard D3D11_MAP values (READ = 1, WRITE = 2, READ_WRITE = 3),
//   the new flag derivation yields (read, write) = (true, false) / (false, true) / (true, true),
//   whereas the removed switch passed (false, true) for READ_WRITE and (true, true) for WRITE.
//   This is a behaviour change, presumably an intended fix -- confirm.
// NOTE(review): allocate() copies fifo.front() into a local before moving the allocation out of
//   that copy, and every path through its for (;;) body ends in break, so the loop runs at most
//   once.
// NOTE(review): tryMap() returns wait distances in the same value space as the named enumerators;
//   the caller shown here only compares against the named values.
// NOTE(review): immediateName() in the header is followed by a redundant ';'.
diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 06e056b7..7e7cbdac 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -3,6 +3,7 @@ #include "dxmt_command_queue.hpp" #include "d3d11_context_impl.cpp" #include "dxmt_context.hpp" +#include "dxmt_staging.hpp" namespace dxmt { struct ContextInternalState { @@ -81,6 +82,8 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase { D3D11_MAPPED_SUBRESOURCE *pMappedResource) override { UINT buffer_length = 0, &row_pitch = buffer_length; UINT bind_flag = 0, &depth_pitch = bind_flag; + auto current_seq_id = cmd_queue.CurrentSeqId(); + auto coherent_seq_id = cmd_queue.CoherentSeqId(); if (auto dynamic = GetDynamicBuffer(pResource, &buffer_length, &bind_flag)) { if (!pMappedResource) return E_INVALIDARG; @@ -104,9 +107,7 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase { } } - dynamic->updateImmediateName( - ctx_state.cmd_queue.CurrentSeqId(), - dynamic->allocate(ctx_state.cmd_queue.CoherentSeqId()), false); + dynamic->updateImmediateName(current_seq_id, dynamic->allocate(coherent_seq_id), false); Emit([allocation = dynamic->immediateName(), buffer = Rc(dynamic->buffer)](ArgumentEncodingContext &enc) mutable { auto _ = buffer->rename(forward_rc(allocation)); @@ -139,9 +140,7 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase { stage.SRVs.set_dirty(); } - dynamic->updateImmediateName( - ctx_state.cmd_queue.CurrentSeqId(), - dynamic->allocate(ctx_state.cmd_queue.CoherentSeqId()), false); + dynamic->updateImmediateName(current_seq_id, dynamic->allocate(coherent_seq_id), false); Emit([allocation = dynamic->immediateName(), texture = Rc(dynamic->texture)](ArgumentEncodingContext &enc) mutable { auto _ = texture->rename(forward_rc(allocation)); @@ -162,28 +161,24 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase { return S_OK; } if (auto staging = GetStagingResource(pResource, Subresource)) { + if (MapType > 3 || 
MapType == 0) + return E_INVALIDARG; while (true) { - auto coh = cmd_queue.CoherentSeqId(); - int64_t ret = -1; - switch (MapType) { - case D3D11_MAP_READ: - ret = staging->tryMap(coh, true, false); - break; - case D3D11_MAP_READ_WRITE: - ret = staging->tryMap(coh, false, true); - break; - case D3D11_MAP_WRITE: - ret = staging->tryMap(coh, true, true); - break; - default: - break; - } - if (ret < 0) { + auto result = staging->tryMap(coherent_seq_id, MapType & D3D11_MAP_READ, MapType & D3D11_MAP_WRITE); + if (result == StagingMapResult::Mapped) return E_FAIL; + if (result == StagingMapResult::Renamable) { + // when write to a buffer that is gpu-readonly + Obj new_name = staging->allocate(coherent_seq_id); + Emit([staging, new_name](ArgumentEncodingContext &enc) mutable { staging->current = std::move(new_name); }); + // can't guarantee a full overwrite + std::memcpy(new_name->contents(), staging->mappedMemory(), new_name->length()); + staging->updateImmediateName(current_seq_id, std::move(new_name)); + result = StagingMapResult::Mappable; } - if (ret == 0) { + if (result == StagingMapResult::Mappable) { TRACE("staging map ready"); - pMappedResource->pData = staging->current->contents(); + pMappedResource->pData = staging->mappedMemory(); pMappedResource->RowPitch = staging->bytesPerRow; pMappedResource->DepthPitch = staging->bytesPerImage; return S_OK; @@ -196,7 +191,9 @@ class MTLD3D11ImmediateContext : public ImmediateContextBase { // and the following calls are essentially no-op Flush(); TRACE("staging map block"); - cmd_queue.FIXME_YieldUntilCoherenceBoundaryUpdate(coh); + cmd_queue.FIXME_YieldUntilCoherenceBoundaryUpdate(coherent_seq_id); + current_seq_id = cmd_queue.CurrentSeqId(); + coherent_seq_id = cmd_queue.CoherentSeqId(); }; }; UNIMPLEMENTED("unknown mapped resource (USAGE_DEFAULT?)"); diff --git a/src/d3d11/d3d11_context_impl.cpp b/src/d3d11/d3d11_context_impl.cpp index c850c585..fd3ce478 100644 --- a/src/d3d11/d3d11_context_impl.cpp +++ 
b/src/d3d11/d3d11_context_impl.cpp @@ -2715,8 +2715,8 @@ template class MTLD3D11DeviceContextImplBase : p SwitchToBlitEncoder(CommandBufferState::ReadbackBlitEncoderActive); Emit([src_ = src->buffer(), dst = std::move(staging_dst), DstX, SrcBox](ArgumentEncodingContext &enc) { auto src = enc.access(src_, SrcBox.left, SrcBox.right - SrcBox.left, DXMT_ENCODER_RESOURCE_ACESS_READ); - enc.encodeBlitCommand([=, &SrcBox, &dst](BlitCommandContext &ctx) { - ctx.encoder->copyFromBuffer(src, SrcBox.left, dst->current, DstX, SrcBox.right - SrcBox.left); + enc.encodeBlitCommand([=, &SrcBox, dst = dst->current](BlitCommandContext &ctx) { + ctx.encoder->copyFromBuffer(src, SrcBox.left, dst, DstX, SrcBox.right - SrcBox.left); }); }); promote_flush = true; @@ -2729,8 +2729,8 @@ template class MTLD3D11DeviceContextImplBase : p SwitchToBlitEncoder(CommandBufferState::UpdateBlitEncoderActive); Emit([dst_ = dst->buffer(), src = std::move(staging_src), DstX, SrcBox](ArgumentEncodingContext &enc) { auto dst = enc.access(dst_, DstX, SrcBox.right - SrcBox.left, DXMT_ENCODER_RESOURCE_ACESS_WRITE); - enc.encodeBlitCommand([=, &SrcBox, &src](BlitCommandContext &ctx) { - ctx.encoder->copyFromBuffer(src->current, SrcBox.left, dst, DstX, SrcBox.right - SrcBox.left); + enc.encodeBlitCommand([=, &SrcBox, src = src->current](BlitCommandContext &ctx) { + ctx.encoder->copyFromBuffer(src, SrcBox.left, dst, DstX, SrcBox.right - SrcBox.left); }); }); } else if (auto src = reinterpret_cast(pSrcResource)) { @@ -2779,10 +2779,10 @@ template class MTLD3D11DeviceContextImplBase : p auto src = enc.access(src_, cmd.Src.MipLevel, cmd.Src.ArraySlice, DXMT_ENCODER_RESOURCE_ACESS_READ); auto offset = cmd.DstOrigin.z * dst->bytesPerImage + cmd.DstOrigin.y * dst->bytesPerRow + cmd.DstOrigin.x * cmd.DstFormat.BytesPerTexel; - enc.encodeBlitCommand([=, &cmd, &dst](BlitCommandContext &ctx) { + enc.encodeBlitCommand([=, &cmd, dst = dst->current, bpr = dst->bytesPerRow, + bpi = dst->bytesPerImage](BlitCommandContext 
&ctx) { ctx.encoder->copyFromTexture( - src, cmd.Src.ArraySlice, cmd.Src.MipLevel, cmd.SrcOrigin, cmd.SrcSize, dst->current, offset, dst->bytesPerRow, - dst->bytesPerImage + src, cmd.Src.ArraySlice, cmd.Src.MipLevel, cmd.SrcOrigin, cmd.SrcSize, dst, offset, bpr, bpi ); }); }); @@ -2806,13 +2806,12 @@ template class MTLD3D11DeviceContextImplBase : p offset = cmd.SrcOrigin.z * src->bytesPerImage + cmd.SrcOrigin.y * src->bytesPerRow + cmd.SrcOrigin.x * cmd.SrcFormat.BytesPerTexel; } - enc.encodeBlitCommand([=, &cmd, &src](BlitCommandContext &ctx) { + enc.encodeBlitCommand([=, &cmd, src = src->current, bpr = src->bytesPerRow, + bpi = src->bytesPerImage](BlitCommandContext &ctx) { ctx.encoder->copyFromBuffer( - src->current, offset, src->bytesPerRow, src->bytesPerImage, cmd.SrcSize, dst, cmd.Dst.ArraySlice, cmd.Dst.MipLevel, - cmd.DstOrigin + src, offset, bpr, bpi, cmd.SrcSize, dst, cmd.Dst.ArraySlice, cmd.Dst.MipLevel, cmd.DstOrigin ); }); - ; }); } else if (auto src = GetTexture(cmd.pSrc)) { // on-device copy diff --git a/src/dxmt/dxmt_staging.cpp b/src/dxmt/dxmt_staging.cpp index 632b4dab..289e980a 100644 --- a/src/dxmt/dxmt_staging.cpp +++ b/src/dxmt/dxmt_staging.cpp @@ -1,11 +1,13 @@ #include "dxmt_staging.hpp" +#include "Metal/MTLDevice.hpp" namespace dxmt { StagingResource::StagingResource(Obj &&buffer, uint32_t bytes_per_row, uint32_t bytes_per_image) : current(std::move(buffer)), bytesPerRow(bytes_per_row), - bytesPerImage(bytes_per_image) {} + bytesPerImage(bytes_per_image), + name_(current) {} void StagingResource::incRef() { @@ -32,21 +34,53 @@ StagingResource::useCopySource(uint64_t seq_id) { gpu_occupied_until_finished_seq_id = std::max(seq_id, gpu_occupied_until_finished_seq_id); } -int64_t +StagingMapResult StagingResource::tryMap(uint64_t coherent_seq_id, bool read, bool write) { if (mapped) - return -1; - if (read && coherent_seq_id < cpu_coherent_after_finished_seq_id) { - return cpu_coherent_after_finished_seq_id - coherent_seq_id; - } + return 
StagingMapResult::Mapped; + if (read && coherent_seq_id < cpu_coherent_after_finished_seq_id) + return StagingMapResult(cpu_coherent_after_finished_seq_id - coherent_seq_id); if (write && coherent_seq_id < gpu_occupied_until_finished_seq_id) { - return gpu_occupied_until_finished_seq_id - coherent_seq_id; + if (coherent_seq_id >= cpu_coherent_after_finished_seq_id) + return StagingMapResult::Renamable; + return StagingMapResult(gpu_occupied_until_finished_seq_id - coherent_seq_id); } - return 0; + return StagingMapResult::Mappable; } + void StagingResource::unmap() { mapped = false; } +Obj +StagingResource::allocate(uint64_t coherent_seq_id) { + std::lock_guard lock(mutex_); + Obj ret; + for (;;) { + if (fifo.empty()) { + break; + } + auto entry = fifo.front(); + if (entry.will_free_at > coherent_seq_id) { + break; + } + ret = std::move(entry.allocation); + fifo.pop(); + break; + } + if (!ret.ptr()) + ret = transfer(name_->device()->newBuffer(name_->length(), name_->resourceOptions())); + return ret; +} + +void +StagingResource::updateImmediateName(uint64_t current_seq_id, Obj &&allocation) { + std::lock_guard lock(mutex_); + fifo.push(QueueEntry{.allocation = std::move(name_), .will_free_at = current_seq_id}); + name_ = std::move(allocation); + cpu_coherent_after_finished_seq_id = 0; + gpu_occupied_until_finished_seq_id = 0; +} + } // namespace dxmt diff --git a/src/dxmt/dxmt_staging.hpp b/src/dxmt/dxmt_staging.hpp index 152e6464..2e627c16 100644 --- a/src/dxmt/dxmt_staging.hpp +++ b/src/dxmt/dxmt_staging.hpp @@ -1,11 +1,19 @@ #pragma once #include "Metal/MTLBuffer.hpp" #include "objc_pointer.hpp" +#include "thread.hpp" #include #include +#include namespace dxmt { +enum class StagingMapResult : uint64_t { + Mappable = 0, + Renamable = 0xffffffffffffffff, + Mapped = 0xfffffffffffffffe, +}; + class StagingResource { public: void incRef(); @@ -14,9 +22,22 @@ class StagingResource { void useCopyDestination(uint64_t seq_id); void useCopySource(uint64_t seq_id); - 
int64_t tryMap(uint64_t coherent_seq_id, bool read, bool write); + StagingMapResult tryMap(uint64_t coherent_seq_id, bool read, bool write); void unmap(); + Obj allocate(uint64_t coherent_seq_id); + void updateImmediateName(uint64_t current_seq_id, Obj &&allocation); + + Obj + immediateName() { + return name_; + }; + + void * + mappedMemory() { + return name_->contents(); + } + Obj current; /** readonly @@ -30,7 +51,15 @@ class StagingResource { StagingResource(Obj &&buffer, uint32_t bytes_per_row, uint32_t bytes_per_image); private: + struct QueueEntry { + Obj allocation; + uint64_t will_free_at; + }; + + Obj name_; std::atomic refcount_ = {0u}; + std::queue fifo; + dxmt::mutex mutex_; bool mapped = false; // prevent read from staging before uint64_t cpu_coherent_after_finished_seq_id = 0;