diff --git a/examples_tests b/examples_tests
index 9c9b650a01..7906d1cac9 160000
--- a/examples_tests
+++ b/examples_tests
@@ -1 +1 @@
-Subproject commit 9c9b650a011216af555d14c099d8d2bf1b6d7b95
+Subproject commit 7906d1cac91881862aca8295bab0726f76350fec
diff --git a/include/nbl/asset/IDescriptorSetLayout.h b/include/nbl/asset/IDescriptorSetLayout.h
index 6feea80c74..9317379b44 100644
--- a/include/nbl/asset/IDescriptorSetLayout.h
+++ b/include/nbl/asset/IDescriptorSetLayout.h
@@ -118,6 +118,12 @@ class IDescriptorSetLayout : public virtual core::IReferenceCounted
             return m_stageFlags[index.data];
         }
 
+        inline core::bitflag<typename SBinding::E_CREATE_FLAGS> getCreateFlags(const storage_range_index_t index) const
+        {
+            assert(index.data < m_count);
+            return m_createFlags[index.data];
+        }
+
         inline uint32_t getCount(const storage_range_index_t index) const
         {
             assert(index.data < m_count);
diff --git a/include/nbl/core/alloc/address_allocator_traits.h b/include/nbl/core/alloc/address_allocator_traits.h
index 293dc3503e..1b228154f9 100644
--- a/include/nbl/core/alloc/address_allocator_traits.h
+++ b/include/nbl/core/alloc/address_allocator_traits.h
@@ -53,6 +53,18 @@ namespace nbl::core
                 }
             }
 
+            // overload for a single shared alignment: every allocation in the batch uses `alignment`
+            // instead of a per-allocation alignment array
+            static inline void multi_alloc_addr(AddressAlloc& alloc, uint32_t count, size_type* outAddresses, const size_type* bytes,
+                const size_type alignment, const size_type* hint=nullptr) noexcept
+            {
+                for (uint32_t i=0; i<count; i+=maxMultiOps)
+                    impl::address_allocator_traits_base<AddressAlloc,has_func_multi_alloc_addr<AddressAlloc>::value>::multi_alloc_addr(
+                        alloc,std::min(count-i,maxMultiOps),outAddresses+i,bytes+i,alignment,hint ? (hint+i):nullptr);
+            }
+
             static inline void multi_free_addr(AddressAlloc& alloc, uint32_t count, const size_type* addr, const size_type* bytes) noexcept
             {
                 for (uint32_t i=0; i<count; i+=maxMultiOps)
diff --git a/include/nbl/video/IGPUDescriptorSet.h b/include/nbl/video/IGPUDescriptorSet.h
--- a/include/nbl/video/IGPUDescriptorSet.h
+++ b/include/nbl/video/IGPUDescriptorSet.h
@@ -37,6 +37,28 @@ class IGPUDescriptorSet : public asset::IDescriptorSet<video::IGPUDescriptorSetLayout>
+        //! Addressing info for dropping (nullifying) a contiguous range of descriptors, like SWriteDescriptorSet but without any payload
+        struct SDropDescriptorSet
+        {
+            IGPUDescriptorSet* dstSet;
+            uint32_t binding;
+            uint32_t arrayElement;
+            uint32_t count;
+        };
+
+        //! Returns the binding's descriptor type, or ET_COUNT if the layout has no such binding
+        inline asset::IDescriptor::E_TYPE getBindingType(const uint32_t binding) const
+        {
+            for (uint32_t t=0; t<static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); t++)
+            {
+                const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
+                const auto& bindingRedirect = getLayout()->getDescriptorRedirect(type);
+                if (bindingRedirect.getStorageOffset(redirect_t::binding_number_t{binding}).data!=redirect_t::Invalid)
+                    return type;
+            }
+            return asset::IDescriptor::E_TYPE::ET_COUNT;
+        }
+
     protected:
         IGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>&& _layout, core::smart_refctd_ptr<IDescriptorPool>&& pool, IDescriptorPool::SStorageOffsets&& offsets);
         virtual ~IGPUDescriptorSet();
@@ -61,6 +83,7 @@ class IGPUDescriptorSet : public asset::IDescriptorSet<video::IGPUDescriptorSetLayout>
         void processWrite(const SWriteDescriptorSet& write);
+        void dropDescriptors(const SDropDescriptorSet& drop);
         bool validateCopy(const SCopyDescriptorSet& copy) const;
@@ -97,17 +120,6 @@ class IGPUDescriptorSet : public asset::IDescriptorSet<video::IGPUDescriptorSetLayout>
-        inline asset::IDescriptor::E_TYPE getBindingType(const uint32_t binding) const
-        {
-            for (uint32_t t=0; t<static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); t++)
-            {
-                const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
-                const auto& bindingRedirect = getLayout()->getDescriptorRedirect(type);
-                if (bindingRedirect.getStorageOffset(redirect_t::binding_number_t{binding}).data!=redirect_t::Invalid)
-                    return type;
-            }
-            return asset::IDescriptor::E_TYPE::ET_COUNT;
-        }
 
         inline core::smart_refctd_ptr<IGPUSampler>* getMutableSamplers(const uint32_t binding) const
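Reviewer note: `SDropDescriptorSet` mirrors the addressing fields of `SWriteDescriptorSet` with no payload, and the now-public `getBindingType` doubles as the existence check. A minimal sketch of filling one out (the set pointer, binding and range values are made up for illustration):

```cpp
// hypothetical: drop 3 descriptors starting at array element 5 of binding 2
IGPUDescriptorSet::SDropDescriptorSet drop = {};
drop.dstSet = set; // assumed: a valid IGPUDescriptorSet* whose layout declares binding 2
drop.binding = 2;
drop.arrayElement = 5;
drop.count = 3;
// ET_COUNT is the "no such binding" sentinel used throughout this patch
assert(set->getBindingType(drop.binding) != asset::IDescriptor::E_TYPE::ET_COUNT);
```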
diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h
index 557aee37a6..8b831fc3b5 100644
--- a/include/nbl/video/ILogicalDevice.h
+++ b/include/nbl/video/ILogicalDevice.h
@@ -632,6 +632,9 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMemoryAllocator
             return updateDescriptorSets({pDescriptorWrites,descriptorWriteCount},{pDescriptorCopies,descriptorCopyCount});
         }
 
+        // TODO: should this be merged into the existing `updateDescriptorSets`?
+        bool nullifyDescriptors(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors);
+
         //! Renderpasses and Framebuffers
         core::smart_refctd_ptr<IGPURenderpass> createRenderpass(const IGPURenderpass::SCreationParams& params);
         inline core::smart_refctd_ptr<IGPUFramebuffer> createFramebuffer(IGPUFramebuffer::SCreationParams&& params)
@@ -848,6 +851,10 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMemoryAllocator
         };
         virtual void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) = 0;
 
+        // Drops the refcounted references which descriptor lifetime tracking holds for the given indices.
+        // If the `nullDescriptor` device feature is enabled, this also writes null descriptors to the dropped slots.
+        virtual void nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors) = 0;
+
         virtual core::smart_refctd_ptr<IGPURenderpass> createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) = 0;
         virtual core::smart_refctd_ptr<IGPUFramebuffer> createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) = 0;
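Reviewer note: the intended call pattern for the new entry point, sketched under the assumption that `device` is the `ILogicalDevice` that created `drop.dstSet`:

```cpp
const IGPUDescriptorSet::SDropDescriptorSet drops[] = {drop};
// returns false if any dstSet belongs to another device or names a non-existent binding;
// GPU-visible null writes additionally require the `nullDescriptor` feature
const bool ok = device->nullifyDescriptors(drops);
assert(ok);
```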
diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h
new file mode 100644
index 0000000000..601f0ee12a
--- /dev/null
+++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h
@@ -0,0 +1,378 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_VIDEO_SUB_ALLOCATED_DESCRIPTOR_SET_H_
+#define _NBL_VIDEO_SUB_ALLOCATED_DESCRIPTOR_SET_H_
+
+#include "nbl/video/alloc/IBufferAllocator.h"
+
+#include <type_traits>
+#include <map>
+
+namespace nbl::video
+{
+
+class SubAllocatedDescriptorSet : public core::IReferenceCounted
+{
+public:
+    // the address allocator hands out offsets (array elements of a binding)
+    // the reserved allocator provides the memory that keeps the address allocator state
+    using AddressAllocator = core::PoolAddressAllocator<uint32_t>;
+    using ReservedAllocator = core::allocator<uint8_t>;
+    using size_type = typename AddressAllocator::size_type;
+    using value_type = typename AddressAllocator::size_type;
+    static constexpr value_type invalid_value = AddressAllocator::invalid_address;
+
+    class DeferredFreeFunctor
+    {
+    public:
+        inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, const value_type* addresses)
+            : m_addresses(core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<value_type>>(count)),
+              m_composed(composed), m_binding(binding)
+        {
+            memcpy(m_addresses->data(), addresses, count * sizeof(value_type));
+        }
+        inline DeferredFreeFunctor(DeferredFreeFunctor&& other)
+        {
+            operator=(std::move(other));
+        }
+
+        inline auto getWorstCaseCount() const {return m_addresses->size();}
+
+        // Just does the de-allocation, note that the `outNullify` parameter is a reference and gets advanced
+        inline void operator()(IGPUDescriptorSet::SDropDescriptorSet* &outNullify)
+        {
+            #ifdef _NBL_DEBUG
+            assert(m_composed);
+            #endif // _NBL_DEBUG
+            outNullify = m_composed->multi_deallocate(outNullify, m_binding, m_addresses->size(), m_addresses->data());
+            m_composed->m_totalDeferredFrees -= getWorstCaseCount();
+        }
+
+        DeferredFreeFunctor(const DeferredFreeFunctor& other) = delete;
+        DeferredFreeFunctor& operator=(const DeferredFreeFunctor& other) = delete;
+        inline DeferredFreeFunctor& operator=(DeferredFreeFunctor&& other)
+        {
+            m_addresses = std::move(other.m_addresses);
+            m_composed = other.m_composed;
+            m_binding = other.m_binding;
+            return *this;
+        }
+
+        // This is needed for the destructor of TimelineEventHandlerST
+        // Don't call this directly
+        // TODO: Find a workaround for this
+        inline void operator()()
+        {
+            // should never be called, the timeline needs to be drained (see `cull_frees`) before destruction
+            assert(false);
+            // core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls(m_addresses->size());
+            // auto ptr = nulls.data();
+            // operator()(ptr);
+            // auto size = ptr - nulls.data();
+            // m_composed->m_logicalDevice->nullifyDescriptors({nulls.data(),size_type(size)});
+        }
+
+        // Takes the count of allocations we want to free up as a reference: returns true if the amount
+        // of allocations freed was >= `allocationsToFreeUp`, false if there are more left to free
+        inline bool operator()(size_type& allocationsToFreeUp, IGPUDescriptorSet::SDropDescriptorSet* &outNullify)
+        {
+            auto prevNullify = outNullify;
+            operator()(outNullify);
+            auto totalFreed = outNullify-prevNullify;
+
+            // This does the same logic as `bool operator()(size_type&)` on CAsyncSingleBufferSubAllocator
+            bool freedEverything = totalFreed >= allocationsToFreeUp;
+
+            if (freedEverything)
+                allocationsToFreeUp = 0u;
+            else
+                allocationsToFreeUp -= totalFreed;
+            return freedEverything;
+        }
+    protected:
+        core::smart_refctd_dynamic_array<value_type> m_addresses;
+        SubAllocatedDescriptorSet* m_composed; // TODO: shouldn't be called `composed`, maybe `parent` or something
+        uint32_t m_binding;
+    };
+    using EventHandler = MultiTimelineEventHandlerST<DeferredFreeFunctor>;
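+    // Example (sketch, illustrative binding/semaphore values): a deferred free latches one of the
+    // functors above, and it fires once the wait gets polled (cull_frees) or waited on (multi_allocate):
+    //   subAllocDS->multi_deallocate(binding, count, addrs, {semaphore, signalValue});
+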
+protected:
+    struct SubAllocDescriptorSetRange
+    {
+        std::unique_ptr<EventHandler> eventHandler = nullptr;
+        std::unique_ptr<AddressAllocator> addressAllocator = nullptr;
+        std::unique_ptr<ReservedAllocator> reservedAllocator = nullptr;
+        size_t reservedSize = 0;
+        asset::IDescriptor::E_TYPE descriptorType = asset::IDescriptor::E_TYPE::ET_COUNT;
+
+        SubAllocDescriptorSetRange(
+            std::unique_ptr<EventHandler>&& inEventHandler,
+            std::unique_ptr<AddressAllocator>&& inAddressAllocator,
+            std::unique_ptr<ReservedAllocator>&& inReservedAllocator,
+            size_t inReservedSize,
+            asset::IDescriptor::E_TYPE inDescriptorType) :
+            eventHandler(std::move(inEventHandler)), addressAllocator(std::move(inAddressAllocator)),
+            reservedAllocator(std::move(inReservedAllocator)),
+            reservedSize(inReservedSize),
+            descriptorType(inDescriptorType) {}
+        SubAllocDescriptorSetRange() {}
+
+        SubAllocDescriptorSetRange& operator=(SubAllocDescriptorSetRange&& other)
+        {
+            eventHandler = std::move(other.eventHandler);
+            addressAllocator = std::move(other.addressAllocator);
+            reservedAllocator = std::move(other.reservedAllocator);
+            reservedSize = other.reservedSize;
+            descriptorType = other.descriptorType;
+
+            // reset the moved-from range (the unique_ptrs are already null after the moves above)
+            other.reservedSize = 0u;
+            other.descriptorType = asset::IDescriptor::E_TYPE::ET_COUNT;
+            return *this;
+        }
+    };
+    std::map<uint32_t, SubAllocDescriptorSetRange> m_allocatableRanges = {};
+    core::smart_refctd_ptr<video::IGPUDescriptorSet> m_descriptorSet;
+    core::smart_refctd_ptr<video::ILogicalDevice> m_logicalDevice;
+    value_type m_totalDeferredFrees = 0;
+
+    #ifdef _NBL_DEBUG
+    std::recursive_mutex stAccessVerifier;
+    #endif // _NBL_DEBUG
+
+    constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 1u;
+    constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u;
+
+public:
+
+    // constructors
+    inline SubAllocatedDescriptorSet(core::smart_refctd_ptr<video::IGPUDescriptorSet>&& descriptorSet,
+        core::smart_refctd_ptr<video::ILogicalDevice>&& logicalDevice)
+    {
+        auto layout = descriptorSet->getLayout();
+        for (uint32_t descriptorType = 0; descriptorType < static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++)
+        {
+            auto descType = static_cast<asset::IDescriptor::E_TYPE>(descriptorType);
+            auto& redirect = layout->getDescriptorRedirect(descType);
+
+            for (uint32_t i = 0; i < redirect.getBindingCount(); i++)
+            {
+                auto binding = redirect.getBinding(i);
+                auto storageIndex = redirect.findBindingStorageIndex(binding);
+
+                auto count = redirect.getCount(storageIndex);
+                auto flags = redirect.getCreateFlags(storageIndex);
+
+                // Only bindings with all of these flags will be allocatable
+                if (flags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT)
+                    | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT
+                    | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT))
+                {
+                    auto reservedSize = AddressAllocator::reserved_size(MaxDescriptorSetAllocationAlignment, static_cast<size_type>(count), MinDescriptorSetAllocationSize);
+                    auto reservedAllocator = std::unique_ptr<ReservedAllocator>(new ReservedAllocator());
+                    auto addressAllocator = std::unique_ptr<AddressAllocator>(new AddressAllocator(
+                        reservedAllocator->allocate(reservedSize, _NBL_SIMD_ALIGNMENT),
+                        static_cast<size_type>(0), 0u, MaxDescriptorSetAllocationAlignment, static_cast<size_type>(count),
+                        MinDescriptorSetAllocationSize
+                    ));
+                    auto eventHandler = std::unique_ptr<EventHandler>(new EventHandler(core::smart_refctd_ptr<const ILogicalDevice>(logicalDevice)));
+
+                    m_allocatableRanges[binding.data] = SubAllocDescriptorSetRange(std::move(eventHandler), std::move(addressAllocator), std::move(reservedAllocator), reservedSize, descType);
+                    assert(m_allocatableRanges[binding.data].eventHandler->getLogicalDevice());
+                }
+            }
+        }
+        m_descriptorSet = std::move(descriptorSet);
+        m_logicalDevice = std::move(logicalDevice);
+    }
+
+    inline ~SubAllocatedDescriptorSet()
+    {
+        uint32_t remainingFrees;
+        do {
+            remainingFrees = cull_frees();
+        } while (remainingFrees > 0);
+
+        // iterate the map by reference, indexing it with `operator[]` would default-construct missing entries
+        for (auto& [binding, range] : m_allocatableRanges)
+        {
+            if (range.reservedSize == 0)
+                continue;
+            assert(range.eventHandler->getTimelines().size() == 0);
+            auto ptr = reinterpret_cast<const uint8_t*>(core::address_allocator_traits<AddressAllocator>::getReservedSpacePtr(*range.addressAllocator));
+            range.addressAllocator = nullptr;
+            range.reservedAllocator->deallocate(const_cast<uint8_t*>(ptr), range.reservedSize);
+        }
+    }
+
+    // whether that binding index can be sub-allocated
+    inline bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); }
+
+    inline AddressAllocator* getBindingAllocator(uint32_t binding)
+    {
+        auto range = m_allocatableRanges.find(binding);
+        // Check if this binding has an allocator
+        if (range == m_allocatableRanges.end())
+            return nullptr;
+        return range->second.addressAllocator.get();
+    }
+
+    // main methods
+
+#ifdef _NBL_DEBUG
+    inline std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard()
+    {
+        std::unique_lock<std::recursive_mutex> tLock(stAccessVerifier,std::try_to_lock_t());
+        assert(tLock.owns_lock());
+        return tLock;
+    }
+#else
+    inline bool stAccessVerifyDebugGuard() { return false; }
+#endif
+
+    inline video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); }
+
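+    // Usage note (sketch, illustrative values): the entry points below hand out array elements of a binding:
+    //   value_type addr = invalid_value; // outAddresses MUST be primed with invalid_value
+    //   subAllocDS->multi_allocate(std::chrono::steady_clock::now()+std::chrono::milliseconds(1), 0u, 1u, &addr);
+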
+    //! Warning: `outAddresses` must be primed with `invalid_value`, elements not equal to `invalid_value` are skipped (treated as already allocated)
+    inline size_type try_multi_allocate(const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
+    {
+        auto debugGuard = stAccessVerifyDebugGuard();
+
+        // we assume you've validated that the binding is allocatable before trying this
+        auto allocator = getBindingAllocator(binding);
+
+        size_type unallocatedSize = 0u;
+        for (size_type i=0; i<count; i++)
+        {
+            if (outAddresses[i] != AddressAllocator::invalid_address)
+                continue;
+
+            outAddresses[i] = allocator->alloc_addr(1,1);
+            if (outAddresses[i] == AddressAllocator::invalid_address)
+            {
+                unallocatedSize = count - i;
+                break;
+            }
+        }
+
+        return unallocatedSize;
+    }
+
+    template<class Clock=typename std::chrono::steady_clock>
+    inline size_type multi_allocate(const std::chrono::time_point<Clock>& maxWaitPoint, const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
+    {
+        auto debugGuard = stAccessVerifyDebugGuard();
+
+        auto range = m_allocatableRanges.find(binding);
+        // Check if this binding has an allocator
+        if (range == m_allocatableRanges.end())
+            return count;
+
+        // try allocate once
+        size_type unallocatedSize = try_multi_allocate(binding,count,outAddresses);
+        if (!unallocatedSize)
+            return 0u;
+
+        // then try to wait at least once and allocate
+        auto& eventHandler = range->second.eventHandler;
+        core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls;
+        do
+        {
+            // FUTURE TODO: without robustness features we could nullify only the descriptors we don't end up reallocating
+            nulls.resize(m_totalDeferredFrees);
+            auto outNulls = nulls.data();
+            eventHandler->wait(maxWaitPoint, unallocatedSize, outNulls);
+            m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls});
+
+            // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps
+            unallocatedSize = try_multi_allocate(binding,count,outAddresses);
+            if (!unallocatedSize)
+                break;
+        } while(Clock::now()<maxWaitPoint);
+
+        return unallocatedSize;
+    }
+
+    //! Frees the allocations immediately, writing out the drops for the caller to nullify; returns one past the last drop written to `outNullify`
+    inline IGPUDescriptorSet::SDropDescriptorSet* multi_deallocate(IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const value_type* addr)
+    {
+        auto debugGuard = stAccessVerifyDebugGuard();
+
+        auto allocator = getBindingAllocator(binding);
+        if (allocator)
+        for (size_type i=0; i<count; i++)
+        {
+            if (addr[i] == AddressAllocator::invalid_address)
+                continue;
+
+            allocator->free_addr(addr[i], 1);
+            outNullify->dstSet = m_descriptorSet.get();
+            outNullify->binding = binding;
+            outNullify->arrayElement = addr[i]; // nullify the slot that was just freed, not the loop index
+            outNullify->count = 1;
+            outNullify++;
+        }
+        return outNullify;
+    }
+
+    // 100% will defer
+    inline void multi_deallocate(uint32_t binding, const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept
+    {
+        auto range = m_allocatableRanges.find(binding);
+        // Check if this binding has an allocator
+        if (range == m_allocatableRanges.end())
+            return;
+
+        auto& eventHandler = range->second.eventHandler;
+        auto debugGuard = stAccessVerifyDebugGuard();
+        m_totalDeferredFrees += functor.getWorstCaseCount();
+        eventHandler->latch(futureWait,std::move(functor));
+    }
+
+    // Defers the free if `futureWait` still needs to be waited on (a conservative estimate), otherwise nullifies the descriptors immediately
+    inline void multi_deallocate(uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept
+    {
+        if (futureWait.semaphore)
+            multi_deallocate(binding, futureWait, DeferredFreeFunctor(this, binding, count, addr));
+        else
+        {
+            core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls(count);
+            auto actualEnd = multi_deallocate(nulls.data(), binding, count, addr);
+            m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd});
+        }
+    }
+
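+    // Usage note: latched frees only run when their semaphores get polled or waited on, so call
+    // cull_frees() periodically (e.g. once per frame) to bound the growth of m_totalDeferredFrees.
+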
+    //! Returns the number of free events still outstanding
+    inline uint32_t cull_frees() noexcept
+    {
+        auto debugGuard = stAccessVerifyDebugGuard();
+        uint32_t frees = 0;
+        core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls(m_totalDeferredFrees);
+        auto outNulls = nulls.data();
+        // iterate by reference, indexing a std::map with `operator[]` would insert empty ranges
+        for (auto& [binding, range] : m_allocatableRanges)
+            frees += range.eventHandler->poll(outNulls).eventsLeft;
+        m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls});
+        return frees;
+    }
+};
+
+}
+
+#endif
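Reviewer note: an end-to-end sketch of the new class. Everything outside this diff (`set`, `device`, `semaphore`, `signalValue` and the binding number) is assumed for illustration, and binding 0 must have been created with the three `ECF_*` flags the constructor checks for:

```cpp
auto subAllocDS = core::make_smart_refctd_ptr<SubAllocatedDescriptorSet>(
    core::smart_refctd_ptr<video::IGPUDescriptorSet>(set),
    core::smart_refctd_ptr<video::ILogicalDevice>(device));

// allocate one slot: prime with invalid_value, allow waiting on deferred frees for up to ~1ms
SubAllocatedDescriptorSet::value_type slot = SubAllocatedDescriptorSet::invalid_value;
const auto unallocated = subAllocDS->multi_allocate(
    std::chrono::steady_clock::now()+std::chrono::milliseconds(1), /*binding*/0u, 1u, &slot);
assert(unallocated==0u && slot!=SubAllocatedDescriptorSet::invalid_value);

// ... write a descriptor into array element `slot` of binding 0, record and submit work ...

// free: deferred until `semaphore` reaches `signalValue`, or immediate when a null semaphore is passed
subAllocDS->multi_deallocate(0u, 1u, &slot, {semaphore, signalValue});
```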
diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp
index 2af11fb044..847885da02 100644
--- a/src/nbl/video/CVulkanLogicalDevice.cpp
+++ b/src/nbl/video/CVulkanLogicalDevice.cpp
@@ -635,12 +635,15 @@ core::smart_refctd_ptr<IDescriptorPool> CVulkanLogicalDevice::createDescriptorPool_impl(const IDescriptorPool::SCreateInfo& createInfo)
     return nullptr;
 }
 
+// a lot of empirical research went into defining this constant
+constexpr uint32_t MaxDescriptorSetAsWrites = 69u;
+
 void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params)
 {
     // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of
     // VkWriteDescriptorSetAccelerationStructureKHR, VkWriteDescriptorSetAccelerationStructureNV, or VkWriteDescriptorSetInlineUniformBlockEXT
     core::vector<VkWriteDescriptorSet> vk_writeDescriptorSets(params.writes.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr});
-    core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});
+    core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(MaxDescriptorSetAsWrites,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});
 
     core::vector<VkDescriptorBufferInfo> vk_bufferInfos(params.bufferCount);
     core::vector<VkDescriptorImageInfo> vk_imageInfos(params.imageCount);
@@ -727,6 +730,81 @@ void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params)
     m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),vk_copyDescriptorSets.size(),vk_copyDescriptorSets.data());
 }
 
+void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> drops)
+{
+    // without the `nullDescriptor` feature we cannot write null descriptors,
+    // the base class has already dropped the refcounted references
+    if (!getEnabledFeatures().nullDescriptor)
+        return;
+
+    core::vector<VkWriteDescriptorSet> vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr});
+    core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(MaxDescriptorSetAsWrites,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});
+
+    // find the largest single write so one zero-filled buffer can back all of them
+    size_t maxSize = 0;
+    for (auto i = 0; i < drops.size(); i++)
+    {
+        const auto& write = drops[i];
+        const auto descriptorType = write.dstSet->getBindingType(write.binding);
+        size_t descriptorSize;
+        switch (asset::IDescriptor::GetTypeCategory(descriptorType))
+        {
+            case asset::IDescriptor::EC_BUFFER:
+                descriptorSize = sizeof(VkDescriptorBufferInfo);
+                break;
+            case asset::IDescriptor::EC_IMAGE:
+                descriptorSize = sizeof(VkDescriptorImageInfo);
+                break;
+            case asset::IDescriptor::EC_BUFFER_VIEW:
+                descriptorSize = sizeof(VkBufferView);
+                break;
+            case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
+                descriptorSize = sizeof(VkAccelerationStructureKHR);
+                break;
+            default:
+                assert(!"Invalid code path.");
+                descriptorSize = 0;
+                break;
+        }
+        maxSize = core::max(maxSize, write.count * descriptorSize);
+    }
+
+    // VK_NULL_HANDLE and null pointers are all zeros, so one zero-filled buffer can pose as any of the info structs
+    core::vector<uint8_t> nullDescriptors(maxSize, 0u);
+
+    {
+        auto outWrite = vk_writeDescriptorSets.data();
+        auto outWriteAS = vk_writeDescriptorSetAS.data();
+
+        for (auto i=0; i<drops.size(); i++)
+        {
+            const auto& write = drops[i];
+            const auto descriptorType = write.dstSet->getBindingType(write.binding);
+
+            outWrite->dstSet = static_cast<CVulkanDescriptorSet*>(write.dstSet)->getInternalObject();
+            outWrite->dstBinding = write.binding;
+            outWrite->dstArrayElement = write.arrayElement;
+            outWrite->descriptorType = getVkDescriptorTypeFromDescriptorType(descriptorType);
+            outWrite->descriptorCount = write.count;
+            switch (asset::IDescriptor::GetTypeCategory(descriptorType))
+            {
+                case asset::IDescriptor::EC_BUFFER:
+                    outWrite->pBufferInfo = reinterpret_cast<VkDescriptorBufferInfo*>(nullDescriptors.data());
+                    break;
+                case asset::IDescriptor::EC_IMAGE:
+                    outWrite->pImageInfo = reinterpret_cast<VkDescriptorImageInfo*>(nullDescriptors.data());
+                    break;
+                case asset::IDescriptor::EC_BUFFER_VIEW:
+                    outWrite->pTexelBufferView = reinterpret_cast<VkBufferView*>(nullDescriptors.data());
+                    break;
+                case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
+                    outWriteAS->accelerationStructureCount = write.count;
+                    outWriteAS->pAccelerationStructures = reinterpret_cast<VkAccelerationStructureKHR*>(nullDescriptors.data());
+                    outWrite->pNext = outWriteAS++;
+                    break;
+                default:
+                    assert(!"Invalid code path.");
+            }
+            outWrite++;
+        }
+    }
+    m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),0,nullptr);
+}
+
 core::smart_refctd_ptr<IGPURenderpass> CVulkanLogicalDevice::createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation)
 {
diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h
index dd12e0a9d3..b35fa7459c 100644
--- a/src/nbl/video/CVulkanLogicalDevice.h
+++ b/src/nbl/video/CVulkanLogicalDevice.h
@@ -282,6 +282,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice
         // descriptor sets
         core::smart_refctd_ptr<IDescriptorPool> createDescriptorPool_impl(const IDescriptorPool::SCreateInfo& createInfo) override;
         void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) override;
+        void nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors) override;
 
         // renderpasses and framebuffers
         core::smart_refctd_ptr<IGPURenderpass> createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) override;
diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp
index 29b4eaab08..e29211f019 100644
--- a/src/nbl/video/IGPUDescriptorSet.cpp
+++ b/src/nbl/video/IGPUDescriptorSet.cpp
@@ -124,6 +124,31 @@ void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSet& write)
     incrementVersion();
 }
 
+void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop)
+{
+    assert(drop.dstSet == this);
+
+    const auto descriptorType = getBindingType(drop.binding);
+
+    auto* dstDescriptors = drop.dstSet->getDescriptors(descriptorType, drop.binding);
+    auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding);
+
+    if (dstDescriptors)
+        for (uint32_t i = 0; i < drop.count; i++)
+            dstDescriptors[drop.arrayElement + i] = nullptr;
+
+    if (dstSamplers)
+        for (uint32_t i = 0; i < drop.count; i++)
+            dstSamplers[drop.arrayElement + i] = nullptr;
+
+    // we only increment the version to detect UPDATE-AFTER-BIND and automagically invalidate descriptor sets,
+    // so only the path that actually writes (null) descriptors should bump the version
+    if (getOriginDevice()->getEnabledFeatures().nullDescriptor)
+        incrementVersion();
+}
+
 bool IGPUDescriptorSet::validateCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) const
 {
     assert(copy.dstSet == this);
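Reviewer note: on devices without `nullDescriptor` (from VK_EXT_robustness2) the new path only releases the CPU-side smart-pointer references and leaves the GPU-visible descriptor heap untouched, which is why `dropDescriptors` above skips the version bump. A caller can check which behavior it gets:

```cpp
// if false, shaders must not dynamically access dropped bindings afterwards,
// because no null descriptor gets written in their place
const bool writesNulls = device->getEnabledFeatures().nullDescriptor;
```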
diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp
index c1de775731..2c852c7631 100644
--- a/src/nbl/video/ILogicalDevice.cpp
+++ b/src/nbl/video/ILogicalDevice.cpp
@@ -444,6 +444,28 @@ bool ILogicalDevice::updateDescriptorSets(const std::span<const IGPUDescriptorSet::SWriteDescriptorSet> descriptorWrites, const std::span<const IGPUDescriptorSet::SCopyDescriptorSet> descriptorCopies)
     return true;
 }
 
+bool ILogicalDevice::nullifyDescriptors(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors)
+{
+    // validate the whole batch up-front so we never apply a partial one
+    for (const auto& drop : dropDescriptors)
+    {
+        auto ds = drop.dstSet;
+        if (!ds || !ds->wasCreatedBy(this))
+            return false;
+        // ET_COUNT signals that the layout has no such binding
+        if (ds->getBindingType(drop.binding) == asset::IDescriptor::E_TYPE::ET_COUNT)
+            return false;
+    }
+
+    for (const auto& drop : dropDescriptors)
+    {
+        auto ds = drop.dstSet;
+        ds->dropDescriptors(drop);
+    }
+
+    nullifyDescriptors_impl(dropDescriptors);
+    return true;
+}
+
 core::smart_refctd_ptr<IGPURenderpass> ILogicalDevice::createRenderpass(const IGPURenderpass::SCreationParams& params)
 {
     IGPURenderpass::SCreationParamValidationResult validation = IGPURenderpass::validateCreationParams(params);